//=== AArch64PostLegalizerLowering.cpp --------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
/// \file
/// Post-legalization lowering for instructions.
///
/// This is used to offload pattern matching from the selector.
///
/// For example, this combiner will notice that a G_SHUFFLE_VECTOR is actually
/// a G_ZIP, G_UZP, etc.
///
/// General optimization combines should be handled by either the
/// AArch64PostLegalizerCombiner or the AArch64PreLegalizerCombiner.
///
//===----------------------------------------------------------------------===//

#include "AArch64.h"
#include "AArch64ExpandImm.h"
#include "AArch64GlobalISelUtils.h"
#include "AArch64PerfectShuffle.h"
#include "AArch64Subtarget.h"
#include "llvm/CodeGen/GlobalISel/Combiner.h"
#include "llvm/CodeGen/GlobalISel/CombinerHelper.h"
#include "llvm/CodeGen/GlobalISel/CombinerInfo.h"
#include "llvm/CodeGen/GlobalISel/GIMatchTableExecutorImpl.h"
#include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h"
#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
#include "llvm/CodeGen/GlobalISel/LegalizerHelper.h"
#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetOpcodes.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/Support/ErrorHandling.h"
#include <optional>

#define GET_GICOMBINER_DEPS
#include "AArch64GenPostLegalizeGILowering.inc"
#undef GET_GICOMBINER_DEPS

#define DEBUG_TYPE "aarch64-postlegalizer-lowering"

using namespace llvm;
using namespace MIPatternMatch;
using namespace AArch64GISelUtils;

#define GET_GICOMBINER_TYPES
#include "AArch64GenPostLegalizeGILowering.inc"
#undef GET_GICOMBINER_TYPES

namespace {

/// Represents a pseudo instruction which replaces a G_SHUFFLE_VECTOR.
///
/// Used for matching target-supported shuffles before codegen.
struct ShuffleVectorPseudo {
  unsigned Opc;                 ///< Opcode for the instruction. (E.g. G_ZIP1)
  Register Dst;                 ///< Destination register.
  SmallVector<SrcOp, 2> SrcOps; ///< Source registers.
  ShuffleVectorPseudo(unsigned Opc, Register Dst,
                      std::initializer_list<SrcOp> SrcOps)
      : Opc(Opc), Dst(Dst), SrcOps(SrcOps) {}
  ShuffleVectorPseudo() = default;
};

/// Check if a G_EXT instruction can handle a shuffle mask \p M when the vector
/// sources of the shuffle are different.
std::optional<std::pair<bool, uint64_t>> getExtMask(ArrayRef<int> M,
                                                    unsigned NumElts) {
  // Look for the first non-undef element.
  auto FirstRealElt = find_if(M, [](int Elt) { return Elt >= 0; });
  if (FirstRealElt == M.end())
    return std::nullopt;

  // Use APInt to handle overflow when calculating expected element.
  unsigned MaskBits = APInt(32, NumElts * 2).logBase2();
  APInt ExpectedElt = APInt(MaskBits, *FirstRealElt + 1, false, true);

  // The following shuffle indices must be the successive elements after the
  // first real element.
  if (any_of(
          make_range(std::next(FirstRealElt), M.end()),
          [&ExpectedElt](int Elt) { return Elt != ExpectedElt++ && Elt >= 0; }))
    return std::nullopt;

  // The index of an EXT is the first element if it is not UNDEF.
  // Watch out for the beginning UNDEFs. The EXT index should be the expected
  // value of the first element. E.g.
  // <-1, -1, 3, ...> is treated as <1, 2, 3, ...>.
  // <-1, -1, 0, 1, ...> is treated as <2*NumElts-2, 2*NumElts-1, 0, 1, ...>.
  // ExpectedElt is the last mask index plus 1.
  uint64_t Imm = ExpectedElt.getZExtValue();
  bool ReverseExt = false;

  // There are two distinct cases that require reversing the input vectors.
  // For example, for the vector <4 x i32> we have the following cases:
  // Case 1: shufflevector(<4 x i32>, <4 x i32>, <-1, -1, -1, 0>)
  // Case 2: shufflevector(<4 x i32>, <4 x i32>, <-1, -1, 7, 0>)
  // In both cases we end up using the mask <5, 6, 7, 0>, which requires
  // reversing the two input vectors.
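  // E.g. in Case 2 above, with NumElts = 4 and M = <-1, -1, 7, 0>: ExpectedElt
  // wraps modulo 2 * NumElts while scanning and finishes at 1, so this returns
  // {ReverseExt = true, Imm = 1}, i.e. an EXT of the swapped sources starting
  // at element 1.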
  if (Imm < NumElts)
    ReverseExt = true;
  else
    Imm -= NumElts;
  return std::make_pair(ReverseExt, Imm);
}

/// Helper function for matchINS.
///
/// \returns a value when \p M is an ins mask for \p NumInputElements.
///
/// First element of the returned pair is true when the produced
/// G_INSERT_VECTOR_ELT destination should be the LHS of the G_SHUFFLE_VECTOR.
///
/// Second element is the destination lane for the G_INSERT_VECTOR_ELT.
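///
/// E.g. with \p NumInputElements = 4, the mask <0, 1, 6, 3> matches the LHS
/// identity in every lane but lane 2, so this returns {true, 2}: the shuffle
/// can be done by inserting RHS element 2 into LHS lane 2.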
std::optional<std::pair<bool, int>> isINSMask(ArrayRef<int> M,
                                              int NumInputElements) {
  if (M.size() != static_cast<size_t>(NumInputElements))
    return std::nullopt;
  int NumLHSMatch = 0, NumRHSMatch = 0;
  int LastLHSMismatch = -1, LastRHSMismatch = -1;
  for (int Idx = 0; Idx < NumInputElements; ++Idx) {
    if (M[Idx] == -1) {
      ++NumLHSMatch;
      ++NumRHSMatch;
      continue;
    }
    M[Idx] == Idx ? ++NumLHSMatch : LastLHSMismatch = Idx;
    M[Idx] == Idx + NumInputElements ? ++NumRHSMatch : LastRHSMismatch = Idx;
  }
  const int NumNeededToMatch = NumInputElements - 1;
  if (NumLHSMatch == NumNeededToMatch)
    return std::make_pair(true, LastLHSMismatch);
  if (NumRHSMatch == NumNeededToMatch)
    return std::make_pair(false, LastRHSMismatch);
  return std::nullopt;
}

/// \return true if a G_SHUFFLE_VECTOR instruction \p MI can be replaced with a
/// G_REV instruction. Returns the appropriate G_REV opcode in \p MatchInfo.
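///
/// E.g. for <8 x s8>, the mask <3, 2, 1, 0, 7, 6, 5, 4> reverses the bytes
/// within each 32-bit block, so it lowers to G_REV32.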
bool matchREV(MachineInstr &MI, MachineRegisterInfo &MRI,
              ShuffleVectorPseudo &MatchInfo) {
  assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
  ArrayRef<int> ShuffleMask = MI.getOperand(3).getShuffleMask();
  Register Dst = MI.getOperand(0).getReg();
  Register Src = MI.getOperand(1).getReg();
  LLT Ty = MRI.getType(Dst);
  unsigned EltSize = Ty.getScalarSizeInBits();

  // Element size for a rev cannot be 64.
  if (EltSize == 64)
    return false;

  unsigned NumElts = Ty.getNumElements();

  // Try to produce a G_REV instruction.
  for (unsigned LaneSize : {64U, 32U, 16U}) {
    if (isREVMask(ShuffleMask, EltSize, NumElts, LaneSize)) {
      unsigned Opcode;
      if (LaneSize == 64U)
        Opcode = AArch64::G_REV64;
      else if (LaneSize == 32U)
        Opcode = AArch64::G_REV32;
      else
        Opcode = AArch64::G_BSWAP;

      MatchInfo = ShuffleVectorPseudo(Opcode, Dst, {Src});
      return true;
    }
  }

  return false;
}

/// \return true if a G_SHUFFLE_VECTOR instruction \p MI can be replaced with
/// a G_TRN1 or G_TRN2 instruction.
bool matchTRN(MachineInstr &MI, MachineRegisterInfo &MRI,
              ShuffleVectorPseudo &MatchInfo) {
  assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
  unsigned WhichResult;
  unsigned OperandOrder;
  ArrayRef<int> ShuffleMask = MI.getOperand(3).getShuffleMask();
  Register Dst = MI.getOperand(0).getReg();
  unsigned NumElts = MRI.getType(Dst).getNumElements();
  if (!isTRNMask(ShuffleMask, NumElts, WhichResult, OperandOrder))
    return false;
  unsigned Opc = (WhichResult == 0) ? AArch64::G_TRN1 : AArch64::G_TRN2;
  Register V1 = MI.getOperand(OperandOrder == 0 ? 1 : 2).getReg();
  Register V2 = MI.getOperand(OperandOrder == 0 ? 2 : 1).getReg();
  MatchInfo = ShuffleVectorPseudo(Opc, Dst, {V1, V2});
  return true;
}

/// \return true if a G_SHUFFLE_VECTOR instruction \p MI can be replaced with
/// a G_UZP1 or G_UZP2 instruction.
///
/// \param [in] MI - The shuffle vector instruction.
/// \param [out] MatchInfo - Either G_UZP1 or G_UZP2 on success.
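///
/// E.g. for NumElts = 8, the mask <0, 2, 4, 6, 8, 10, 12, 14> concatenates
/// the even-indexed elements of both sources, so it lowers to G_UZP1.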
bool matchUZP(MachineInstr &MI, MachineRegisterInfo &MRI,
              ShuffleVectorPseudo &MatchInfo) {
  assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
  unsigned WhichResult;
  ArrayRef<int> ShuffleMask = MI.getOperand(3).getShuffleMask();
  Register Dst = MI.getOperand(0).getReg();
  unsigned NumElts = MRI.getType(Dst).getNumElements();
  if (!isUZPMask(ShuffleMask, NumElts, WhichResult))
    return false;
  unsigned Opc = (WhichResult == 0) ? AArch64::G_UZP1 : AArch64::G_UZP2;
  Register V1 = MI.getOperand(1).getReg();
  Register V2 = MI.getOperand(2).getReg();
  MatchInfo = ShuffleVectorPseudo(Opc, Dst, {V1, V2});
  return true;
}

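/// \return true if a G_SHUFFLE_VECTOR instruction \p MI can be replaced with
/// a G_ZIP1 or G_ZIP2 instruction. E.g. for NumElts = 8, the mask
/// <0, 8, 1, 9, 2, 10, 3, 11> interleaves the low halves of both sources, so
/// it lowers to G_ZIP1.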
bool matchZip(MachineInstr &MI, MachineRegisterInfo &MRI,
              ShuffleVectorPseudo &MatchInfo) {
  assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
  unsigned WhichResult;
  unsigned OperandOrder;
  ArrayRef<int> ShuffleMask = MI.getOperand(3).getShuffleMask();
  Register Dst = MI.getOperand(0).getReg();
  unsigned NumElts = MRI.getType(Dst).getNumElements();
  if (!isZIPMask(ShuffleMask, NumElts, WhichResult, OperandOrder))
    return false;
  unsigned Opc = (WhichResult == 0) ? AArch64::G_ZIP1 : AArch64::G_ZIP2;
  Register V1 = MI.getOperand(OperandOrder == 0 ? 1 : 2).getReg();
  Register V2 = MI.getOperand(OperandOrder == 0 ? 2 : 1).getReg();
  MatchInfo = ShuffleVectorPseudo(Opc, Dst, {V1, V2});
  return true;
}

/// Helper function for matchDup.
bool matchDupFromInsertVectorElt(int Lane, MachineInstr &MI,
                                 MachineRegisterInfo &MRI,
                                 ShuffleVectorPseudo &MatchInfo) {
  if (Lane != 0)
    return false;

  // Try to match a vector splat operation into a dup instruction.
  // We're looking for this pattern:
  //
  // %scalar:gpr(s64) = COPY $x0
  // %undef:fpr(<2 x s64>) = G_IMPLICIT_DEF
  // %cst0:gpr(s32) = G_CONSTANT i32 0
  // %zerovec:fpr(<2 x s32>) = G_BUILD_VECTOR %cst0(s32), %cst0(s32)
  // %ins:fpr(<2 x s64>) = G_INSERT_VECTOR_ELT %undef, %scalar(s64), %cst0(s32)
  // %splat:fpr(<2 x s64>) = G_SHUFFLE_VECTOR %ins(<2 x s64>), %undef,
  //                                          %zerovec(<2 x s32>)
  //
  // ...into:
  // %splat = G_DUP %scalar

  // Begin matching the insert.
  auto *InsMI = getOpcodeDef(TargetOpcode::G_INSERT_VECTOR_ELT,
                             MI.getOperand(1).getReg(), MRI);
  if (!InsMI)
    return false;
  // Match the undef vector operand.
  if (!getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, InsMI->getOperand(1).getReg(),
                    MRI))
    return false;

  // Match the index constant 0.
  if (!mi_match(InsMI->getOperand(3).getReg(), MRI, m_ZeroInt()))
    return false;

  MatchInfo = ShuffleVectorPseudo(AArch64::G_DUP, MI.getOperand(0).getReg(),
                                  {InsMI->getOperand(2).getReg()});
  return true;
}

/// Helper function for matchDup.
bool matchDupFromBuildVector(int Lane, MachineInstr &MI,
                             MachineRegisterInfo &MRI,
                             ShuffleVectorPseudo &MatchInfo) {
  assert(Lane >= 0 && "Expected positive lane?");
  int NumElements = MRI.getType(MI.getOperand(1).getReg()).getNumElements();
  // Test if the LHS is a BUILD_VECTOR. If it is, then we can just reference the
  // lane's definition directly.
  auto *BuildVecMI =
      getOpcodeDef(TargetOpcode::G_BUILD_VECTOR,
                   MI.getOperand(Lane < NumElements ? 1 : 2).getReg(), MRI);
  // If Lane >= NumElements, the lane points into the RHS vector, so rebase it
  // to index from the RHS.
  if (NumElements <= Lane)
    Lane -= NumElements;

  if (!BuildVecMI)
    return false;
  Register Reg = BuildVecMI->getOperand(Lane + 1).getReg();
  MatchInfo =
      ShuffleVectorPseudo(AArch64::G_DUP, MI.getOperand(0).getReg(), {Reg});
  return true;
}

bool matchDup(MachineInstr &MI, MachineRegisterInfo &MRI,
              ShuffleVectorPseudo &MatchInfo) {
  assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
  auto MaybeLane = getSplatIndex(MI);
  if (!MaybeLane)
    return false;
  int Lane = *MaybeLane;
  // If this is an undef splat, generate it via "just" vdup, if possible.
  if (Lane < 0)
    Lane = 0;
  if (matchDupFromInsertVectorElt(Lane, MI, MRI, MatchInfo))
    return true;
  if (matchDupFromBuildVector(Lane, MI, MRI, MatchInfo))
    return true;
  return false;
}

// Check if an EXT instruction can handle the shuffle mask when the vector
// sources of the shuffle are the same.
bool isSingletonExtMask(ArrayRef<int> M, LLT Ty) {
  unsigned NumElts = Ty.getNumElements();

  // Assume that the first shuffle index is not UNDEF. Fail if it is.
  if (M[0] < 0)
    return false;

  // If this is a VEXT shuffle, the immediate value is the index of the first
  // element. The other shuffle indices must be the successive elements after
  // the first one.
  unsigned ExpectedElt = M[0];
  for (unsigned I = 1; I < NumElts; ++I) {
    // Increment the expected index. If it wraps around, just follow it
    // back to index zero and keep going.
    ++ExpectedElt;
    if (ExpectedElt == NumElts)
      ExpectedElt = 0;

    if (M[I] < 0)
      continue; // Ignore UNDEF indices.
    if (ExpectedElt != static_cast<unsigned>(M[I]))
      return false;
  }

  return true;
}

bool matchEXT(MachineInstr &MI, MachineRegisterInfo &MRI,
              ShuffleVectorPseudo &MatchInfo) {
  assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
  Register Dst = MI.getOperand(0).getReg();
  LLT DstTy = MRI.getType(Dst);
  Register V1 = MI.getOperand(1).getReg();
  Register V2 = MI.getOperand(2).getReg();
  auto Mask = MI.getOperand(3).getShuffleMask();
  uint64_t Imm;
  auto ExtInfo = getExtMask(Mask, DstTy.getNumElements());
  uint64_t ExtFactor = MRI.getType(V1).getScalarSizeInBits() / 8;

  if (!ExtInfo) {
    if (!getOpcodeDef<GImplicitDef>(V2, MRI) ||
        !isSingletonExtMask(Mask, DstTy))
      return false;

    Imm = Mask[0] * ExtFactor;
    MatchInfo = ShuffleVectorPseudo(AArch64::G_EXT, Dst, {V1, V1, Imm});
    return true;
  }
  bool ReverseExt;
  std::tie(ReverseExt, Imm) = *ExtInfo;
  if (ReverseExt)
    std::swap(V1, V2);
  Imm *= ExtFactor;
  MatchInfo = ShuffleVectorPseudo(AArch64::G_EXT, Dst, {V1, V2, Imm});
  return true;
}

/// Replace a G_SHUFFLE_VECTOR instruction with a pseudo.
/// \p Opc is the opcode to use. \p MI is the G_SHUFFLE_VECTOR.
void applyShuffleVectorPseudo(MachineInstr &MI, MachineRegisterInfo &MRI,
                              ShuffleVectorPseudo &MatchInfo) {
  MachineIRBuilder MIRBuilder(MI);
  if (MatchInfo.Opc == TargetOpcode::G_BSWAP) {
    assert(MatchInfo.SrcOps.size() == 1);
    LLT DstTy = MRI.getType(MatchInfo.Dst);
    assert(DstTy == LLT::fixed_vector(8, 8) ||
           DstTy == LLT::fixed_vector(16, 8));
    LLT BSTy = DstTy == LLT::fixed_vector(8, 8) ? LLT::fixed_vector(4, 16)
                                                : LLT::fixed_vector(8, 16);
    // FIXME: NVCAST
    auto BS1 = MIRBuilder.buildInstr(TargetOpcode::G_BITCAST, {BSTy},
                                     MatchInfo.SrcOps[0]);
    auto BS2 = MIRBuilder.buildInstr(MatchInfo.Opc, {BSTy}, {BS1});
    MIRBuilder.buildInstr(TargetOpcode::G_BITCAST, {MatchInfo.Dst}, {BS2});
  } else
    MIRBuilder.buildInstr(MatchInfo.Opc, {MatchInfo.Dst}, MatchInfo.SrcOps);
  MI.eraseFromParent();
}

/// Replace a G_SHUFFLE_VECTOR instruction with G_EXT.
/// Special-cased because the constant operand must be emitted as a G_CONSTANT
/// for the imported tablegen patterns to work.
void applyEXT(MachineInstr &MI, ShuffleVectorPseudo &MatchInfo) {
  MachineIRBuilder MIRBuilder(MI);
  if (MatchInfo.SrcOps[2].getImm() == 0)
    MIRBuilder.buildCopy(MatchInfo.Dst, MatchInfo.SrcOps[0]);
  else {
    // Tablegen patterns expect an i32 G_CONSTANT as the final op.
    auto Cst = MIRBuilder.buildConstant(LLT::integer(32),
                                        MatchInfo.SrcOps[2].getImm());
    MIRBuilder.buildInstr(MatchInfo.Opc, {MatchInfo.Dst},
                          {MatchInfo.SrcOps[0], MatchInfo.SrcOps[1], Cst});
  }
  MI.eraseFromParent();
}

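/// Lower a full 128-bit vector reverse: G_REV64 reverses the elements within
/// each 64-bit half, and a G_EXT with byte immediate 8 then swaps the two
/// halves, yielding the fully reversed vector.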
void applyFullRev(MachineInstr &MI, MachineRegisterInfo &MRI) {
  Register Dst = MI.getOperand(0).getReg();
  Register Src = MI.getOperand(1).getReg();
  LLT DstTy = MRI.getType(Dst);
  assert(DstTy.getSizeInBits() == 128 &&
         "Expected 128bit vector in applyFullRev");
  MachineIRBuilder MIRBuilder(MI);
  auto Cst = MIRBuilder.buildConstant(LLT::integer(32), 8);
  auto Rev = MIRBuilder.buildInstr(AArch64::G_REV64, {DstTy}, {Src});
  MIRBuilder.buildInstr(AArch64::G_EXT, {Dst}, {Rev, Rev, Cst});
  MI.eraseFromParent();
}

bool matchNonConstInsert(MachineInstr &MI, MachineRegisterInfo &MRI) {
  assert(MI.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT);

  auto ValAndVReg =
      getIConstantVRegValWithLookThrough(MI.getOperand(3).getReg(), MRI);
  return !ValAndVReg;
}

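/// Lower a G_INSERT_VECTOR_ELT with a non-constant index by going through the
/// stack: store the vector to a slot, clamp the index (mask by NumElts - 1 to
/// avoid out-of-bounds UB), store the element at the computed byte offset,
/// then reload the whole vector.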
void applyNonConstInsert(MachineInstr &MI, MachineRegisterInfo &MRI,
                         MachineIRBuilder &Builder) {
  auto &Insert = cast<GInsertVectorElement>(MI);
  Builder.setInstrAndDebugLoc(Insert);

  Register Offset = Insert.getIndexReg();
  LLT VecTy = MRI.getType(Insert.getReg(0));
  LLT EltTy = MRI.getType(Insert.getElementReg());
  LLT IdxTy = MRI.getType(Insert.getIndexReg());

  if (VecTy.isScalableVector())
    return;

  // Create a stack slot and store the vector into it.
  MachineFunction &MF = Builder.getMF();
  Align Alignment(
      std::min<uint64_t>(VecTy.getSizeInBytes().getKnownMinValue(), 16));
  int FrameIdx = MF.getFrameInfo().CreateStackObject(VecTy.getSizeInBytes(),
                                                     Alignment, false);
  LLT FramePtrTy = LLT::pointer(0, 64);
  MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(MF, FrameIdx);
  auto StackTemp = Builder.buildFrameIndex(FramePtrTy, FrameIdx);

  Builder.buildStore(Insert.getOperand(1), StackTemp, PtrInfo, Align(8));

  // Get the pointer to the element, and be sure not to hit undefined behavior
  // if the index is out of bounds.
  assert(isPowerOf2_64(VecTy.getNumElements()) &&
         "Expected a power-2 vector size");
  auto Mask = Builder.buildConstant(IdxTy, VecTy.getNumElements() - 1);
  Register And = Builder.buildAnd(IdxTy, Offset, Mask).getReg(0);
  auto EltSize = Builder.buildConstant(IdxTy, EltTy.getSizeInBytes());
  Register Mul = Builder.buildMul(IdxTy, And, EltSize).getReg(0);
  Register EltPtr =
      Builder.buildPtrAdd(MRI.getType(StackTemp.getReg(0)), StackTemp, Mul)
          .getReg(0);

  // Write the inserted element.
  Builder.buildStore(Insert.getElementReg(), EltPtr, PtrInfo, Align(1));
  // Reload the whole vector.
  Builder.buildLoad(Insert.getReg(0), StackTemp, PtrInfo, Align(8));
  Insert.eraseFromParent();
}

/// Match a G_SHUFFLE_VECTOR with a mask which corresponds to a
/// G_INSERT_VECTOR_ELT and G_EXTRACT_VECTOR_ELT pair.
///
/// e.g.
///   %shuf = G_SHUFFLE_VECTOR %left, %right, shufflemask(0, 0)
///
/// Can be represented as
///
///   %extract = G_EXTRACT_VECTOR_ELT %left, 0
///   %ins = G_INSERT_VECTOR_ELT %left, %extract, 1
///
bool matchINS(MachineInstr &MI, MachineRegisterInfo &MRI,
              std::tuple<Register, int, Register, int> &MatchInfo) {
  assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
  ArrayRef<int> ShuffleMask = MI.getOperand(3).getShuffleMask();
  Register Dst = MI.getOperand(0).getReg();
  int NumElts = MRI.getType(Dst).getNumElements();
  auto DstIsLeftAndDstLane = isINSMask(ShuffleMask, NumElts);
  if (!DstIsLeftAndDstLane)
    return false;
  bool DstIsLeft;
  int DstLane;
  std::tie(DstIsLeft, DstLane) = *DstIsLeftAndDstLane;
  Register Left = MI.getOperand(1).getReg();
  Register Right = MI.getOperand(2).getReg();
  Register DstVec = DstIsLeft ? Left : Right;
  Register SrcVec = Left;

  int SrcLane = ShuffleMask[DstLane];
  if (SrcLane >= NumElts) {
    SrcVec = Right;
    SrcLane -= NumElts;
  }

  MatchInfo = std::make_tuple(DstVec, DstLane, SrcVec, SrcLane);
  return true;
}

void applyINS(MachineInstr &MI, MachineRegisterInfo &MRI,
              MachineIRBuilder &Builder,
              std::tuple<Register, int, Register, int> &MatchInfo) {
  Builder.setInstrAndDebugLoc(MI);
  Register Dst = MI.getOperand(0).getReg();
  auto ScalarTy = MRI.getType(Dst).getElementType();
  Register DstVec, SrcVec;
  int DstLane, SrcLane;
  std::tie(DstVec, DstLane, SrcVec, SrcLane) = MatchInfo;
  auto SrcCst = Builder.buildConstant(LLT::integer(64), SrcLane);
  auto Extract = Builder.buildExtractVectorElement(ScalarTy, SrcVec, SrcCst);
  auto DstCst = Builder.buildConstant(LLT::integer(64), DstLane);
  Builder.buildInsertVectorElement(Dst, DstVec, Extract, DstCst);
  MI.eraseFromParent();
}

/// isVShiftRImm - Check if this is a valid vector for the immediate
/// operand of a vector shift right operation. The value must be in the range:
///   1 <= Value <= ElementBits for a right shift.
bool isVShiftRImm(Register Reg, MachineRegisterInfo &MRI, LLT Ty,
                  int64_t &Cnt) {
  assert(Ty.isVector() && "vector shift count is not a vector type");
  MachineInstr *MI = MRI.getVRegDef(Reg);
  auto Cst = getAArch64VectorSplatScalar(*MI, MRI);
  if (!Cst)
    return false;
  Cnt = *Cst;
  int64_t ElementBits = Ty.getScalarSizeInBits();
  return Cnt >= 1 && Cnt <= ElementBits;
}

/// Match a vector G_ASHR or G_LSHR with a valid immediate shift.
bool matchVAshrLshrImm(MachineInstr &MI, MachineRegisterInfo &MRI,
                       int64_t &Imm) {
  assert(MI.getOpcode() == TargetOpcode::G_ASHR ||
         MI.getOpcode() == TargetOpcode::G_LSHR);
  LLT Ty = MRI.getType(MI.getOperand(1).getReg());
  if (!Ty.isVector())
    return false;
  return isVShiftRImm(MI.getOperand(2).getReg(), MRI, Ty, Imm);
}

void applyVAshrLshrImm(MachineInstr &MI, MachineRegisterInfo &MRI,
                       int64_t &Imm) {
  unsigned Opc = MI.getOpcode();
  assert(Opc == TargetOpcode::G_ASHR || Opc == TargetOpcode::G_LSHR);
  unsigned NewOpc =
      Opc == TargetOpcode::G_ASHR ? AArch64::G_VASHR : AArch64::G_VLSHR;
  MachineIRBuilder MIB(MI);
  MIB.buildInstr(NewOpc, {MI.getOperand(0)}, {MI.getOperand(1)}).addImm(Imm);
  MI.eraseFromParent();
}

/// Determine if it is possible to modify the \p RHS and predicate \p P of a
/// G_ICMP instruction such that the right-hand side is an arithmetic immediate.
///
/// \returns A pair containing the updated immediate and predicate which may
/// be used to optimize the instruction.
///
/// \note This assumes that the comparison has been legalized.
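///
/// E.g. 0x1001 is not a legal arithmetic immediate, but (x slt 0x1001) can be
/// rewritten as (x sle 0x1000), and 0x1000 is encodable as a 12-bit immediate
/// shifted left by 12.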
std::optional<std::pair<uint64_t, CmpInst::Predicate>>
tryAdjustICmpImmAndPred(Register RHS, CmpInst::Predicate P,
                        const MachineRegisterInfo &MRI) {
  const auto &Ty = MRI.getType(RHS);
  if (Ty.isVector())
    return std::nullopt;
  unsigned Size = Ty.getSizeInBits();
  assert((Size == 32 || Size == 64) && "Expected 32 or 64 bit compare only?");

  // If the RHS is not a constant, or the RHS is already a valid arithmetic
  // immediate, then there is nothing to change.
  auto ValAndVReg = getIConstantVRegValWithLookThrough(RHS, MRI);
  if (!ValAndVReg)
    return std::nullopt;
  uint64_t OriginalC = ValAndVReg->Value.getZExtValue();
  uint64_t C = OriginalC;
  if (isLegalArithImmed(C))
    return std::nullopt;

  // We have a non-arithmetic immediate. Check if adjusting the immediate and
  // adjusting the predicate will result in a legal arithmetic immediate.
  switch (P) {
  default:
    return std::nullopt;
  case CmpInst::ICMP_SLT:
  case CmpInst::ICMP_SGE:
    // Check for
    //
    // x slt c => x sle c - 1
    // x sge c => x sgt c - 1
    //
    // When c is not the smallest possible negative number.
    if ((Size == 64 && static_cast<int64_t>(C) == INT64_MIN) ||
        (Size == 32 && static_cast<int32_t>(C) == INT32_MIN))
      return std::nullopt;
    P = (P == CmpInst::ICMP_SLT) ? CmpInst::ICMP_SLE : CmpInst::ICMP_SGT;
    C -= 1;
    break;
  case CmpInst::ICMP_ULT:
  case CmpInst::ICMP_UGE:
    // Check for
    //
    // x ult c => x ule c - 1
    // x uge c => x ugt c - 1
    //
    // When c is not zero.
    assert(C != 0 && "C should not be zero here!");
    P = (P == CmpInst::ICMP_ULT) ? CmpInst::ICMP_ULE : CmpInst::ICMP_UGT;
    C -= 1;
    break;
  case CmpInst::ICMP_SLE:
  case CmpInst::ICMP_SGT:
    // Check for
    //
    // x sle c => x slt c + 1
    // x sgt c => x sge c + 1
    //
    // When c is not the largest possible signed integer.
    if ((Size == 32 && static_cast<int32_t>(C) == INT32_MAX) ||
        (Size == 64 && static_cast<int64_t>(C) == INT64_MAX))
      return std::nullopt;
    P = (P == CmpInst::ICMP_SLE) ? CmpInst::ICMP_SLT : CmpInst::ICMP_SGE;
    C += 1;
    break;
  case CmpInst::ICMP_ULE:
  case CmpInst::ICMP_UGT:
    // Check for
    //
    // x ule c => x ult c + 1
    // x ugt c => x uge c + 1
    //
    // When c is not the largest possible unsigned integer.
    if ((Size == 32 && static_cast<uint32_t>(C) == UINT32_MAX) ||
        (Size == 64 && C == UINT64_MAX))
      return std::nullopt;
    P = (P == CmpInst::ICMP_ULE) ? CmpInst::ICMP_ULT : CmpInst::ICMP_UGE;
    C += 1;
    break;
  }

  // Check if the new constant is valid, and return the updated constant and
  // predicate if it is.
  if (Size == 32)
    C = static_cast<uint32_t>(C);
  if (isLegalArithImmed(C))
    return {{C, P}};

  auto NumberOfInstrToLoadImm = [=](uint64_t Imm) {
    SmallVector<AArch64_IMM::ImmInsnModel> Insn;
    AArch64_IMM::expandMOVImm(Imm, 32, Insn);
    return Insn.size();
  };

  if (NumberOfInstrToLoadImm(OriginalC) > NumberOfInstrToLoadImm(C))
    return {{C, P}};

  return std::nullopt;
}

/// Determine whether or not it is possible to update the RHS and predicate of
/// a G_ICMP instruction such that the RHS will be selected as an arithmetic
/// immediate.
///
/// \p MI - The G_ICMP instruction
/// \p MatchInfo - The new RHS immediate and predicate on success
///
/// See tryAdjustICmpImmAndPred for valid transformations.
bool matchAdjustICmpImmAndPred(
    MachineInstr &MI, const MachineRegisterInfo &MRI,
    std::pair<uint64_t, CmpInst::Predicate> &MatchInfo) {
  assert(MI.getOpcode() == TargetOpcode::G_ICMP);
  Register RHS = MI.getOperand(3).getReg();
  auto Pred = static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
  if (auto MaybeNewImmAndPred = tryAdjustICmpImmAndPred(RHS, Pred, MRI)) {
    MatchInfo = *MaybeNewImmAndPred;
    return true;
  }
  return false;
}

void applyAdjustICmpImmAndPred(
    MachineInstr &MI, std::pair<uint64_t, CmpInst::Predicate> &MatchInfo,
    MachineIRBuilder &MIB, GISelChangeObserver &Observer) {
  MIB.setInstrAndDebugLoc(MI);
  MachineOperand &RHS = MI.getOperand(3);
  MachineRegisterInfo &MRI = *MIB.getMRI();
  auto Cst = MIB.buildConstant(MRI.cloneVirtualRegister(RHS.getReg()),
                               MatchInfo.first);
  Observer.changingInstr(MI);
  RHS.setReg(Cst->getOperand(0).getReg());
  MI.getOperand(1).setPredicate(MatchInfo.second);
  Observer.changedInstr(MI);
}

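/// Match a splat G_SHUFFLE_VECTOR whose splat lane lies within the first
/// source vector, e.g.
///   %d:_(<4 x s32>) = G_SHUFFLE_VECTOR %v, %undef, shufflemask(1, 1, 1, 1)
/// which can become G_DUPLANE32 %v, 1.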
bool matchDupLane(MachineInstr &MI, MachineRegisterInfo &MRI,
                  std::pair<unsigned, int> &MatchInfo) {
  assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
  Register Src1Reg = MI.getOperand(1).getReg();
  const LLT SrcTy = MRI.getType(Src1Reg);
  const LLT DstTy = MRI.getType(MI.getOperand(0).getReg());

  auto LaneIdx = getSplatIndex(MI);
  if (!LaneIdx)
    return false;

  // The lane idx should be within the first source vector.
  if (*LaneIdx >= SrcTy.getNumElements())
    return false;

  if (DstTy != SrcTy)
    return false;

  LLT ScalarTy = SrcTy.getElementType();
  unsigned ScalarSize = ScalarTy.getSizeInBits();

  unsigned Opc = 0;
  switch (SrcTy.getNumElements()) {
  case 2:
    if (ScalarSize == 64)
      Opc = AArch64::G_DUPLANE64;
    else if (ScalarSize == 32)
      Opc = AArch64::G_DUPLANE32;
    break;
  case 4:
    if (ScalarSize == 32)
      Opc = AArch64::G_DUPLANE32;
    else if (ScalarSize == 16)
      Opc = AArch64::G_DUPLANE16;
    break;
  case 8:
    if (ScalarSize == 8)
      Opc = AArch64::G_DUPLANE8;
    else if (ScalarSize == 16)
      Opc = AArch64::G_DUPLANE16;
    break;
  case 16:
    if (ScalarSize == 8)
      Opc = AArch64::G_DUPLANE8;
    break;
  default:
    break;
  }
  if (!Opc)
    return false;

  MatchInfo.first = Opc;
  MatchInfo.second = *LaneIdx;
  return true;
}

void applyDupLane(MachineInstr &MI, MachineRegisterInfo &MRI,
                  MachineIRBuilder &B, std::pair<unsigned, int> &MatchInfo) {
  assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
  Register Src1Reg = MI.getOperand(1).getReg();
  const LLT SrcTy = MRI.getType(Src1Reg);

  B.setInstrAndDebugLoc(MI);
  auto Lane = B.buildConstant(LLT::integer(64), MatchInfo.second);

  Register DupSrc = MI.getOperand(1).getReg();
  // For types like <2 x s32>, we can use G_DUPLANE32, with a <4 x s32> source.
  // To do this, we can use a G_CONCAT_VECTORS to do the widening.
  if (SrcTy.getSizeInBits() == 64) {
    auto Undef = B.buildUndef(SrcTy);
    DupSrc = B.buildConcatVectors(SrcTy.multiplyElements(2),
                                  {Src1Reg, Undef.getReg(0)})
                 .getReg(0);
  }
  B.buildInstr(MatchInfo.first, {MI.getOperand(0).getReg()}, {DupSrc, Lane});
  MI.eraseFromParent();
}

bool matchScalarizeVectorUnmerge(MachineInstr &MI, MachineRegisterInfo &MRI) {
  auto &Unmerge = cast<GUnmerge>(MI);
  Register Src1Reg = Unmerge.getReg(Unmerge.getNumOperands() - 1);
  const LLT SrcTy = MRI.getType(Src1Reg);
  if (SrcTy.getSizeInBits() != 128 && SrcTy.getSizeInBits() != 64)
    return false;
  return SrcTy.isVector() && !SrcTy.isScalable() &&
         (Unmerge.getNumOperands() == (unsigned)SrcTy.getNumElements() + 1 ||
          (Unmerge.getNumDefs() == 2 && SrcTy.getSizeInBits() == 128 &&
           MRI.getType(Unmerge.getReg(0)).getSizeInBits() == 64));
}

void applyScalarizeVectorUnmerge(MachineInstr &MI, MachineRegisterInfo &MRI,
                                 MachineIRBuilder &B) {
  auto &Unmerge = cast<GUnmerge>(MI);
  Register Src1Reg = Unmerge.getReg(Unmerge.getNumOperands() - 1);
  const LLT SrcTy = MRI.getType(Src1Reg);
  const LLT DstTy = MRI.getType(Unmerge.getReg(0));
  assert((SrcTy.isVector() && !SrcTy.isScalable()) &&
         "Expected a fixed length vector");

  if (DstTy.isVector()) {
    assert(Unmerge.getNumDefs() == 2);
    if (!MRI.use_nodbg_empty(Unmerge.getReg(0)))
      B.buildExtractSubvector(Unmerge.getReg(0), Src1Reg, 0);
    if (!MRI.use_nodbg_empty(Unmerge.getReg(1)))
      B.buildExtractSubvector(Unmerge.getReg(1), Src1Reg,
                              SrcTy.getNumElements() / 2);
  } else {
    for (int I = 0; I < SrcTy.getNumElements(); ++I)
      if (!MRI.use_nodbg_empty(Unmerge.getReg(I)))
        B.buildExtractVectorElementConstant(Unmerge.getReg(I), Src1Reg, I);
  }
  MI.eraseFromParent();
}

bool matchBuildVectorToDup(MachineInstr &MI, Register &Src,
                           MachineRegisterInfo &MRI) {
  assert(MI.getOpcode() == TargetOpcode::G_BUILD_VECTOR);

  // Later, during selection, we'll try to match imported patterns using
  // immAllOnesV and immAllZerosV. These require G_BUILD_VECTOR. Don't lower
  // G_BUILD_VECTORs which could match those patterns.
  if (isBuildVectorAllZeros(MI, MRI) || isBuildVectorAllOnes(MI, MRI))
    return false;

  // Find a build_vector which always uses the same register or undef. Return
  // true so long as at least 2 registers were found (not all-undef or only 1
  // non-undef entry).
  Register Reg = 0;
  unsigned NumNonUndef = 0;
  for (const MachineOperand &Op : drop_begin(MI.operands())) {
    if (getOpcodeDef<GImplicitDef>(Op.getReg(), MRI))
      continue;

    if (!Reg)
      Reg = Op.getReg();
    else if (Op.getReg() != Reg)
      return false;
    NumNonUndef++;
  }

  Src = Reg;
  return Reg && NumNonUndef > 1;
}

void applyBuildVectorToDup(MachineInstr &MI, Register Src,
                           MachineIRBuilder &B) {
  B.setInstrAndDebugLoc(MI);
  B.buildInstr(AArch64::G_DUP, {MI.getOperand(0).getReg()}, {Src});
  MI.eraseFromParent();
}

/// \returns how many instructions would be saved by folding a G_ICMP's shift
/// and/or extension operations.
unsigned getCmpOperandFoldingProfit(Register CmpOp, MachineRegisterInfo &MRI) {
  // No instructions to save if there's more than one use or no uses.
  if (!MRI.hasOneNonDBGUse(CmpOp))
    return 0;

  // FIXME: This is duplicated with the selector. (See: selectShiftedRegister)
  auto IsSupportedExtend = [&](const MachineInstr &MI) {
    if (MI.getOpcode() == TargetOpcode::G_SEXT_INREG)
      return true;
    if (MI.getOpcode() != TargetOpcode::G_AND)
      return false;
    auto ValAndVReg =
        getIConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI);
    if (!ValAndVReg)
      return false;
    uint64_t Mask = ValAndVReg->Value.getZExtValue();
    return (Mask == 0xFF || Mask == 0xFFFF || Mask == 0xFFFFFFFF);
  };

  MachineInstr *Def = getDefIgnoringCopies(CmpOp, MRI);
  if (IsSupportedExtend(*Def))
    return 1;

  unsigned Opc = Def->getOpcode();
  if (Opc != TargetOpcode::G_SHL && Opc != TargetOpcode::G_ASHR &&
      Opc != TargetOpcode::G_LSHR)
    return 0;

  auto MaybeShiftAmt =
      getIConstantVRegValWithLookThrough(Def->getOperand(2).getReg(), MRI);
  if (!MaybeShiftAmt)
    return 0;
  uint64_t ShiftAmt = MaybeShiftAmt->Value.getZExtValue();
  MachineInstr *ShiftLHS =
      getDefIgnoringCopies(Def->getOperand(1).getReg(), MRI);

  // Check if we can fold an extend and a shift.
  // FIXME: This is duplicated with the selector. (See:
  // selectArithExtendedRegister)
  if (IsSupportedExtend(*ShiftLHS))
    return (ShiftAmt <= 4) ? 2 : 1;

  LLT Ty = MRI.getType(Def->getOperand(0).getReg());
  if (Ty.isVector())
    return 0;
  unsigned ShiftSize = Ty.getSizeInBits();
  if ((ShiftSize == 32 && ShiftAmt <= 31) ||
      (ShiftSize == 64 && ShiftAmt <= 63))
    return 1;
  return 0;
}

/// \returns true if it would be profitable to swap the LHS and RHS of a G_ICMP
/// instruction \p MI.
bool trySwapICmpOperands(MachineInstr &MI, MachineRegisterInfo &MRI) {
  assert(MI.getOpcode() == TargetOpcode::G_ICMP);
  // Swap the operands if it would introduce a profitable folding opportunity.
  // (e.g. a shift + extend).
  //
  // For example:
  //    lsl     w13, w11, #1
  //    cmp     w13, w12
  // can be turned into:
  //    cmp     w12, w11, lsl #1

  // Don't swap if there's a constant on the RHS, because we know we can fold
  // that.
  Register RHS = MI.getOperand(3).getReg();
  auto RHSCst = getIConstantVRegValWithLookThrough(RHS, MRI);
  if (RHSCst && isLegalArithImmed(RHSCst->Value.getSExtValue()))
    return false;

  Register LHS = MI.getOperand(2).getReg();
  auto Pred = static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
  auto GetRegForProfit = [&](Register Reg) {
    MachineInstr *Def = getDefIgnoringCopies(Reg, MRI);
    return isCMN(Def, Pred, MRI) ? Def->getOperand(2).getReg() : Reg;
  };

  // Don't have a constant on the RHS. If we swap the LHS and RHS of the
  // compare, would we be able to fold more instructions?
  Register TheLHS = GetRegForProfit(LHS);
  Register TheRHS = GetRegForProfit(RHS);

  // If the LHS is more likely to give us a folding opportunity, then swap the
  // LHS and RHS.
  return (getCmpOperandFoldingProfit(TheLHS, MRI) >
          getCmpOperandFoldingProfit(TheRHS, MRI));
}

void applySwapICmpOperands(MachineInstr &MI, GISelChangeObserver &Observer) {
  auto Pred = static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
  Register LHS = MI.getOperand(2).getReg();
  Register RHS = MI.getOperand(3).getReg();
  Observer.changingInstr(MI);
  MI.getOperand(1).setPredicate(CmpInst::getSwappedPredicate(Pred));
  MI.getOperand(2).setReg(RHS);
  MI.getOperand(3).setReg(LHS);
  Observer.changedInstr(MI);
}

/// \returns a function which builds a vector floating point compare
/// instruction for a condition code \p CC.
/// \param [in] NoNans - True if the instruction has the nnan flag.
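///
/// Note that some condition codes (e.g. LS, MI) have no direct vector compare
/// instruction, so they are emitted as the converse compare with the operands
/// swapped (see the switch below).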
std::function<Register(MachineIRBuilder &)>
getVectorFCMP(AArch64CC::CondCode CC, Register LHS, Register RHS, bool NoNans,
              MachineRegisterInfo &MRI) {
  LLT OldTy = MRI.getType(LHS);
  LLT DstTy = LLT::fixed_vector(OldTy.getNumElements(),
                                OldTy.getScalarSizeInBits());
  assert(DstTy.isVector() && "Expected vector types only?");
  switch (CC) {
  default:
    llvm_unreachable("Unexpected condition code!");
  case AArch64CC::NE:
    return [LHS, RHS, DstTy](MachineIRBuilder &MIB) {
      auto FCmp = MIB.buildInstr(AArch64::G_FCMEQ, {DstTy}, {LHS, RHS});
      return MIB.buildNot(DstTy, FCmp).getReg(0);
    };
  case AArch64CC::EQ:
    return [LHS, RHS, DstTy](MachineIRBuilder &MIB) {
      return MIB.buildInstr(AArch64::G_FCMEQ, {DstTy}, {LHS, RHS}).getReg(0);
    };
  case AArch64CC::GE:
    return [LHS, RHS, DstTy](MachineIRBuilder &MIB) {
      return MIB.buildInstr(AArch64::G_FCMGE, {DstTy}, {LHS, RHS}).getReg(0);
    };
  case AArch64CC::GT:
    return [LHS, RHS, DstTy](MachineIRBuilder &MIB) {
      return MIB.buildInstr(AArch64::G_FCMGT, {DstTy}, {LHS, RHS}).getReg(0);
    };
  case AArch64CC::LS:
    return [LHS, RHS, DstTy](MachineIRBuilder &MIB) {
      return MIB.buildInstr(AArch64::G_FCMGE, {DstTy}, {RHS, LHS}).getReg(0);
    };
  case AArch64CC::MI:
    return [LHS, RHS, DstTy](MachineIRBuilder &MIB) {
      return MIB.buildInstr(AArch64::G_FCMGT, {DstTy}, {RHS, LHS}).getReg(0);
    };
  }
}

/// Try to lower a vector G_FCMP \p MI into an AArch64-specific pseudo.
bool matchLowerVectorFCMP(MachineInstr &MI, MachineRegisterInfo &MRI,
                          MachineIRBuilder &MIB) {
  assert(MI.getOpcode() == TargetOpcode::G_FCMP);
  const auto &ST = MI.getMF()->getSubtarget<AArch64Subtarget>();

  Register Dst = MI.getOperand(0).getReg();
  LLT DstTy = MRI.getType(Dst);
  if (!DstTy.isVector() || !ST.hasNEON())
    return false;
  Register LHS = MI.getOperand(2).getReg();
  unsigned EltSize = MRI.getType(LHS).getScalarSizeInBits();
  if (EltSize == 16 && !ST.hasFullFP16())
    return false;
  if (EltSize != 16 && EltSize != 32 && EltSize != 64)
    return false;

  return true;
}

/// Lower a vector G_FCMP \p MI into an AArch64-specific pseudo.
void applyLowerVectorFCMP(MachineInstr &MI, MachineRegisterInfo &MRI,
                          MachineIRBuilder &MIB) {
  assert(MI.getOpcode() == TargetOpcode::G_FCMP);

  const auto &CmpMI = cast<GFCmp>(MI);

  Register Dst = CmpMI.getReg(0);
  CmpInst::Predicate Pred = CmpMI.getCond();
  Register LHS = CmpMI.getLHSReg();
  Register RHS = CmpMI.getRHSReg();

  LLT DstTy = MRI.getType(Dst);

  bool Invert = false;
  AArch64CC::CondCode CC, CC2 = AArch64CC::AL;
  if ((Pred == CmpInst::Predicate::FCMP_ORD ||
       Pred == CmpInst::Predicate::FCMP_UNO) &&
      isBuildVectorAllZeros(*MRI.getVRegDef(RHS), MRI)) {
    // The special case "fcmp ord %a, 0" is the canonical check that LHS isn't
    // NaN, so equivalent to a == a and doesn't need the two comparisons an
    // "ord" normally would.
    // Similarly, "fcmp uno %a, 0" is the canonical check that LHS is NaN and is
    // thus equivalent to a != a.
    RHS = LHS;
    CC = Pred == CmpInst::Predicate::FCMP_ORD ? AArch64CC::EQ : AArch64CC::NE;
  } else
    changeVectorFCMPPredToAArch64CC(Pred, CC, CC2, Invert);

  // Instead of having an apply function, just build here to simplify things.
  MIB.setInstrAndDebugLoc(MI);

  // TODO: Also consider GISelValueTracking result if eligible.
  const bool NoNans = MI.getFlag(MachineInstr::FmNoNans);

  auto Cmp = getVectorFCMP(CC, LHS, RHS, NoNans, MRI);
  Register CmpRes;
  if (CC2 == AArch64CC::AL)
    CmpRes = Cmp(MIB);
  else {
    auto Cmp2 = getVectorFCMP(CC2, LHS, RHS, NoNans, MRI);
    auto Cmp2Dst = Cmp2(MIB);
    auto Cmp1Dst = Cmp(MIB);
    CmpRes = MIB.buildOr(DstTy, Cmp1Dst, Cmp2Dst).getReg(0);
  }
  if (Invert)
    CmpRes = MIB.buildNot(DstTy, CmpRes).getReg(0);
  MRI.replaceRegWith(Dst, CmpRes);
  MI.eraseFromParent();
}

// Matches a G_BUILD_VECTOR where at least one source operand is not a
// constant.
bool matchLowerBuildToInsertVecElt(MachineInstr &MI, MachineRegisterInfo &MRI) {
  auto *GBuildVec = cast<GBuildVector>(&MI);

  // Check if the values are all constants.
  for (unsigned I = 0; I < GBuildVec->getNumSources(); ++I) {
    auto ConstVal =
        getAnyConstantVRegValWithLookThrough(GBuildVec->getSourceReg(I), MRI);

    if (!ConstVal.has_value())
      return true;
  }

  return false;
}

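// Lower such a G_BUILD_VECTOR into a chain of G_INSERT_VECTOR_ELT starting
// from an undef vector, skipping undef sources. E.g.
//   %v:_(<2 x s32>) = G_BUILD_VECTOR %a(s32), %b(s32)
// becomes
//   %u:_(<2 x s32>) = G_IMPLICIT_DEF
//   %t:_(<2 x s32>) = G_INSERT_VECTOR_ELT %u, %a(s32), 0
//   %v:_(<2 x s32>) = G_INSERT_VECTOR_ELT %t, %b(s32), 1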
void applyLowerBuildToInsertVecElt(MachineInstr &MI, MachineRegisterInfo &MRI,
                                   MachineIRBuilder &B) {
  auto *GBuildVec = cast<GBuildVector>(&MI);
  LLT DstTy = MRI.getType(GBuildVec->getReg(0));
  Register DstReg = B.buildUndef(DstTy).getReg(0);

  for (unsigned I = 0; I < GBuildVec->getNumSources(); ++I) {
    Register SrcReg = GBuildVec->getSourceReg(I);
    if (mi_match(SrcReg, MRI, m_GImplicitDef()))
      continue;
    auto IdxReg = B.buildConstant(LLT::integer(64), I);
    DstReg =
        B.buildInsertVectorElement(DstTy, DstReg, SrcReg, IdxReg).getReg(0);
  }
  B.buildCopy(GBuildVec->getReg(0), DstReg);
  GBuildVec->eraseFromParent();
}

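// Fold a G_TRUNC feeding a scalar G_STORE into a truncating store, e.g.
//   %t:_(s32) = G_TRUNC %x(s64)
//   G_STORE %t(s32), %p (store 4)
// becomes a truncating G_STORE %x(s64), %p (store 4).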
bool matchFormTruncstore(MachineInstr &MI, MachineRegisterInfo &MRI,
                         Register &SrcReg) {
  assert(MI.getOpcode() == TargetOpcode::G_STORE);
  Register DstReg = MI.getOperand(0).getReg();
  if (MRI.getType(DstReg).isVector())
    return false;
  // Match a store of a truncate.
  if (!mi_match(DstReg, MRI, m_GTrunc(m_Reg(SrcReg))))
    return false;
  // Only form truncstores for value types of max 64b.
  return MRI.getType(SrcReg).getSizeInBits() <= 64;
}

void applyFormTruncstore(MachineInstr &MI, MachineRegisterInfo &MRI,
                         MachineIRBuilder &B, GISelChangeObserver &Observer,
                         Register &SrcReg) {
  assert(MI.getOpcode() == TargetOpcode::G_STORE);
  Observer.changingInstr(MI);
  MI.getOperand(0).setReg(SrcReg);
  Observer.changedInstr(MI);
}

// Lower vector G_SEXT_INREG back to shifts for selection. We allowed them to
// form in the first place for combine opportunities, so any remaining ones
// at this stage need to be lowered back.
bool matchVectorSextInReg(MachineInstr &MI, MachineRegisterInfo &MRI) {
  assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG);
  Register DstReg = MI.getOperand(0).getReg();
  LLT DstTy = MRI.getType(DstReg);
  return DstTy.isVector();
}

void applyVectorSextInReg(MachineInstr &MI, MachineRegisterInfo &MRI,
                          MachineIRBuilder &B, GISelChangeObserver &Observer) {
  assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG);
  B.setInstrAndDebugLoc(MI);
  LegalizerHelper Helper(*MI.getMF(), Observer, B);
  Helper.lower(MI, 0, /* Unused hint type */ LLT());
}

/// Combine <N x t>, unused = unmerge(G_EXT <2*N x t> v, undef, N)
///      => unused, <N x t> = unmerge v
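///
/// E.g. %hi:_(<2 x s32>), %unused:_(<2 x s32>) =
///          G_UNMERGE_VALUES (G_EXT %v:_(<4 x s32>), %undef, 8)
/// takes %hi from the high half of %v, so it can instead be rewritten as
///      %unused:_(<2 x s32>), %hi:_(<2 x s32>) = G_UNMERGE_VALUES %v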
bool matchUnmergeExtToUnmerge(MachineInstr &MI, MachineRegisterInfo &MRI,
                              Register &MatchInfo) {
  auto &Unmerge = cast<GUnmerge>(MI);
  if (Unmerge.getNumDefs() != 2)
    return false;
  if (!MRI.use_nodbg_empty(Unmerge.getReg(1)))
    return false;

  LLT DstTy = MRI.getType(Unmerge.getReg(0));
  if (!DstTy.isVector())
    return false;

  MachineInstr *Ext = getOpcodeDef(AArch64::G_EXT, Unmerge.getSourceReg(), MRI);
  if (!Ext)
    return false;

  Register ExtSrc1 = Ext->getOperand(1).getReg();
  Register ExtSrc2 = Ext->getOperand(2).getReg();
  auto LowestVal =
      getIConstantVRegValWithLookThrough(Ext->getOperand(3).getReg(), MRI);
  if (!LowestVal || LowestVal->Value.getZExtValue() != DstTy.getSizeInBytes())
    return false;

  if (!getOpcodeDef<GImplicitDef>(ExtSrc2, MRI))
    return false;

  MatchInfo = ExtSrc1;
  return true;
}

void applyUnmergeExtToUnmerge(MachineInstr &MI, MachineRegisterInfo &MRI,
                              MachineIRBuilder &B,
                              GISelChangeObserver &Observer, Register &SrcReg) {
  Observer.changingInstr(MI);
  // Swap dst registers.
  Register Dst1 = MI.getOperand(0).getReg();
  MI.getOperand(0).setReg(MI.getOperand(1).getReg());
  MI.getOperand(1).setReg(Dst1);
  MI.getOperand(2).setReg(SrcReg);
  Observer.changedInstr(MI);
}

// Match mul({z/s}ext, {z/s}ext) => {u/s}mull, OR match v2s64 mul
// instructions, which will then be scalarised later on. Doing these two
// matches in one function ensures that the order of matching will always be
// the same: try lowering MUL to MULL before trying to scalarize if needed.
bool matchMulv2s64(MachineInstr &MI, MachineRegisterInfo &MRI) {
  // AArch64 has no vector instruction for a v2s64 multiply, so match it here
  // and scalarise it in the apply step.
  LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
  return DstTy == LLT::fixed_vector(2, 64);
}

void applyMulv2s64(MachineInstr &MI, MachineRegisterInfo &MRI,
                   MachineIRBuilder &B, GISelChangeObserver &Observer) {
  assert(MI.getOpcode() == TargetOpcode::G_MUL &&
         "Expected a G_MUL instruction");

  // Scalarise the v2s64 multiply by splitting the vector into halves.
  LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
  assert(DstTy == LLT::fixed_vector(2, 64) && "Expected v2s64 Mul");
  LegalizerHelper Helper(*MI.getMF(), Observer, B);
  Helper.fewerElementsVector(
      MI, 0,
      DstTy.changeElementCount(DstTy.getElementCount().divideCoefficientBy(2)));
}

class AArch64PostLegalizerLoweringImpl : public Combiner {
protected:
  const CombinerHelper Helper;
  const AArch64PostLegalizerLoweringImplRuleConfig &RuleConfig;
  const AArch64Subtarget &STI;

public:
  AArch64PostLegalizerLoweringImpl(
      MachineFunction &MF, CombinerInfo &CInfo, GISelCSEInfo *CSEInfo,
      const AArch64PostLegalizerLoweringImplRuleConfig &RuleConfig,
      const AArch64Subtarget &STI);

  static const char *getName() { return "AArch64PostLegalizerLowering"; }

  bool tryCombineAll(MachineInstr &I) const override;

private:
#define GET_GICOMBINER_CLASS_MEMBERS
#include "AArch64GenPostLegalizeGILowering.inc"
#undef GET_GICOMBINER_CLASS_MEMBERS
};

#define GET_GICOMBINER_IMPL
#include "AArch64GenPostLegalizeGILowering.inc"
#undef GET_GICOMBINER_IMPL

AArch64PostLegalizerLoweringImpl::AArch64PostLegalizerLoweringImpl(
    MachineFunction &MF, CombinerInfo &CInfo, GISelCSEInfo *CSEInfo,
    const AArch64PostLegalizerLoweringImplRuleConfig &RuleConfig,
    const AArch64Subtarget &STI)
    : Combiner(MF, CInfo, /*VT*/ nullptr, CSEInfo),
      Helper(Observer, B, /*IsPreLegalize*/ true), RuleConfig(RuleConfig),
      STI(STI),
#define GET_GICOMBINER_CONSTRUCTOR_INITS
#include "AArch64GenPostLegalizeGILowering.inc"
#undef GET_GICOMBINER_CONSTRUCTOR_INITS
{
}

bool runPostLegalizerLowering(
    MachineFunction &MF,
    const AArch64PostLegalizerLoweringImplRuleConfig &RuleConfig) {
  if (MF.getProperties().hasFailedISel())
    return false;
  const Function &F = MF.getFunction();

  const AArch64Subtarget &ST = MF.getSubtarget<AArch64Subtarget>();
  CombinerInfo CInfo(/*AllowIllegalOps=*/true, /*ShouldLegalizeIllegal=*/false,
                     /*LegalizerInfo=*/nullptr, /*OptEnabled=*/true,
                     F.hasOptSize(), F.hasMinSize());
  // Disable fixed-point iteration to reduce compile time.
  CInfo.MaxIterations = 1;
  CInfo.ObserverLvl = CombinerInfo::ObserverLevel::SinglePass;
  // PostLegalizerCombiner performs DCE, so a full DCE pass is unnecessary.
  CInfo.EnableFullDCE = false;
  AArch64PostLegalizerLoweringImpl Impl(MF, CInfo, /*CSEInfo=*/nullptr,
                                        RuleConfig, ST);
  return Impl.combineMachineInstrs();
}

class AArch64PostLegalizerLoweringLegacy : public MachineFunctionPass {
public:
  static char ID;

  AArch64PostLegalizerLoweringLegacy();

  StringRef getPassName() const override {
    return "AArch64PostLegalizerLowering";
  }

  bool runOnMachineFunction(MachineFunction &MF) override;
  void getAnalysisUsage(AnalysisUsage &AU) const override;

private:
  AArch64PostLegalizerLoweringImplRuleConfig RuleConfig;
};
} // end anonymous namespace

void AArch64PostLegalizerLoweringLegacy::getAnalysisUsage(
    AnalysisUsage &AU) const {
  AU.setPreservesCFG();
  getSelectionDAGFallbackAnalysisUsage(AU);
  MachineFunctionPass::getAnalysisUsage(AU);
}

AArch64PostLegalizerLoweringLegacy::AArch64PostLegalizerLoweringLegacy()
    : MachineFunctionPass(ID) {
  if (!RuleConfig.parseCommandLineOption())
    report_fatal_error("Invalid rule identifier");
}

bool AArch64PostLegalizerLoweringLegacy::runOnMachineFunction(
    MachineFunction &MF) {
  assert(MF.getProperties().hasLegalized() && "Expected a legalized function?");
  return runPostLegalizerLowering(MF, RuleConfig);
}

char AArch64PostLegalizerLoweringLegacy::ID = 0;
INITIALIZE_PASS_BEGIN(AArch64PostLegalizerLoweringLegacy, DEBUG_TYPE,
                      "Lower AArch64 MachineInstrs after legalization", false,
                      false)
INITIALIZE_PASS_END(AArch64PostLegalizerLoweringLegacy, DEBUG_TYPE,
                    "Lower AArch64 MachineInstrs after legalization", false,
                    false)

AArch64PostLegalizerLoweringPass::AArch64PostLegalizerLoweringPass()
    : RuleConfig(
          std::make_unique<AArch64PostLegalizerLoweringImplRuleConfig>()) {
  if (!RuleConfig->parseCommandLineOption())
    reportFatalUsageError("invalid rule identifier");
}

AArch64PostLegalizerLoweringPass::~AArch64PostLegalizerLoweringPass() = default;

PreservedAnalyses
AArch64PostLegalizerLoweringPass::run(MachineFunction &MF,
                                      MachineFunctionAnalysisManager &MFAM) {
  MFPropsModifier _(*this, MF);
  const bool Changed = runPostLegalizerLowering(MF, *RuleConfig);

  if (!Changed)
    return PreservedAnalyses::all();

  auto PA = getMachineFunctionPassPreservedAnalyses();
  PA.preserveSet<CFGAnalyses>();
  return PA;
}

namespace llvm {
FunctionPass *createAArch64PostLegalizerLowering() {
  return new AArch64PostLegalizerLoweringLegacy();
}
} // end namespace llvm