1//=== AArch64PostLegalizerLowering.cpp --------------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8///
9/// \file
10/// Post-legalization lowering for instructions.
11///
12/// This is used to offload pattern matching from the selector.
13///
14/// For example, this combiner will notice that a G_SHUFFLE_VECTOR is actually
15/// a G_ZIP, G_UZP, etc.
16///
17/// General optimization combines should be handled by either the
18/// AArch64PostLegalizerCombiner or the AArch64PreLegalizerCombiner.
19///
20//===----------------------------------------------------------------------===//
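//
// Illustrative example (not part of the original header): after legalization,
// a low-half interleave such as
//   %z:_(<4 x s32>) = G_SHUFFLE_VECTOR %a:_(<4 x s32>), %b:_, shufflemask(0, 4, 1, 5)
// is rewritten by this pass into the target pseudo
//   %z:_(<4 x s32>) = G_ZIP1 %a, %b
// which the instruction selector can then match directly.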
21
22#include "AArch64ExpandImm.h"
25#include "AArch64Subtarget.h"
45#include "llvm/IR/InstrTypes.h"
48#include <optional>
49
50#define GET_GICOMBINER_DEPS
51#include "AArch64GenPostLegalizeGILowering.inc"
52#undef GET_GICOMBINER_DEPS
53
54#define DEBUG_TYPE "aarch64-postlegalizer-lowering"
55
56using namespace llvm;
57using namespace MIPatternMatch;
58using namespace AArch64GISelUtils;
59
60namespace {
61
62#define GET_GICOMBINER_TYPES
63#include "AArch64GenPostLegalizeGILowering.inc"
64#undef GET_GICOMBINER_TYPES
65
66/// Represents a pseudo instruction which replaces a G_SHUFFLE_VECTOR.
67///
68/// Used for matching target-supported shuffles before codegen.
69struct ShuffleVectorPseudo {
70 unsigned Opc; ///< Opcode for the instruction. (E.g. G_ZIP1)
71 Register Dst; ///< Destination register.
72 SmallVector<SrcOp, 2> SrcOps; ///< Source registers.
73 ShuffleVectorPseudo(unsigned Opc, Register Dst,
74 std::initializer_list<SrcOp> SrcOps)
75 : Opc(Opc), Dst(Dst), SrcOps(SrcOps){};
76 ShuffleVectorPseudo() = default;
77};
78
79/// Check if a G_EXT instruction can handle a shuffle mask \p M when the vector
80/// sources of the shuffle are different.
81std::optional<std::pair<bool, uint64_t>> getExtMask(ArrayRef<int> M,
82 unsigned NumElts) {
83 // Look for the first non-undef element.
84 auto FirstRealElt = find_if(M, [](int Elt) { return Elt >= 0; });
85 if (FirstRealElt == M.end())
86 return std::nullopt;
87
88 // Use APInt to handle overflow when calculating expected element.
89 unsigned MaskBits = APInt(32, NumElts * 2).logBase2();
90 APInt ExpectedElt = APInt(MaskBits, *FirstRealElt + 1, false, true);
91
92 // The following shuffle indices must be the successive elements after the
93 // first real element.
94 if (any_of(
95 make_range(std::next(FirstRealElt), M.end()),
96 [&ExpectedElt](int Elt) { return Elt != ExpectedElt++ && Elt >= 0; }))
97 return std::nullopt;
98
99 // The index of an EXT is the first element if it is not UNDEF.
100 // Watch out for the beginning UNDEFs. The EXT index should be the expected
101 // value of the first element. E.g.
102 // <-1, -1, 3, ...> is treated as <1, 2, 3, ...>.
103 // <-1, -1, 0, 1, ...> is treated as <2*NumElts-2, 2*NumElts-1, 0, 1, ...>.
104 // ExpectedElt is the last mask index plus 1.
105 uint64_t Imm = ExpectedElt.getZExtValue();
106 bool ReverseExt = false;
107
108 // There are two different cases that require reversing the input vectors.
109 // For example, for vector <4 x i32> we have the following cases,
110 // Case 1: shufflevector(<4 x i32>,<4 x i32>,<-1, -1, -1, 0>)
111 // Case 2: shufflevector(<4 x i32>,<4 x i32>,<-1, -1, 7, 0>)
112 // For both cases, we finally use mask <5, 6, 7, 0>, which requires
113 // to reverse two input vectors.
114 if (Imm < NumElts)
115 ReverseExt = true;
116 else
117 Imm -= NumElts;
118 return std::make_pair(ReverseExt, Imm);
119}
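// Worked example (illustrative, not from the original source): for two
// <4 x s32> sources and mask <1, 2, 3, 4>, the first real element is 1 and the
// rest are its successors, so ExpectedElt finishes at 5. Since 5 >= NumElts,
// this returns (ReverseExt = false, Imm = 1): an EXT starting at element 1 of
// the concatenated sources.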
120
121/// Helper function for matchINS.
122///
123/// \returns a value when \p M is an ins mask for \p NumInputElements.
124///
125/// First element of the returned pair is true when the produced
126/// G_INSERT_VECTOR_ELT destination should be the LHS of the G_SHUFFLE_VECTOR.
127///
128/// Second element is the destination lane for the G_INSERT_VECTOR_ELT.
129std::optional<std::pair<bool, int>> isINSMask(ArrayRef<int> M,
130 int NumInputElements) {
131 if (M.size() != static_cast<size_t>(NumInputElements))
132 return std::nullopt;
133 int NumLHSMatch = 0, NumRHSMatch = 0;
134 int LastLHSMismatch = -1, LastRHSMismatch = -1;
135 for (int Idx = 0; Idx < NumInputElements; ++Idx) {
136 if (M[Idx] == -1) {
137 ++NumLHSMatch;
138 ++NumRHSMatch;
139 continue;
140 }
141 M[Idx] == Idx ? ++NumLHSMatch : LastLHSMismatch = Idx;
142 M[Idx] == Idx + NumInputElements ? ++NumRHSMatch : LastRHSMismatch = Idx;
143 }
144 const int NumNeededToMatch = NumInputElements - 1;
145 if (NumLHSMatch == NumNeededToMatch)
146 return std::make_pair(true, LastLHSMismatch);
147 if (NumRHSMatch == NumNeededToMatch)
148 return std::make_pair(false, LastRHSMismatch);
149 return std::nullopt;
150}
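// Worked example (illustrative): with NumInputElements = 4 and mask
// <0, 1, 6, 3>, three lanes match the LHS identity mapping and lane 2 does
// not, so this returns (true, 2): keep the LHS and insert one element (mask
// value 6, i.e. RHS lane 2) into destination lane 2.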
151
152/// \return true if a G_SHUFFLE_VECTOR instruction \p MI can be replaced with a
153/// G_REV instruction. Returns the appropriate G_REV opcode and operands in \p MatchInfo.
154bool matchREV(MachineInstr &MI, MachineRegisterInfo &MRI,
155 ShuffleVectorPseudo &MatchInfo) {
156 assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
157 ArrayRef<int> ShuffleMask = MI.getOperand(3).getShuffleMask();
158 Register Dst = MI.getOperand(0).getReg();
159 Register Src = MI.getOperand(1).getReg();
160 LLT Ty = MRI.getType(Dst);
161 unsigned EltSize = Ty.getScalarSizeInBits();
162
163 // Element size for a rev cannot be 64.
164 if (EltSize == 64)
165 return false;
166
167 unsigned NumElts = Ty.getNumElements();
168
169 // Try to produce a G_REV instruction
170 for (unsigned LaneSize : {64U, 32U, 16U}) {
171 if (isREVMask(ShuffleMask, EltSize, NumElts, LaneSize)) {
172 unsigned Opcode;
173 if (LaneSize == 64U)
174 Opcode = AArch64::G_REV64;
175 else if (LaneSize == 32U)
176 Opcode = AArch64::G_REV32;
177 else
178 Opcode = AArch64::G_REV16;
179
180 MatchInfo = ShuffleVectorPseudo(Opcode, Dst, {Src});
181 return true;
182 }
183 }
184
185 return false;
186}
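// Illustrative example (not from the original source): an <8 x s8> shuffle
// with mask <1, 0, 3, 2, 5, 4, 7, 6> swaps the bytes within each 16-bit block,
// so the 16-bit lane size matches and the shuffle becomes G_REV16.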
187
188/// \return true if a G_SHUFFLE_VECTOR instruction \p MI can be replaced with
189/// a G_TRN1 or G_TRN2 instruction.
190bool matchTRN(MachineInstr &MI, MachineRegisterInfo &MRI,
191 ShuffleVectorPseudo &MatchInfo) {
192 assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
193 unsigned WhichResult;
194 ArrayRef<int> ShuffleMask = MI.getOperand(3).getShuffleMask();
195 Register Dst = MI.getOperand(0).getReg();
196 unsigned NumElts = MRI.getType(Dst).getNumElements();
197 if (!isTRNMask(ShuffleMask, NumElts, WhichResult))
198 return false;
199 unsigned Opc = (WhichResult == 0) ? AArch64::G_TRN1 : AArch64::G_TRN2;
200 Register V1 = MI.getOperand(1).getReg();
201 Register V2 = MI.getOperand(2).getReg();
202 MatchInfo = ShuffleVectorPseudo(Opc, Dst, {V1, V2});
203 return true;
204}
205
206/// \return true if a G_SHUFFLE_VECTOR instruction \p MI can be replaced with
207/// a G_UZP1 or G_UZP2 instruction.
208///
209/// \param [in] MI - The shuffle vector instruction.
210/// \param [out] MatchInfo - Either G_UZP1 or G_UZP2 on success.
211bool matchUZP(MachineInstr &MI, MachineRegisterInfo &MRI,
212 ShuffleVectorPseudo &MatchInfo) {
213 assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
214 unsigned WhichResult;
215 ArrayRef<int> ShuffleMask = MI.getOperand(3).getShuffleMask();
216 Register Dst = MI.getOperand(0).getReg();
217 unsigned NumElts = MRI.getType(Dst).getNumElements();
218 if (!isUZPMask(ShuffleMask, NumElts, WhichResult))
219 return false;
220 unsigned Opc = (WhichResult == 0) ? AArch64::G_UZP1 : AArch64::G_UZP2;
221 Register V1 = MI.getOperand(1).getReg();
222 Register V2 = MI.getOperand(2).getReg();
223 MatchInfo = ShuffleVectorPseudo(Opc, Dst, {V1, V2});
224 return true;
225}
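// Illustrative example: for <4 x s32> sources, mask <0, 2, 4, 6> keeps the
// even lanes of the concatenated inputs (G_UZP1, WhichResult == 0), while
// <1, 3, 5, 7> keeps the odd lanes (G_UZP2).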
226
227bool matchZip(MachineInstr &MI, MachineRegisterInfo &MRI,
228 ShuffleVectorPseudo &MatchInfo) {
229 assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
230 unsigned WhichResult;
231 ArrayRef<int> ShuffleMask = MI.getOperand(3).getShuffleMask();
232 Register Dst = MI.getOperand(0).getReg();
233 unsigned NumElts = MRI.getType(Dst).getNumElements();
234 if (!isZIPMask(ShuffleMask, NumElts, WhichResult))
235 return false;
236 unsigned Opc = (WhichResult == 0) ? AArch64::G_ZIP1 : AArch64::G_ZIP2;
237 Register V1 = MI.getOperand(1).getReg();
238 Register V2 = MI.getOperand(2).getReg();
239 MatchInfo = ShuffleVectorPseudo(Opc, Dst, {V1, V2});
240 return true;
241}
242
243/// Helper function for matchDup.
244bool matchDupFromInsertVectorElt(int Lane, MachineInstr &MI,
245 MachineRegisterInfo &MRI,
246 ShuffleVectorPseudo &MatchInfo) {
247 if (Lane != 0)
248 return false;
249
250 // Try to match a vector splat operation into a dup instruction.
251 // We're looking for this pattern:
252 //
253 // %scalar:gpr(s64) = COPY $x0
254 // %undef:fpr(<2 x s64>) = G_IMPLICIT_DEF
255 // %cst0:gpr(s32) = G_CONSTANT i32 0
256 // %zerovec:fpr(<2 x s32>) = G_BUILD_VECTOR %cst0(s32), %cst0(s32)
257 // %ins:fpr(<2 x s64>) = G_INSERT_VECTOR_ELT %undef, %scalar(s64), %cst0(s32)
258 // %splat:fpr(<2 x s64>) = G_SHUFFLE_VECTOR %ins(<2 x s64>), %undef,
259 // %zerovec(<2 x s32>)
260 //
261 // ...into:
262 // %splat = G_DUP %scalar
263
264 // Begin matching the insert.
265 auto *InsMI = getOpcodeDef(TargetOpcode::G_INSERT_VECTOR_ELT,
266 MI.getOperand(1).getReg(), MRI);
267 if (!InsMI)
268 return false;
269 // Match the undef vector operand.
270 if (!getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, InsMI->getOperand(1).getReg(),
271 MRI))
272 return false;
273
274 // Match the index constant 0.
275 if (!mi_match(InsMI->getOperand(3).getReg(), MRI, m_ZeroInt()))
276 return false;
277
278 MatchInfo = ShuffleVectorPseudo(AArch64::G_DUP, MI.getOperand(0).getReg(),
279 {InsMI->getOperand(2).getReg()});
280 return true;
281}
282
283/// Helper function for matchDup.
284bool matchDupFromBuildVector(int Lane, MachineInstr &MI,
285 MachineRegisterInfo &MRI,
286 ShuffleVectorPseudo &MatchInfo) {
287 assert(Lane >= 0 && "Expected positive lane?");
288 int NumElements = MRI.getType(MI.getOperand(1).getReg()).getNumElements();
289 // Test if the LHS is a BUILD_VECTOR. If it is, then we can just reference the
290 // lane's definition directly.
291 auto *BuildVecMI =
292 getOpcodeDef(TargetOpcode::G_BUILD_VECTOR,
293 MI.getOperand(Lane < NumElements ? 1 : 2).getReg(), MRI);
294 // If Lane >= NumElements, it points into the RHS; rebase it so it indexes the RHS build vector.
295 if (NumElements <= Lane)
296 Lane -= NumElements;
297
298 if (!BuildVecMI)
299 return false;
300 Register Reg = BuildVecMI->getOperand(Lane + 1).getReg();
301 MatchInfo =
302 ShuffleVectorPseudo(AArch64::G_DUP, MI.getOperand(0).getReg(), {Reg});
303 return true;
304}
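// Illustrative example: if the shuffled operand is
//   %v:_(<4 x s32>) = G_BUILD_VECTOR %a, %b, %c, %d
// and the shuffle splats lane 2, the splat collapses to G_DUP %c.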
305
306bool matchDup(MachineInstr &MI, MachineRegisterInfo &MRI,
307 ShuffleVectorPseudo &MatchInfo) {
308 assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
309 auto MaybeLane = getSplatIndex(MI);
310 if (!MaybeLane)
311 return false;
312 int Lane = *MaybeLane;
313 // If this is undef splat, generate it via "just" vdup, if possible.
314 if (Lane < 0)
315 Lane = 0;
316 if (matchDupFromInsertVectorElt(Lane, MI, MRI, MatchInfo))
317 return true;
318 if (matchDupFromBuildVector(Lane, MI, MRI, MatchInfo))
319 return true;
320 return false;
321}
322
323// Check if an EXT instruction can handle the shuffle mask when the vector
324// sources of the shuffle are the same.
325bool isSingletonExtMask(ArrayRef<int> M, LLT Ty) {
326 unsigned NumElts = Ty.getNumElements();
327
328 // Assume that the first shuffle index is not UNDEF. Fail if it is.
329 if (M[0] < 0)
330 return false;
331
332 // If this is a VEXT shuffle, the immediate value is the index of the first
333 // element. The other shuffle indices must be the successive elements after
334 // the first one.
335 unsigned ExpectedElt = M[0];
336 for (unsigned I = 1; I < NumElts; ++I) {
337 // Increment the expected index. If it wraps around, just follow it
338 // back to index zero and keep going.
339 ++ExpectedElt;
340 if (ExpectedElt == NumElts)
341 ExpectedElt = 0;
342
343 if (M[I] < 0)
344 continue; // Ignore UNDEF indices.
345 if (ExpectedElt != static_cast<unsigned>(M[I]))
346 return false;
347 }
348
349 return true;
350}
351
352bool matchEXT(MachineInstr &MI, MachineRegisterInfo &MRI,
353 ShuffleVectorPseudo &MatchInfo) {
354 assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
355 Register Dst = MI.getOperand(0).getReg();
356 LLT DstTy = MRI.getType(Dst);
357 Register V1 = MI.getOperand(1).getReg();
358 Register V2 = MI.getOperand(2).getReg();
359 auto Mask = MI.getOperand(3).getShuffleMask();
360 uint64_t Imm;
361 auto ExtInfo = getExtMask(Mask, DstTy.getNumElements());
362 uint64_t ExtFactor = MRI.getType(V1).getScalarSizeInBits() / 8;
363
364 if (!ExtInfo) {
365 if (!getOpcodeDef<GImplicitDef>(V2, MRI) ||
366 !isSingletonExtMask(Mask, DstTy))
367 return false;
368
369 Imm = Mask[0] * ExtFactor;
370 MatchInfo = ShuffleVectorPseudo(AArch64::G_EXT, Dst, {V1, V1, Imm});
371 return true;
372 }
373 bool ReverseExt;
374 std::tie(ReverseExt, Imm) = *ExtInfo;
375 if (ReverseExt)
376 std::swap(V1, V2);
377 Imm *= ExtFactor;
378 MatchInfo = ShuffleVectorPseudo(AArch64::G_EXT, Dst, {V1, V2, Imm});
379 return true;
380}
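// Illustrative single-source example: if V2 is a G_IMPLICIT_DEF and the mask
// is <3, 4, 5, 6, 7, 0, 1, 2> for <8 x s8>, isSingletonExtMask accepts it and
// the result is G_EXT %v1, %v1 with byte immediate Mask[0] * ExtFactor = 3.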
381
382/// Replace a G_SHUFFLE_VECTOR instruction with a pseudo.
383/// \p MatchInfo supplies the opcode and operands to use. \p MI is the G_SHUFFLE_VECTOR.
384void applyShuffleVectorPseudo(MachineInstr &MI,
385 ShuffleVectorPseudo &MatchInfo) {
386 MachineIRBuilder MIRBuilder(MI);
387 MIRBuilder.buildInstr(MatchInfo.Opc, {MatchInfo.Dst}, MatchInfo.SrcOps);
388 MI.eraseFromParent();
389}
390
391/// Replace a G_SHUFFLE_VECTOR instruction with G_EXT.
392/// Special-cased because the constant operand must be emitted as a G_CONSTANT
393/// for the imported tablegen patterns to work.
394void applyEXT(MachineInstr &MI, ShuffleVectorPseudo &MatchInfo) {
395 MachineIRBuilder MIRBuilder(MI);
396 if (MatchInfo.SrcOps[2].getImm() == 0)
397 MIRBuilder.buildCopy(MatchInfo.Dst, MatchInfo.SrcOps[0]);
398 else {
399 // Tablegen patterns expect an i32 G_CONSTANT as the final op.
400 auto Cst =
401 MIRBuilder.buildConstant(LLT::scalar(32), MatchInfo.SrcOps[2].getImm());
402 MIRBuilder.buildInstr(MatchInfo.Opc, {MatchInfo.Dst},
403 {MatchInfo.SrcOps[0], MatchInfo.SrcOps[1], Cst});
404 }
405 MI.eraseFromParent();
406}
407
408void applyFullRev(MachineInstr &MI, MachineRegisterInfo &MRI) {
409 Register Dst = MI.getOperand(0).getReg();
410 Register Src = MI.getOperand(1).getReg();
411 LLT DstTy = MRI.getType(Dst);
412 assert(DstTy.getSizeInBits() == 128 &&
413 "Expected 128bit vector in applyFullRev");
414 MachineIRBuilder MIRBuilder(MI);
415 auto Cst = MIRBuilder.buildConstant(LLT::scalar(32), 8);
416 auto Rev = MIRBuilder.buildInstr(AArch64::G_REV64, {DstTy}, {Src});
417 MIRBuilder.buildInstr(AArch64::G_EXT, {Dst}, {Rev, Rev, Cst});
418 MI.eraseFromParent();
419}
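// Illustrative example: a full reversal of a 128-bit <4 x s32> vector
// (shuffle mask <3, 2, 1, 0>) is emitted as %r = G_REV64 %src, reversing lanes
// within each 64-bit half, followed by G_EXT %r, %r, 8 to swap the two halves.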
420
421bool matchNonConstInsert(MachineInstr &MI, MachineRegisterInfo &MRI) {
422 assert(MI.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT);
423
424 auto ValAndVReg =
425 getIConstantVRegValWithLookThrough(MI.getOperand(3).getReg(), MRI);
426 return !ValAndVReg;
427}
428
429void applyNonConstInsert(MachineInstr &MI, MachineRegisterInfo &MRI,
430 MachineIRBuilder &Builder) {
431 auto &Insert = cast<GInsertVectorElement>(MI);
432 Builder.setInstrAndDebugLoc(Insert);
433
434 Register Offset = Insert.getIndexReg();
435 LLT VecTy = MRI.getType(Insert.getReg(0));
436 LLT EltTy = MRI.getType(Insert.getElementReg());
437 LLT IdxTy = MRI.getType(Insert.getIndexReg());
438
439 if (VecTy.isScalableVector())
440 return;
441
442 // Create a stack slot and store the vector into it
443 MachineFunction &MF = Builder.getMF();
444 Align Alignment(
445 std::min<uint64_t>(VecTy.getSizeInBytes().getKnownMinValue(), 16));
446 int FrameIdx = MF.getFrameInfo().CreateStackObject(VecTy.getSizeInBytes(),
447 Alignment, false);
448 LLT FramePtrTy = LLT::pointer(0, 64);
449 MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(MF, FrameIdx);
450 auto StackTemp = Builder.buildFrameIndex(FramePtrTy, FrameIdx);
451
452 Builder.buildStore(Insert.getOperand(1), StackTemp, PtrInfo, Align(8));
453
454 // Get the pointer to the element, and be sure not to hit undefined behavior
455 // if the index is out of bounds.
457 "Expected a power-2 vector size");
458 auto Mask = Builder.buildConstant(IdxTy, VecTy.getNumElements() - 1);
459 Register And = Builder.buildAnd(IdxTy, Offset, Mask).getReg(0);
460 auto EltSize = Builder.buildConstant(IdxTy, EltTy.getSizeInBytes());
461 Register Mul = Builder.buildMul(IdxTy, And, EltSize).getReg(0);
462 Register EltPtr =
463 Builder.buildPtrAdd(MRI.getType(StackTemp.getReg(0)), StackTemp, Mul)
464 .getReg(0);
465
466 // Write the inserted element
467 Builder.buildStore(Insert.getElementReg(), EltPtr, PtrInfo, Align(1));
468 // Reload the whole vector.
469 Builder.buildLoad(Insert.getReg(0), StackTemp, PtrInfo, Align(8));
470 Insert.eraseFromParent();
471}
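// Summary of the emitted sequence (illustrative): the vector is spilled to a
// stack slot, the variable index is masked to NumElements - 1 and scaled by
// the element size, the element is stored through the resulting pointer, and
// the whole vector is reloaded. The mask keeps the access in bounds without a
// variable-lane INS.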
472
473/// Match a G_SHUFFLE_VECTOR with a mask which corresponds to a
474/// G_INSERT_VECTOR_ELT and G_EXTRACT_VECTOR_ELT pair.
475///
476/// e.g.
477/// %shuf = G_SHUFFLE_VECTOR %left, %right, shufflemask(0, 0)
478///
479/// Can be represented as
480///
481/// %extract = G_EXTRACT_VECTOR_ELT %left, 0
482/// %ins = G_INSERT_VECTOR_ELT %left, %extract, 1
483///
484bool matchINS(MachineInstr &MI, MachineRegisterInfo &MRI,
485 std::tuple<Register, int, Register, int> &MatchInfo) {
486 assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
487 ArrayRef<int> ShuffleMask = MI.getOperand(3).getShuffleMask();
488 Register Dst = MI.getOperand(0).getReg();
489 int NumElts = MRI.getType(Dst).getNumElements();
490 auto DstIsLeftAndDstLane = isINSMask(ShuffleMask, NumElts);
491 if (!DstIsLeftAndDstLane)
492 return false;
493 bool DstIsLeft;
494 int DstLane;
495 std::tie(DstIsLeft, DstLane) = *DstIsLeftAndDstLane;
496 Register Left = MI.getOperand(1).getReg();
497 Register Right = MI.getOperand(2).getReg();
498 Register DstVec = DstIsLeft ? Left : Right;
499 Register SrcVec = Left;
500
501 int SrcLane = ShuffleMask[DstLane];
502 if (SrcLane >= NumElts) {
503 SrcVec = Right;
504 SrcLane -= NumElts;
505 }
506
507 MatchInfo = std::make_tuple(DstVec, DstLane, SrcVec, SrcLane);
508 return true;
509}
510
511void applyINS(MachineInstr &MI, MachineRegisterInfo &MRI,
512 MachineIRBuilder &Builder,
513 std::tuple<Register, int, Register, int> &MatchInfo) {
514 Builder.setInstrAndDebugLoc(MI);
515 Register Dst = MI.getOperand(0).getReg();
516 auto ScalarTy = MRI.getType(Dst).getElementType();
517 Register DstVec, SrcVec;
518 int DstLane, SrcLane;
519 std::tie(DstVec, DstLane, SrcVec, SrcLane) = MatchInfo;
520 auto SrcCst = Builder.buildConstant(LLT::scalar(64), SrcLane);
521 auto Extract = Builder.buildExtractVectorElement(ScalarTy, SrcVec, SrcCst);
522 auto DstCst = Builder.buildConstant(LLT::scalar(64), DstLane);
523 Builder.buildInsertVectorElement(Dst, DstVec, Extract, DstCst);
524 MI.eraseFromParent();
525}
526
527/// isVShiftRImm - Check if this is a valid vector for the immediate
528/// operand of a vector shift right operation. The value must be in the range:
529/// 1 <= Value <= ElementBits for a right shift.
530bool isVShiftRImm(Register Reg, MachineRegisterInfo &MRI, LLT Ty,
531 int64_t &Cnt) {
532 assert(Ty.isVector() && "vector shift count is not a vector type");
533 MachineInstr *MI = MRI.getVRegDef(Reg);
534 auto Cst = getAArch64VectorSplatScalar(*MI, MRI);
535 if (!Cst)
536 return false;
537 Cnt = *Cst;
538 int64_t ElementBits = Ty.getScalarSizeInBits();
539 return Cnt >= 1 && Cnt <= ElementBits;
540}
541
542/// Match a vector G_ASHR or G_LSHR with a valid immediate shift.
543bool matchVAshrLshrImm(MachineInstr &MI, MachineRegisterInfo &MRI,
544 int64_t &Imm) {
545 assert(MI.getOpcode() == TargetOpcode::G_ASHR ||
546 MI.getOpcode() == TargetOpcode::G_LSHR);
547 LLT Ty = MRI.getType(MI.getOperand(1).getReg());
548 if (!Ty.isVector())
549 return false;
550 return isVShiftRImm(MI.getOperand(2).getReg(), MRI, Ty, Imm);
551}
552
553void applyVAshrLshrImm(MachineInstr &MI, MachineRegisterInfo &MRI,
554 int64_t &Imm) {
555 unsigned Opc = MI.getOpcode();
556 assert(Opc == TargetOpcode::G_ASHR || Opc == TargetOpcode::G_LSHR);
557 unsigned NewOpc =
558 Opc == TargetOpcode::G_ASHR ? AArch64::G_VASHR : AArch64::G_VLSHR;
559 MachineIRBuilder MIB(MI);
560 auto ImmDef = MIB.buildConstant(LLT::scalar(32), Imm);
561 MIB.buildInstr(NewOpc, {MI.getOperand(0)}, {MI.getOperand(1), ImmDef});
562 MI.eraseFromParent();
563}
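// Illustrative example: %d:_(<4 x s32>) = G_ASHR %v, %amt, where %amt is a
// build-vector splat of 3, satisfies 1 <= 3 <= 32 and is rewritten to
// G_VASHR %v, 3 with the shift amount emitted as a 32-bit G_CONSTANT.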
564
565/// Determine if it is possible to modify the \p RHS and predicate \p P of a
566/// G_ICMP instruction such that the right-hand side is an arithmetic immediate.
567///
568/// \returns A pair containing the updated immediate and predicate which may
569/// be used to optimize the instruction.
570///
571/// \note This assumes that the comparison has been legalized.
572std::optional<std::pair<uint64_t, CmpInst::Predicate>>
573tryAdjustICmpImmAndPred(Register RHS, CmpInst::Predicate P,
574 const MachineRegisterInfo &MRI) {
575 const auto &Ty = MRI.getType(RHS);
576 if (Ty.isVector())
577 return std::nullopt;
578 unsigned Size = Ty.getSizeInBits();
579 assert((Size == 32 || Size == 64) && "Expected 32 or 64 bit compare only?");
580
581 // If the RHS is not a constant, or the RHS is already a valid arithmetic
582 // immediate, then there is nothing to change.
583 auto ValAndVReg = getIConstantVRegValWithLookThrough(RHS, MRI);
584 if (!ValAndVReg)
585 return std::nullopt;
586 uint64_t OriginalC = ValAndVReg->Value.getZExtValue();
587 uint64_t C = OriginalC;
588 if (isLegalArithImmed(C))
589 return std::nullopt;
590
591 // We have a non-arithmetic immediate. Check if adjusting the immediate and
592 // adjusting the predicate will result in a legal arithmetic immediate.
593 switch (P) {
594 default:
595 return std::nullopt;
596 case CmpInst::ICMP_SLT:
597 case CmpInst::ICMP_SGE:
598 // Check for
599 //
600 // x slt c => x sle c - 1
601 // x sge c => x sgt c - 1
602 //
603 // When c is not the smallest possible negative number.
604 if ((Size == 64 && static_cast<int64_t>(C) == INT64_MIN) ||
605 (Size == 32 && static_cast<int32_t>(C) == INT32_MIN))
606 return std::nullopt;
607 P = (P == CmpInst::ICMP_SLT) ? CmpInst::ICMP_SLE : CmpInst::ICMP_SGT;
608 C -= 1;
609 break;
610 case CmpInst::ICMP_ULT:
611 case CmpInst::ICMP_UGE:
612 // Check for
613 //
614 // x ult c => x ule c - 1
615 // x uge c => x ugt c - 1
616 //
617 // When c is not zero.
618 if (C == 0)
619 return std::nullopt;
620 P = (P == CmpInst::ICMP_ULT) ? CmpInst::ICMP_ULE : CmpInst::ICMP_UGT;
621 C -= 1;
622 break;
623 case CmpInst::ICMP_SLE:
624 case CmpInst::ICMP_SGT:
625 // Check for
626 //
627 // x sle c => x slt c + 1
628 // x sgt c => x sge c + 1
629 //
630 // When c is not the largest possible signed integer.
631 if ((Size == 32 && static_cast<int32_t>(C) == INT32_MAX) ||
632 (Size == 64 && static_cast<int64_t>(C) == INT64_MAX))
633 return std::nullopt;
634 P = (P == CmpInst::ICMP_SLE) ? CmpInst::ICMP_SLT : CmpInst::ICMP_SGE;
635 C += 1;
636 break;
637 case CmpInst::ICMP_ULE:
638 case CmpInst::ICMP_UGT:
639 // Check for
640 //
641 // x ule c => x ult c + 1
642 // x ugt c => x uge c + 1
643 //
644 // When c is not the largest possible unsigned integer.
645 if ((Size == 32 && static_cast<uint32_t>(C) == UINT32_MAX) ||
646 (Size == 64 && C == UINT64_MAX))
647 return std::nullopt;
648 P = (P == CmpInst::ICMP_ULE) ? CmpInst::ICMP_ULT : CmpInst::ICMP_UGE;
649 C += 1;
650 break;
651 }
652
653 // Check if the new constant is valid, and return the updated constant and
654 // predicate if it is.
655 if (Size == 32)
656 C = static_cast<uint32_t>(C);
657 if (isLegalArithImmed(C))
658 return {{C, P}};
659
660 auto IsMaterializableInSingleInstruction = [=](uint64_t Imm) {
661 SmallVector<AArch64_IMM::ImmInsnModel, 4> Insn;
662 AArch64_IMM::expandMOVImm(Imm, Size, Insn);
663 return Insn.size() == 1;
664 };
665
666 if (!IsMaterializableInSingleInstruction(OriginalC) &&
667 IsMaterializableInSingleInstruction(C))
668 return {{C, P}};
669
670 return std::nullopt;
671}
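// Worked example (illustrative): 0x1001 is not a legal arithmetic immediate
// (neither a 12-bit value nor a 12-bit value shifted left by 12), but for
// "x slt 0x1001" the predicate can become sle with constant 0x1000, which
// encodes as 0x1 << 12, so {0x1000, ICMP_SLE} is returned.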
672
673/// Determine whether or not it is possible to update the RHS and predicate of
674/// a G_ICMP instruction such that the RHS will be selected as an arithmetic
675/// immediate.
676///
677/// \p MI - The G_ICMP instruction
678/// \p MatchInfo - The new RHS immediate and predicate on success
679///
680/// See tryAdjustICmpImmAndPred for valid transformations.
681bool matchAdjustICmpImmAndPred(
682 MachineInstr &MI, const MachineRegisterInfo &MRI,
683 std::pair<uint64_t, CmpInst::Predicate> &MatchInfo) {
684 assert(MI.getOpcode() == TargetOpcode::G_ICMP);
685 Register RHS = MI.getOperand(3).getReg();
686 auto Pred = static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
687 if (auto MaybeNewImmAndPred = tryAdjustICmpImmAndPred(RHS, Pred, MRI)) {
688 MatchInfo = *MaybeNewImmAndPred;
689 return true;
690 }
691 return false;
692}
693
694void applyAdjustICmpImmAndPred(
695 MachineInstr &MI, std::pair<uint64_t, CmpInst::Predicate> &MatchInfo,
696 MachineIRBuilder &MIB, GISelChangeObserver &Observer) {
697 MIB.setInstrAndDebugLoc(MI);
698 MachineOperand &RHS = MI.getOperand(3);
699 MachineRegisterInfo &MRI = *MIB.getMRI();
700 auto Cst = MIB.buildConstant(MRI.cloneVirtualRegister(RHS.getReg()),
701 MatchInfo.first);
702 Observer.changingInstr(MI);
703 RHS.setReg(Cst->getOperand(0).getReg());
704 MI.getOperand(1).setPredicate(MatchInfo.second);
705 Observer.changedInstr(MI);
706}
707
708bool matchDupLane(MachineInstr &MI, MachineRegisterInfo &MRI,
709 std::pair<unsigned, int> &MatchInfo) {
710 assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
711 Register Src1Reg = MI.getOperand(1).getReg();
712 const LLT SrcTy = MRI.getType(Src1Reg);
713 const LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
714
715 auto LaneIdx = getSplatIndex(MI);
716 if (!LaneIdx)
717 return false;
718
719 // The lane idx should be within the first source vector.
720 if (*LaneIdx >= SrcTy.getNumElements())
721 return false;
722
723 if (DstTy != SrcTy)
724 return false;
725
726 LLT ScalarTy = SrcTy.getElementType();
727 unsigned ScalarSize = ScalarTy.getSizeInBits();
728
729 unsigned Opc = 0;
730 switch (SrcTy.getNumElements()) {
731 case 2:
732 if (ScalarSize == 64)
733 Opc = AArch64::G_DUPLANE64;
734 else if (ScalarSize == 32)
735 Opc = AArch64::G_DUPLANE32;
736 break;
737 case 4:
738 if (ScalarSize == 32)
739 Opc = AArch64::G_DUPLANE32;
740 else if (ScalarSize == 16)
741 Opc = AArch64::G_DUPLANE16;
742 break;
743 case 8:
744 if (ScalarSize == 8)
745 Opc = AArch64::G_DUPLANE8;
746 else if (ScalarSize == 16)
747 Opc = AArch64::G_DUPLANE16;
748 break;
749 case 16:
750 if (ScalarSize == 8)
751 Opc = AArch64::G_DUPLANE8;
752 break;
753 default:
754 break;
755 }
756 if (!Opc)
757 return false;
758
759 MatchInfo.first = Opc;
760 MatchInfo.second = *LaneIdx;
761 return true;
762}
763
764void applyDupLane(MachineInstr &MI, MachineRegisterInfo &MRI,
765 MachineIRBuilder &B, std::pair<unsigned, int> &MatchInfo) {
766 assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
767 Register Src1Reg = MI.getOperand(1).getReg();
768 const LLT SrcTy = MRI.getType(Src1Reg);
769
770 B.setInstrAndDebugLoc(MI);
771 auto Lane = B.buildConstant(LLT::scalar(64), MatchInfo.second);
772
773 Register DupSrc = MI.getOperand(1).getReg();
774 // For types like <2 x s32>, we can use G_DUPLANE32, with a <4 x s32> source.
775 // To do this, we can use a G_CONCAT_VECTORS to do the widening.
776 if (SrcTy.getSizeInBits() == 64) {
777 auto Undef = B.buildUndef(SrcTy);
778 DupSrc = B.buildConcatVectors(SrcTy.multiplyElements(2),
779 {Src1Reg, Undef.getReg(0)})
780 .getReg(0);
781 }
782 B.buildInstr(MatchInfo.first, {MI.getOperand(0).getReg()}, {DupSrc, Lane});
783 MI.eraseFromParent();
784}
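// Illustrative example: splatting lane 1 of a <2 x s32> source first widens
// the operand to <4 x s32> with a G_CONCAT_VECTORS of the source and a
// G_IMPLICIT_DEF, then emits G_DUPLANE32 with the lane constant 1, since the
// DUPLANE pseudos operate on 128-bit sources.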
785
786bool matchScalarizeVectorUnmerge(MachineInstr &MI, MachineRegisterInfo &MRI) {
787 auto &Unmerge = cast<GUnmerge>(MI);
788 Register Src1Reg = Unmerge.getReg(Unmerge.getNumOperands() - 1);
789 const LLT SrcTy = MRI.getType(Src1Reg);
790 if (SrcTy.getSizeInBits() != 128 && SrcTy.getSizeInBits() != 64)
791 return false;
792 return SrcTy.isVector() && !SrcTy.isScalable() &&
793 Unmerge.getNumOperands() == (unsigned)SrcTy.getNumElements() + 1;
794}
795
796void applyScalarizeVectorUnmerge(MachineInstr &MI, MachineRegisterInfo &MRI,
797 MachineIRBuilder &B) {
798 auto &Unmerge = cast<GUnmerge>(MI);
799 Register Src1Reg = Unmerge.getReg(Unmerge.getNumOperands() - 1);
800 const LLT SrcTy = MRI.getType(Src1Reg);
801 assert((SrcTy.isVector() && !SrcTy.isScalable()) &&
802 "Expected a fixed length vector");
803
804 for (int I = 0; I < SrcTy.getNumElements(); ++I)
805 B.buildExtractVectorElementConstant(Unmerge.getReg(I), Src1Reg, I);
806 MI.eraseFromParent();
807}
808
809bool matchBuildVectorToDup(MachineInstr &MI, MachineRegisterInfo &MRI) {
810 assert(MI.getOpcode() == TargetOpcode::G_BUILD_VECTOR);
811 auto Splat = getAArch64VectorSplat(MI, MRI);
812 if (!Splat)
813 return false;
814 if (Splat->isReg())
815 return true;
816 // Later, during selection, we'll try to match imported patterns using
817 // immAllOnesV and immAllZerosV. These require G_BUILD_VECTOR. Don't lower
818 // G_BUILD_VECTORs which could match those patterns.
819 int64_t Cst = Splat->getCst();
820 return (Cst != 0 && Cst != -1);
821}
822
823void applyBuildVectorToDup(MachineInstr &MI, MachineRegisterInfo &MRI,
824 MachineIRBuilder &B) {
825 B.setInstrAndDebugLoc(MI);
826 B.buildInstr(AArch64::G_DUP, {MI.getOperand(0).getReg()},
827 {MI.getOperand(1).getReg()});
828 MI.eraseFromParent();
829}
830
831/// \returns how many instructions would be saved by folding a G_ICMP's shift
832/// and/or extension operations.
833unsigned getCmpOperandFoldingProfit(Register CmpOp, const MachineRegisterInfo &MRI) {
834 // No instructions to save if there's more than one use or no uses.
835 if (!MRI.hasOneNonDBGUse(CmpOp))
836 return 0;
837
838 // FIXME: This is duplicated with the selector. (See: selectShiftedRegister)
839 auto IsSupportedExtend = [&](const MachineInstr &MI) {
840 if (MI.getOpcode() == TargetOpcode::G_SEXT_INREG)
841 return true;
842 if (MI.getOpcode() != TargetOpcode::G_AND)
843 return false;
844 auto ValAndVReg =
845 getIConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI);
846 if (!ValAndVReg)
847 return false;
848 uint64_t Mask = ValAndVReg->Value.getZExtValue();
849 return (Mask == 0xFF || Mask == 0xFFFF || Mask == 0xFFFFFFFF);
850 };
851
852 MachineInstr *Def = getDefIgnoringCopies(CmpOp, MRI);
853 if (IsSupportedExtend(*Def))
854 return 1;
855
856 unsigned Opc = Def->getOpcode();
857 if (Opc != TargetOpcode::G_SHL && Opc != TargetOpcode::G_ASHR &&
858 Opc != TargetOpcode::G_LSHR)
859 return 0;
860
861 auto MaybeShiftAmt =
862 getIConstantVRegValWithLookThrough(Def->getOperand(2).getReg(), MRI);
863 if (!MaybeShiftAmt)
864 return 0;
865 uint64_t ShiftAmt = MaybeShiftAmt->Value.getZExtValue();
866 MachineInstr *ShiftLHS =
867 getDefIgnoringCopies(Def->getOperand(1).getReg(), MRI);
868
869 // Check if we can fold an extend and a shift.
870 // FIXME: This is duplicated with the selector. (See:
871 // selectArithExtendedRegister)
872 if (IsSupportedExtend(*ShiftLHS))
873 return (ShiftAmt <= 4) ? 2 : 1;
874
875 LLT Ty = MRI.getType(Def->getOperand(0).getReg());
876 if (Ty.isVector())
877 return 0;
878 unsigned ShiftSize = Ty.getSizeInBits();
879 if ((ShiftSize == 32 && ShiftAmt <= 31) ||
880 (ShiftSize == 64 && ShiftAmt <= 63))
881 return 1;
882 return 0;
883}
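// Illustrative example: if the compare is the only user of CmpOp and CmpOp is
// a G_SHL of a 32-bit value by a constant 1, one instruction is saved (the
// shift folds into the compare as "lsl #1"); a supported extend feeding such a
// shift by at most 4 would save two.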
884
885/// \returns true if it would be profitable to swap the LHS and RHS of a G_ICMP
886/// instruction \p MI.
887bool trySwapICmpOperands(MachineInstr &MI, MachineRegisterInfo &MRI) {
888 assert(MI.getOpcode() == TargetOpcode::G_ICMP);
889 // Swap the operands if it would introduce a profitable folding opportunity.
890 // (e.g. a shift + extend).
891 //
892 // For example:
893 // lsl w13, w11, #1
894 // cmp w13, w12
895 // can be turned into:
896 // cmp w12, w11, lsl #1
897
898 // Don't swap if there's a constant on the RHS, because we know we can fold
899 // that.
900 Register RHS = MI.getOperand(3).getReg();
901 auto RHSCst = getIConstantVRegValWithLookThrough(RHS, MRI);
902 if (RHSCst && isLegalArithImmed(RHSCst->Value.getSExtValue()))
903 return false;
904
905 Register LHS = MI.getOperand(2).getReg();
906 auto Pred = static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
907 auto GetRegForProfit = [&](Register Reg) {
908 MachineInstr *Def = getDefIgnoringCopies(Reg, MRI);
909 return isCMN(Def, Pred, MRI) ? Def->getOperand(2).getReg() : Reg;
910 };
911
912 // Don't have a constant on the RHS. If we swap the LHS and RHS of the
913 // compare, would we be able to fold more instructions?
914 Register TheLHS = GetRegForProfit(LHS);
915 Register TheRHS = GetRegForProfit(RHS);
916
917 // If the LHS is more likely to give us a folding opportunity, then swap the
918 // LHS and RHS.
919 return (getCmpOperandFoldingProfit(TheLHS, MRI) >
920 getCmpOperandFoldingProfit(TheRHS, MRI));
921}
922
923void applySwapICmpOperands(MachineInstr &MI, GISelChangeObserver &Observer) {
924 auto Pred = static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
925 Register LHS = MI.getOperand(2).getReg();
926 Register RHS = MI.getOperand(3).getReg();
927 Observer.changingInstr(MI);
928 MI.getOperand(1).setPredicate(CmpInst::getSwappedPredicate(Pred));
929 MI.getOperand(2).setReg(RHS);
930 MI.getOperand(3).setReg(LHS);
931 Observer.changedInstr(MI);
932}
933
934/// \returns a function which builds a vector floating point compare instruction
935/// for a condition code \p CC.
936/// \param [in] IsZero - True if the comparison is against 0.
937/// \param [in] NoNans - True if the target has NoNansFPMath.
938std::function<Register(MachineIRBuilder &)>
939getVectorFCMP(AArch64CC::CondCode CC, Register LHS, Register RHS, bool IsZero,
940 bool NoNans, MachineRegisterInfo &MRI) {
941 LLT DstTy = MRI.getType(LHS);
942 assert(DstTy.isVector() && "Expected vector types only?");
943 assert(DstTy == MRI.getType(RHS) && "Src and Dst types must match!");
944 switch (CC) {
945 default:
946 llvm_unreachable("Unexpected condition code!");
947 case AArch64CC::NE:
948 return [LHS, RHS, IsZero, DstTy](MachineIRBuilder &MIB) {
949 auto FCmp = IsZero
950 ? MIB.buildInstr(AArch64::G_FCMEQZ, {DstTy}, {LHS})
951 : MIB.buildInstr(AArch64::G_FCMEQ, {DstTy}, {LHS, RHS});
952 return MIB.buildNot(DstTy, FCmp).getReg(0);
953 };
954 case AArch64CC::EQ:
955 return [LHS, RHS, IsZero, DstTy](MachineIRBuilder &MIB) {
956 return IsZero
957 ? MIB.buildInstr(AArch64::G_FCMEQZ, {DstTy}, {LHS}).getReg(0)
958 : MIB.buildInstr(AArch64::G_FCMEQ, {DstTy}, {LHS, RHS})
959 .getReg(0);
960 };
961 case AArch64CC::GE:
962 return [LHS, RHS, IsZero, DstTy](MachineIRBuilder &MIB) {
963 return IsZero
964 ? MIB.buildInstr(AArch64::G_FCMGEZ, {DstTy}, {LHS}).getReg(0)
965 : MIB.buildInstr(AArch64::G_FCMGE, {DstTy}, {LHS, RHS})
966 .getReg(0);
967 };
968 case AArch64CC::GT:
969 return [LHS, RHS, IsZero, DstTy](MachineIRBuilder &MIB) {
970 return IsZero
971 ? MIB.buildInstr(AArch64::G_FCMGTZ, {DstTy}, {LHS}).getReg(0)
972 : MIB.buildInstr(AArch64::G_FCMGT, {DstTy}, {LHS, RHS})
973 .getReg(0);
974 };
975 case AArch64CC::LS:
976 return [LHS, RHS, IsZero, DstTy](MachineIRBuilder &MIB) {
977 return IsZero
978 ? MIB.buildInstr(AArch64::G_FCMLEZ, {DstTy}, {LHS}).getReg(0)
979 : MIB.buildInstr(AArch64::G_FCMGE, {DstTy}, {RHS, LHS})
980 .getReg(0);
981 };
982 case AArch64CC::MI:
983 return [LHS, RHS, IsZero, DstTy](MachineIRBuilder &MIB) {
984 return IsZero
985 ? MIB.buildInstr(AArch64::G_FCMLTZ, {DstTy}, {LHS}).getReg(0)
986 : MIB.buildInstr(AArch64::G_FCMGT, {DstTy}, {RHS, LHS})
987 .getReg(0);
988 };
989 }
990}
991
992/// Try to lower a vector G_FCMP \p MI into an AArch64-specific pseudo.
993bool matchLowerVectorFCMP(MachineInstr &MI, MachineRegisterInfo &MRI,
994 MachineIRBuilder &MIB) {
995 assert(MI.getOpcode() == TargetOpcode::G_FCMP);
996 const auto &ST = MI.getMF()->getSubtarget<AArch64Subtarget>();
997
998 Register Dst = MI.getOperand(0).getReg();
999 LLT DstTy = MRI.getType(Dst);
1000 if (!DstTy.isVector() || !ST.hasNEON())
1001 return false;
1002 Register LHS = MI.getOperand(2).getReg();
1003 unsigned EltSize = MRI.getType(LHS).getScalarSizeInBits();
1004 if (EltSize == 16 && !ST.hasFullFP16())
1005 return false;
1006 if (EltSize != 16 && EltSize != 32 && EltSize != 64)
1007 return false;
1008
1009 return true;
1010}
1011
1012/// Try to lower a vector G_FCMP \p MI into an AArch64-specific pseudo.
1013void applyLowerVectorFCMP(MachineInstr &MI, MachineRegisterInfo &MRI,
1014 MachineIRBuilder &MIB) {
1015 assert(MI.getOpcode() == TargetOpcode::G_FCMP);
1016 const auto &ST = MI.getMF()->getSubtarget<AArch64Subtarget>();
1017
1018 const auto &CmpMI = cast<GFCmp>(MI);
1019
1020 Register Dst = CmpMI.getReg(0);
1021 CmpInst::Predicate Pred = CmpMI.getCond();
1022 Register LHS = CmpMI.getLHSReg();
1023 Register RHS = CmpMI.getRHSReg();
1024
1025 LLT DstTy = MRI.getType(Dst);
1026
1027 auto Splat = getAArch64VectorSplat(*MRI.getVRegDef(RHS), MRI);
1028
1029 // Compares against 0 have special target-specific pseudos.
1030 bool IsZero = Splat && Splat->isCst() && Splat->getCst() == 0;
1031
1032 bool Invert = false;
1033 AArch64CC::CondCode CC, CC2 = AArch64CC::AL;
1034 if ((Pred == CmpInst::Predicate::FCMP_ORD ||
1035 Pred == CmpInst::Predicate::FCMP_UNO) &&
1036 IsZero) {
1037 // The special case "fcmp ord %a, 0" is the canonical check that LHS isn't
1038 // NaN, so equivalent to a == a and doesn't need the two comparisons an
1039 // "ord" normally would.
1040 // Similarly, "fcmp uno %a, 0" is the canonical check that LHS is NaN and is
1041 // thus equivalent to a != a.
1042 RHS = LHS;
1043 IsZero = false;
1044 CC = Pred == CmpInst::Predicate::FCMP_ORD ? AArch64CC::EQ : AArch64CC::NE;
1045 } else
1046 changeVectorFCMPPredToAArch64CC(Pred, CC, CC2, Invert);
1047
1048 // Instead of having an apply function, just build here to simplify things.
1049 MIB.setInstrAndDebugLoc(MI);
1050
1051 const bool NoNans =
1052 ST.getTargetLowering()->getTargetMachine().Options.NoNaNsFPMath;
1053
1054 auto Cmp = getVectorFCMP(CC, LHS, RHS, IsZero, NoNans, MRI);
1055 Register CmpRes;
1056 if (CC2 == AArch64CC::AL)
1057 CmpRes = Cmp(MIB);
1058 else {
1059 auto Cmp2 = getVectorFCMP(CC2, LHS, RHS, IsZero, NoNans, MRI);
1060 auto Cmp2Dst = Cmp2(MIB);
1061 auto Cmp1Dst = Cmp(MIB);
1062 CmpRes = MIB.buildOr(DstTy, Cmp1Dst, Cmp2Dst).getReg(0);
1063 }
1064 if (Invert)
1065 CmpRes = MIB.buildNot(DstTy, CmpRes).getReg(0);
1066 MRI.replaceRegWith(Dst, CmpRes);
1067 MI.eraseFromParent();
1068}
1069
1070// Matches G_BUILD_VECTOR where at least one source operand is not a constant
1071bool matchLowerBuildToInsertVecElt(MachineInstr &MI, MachineRegisterInfo &MRI) {
1072 auto *GBuildVec = cast<GBuildVector>(&MI);
1073
1074 // Check if the values are all constants
1075 for (unsigned I = 0; I < GBuildVec->getNumSources(); ++I) {
1076 auto ConstVal =
1077 getAnyConstantVRegValWithLookThrough(GBuildVec->getSourceReg(I), MRI);
1078
1079 if (!ConstVal.has_value())
1080 return true;
1081 }
1082
1083 return false;
1084}
1085
1086void applyLowerBuildToInsertVecElt(MachineInstr &MI, MachineRegisterInfo &MRI,
1087 MachineIRBuilder &B) {
1088 auto *GBuildVec = cast<GBuildVector>(&MI);
1089 LLT DstTy = MRI.getType(GBuildVec->getReg(0));
1090 Register DstReg = B.buildUndef(DstTy).getReg(0);
1091
1092 for (unsigned I = 0; I < GBuildVec->getNumSources(); ++I) {
1093 Register SrcReg = GBuildVec->getSourceReg(I);
1094 if (mi_match(SrcReg, MRI, m_GImplicitDef()))
1095 continue;
1096 auto IdxReg = B.buildConstant(LLT::scalar(64), I);
1097 DstReg =
1098 B.buildInsertVectorElement(DstTy, DstReg, SrcReg, IdxReg).getReg(0);
1099 }
1100 B.buildCopy(GBuildVec->getReg(0), DstReg);
1101 GBuildVec->eraseFromParent();
1102}
1103
1104bool matchFormTruncstore(MachineInstr &MI, MachineRegisterInfo &MRI,
1105 Register &SrcReg) {
1106 assert(MI.getOpcode() == TargetOpcode::G_STORE);
1107 Register DstReg = MI.getOperand(0).getReg();
1108 if (MRI.getType(DstReg).isVector())
1109 return false;
1110 // Match a store of a truncate.
1111 if (!mi_match(DstReg, MRI, m_GTrunc(m_Reg(SrcReg))))
1112 return false;
1113 // Only form truncstores for value types of max 64b.
1114 return MRI.getType(SrcReg).getSizeInBits() <= 64;
1115}
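// Illustrative example: for
//   %small:_(s32) = G_TRUNC %big:_(s64)
//   G_STORE %small(s32), %ptr
// the store is rewritten to use %big directly, yielding a truncating G_STORE
// of the s64 value while the memory size stays 32 bits.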
1116
1117void applyFormTruncstore(MachineInstr &MI, MachineRegisterInfo &MRI,
1118 MachineIRBuilder &B, GISelChangeObserver &Observer,
1119 Register &SrcReg) {
1120 assert(MI.getOpcode() == TargetOpcode::G_STORE);
1121 Observer.changingInstr(MI);
1122 MI.getOperand(0).setReg(SrcReg);
1123 Observer.changedInstr(MI);
1124}
1125
1126// Lower vector G_SEXT_INREG back to shifts for selection. We allowed them to
1127// form in the first place for combine opportunities, so any remaining ones
1128// at this stage need be lowered back.
1129bool matchVectorSextInReg(MachineInstr &MI, MachineRegisterInfo &MRI) {
1130 assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG);
1131 Register DstReg = MI.getOperand(0).getReg();
1132 LLT DstTy = MRI.getType(DstReg);
1133 return DstTy.isVector();
1134}
1135
1136void applyVectorSextInReg(MachineInstr &MI, MachineRegisterInfo &MRI,
1137 MachineIRBuilder &B, GISelChangeObserver &Observer) {
1138 assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG);
1139 B.setInstrAndDebugLoc(MI);
1140 LegalizerHelper Helper(*MI.getMF(), Observer, B);
1141 Helper.lower(MI, 0, /* Unused hint type */ LLT());
1142}
1143
1144/// Combine <N x t>, unused = unmerge(G_EXT <2*N x t> v, undef, N)
1145/// => unused, <N x t> = unmerge v
1146bool matchUnmergeExtToUnmerge(MachineInstr &MI, MachineRegisterInfo &MRI,
1147 Register &MatchInfo) {
1148 auto &Unmerge = cast<GUnmerge>(MI);
1149 if (Unmerge.getNumDefs() != 2)
1150 return false;
1151 if (!MRI.use_nodbg_empty(Unmerge.getReg(1)))
1152 return false;
1153
1154 LLT DstTy = MRI.getType(Unmerge.getReg(0));
1155 if (!DstTy.isVector())
1156 return false;
1157
1158 MachineInstr *Ext = getOpcodeDef(AArch64::G_EXT, Unmerge.getSourceReg(), MRI);
1159 if (!Ext)
1160 return false;
1161
1162 Register ExtSrc1 = Ext->getOperand(1).getReg();
1163 Register ExtSrc2 = Ext->getOperand(2).getReg();
1164 auto LowestVal =
1165 getIConstantVRegValWithLookThrough(Ext->getOperand(3).getReg(), MRI);
1166 if (!LowestVal || LowestVal->Value.getZExtValue() != DstTy.getSizeInBytes())
1167 return false;
1168
1169 if (!getOpcodeDef<GImplicitDef>(ExtSrc2, MRI))
1170 return false;
1171
1172 MatchInfo = ExtSrc1;
1173 return true;
1174}
1175
1176void applyUnmergeExtToUnmerge(MachineInstr &MI, MachineRegisterInfo &MRI,
1177 MachineIRBuilder &B,
1178 GISelChangeObserver &Observer, Register &SrcReg) {
1179 Observer.changingInstr(MI);
1180 // Swap dst registers.
1181 Register Dst1 = MI.getOperand(0).getReg();
1182 MI.getOperand(0).setReg(MI.getOperand(1).getReg());
1183 MI.getOperand(1).setReg(Dst1);
1184 MI.getOperand(2).setReg(SrcReg);
1185 Observer.changedInstr(MI);
1186}
1187
1188// Match mul({z/s}ext , {z/s}ext) => {u/s}mull OR
1189// Match v2s64 mul instructions, which will then be scalarised later on
1190// Doing these two matches in one function to ensure that the order of matching
1191// will always be the same.
1192// Try lowering MUL to MULL before trying to scalarize if needed.
1193bool matchExtMulToMULL(MachineInstr &MI, MachineRegisterInfo &MRI) {
1194 // Get the instructions that defined the source operand
1195 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
1196 MachineInstr *I1 = getDefIgnoringCopies(MI.getOperand(1).getReg(), MRI);
1197 MachineInstr *I2 = getDefIgnoringCopies(MI.getOperand(2).getReg(), MRI);
1198
1199 if (DstTy.isVector()) {
1200 // If the source operands were EXTENDED before, then {U/S}MULL can be used
1201 unsigned I1Opc = I1->getOpcode();
1202 unsigned I2Opc = I2->getOpcode();
1203 if (((I1Opc == TargetOpcode::G_ZEXT && I2Opc == TargetOpcode::G_ZEXT) ||
1204 (I1Opc == TargetOpcode::G_SEXT && I2Opc == TargetOpcode::G_SEXT)) &&
1205 (MRI.getType(I1->getOperand(0).getReg()).getScalarSizeInBits() ==
1206 MRI.getType(I1->getOperand(1).getReg()).getScalarSizeInBits() * 2) &&
1207 (MRI.getType(I2->getOperand(0).getReg()).getScalarSizeInBits() ==
1208 MRI.getType(I2->getOperand(1).getReg()).getScalarSizeInBits() * 2)) {
1209 return true;
1210 }
1211 // If result type is v2s64, scalarise the instruction
1212 else if (DstTy == LLT::fixed_vector(2, 64)) {
1213 return true;
1214 }
1215 }
1216 return false;
1217}
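// Illustrative example: with %x:_(<4 x s32>) = G_ZEXT %a:_(<4 x s16>) and
// %y:_(<4 x s32>) = G_ZEXT %b:_(<4 x s16>), the instruction
// %p:_(<4 x s32>) = G_MUL %x, %y is matched here and later rebuilt as
// %p:_(<4 x s32>) = G_UMULL %a, %b; a <2 x s64> G_MUL instead takes the
// scalarisation path.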
1218
1219void applyExtMulToMULL(MachineInstr &MI, MachineRegisterInfo &MRI,
1220 MachineIRBuilder &B, GISelChangeObserver &Observer) {
1221 assert(MI.getOpcode() == TargetOpcode::G_MUL &&
1222 "Expected a G_MUL instruction");
1223
1224 // Get the instructions that defined the source operand
1225 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
1226 MachineInstr *I1 = getDefIgnoringCopies(MI.getOperand(1).getReg(), MRI);
1227 MachineInstr *I2 = getDefIgnoringCopies(MI.getOperand(2).getReg(), MRI);
1228
1229 // If the source operands were EXTENDED before, then {U/S}MULL can be used
1230 unsigned I1Opc = I1->getOpcode();
1231 unsigned I2Opc = I2->getOpcode();
1232 if (((I1Opc == TargetOpcode::G_ZEXT && I2Opc == TargetOpcode::G_ZEXT) ||
1233 (I1Opc == TargetOpcode::G_SEXT && I2Opc == TargetOpcode::G_SEXT)) &&
1234 (MRI.getType(I1->getOperand(0).getReg()).getScalarSizeInBits() ==
1235 MRI.getType(I1->getOperand(1).getReg()).getScalarSizeInBits() * 2) &&
1236 (MRI.getType(I2->getOperand(0).getReg()).getScalarSizeInBits() ==
1237 MRI.getType(I2->getOperand(1).getReg()).getScalarSizeInBits() * 2)) {
1238
1239 B.setInstrAndDebugLoc(MI);
1240 B.buildInstr(I1->getOpcode() == TargetOpcode::G_ZEXT ? AArch64::G_UMULL
1241 : AArch64::G_SMULL,
1242 {MI.getOperand(0).getReg()},
1243 {I1->getOperand(1).getReg(), I2->getOperand(1).getReg()});
1244 MI.eraseFromParent();
1245 }
1246 // If result type is v2s64, scalarise the instruction
1247 else if (DstTy == LLT::fixed_vector(2, 64)) {
1248 LegalizerHelper Helper(*MI.getMF(), Observer, B);
1249 B.setInstrAndDebugLoc(MI);
1250 Helper.fewerElementsVector(
1251 MI, 0,
1252 DstTy.changeElementCount(
1253 DstTy.getElementCount().divideCoefficientBy(2)));
1254 }
1255}
1256
1257class AArch64PostLegalizerLoweringImpl : public Combiner {
1258protected:
1259 const CombinerHelper Helper;
1260 const AArch64PostLegalizerLoweringImplRuleConfig &RuleConfig;
1261 const AArch64Subtarget &STI;
1262
1263public:
1264 AArch64PostLegalizerLoweringImpl(
1265 MachineFunction &MF, CombinerInfo &CInfo, const TargetPassConfig *TPC,
1266 GISelCSEInfo *CSEInfo,
1267 const AArch64PostLegalizerLoweringImplRuleConfig &RuleConfig,
1268 const AArch64Subtarget &STI);
1269
1270 static const char *getName() { return "AArch64PostLegalizerLowering"; }
1271
1272 bool tryCombineAll(MachineInstr &I) const override;
1273
1274private:
1275#define GET_GICOMBINER_CLASS_MEMBERS
1276#include "AArch64GenPostLegalizeGILowering.inc"
1277#undef GET_GICOMBINER_CLASS_MEMBERS
1278};
1279
1280#define GET_GICOMBINER_IMPL
1281#include "AArch64GenPostLegalizeGILowering.inc"
1282#undef GET_GICOMBINER_IMPL
1283
1284AArch64PostLegalizerLoweringImpl::AArch64PostLegalizerLoweringImpl(
1285 MachineFunction &MF, CombinerInfo &CInfo, const TargetPassConfig *TPC,
1286 GISelCSEInfo *CSEInfo,
1287 const AArch64PostLegalizerLoweringImplRuleConfig &RuleConfig,
1288 const AArch64Subtarget &STI)
1289 : Combiner(MF, CInfo, TPC, /*KB*/ nullptr, CSEInfo),
1290 Helper(Observer, B, /*IsPreLegalize*/ true), RuleConfig(RuleConfig),
1291 STI(STI),
1292#define GET_GICOMBINER_CONSTRUCTOR_INITS
1293#include "AArch64GenPostLegalizeGILowering.inc"
1294#undef GET_GICOMBINER_CONSTRUCTOR_INITS
1295{
1296}
1297
1298class AArch64PostLegalizerLowering : public MachineFunctionPass {
1299public:
1300 static char ID;
1301
1302 AArch64PostLegalizerLowering();
1303
1304 StringRef getPassName() const override {
1305 return "AArch64PostLegalizerLowering";
1306 }
1307
1308 bool runOnMachineFunction(MachineFunction &MF) override;
1309 void getAnalysisUsage(AnalysisUsage &AU) const override;
1310
1311private:
1312 AArch64PostLegalizerLoweringImplRuleConfig RuleConfig;
1313};
1314} // end anonymous namespace
1315
1316void AArch64PostLegalizerLowering::getAnalysisUsage(AnalysisUsage &AU) const {
1317 AU.addRequired<TargetPassConfig>();
1318 AU.setPreservesCFG();
1319 getSelectionDAGFallbackAnalysisUsage(AU);
1320 MachineFunctionPass::getAnalysisUsage(AU);
1321}
1322
1323AArch64PostLegalizerLowering::AArch64PostLegalizerLowering()
1324 : MachineFunctionPass(ID) {
1325 initializeAArch64PostLegalizerLoweringPass(*PassRegistry::getPassRegistry());
1326
1327 if (!RuleConfig.parseCommandLineOption())
1328 report_fatal_error("Invalid rule identifier");
1329}
1330
1331bool AArch64PostLegalizerLowering::runOnMachineFunction(MachineFunction &MF) {
1332 if (MF.getProperties().hasProperty(
1333 MachineFunctionProperties::Property::FailedISel))
1334 return false;
1335 assert(MF.getProperties().hasProperty(
1336 MachineFunctionProperties::Property::Legalized) &&
1337 "Expected a legalized function?");
1338 auto *TPC = &getAnalysis<TargetPassConfig>();
1339 const Function &F = MF.getFunction();
1340
1341 const AArch64Subtarget &ST = MF.getSubtarget<AArch64Subtarget>();
1342 CombinerInfo CInfo(/*AllowIllegalOps*/ true, /*ShouldLegalizeIllegal*/ false,
1343 /*LegalizerInfo*/ nullptr, /*OptEnabled=*/true,
1344 F.hasOptSize(), F.hasMinSize());
1345 // Disable fixed-point iteration to reduce compile-time
1346 CInfo.MaxIterations = 1;
1347 CInfo.ObserverLvl = CombinerInfo::ObserverLevel::SinglePass;
1348 // PostLegalizerCombiner performs DCE, so a full DCE pass is unnecessary.
1349 CInfo.EnableFullDCE = false;
1350 AArch64PostLegalizerLoweringImpl Impl(MF, CInfo, TPC, /*CSEInfo*/ nullptr,
1351 RuleConfig, ST);
1352 return Impl.combineMachineInstrs();
1353}
1354
1355char AArch64PostLegalizerLowering::ID = 0;
1356INITIALIZE_PASS_BEGIN(AArch64PostLegalizerLowering, DEBUG_TYPE,
1357 "Lower AArch64 MachineInstrs after legalization", false,
1358 false)
1359INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
1360INITIALIZE_PASS_END(AArch64PostLegalizerLowering, DEBUG_TYPE,
1361 "Lower AArch64 MachineInstrs after legalization", false,
1362 false)
1363
1364namespace llvm {
1365FunctionPass *createAArch64PostLegalizerLowering() {
1366 return new AArch64PostLegalizerLowering();
1367}
1368} // end namespace llvm