1//=== AArch64PostLegalizerLowering.cpp --------------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8///
9/// \file
10/// Post-legalization lowering for instructions.
11///
12/// This is used to offload pattern matching from the selector.
13///
14/// For example, this combiner will notice that a G_SHUFFLE_VECTOR is actually
15/// a G_ZIP, G_UZP, etc.
16///
17/// General optimization combines should be handled by either the
18/// AArch64PostLegalizerCombiner or the AArch64PreLegalizerCombiner.
19///
20//===----------------------------------------------------------------------===//
21
23#include "AArch64Subtarget.h"
45#include "llvm/IR/InstrTypes.h"
47#include "llvm/Support/Debug.h"
49#include <optional>
50
51#define GET_GICOMBINER_DEPS
52#include "AArch64GenPostLegalizeGILowering.inc"
53#undef GET_GICOMBINER_DEPS
54
55#define DEBUG_TYPE "aarch64-postlegalizer-lowering"
56
57using namespace llvm;
58using namespace MIPatternMatch;
59using namespace AArch64GISelUtils;
60
61namespace {
62
63#define GET_GICOMBINER_TYPES
64#include "AArch64GenPostLegalizeGILowering.inc"
65#undef GET_GICOMBINER_TYPES
66
67/// Represents a pseudo instruction which replaces a G_SHUFFLE_VECTOR.
68///
69/// Used for matching target-supported shuffles before codegen.
70struct ShuffleVectorPseudo {
71 unsigned Opc; ///< Opcode for the instruction. (E.g. G_ZIP1)
72 Register Dst; ///< Destination register.
73 SmallVector<SrcOp, 2> SrcOps; ///< Source registers.
74 ShuffleVectorPseudo(unsigned Opc, Register Dst,
75 std::initializer_list<SrcOp> SrcOps)
76 : Opc(Opc), Dst(Dst), SrcOps(SrcOps){};
77 ShuffleVectorPseudo() = default;
78};
79
80/// Check if a vector shuffle corresponds to a REV instruction with the
81/// specified blocksize.
82bool isREVMask(ArrayRef<int> M, unsigned EltSize, unsigned NumElts,
83 unsigned BlockSize) {
84 assert((BlockSize == 16 || BlockSize == 32 || BlockSize == 64) &&
85 "Only possible block sizes for REV are: 16, 32, 64");
86 assert(EltSize != 64 && "EltSize cannot be 64 for REV mask.");
87
88 unsigned BlockElts = M[0] + 1;
89
90 // If the first shuffle index is UNDEF, be optimistic.
91 if (M[0] < 0)
92 BlockElts = BlockSize / EltSize;
93
94 if (BlockSize <= EltSize || BlockSize != BlockElts * EltSize)
95 return false;
96
97 for (unsigned i = 0; i < NumElts; ++i) {
98 // Ignore undef indices.
99 if (M[i] < 0)
100 continue;
101 if (static_cast<unsigned>(M[i]) !=
102 (i - i % BlockElts) + (BlockElts - 1 - i % BlockElts))
103 return false;
104 }
105
106 return true;
107}
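// For illustration: on a <8 x s16> shuffle, the mask <1, 0, 3, 2, 5, 4, 7, 6>
// swaps the two 16-bit elements inside every 32-bit block, so
// isREVMask(M, 16, 8, 32) returns true and the shuffle can become a single
// REV32-style operation.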
108
109/// Determines if \p M is a shuffle vector mask for a TRN of \p NumElts.
110/// Whether or not G_TRN1 or G_TRN2 should be used is stored in \p WhichResult.
111bool isTRNMask(ArrayRef<int> M, unsigned NumElts, unsigned &WhichResult) {
112 if (NumElts % 2 != 0)
113 return false;
114 WhichResult = (M[0] == 0 ? 0 : 1);
115 for (unsigned i = 0; i < NumElts; i += 2) {
116 if ((M[i] >= 0 && static_cast<unsigned>(M[i]) != i + WhichResult) ||
117 (M[i + 1] >= 0 &&
118 static_cast<unsigned>(M[i + 1]) != i + NumElts + WhichResult))
119 return false;
120 }
121 return true;
122}
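// For illustration: with NumElts = 4, the mask <0, 4, 2, 6> interleaves the
// even lanes of both inputs and sets WhichResult = 0 (G_TRN1), while
// <1, 5, 3, 7> interleaves the odd lanes and sets WhichResult = 1 (G_TRN2).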
123
124/// Check if a G_EXT instruction can handle a shuffle mask \p M when the vector
125/// sources of the shuffle are different.
126std::optional<std::pair<bool, uint64_t>> getExtMask(ArrayRef<int> M,
127 unsigned NumElts) {
128 // Look for the first non-undef element.
129 auto FirstRealElt = find_if(M, [](int Elt) { return Elt >= 0; });
130 if (FirstRealElt == M.end())
131 return std::nullopt;
132
133 // Use APInt to handle overflow when calculating expected element.
134 unsigned MaskBits = APInt(32, NumElts * 2).logBase2();
135 APInt ExpectedElt = APInt(MaskBits, *FirstRealElt + 1);
136
137 // The following shuffle indices must be the successive elements after the
138 // first real element.
139 if (any_of(
140 make_range(std::next(FirstRealElt), M.end()),
141 [&ExpectedElt](int Elt) { return Elt != ExpectedElt++ && Elt >= 0; }))
142 return std::nullopt;
143
144 // The index of an EXT is the first element if it is not UNDEF.
145 // Watch out for the beginning UNDEFs. The EXT index should be the expected
146 // value of the first element. E.g.
147 // <-1, -1, 3, ...> is treated as <1, 2, 3, ...>.
148 // <-1, -1, 0, 1, ...> is treated as <2*NumElts-2, 2*NumElts-1, 0, 1, ...>.
149 // ExpectedElt is the last mask index plus 1.
150 uint64_t Imm = ExpectedElt.getZExtValue();
151 bool ReverseExt = false;
152
153 // There are two different cases requiring the input vectors to be reversed.
154 // For example, for vector <4 x i32> we have the following cases,
155 // Case 1: shufflevector(<4 x i32>,<4 x i32>,<-1, -1, -1, 0>)
156 // Case 2: shufflevector(<4 x i32>,<4 x i32>,<-1, -1, 7, 0>)
157 // For both cases, we finally use mask <5, 6, 7, 0>, which requires
158 // the two input vectors to be reversed.
159 if (Imm < NumElts)
160 ReverseExt = true;
161 else
162 Imm -= NumElts;
163 return std::make_pair(ReverseExt, Imm);
164}
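// For illustration: with NumElts = 4, the mask <1, 2, 3, 4> selects a window
// starting one element into the concatenation of the two sources, so this
// returns {ReverseExt = false, Imm = 1}; matchEXT later scales Imm by the
// element size in bytes to form the G_EXT byte immediate.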
165
166/// Determines if \p M is a shuffle vector mask for a UZP of \p NumElts.
167/// Whether or not G_UZP1 or G_UZP2 should be used is stored in \p WhichResult.
168bool isUZPMask(ArrayRef<int> M, unsigned NumElts, unsigned &WhichResult) {
169 WhichResult = (M[0] == 0 ? 0 : 1);
170 for (unsigned i = 0; i != NumElts; ++i) {
171 // Skip undef indices.
172 if (M[i] < 0)
173 continue;
174 if (static_cast<unsigned>(M[i]) != 2 * i + WhichResult)
175 return false;
176 }
177 return true;
178}
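// For illustration: with NumElts = 4, the mask <0, 2, 4, 6> keeps the
// even-numbered lanes of the concatenated sources (WhichResult = 0, G_UZP1)
// and <1, 3, 5, 7> keeps the odd-numbered lanes (WhichResult = 1, G_UZP2).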
179
180/// \return true if \p M is a zip mask for a shuffle vector of \p NumElts.
181/// Whether or not G_ZIP1 or G_ZIP2 should be used is stored in \p WhichResult.
182bool isZipMask(ArrayRef<int> M, unsigned NumElts, unsigned &WhichResult) {
183 if (NumElts % 2 != 0)
184 return false;
185
186 // 0 means use ZIP1, 1 means use ZIP2.
187 WhichResult = (M[0] == 0 ? 0 : 1);
188 unsigned Idx = WhichResult * NumElts / 2;
189 for (unsigned i = 0; i != NumElts; i += 2) {
190 if ((M[i] >= 0 && static_cast<unsigned>(M[i]) != Idx) ||
191 (M[i + 1] >= 0 && static_cast<unsigned>(M[i + 1]) != Idx + NumElts))
192 return false;
193 Idx += 1;
194 }
195 return true;
196}
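// For illustration: with NumElts = 4, the mask <0, 4, 1, 5> interleaves the
// low halves of the two sources (WhichResult = 0, G_ZIP1) and <2, 6, 3, 7>
// interleaves the high halves (WhichResult = 1, G_ZIP2).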
197
198/// Helper function for matchINS.
199///
200/// \returns a value when \p M is an ins mask for \p NumInputElements.
201///
202/// First element of the returned pair is true when the produced
203/// G_INSERT_VECTOR_ELT destination should be the LHS of the G_SHUFFLE_VECTOR.
204///
205/// Second element is the destination lane for the G_INSERT_VECTOR_ELT.
206std::optional<std::pair<bool, int>> isINSMask(ArrayRef<int> M,
207 int NumInputElements) {
208 if (M.size() != static_cast<size_t>(NumInputElements))
209 return std::nullopt;
210 int NumLHSMatch = 0, NumRHSMatch = 0;
211 int LastLHSMismatch = -1, LastRHSMismatch = -1;
212 for (int Idx = 0; Idx < NumInputElements; ++Idx) {
213 if (M[Idx] == -1) {
214 ++NumLHSMatch;
215 ++NumRHSMatch;
216 continue;
217 }
218 M[Idx] == Idx ? ++NumLHSMatch : LastLHSMismatch = Idx;
219 M[Idx] == Idx + NumInputElements ? ++NumRHSMatch : LastRHSMismatch = Idx;
220 }
221 const int NumNeededToMatch = NumInputElements - 1;
222 if (NumLHSMatch == NumNeededToMatch)
223 return std::make_pair(true, LastLHSMismatch);
224 if (NumRHSMatch == NumNeededToMatch)
225 return std::make_pair(false, LastRHSMismatch);
226 return std::nullopt;
227}
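// For illustration: with 4 input elements, the mask <0, 1, 6, 3> matches every
// LHS lane except lane 2, so this returns {true, 2}: the shuffle is an insert
// of element 6 (RHS lane 2) into lane 2 of the LHS.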
228
229/// \return true if a G_SHUFFLE_VECTOR instruction \p MI can be replaced with a
230/// G_REV instruction. Returns the appropriate G_REV opcode in \p Opc.
231bool matchREV(MachineInstr &MI, MachineRegisterInfo &MRI,
232 ShuffleVectorPseudo &MatchInfo) {
233 assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
234 ArrayRef<int> ShuffleMask = MI.getOperand(3).getShuffleMask();
235 Register Dst = MI.getOperand(0).getReg();
236 Register Src = MI.getOperand(1).getReg();
237 LLT Ty = MRI.getType(Dst);
238 unsigned EltSize = Ty.getScalarSizeInBits();
239
240 // Element size for a rev cannot be 64.
241 if (EltSize == 64)
242 return false;
243
244 unsigned NumElts = Ty.getNumElements();
245
246 // Try to produce a G_REV instruction
247 for (unsigned LaneSize : {64U, 32U, 16U}) {
248 if (isREVMask(ShuffleMask, EltSize, NumElts, LaneSize)) {
249 unsigned Opcode;
250 if (LaneSize == 64U)
251 Opcode = AArch64::G_REV64;
252 else if (LaneSize == 32U)
253 Opcode = AArch64::G_REV32;
254 else
255 Opcode = AArch64::G_REV16;
256
257 MatchInfo = ShuffleVectorPseudo(Opcode, Dst, {Src});
258 return true;
259 }
260 }
261
262 return false;
263}
264
265/// \return true if a G_SHUFFLE_VECTOR instruction \p MI can be replaced with
266/// a G_TRN1 or G_TRN2 instruction.
267bool matchTRN(MachineInstr &MI, MachineRegisterInfo &MRI,
268 ShuffleVectorPseudo &MatchInfo) {
269 assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
270 unsigned WhichResult;
271 ArrayRef<int> ShuffleMask = MI.getOperand(3).getShuffleMask();
272 Register Dst = MI.getOperand(0).getReg();
273 unsigned NumElts = MRI.getType(Dst).getNumElements();
274 if (!isTRNMask(ShuffleMask, NumElts, WhichResult))
275 return false;
276 unsigned Opc = (WhichResult == 0) ? AArch64::G_TRN1 : AArch64::G_TRN2;
277 Register V1 = MI.getOperand(1).getReg();
278 Register V2 = MI.getOperand(2).getReg();
279 MatchInfo = ShuffleVectorPseudo(Opc, Dst, {V1, V2});
280 return true;
281}
282
283/// \return true if a G_SHUFFLE_VECTOR instruction \p MI can be replaced with
284/// a G_UZP1 or G_UZP2 instruction.
285///
286/// \param [in] MI - The shuffle vector instruction.
287/// \param [out] MatchInfo - Either G_UZP1 or G_UZP2 on success.
288bool matchUZP(MachineInstr &MI, MachineRegisterInfo &MRI,
289 ShuffleVectorPseudo &MatchInfo) {
290 assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
291 unsigned WhichResult;
292 ArrayRef<int> ShuffleMask = MI.getOperand(3).getShuffleMask();
293 Register Dst = MI.getOperand(0).getReg();
294 unsigned NumElts = MRI.getType(Dst).getNumElements();
295 if (!isUZPMask(ShuffleMask, NumElts, WhichResult))
296 return false;
297 unsigned Opc = (WhichResult == 0) ? AArch64::G_UZP1 : AArch64::G_UZP2;
298 Register V1 = MI.getOperand(1).getReg();
299 Register V2 = MI.getOperand(2).getReg();
300 MatchInfo = ShuffleVectorPseudo(Opc, Dst, {V1, V2});
301 return true;
302}
303
304bool matchZip(MachineInstr &MI, MachineRegisterInfo &MRI,
305 ShuffleVectorPseudo &MatchInfo) {
306 assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
307 unsigned WhichResult;
308 ArrayRef<int> ShuffleMask = MI.getOperand(3).getShuffleMask();
309 Register Dst = MI.getOperand(0).getReg();
310 unsigned NumElts = MRI.getType(Dst).getNumElements();
311 if (!isZipMask(ShuffleMask, NumElts, WhichResult))
312 return false;
313 unsigned Opc = (WhichResult == 0) ? AArch64::G_ZIP1 : AArch64::G_ZIP2;
314 Register V1 = MI.getOperand(1).getReg();
315 Register V2 = MI.getOperand(2).getReg();
316 MatchInfo = ShuffleVectorPseudo(Opc, Dst, {V1, V2});
317 return true;
318}
319
320/// Helper function for matchDup.
321bool matchDupFromInsertVectorElt(int Lane, MachineInstr &MI,
322 MachineRegisterInfo &MRI,
323 ShuffleVectorPseudo &MatchInfo) {
324 if (Lane != 0)
325 return false;
326
327 // Try to match a vector splat operation into a dup instruction.
328 // We're looking for this pattern:
329 //
330 // %scalar:gpr(s64) = COPY $x0
331 // %undef:fpr(<2 x s64>) = G_IMPLICIT_DEF
332 // %cst0:gpr(s32) = G_CONSTANT i32 0
333 // %zerovec:fpr(<2 x s32>) = G_BUILD_VECTOR %cst0(s32), %cst0(s32)
334 // %ins:fpr(<2 x s64>) = G_INSERT_VECTOR_ELT %undef, %scalar(s64), %cst0(s32)
335 // %splat:fpr(<2 x s64>) = G_SHUFFLE_VECTOR %ins(<2 x s64>), %undef,
336 // %zerovec(<2 x s32>)
337 //
338 // ...into:
339 // %splat = G_DUP %scalar
340
341 // Begin matching the insert.
342 auto *InsMI = getOpcodeDef(TargetOpcode::G_INSERT_VECTOR_ELT,
343 MI.getOperand(1).getReg(), MRI);
344 if (!InsMI)
345 return false;
346 // Match the undef vector operand.
347 if (!getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, InsMI->getOperand(1).getReg(),
348 MRI))
349 return false;
350
351 // Match the index constant 0.
352 if (!mi_match(InsMI->getOperand(3).getReg(), MRI, m_ZeroInt()))
353 return false;
354
355 MatchInfo = ShuffleVectorPseudo(AArch64::G_DUP, MI.getOperand(0).getReg(),
356 {InsMI->getOperand(2).getReg()});
357 return true;
358}
359
360/// Helper function for matchDup.
361bool matchDupFromBuildVector(int Lane, MachineInstr &MI,
362 MachineRegisterInfo &MRI,
363 ShuffleVectorPseudo &MatchInfo) {
364 assert(Lane >= 0 && "Expected positive lane?");
365 // Test if the LHS is a BUILD_VECTOR. If it is, then we can just reference the
366 // lane's definition directly.
367 auto *BuildVecMI = getOpcodeDef(TargetOpcode::G_BUILD_VECTOR,
368 MI.getOperand(1).getReg(), MRI);
369 if (!BuildVecMI)
370 return false;
371 Register Reg = BuildVecMI->getOperand(Lane + 1).getReg();
372 MatchInfo =
373 ShuffleVectorPseudo(AArch64::G_DUP, MI.getOperand(0).getReg(), {Reg});
374 return true;
375}
376
377bool matchDup(MachineInstr &MI, MachineRegisterInfo &MRI,
378 ShuffleVectorPseudo &MatchInfo) {
379 assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
380 auto MaybeLane = getSplatIndex(MI);
381 if (!MaybeLane)
382 return false;
383 int Lane = *MaybeLane;
384 // If this is undef splat, generate it via "just" vdup, if possible.
385 if (Lane < 0)
386 Lane = 0;
387 if (matchDupFromInsertVectorElt(Lane, MI, MRI, MatchInfo))
388 return true;
389 if (matchDupFromBuildVector(Lane, MI, MRI, MatchInfo))
390 return true;
391 return false;
392}
393
394// Check if an EXT instruction can handle the shuffle mask when the vector
395// sources of the shuffle are the same.
396bool isSingletonExtMask(ArrayRef<int> M, LLT Ty) {
397 unsigned NumElts = Ty.getNumElements();
398
399 // Assume that the first shuffle index is not UNDEF. Fail if it is.
400 if (M[0] < 0)
401 return false;
402
403 // If this is a VEXT shuffle, the immediate value is the index of the first
404 // element. The other shuffle indices must be the successive elements after
405 // the first one.
406 unsigned ExpectedElt = M[0];
407 for (unsigned I = 1; I < NumElts; ++I) {
408 // Increment the expected index. If it wraps around, just follow it
409 // back to index zero and keep going.
410 ++ExpectedElt;
411 if (ExpectedElt == NumElts)
412 ExpectedElt = 0;
413
414 if (M[I] < 0)
415 continue; // Ignore UNDEF indices.
416 if (ExpectedElt != static_cast<unsigned>(M[I]))
417 return false;
418 }
419
420 return true;
421}
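// For illustration: with a <4 x s32> source, the mask <2, 3, 0, 1> is a rotate
// of a single input and is accepted; matchEXT then emits G_EXT %v, %v with a
// byte immediate of Mask[0] * 4 = 8.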
422
423bool matchEXT(MachineInstr &MI, MachineRegisterInfo &MRI,
424 ShuffleVectorPseudo &MatchInfo) {
425 assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
426 Register Dst = MI.getOperand(0).getReg();
427 LLT DstTy = MRI.getType(Dst);
428 Register V1 = MI.getOperand(1).getReg();
429 Register V2 = MI.getOperand(2).getReg();
430 auto Mask = MI.getOperand(3).getShuffleMask();
431 uint64_t Imm;
432 auto ExtInfo = getExtMask(Mask, DstTy.getNumElements());
433 uint64_t ExtFactor = MRI.getType(V1).getScalarSizeInBits() / 8;
434
435 if (!ExtInfo) {
436 if (!getOpcodeDef<GImplicitDef>(V2, MRI) ||
437 !isSingletonExtMask(Mask, DstTy))
438 return false;
439
440 Imm = Mask[0] * ExtFactor;
441 MatchInfo = ShuffleVectorPseudo(AArch64::G_EXT, Dst, {V1, V1, Imm});
442 return true;
443 }
444 bool ReverseExt;
445 std::tie(ReverseExt, Imm) = *ExtInfo;
446 if (ReverseExt)
447 std::swap(V1, V2);
448 Imm *= ExtFactor;
449 MatchInfo = ShuffleVectorPseudo(AArch64::G_EXT, Dst, {V1, V2, Imm});
450 return true;
451}
452
453/// Replace a G_SHUFFLE_VECTOR instruction with a pseudo.
454/// \p Opc is the opcode to use. \p MI is the G_SHUFFLE_VECTOR.
455void applyShuffleVectorPseudo(MachineInstr &MI,
456 ShuffleVectorPseudo &MatchInfo) {
457 MachineIRBuilder MIRBuilder(MI);
458 MIRBuilder.buildInstr(MatchInfo.Opc, {MatchInfo.Dst}, MatchInfo.SrcOps);
459 MI.eraseFromParent();
460}
461
462/// Replace a G_SHUFFLE_VECTOR instruction with G_EXT.
463/// Special-cased because the constant operand must be emitted as a G_CONSTANT
464/// for the imported tablegen patterns to work.
465void applyEXT(MachineInstr &MI, ShuffleVectorPseudo &MatchInfo) {
466 MachineIRBuilder MIRBuilder(MI);
467 if (MatchInfo.SrcOps[2].getImm() == 0)
468 MIRBuilder.buildCopy(MatchInfo.Dst, MatchInfo.SrcOps[0]);
469 else {
470 // Tablegen patterns expect an i32 G_CONSTANT as the final op.
471 auto Cst =
472 MIRBuilder.buildConstant(LLT::scalar(32), MatchInfo.SrcOps[2].getImm());
473 MIRBuilder.buildInstr(MatchInfo.Opc, {MatchInfo.Dst},
474 {MatchInfo.SrcOps[0], MatchInfo.SrcOps[1], Cst});
475 }
476 MI.eraseFromParent();
477}
478
479bool matchNonConstInsert(MachineInstr &MI, MachineRegisterInfo &MRI) {
480 assert(MI.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT);
481
482 auto ValAndVReg =
483 getIConstantVRegValWithLookThrough(MI.getOperand(3).getReg(), MRI);
484 return !ValAndVReg;
485}
486
487void applyNonConstInsert(MachineInstr &MI, MachineRegisterInfo &MRI,
488 MachineIRBuilder &Builder) {
489 auto &Insert = cast<GInsertVectorElement>(MI);
490 Builder.setInstrAndDebugLoc(Insert);
491
492 Register Offset = Insert.getIndexReg();
493 LLT VecTy = MRI.getType(Insert.getReg(0));
494 LLT EltTy = MRI.getType(Insert.getElementReg());
495 LLT IdxTy = MRI.getType(Insert.getIndexReg());
496
497 // Create a stack slot and store the vector into it
498 MachineFunction &MF = Builder.getMF();
499 Align Alignment(
500 std::min<uint64_t>(VecTy.getSizeInBytes().getKnownMinValue(), 16));
501 int FrameIdx = MF.getFrameInfo().CreateStackObject(VecTy.getSizeInBytes(),
502 Alignment, false);
503 LLT FramePtrTy = LLT::pointer(0, 64);
504 MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(MF, FrameIdx);
505 auto StackTemp = Builder.buildFrameIndex(FramePtrTy, FrameIdx);
506
507 Builder.buildStore(Insert.getOperand(1), StackTemp, PtrInfo, Align(8));
508
509 // Get the pointer to the element, and be sure not to hit undefined behavior
510 // if the index is out of bounds.
512 "Expected a power-2 vector size");
513 auto Mask = Builder.buildConstant(IdxTy, VecTy.getNumElements() - 1);
514 Register And = Builder.buildAnd(IdxTy, Offset, Mask).getReg(0);
515 auto EltSize = Builder.buildConstant(IdxTy, EltTy.getSizeInBytes());
516 Register Mul = Builder.buildMul(IdxTy, And, EltSize).getReg(0);
517 Register EltPtr =
518 Builder.buildPtrAdd(MRI.getType(StackTemp.getReg(0)), StackTemp, Mul)
519 .getReg(0);
520
521 // Write the inserted element
522 Builder.buildStore(Insert.getElementReg(), EltPtr, PtrInfo, Align(1));
523 // Reload the whole vector.
524 Builder.buildLoad(Insert.getReg(0), StackTemp, PtrInfo, Align(8));
525 Insert.eraseFromParent();
526}
527
528/// Match a G_SHUFFLE_VECTOR with a mask which corresponds to a
529/// G_INSERT_VECTOR_ELT and G_EXTRACT_VECTOR_ELT pair.
530///
531/// e.g.
532/// %shuf = G_SHUFFLE_VECTOR %left, %right, shufflemask(0, 0)
533///
534/// Can be represented as
535///
536/// %extract = G_EXTRACT_VECTOR_ELT %left, 0
537/// %ins = G_INSERT_VECTOR_ELT %left, %extract, 1
538///
539bool matchINS(MachineInstr &MI, MachineRegisterInfo &MRI,
540 std::tuple<Register, int, Register, int> &MatchInfo) {
541 assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
542 ArrayRef<int> ShuffleMask = MI.getOperand(3).getShuffleMask();
543 Register Dst = MI.getOperand(0).getReg();
544 int NumElts = MRI.getType(Dst).getNumElements();
545 auto DstIsLeftAndDstLane = isINSMask(ShuffleMask, NumElts);
546 if (!DstIsLeftAndDstLane)
547 return false;
548 bool DstIsLeft;
549 int DstLane;
550 std::tie(DstIsLeft, DstLane) = *DstIsLeftAndDstLane;
551 Register Left = MI.getOperand(1).getReg();
552 Register Right = MI.getOperand(2).getReg();
553 Register DstVec = DstIsLeft ? Left : Right;
554 Register SrcVec = Left;
555
556 int SrcLane = ShuffleMask[DstLane];
557 if (SrcLane >= NumElts) {
558 SrcVec = Right;
559 SrcLane -= NumElts;
560 }
561
562 MatchInfo = std::make_tuple(DstVec, DstLane, SrcVec, SrcLane);
563 return true;
564}
565
566void applyINS(MachineInstr &MI, MachineRegisterInfo &MRI,
567 MachineIRBuilder &Builder,
568 std::tuple<Register, int, Register, int> &MatchInfo) {
569 Builder.setInstrAndDebugLoc(MI);
570 Register Dst = MI.getOperand(0).getReg();
571 auto ScalarTy = MRI.getType(Dst).getElementType();
572 Register DstVec, SrcVec;
573 int DstLane, SrcLane;
574 std::tie(DstVec, DstLane, SrcVec, SrcLane) = MatchInfo;
575 auto SrcCst = Builder.buildConstant(LLT::scalar(64), SrcLane);
576 auto Extract = Builder.buildExtractVectorElement(ScalarTy, SrcVec, SrcCst);
577 auto DstCst = Builder.buildConstant(LLT::scalar(64), DstLane);
578 Builder.buildInsertVectorElement(Dst, DstVec, Extract, DstCst);
579 MI.eraseFromParent();
580}
581
582/// isVShiftRImm - Check if this is a valid vector for the immediate
583/// operand of a vector shift right operation. The value must be in the range:
584/// 1 <= Value <= ElementBits for a right shift.
585bool isVShiftRImm(Register Reg, MachineRegisterInfo &MRI, LLT Ty,
586 int64_t &Cnt) {
587 assert(Ty.isVector() && "vector shift count is not a vector type");
588 MachineInstr *MI = MRI.getVRegDef(Reg);
589 auto Cst = getAArch64VectorSplatScalar(*MI, MRI);
590 if (!Cst)
591 return false;
592 Cnt = *Cst;
593 int64_t ElementBits = Ty.getScalarSizeInBits();
594 return Cnt >= 1 && Cnt <= ElementBits;
595}
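// For illustration: for a <4 x s32> G_LSHR whose shift operand is a splat
// G_BUILD_VECTOR of the constant 16, Cnt is set to 16 and this returns true;
// splats of 0 or 33 fall outside 1 <= Cnt <= 32 and are rejected.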
596
597/// Match a vector G_ASHR or G_LSHR with a valid immediate shift.
598bool matchVAshrLshrImm(MachineInstr &MI, MachineRegisterInfo &MRI,
599 int64_t &Imm) {
600 assert(MI.getOpcode() == TargetOpcode::G_ASHR ||
601 MI.getOpcode() == TargetOpcode::G_LSHR);
602 LLT Ty = MRI.getType(MI.getOperand(1).getReg());
603 if (!Ty.isVector())
604 return false;
605 return isVShiftRImm(MI.getOperand(2).getReg(), MRI, Ty, Imm);
606}
607
608void applyVAshrLshrImm(MachineInstr &MI, MachineRegisterInfo &MRI,
609 int64_t &Imm) {
610 unsigned Opc = MI.getOpcode();
611 assert(Opc == TargetOpcode::G_ASHR || Opc == TargetOpcode::G_LSHR);
612 unsigned NewOpc =
613 Opc == TargetOpcode::G_ASHR ? AArch64::G_VASHR : AArch64::G_VLSHR;
614 MachineIRBuilder MIB(MI);
615 auto ImmDef = MIB.buildConstant(LLT::scalar(32), Imm);
616 MIB.buildInstr(NewOpc, {MI.getOperand(0)}, {MI.getOperand(1), ImmDef});
617 MI.eraseFromParent();
618}
619
620/// Determine if it is possible to modify the \p RHS and predicate \p P of a
621/// G_ICMP instruction such that the right-hand side is an arithmetic immediate.
622///
623/// \returns A pair containing the updated immediate and predicate which may
624/// be used to optimize the instruction.
625///
626/// \note This assumes that the comparison has been legalized.
627std::optional<std::pair<uint64_t, CmpInst::Predicate>>
628tryAdjustICmpImmAndPred(Register RHS, CmpInst::Predicate P,
629 const MachineRegisterInfo &MRI) {
630 const auto &Ty = MRI.getType(RHS);
631 if (Ty.isVector())
632 return std::nullopt;
633 unsigned Size = Ty.getSizeInBits();
634 assert((Size == 32 || Size == 64) && "Expected 32 or 64 bit compare only?");
635
636 // If the RHS is not a constant, or the RHS is already a valid arithmetic
637 // immediate, then there is nothing to change.
638 auto ValAndVReg = getIConstantVRegValWithLookThrough(RHS, MRI);
639 if (!ValAndVReg)
640 return std::nullopt;
641 uint64_t C = ValAndVReg->Value.getZExtValue();
642 if (isLegalArithImmed(C))
643 return std::nullopt;
644
645 // We have a non-arithmetic immediate. Check if adjusting the immediate and
646 // adjusting the predicate will result in a legal arithmetic immediate.
647 switch (P) {
648 default:
649 return std::nullopt;
650 case CmpInst::ICMP_SLT:
651 case CmpInst::ICMP_SGE:
652 // Check for
653 //
654 // x slt c => x sle c - 1
655 // x sge c => x sgt c - 1
656 //
657 // When c is not the smallest possible negative number.
658 if ((Size == 64 && static_cast<int64_t>(C) == INT64_MIN) ||
659 (Size == 32 && static_cast<int32_t>(C) == INT32_MIN))
660 return std::nullopt;
661 P = (P == CmpInst::ICMP_SLT) ? CmpInst::ICMP_SLE : CmpInst::ICMP_SGT;
662 C -= 1;
663 break;
664 case CmpInst::ICMP_ULT:
665 case CmpInst::ICMP_UGE:
666 // Check for
667 //
668 // x ult c => x ule c - 1
669 // x uge c => x ugt c - 1
670 //
671 // When c is not zero.
672 if (C == 0)
673 return std::nullopt;
674 P = (P == CmpInst::ICMP_ULT) ? CmpInst::ICMP_ULE : CmpInst::ICMP_UGT;
675 C -= 1;
676 break;
677 case CmpInst::ICMP_SLE:
678 case CmpInst::ICMP_SGT:
679 // Check for
680 //
681 // x sle c => x slt c + 1
682 // x sgt c => x sge c + 1
683 //
684 // When c is not the largest possible signed integer.
685 if ((Size == 32 && static_cast<int32_t>(C) == INT32_MAX) ||
686 (Size == 64 && static_cast<int64_t>(C) == INT64_MAX))
687 return std::nullopt;
688 P = (P == CmpInst::ICMP_SLE) ? CmpInst::ICMP_SLT : CmpInst::ICMP_SGE;
689 C += 1;
690 break;
691 case CmpInst::ICMP_ULE:
692 case CmpInst::ICMP_UGT:
693 // Check for
694 //
695 // x ule c => x ult c + 1
696 // x ugt c => x uge c + 1
697 //
698 // When c is not the largest possible unsigned integer.
699 if ((Size == 32 && static_cast<uint32_t>(C) == UINT32_MAX) ||
700 (Size == 64 && C == UINT64_MAX))
701 return std::nullopt;
702 P = (P == CmpInst::ICMP_ULE) ? CmpInst::ICMP_ULT : CmpInst::ICMP_UGE;
703 C += 1;
704 break;
705 }
706
707 // Check if the new constant is valid, and return the updated constant and
708 // predicate if it is.
709 if (Size == 32)
710 C = static_cast<uint32_t>(C);
711 if (!isLegalArithImmed(C))
712 return std::nullopt;
713 return {{C, P}};
714}
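// For illustration: "x slt 4097" cannot encode 4097 as an arithmetic
// immediate, but rewriting it as "x sle 4096" can, since 4096 is a legal
// 12-bit immediate shifted left by 12; the returned pair is then
// {4096, ICMP_SLE}.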
715
716/// Determine whether or not it is possible to update the RHS and predicate of
717/// a G_ICMP instruction such that the RHS will be selected as an arithmetic
718/// immediate.
719///
720/// \p MI - The G_ICMP instruction
721/// \p MatchInfo - The new RHS immediate and predicate on success
722///
723/// See tryAdjustICmpImmAndPred for valid transformations.
724bool matchAdjustICmpImmAndPred(
725 MachineInstr &MI, MachineRegisterInfo &MRI,
726 std::pair<uint64_t, CmpInst::Predicate> &MatchInfo) {
727 assert(MI.getOpcode() == TargetOpcode::G_ICMP);
728 Register RHS = MI.getOperand(3).getReg();
729 auto Pred = static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
730 if (auto MaybeNewImmAndPred = tryAdjustICmpImmAndPred(RHS, Pred, MRI)) {
731 MatchInfo = *MaybeNewImmAndPred;
732 return true;
733 }
734 return false;
735}
736
737void applyAdjustICmpImmAndPred(
738 MachineInstr &MI, std::pair<uint64_t, CmpInst::Predicate> &MatchInfo,
739 MachineIRBuilder &MIB, GISelChangeObserver &Observer) {
740 MIB.setInstrAndDebugLoc(MI);
741 MachineOperand &RHS = MI.getOperand(3);
742 MachineRegisterInfo &MRI = *MIB.getMRI();
743 auto Cst = MIB.buildConstant(MRI.cloneVirtualRegister(RHS.getReg()),
744 MatchInfo.first);
745 Observer.changingInstr(MI);
746 RHS.setReg(Cst->getOperand(0).getReg());
747 MI.getOperand(1).setPredicate(MatchInfo.second);
748 Observer.changedInstr(MI);
749}
750
751bool matchDupLane(MachineInstr &MI, MachineRegisterInfo &MRI,
752 std::pair<unsigned, int> &MatchInfo) {
753 assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
754 Register Src1Reg = MI.getOperand(1).getReg();
755 const LLT SrcTy = MRI.getType(Src1Reg);
756 const LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
757
758 auto LaneIdx = getSplatIndex(MI);
759 if (!LaneIdx)
760 return false;
761
762 // The lane idx should be within the first source vector.
763 if (*LaneIdx >= SrcTy.getNumElements())
764 return false;
765
766 if (DstTy != SrcTy)
767 return false;
768
769 LLT ScalarTy = SrcTy.getElementType();
770 unsigned ScalarSize = ScalarTy.getSizeInBits();
771
772 unsigned Opc = 0;
773 switch (SrcTy.getNumElements()) {
774 case 2:
775 if (ScalarSize == 64)
776 Opc = AArch64::G_DUPLANE64;
777 else if (ScalarSize == 32)
778 Opc = AArch64::G_DUPLANE32;
779 break;
780 case 4:
781 if (ScalarSize == 32)
782 Opc = AArch64::G_DUPLANE32;
783 else if (ScalarSize == 16)
784 Opc = AArch64::G_DUPLANE16;
785 break;
786 case 8:
787 if (ScalarSize == 8)
788 Opc = AArch64::G_DUPLANE8;
789 else if (ScalarSize == 16)
790 Opc = AArch64::G_DUPLANE16;
791 break;
792 case 16:
793 if (ScalarSize == 8)
794 Opc = AArch64::G_DUPLANE8;
795 break;
796 default:
797 break;
798 }
799 if (!Opc)
800 return false;
801
802 MatchInfo.first = Opc;
803 MatchInfo.second = *LaneIdx;
804 return true;
805}
806
807void applyDupLane(MachineInstr &MI, MachineRegisterInfo &MRI,
808 MachineIRBuilder &B, std::pair<unsigned, int> &MatchInfo) {
809 assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
810 Register Src1Reg = MI.getOperand(1).getReg();
811 const LLT SrcTy = MRI.getType(Src1Reg);
812
813 B.setInstrAndDebugLoc(MI);
814 auto Lane = B.buildConstant(LLT::scalar(64), MatchInfo.second);
815
816 Register DupSrc = MI.getOperand(1).getReg();
817 // For types like <2 x s32>, we can use G_DUPLANE32, with a <4 x s32> source.
818 // To do this, we can use a G_CONCAT_VECTORS to do the widening.
819 if (SrcTy.getSizeInBits() == 64) {
820 auto Undef = B.buildUndef(SrcTy);
821 DupSrc = B.buildConcatVectors(SrcTy.multiplyElements(2),
822 {Src1Reg, Undef.getReg(0)})
823 .getReg(0);
824 }
825 B.buildInstr(MatchInfo.first, {MI.getOperand(0).getReg()}, {DupSrc, Lane});
826 MI.eraseFromParent();
827}
828
829bool matchScalarizeVectorUnmerge(MachineInstr &MI, MachineRegisterInfo &MRI) {
830 auto &Unmerge = cast<GUnmerge>(MI);
831 Register Src1Reg = Unmerge.getReg(Unmerge.getNumOperands() - 1);
832 const LLT SrcTy = MRI.getType(Src1Reg);
833 if (SrcTy.getSizeInBits() != 128 && SrcTy.getSizeInBits() != 64)
834 return false;
835 return SrcTy.isVector() && !SrcTy.isScalable() &&
836 Unmerge.getNumOperands() == (unsigned)SrcTy.getNumElements() + 1;
837}
838
839void applyScalarizeVectorUnmerge(MachineInstr &MI, MachineRegisterInfo &MRI,
840 MachineIRBuilder &B) {
841 auto &Unmerge = cast<GUnmerge>(MI);
842 Register Src1Reg = Unmerge.getReg(Unmerge.getNumOperands() - 1);
843 const LLT SrcTy = MRI.getType(Src1Reg);
844 assert((SrcTy.isVector() && !SrcTy.isScalable()) &&
845 "Expected a fixed length vector");
846
847 for (int I = 0; I < SrcTy.getNumElements(); ++I)
848 B.buildExtractVectorElementConstant(Unmerge.getReg(I), Src1Reg, I);
849 MI.eraseFromParent();
850}
851
852bool matchBuildVectorToDup(MachineInstr &MI, MachineRegisterInfo &MRI) {
853 assert(MI.getOpcode() == TargetOpcode::G_BUILD_VECTOR);
854 auto Splat = getAArch64VectorSplat(MI, MRI);
855 if (!Splat)
856 return false;
857 if (Splat->isReg())
858 return true;
859 // Later, during selection, we'll try to match imported patterns using
860 // immAllOnesV and immAllZerosV. These require G_BUILD_VECTOR. Don't lower
861 // G_BUILD_VECTORs which could match those patterns.
862 int64_t Cst = Splat->getCst();
863 return (Cst != 0 && Cst != -1);
864}
865
866void applyBuildVectorToDup(MachineInstr &MI, MachineRegisterInfo &MRI,
867 MachineIRBuilder &B) {
868 B.setInstrAndDebugLoc(MI);
869 B.buildInstr(AArch64::G_DUP, {MI.getOperand(0).getReg()},
870 {MI.getOperand(1).getReg()});
871 MI.eraseFromParent();
872}
873
874/// \returns how many instructions would be saved by folding a G_ICMP's shift
875/// and/or extension operations.
876unsigned getCmpOperandFoldingProfit(Register CmpOp, MachineRegisterInfo &MRI) {
877 // No instructions to save if there's more than one use or no uses.
878 if (!MRI.hasOneNonDBGUse(CmpOp))
879 return 0;
880
881 // FIXME: This is duplicated with the selector. (See: selectShiftedRegister)
882 auto IsSupportedExtend = [&](const MachineInstr &MI) {
883 if (MI.getOpcode() == TargetOpcode::G_SEXT_INREG)
884 return true;
885 if (MI.getOpcode() != TargetOpcode::G_AND)
886 return false;
887 auto ValAndVReg =
888 getIConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI);
889 if (!ValAndVReg)
890 return false;
891 uint64_t Mask = ValAndVReg->Value.getZExtValue();
892 return (Mask == 0xFF || Mask == 0xFFFF || Mask == 0xFFFFFFFF);
893 };
894
895 MachineInstr *Def = getDefIgnoringCopies(CmpOp, MRI);
896 if (IsSupportedExtend(*Def))
897 return 1;
898
899 unsigned Opc = Def->getOpcode();
900 if (Opc != TargetOpcode::G_SHL && Opc != TargetOpcode::G_ASHR &&
901 Opc != TargetOpcode::G_LSHR)
902 return 0;
903
904 auto MaybeShiftAmt =
905 getIConstantVRegValWithLookThrough(Def->getOperand(2).getReg(), MRI);
906 if (!MaybeShiftAmt)
907 return 0;
908 uint64_t ShiftAmt = MaybeShiftAmt->Value.getZExtValue();
909 MachineInstr *ShiftLHS =
910 getDefIgnoringCopies(Def->getOperand(1).getReg(), MRI);
911
912 // Check if we can fold an extend and a shift.
913 // FIXME: This is duplicated with the selector. (See:
914 // selectArithExtendedRegister)
915 if (IsSupportedExtend(*ShiftLHS))
916 return (ShiftAmt <= 4) ? 2 : 1;
917
918 LLT Ty = MRI.getType(Def->getOperand(0).getReg());
919 if (Ty.isVector())
920 return 0;
921 unsigned ShiftSize = Ty.getSizeInBits();
922 if ((ShiftSize == 32 && ShiftAmt <= 31) ||
923 (ShiftSize == 64 && ShiftAmt <= 63))
924 return 1;
925 return 0;
926}
927
928/// \returns true if it would be profitable to swap the LHS and RHS of a G_ICMP
929/// instruction \p MI.
930bool trySwapICmpOperands(MachineInstr &MI, MachineRegisterInfo &MRI) {
931 assert(MI.getOpcode() == TargetOpcode::G_ICMP);
932 // Swap the operands if it would introduce a profitable folding opportunity.
933 // (e.g. a shift + extend).
934 //
935 // For example:
936 // lsl w13, w11, #1
937 // cmp w13, w12
938 // can be turned into:
939 // cmp w12, w11, lsl #1
940
941 // Don't swap if there's a constant on the RHS, because we know we can fold
942 // that.
943 Register RHS = MI.getOperand(3).getReg();
944 auto RHSCst = getIConstantVRegValWithLookThrough(RHS, MRI);
945 if (RHSCst && isLegalArithImmed(RHSCst->Value.getSExtValue()))
946 return false;
947
948 Register LHS = MI.getOperand(2).getReg();
949 auto Pred = static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
950 auto GetRegForProfit = [&](Register Reg) {
951 MachineInstr *Def = getDefIgnoringCopies(Reg, MRI);
952 return isCMN(Def, Pred, MRI) ? Def->getOperand(2).getReg() : Reg;
953 };
954
955 // Don't have a constant on the RHS. If we swap the LHS and RHS of the
956 // compare, would we be able to fold more instructions?
957 Register TheLHS = GetRegForProfit(LHS);
958 Register TheRHS = GetRegForProfit(RHS);
959
960 // If the LHS is more likely to give us a folding opportunity, then swap the
961 // LHS and RHS.
962 return (getCmpOperandFoldingProfit(TheLHS, MRI) >
963 getCmpOperandFoldingProfit(TheRHS, MRI));
964}
965
966void applySwapICmpOperands(MachineInstr &MI, GISelChangeObserver &Observer) {
967 auto Pred = static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
968 Register LHS = MI.getOperand(2).getReg();
969 Register RHS = MI.getOperand(3).getReg();
970 Observer.changingInstr(MI);
971 MI.getOperand(1).setPredicate(CmpInst::getSwappedPredicate(Pred));
972 MI.getOperand(2).setReg(RHS);
973 MI.getOperand(3).setReg(LHS);
974 Observer.changedInstr(MI);
975}
976
977/// \returns a function which builds a vector floating point compare instruction
978/// for a condition code \p CC.
979/// \param [in] IsZero - True if the comparison is against 0.
980/// \param [in] NoNans - True if the target has NoNansFPMath.
981std::function<Register(MachineIRBuilder &)>
982getVectorFCMP(AArch64CC::CondCode CC, Register LHS, Register RHS, bool IsZero,
983 bool NoNans, MachineRegisterInfo &MRI) {
984 LLT DstTy = MRI.getType(LHS);
985 assert(DstTy.isVector() && "Expected vector types only?");
986 assert(DstTy == MRI.getType(RHS) && "Src and Dst types must match!");
987 switch (CC) {
988 default:
989 llvm_unreachable("Unexpected condition code!");
990 case AArch64CC::NE:
991 return [LHS, RHS, IsZero, DstTy](MachineIRBuilder &MIB) {
992 auto FCmp = IsZero
993 ? MIB.buildInstr(AArch64::G_FCMEQZ, {DstTy}, {LHS})
994 : MIB.buildInstr(AArch64::G_FCMEQ, {DstTy}, {LHS, RHS});
995 return MIB.buildNot(DstTy, FCmp).getReg(0);
996 };
997 case AArch64CC::EQ:
998 return [LHS, RHS, IsZero, DstTy](MachineIRBuilder &MIB) {
999 return IsZero
1000 ? MIB.buildInstr(AArch64::G_FCMEQZ, {DstTy}, {LHS}).getReg(0)
1001 : MIB.buildInstr(AArch64::G_FCMEQ, {DstTy}, {LHS, RHS})
1002 .getReg(0);
1003 };
1004 case AArch64CC::GE:
1005 return [LHS, RHS, IsZero, DstTy](MachineIRBuilder &MIB) {
1006 return IsZero
1007 ? MIB.buildInstr(AArch64::G_FCMGEZ, {DstTy}, {LHS}).getReg(0)
1008 : MIB.buildInstr(AArch64::G_FCMGE, {DstTy}, {LHS, RHS})
1009 .getReg(0);
1010 };
1011 case AArch64CC::GT:
1012 return [LHS, RHS, IsZero, DstTy](MachineIRBuilder &MIB) {
1013 return IsZero
1014 ? MIB.buildInstr(AArch64::G_FCMGTZ, {DstTy}, {LHS}).getReg(0)
1015 : MIB.buildInstr(AArch64::G_FCMGT, {DstTy}, {LHS, RHS})
1016 .getReg(0);
1017 };
1018 case AArch64CC::LS:
1019 return [LHS, RHS, IsZero, DstTy](MachineIRBuilder &MIB) {
1020 return IsZero
1021 ? MIB.buildInstr(AArch64::G_FCMLEZ, {DstTy}, {LHS}).getReg(0)
1022 : MIB.buildInstr(AArch64::G_FCMGE, {DstTy}, {RHS, LHS})
1023 .getReg(0);
1024 };
1025 case AArch64CC::MI:
1026 return [LHS, RHS, IsZero, DstTy](MachineIRBuilder &MIB) {
1027 return IsZero
1028 ? MIB.buildInstr(AArch64::G_FCMLTZ, {DstTy}, {LHS}).getReg(0)
1029 : MIB.buildInstr(AArch64::G_FCMGT, {DstTy}, {RHS, LHS})
1030 .getReg(0);
1031 };
1032 }
1033}
1034
1035/// Try to lower a vector G_FCMP \p MI into an AArch64-specific pseudo.
1036bool matchLowerVectorFCMP(MachineInstr &MI, MachineRegisterInfo &MRI,
1037 MachineIRBuilder &MIB) {
1038 assert(MI.getOpcode() == TargetOpcode::G_FCMP);
1039 const auto &ST = MI.getMF()->getSubtarget<AArch64Subtarget>();
1040
1041 Register Dst = MI.getOperand(0).getReg();
1042 LLT DstTy = MRI.getType(Dst);
1043 if (!DstTy.isVector() || !ST.hasNEON())
1044 return false;
1045 Register LHS = MI.getOperand(2).getReg();
1046 unsigned EltSize = MRI.getType(LHS).getScalarSizeInBits();
1047 if (EltSize == 16 && !ST.hasFullFP16())
1048 return false;
1049 if (EltSize != 16 && EltSize != 32 && EltSize != 64)
1050 return false;
1051
1052 return true;
1053}
1054
1055/// Try to lower a vector G_FCMP \p MI into an AArch64-specific pseudo.
1056void applyLowerVectorFCMP(MachineInstr &MI, MachineRegisterInfo &MRI,
1057 MachineIRBuilder &MIB) {
1058 assert(MI.getOpcode() == TargetOpcode::G_FCMP);
1059 const auto &ST = MI.getMF()->getSubtarget<AArch64Subtarget>();
1060
1061 const auto &CmpMI = cast<GFCmp>(MI);
1062
1063 Register Dst = CmpMI.getReg(0);
1064 CmpInst::Predicate Pred = CmpMI.getCond();
1065 Register LHS = CmpMI.getLHSReg();
1066 Register RHS = CmpMI.getRHSReg();
1067
1068 LLT DstTy = MRI.getType(Dst);
1069
1070 auto Splat = getAArch64VectorSplat(*MRI.getVRegDef(RHS), MRI);
1071
1072 // Compares against 0 have special target-specific pseudos.
1073 bool IsZero = Splat && Splat->isCst() && Splat->getCst() == 0;
1074
1075 bool Invert = false;
1076 AArch64CC::CondCode CC, CC2 = AArch64CC::AL;
1077 if ((Pred == CmpInst::Predicate::FCMP_ORD ||
1078 Pred == CmpInst::Predicate::FCMP_UNO) &&
1079 IsZero) {
1080 // The special case "fcmp ord %a, 0" is the canonical check that LHS isn't
1081 // NaN, so equivalent to a == a and doesn't need the two comparisons an
1082 // "ord" normally would.
1083 // Similarly, "fcmp uno %a, 0" is the canonical check that LHS is NaN and is
1084 // thus equivalent to a != a.
1085 RHS = LHS;
1086 IsZero = false;
1087 CC = Pred == CmpInst::Predicate::FCMP_ORD ? AArch64CC::EQ : AArch64CC::NE;
1088 } else
1089 changeVectorFCMPPredToAArch64CC(Pred, CC, CC2, Invert);
1090
1091 // Instead of having an apply function, just build here to simplify things.
1092 MIB.setInstrAndDebugLoc(MI);
1093
1094 const bool NoNans =
1095 ST.getTargetLowering()->getTargetMachine().Options.NoNaNsFPMath;
1096
1097 auto Cmp = getVectorFCMP(CC, LHS, RHS, IsZero, NoNans, MRI);
1098 Register CmpRes;
1099 if (CC2 == AArch64CC::AL)
1100 CmpRes = Cmp(MIB);
1101 else {
1102 auto Cmp2 = getVectorFCMP(CC2, LHS, RHS, IsZero, NoNans, MRI);
1103 auto Cmp2Dst = Cmp2(MIB);
1104 auto Cmp1Dst = Cmp(MIB);
1105 CmpRes = MIB.buildOr(DstTy, Cmp1Dst, Cmp2Dst).getReg(0);
1106 }
1107 if (Invert)
1108 CmpRes = MIB.buildNot(DstTy, CmpRes).getReg(0);
1109 MRI.replaceRegWith(Dst, CmpRes);
1110 MI.eraseFromParent();
1111}
1112
1113bool matchFormTruncstore(MachineInstr &MI, MachineRegisterInfo &MRI,
1114 Register &SrcReg) {
1115 assert(MI.getOpcode() == TargetOpcode::G_STORE);
1116 Register DstReg = MI.getOperand(0).getReg();
1117 if (MRI.getType(DstReg).isVector())
1118 return false;
1119 // Match a store of a truncate.
1120 if (!mi_match(DstReg, MRI, m_GTrunc(m_Reg(SrcReg))))
1121 return false;
1122 // Only form truncstores for value types of max 64b.
1123 return MRI.getType(SrcReg).getSizeInBits() <= 64;
1124}
1125
1126void applyFormTruncstore(MachineInstr &MI, MachineRegisterInfo &MRI,
1127 MachineIRBuilder &B, GISelChangeObserver &Observer,
1128 Register &SrcReg) {
1129 assert(MI.getOpcode() == TargetOpcode::G_STORE);
1130 Observer.changingInstr(MI);
1131 MI.getOperand(0).setReg(SrcReg);
1132 Observer.changedInstr(MI);
1133}
1134
1135// Lower vector G_SEXT_INREG back to shifts for selection. We allowed them to
1136// form in the first place for combine opportunities, so any remaining ones
1137 // at this stage need to be lowered back.
1138bool matchVectorSextInReg(MachineInstr &MI, MachineRegisterInfo &MRI) {
1139 assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG);
1140 Register DstReg = MI.getOperand(0).getReg();
1141 LLT DstTy = MRI.getType(DstReg);
1142 return DstTy.isVector();
1143}
1144
1145void applyVectorSextInReg(MachineInstr &MI, MachineRegisterInfo &MRI,
1146 MachineIRBuilder &B, GISelChangeObserver &Observer) {
1147 assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG);
1148 B.setInstrAndDebugLoc(MI);
1149 LegalizerHelper Helper(*MI.getMF(), Observer, B);
1150 Helper.lower(MI, 0, /* Unused hint type */ LLT());
1151}
1152
1153/// Combine <N x t>, unused = unmerge(G_EXT <2*N x t> v, undef, N)
1154/// => unused, <N x t> = unmerge v
1155bool matchUnmergeExtToUnmerge(MachineInstr &MI, MachineRegisterInfo &MRI,
1156 Register &MatchInfo) {
1157 auto &Unmerge = cast<GUnmerge>(MI);
1158 if (Unmerge.getNumDefs() != 2)
1159 return false;
1160 if (!MRI.use_nodbg_empty(Unmerge.getReg(1)))
1161 return false;
1162
1163 LLT DstTy = MRI.getType(Unmerge.getReg(0));
1164 if (!DstTy.isVector())
1165 return false;
1166
1167 MachineInstr *Ext = getOpcodeDef(AArch64::G_EXT, Unmerge.getSourceReg(), MRI);
1168 if (!Ext)
1169 return false;
1170
1171 Register ExtSrc1 = Ext->getOperand(1).getReg();
1172 Register ExtSrc2 = Ext->getOperand(2).getReg();
1173 auto LowestVal =
1174 getIConstantVRegValWithLookThrough(Ext->getOperand(3).getReg(), MRI);
1175 if (!LowestVal || LowestVal->Value.getZExtValue() != DstTy.getSizeInBytes())
1176 return false;
1177
1178 if (!getOpcodeDef<GImplicitDef>(ExtSrc2, MRI))
1179 return false;
1180
1181 MatchInfo = ExtSrc1;
1182 return true;
1183}
1184
1185void applyUnmergeExtToUnmerge(MachineInstr &MI, MachineRegisterInfo &MRI,
1186 MachineIRBuilder &B,
1187 GISelChangeObserver &Observer, Register &SrcReg) {
1188 Observer.changingInstr(MI);
1189 // Swap dst registers.
1190 Register Dst1 = MI.getOperand(0).getReg();
1191 MI.getOperand(0).setReg(MI.getOperand(1).getReg());
1192 MI.getOperand(1).setReg(Dst1);
1193 MI.getOperand(2).setReg(SrcReg);
1194 Observer.changedInstr(MI);
1195}
1196
1197 // Match mul({z/s}ext, {z/s}ext) => {u/s}mull, OR
1198 // match v2s64 mul instructions, which will then be scalarised later on.
1199 // These two matches are done in one function to ensure that the order of
1200 // matching will always be the same.
1201// Try lowering MUL to MULL before trying to scalarize if needed.
1202bool matchExtMulToMULL(MachineInstr &MI, MachineRegisterInfo &MRI) {
1203 // Get the instructions that defined the source operand
1204 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
1205 MachineInstr *I1 = getDefIgnoringCopies(MI.getOperand(1).getReg(), MRI);
1206 MachineInstr *I2 = getDefIgnoringCopies(MI.getOperand(2).getReg(), MRI);
1207
1208 if (DstTy.isVector()) {
1209 // If the source operands were EXTENDED before, then {U/S}MULL can be used
1210 unsigned I1Opc = I1->getOpcode();
1211 unsigned I2Opc = I2->getOpcode();
1212 if (((I1Opc == TargetOpcode::G_ZEXT && I2Opc == TargetOpcode::G_ZEXT) ||
1213 (I1Opc == TargetOpcode::G_SEXT && I2Opc == TargetOpcode::G_SEXT)) &&
1214 (MRI.getType(I1->getOperand(0).getReg()).getScalarSizeInBits() ==
1215 MRI.getType(I1->getOperand(1).getReg()).getScalarSizeInBits() * 2) &&
1216 (MRI.getType(I2->getOperand(0).getReg()).getScalarSizeInBits() ==
1217 MRI.getType(I2->getOperand(1).getReg()).getScalarSizeInBits() * 2)) {
1218 return true;
1219 }
1220 // If result type is v2s64, scalarise the instruction
1221 else if (DstTy == LLT::fixed_vector(2, 64)) {
1222 return true;
1223 }
1224 }
1225 return false;
1226}
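// For illustration: a G_MUL of two G_ZEXT values widened from <4 x s16> to
// <4 x s32> matches here and is rebuilt by applyExtMulToMULL as
//   %prod:_(<4 x s32>) = G_UMULL %lhs:_(<4 x s16>), %rhs:_(<4 x s16>)
// while a plain <2 x s64> G_MUL is matched so it can be scalarised instead.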
1227
1228void applyExtMulToMULL(MachineInstr &MI, MachineRegisterInfo &MRI,
1229 MachineIRBuilder &B, GISelChangeObserver &Observer) {
1230 assert(MI.getOpcode() == TargetOpcode::G_MUL &&
1231 "Expected a G_MUL instruction");
1232
1233 // Get the instructions that defined the source operand
1234 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
1235 MachineInstr *I1 = getDefIgnoringCopies(MI.getOperand(1).getReg(), MRI);
1236 MachineInstr *I2 = getDefIgnoringCopies(MI.getOperand(2).getReg(), MRI);
1237
1238 // If the source operands were EXTENDED before, then {U/S}MULL can be used
1239 unsigned I1Opc = I1->getOpcode();
1240 unsigned I2Opc = I2->getOpcode();
1241 if (((I1Opc == TargetOpcode::G_ZEXT && I2Opc == TargetOpcode::G_ZEXT) ||
1242 (I1Opc == TargetOpcode::G_SEXT && I2Opc == TargetOpcode::G_SEXT)) &&
1243 (MRI.getType(I1->getOperand(0).getReg()).getScalarSizeInBits() ==
1244 MRI.getType(I1->getOperand(1).getReg()).getScalarSizeInBits() * 2) &&
1245 (MRI.getType(I2->getOperand(0).getReg()).getScalarSizeInBits() ==
1246 MRI.getType(I2->getOperand(1).getReg()).getScalarSizeInBits() * 2)) {
1247
1248 B.setInstrAndDebugLoc(MI);
1249 B.buildInstr(I1->getOpcode() == TargetOpcode::G_ZEXT ? AArch64::G_UMULL
1250 : AArch64::G_SMULL,
1251 {MI.getOperand(0).getReg()},
1252 {I1->getOperand(1).getReg(), I2->getOperand(1).getReg()});
1253 MI.eraseFromParent();
1254 }
1255 // If result type is v2s64, scalarise the instruction
1256 else if (DstTy == LLT::fixed_vector(2, 64)) {
1257 LegalizerHelper Helper(*MI.getMF(), Observer, B);
1258 B.setInstrAndDebugLoc(MI);
1259 Helper.fewerElementsVector(
1260 MI, 0,
1261 DstTy.changeElementCount(
1262 DstTy.getElementCount().divideCoefficientBy(2)));
1263 }
1264}
1265
1266class AArch64PostLegalizerLoweringImpl : public Combiner {
1267protected:
1268 // TODO: Make CombinerHelper methods const.
1269 mutable CombinerHelper Helper;
1270 const AArch64PostLegalizerLoweringImplRuleConfig &RuleConfig;
1271 const AArch64Subtarget &STI;
1272
1273public:
1274 AArch64PostLegalizerLoweringImpl(
1275 MachineFunction &MF, CombinerInfo &CInfo, const TargetPassConfig *TPC,
1276 GISelCSEInfo *CSEInfo,
1277 const AArch64PostLegalizerLoweringImplRuleConfig &RuleConfig,
1278 const AArch64Subtarget &STI);
1279
1280 static const char *getName() { return "AArch64PostLegalizerLowering"; }
1281
1282 bool tryCombineAll(MachineInstr &I) const override;
1283
1284private:
1285#define GET_GICOMBINER_CLASS_MEMBERS
1286#include "AArch64GenPostLegalizeGILowering.inc"
1287#undef GET_GICOMBINER_CLASS_MEMBERS
1288};
1289
1290#define GET_GICOMBINER_IMPL
1291#include "AArch64GenPostLegalizeGILowering.inc"
1292#undef GET_GICOMBINER_IMPL
1293
1294AArch64PostLegalizerLoweringImpl::AArch64PostLegalizerLoweringImpl(
1295 MachineFunction &MF, CombinerInfo &CInfo, const TargetPassConfig *TPC,
1296 GISelCSEInfo *CSEInfo,
1297 const AArch64PostLegalizerLoweringImplRuleConfig &RuleConfig,
1298 const AArch64Subtarget &STI)
1299 : Combiner(MF, CInfo, TPC, /*KB*/ nullptr, CSEInfo),
1300 Helper(Observer, B, /*IsPreLegalize*/ true), RuleConfig(RuleConfig),
1301 STI(STI),
1302#define GET_GICOMBINER_CONSTRUCTOR_INITS
1303#include "AArch64GenPostLegalizeGILowering.inc"
1304#undef GET_GICOMBINER_CONSTRUCTOR_INITS
1305{
1306}
1307
1308class AArch64PostLegalizerLowering : public MachineFunctionPass {
1309public:
1310 static char ID;
1311
1312 AArch64PostLegalizerLowering();
1313
1314 StringRef getPassName() const override {
1315 return "AArch64PostLegalizerLowering";
1316 }
1317
1318 bool runOnMachineFunction(MachineFunction &MF) override;
1319 void getAnalysisUsage(AnalysisUsage &AU) const override;
1320
1321private:
1322 AArch64PostLegalizerLoweringImplRuleConfig RuleConfig;
1323};
1324} // end anonymous namespace
1325
1326void AArch64PostLegalizerLowering::getAnalysisUsage(AnalysisUsage &AU) const {
1327 AU.addRequired<TargetPassConfig>();
1328 AU.setPreservesCFG();
1329 getSelectionDAGFallbackAnalysisUsage(AU);
1330 MachineFunctionPass::getAnalysisUsage(AU);
1331}
1332
1333AArch64PostLegalizerLowering::AArch64PostLegalizerLowering()
1334 : MachineFunctionPass(ID) {
1335 initializeAArch64PostLegalizerLoweringPass(*PassRegistry::getPassRegistry());
1336
1337 if (!RuleConfig.parseCommandLineOption())
1338 report_fatal_error("Invalid rule identifier");
1339}
1340
1341bool AArch64PostLegalizerLowering::runOnMachineFunction(MachineFunction &MF) {
1342 if (MF.getProperties().hasProperty(
1343 MachineFunctionProperties::Property::FailedISel))
1344 return false;
1345 assert(MF.getProperties().hasProperty(
1346 MachineFunctionProperties::Property::Legalized) &&
1347 "Expected a legalized function?");
1348 auto *TPC = &getAnalysis<TargetPassConfig>();
1349 const Function &F = MF.getFunction();
1350
1351 const AArch64Subtarget &ST = MF.getSubtarget<AArch64Subtarget>();
1352 CombinerInfo CInfo(/*AllowIllegalOps*/ true, /*ShouldLegalizeIllegal*/ false,
1353 /*LegalizerInfo*/ nullptr, /*OptEnabled=*/true,
1354 F.hasOptSize(), F.hasMinSize());
1355 AArch64PostLegalizerLoweringImpl Impl(MF, CInfo, TPC, /*CSEInfo*/ nullptr,
1356 RuleConfig, ST);
1357 return Impl.combineMachineInstrs();
1358}
1359
1360char AArch64PostLegalizerLowering::ID = 0;
1361INITIALIZE_PASS_BEGIN(AArch64PostLegalizerLowering, DEBUG_TYPE,
1362 "Lower AArch64 MachineInstrs after legalization", false,
1363 false)
1364INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
1365INITIALIZE_PASS_END(AArch64PostLegalizerLowering, DEBUG_TYPE,
1366 "Lower AArch64 MachineInstrs after legalization", false,
1367 false)
1368
1369namespace llvm {
1370FunctionPass *createAArch64PostLegalizerLowering() {
1371 return new AArch64PostLegalizerLowering();
1372}
1373} // end namespace llvm