1//=== AArch64PostLegalizerLowering.cpp --------------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8///
9/// \file
10/// Post-legalization lowering for instructions.
11///
12/// This is used to offload pattern matching from the selector.
13///
14/// For example, this combiner will notice that a G_SHUFFLE_VECTOR is actually
15/// a G_ZIP, G_UZP, etc.
16///
17/// General optimization combines should be handled by either the
18/// AArch64PostLegalizerCombiner or the AArch64PreLegalizerCombiner.
19///
20//===----------------------------------------------------------------------===//
21
22#include "AArch64ExpandImm.h"
25#include "AArch64Subtarget.h"
47#include "llvm/IR/InstrTypes.h"
49#include "llvm/Support/Debug.h"
51#include <optional>
52
53#define GET_GICOMBINER_DEPS
54#include "AArch64GenPostLegalizeGILowering.inc"
55#undef GET_GICOMBINER_DEPS
56
57#define DEBUG_TYPE "aarch64-postlegalizer-lowering"
58
59using namespace llvm;
60using namespace MIPatternMatch;
61using namespace AArch64GISelUtils;
62
63namespace {
64
65#define GET_GICOMBINER_TYPES
66#include "AArch64GenPostLegalizeGILowering.inc"
67#undef GET_GICOMBINER_TYPES
68
69/// Represents a pseudo instruction which replaces a G_SHUFFLE_VECTOR.
70///
71/// Used for matching target-supported shuffles before codegen.
72struct ShuffleVectorPseudo {
73 unsigned Opc; ///< Opcode for the instruction. (E.g. G_ZIP1)
74 Register Dst; ///< Destination register.
75 SmallVector<SrcOp, 2> SrcOps; ///< Source registers.
76 ShuffleVectorPseudo(unsigned Opc, Register Dst,
77 std::initializer_list<SrcOp> SrcOps)
78 : Opc(Opc), Dst(Dst), SrcOps(SrcOps){};
79 ShuffleVectorPseudo() = default;
80};
81
82/// Check if a G_EXT instruction can handle a shuffle mask \p M when the vector
83/// sources of the shuffle are different.
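/// For example, with NumElts = 4 the two-source mask <1, 2, 3, 4> is a rotation
/// of the concatenated inputs and yields {ReverseExt = false, Imm = 1}.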
84std::optional<std::pair<bool, uint64_t>> getExtMask(ArrayRef<int> M,
85 unsigned NumElts) {
86 // Look for the first non-undef element.
87 auto FirstRealElt = find_if(M, [](int Elt) { return Elt >= 0; });
88 if (FirstRealElt == M.end())
89 return std::nullopt;
90
91 // Use APInt to handle overflow when calculating expected element.
92 unsigned MaskBits = APInt(32, NumElts * 2).logBase2();
93 APInt ExpectedElt = APInt(MaskBits, *FirstRealElt + 1);
94
95 // The following shuffle indices must be the successive elements after the
96 // first real element.
97 if (any_of(
98 make_range(std::next(FirstRealElt), M.end()),
99 [&ExpectedElt](int Elt) { return Elt != ExpectedElt++ && Elt >= 0; }))
100 return std::nullopt;
101
102 // The index of an EXT is the first element if it is not UNDEF.
103 // Watch out for the beginning UNDEFs. The EXT index should be the expected
104 // value of the first element. E.g.
105 // <-1, -1, 3, ...> is treated as <1, 2, 3, ...>.
106 // <-1, -1, 0, 1, ...> is treated as <2*NumElts-2, 2*NumElts-1, 0, 1, ...>.
107 // ExpectedElt is the last mask index plus 1.
108 uint64_t Imm = ExpectedElt.getZExtValue();
109 bool ReverseExt = false;
110
111 // There are two different cases that require reversing the input vectors.
112 // For example, for vector <4 x i32> we have the following cases,
113 // Case 1: shufflevector(<4 x i32>,<4 x i32>,<-1, -1, -1, 0>)
114 // Case 2: shufflevector(<4 x i32>,<4 x i32>,<-1, -1, 7, 0>)
115 // For both cases, we finally use mask <5, 6, 7, 0>, which requires
116 // to reverse two input vectors.
117 if (Imm < NumElts)
118 ReverseExt = true;
119 else
120 Imm -= NumElts;
121 return std::make_pair(ReverseExt, Imm);
122}
123
124/// Helper function for matchINS.
125///
126/// \returns a value when \p M is an ins mask for \p NumInputElements.
127///
128/// First element of the returned pair is true when the produced
129/// G_INSERT_VECTOR_ELT destination should be the LHS of the G_SHUFFLE_VECTOR.
130///
131/// Second element is the destination lane for the G_INSERT_VECTOR_ELT.
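/// For example, with 4 input elements the mask <0, 1, 6, 3> matches the LHS in
/// every lane except lane 2, so this returns {true, 2}; the caller then takes
/// the inserted value from element 6 - 4 = 2 of the RHS.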
132std::optional<std::pair<bool, int>> isINSMask(ArrayRef<int> M,
133 int NumInputElements) {
134 if (M.size() != static_cast<size_t>(NumInputElements))
135 return std::nullopt;
136 int NumLHSMatch = 0, NumRHSMatch = 0;
137 int LastLHSMismatch = -1, LastRHSMismatch = -1;
138 for (int Idx = 0; Idx < NumInputElements; ++Idx) {
139 if (M[Idx] == -1) {
140 ++NumLHSMatch;
141 ++NumRHSMatch;
142 continue;
143 }
144 M[Idx] == Idx ? ++NumLHSMatch : LastLHSMismatch = Idx;
145 M[Idx] == Idx + NumInputElements ? ++NumRHSMatch : LastRHSMismatch = Idx;
146 }
147 const int NumNeededToMatch = NumInputElements - 1;
148 if (NumLHSMatch == NumNeededToMatch)
149 return std::make_pair(true, LastLHSMismatch);
150 if (NumRHSMatch == NumNeededToMatch)
151 return std::make_pair(false, LastRHSMismatch);
152 return std::nullopt;
153}
154
155/// \return true if a G_SHUFFLE_VECTOR instruction \p MI can be replaced with a
156/// G_REV instruction. The matched G_REV opcode is returned via \p MatchInfo.
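/// For example, on <8 x s8> the mask <7, 6, 5, 4, 3, 2, 1, 0> reverses the
/// elements within one 64-bit lane and is matched as G_REV64.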
157bool matchREV(MachineInstr &MI, MachineRegisterInfo &MRI,
158 ShuffleVectorPseudo &MatchInfo) {
159 assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
160 ArrayRef<int> ShuffleMask = MI.getOperand(3).getShuffleMask();
161 Register Dst = MI.getOperand(0).getReg();
162 Register Src = MI.getOperand(1).getReg();
163 LLT Ty = MRI.getType(Dst);
164 unsigned EltSize = Ty.getScalarSizeInBits();
165
166 // Element size for a rev cannot be 64.
167 if (EltSize == 64)
168 return false;
169
170 unsigned NumElts = Ty.getNumElements();
171
172 // Try to produce a G_REV instruction
173 for (unsigned LaneSize : {64U, 32U, 16U}) {
174 if (isREVMask(ShuffleMask, EltSize, NumElts, LaneSize)) {
175 unsigned Opcode;
176 if (LaneSize == 64U)
177 Opcode = AArch64::G_REV64;
178 else if (LaneSize == 32U)
179 Opcode = AArch64::G_REV32;
180 else
181 Opcode = AArch64::G_REV16;
182
183 MatchInfo = ShuffleVectorPseudo(Opcode, Dst, {Src});
184 return true;
185 }
186 }
187
188 return false;
189}
190
191/// \return true if a G_SHUFFLE_VECTOR instruction \p MI can be replaced with
192/// a G_TRN1 or G_TRN2 instruction.
193bool matchTRN(MachineInstr &MI, MachineRegisterInfo &MRI,
194 ShuffleVectorPseudo &MatchInfo) {
195 assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
196 unsigned WhichResult;
197 ArrayRef<int> ShuffleMask = MI.getOperand(3).getShuffleMask();
198 Register Dst = MI.getOperand(0).getReg();
199 unsigned NumElts = MRI.getType(Dst).getNumElements();
200 if (!isTRNMask(ShuffleMask, NumElts, WhichResult))
201 return false;
202 unsigned Opc = (WhichResult == 0) ? AArch64::G_TRN1 : AArch64::G_TRN2;
203 Register V1 = MI.getOperand(1).getReg();
204 Register V2 = MI.getOperand(2).getReg();
205 MatchInfo = ShuffleVectorPseudo(Opc, Dst, {V1, V2});
206 return true;
207}
208
209/// \return true if a G_SHUFFLE_VECTOR instruction \p MI can be replaced with
210/// a G_UZP1 or G_UZP2 instruction.
211///
212/// \param [in] MI - The shuffle vector instruction.
213/// \param [out] MatchInfo - Either G_UZP1 or G_UZP2 on success.
214bool matchUZP(MachineInstr &MI, MachineRegisterInfo &MRI,
215 ShuffleVectorPseudo &MatchInfo) {
216 assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
217 unsigned WhichResult;
218 ArrayRef<int> ShuffleMask = MI.getOperand(3).getShuffleMask();
219 Register Dst = MI.getOperand(0).getReg();
220 unsigned NumElts = MRI.getType(Dst).getNumElements();
221 if (!isUZPMask(ShuffleMask, NumElts, WhichResult))
222 return false;
223 unsigned Opc = (WhichResult == 0) ? AArch64::G_UZP1 : AArch64::G_UZP2;
224 Register V1 = MI.getOperand(1).getReg();
225 Register V2 = MI.getOperand(2).getReg();
226 MatchInfo = ShuffleVectorPseudo(Opc, Dst, {V1, V2});
227 return true;
228}
229
230bool matchZip(MachineInstr &MI, MachineRegisterInfo &MRI,
231 ShuffleVectorPseudo &MatchInfo) {
232 assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
233 unsigned WhichResult;
234 ArrayRef<int> ShuffleMask = MI.getOperand(3).getShuffleMask();
235 Register Dst = MI.getOperand(0).getReg();
236 unsigned NumElts = MRI.getType(Dst).getNumElements();
237 if (!isZIPMask(ShuffleMask, NumElts, WhichResult))
238 return false;
239 unsigned Opc = (WhichResult == 0) ? AArch64::G_ZIP1 : AArch64::G_ZIP2;
240 Register V1 = MI.getOperand(1).getReg();
241 Register V2 = MI.getOperand(2).getReg();
242 MatchInfo = ShuffleVectorPseudo(Opc, Dst, {V1, V2});
243 return true;
244}
245
246/// Helper function for matchDup.
247bool matchDupFromInsertVectorElt(int Lane, MachineInstr &MI,
248 MachineRegisterInfo &MRI,
249 ShuffleVectorPseudo &MatchInfo) {
250 if (Lane != 0)
251 return false;
252
253 // Try to match a vector splat operation into a dup instruction.
254 // We're looking for this pattern:
255 //
256 // %scalar:gpr(s64) = COPY $x0
257 // %undef:fpr(<2 x s64>) = G_IMPLICIT_DEF
258 // %cst0:gpr(s32) = G_CONSTANT i32 0
259 // %zerovec:fpr(<2 x s32>) = G_BUILD_VECTOR %cst0(s32), %cst0(s32)
260 // %ins:fpr(<2 x s64>) = G_INSERT_VECTOR_ELT %undef, %scalar(s64), %cst0(s32)
261 // %splat:fpr(<2 x s64>) = G_SHUFFLE_VECTOR %ins(<2 x s64>), %undef,
262 // %zerovec(<2 x s32>)
263 //
264 // ...into:
265 // %splat = G_DUP %scalar
266
267 // Begin matching the insert.
268 auto *InsMI = getOpcodeDef(TargetOpcode::G_INSERT_VECTOR_ELT,
269 MI.getOperand(1).getReg(), MRI);
270 if (!InsMI)
271 return false;
272 // Match the undef vector operand.
273 if (!getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, InsMI->getOperand(1).getReg(),
274 MRI))
275 return false;
276
277 // Match the index constant 0.
278 if (!mi_match(InsMI->getOperand(3).getReg(), MRI, m_ZeroInt()))
279 return false;
280
281 MatchInfo = ShuffleVectorPseudo(AArch64::G_DUP, MI.getOperand(0).getReg(),
282 {InsMI->getOperand(2).getReg()});
283 return true;
284}
285
286/// Helper function for matchDup.
287bool matchDupFromBuildVector(int Lane, MachineInstr &MI,
288 MachineRegisterInfo &MRI,
289 ShuffleVectorPseudo &MatchInfo) {
290 assert(Lane >= 0 && "Expected positive lane?");
291 int NumElements = MRI.getType(MI.getOperand(1).getReg()).getNumElements();
292 // Test if the LHS is a BUILD_VECTOR. If it is, then we can just reference the
293 // lane's definition directly.
294 auto *BuildVecMI =
295 getOpcodeDef(TargetOpcode::G_BUILD_VECTOR,
296 MI.getOperand(Lane < NumElements ? 1 : 2).getReg(), MRI);
297 // If Lane >= NumElements, it points into the RHS, so adjust it to index the RHS.
298 if (NumElements <= Lane)
299 Lane -= NumElements;
300
301 if (!BuildVecMI)
302 return false;
303 Register Reg = BuildVecMI->getOperand(Lane + 1).getReg();
304 MatchInfo =
305 ShuffleVectorPseudo(AArch64::G_DUP, MI.getOperand(0).getReg(), {Reg});
306 return true;
307}
308
309bool matchDup(MachineInstr &MI, MachineRegisterInfo &MRI,
310 ShuffleVectorPseudo &MatchInfo) {
311 assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
312 auto MaybeLane = getSplatIndex(MI);
313 if (!MaybeLane)
314 return false;
315 int Lane = *MaybeLane;
316 // If this is undef splat, generate it via "just" vdup, if possible.
317 if (Lane < 0)
318 Lane = 0;
319 if (matchDupFromInsertVectorElt(Lane, MI, MRI, MatchInfo))
320 return true;
321 if (matchDupFromBuildVector(Lane, MI, MRI, MatchInfo))
322 return true;
323 return false;
324}
325
326// Check if an EXT instruction can handle the shuffle mask when the vector
327// sources of the shuffle are the same.
328bool isSingletonExtMask(ArrayRef<int> M, LLT Ty) {
329 unsigned NumElts = Ty.getNumElements();
330
331 // Assume that the first shuffle index is not UNDEF. Fail if it is.
332 if (M[0] < 0)
333 return false;
334
335 // If this is a VEXT shuffle, the immediate value is the index of the first
336 // element. The other shuffle indices must be the successive elements after
337 // the first one.
338 unsigned ExpectedElt = M[0];
339 for (unsigned I = 1; I < NumElts; ++I) {
340 // Increment the expected index. If it wraps around, just follow it
341 // back to index zero and keep going.
342 ++ExpectedElt;
343 if (ExpectedElt == NumElts)
344 ExpectedElt = 0;
345
346 if (M[I] < 0)
347 continue; // Ignore UNDEF indices.
348 if (ExpectedElt != static_cast<unsigned>(M[I]))
349 return false;
350 }
351
352 return true;
353}
354
355bool matchEXT(MachineInstr &MI, MachineRegisterInfo &MRI,
356 ShuffleVectorPseudo &MatchInfo) {
357 assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
358 Register Dst = MI.getOperand(0).getReg();
359 LLT DstTy = MRI.getType(Dst);
360 Register V1 = MI.getOperand(1).getReg();
361 Register V2 = MI.getOperand(2).getReg();
362 auto Mask = MI.getOperand(3).getShuffleMask();
363 uint64_t Imm;
364 auto ExtInfo = getExtMask(Mask, DstTy.getNumElements());
365 uint64_t ExtFactor = MRI.getType(V1).getScalarSizeInBits() / 8;
366
367 if (!ExtInfo) {
368 if (!getOpcodeDef<GImplicitDef>(V2, MRI) ||
369 !isSingletonExtMask(Mask, DstTy))
370 return false;
371
372 Imm = Mask[0] * ExtFactor;
373 MatchInfo = ShuffleVectorPseudo(AArch64::G_EXT, Dst, {V1, V1, Imm});
374 return true;
375 }
376 bool ReverseExt;
377 std::tie(ReverseExt, Imm) = *ExtInfo;
378 if (ReverseExt)
379 std::swap(V1, V2);
380 Imm *= ExtFactor;
381 MatchInfo = ShuffleVectorPseudo(AArch64::G_EXT, Dst, {V1, V2, Imm});
382 return true;
383}
384
385/// Replace a G_SHUFFLE_VECTOR instruction with a pseudo.
386/// \p Opc is the opcode to use. \p MI is the G_SHUFFLE_VECTOR.
387void applyShuffleVectorPseudo(MachineInstr &MI,
388 ShuffleVectorPseudo &MatchInfo) {
389 MachineIRBuilder MIRBuilder(MI);
390 MIRBuilder.buildInstr(MatchInfo.Opc, {MatchInfo.Dst}, MatchInfo.SrcOps);
391 MI.eraseFromParent();
392}
393
394/// Replace a G_SHUFFLE_VECTOR instruction with G_EXT.
395/// Special-cased because the constant operand must be emitted as a G_CONSTANT
396/// for the imported tablegen patterns to work.
397void applyEXT(MachineInstr &MI, ShuffleVectorPseudo &MatchInfo) {
398 MachineIRBuilder MIRBuilder(MI);
399 if (MatchInfo.SrcOps[2].getImm() == 0)
400 MIRBuilder.buildCopy(MatchInfo.Dst, MatchInfo.SrcOps[0]);
401 else {
402 // Tablegen patterns expect an i32 G_CONSTANT as the final op.
403 auto Cst =
404 MIRBuilder.buildConstant(LLT::scalar(32), MatchInfo.SrcOps[2].getImm());
405 MIRBuilder.buildInstr(MatchInfo.Opc, {MatchInfo.Dst},
406 {MatchInfo.SrcOps[0], MatchInfo.SrcOps[1], Cst});
407 }
408 MI.eraseFromParent();
409}
410
411bool matchNonConstInsert(MachineInstr &MI, MachineRegisterInfo &MRI) {
412 assert(MI.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT);
413
414 auto ValAndVReg =
415 getIConstantVRegValWithLookThrough(MI.getOperand(3).getReg(), MRI);
416 return !ValAndVReg;
417}
418
419void applyNonConstInsert(MachineInstr &MI, MachineRegisterInfo &MRI,
420 MachineIRBuilder &Builder) {
421 auto &Insert = cast<GInsertVectorElement>(MI);
422 Builder.setInstrAndDebugLoc(Insert);
423
424 Register Offset = Insert.getIndexReg();
425 LLT VecTy = MRI.getType(Insert.getReg(0));
426 LLT EltTy = MRI.getType(Insert.getElementReg());
427 LLT IdxTy = MRI.getType(Insert.getIndexReg());
428
429 // Create a stack slot and store the vector into it
430 MachineFunction &MF = Builder.getMF();
431 Align Alignment(
432 std::min<uint64_t>(VecTy.getSizeInBytes().getKnownMinValue(), 16));
433 int FrameIdx = MF.getFrameInfo().CreateStackObject(VecTy.getSizeInBytes(),
434 Alignment, false);
435 LLT FramePtrTy = LLT::pointer(0, 64);
436 MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(MF, FrameIdx);
437 auto StackTemp = Builder.buildFrameIndex(FramePtrTy, FrameIdx);
438
439 Builder.buildStore(Insert.getOperand(1), StackTemp, PtrInfo, Align(8));
440
441 // Get the pointer to the element, and be sure not to hit undefined behavior
442 // if the index is out of bounds.
444 "Expected a power-2 vector size");
445 auto Mask = Builder.buildConstant(IdxTy, VecTy.getNumElements() - 1);
446 Register And = Builder.buildAnd(IdxTy, Offset, Mask).getReg(0);
447 auto EltSize = Builder.buildConstant(IdxTy, EltTy.getSizeInBytes());
448 Register Mul = Builder.buildMul(IdxTy, And, EltSize).getReg(0);
449 Register EltPtr =
450 Builder.buildPtrAdd(MRI.getType(StackTemp.getReg(0)), StackTemp, Mul)
451 .getReg(0);
452
453 // Write the inserted element
454 Builder.buildStore(Insert.getElementReg(), EltPtr, PtrInfo, Align(1));
455 // Reload the whole vector.
456 Builder.buildLoad(Insert.getReg(0), StackTemp, PtrInfo, Align(8));
457 Insert.eraseFromParent();
458}
459
460/// Match a G_SHUFFLE_VECTOR with a mask which corresponds to a
461/// G_INSERT_VECTOR_ELT and G_EXTRACT_VECTOR_ELT pair.
462///
463/// e.g.
464/// %shuf = G_SHUFFLE_VECTOR %left, %right, shufflemask(0, 0)
465///
466/// Can be represented as
467///
468/// %extract = G_EXTRACT_VECTOR_ELT %left, 0
469/// %ins = G_INSERT_VECTOR_ELT %left, %extract, 1
470///
471bool matchINS(MachineInstr &MI, MachineRegisterInfo &MRI,
472 std::tuple<Register, int, Register, int> &MatchInfo) {
473 assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
474 ArrayRef<int> ShuffleMask = MI.getOperand(3).getShuffleMask();
475 Register Dst = MI.getOperand(0).getReg();
476 int NumElts = MRI.getType(Dst).getNumElements();
477 auto DstIsLeftAndDstLane = isINSMask(ShuffleMask, NumElts);
478 if (!DstIsLeftAndDstLane)
479 return false;
480 bool DstIsLeft;
481 int DstLane;
482 std::tie(DstIsLeft, DstLane) = *DstIsLeftAndDstLane;
483 Register Left = MI.getOperand(1).getReg();
484 Register Right = MI.getOperand(2).getReg();
485 Register DstVec = DstIsLeft ? Left : Right;
486 Register SrcVec = Left;
487
488 int SrcLane = ShuffleMask[DstLane];
489 if (SrcLane >= NumElts) {
490 SrcVec = Right;
491 SrcLane -= NumElts;
492 }
493
494 MatchInfo = std::make_tuple(DstVec, DstLane, SrcVec, SrcLane);
495 return true;
496}
497
498void applyINS(MachineInstr &MI, MachineRegisterInfo &MRI,
499 MachineIRBuilder &Builder,
500 std::tuple<Register, int, Register, int> &MatchInfo) {
501 Builder.setInstrAndDebugLoc(MI);
502 Register Dst = MI.getOperand(0).getReg();
503 auto ScalarTy = MRI.getType(Dst).getElementType();
504 Register DstVec, SrcVec;
505 int DstLane, SrcLane;
506 std::tie(DstVec, DstLane, SrcVec, SrcLane) = MatchInfo;
507 auto SrcCst = Builder.buildConstant(LLT::scalar(64), SrcLane);
508 auto Extract = Builder.buildExtractVectorElement(ScalarTy, SrcVec, SrcCst);
509 auto DstCst = Builder.buildConstant(LLT::scalar(64), DstLane);
510 Builder.buildInsertVectorElement(Dst, DstVec, Extract, DstCst);
511 MI.eraseFromParent();
512}
513
514/// isVShiftRImm - Check if this is a valid vector for the immediate
515/// operand of a vector shift right operation. The value must be in the range:
516/// 1 <= Value <= ElementBits for a right shift.
517bool isVShiftRImm(Register Reg, MachineRegisterInfo &MRI, LLT Ty,
518 int64_t &Cnt) {
519 assert(Ty.isVector() && "vector shift count is not a vector type");
520 MachineInstr *MI = MRI.getVRegDef(Reg);
521 auto Cst = getAArch64VectorSplatScalar(*MI, MRI);
522 if (!Cst)
523 return false;
524 Cnt = *Cst;
525 int64_t ElementBits = Ty.getScalarSizeInBits();
526 return Cnt >= 1 && Cnt <= ElementBits;
527}
528
529/// Match a vector G_ASHR or G_LSHR with a valid immediate shift.
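/// For example, a G_ASHR of a <4 x s32> vector by a splat of 5 is matched here
/// and later rewritten to G_VASHR with an immediate of 5.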
530bool matchVAshrLshrImm(MachineInstr &MI, MachineRegisterInfo &MRI,
531 int64_t &Imm) {
532 assert(MI.getOpcode() == TargetOpcode::G_ASHR ||
533 MI.getOpcode() == TargetOpcode::G_LSHR);
534 LLT Ty = MRI.getType(MI.getOperand(1).getReg());
535 if (!Ty.isVector())
536 return false;
537 return isVShiftRImm(MI.getOperand(2).getReg(), MRI, Ty, Imm);
538}
539
540void applyVAshrLshrImm(MachineInstr &MI, MachineRegisterInfo &MRI,
541 int64_t &Imm) {
542 unsigned Opc = MI.getOpcode();
543 assert(Opc == TargetOpcode::G_ASHR || Opc == TargetOpcode::G_LSHR);
544 unsigned NewOpc =
545 Opc == TargetOpcode::G_ASHR ? AArch64::G_VASHR : AArch64::G_VLSHR;
546 MachineIRBuilder MIB(MI);
547 auto ImmDef = MIB.buildConstant(LLT::scalar(32), Imm);
548 MIB.buildInstr(NewOpc, {MI.getOperand(0)}, {MI.getOperand(1), ImmDef});
549 MI.eraseFromParent();
550}
551
552/// Determine if it is possible to modify the \p RHS and predicate \p P of a
553/// G_ICMP instruction such that the right-hand side is an arithmetic immediate.
554///
555/// \returns A pair containing the updated immediate and predicate which may
556/// be used to optimize the instruction.
557///
558/// \note This assumes that the comparison has been legalized.
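/// For example, 0xfff001 is not a legal arithmetic immediate, but
/// "x slt 0xfff001" can be rewritten as "x sle 0xfff000", whose constant is
/// encodable.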
559std::optional<std::pair<uint64_t, CmpInst::Predicate>>
560tryAdjustICmpImmAndPred(Register RHS, CmpInst::Predicate P,
561 const MachineRegisterInfo &MRI) {
562 const auto &Ty = MRI.getType(RHS);
563 if (Ty.isVector())
564 return std::nullopt;
565 unsigned Size = Ty.getSizeInBits();
566 assert((Size == 32 || Size == 64) && "Expected 32 or 64 bit compare only?");
567
568 // If the RHS is not a constant, or the RHS is already a valid arithmetic
569 // immediate, then there is nothing to change.
570 auto ValAndVReg = getIConstantVRegValWithLookThrough(RHS, MRI);
571 if (!ValAndVReg)
572 return std::nullopt;
573 uint64_t OriginalC = ValAndVReg->Value.getZExtValue();
574 uint64_t C = OriginalC;
575 if (isLegalArithImmed(C))
576 return std::nullopt;
577
578 // We have a non-arithmetic immediate. Check if adjusting the immediate and
579 // adjusting the predicate will result in a legal arithmetic immediate.
580 switch (P) {
581 default:
582 return std::nullopt;
583 case CmpInst::ICMP_SLT:
584 case CmpInst::ICMP_SGE:
585 // Check for
586 //
587 // x slt c => x sle c - 1
588 // x sge c => x sgt c - 1
589 //
590 // When c is not the smallest possible negative number.
591 if ((Size == 64 && static_cast<int64_t>(C) == INT64_MIN) ||
592 (Size == 32 && static_cast<int32_t>(C) == INT32_MIN))
593 return std::nullopt;
594 P = (P == CmpInst::ICMP_SLT) ? CmpInst::ICMP_SLE : CmpInst::ICMP_SGT;
595 C -= 1;
596 break;
597 case CmpInst::ICMP_ULT:
598 case CmpInst::ICMP_UGE:
599 // Check for
600 //
601 // x ult c => x ule c - 1
602 // x uge c => x ugt c - 1
603 //
604 // When c is not zero.
605 if (C == 0)
606 return std::nullopt;
607 P = (P == CmpInst::ICMP_ULT) ? CmpInst::ICMP_ULE : CmpInst::ICMP_UGT;
608 C -= 1;
609 break;
610 case CmpInst::ICMP_SLE:
611 case CmpInst::ICMP_SGT:
612 // Check for
613 //
614 // x sle c => x slt c + 1
615 // x sgt c => s sge c + 1
616 //
617 // When c is not the largest possible signed integer.
618 if ((Size == 32 && static_cast<int32_t>(C) == INT32_MAX) ||
619 (Size == 64 && static_cast<int64_t>(C) == INT64_MAX))
620 return std::nullopt;
621 P = (P == CmpInst::ICMP_SLE) ? CmpInst::ICMP_SLT : CmpInst::ICMP_SGE;
622 C += 1;
623 break;
624 case CmpInst::ICMP_ULE:
625 case CmpInst::ICMP_UGT:
626 // Check for
627 //
628 // x ule c => x ult c + 1
629 // x ugt c => s uge c + 1
630 //
631 // When c is not the largest possible unsigned integer.
632 if ((Size == 32 && static_cast<uint32_t>(C) == UINT32_MAX) ||
633 (Size == 64 && C == UINT64_MAX))
634 return std::nullopt;
635 P = (P == CmpInst::ICMP_ULE) ? CmpInst::ICMP_ULT : CmpInst::ICMP_UGE;
636 C += 1;
637 break;
638 }
639
640 // Check if the new constant is valid, and return the updated constant and
641 // predicate if it is.
642 if (Size == 32)
643 C = static_cast<uint32_t>(C);
644 if (isLegalArithImmed(C))
645 return {{C, P}};
646
647 auto IsMaterializableInSingleInstruction = [=](uint64_t Imm) {
648 SmallVector<AArch64_IMM::ImmInsnModel, 4> Insn;
649 AArch64_IMM::expandMOVImm(Imm, 32, Insn);
650 return Insn.size() == 1;
651 };
652
653 if (!IsMaterializableInSingleInstruction(OriginalC) &&
654 IsMaterializableInSingleInstruction(C))
655 return {{C, P}};
656
657 return std::nullopt;
658}
659
660/// Determine whether or not it is possible to update the RHS and predicate of
661/// a G_ICMP instruction such that the RHS will be selected as an arithmetic
662/// immediate.
663///
664/// \p MI - The G_ICMP instruction
665/// \p MatchInfo - The new RHS immediate and predicate on success
666///
667/// See tryAdjustICmpImmAndPred for valid transformations.
668bool matchAdjustICmpImmAndPred(
669 MachineInstr &MI, const MachineRegisterInfo &MRI,
670 std::pair<uint64_t, CmpInst::Predicate> &MatchInfo) {
671 assert(MI.getOpcode() == TargetOpcode::G_ICMP);
672 Register RHS = MI.getOperand(3).getReg();
673 auto Pred = static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
674 if (auto MaybeNewImmAndPred = tryAdjustICmpImmAndPred(RHS, Pred, MRI)) {
675 MatchInfo = *MaybeNewImmAndPred;
676 return true;
677 }
678 return false;
679}
680
681void applyAdjustICmpImmAndPred(
682 MachineInstr &MI, std::pair<uint64_t, CmpInst::Predicate> &MatchInfo,
683 MachineIRBuilder &MIB, GISelChangeObserver &Observer) {
684 MIB.setInstrAndDebugLoc(MI);
685 MachineOperand &RHS = MI.getOperand(3);
686 MachineRegisterInfo &MRI = *MIB.getMRI();
687 auto Cst = MIB.buildConstant(MRI.cloneVirtualRegister(RHS.getReg()),
688 MatchInfo.first);
689 Observer.changingInstr(MI);
690 RHS.setReg(Cst->getOperand(0).getReg());
691 MI.getOperand(1).setPredicate(MatchInfo.second);
692 Observer.changedInstr(MI);
693}
694
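/// Match a splatting G_SHUFFLE_VECTOR whose splat lane lies within the first
/// source vector, so it can be selected as a lane-indexed duplicate, e.g.
///   G_SHUFFLE_VECTOR %v(<4 x s32>), %undef, shufflemask(1,1,1,1)
/// becomes G_DUPLANE32 %v, lane 1.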
695bool matchDupLane(MachineInstr &MI, MachineRegisterInfo &MRI,
696 std::pair<unsigned, int> &MatchInfo) {
697 assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
698 Register Src1Reg = MI.getOperand(1).getReg();
699 const LLT SrcTy = MRI.getType(Src1Reg);
700 const LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
701
702 auto LaneIdx = getSplatIndex(MI);
703 if (!LaneIdx)
704 return false;
705
706 // The lane idx should be within the first source vector.
707 if (*LaneIdx >= SrcTy.getNumElements())
708 return false;
709
710 if (DstTy != SrcTy)
711 return false;
712
713 LLT ScalarTy = SrcTy.getElementType();
714 unsigned ScalarSize = ScalarTy.getSizeInBits();
715
716 unsigned Opc = 0;
717 switch (SrcTy.getNumElements()) {
718 case 2:
719 if (ScalarSize == 64)
720 Opc = AArch64::G_DUPLANE64;
721 else if (ScalarSize == 32)
722 Opc = AArch64::G_DUPLANE32;
723 break;
724 case 4:
725 if (ScalarSize == 32)
726 Opc = AArch64::G_DUPLANE32;
727 else if (ScalarSize == 16)
728 Opc = AArch64::G_DUPLANE16;
729 break;
730 case 8:
731 if (ScalarSize == 8)
732 Opc = AArch64::G_DUPLANE8;
733 else if (ScalarSize == 16)
734 Opc = AArch64::G_DUPLANE16;
735 break;
736 case 16:
737 if (ScalarSize == 8)
738 Opc = AArch64::G_DUPLANE8;
739 break;
740 default:
741 break;
742 }
743 if (!Opc)
744 return false;
745
746 MatchInfo.first = Opc;
747 MatchInfo.second = *LaneIdx;
748 return true;
749}
750
751void applyDupLane(MachineInstr &MI, MachineRegisterInfo &MRI,
752 MachineIRBuilder &B, std::pair<unsigned, int> &MatchInfo) {
753 assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
754 Register Src1Reg = MI.getOperand(1).getReg();
755 const LLT SrcTy = MRI.getType(Src1Reg);
756
757 B.setInstrAndDebugLoc(MI);
758 auto Lane = B.buildConstant(LLT::scalar(64), MatchInfo.second);
759
760 Register DupSrc = MI.getOperand(1).getReg();
761 // For types like <2 x s32>, we can use G_DUPLANE32, with a <4 x s32> source.
762 // To do this, we can use a G_CONCAT_VECTORS to do the widening.
763 if (SrcTy.getSizeInBits() == 64) {
764 auto Undef = B.buildUndef(SrcTy);
765 DupSrc = B.buildConcatVectors(SrcTy.multiplyElements(2),
766 {Src1Reg, Undef.getReg(0)})
767 .getReg(0);
768 }
769 B.buildInstr(MatchInfo.first, {MI.getOperand(0).getReg()}, {DupSrc, Lane});
770 MI.eraseFromParent();
771}
772
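/// Match a G_UNMERGE_VALUES that splits a fixed-length 64- or 128-bit vector
/// into one result per lane, so it can be rewritten as a G_EXTRACT_VECTOR_ELT
/// for each element.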
773bool matchScalarizeVectorUnmerge(MachineInstr &MI, MachineRegisterInfo &MRI) {
774 auto &Unmerge = cast<GUnmerge>(MI);
775 Register Src1Reg = Unmerge.getReg(Unmerge.getNumOperands() - 1);
776 const LLT SrcTy = MRI.getType(Src1Reg);
777 if (SrcTy.getSizeInBits() != 128 && SrcTy.getSizeInBits() != 64)
778 return false;
779 return SrcTy.isVector() && !SrcTy.isScalable() &&
780 Unmerge.getNumOperands() == (unsigned)SrcTy.getNumElements() + 1;
781}
782
783void applyScalarizeVectorUnmerge(MachineInstr &MI, MachineRegisterInfo &MRI,
784 MachineIRBuilder &B) {
785 auto &Unmerge = cast<GUnmerge>(MI);
786 Register Src1Reg = Unmerge.getReg(Unmerge.getNumOperands() - 1);
787 const LLT SrcTy = MRI.getType(Src1Reg);
788 assert((SrcTy.isVector() && !SrcTy.isScalable()) &&
789 "Expected a fixed length vector");
790
791 for (int I = 0; I < SrcTy.getNumElements(); ++I)
792 B.buildExtractVectorElementConstant(Unmerge.getReg(I), Src1Reg, I);
793 MI.eraseFromParent();
794}
795
796bool matchBuildVectorToDup(MachineInstr &MI, MachineRegisterInfo &MRI) {
797 assert(MI.getOpcode() == TargetOpcode::G_BUILD_VECTOR);
798 auto Splat = getAArch64VectorSplat(MI, MRI);
799 if (!Splat)
800 return false;
801 if (Splat->isReg())
802 return true;
803 // Later, during selection, we'll try to match imported patterns using
804 // immAllOnesV and immAllZerosV. These require G_BUILD_VECTOR. Don't lower
805 // G_BUILD_VECTORs which could match those patterns.
806 int64_t Cst = Splat->getCst();
807 return (Cst != 0 && Cst != -1);
808}
809
810void applyBuildVectorToDup(MachineInstr &MI, MachineRegisterInfo &MRI,
811 MachineIRBuilder &B) {
812 B.setInstrAndDebugLoc(MI);
813 B.buildInstr(AArch64::G_DUP, {MI.getOperand(0).getReg()},
814 {MI.getOperand(1).getReg()});
815 MI.eraseFromParent();
816}
817
818/// \returns how many instructions would be saved by folding a G_ICMP's shift
819/// and/or extension operations.
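/// For example, folding a single-use shift-by-constant into the compare saves
/// one instruction, and folding a supported extend beneath a shift of at most
/// 4 saves two.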
820unsigned getCmpOperandFoldingProfit(Register CmpOp, MachineRegisterInfo &MRI) {
821 // No instructions to save if there's more than one use or no uses.
822 if (!MRI.hasOneNonDBGUse(CmpOp))
823 return 0;
824
825 // FIXME: This is duplicated with the selector. (See: selectShiftedRegister)
826 auto IsSupportedExtend = [&](const MachineInstr &MI) {
827 if (MI.getOpcode() == TargetOpcode::G_SEXT_INREG)
828 return true;
829 if (MI.getOpcode() != TargetOpcode::G_AND)
830 return false;
831 auto ValAndVReg =
832 getIConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI);
833 if (!ValAndVReg)
834 return false;
835 uint64_t Mask = ValAndVReg->Value.getZExtValue();
836 return (Mask == 0xFF || Mask == 0xFFFF || Mask == 0xFFFFFFFF);
837 };
838
839 MachineInstr *Def = getDefIgnoringCopies(CmpOp, MRI);
840 if (IsSupportedExtend(*Def))
841 return 1;
842
843 unsigned Opc = Def->getOpcode();
844 if (Opc != TargetOpcode::G_SHL && Opc != TargetOpcode::G_ASHR &&
845 Opc != TargetOpcode::G_LSHR)
846 return 0;
847
848 auto MaybeShiftAmt =
849 getIConstantVRegValWithLookThrough(Def->getOperand(2).getReg(), MRI);
850 if (!MaybeShiftAmt)
851 return 0;
852 uint64_t ShiftAmt = MaybeShiftAmt->Value.getZExtValue();
853 MachineInstr *ShiftLHS =
854 getDefIgnoringCopies(Def->getOperand(1).getReg(), MRI);
855
856 // Check if we can fold an extend and a shift.
857 // FIXME: This is duplicated with the selector. (See:
858 // selectArithExtendedRegister)
859 if (IsSupportedExtend(*ShiftLHS))
860 return (ShiftAmt <= 4) ? 2 : 1;
861
862 LLT Ty = MRI.getType(Def->getOperand(0).getReg());
863 if (Ty.isVector())
864 return 0;
865 unsigned ShiftSize = Ty.getSizeInBits();
866 if ((ShiftSize == 32 && ShiftAmt <= 31) ||
867 (ShiftSize == 64 && ShiftAmt <= 63))
868 return 1;
869 return 0;
870}
871
872/// \returns true if it would be profitable to swap the LHS and RHS of a G_ICMP
873/// instruction \p MI.
874bool trySwapICmpOperands(MachineInstr &MI, MachineRegisterInfo &MRI) {
875 assert(MI.getOpcode() == TargetOpcode::G_ICMP);
876 // Swap the operands if it would introduce a profitable folding opportunity.
877 // (e.g. a shift + extend).
878 //
879 // For example:
880 // lsl w13, w11, #1
881 // cmp w13, w12
882 // can be turned into:
883 // cmp w12, w11, lsl #1
884
885 // Don't swap if there's a constant on the RHS, because we know we can fold
886 // that.
887 Register RHS = MI.getOperand(3).getReg();
888 auto RHSCst = getIConstantVRegValWithLookThrough(RHS, MRI);
889 if (RHSCst && isLegalArithImmed(RHSCst->Value.getSExtValue()))
890 return false;
891
892 Register LHS = MI.getOperand(2).getReg();
893 auto Pred = static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
894 auto GetRegForProfit = [&](Register Reg) {
895 MachineInstr *Def = getDefIgnoringCopies(Reg, MRI);
896 return isCMN(Def, Pred, MRI) ? Def->getOperand(2).getReg() : Reg;
897 };
898
899 // Don't have a constant on the RHS. If we swap the LHS and RHS of the
900 // compare, would we be able to fold more instructions?
901 Register TheLHS = GetRegForProfit(LHS);
902 Register TheRHS = GetRegForProfit(RHS);
903
904 // If the LHS is more likely to give us a folding opportunity, then swap the
905 // LHS and RHS.
906 return (getCmpOperandFoldingProfit(TheLHS, MRI) >
907 getCmpOperandFoldingProfit(TheRHS, MRI));
908}
909
910void applySwapICmpOperands(MachineInstr &MI, GISelChangeObserver &Observer) {
911 auto Pred = static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
912 Register LHS = MI.getOperand(2).getReg();
913 Register RHS = MI.getOperand(3).getReg();
914 Observer.changingInstr(MI);
915 MI.getOperand(1).setPredicate(CmpInst::getSwappedPredicate(Pred));
916 MI.getOperand(2).setReg(RHS);
917 MI.getOperand(3).setReg(LHS);
918 Observer.changedInstr(MI);
919}
920
921/// \returns a function which builds a vector floating point compare instruction
922/// for a condition code \p CC.
923/// \param [in] IsZero - True if the comparison is against 0.
924/// \param [in] NoNans - True if the target has NoNansFPMath.
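/// For example, for CC == GE this returns a builder for G_FCMGE LHS, RHS, or
/// for G_FCMGEZ LHS when \p IsZero is set.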
925std::function<Register(MachineIRBuilder &)>
926getVectorFCMP(AArch64CC::CondCode CC, Register LHS, Register RHS, bool IsZero,
927 bool NoNans, MachineRegisterInfo &MRI) {
928 LLT DstTy = MRI.getType(LHS);
929 assert(DstTy.isVector() && "Expected vector types only?");
930 assert(DstTy == MRI.getType(RHS) && "Src and Dst types must match!");
931 switch (CC) {
932 default:
933 llvm_unreachable("Unexpected condition code!");
934 case AArch64CC::NE:
935 return [LHS, RHS, IsZero, DstTy](MachineIRBuilder &MIB) {
936 auto FCmp = IsZero
937 ? MIB.buildInstr(AArch64::G_FCMEQZ, {DstTy}, {LHS})
938 : MIB.buildInstr(AArch64::G_FCMEQ, {DstTy}, {LHS, RHS});
939 return MIB.buildNot(DstTy, FCmp).getReg(0);
940 };
941 case AArch64CC::EQ:
942 return [LHS, RHS, IsZero, DstTy](MachineIRBuilder &MIB) {
943 return IsZero
944 ? MIB.buildInstr(AArch64::G_FCMEQZ, {DstTy}, {LHS}).getReg(0)
945 : MIB.buildInstr(AArch64::G_FCMEQ, {DstTy}, {LHS, RHS})
946 .getReg(0);
947 };
948 case AArch64CC::GE:
949 return [LHS, RHS, IsZero, DstTy](MachineIRBuilder &MIB) {
950 return IsZero
951 ? MIB.buildInstr(AArch64::G_FCMGEZ, {DstTy}, {LHS}).getReg(0)
952 : MIB.buildInstr(AArch64::G_FCMGE, {DstTy}, {LHS, RHS})
953 .getReg(0);
954 };
955 case AArch64CC::GT:
956 return [LHS, RHS, IsZero, DstTy](MachineIRBuilder &MIB) {
957 return IsZero
958 ? MIB.buildInstr(AArch64::G_FCMGTZ, {DstTy}, {LHS}).getReg(0)
959 : MIB.buildInstr(AArch64::G_FCMGT, {DstTy}, {LHS, RHS})
960 .getReg(0);
961 };
962 case AArch64CC::LS:
963 return [LHS, RHS, IsZero, DstTy](MachineIRBuilder &MIB) {
964 return IsZero
965 ? MIB.buildInstr(AArch64::G_FCMLEZ, {DstTy}, {LHS}).getReg(0)
966 : MIB.buildInstr(AArch64::G_FCMGE, {DstTy}, {RHS, LHS})
967 .getReg(0);
968 };
969 case AArch64CC::MI:
970 return [LHS, RHS, IsZero, DstTy](MachineIRBuilder &MIB) {
971 return IsZero
972 ? MIB.buildInstr(AArch64::G_FCMLTZ, {DstTy}, {LHS}).getReg(0)
973 : MIB.buildInstr(AArch64::G_FCMGT, {DstTy}, {RHS, LHS})
974 .getReg(0);
975 };
976 }
977}
978
979/// Try to lower a vector G_FCMP \p MI into an AArch64-specific pseudo.
980bool matchLowerVectorFCMP(MachineInstr &MI, MachineRegisterInfo &MRI,
981 MachineIRBuilder &MIB) {
982 assert(MI.getOpcode() == TargetOpcode::G_FCMP);
983 const auto &ST = MI.getMF()->getSubtarget<AArch64Subtarget>();
984
985 Register Dst = MI.getOperand(0).getReg();
986 LLT DstTy = MRI.getType(Dst);
987 if (!DstTy.isVector() || !ST.hasNEON())
988 return false;
989 Register LHS = MI.getOperand(2).getReg();
990 unsigned EltSize = MRI.getType(LHS).getScalarSizeInBits();
991 if (EltSize == 16 && !ST.hasFullFP16())
992 return false;
993 if (EltSize != 16 && EltSize != 32 && EltSize != 64)
994 return false;
995
996 return true;
997}
998
999/// Try to lower a vector G_FCMP \p MI into an AArch64-specific pseudo.
1000void applyLowerVectorFCMP(MachineInstr &MI, MachineRegisterInfo &MRI,
1001 MachineIRBuilder &MIB) {
1002 assert(MI.getOpcode() == TargetOpcode::G_FCMP);
1003 const auto &ST = MI.getMF()->getSubtarget<AArch64Subtarget>();
1004
1005 const auto &CmpMI = cast<GFCmp>(MI);
1006
1007 Register Dst = CmpMI.getReg(0);
1008 CmpInst::Predicate Pred = CmpMI.getCond();
1009 Register LHS = CmpMI.getLHSReg();
1010 Register RHS = CmpMI.getRHSReg();
1011
1012 LLT DstTy = MRI.getType(Dst);
1013
1014 auto Splat = getAArch64VectorSplat(*MRI.getVRegDef(RHS), MRI);
1015
1016 // Compares against 0 have special target-specific pseudos.
1017 bool IsZero = Splat && Splat->isCst() && Splat->getCst() == 0;
1018
1019 bool Invert = false;
1020 AArch64CC::CondCode CC, CC2 = AArch64CC::AL;
1021 if ((Pred == CmpInst::Predicate::FCMP_ORD ||
1022 Pred == CmpInst::Predicate::FCMP_UNO) &&
1023 IsZero) {
1024 // The special case "fcmp ord %a, 0" is the canonical check that LHS isn't
1025 // NaN, so equivalent to a == a and doesn't need the two comparisons an
1026 // "ord" normally would.
1027 // Similarly, "fcmp uno %a, 0" is the canonical check that LHS is NaN and is
1028 // thus equivalent to a != a.
1029 RHS = LHS;
1030 IsZero = false;
1031 CC = Pred == CmpInst::Predicate::FCMP_ORD ? AArch64CC::EQ : AArch64CC::NE;
1032 } else
1033 changeVectorFCMPPredToAArch64CC(Pred, CC, CC2, Invert);
1034
1035 // Instead of having an apply function, just build here to simplify things.
1036 MIB.setInstrAndDebugLoc(MI);
1037
1038 const bool NoNans =
1039 ST.getTargetLowering()->getTargetMachine().Options.NoNaNsFPMath;
1040
1041 auto Cmp = getVectorFCMP(CC, LHS, RHS, IsZero, NoNans, MRI);
1042 Register CmpRes;
1043 if (CC2 == AArch64CC::AL)
1044 CmpRes = Cmp(MIB);
1045 else {
1046 auto Cmp2 = getVectorFCMP(CC2, LHS, RHS, IsZero, NoNans, MRI);
1047 auto Cmp2Dst = Cmp2(MIB);
1048 auto Cmp1Dst = Cmp(MIB);
1049 CmpRes = MIB.buildOr(DstTy, Cmp1Dst, Cmp2Dst).getReg(0);
1050 }
1051 if (Invert)
1052 CmpRes = MIB.buildNot(DstTy, CmpRes).getReg(0);
1053 MRI.replaceRegWith(Dst, CmpRes);
1054 MI.eraseFromParent();
1055}
1056
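/// Match a G_STORE of a G_TRUNC so the truncate can be folded into a
/// truncating store, e.g.
///   %t:_(s32) = G_TRUNC %x:_(s64); G_STORE %t, %p  =>  G_STORE %x, %p
/// with the original 32-bit memory operand left unchanged.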
1057bool matchFormTruncstore(MachineInstr &MI, MachineRegisterInfo &MRI,
1058 Register &SrcReg) {
1059 assert(MI.getOpcode() == TargetOpcode::G_STORE);
1060 Register DstReg = MI.getOperand(0).getReg();
1061 if (MRI.getType(DstReg).isVector())
1062 return false;
1063 // Match a store of a truncate.
1064 if (!mi_match(DstReg, MRI, m_GTrunc(m_Reg(SrcReg))))
1065 return false;
1066 // Only form truncstores for value types of max 64b.
1067 return MRI.getType(SrcReg).getSizeInBits() <= 64;
1068}
1069
1070void applyFormTruncstore(MachineInstr &MI, MachineRegisterInfo &MRI,
1071 MachineIRBuilder &B, GISelChangeObserver &Observer,
1072 Register &SrcReg) {
1073 assert(MI.getOpcode() == TargetOpcode::G_STORE);
1074 Observer.changingInstr(MI);
1075 MI.getOperand(0).setReg(SrcReg);
1076 Observer.changedInstr(MI);
1077}
1078
1079// Lower vector G_SEXT_INREG back to shifts for selection. We allowed them to
1080// form in the first place for combine opportunities, so any remaining ones
1081// at this stage need to be lowered back.
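// For example, G_SEXT_INREG %x(<4 x s32>), 8 is lowered back to a G_SHL by 24
// followed by a G_ASHR by 24.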
1082bool matchVectorSextInReg(MachineInstr &MI, MachineRegisterInfo &MRI) {
1083 assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG);
1084 Register DstReg = MI.getOperand(0).getReg();
1085 LLT DstTy = MRI.getType(DstReg);
1086 return DstTy.isVector();
1087}
1088
1089void applyVectorSextInReg(MachineInstr &MI, MachineRegisterInfo &MRI,
1090 MachineIRBuilder &B, GISelChangeObserver &Observer) {
1091 assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG);
1092 B.setInstrAndDebugLoc(MI);
1093 LegalizerHelper Helper(*MI.getMF(), Observer, B);
1094 Helper.lower(MI, 0, /* Unused hint type */ LLT());
1095}
1096
1097/// Combine <N x t>, unused = unmerge(G_EXT <2*N x t> v, undef, N)
1098/// => unused, <N x t> = unmerge v
1099bool matchUnmergeExtToUnmerge(MachineInstr &MI, MachineRegisterInfo &MRI,
1100 Register &MatchInfo) {
1101 auto &Unmerge = cast<GUnmerge>(MI);
1102 if (Unmerge.getNumDefs() != 2)
1103 return false;
1104 if (!MRI.use_nodbg_empty(Unmerge.getReg(1)))
1105 return false;
1106
1107 LLT DstTy = MRI.getType(Unmerge.getReg(0));
1108 if (!DstTy.isVector())
1109 return false;
1110
1111 MachineInstr *Ext = getOpcodeDef(AArch64::G_EXT, Unmerge.getSourceReg(), MRI);
1112 if (!Ext)
1113 return false;
1114
1115 Register ExtSrc1 = Ext->getOperand(1).getReg();
1116 Register ExtSrc2 = Ext->getOperand(2).getReg();
1117 auto LowestVal =
1118 getIConstantVRegValWithLookThrough(Ext->getOperand(3).getReg(), MRI);
1119 if (!LowestVal || LowestVal->Value.getZExtValue() != DstTy.getSizeInBytes())
1120 return false;
1121
1122 if (!getOpcodeDef<GImplicitDef>(ExtSrc2, MRI))
1123 return false;
1124
1125 MatchInfo = ExtSrc1;
1126 return true;
1127}
1128
1129void applyUnmergeExtToUnmerge(MachineInstr &MI, MachineRegisterInfo &MRI,
1130 MachineIRBuilder &B,
1131 GISelChangeObserver &Observer, Register &SrcReg) {
1132 Observer.changingInstr(MI);
1133 // Swap dst registers.
1134 Register Dst1 = MI.getOperand(0).getReg();
1135 MI.getOperand(0).setReg(MI.getOperand(1).getReg());
1136 MI.getOperand(1).setReg(Dst1);
1137 MI.getOperand(2).setReg(SrcReg);
1138 Observer.changedInstr(MI);
1139}
1140
1141// Match mul({z/s}ext, {z/s}ext) => {u/s}mull, OR
1142// match v2s64 mul instructions, which will then be scalarised later on.
1143// Both matches are done in one function to ensure that the order of matching
1144// is always the same.
1145// Try lowering MUL to MULL before trying to scalarise if needed.
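// For example, a G_MUL of two <2 x s64> values produced by G_ZEXT from
// <2 x s32> becomes G_UMULL of the narrow sources, while a plain <2 x s64>
// G_MUL is instead scalarised by the apply function.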
1146bool matchExtMulToMULL(MachineInstr &MI, MachineRegisterInfo &MRI) {
1147 // Get the instructions that defined the source operand
1148 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
1149 MachineInstr *I1 = getDefIgnoringCopies(MI.getOperand(1).getReg(), MRI);
1150 MachineInstr *I2 = getDefIgnoringCopies(MI.getOperand(2).getReg(), MRI);
1151
1152 if (DstTy.isVector()) {
1153 // If the source operands were EXTENDED before, then {U/S}MULL can be used
1154 unsigned I1Opc = I1->getOpcode();
1155 unsigned I2Opc = I2->getOpcode();
1156 if (((I1Opc == TargetOpcode::G_ZEXT && I2Opc == TargetOpcode::G_ZEXT) ||
1157 (I1Opc == TargetOpcode::G_SEXT && I2Opc == TargetOpcode::G_SEXT)) &&
1158 (MRI.getType(I1->getOperand(0).getReg()).getScalarSizeInBits() ==
1159 MRI.getType(I1->getOperand(1).getReg()).getScalarSizeInBits() * 2) &&
1160 (MRI.getType(I2->getOperand(0).getReg()).getScalarSizeInBits() ==
1161 MRI.getType(I2->getOperand(1).getReg()).getScalarSizeInBits() * 2)) {
1162 return true;
1163 }
1164 // If result type is v2s64, scalarise the instruction
1165 else if (DstTy == LLT::fixed_vector(2, 64)) {
1166 return true;
1167 }
1168 }
1169 return false;
1170}
1171
1172void applyExtMulToMULL(MachineInstr &MI, MachineRegisterInfo &MRI,
1173 MachineIRBuilder &B, GISelChangeObserver &Observer) {
1174 assert(MI.getOpcode() == TargetOpcode::G_MUL &&
1175 "Expected a G_MUL instruction");
1176
1177 // Get the instructions that defined the source operand
1178 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
1179 MachineInstr *I1 = getDefIgnoringCopies(MI.getOperand(1).getReg(), MRI);
1180 MachineInstr *I2 = getDefIgnoringCopies(MI.getOperand(2).getReg(), MRI);
1181
1182 // If the source operands were EXTENDED before, then {U/S}MULL can be used
1183 unsigned I1Opc = I1->getOpcode();
1184 unsigned I2Opc = I2->getOpcode();
1185 if (((I1Opc == TargetOpcode::G_ZEXT && I2Opc == TargetOpcode::G_ZEXT) ||
1186 (I1Opc == TargetOpcode::G_SEXT && I2Opc == TargetOpcode::G_SEXT)) &&
1187 (MRI.getType(I1->getOperand(0).getReg()).getScalarSizeInBits() ==
1188 MRI.getType(I1->getOperand(1).getReg()).getScalarSizeInBits() * 2) &&
1189 (MRI.getType(I2->getOperand(0).getReg()).getScalarSizeInBits() ==
1190 MRI.getType(I2->getOperand(1).getReg()).getScalarSizeInBits() * 2)) {
1191
1192 B.setInstrAndDebugLoc(MI);
1193 B.buildInstr(I1->getOpcode() == TargetOpcode::G_ZEXT ? AArch64::G_UMULL
1194 : AArch64::G_SMULL,
1195 {MI.getOperand(0).getReg()},
1196 {I1->getOperand(1).getReg(), I2->getOperand(1).getReg()});
1197 MI.eraseFromParent();
1198 }
1199 // If result type is v2s64, scalarise the instruction
1200 else if (DstTy == LLT::fixed_vector(2, 64)) {
1201 LegalizerHelper Helper(*MI.getMF(), Observer, B);
1202 B.setInstrAndDebugLoc(MI);
1203 Helper.fewerElementsVector(
1204 MI, 0,
1205 DstTy.changeElementCount(
1206 DstTy.getElementCount().divideCoefficientBy(2)));
1207 }
1208}
1209
1210class AArch64PostLegalizerLoweringImpl : public Combiner {
1211protected:
1212 // TODO: Make CombinerHelper methods const.
1213 mutable CombinerHelper Helper;
1214 const AArch64PostLegalizerLoweringImplRuleConfig &RuleConfig;
1215 const AArch64Subtarget &STI;
1216
1217public:
1218 AArch64PostLegalizerLoweringImpl(
1219 MachineFunction &MF, CombinerInfo &CInfo, const TargetPassConfig *TPC,
1220 GISelCSEInfo *CSEInfo,
1221 const AArch64PostLegalizerLoweringImplRuleConfig &RuleConfig,
1222 const AArch64Subtarget &STI);
1223
1224 static const char *getName() { return "AArch64PostLegalizerLowering"; }
1225
1226 bool tryCombineAll(MachineInstr &I) const override;
1227
1228private:
1229#define GET_GICOMBINER_CLASS_MEMBERS
1230#include "AArch64GenPostLegalizeGILowering.inc"
1231#undef GET_GICOMBINER_CLASS_MEMBERS
1232};
1233
1234#define GET_GICOMBINER_IMPL
1235#include "AArch64GenPostLegalizeGILowering.inc"
1236#undef GET_GICOMBINER_IMPL
1237
1238AArch64PostLegalizerLoweringImpl::AArch64PostLegalizerLoweringImpl(
1239 MachineFunction &MF, CombinerInfo &CInfo, const TargetPassConfig *TPC,
1240 GISelCSEInfo *CSEInfo,
1241 const AArch64PostLegalizerLoweringImplRuleConfig &RuleConfig,
1242 const AArch64Subtarget &STI)
1243 : Combiner(MF, CInfo, TPC, /*KB*/ nullptr, CSEInfo),
1244 Helper(Observer, B, /*IsPreLegalize*/ true), RuleConfig(RuleConfig),
1245 STI(STI),
1246#define GET_GICOMBINER_CONSTRUCTOR_INITS
1247#include "AArch64GenPostLegalizeGILowering.inc"
1248#undef GET_GICOMBINER_CONSTRUCTOR_INITS
1249{
1250}
1251
1252class AArch64PostLegalizerLowering : public MachineFunctionPass {
1253public:
1254 static char ID;
1255
1256 AArch64PostLegalizerLowering();
1257
1258 StringRef getPassName() const override {
1259 return "AArch64PostLegalizerLowering";
1260 }
1261
1262 bool runOnMachineFunction(MachineFunction &MF) override;
1263 void getAnalysisUsage(AnalysisUsage &AU) const override;
1264
1265private:
1266 AArch64PostLegalizerLoweringImplRuleConfig RuleConfig;
1267};
1268} // end anonymous namespace
1269
1270void AArch64PostLegalizerLowering::getAnalysisUsage(AnalysisUsage &AU) const {
1271 AU.addRequired<TargetPassConfig>();
1272 AU.setPreservesCFG();
1273 getSelectionDAGFallbackAnalysisUsage(AU);
1274 MachineFunctionPass::getAnalysisUsage(AU);
1275}
1276
1277AArch64PostLegalizerLowering::AArch64PostLegalizerLowering()
1278 : MachineFunctionPass(ID) {
1279 initializeAArch64PostLegalizerLoweringPass(*PassRegistry::getPassRegistry());
1280
1281 if (!RuleConfig.parseCommandLineOption())
1282 report_fatal_error("Invalid rule identifier");
1283}
1284
1285bool AArch64PostLegalizerLowering::runOnMachineFunction(MachineFunction &MF) {
1286 if (MF.getProperties().hasProperty(
1287 MachineFunctionProperties::Property::FailedISel))
1288 return false;
1289 assert(MF.getProperties().hasProperty(
1290 MachineFunctionProperties::Property::Legalized) &&
1291 "Expected a legalized function?");
1292 auto *TPC = &getAnalysis<TargetPassConfig>();
1293 const Function &F = MF.getFunction();
1294
1294
1295 const AArch64Subtarget &ST = MF.getSubtarget<AArch64Subtarget>();
1296 CombinerInfo CInfo(/*AllowIllegalOps*/ true, /*ShouldLegalizeIllegal*/ false,
1297 /*LegalizerInfo*/ nullptr, /*OptEnabled=*/true,
1298 F.hasOptSize(), F.hasMinSize());
1299 // Disable fixed-point iteration to reduce compile-time
1300 CInfo.MaxIterations = 1;
1301 CInfo.ObserverLvl = CombinerInfo::ObserverLevel::SinglePass;
1302 // PostLegalizerCombiner performs DCE, so a full DCE pass is unnecessary.
1303 CInfo.EnableFullDCE = false;
1304 AArch64PostLegalizerLoweringImpl Impl(MF, CInfo, TPC, /*CSEInfo*/ nullptr,
1305 RuleConfig, ST);
1306 return Impl.combineMachineInstrs();
1307}
1308
1309char AArch64PostLegalizerLowering::ID = 0;
1310INITIALIZE_PASS_BEGIN(AArch64PostLegalizerLowering, DEBUG_TYPE,
1311 "Lower AArch64 MachineInstrs after legalization", false,
1312 false)
1313INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
1314INITIALIZE_PASS_END(AArch64PostLegalizerLowering, DEBUG_TYPE,
1315 "Lower AArch64 MachineInstrs after legalization", false,
1316 false)
1317
1318namespace llvm {
1319FunctionPass *createAArch64PostLegalizerLowering() {
1320 return new AArch64PostLegalizerLowering();
1321}
1322} // end namespace llvm