LLVM 23.0.0git
AArch64PostLegalizerLowering.cpp
Go to the documentation of this file.
1//=== AArch64PostLegalizerLowering.cpp --------------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8///
9/// \file
10/// Post-legalization lowering for instructions.
11///
12/// This is used to offload pattern matching from the selector.
13///
14/// For example, this combiner will notice that a G_SHUFFLE_VECTOR is actually
15/// a G_ZIP, G_UZP, etc.
16///
17/// General optimization combines should be handled by either the
18/// AArch64PostLegalizerCombiner or the AArch64PreLegalizerCombiner.
19///
20//===----------------------------------------------------------------------===//
21
22#include "AArch64ExpandImm.h"
25#include "AArch64Subtarget.h"
45#include "llvm/IR/InstrTypes.h"
47#include <optional>
48
49#define GET_GICOMBINER_DEPS
50#include "AArch64GenPostLegalizeGILowering.inc"
51#undef GET_GICOMBINER_DEPS
52
53#define DEBUG_TYPE "aarch64-postlegalizer-lowering"
54
55using namespace llvm;
56using namespace MIPatternMatch;
57using namespace AArch64GISelUtils;
58
59namespace {
60
61#define GET_GICOMBINER_TYPES
62#include "AArch64GenPostLegalizeGILowering.inc"
63#undef GET_GICOMBINER_TYPES
64
65/// Represents a pseudo instruction which replaces a G_SHUFFLE_VECTOR.
66///
67/// Used for matching target-supported shuffles before codegen.
68struct ShuffleVectorPseudo {
69 unsigned Opc; ///< Opcode for the instruction. (E.g. G_ZIP1)
70 Register Dst; ///< Destination register.
71 SmallVector<SrcOp, 2> SrcOps; ///< Source registers.
72 ShuffleVectorPseudo(unsigned Opc, Register Dst,
73 std::initializer_list<SrcOp> SrcOps)
74 : Opc(Opc), Dst(Dst), SrcOps(SrcOps){};
75 ShuffleVectorPseudo() = default;
76};
77
78/// Return true if a G_FCONSTANT instruction is known to be better-represented
79/// as a G_CONSTANT.
80bool matchFConstantToConstant(MachineInstr &MI, MachineRegisterInfo &MRI) {
81 assert(MI.getOpcode() == TargetOpcode::G_FCONSTANT);
82 Register DstReg = MI.getOperand(0).getReg();
83 const unsigned DstSize = MRI.getType(DstReg).getSizeInBits();
84 if (DstSize != 16 && DstSize != 32 && DstSize != 64)
85 return false;
86
87 // When we're storing a value, it doesn't matter what register bank it's on.
88 // Since not all floating point constants can be materialized using a fmov,
89 // it makes more sense to just use a GPR.
90 return all_of(MRI.use_nodbg_instructions(DstReg),
91 [](const MachineInstr &Use) { return Use.mayStore(); });
92}
93
94/// Change a G_FCONSTANT into a G_CONSTANT.
95void applyFConstantToConstant(MachineInstr &MI) {
96 assert(MI.getOpcode() == TargetOpcode::G_FCONSTANT);
98 const APFloat &ImmValAPF = MI.getOperand(1).getFPImm()->getValueAPF();
99 MIB.buildConstant(MI.getOperand(0).getReg(), ImmValAPF.bitcastToAPInt());
100 MI.eraseFromParent();
101}
102
103/// Check if a G_EXT instruction can handle a shuffle mask \p M when the vector
104/// sources of the shuffle are different.
105std::optional<std::pair<bool, uint64_t>> getExtMask(ArrayRef<int> M,
106 unsigned NumElts) {
107 // Look for the first non-undef element.
108 auto FirstRealElt = find_if(M, [](int Elt) { return Elt >= 0; });
109 if (FirstRealElt == M.end())
110 return std::nullopt;
111
112 // Use APInt to handle overflow when calculating expected element.
113 unsigned MaskBits = APInt(32, NumElts * 2).logBase2();
114 APInt ExpectedElt = APInt(MaskBits, *FirstRealElt + 1, false, true);
115
116 // The following shuffle indices must be the successive elements after the
117 // first real element.
118 if (any_of(
119 make_range(std::next(FirstRealElt), M.end()),
120 [&ExpectedElt](int Elt) { return Elt != ExpectedElt++ && Elt >= 0; }))
121 return std::nullopt;
122
123 // The index of an EXT is the first element if it is not UNDEF.
124 // Watch out for the beginning UNDEFs. The EXT index should be the expected
125 // value of the first element. E.g.
126 // <-1, -1, 3, ...> is treated as <1, 2, 3, ...>.
127 // <-1, -1, 0, 1, ...> is treated as <2*NumElts-2, 2*NumElts-1, 0, 1, ...>.
128 // ExpectedElt is the last mask index plus 1.
129 uint64_t Imm = ExpectedElt.getZExtValue();
130 bool ReverseExt = false;
131
132 // There are two difference cases requiring to reverse input vectors.
133 // For example, for vector <4 x i32> we have the following cases,
134 // Case 1: shufflevector(<4 x i32>,<4 x i32>,<-1, -1, -1, 0>)
135 // Case 2: shufflevector(<4 x i32>,<4 x i32>,<-1, -1, 7, 0>)
136 // For both cases, we finally use mask <5, 6, 7, 0>, which requires
137 // to reverse two input vectors.
138 if (Imm < NumElts)
139 ReverseExt = true;
140 else
141 Imm -= NumElts;
142 return std::make_pair(ReverseExt, Imm);
143}
144
145/// Helper function for matchINS.
146///
147/// \returns a value when \p M is an ins mask for \p NumInputElements.
148///
149/// First element of the returned pair is true when the produced
150/// G_INSERT_VECTOR_ELT destination should be the LHS of the G_SHUFFLE_VECTOR.
151///
152/// Second element is the destination lane for the G_INSERT_VECTOR_ELT.
153std::optional<std::pair<bool, int>> isINSMask(ArrayRef<int> M,
154 int NumInputElements) {
155 if (M.size() != static_cast<size_t>(NumInputElements))
156 return std::nullopt;
157 int NumLHSMatch = 0, NumRHSMatch = 0;
158 int LastLHSMismatch = -1, LastRHSMismatch = -1;
159 for (int Idx = 0; Idx < NumInputElements; ++Idx) {
160 if (M[Idx] == -1) {
161 ++NumLHSMatch;
162 ++NumRHSMatch;
163 continue;
164 }
165 M[Idx] == Idx ? ++NumLHSMatch : LastLHSMismatch = Idx;
166 M[Idx] == Idx + NumInputElements ? ++NumRHSMatch : LastRHSMismatch = Idx;
167 }
168 const int NumNeededToMatch = NumInputElements - 1;
169 if (NumLHSMatch == NumNeededToMatch)
170 return std::make_pair(true, LastLHSMismatch);
171 if (NumRHSMatch == NumNeededToMatch)
172 return std::make_pair(false, LastRHSMismatch);
173 return std::nullopt;
174}
175
176/// \return true if a G_SHUFFLE_VECTOR instruction \p MI can be replaced with a
177/// G_REV instruction. Returns the appropriate G_REV opcode in \p Opc.
178bool matchREV(MachineInstr &MI, MachineRegisterInfo &MRI,
179 ShuffleVectorPseudo &MatchInfo) {
180 assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
181 ArrayRef<int> ShuffleMask = MI.getOperand(3).getShuffleMask();
182 Register Dst = MI.getOperand(0).getReg();
183 Register Src = MI.getOperand(1).getReg();
184 LLT Ty = MRI.getType(Dst);
185 unsigned EltSize = Ty.getScalarSizeInBits();
186
187 // Element size for a rev cannot be 64.
188 if (EltSize == 64)
189 return false;
190
191 unsigned NumElts = Ty.getNumElements();
192
193 // Try to produce a G_REV instruction
194 for (unsigned LaneSize : {64U, 32U, 16U}) {
195 if (isREVMask(ShuffleMask, EltSize, NumElts, LaneSize)) {
196 unsigned Opcode;
197 if (LaneSize == 64U)
198 Opcode = AArch64::G_REV64;
199 else if (LaneSize == 32U)
200 Opcode = AArch64::G_REV32;
201 else
202 Opcode = AArch64::G_BSWAP;
203
204 MatchInfo = ShuffleVectorPseudo(Opcode, Dst, {Src});
205 return true;
206 }
207 }
208
209 return false;
210}
211
212/// \return true if a G_SHUFFLE_VECTOR instruction \p MI can be replaced with
213/// a G_TRN1 or G_TRN2 instruction.
214bool matchTRN(MachineInstr &MI, MachineRegisterInfo &MRI,
215 ShuffleVectorPseudo &MatchInfo) {
216 assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
217 unsigned WhichResult;
218 unsigned OperandOrder;
219 ArrayRef<int> ShuffleMask = MI.getOperand(3).getShuffleMask();
220 Register Dst = MI.getOperand(0).getReg();
221 unsigned NumElts = MRI.getType(Dst).getNumElements();
222 if (!isTRNMask(ShuffleMask, NumElts, WhichResult, OperandOrder))
223 return false;
224 unsigned Opc = (WhichResult == 0) ? AArch64::G_TRN1 : AArch64::G_TRN2;
225 Register V1 = MI.getOperand(OperandOrder == 0 ? 1 : 2).getReg();
226 Register V2 = MI.getOperand(OperandOrder == 0 ? 2 : 1).getReg();
227 MatchInfo = ShuffleVectorPseudo(Opc, Dst, {V1, V2});
228 return true;
229}
230
231/// \return true if a G_SHUFFLE_VECTOR instruction \p MI can be replaced with
232/// a G_UZP1 or G_UZP2 instruction.
233///
234/// \param [in] MI - The shuffle vector instruction.
235/// \param [out] MatchInfo - Either G_UZP1 or G_UZP2 on success.
236bool matchUZP(MachineInstr &MI, MachineRegisterInfo &MRI,
237 ShuffleVectorPseudo &MatchInfo) {
238 assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
239 unsigned WhichResult;
240 ArrayRef<int> ShuffleMask = MI.getOperand(3).getShuffleMask();
241 Register Dst = MI.getOperand(0).getReg();
242 unsigned NumElts = MRI.getType(Dst).getNumElements();
243 if (!isUZPMask(ShuffleMask, NumElts, WhichResult))
244 return false;
245 unsigned Opc = (WhichResult == 0) ? AArch64::G_UZP1 : AArch64::G_UZP2;
246 Register V1 = MI.getOperand(1).getReg();
247 Register V2 = MI.getOperand(2).getReg();
248 MatchInfo = ShuffleVectorPseudo(Opc, Dst, {V1, V2});
249 return true;
250}
251
252bool matchZip(MachineInstr &MI, MachineRegisterInfo &MRI,
253 ShuffleVectorPseudo &MatchInfo) {
254 assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
255 unsigned WhichResult;
256 unsigned OperandOrder;
257 ArrayRef<int> ShuffleMask = MI.getOperand(3).getShuffleMask();
258 Register Dst = MI.getOperand(0).getReg();
259 unsigned NumElts = MRI.getType(Dst).getNumElements();
260 if (!isZIPMask(ShuffleMask, NumElts, WhichResult, OperandOrder))
261 return false;
262 unsigned Opc = (WhichResult == 0) ? AArch64::G_ZIP1 : AArch64::G_ZIP2;
263 Register V1 = MI.getOperand(OperandOrder == 0 ? 1 : 2).getReg();
264 Register V2 = MI.getOperand(OperandOrder == 0 ? 2 : 1).getReg();
265 MatchInfo = ShuffleVectorPseudo(Opc, Dst, {V1, V2});
266 return true;
267}
268
269/// Helper function for matchDup.
270bool matchDupFromInsertVectorElt(int Lane, MachineInstr &MI,
272 ShuffleVectorPseudo &MatchInfo) {
273 if (Lane != 0)
274 return false;
275
276 // Try to match a vector splat operation into a dup instruction.
277 // We're looking for this pattern:
278 //
279 // %scalar:gpr(s64) = COPY $x0
280 // %undef:fpr(<2 x s64>) = G_IMPLICIT_DEF
281 // %cst0:gpr(s32) = G_CONSTANT i32 0
282 // %zerovec:fpr(<2 x s32>) = G_BUILD_VECTOR %cst0(s32), %cst0(s32)
283 // %ins:fpr(<2 x s64>) = G_INSERT_VECTOR_ELT %undef, %scalar(s64), %cst0(s32)
284 // %splat:fpr(<2 x s64>) = G_SHUFFLE_VECTOR %ins(<2 x s64>), %undef,
285 // %zerovec(<2 x s32>)
286 //
287 // ...into:
288 // %splat = G_DUP %scalar
289
290 // Begin matching the insert.
291 auto *InsMI = getOpcodeDef(TargetOpcode::G_INSERT_VECTOR_ELT,
292 MI.getOperand(1).getReg(), MRI);
293 if (!InsMI)
294 return false;
295 // Match the undef vector operand.
296 if (!getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, InsMI->getOperand(1).getReg(),
297 MRI))
298 return false;
299
300 // Match the index constant 0.
301 if (!mi_match(InsMI->getOperand(3).getReg(), MRI, m_ZeroInt()))
302 return false;
303
304 MatchInfo = ShuffleVectorPseudo(AArch64::G_DUP, MI.getOperand(0).getReg(),
305 {InsMI->getOperand(2).getReg()});
306 return true;
307}
308
309/// Helper function for matchDup.
310bool matchDupFromBuildVector(int Lane, MachineInstr &MI,
312 ShuffleVectorPseudo &MatchInfo) {
313 assert(Lane >= 0 && "Expected positive lane?");
314 int NumElements = MRI.getType(MI.getOperand(1).getReg()).getNumElements();
315 // Test if the LHS is a BUILD_VECTOR. If it is, then we can just reference the
316 // lane's definition directly.
317 auto *BuildVecMI =
318 getOpcodeDef(TargetOpcode::G_BUILD_VECTOR,
319 MI.getOperand(Lane < NumElements ? 1 : 2).getReg(), MRI);
320 // If Lane >= NumElements then it is point to RHS, just check from RHS
321 if (NumElements <= Lane)
322 Lane -= NumElements;
323
324 if (!BuildVecMI)
325 return false;
326 Register Reg = BuildVecMI->getOperand(Lane + 1).getReg();
327 MatchInfo =
328 ShuffleVectorPseudo(AArch64::G_DUP, MI.getOperand(0).getReg(), {Reg});
329 return true;
330}
331
332bool matchDup(MachineInstr &MI, MachineRegisterInfo &MRI,
333 ShuffleVectorPseudo &MatchInfo) {
334 assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
335 auto MaybeLane = getSplatIndex(MI);
336 if (!MaybeLane)
337 return false;
338 int Lane = *MaybeLane;
339 // If this is undef splat, generate it via "just" vdup, if possible.
340 if (Lane < 0)
341 Lane = 0;
342 if (matchDupFromInsertVectorElt(Lane, MI, MRI, MatchInfo))
343 return true;
344 if (matchDupFromBuildVector(Lane, MI, MRI, MatchInfo))
345 return true;
346 return false;
347}
348
349// Check if an EXT instruction can handle the shuffle mask when the vector
350// sources of the shuffle are the same.
351bool isSingletonExtMask(ArrayRef<int> M, LLT Ty) {
352 unsigned NumElts = Ty.getNumElements();
353
354 // Assume that the first shuffle index is not UNDEF. Fail if it is.
355 if (M[0] < 0)
356 return false;
357
358 // If this is a VEXT shuffle, the immediate value is the index of the first
359 // element. The other shuffle indices must be the successive elements after
360 // the first one.
361 unsigned ExpectedElt = M[0];
362 for (unsigned I = 1; I < NumElts; ++I) {
363 // Increment the expected index. If it wraps around, just follow it
364 // back to index zero and keep going.
365 ++ExpectedElt;
366 if (ExpectedElt == NumElts)
367 ExpectedElt = 0;
368
369 if (M[I] < 0)
370 continue; // Ignore UNDEF indices.
371 if (ExpectedElt != static_cast<unsigned>(M[I]))
372 return false;
373 }
374
375 return true;
376}
377
378bool matchEXT(MachineInstr &MI, MachineRegisterInfo &MRI,
379 ShuffleVectorPseudo &MatchInfo) {
380 assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
381 Register Dst = MI.getOperand(0).getReg();
382 LLT DstTy = MRI.getType(Dst);
383 Register V1 = MI.getOperand(1).getReg();
384 Register V2 = MI.getOperand(2).getReg();
385 auto Mask = MI.getOperand(3).getShuffleMask();
387 auto ExtInfo = getExtMask(Mask, DstTy.getNumElements());
388 uint64_t ExtFactor = MRI.getType(V1).getScalarSizeInBits() / 8;
389
390 if (!ExtInfo) {
391 if (!getOpcodeDef<GImplicitDef>(V2, MRI) ||
392 !isSingletonExtMask(Mask, DstTy))
393 return false;
394
395 Imm = Mask[0] * ExtFactor;
396 MatchInfo = ShuffleVectorPseudo(AArch64::G_EXT, Dst, {V1, V1, Imm});
397 return true;
398 }
399 bool ReverseExt;
400 std::tie(ReverseExt, Imm) = *ExtInfo;
401 if (ReverseExt)
402 std::swap(V1, V2);
403 Imm *= ExtFactor;
404 MatchInfo = ShuffleVectorPseudo(AArch64::G_EXT, Dst, {V1, V2, Imm});
405 return true;
406}
407
408/// Replace a G_SHUFFLE_VECTOR instruction with a pseudo.
409/// \p Opc is the opcode to use. \p MI is the G_SHUFFLE_VECTOR.
410void applyShuffleVectorPseudo(MachineInstr &MI, MachineRegisterInfo &MRI,
411 ShuffleVectorPseudo &MatchInfo) {
412 MachineIRBuilder MIRBuilder(MI);
413 if (MatchInfo.Opc == TargetOpcode::G_BSWAP) {
414 assert(MatchInfo.SrcOps.size() == 1);
415 LLT DstTy = MRI.getType(MatchInfo.Dst);
416 assert(DstTy == LLT::fixed_vector(8, 8) ||
417 DstTy == LLT::fixed_vector(16, 8));
418 LLT BSTy = DstTy == LLT::fixed_vector(8, 8) ? LLT::fixed_vector(4, 16)
419 : LLT::fixed_vector(8, 16);
420 // FIXME: NVCAST
421 auto BS1 = MIRBuilder.buildInstr(TargetOpcode::G_BITCAST, {BSTy},
422 MatchInfo.SrcOps[0]);
423 auto BS2 = MIRBuilder.buildInstr(MatchInfo.Opc, {BSTy}, {BS1});
424 MIRBuilder.buildInstr(TargetOpcode::G_BITCAST, {MatchInfo.Dst}, {BS2});
425 } else
426 MIRBuilder.buildInstr(MatchInfo.Opc, {MatchInfo.Dst}, MatchInfo.SrcOps);
427 MI.eraseFromParent();
428}
429
430/// Replace a G_SHUFFLE_VECTOR instruction with G_EXT.
431/// Special-cased because the constant operand must be emitted as a G_CONSTANT
432/// for the imported tablegen patterns to work.
433void applyEXT(MachineInstr &MI, ShuffleVectorPseudo &MatchInfo) {
434 MachineIRBuilder MIRBuilder(MI);
435 if (MatchInfo.SrcOps[2].getImm() == 0)
436 MIRBuilder.buildCopy(MatchInfo.Dst, MatchInfo.SrcOps[0]);
437 else {
438 // Tablegen patterns expect an i32 G_CONSTANT as the final op.
439 auto Cst =
440 MIRBuilder.buildConstant(LLT::scalar(32), MatchInfo.SrcOps[2].getImm());
441 MIRBuilder.buildInstr(MatchInfo.Opc, {MatchInfo.Dst},
442 {MatchInfo.SrcOps[0], MatchInfo.SrcOps[1], Cst});
443 }
444 MI.eraseFromParent();
445}
446
447void applyFullRev(MachineInstr &MI, MachineRegisterInfo &MRI) {
448 Register Dst = MI.getOperand(0).getReg();
449 Register Src = MI.getOperand(1).getReg();
450 LLT DstTy = MRI.getType(Dst);
451 assert(DstTy.getSizeInBits() == 128 &&
452 "Expected 128bit vector in applyFullRev");
453 MachineIRBuilder MIRBuilder(MI);
454 auto Cst = MIRBuilder.buildConstant(LLT::scalar(32), 8);
455 auto Rev = MIRBuilder.buildInstr(AArch64::G_REV64, {DstTy}, {Src});
456 MIRBuilder.buildInstr(AArch64::G_EXT, {Dst}, {Rev, Rev, Cst});
457 MI.eraseFromParent();
458}
459
460bool matchNonConstInsert(MachineInstr &MI, MachineRegisterInfo &MRI) {
461 assert(MI.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT);
462
463 auto ValAndVReg =
464 getIConstantVRegValWithLookThrough(MI.getOperand(3).getReg(), MRI);
465 return !ValAndVReg;
466}
467
468void applyNonConstInsert(MachineInstr &MI, MachineRegisterInfo &MRI,
469 MachineIRBuilder &Builder) {
471 Builder.setInstrAndDebugLoc(Insert);
472
473 Register Offset = Insert.getIndexReg();
474 LLT VecTy = MRI.getType(Insert.getReg(0));
475 LLT EltTy = MRI.getType(Insert.getElementReg());
476 LLT IdxTy = MRI.getType(Insert.getIndexReg());
477
478 if (VecTy.isScalableVector())
479 return;
480
481 // Create a stack slot and store the vector into it
482 MachineFunction &MF = Builder.getMF();
483 Align Alignment(
484 std::min<uint64_t>(VecTy.getSizeInBytes().getKnownMinValue(), 16));
485 int FrameIdx = MF.getFrameInfo().CreateStackObject(VecTy.getSizeInBytes(),
486 Alignment, false);
487 LLT FramePtrTy = LLT::pointer(0, 64);
489 auto StackTemp = Builder.buildFrameIndex(FramePtrTy, FrameIdx);
490
491 Builder.buildStore(Insert.getOperand(1), StackTemp, PtrInfo, Align(8));
492
493 // Get the pointer to the element, and be sure not to hit undefined behavior
494 // if the index is out of bounds.
496 "Expected a power-2 vector size");
497 auto Mask = Builder.buildConstant(IdxTy, VecTy.getNumElements() - 1);
498 Register And = Builder.buildAnd(IdxTy, Offset, Mask).getReg(0);
499 auto EltSize = Builder.buildConstant(IdxTy, EltTy.getSizeInBytes());
500 Register Mul = Builder.buildMul(IdxTy, And, EltSize).getReg(0);
501 Register EltPtr =
502 Builder.buildPtrAdd(MRI.getType(StackTemp.getReg(0)), StackTemp, Mul)
503 .getReg(0);
504
505 // Write the inserted element
506 Builder.buildStore(Insert.getElementReg(), EltPtr, PtrInfo, Align(1));
507 // Reload the whole vector.
508 Builder.buildLoad(Insert.getReg(0), StackTemp, PtrInfo, Align(8));
509 Insert.eraseFromParent();
510}
511
512/// Match a G_SHUFFLE_VECTOR with a mask which corresponds to a
513/// G_INSERT_VECTOR_ELT and G_EXTRACT_VECTOR_ELT pair.
514///
515/// e.g.
516/// %shuf = G_SHUFFLE_VECTOR %left, %right, shufflemask(0, 0)
517///
518/// Can be represented as
519///
520/// %extract = G_EXTRACT_VECTOR_ELT %left, 0
521/// %ins = G_INSERT_VECTOR_ELT %left, %extract, 1
522///
523bool matchINS(MachineInstr &MI, MachineRegisterInfo &MRI,
524 std::tuple<Register, int, Register, int> &MatchInfo) {
525 assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
526 ArrayRef<int> ShuffleMask = MI.getOperand(3).getShuffleMask();
527 Register Dst = MI.getOperand(0).getReg();
528 int NumElts = MRI.getType(Dst).getNumElements();
529 auto DstIsLeftAndDstLane = isINSMask(ShuffleMask, NumElts);
530 if (!DstIsLeftAndDstLane)
531 return false;
532 bool DstIsLeft;
533 int DstLane;
534 std::tie(DstIsLeft, DstLane) = *DstIsLeftAndDstLane;
535 Register Left = MI.getOperand(1).getReg();
536 Register Right = MI.getOperand(2).getReg();
537 Register DstVec = DstIsLeft ? Left : Right;
538 Register SrcVec = Left;
539
540 int SrcLane = ShuffleMask[DstLane];
541 if (SrcLane >= NumElts) {
542 SrcVec = Right;
543 SrcLane -= NumElts;
544 }
545
546 MatchInfo = std::make_tuple(DstVec, DstLane, SrcVec, SrcLane);
547 return true;
548}
549
550void applyINS(MachineInstr &MI, MachineRegisterInfo &MRI,
551 MachineIRBuilder &Builder,
552 std::tuple<Register, int, Register, int> &MatchInfo) {
553 Builder.setInstrAndDebugLoc(MI);
554 Register Dst = MI.getOperand(0).getReg();
555 auto ScalarTy = MRI.getType(Dst).getElementType();
556 Register DstVec, SrcVec;
557 int DstLane, SrcLane;
558 std::tie(DstVec, DstLane, SrcVec, SrcLane) = MatchInfo;
559 auto SrcCst = Builder.buildConstant(LLT::scalar(64), SrcLane);
560 auto Extract = Builder.buildExtractVectorElement(ScalarTy, SrcVec, SrcCst);
561 auto DstCst = Builder.buildConstant(LLT::scalar(64), DstLane);
562 Builder.buildInsertVectorElement(Dst, DstVec, Extract, DstCst);
563 MI.eraseFromParent();
564}
565
566/// isVShiftRImm - Check if this is a valid vector for the immediate
567/// operand of a vector shift right operation. The value must be in the range:
568/// 1 <= Value <= ElementBits for a right shift.
570 int64_t &Cnt) {
571 assert(Ty.isVector() && "vector shift count is not a vector type");
573 auto Cst = getAArch64VectorSplatScalar(*MI, MRI);
574 if (!Cst)
575 return false;
576 Cnt = *Cst;
577 int64_t ElementBits = Ty.getScalarSizeInBits();
578 return Cnt >= 1 && Cnt <= ElementBits;
579}
580
581/// Match a vector G_ASHR or G_LSHR with a valid immediate shift.
582bool matchVAshrLshrImm(MachineInstr &MI, MachineRegisterInfo &MRI,
583 int64_t &Imm) {
584 assert(MI.getOpcode() == TargetOpcode::G_ASHR ||
585 MI.getOpcode() == TargetOpcode::G_LSHR);
586 LLT Ty = MRI.getType(MI.getOperand(1).getReg());
587 if (!Ty.isVector())
588 return false;
589 return isVShiftRImm(MI.getOperand(2).getReg(), MRI, Ty, Imm);
590}
591
592void applyVAshrLshrImm(MachineInstr &MI, MachineRegisterInfo &MRI,
593 int64_t &Imm) {
594 unsigned Opc = MI.getOpcode();
595 assert(Opc == TargetOpcode::G_ASHR || Opc == TargetOpcode::G_LSHR);
596 unsigned NewOpc =
597 Opc == TargetOpcode::G_ASHR ? AArch64::G_VASHR : AArch64::G_VLSHR;
598 MachineIRBuilder MIB(MI);
599 MIB.buildInstr(NewOpc, {MI.getOperand(0)}, {MI.getOperand(1)}).addImm(Imm);
600 MI.eraseFromParent();
601}
602
603/// Determine if it is possible to modify the \p RHS and predicate \p P of a
604/// G_ICMP instruction such that the right-hand side is an arithmetic immediate.
605///
606/// \returns A pair containing the updated immediate and predicate which may
607/// be used to optimize the instruction.
608///
609/// \note This assumes that the comparison has been legalized.
610std::optional<std::pair<uint64_t, CmpInst::Predicate>>
611tryAdjustICmpImmAndPred(Register RHS, CmpInst::Predicate P,
612 const MachineRegisterInfo &MRI) {
613 const auto &Ty = MRI.getType(RHS);
614 if (Ty.isVector())
615 return std::nullopt;
616 unsigned Size = Ty.getSizeInBits();
617 assert((Size == 32 || Size == 64) && "Expected 32 or 64 bit compare only?");
618
619 // If the RHS is not a constant, or the RHS is already a valid arithmetic
620 // immediate, then there is nothing to change.
621 auto ValAndVReg = getIConstantVRegValWithLookThrough(RHS, MRI);
622 if (!ValAndVReg)
623 return std::nullopt;
624 uint64_t OriginalC = ValAndVReg->Value.getZExtValue();
625 uint64_t C = OriginalC;
626 if (isLegalArithImmed(C))
627 return std::nullopt;
628
629 // We have a non-arithmetic immediate. Check if adjusting the immediate and
630 // adjusting the predicate will result in a legal arithmetic immediate.
631 switch (P) {
632 default:
633 return std::nullopt;
636 // Check for
637 //
638 // x slt c => x sle c - 1
639 // x sge c => x sgt c - 1
640 //
641 // When c is not the smallest possible negative number.
642 if ((Size == 64 && static_cast<int64_t>(C) == INT64_MIN) ||
643 (Size == 32 && static_cast<int32_t>(C) == INT32_MIN))
644 return std::nullopt;
645 P = (P == CmpInst::ICMP_SLT) ? CmpInst::ICMP_SLE : CmpInst::ICMP_SGT;
646 C -= 1;
647 break;
650 // Check for
651 //
652 // x ult c => x ule c - 1
653 // x uge c => x ugt c - 1
654 //
655 // When c is not zero.
656 assert(C != 0 && "C should not be zero here!");
657 P = (P == CmpInst::ICMP_ULT) ? CmpInst::ICMP_ULE : CmpInst::ICMP_UGT;
658 C -= 1;
659 break;
662 // Check for
663 //
664 // x sle c => x slt c + 1
665 // x sgt c => s sge c + 1
666 //
667 // When c is not the largest possible signed integer.
668 if ((Size == 32 && static_cast<int32_t>(C) == INT32_MAX) ||
669 (Size == 64 && static_cast<int64_t>(C) == INT64_MAX))
670 return std::nullopt;
671 P = (P == CmpInst::ICMP_SLE) ? CmpInst::ICMP_SLT : CmpInst::ICMP_SGE;
672 C += 1;
673 break;
676 // Check for
677 //
678 // x ule c => x ult c + 1
679 // x ugt c => s uge c + 1
680 //
681 // When c is not the largest possible unsigned integer.
682 if ((Size == 32 && static_cast<uint32_t>(C) == UINT32_MAX) ||
683 (Size == 64 && C == UINT64_MAX))
684 return std::nullopt;
685 P = (P == CmpInst::ICMP_ULE) ? CmpInst::ICMP_ULT : CmpInst::ICMP_UGE;
686 C += 1;
687 break;
688 }
689
690 // Check if the new constant is valid, and return the updated constant and
691 // predicate if it is.
692 if (Size == 32)
693 C = static_cast<uint32_t>(C);
694 if (isLegalArithImmed(C))
695 return {{C, P}};
696
697 auto NumberOfInstrToLoadImm = [=](uint64_t Imm) {
699 AArch64_IMM::expandMOVImm(Imm, 32, Insn);
700 return Insn.size();
701 };
702
703 if (NumberOfInstrToLoadImm(OriginalC) > NumberOfInstrToLoadImm(C))
704 return {{C, P}};
705
706 return std::nullopt;
707}
708
709/// Determine whether or not it is possible to update the RHS and predicate of
710/// a G_ICMP instruction such that the RHS will be selected as an arithmetic
711/// immediate.
712///
713/// \p MI - The G_ICMP instruction
714/// \p MatchInfo - The new RHS immediate and predicate on success
715///
716/// See tryAdjustICmpImmAndPred for valid transformations.
717bool matchAdjustICmpImmAndPred(
719 std::pair<uint64_t, CmpInst::Predicate> &MatchInfo) {
720 assert(MI.getOpcode() == TargetOpcode::G_ICMP);
721 Register RHS = MI.getOperand(3).getReg();
722 auto Pred = static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
723 if (auto MaybeNewImmAndPred = tryAdjustICmpImmAndPred(RHS, Pred, MRI)) {
724 MatchInfo = *MaybeNewImmAndPred;
725 return true;
726 }
727 return false;
728}
729
730void applyAdjustICmpImmAndPred(
731 MachineInstr &MI, std::pair<uint64_t, CmpInst::Predicate> &MatchInfo,
732 MachineIRBuilder &MIB, GISelChangeObserver &Observer) {
734 MachineOperand &RHS = MI.getOperand(3);
735 MachineRegisterInfo &MRI = *MIB.getMRI();
736 auto Cst = MIB.buildConstant(MRI.cloneVirtualRegister(RHS.getReg()),
737 MatchInfo.first);
738 Observer.changingInstr(MI);
739 RHS.setReg(Cst->getOperand(0).getReg());
740 MI.getOperand(1).setPredicate(MatchInfo.second);
741 Observer.changedInstr(MI);
742}
743
744bool matchDupLane(MachineInstr &MI, MachineRegisterInfo &MRI,
745 std::pair<unsigned, int> &MatchInfo) {
746 assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
747 Register Src1Reg = MI.getOperand(1).getReg();
748 const LLT SrcTy = MRI.getType(Src1Reg);
749 const LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
750
751 auto LaneIdx = getSplatIndex(MI);
752 if (!LaneIdx)
753 return false;
754
755 // The lane idx should be within the first source vector.
756 if (*LaneIdx >= SrcTy.getNumElements())
757 return false;
758
759 if (DstTy != SrcTy)
760 return false;
761
762 LLT ScalarTy = SrcTy.getElementType();
763 unsigned ScalarSize = ScalarTy.getSizeInBits();
764
765 unsigned Opc = 0;
766 switch (SrcTy.getNumElements()) {
767 case 2:
768 if (ScalarSize == 64)
769 Opc = AArch64::G_DUPLANE64;
770 else if (ScalarSize == 32)
771 Opc = AArch64::G_DUPLANE32;
772 break;
773 case 4:
774 if (ScalarSize == 32)
775 Opc = AArch64::G_DUPLANE32;
776 else if (ScalarSize == 16)
777 Opc = AArch64::G_DUPLANE16;
778 break;
779 case 8:
780 if (ScalarSize == 8)
781 Opc = AArch64::G_DUPLANE8;
782 else if (ScalarSize == 16)
783 Opc = AArch64::G_DUPLANE16;
784 break;
785 case 16:
786 if (ScalarSize == 8)
787 Opc = AArch64::G_DUPLANE8;
788 break;
789 default:
790 break;
791 }
792 if (!Opc)
793 return false;
794
795 MatchInfo.first = Opc;
796 MatchInfo.second = *LaneIdx;
797 return true;
798}
799
800void applyDupLane(MachineInstr &MI, MachineRegisterInfo &MRI,
801 MachineIRBuilder &B, std::pair<unsigned, int> &MatchInfo) {
802 assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
803 Register Src1Reg = MI.getOperand(1).getReg();
804 const LLT SrcTy = MRI.getType(Src1Reg);
805
806 B.setInstrAndDebugLoc(MI);
807 auto Lane = B.buildConstant(LLT::scalar(64), MatchInfo.second);
808
809 Register DupSrc = MI.getOperand(1).getReg();
810 // For types like <2 x s32>, we can use G_DUPLANE32, with a <4 x s32> source.
811 // To do this, we can use a G_CONCAT_VECTORS to do the widening.
812 if (SrcTy.getSizeInBits() == 64) {
813 auto Undef = B.buildUndef(SrcTy);
814 DupSrc = B.buildConcatVectors(SrcTy.multiplyElements(2),
815 {Src1Reg, Undef.getReg(0)})
816 .getReg(0);
817 }
818 B.buildInstr(MatchInfo.first, {MI.getOperand(0).getReg()}, {DupSrc, Lane});
819 MI.eraseFromParent();
820}
821
822bool matchScalarizeVectorUnmerge(MachineInstr &MI, MachineRegisterInfo &MRI) {
823 auto &Unmerge = cast<GUnmerge>(MI);
824 Register Src1Reg = Unmerge.getReg(Unmerge.getNumOperands() - 1);
825 const LLT SrcTy = MRI.getType(Src1Reg);
826 if (SrcTy.getSizeInBits() != 128 && SrcTy.getSizeInBits() != 64)
827 return false;
828 return SrcTy.isVector() && !SrcTy.isScalable() &&
829 Unmerge.getNumOperands() == (unsigned)SrcTy.getNumElements() + 1;
830}
831
832void applyScalarizeVectorUnmerge(MachineInstr &MI, MachineRegisterInfo &MRI,
834 auto &Unmerge = cast<GUnmerge>(MI);
835 Register Src1Reg = Unmerge.getReg(Unmerge.getNumOperands() - 1);
836 const LLT SrcTy = MRI.getType(Src1Reg);
837 assert((SrcTy.isVector() && !SrcTy.isScalable()) &&
838 "Expected a fixed length vector");
839
840 for (int I = 0; I < SrcTy.getNumElements(); ++I)
841 B.buildExtractVectorElementConstant(Unmerge.getReg(I), Src1Reg, I);
842 MI.eraseFromParent();
843}
844
845bool matchBuildVectorToDup(MachineInstr &MI, MachineRegisterInfo &MRI) {
846 assert(MI.getOpcode() == TargetOpcode::G_BUILD_VECTOR);
847
848 // Later, during selection, we'll try to match imported patterns using
849 // immAllOnesV and immAllZerosV. These require G_BUILD_VECTOR. Don't lower
850 // G_BUILD_VECTORs which could match those patterns.
852 return false;
853
854 return getAArch64VectorSplat(MI, MRI).has_value();
855}
856
857void applyBuildVectorToDup(MachineInstr &MI, MachineRegisterInfo &MRI,
859 B.setInstrAndDebugLoc(MI);
860 B.buildInstr(AArch64::G_DUP, {MI.getOperand(0).getReg()},
861 {MI.getOperand(1).getReg()});
862 MI.eraseFromParent();
863}
864
865/// \returns how many instructions would be saved by folding a G_ICMP's shift
866/// and/or extension operations.
// NOTE(review): original line 867 (the function signature, taking the compare
// operand register CmpOp and MRI) and line 886 (Def = the instruction
// defining CmpOp, via getDefIgnoringCopies) are missing from this extraction.
868 // No instructions to save if there's more than one use or no uses.
869 if (!MRI.hasOneNonDBGUse(CmpOp))
870 return 0;
871
872 // FIXME: This is duplicated with the selector. (See: selectShiftedRegister)
// An "extend" here is either G_SEXT_INREG or an AND with a 0xFF / 0xFFFF /
// 0xFFFFFFFF mask (i.e. a zero-extend of a byte/half/word).
873 auto IsSupportedExtend = [&](const MachineInstr &MI) {
874 if (MI.getOpcode() == TargetOpcode::G_SEXT_INREG)
875 return true;
876 if (MI.getOpcode() != TargetOpcode::G_AND)
877 return false;
878 auto ValAndVReg =
879 getIConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI);
880 if (!ValAndVReg)
881 return false;
882 uint64_t Mask = ValAndVReg->Value.getZExtValue();
883 return (Mask == 0xFF || Mask == 0xFFFF || Mask == 0xFFFFFFFF);
884 };
885
// A bare extend folds away one instruction.
887 if (IsSupportedExtend(*Def))
888 return 1;
889
890 unsigned Opc = Def->getOpcode();
891 if (Opc != TargetOpcode::G_SHL && Opc != TargetOpcode::G_ASHR &&
892 Opc != TargetOpcode::G_LSHR)
893 return 0;
894
// Only constant shift amounts can be folded into the compare.
895 auto MaybeShiftAmt =
896 getIConstantVRegValWithLookThrough(Def->getOperand(2).getReg(), MRI);
897 if (!MaybeShiftAmt)
898 return 0;
899 uint64_t ShiftAmt = MaybeShiftAmt->Value.getZExtValue();
900 MachineInstr *ShiftLHS =
901 getDefIgnoringCopies(Def->getOperand(1).getReg(), MRI);
902
903 // Check if we can fold an extend and a shift.
904 // FIXME: This is duplicated with the selector. (See:
905 // selectArithExtendedRegister)
// Extend + shift folds two instructions when the shift fits the arith
// extended-register form (amount <= 4), otherwise just the extend.
906 if (IsSupportedExtend(*ShiftLHS))
907 return (ShiftAmt <= 4) ? 2 : 1;
908
909 LLT Ty = MRI.getType(Def->getOperand(0).getReg());
910 if (Ty.isVector())
911 return 0;
// A plain shift folds only when the amount is valid for the operand width.
912 unsigned ShiftSize = Ty.getSizeInBits();
913 if ((ShiftSize == 32 && ShiftAmt <= 31) ||
914 (ShiftSize == 64 && ShiftAmt <= 63))
915 return 1;
916 return 0;
917}
918
919/// \returns true if it would be profitable to swap the LHS and RHS of a G_ICMP
920/// instruction \p MI.
921bool trySwapICmpOperands(MachineInstr &MI, MachineRegisterInfo &MRI) {
922 assert(MI.getOpcode() == TargetOpcode::G_ICMP);
923 // Swap the operands if it would introduce a profitable folding opportunity.
924 // (e.g. a shift + extend).
925 //
926 // For example:
927 // lsl w13, w11, #1
928 // cmp w13, w12
929 // can be turned into:
930 // cmp w12, w11, lsl #1
931
932 // Don't swap if there's a constant on the RHS, because we know we can fold
933 // that.
934 Register RHS = MI.getOperand(3).getReg();
935 auto RHSCst = getIConstantVRegValWithLookThrough(RHS, MRI);
936 if (RHSCst && isLegalArithImmed(RHSCst->Value.getSExtValue()))
937 return false;
938
939 Register LHS = MI.getOperand(2).getReg();
940 auto Pred = static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
// For a CMN candidate (compare-with-negate), score the negated value rather
// than the subtract itself.
// NOTE(review): original line 942 (Def = getDefIgnoringCopies(Reg, MRI)
// inside this lambda) is missing from this extraction.
941 auto GetRegForProfit = [&](Register Reg) {
943 return isCMN(Def, Pred, MRI) ? Def->getOperand(2).getReg() : Reg;
944 };
945
946 // Don't have a constant on the RHS. If we swap the LHS and RHS of the
947 // compare, would we be able to fold more instructions?
948 Register TheLHS = GetRegForProfit(LHS);
949 Register TheRHS = GetRegForProfit(RHS);
950
951 // If the LHS is more likely to give us a folding opportunity, then swap the
952 // LHS and RHS.
953 return (getCmpOperandFoldingProfit(TheLHS, MRI) >
954 getCmpOperandFoldingProfit(TheRHS, MRI));
955}
956
957void applySwapICmpOperands(MachineInstr &MI, GISelChangeObserver &Observer) {
958 auto Pred = static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
959 Register LHS = MI.getOperand(2).getReg();
960 Register RHS = MI.getOperand(3).getReg();
961 Observer.changedInstr(MI);
962 MI.getOperand(1).setPredicate(CmpInst::getSwappedPredicate(Pred));
963 MI.getOperand(2).setReg(RHS);
964 MI.getOperand(3).setReg(LHS);
965 Observer.changedInstr(MI);
966}
967
968/// \returns a function which builds a vector floating point compare instruction
969/// for a condition code \p CC.
970/// \param [in] NoNans - True if the instruction has nnan flag.
971std::function<Register(MachineIRBuilder &)>
972getVectorFCMP(AArch64CC::CondCode CC, Register LHS, Register RHS, bool NoNans,
973 MachineRegisterInfo &MRI) {
974 LLT DstTy = MRI.getType(LHS);
975 assert(DstTy.isVector() && "Expected vector types only?");
976 assert(DstTy == MRI.getType(RHS) && "Src and Dst types must match!");
977 switch (CC) {
978 default:
979 llvm_unreachable("Unexpected condition code!");
980 case AArch64CC::NE:
981 return [LHS, RHS, DstTy](MachineIRBuilder &MIB) {
982 auto FCmp = MIB.buildInstr(AArch64::G_FCMEQ, {DstTy}, {LHS, RHS});
983 return MIB.buildNot(DstTy, FCmp).getReg(0);
984 };
985 case AArch64CC::EQ:
986 return [LHS, RHS, DstTy](MachineIRBuilder &MIB) {
987 return MIB.buildInstr(AArch64::G_FCMEQ, {DstTy}, {LHS, RHS}).getReg(0);
988 };
989 case AArch64CC::GE:
990 return [LHS, RHS, DstTy](MachineIRBuilder &MIB) {
991 return MIB.buildInstr(AArch64::G_FCMGE, {DstTy}, {LHS, RHS}).getReg(0);
992 };
993 case AArch64CC::GT:
994 return [LHS, RHS, DstTy](MachineIRBuilder &MIB) {
995 return MIB.buildInstr(AArch64::G_FCMGT, {DstTy}, {LHS, RHS}).getReg(0);
996 };
997 case AArch64CC::LS:
998 return [LHS, RHS, DstTy](MachineIRBuilder &MIB) {
999 return MIB.buildInstr(AArch64::G_FCMGE, {DstTy}, {RHS, LHS}).getReg(0);
1000 };
1001 case AArch64CC::MI:
1002 return [LHS, RHS, DstTy](MachineIRBuilder &MIB) {
1003 return MIB.buildInstr(AArch64::G_FCMGT, {DstTy}, {RHS, LHS}).getReg(0);
1004 };
1005 }
1006}
1007
1008/// Try to lower a vector G_FCMP \p MI into an AArch64-specific pseudo.
1009bool matchLowerVectorFCMP(MachineInstr &MI, MachineRegisterInfo &MRI,
1010 MachineIRBuilder &MIB) {
1011 assert(MI.getOpcode() == TargetOpcode::G_FCMP);
1012 const auto &ST = MI.getMF()->getSubtarget<AArch64Subtarget>();
1013
1014 Register Dst = MI.getOperand(0).getReg();
1015 LLT DstTy = MRI.getType(Dst);
1016 if (!DstTy.isVector() || !ST.hasNEON())
1017 return false;
1018 Register LHS = MI.getOperand(2).getReg();
1019 unsigned EltSize = MRI.getType(LHS).getScalarSizeInBits();
1020 if (EltSize == 16 && !ST.hasFullFP16())
1021 return false;
1022 if (EltSize != 16 && EltSize != 32 && EltSize != 64)
1023 return false;
1024
1025 return true;
1026}
1027
1028/// Try to lower a vector G_FCMP \p MI into an AArch64-specific pseudo.
// NOTE(review): original lines 1043 (declarations of CC and CC2, the AArch64
// condition codes), 1045 (the FCMP_UNO half of the predicate test), 1053 (the
// assignment picking EQ/NE for the ord/uno-vs-zero special case) and 1058
// (MIB.setInstrAndDebugLoc(MI)) are missing from this extraction.
1029void applyLowerVectorFCMP(MachineInstr &MI, MachineRegisterInfo &MRI,
1030 MachineIRBuilder &MIB) {
1031 assert(MI.getOpcode() == TargetOpcode::G_FCMP);
1032
1033 const auto &CmpMI = cast<GFCmp>(MI);
1034
1035 Register Dst = CmpMI.getReg(0);
1036 CmpInst::Predicate Pred = CmpMI.getCond();
1037 Register LHS = CmpMI.getLHSReg();
1038 Register RHS = CmpMI.getRHSReg();
1039
1040 LLT DstTy = MRI.getType(Dst);
1041
1042 bool Invert = false;
1044 if ((Pred == CmpInst::Predicate::FCMP_ORD ||
1046 isBuildVectorAllZeros(*MRI.getVRegDef(RHS), MRI)) {
1047 // The special case "fcmp ord %a, 0" is the canonical check that LHS isn't
1048 // NaN, so equivalent to a == a and doesn't need the two comparisons an
1049 // "ord" normally would.
1050 // Similarly, "fcmp uno %a, 0" is the canonical check that LHS is NaN and is
1051 // thus equivalent to a != a.
1052 RHS = LHS;
1054 } else
1055 changeVectorFCMPPredToAArch64CC(Pred, CC, CC2, Invert);
1056
1057 // Instead of having an apply function, just build here to simplify things.
1059
1060 // TODO: Also consider GISelValueTracking result if eligible.
1061 const bool NoNans = MI.getFlag(MachineInstr::FmNoNans);
1062
// Emit the primary compare; a second compare OR'd in is needed when the
// predicate maps to two AArch64 condition codes (CC2 != AL).
1063 auto Cmp = getVectorFCMP(CC, LHS, RHS, NoNans, MRI);
1064 Register CmpRes;
1065 if (CC2 == AArch64CC::AL)
1066 CmpRes = Cmp(MIB);
1067 else {
1068 auto Cmp2 = getVectorFCMP(CC2, LHS, RHS, NoNans, MRI);
1069 auto Cmp2Dst = Cmp2(MIB);
1070 auto Cmp1Dst = Cmp(MIB);
1071 CmpRes = MIB.buildOr(DstTy, Cmp1Dst, Cmp2Dst).getReg(0);
1072 }
1073 if (Invert)
1074 CmpRes = MIB.buildNot(DstTy, CmpRes).getReg(0);
1075 MRI.replaceRegWith(Dst, CmpRes);
1076 MI.eraseFromParent();
1077}
1078
1079// Matches G_BUILD_VECTOR where at least one source operand is not a constant
1080bool matchLowerBuildToInsertVecElt(MachineInstr &MI, MachineRegisterInfo &MRI) {
1081 auto *GBuildVec = cast<GBuildVector>(&MI);
1082
1083 // Check if the values are all constants
1084 for (unsigned I = 0; I < GBuildVec->getNumSources(); ++I) {
1085 auto ConstVal =
1086 getAnyConstantVRegValWithLookThrough(GBuildVec->getSourceReg(I), MRI);
1087
1088 if (!ConstVal.has_value())
1089 return true;
1090 }
1091
1092 return false;
1093}
1094
// Lower a partially-constant G_BUILD_VECTOR into a chain of
// G_INSERT_VECTOR_ELT operations on an initial G_IMPLICIT_DEF, skipping any
// undef sources.
// NOTE(review): original line 1096 (the MachineIRBuilder &B parameter) is
// missing from this extraction.
1095void applyLowerBuildToInsertVecElt(MachineInstr &MI, MachineRegisterInfo &MRI,
1097 auto *GBuildVec = cast<GBuildVector>(&MI);
1098 LLT DstTy = MRI.getType(GBuildVec->getReg(0));
1099 Register DstReg = B.buildUndef(DstTy).getReg(0);
1100
1101 for (unsigned I = 0; I < GBuildVec->getNumSources(); ++I) {
1102 Register SrcReg = GBuildVec->getSourceReg(I);
// Undef lanes need no insert; the initial undef vector already covers them.
1103 if (mi_match(SrcReg, MRI, m_GImplicitDef()))
1104 continue;
1105 auto IdxReg = B.buildConstant(LLT::scalar(64), I);
1106 DstReg =
1107 B.buildInsertVectorElement(DstTy, DstReg, SrcReg, IdxReg).getReg(0);
1108 }
1109 B.buildCopy(GBuildVec->getReg(0), DstReg);
1110 GBuildVec->eraseFromParent();
1111}
1112
1113bool matchFormTruncstore(MachineInstr &MI, MachineRegisterInfo &MRI,
1114 Register &SrcReg) {
1115 assert(MI.getOpcode() == TargetOpcode::G_STORE);
1116 Register DstReg = MI.getOperand(0).getReg();
1117 if (MRI.getType(DstReg).isVector())
1118 return false;
1119 // Match a store of a truncate.
1120 if (!mi_match(DstReg, MRI, m_GTrunc(m_Reg(SrcReg))))
1121 return false;
1122 // Only form truncstores for value types of max 64b.
1123 return MRI.getType(SrcReg).getSizeInBits() <= 64;
1124}
1125
// Rewrite the G_STORE to store the wider pre-truncation value \p SrcReg
// directly; the store's memory size is unchanged, making it a truncstore.
// NOTE(review): original line 1127 (remaining parameters, including the
// GISelChangeObserver &Observer used below) is missing from this extraction.
1126void applyFormTruncstore(MachineInstr &MI, MachineRegisterInfo &MRI,
1128 Register &SrcReg) {
1129 assert(MI.getOpcode() == TargetOpcode::G_STORE);
1130 Observer.changingInstr(MI);
1131 MI.getOperand(0).setReg(SrcReg);
1132 Observer.changedInstr(MI);
1133}
1134
1135// Lower vector G_SEXT_INREG back to shifts for selection. We allowed them to
1136// form in the first place for combine opportunities, so any remaining ones
1137// at this stage need be lowered back.
1138bool matchVectorSextInReg(MachineInstr &MI, MachineRegisterInfo &MRI) {
1139 assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG);
1140 Register DstReg = MI.getOperand(0).getReg();
1141 LLT DstTy = MRI.getType(DstReg);
1142 return DstTy.isVector();
1143}
1144
// Delegate to the generic LegalizerHelper lowering, which expands the vector
// G_SEXT_INREG into a shift-left / arithmetic-shift-right pair.
// NOTE(review): original line 1146 (remaining parameters, including the
// Observer and MachineIRBuilder &B used below) is missing from this
// extraction.
1145void applyVectorSextInReg(MachineInstr &MI, MachineRegisterInfo &MRI,
1147 assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG);
1148 B.setInstrAndDebugLoc(MI);
1149 LegalizerHelper Helper(*MI.getMF(), Observer, B);
1150 Helper.lower(MI, 0, /* Unused hint type */ LLT());
1151}
1152
1153/// Combine <N x t>, unused = unmerge(G_EXT <2*N x t> v, undef, N)
1154/// => unused, <N x t> = unmerge v
// NOTE(review): original line 1174 (the call fetching the G_EXT's immediate
// operand as a constant, initializing LowestVal) is missing from this
// extraction.
1155bool matchUnmergeExtToUnmerge(MachineInstr &MI, MachineRegisterInfo &MRI,
1156 Register &MatchInfo) {
1157 auto &Unmerge = cast<GUnmerge>(MI);
1158 if (Unmerge.getNumDefs() != 2)
1159 return false;
// The high half of the unmerge must be dead for the rewrite to be valid.
1160 if (!MRI.use_nodbg_empty(Unmerge.getReg(1)))
1161 return false;
1162
1163 LLT DstTy = MRI.getType(Unmerge.getReg(0));
1164 if (!DstTy.isVector())
1165 return false;
1166
1167 MachineInstr *Ext = getOpcodeDef(AArch64::G_EXT, Unmerge.getSourceReg(), MRI);
1168 if (!Ext)
1169 return false;
1170
1171 Register ExtSrc1 = Ext->getOperand(1).getReg();
1172 Register ExtSrc2 = Ext->getOperand(2).getReg();
// The G_EXT must shift by exactly half the vector (in bytes), i.e. select
// the high half of its first source.
1173 auto LowestVal =
1175 if (!LowestVal || LowestVal->Value.getZExtValue() != DstTy.getSizeInBytes())
1176 return false;
1177
// The second G_EXT source must be undef so only ExtSrc1's lanes matter.
1178 if (!getOpcodeDef<GImplicitDef>(ExtSrc2, MRI))
1179 return false;
1180
1181 MatchInfo = ExtSrc1;
1182 return true;
1183}
1184
// Rewrite the unmerge to read directly from the G_EXT's first source and swap
// its two destinations, so the previously-live low def now receives the high
// half of the original vector.
// NOTE(review): original line 1186 (the MachineIRBuilder &B parameter line of
// this signature) is missing from this extraction.
1185void applyUnmergeExtToUnmerge(MachineInstr &MI, MachineRegisterInfo &MRI,
1187 GISelChangeObserver &Observer, Register &SrcReg) {
1188 Observer.changingInstr(MI);
1189 // Swap dst registers.
1190 Register Dst1 = MI.getOperand(0).getReg();
1191 MI.getOperand(0).setReg(MI.getOperand(1).getReg());
1192 MI.getOperand(1).setReg(Dst1);
1193 MI.getOperand(2).setReg(SrcReg);
1194 Observer.changedInstr(MI);
1195}
1196
1197// Match mul({z/s}ext , {z/s}ext) => {u/s}mull OR
1198// Match v2s64 mul instructions, which will then be scalarised later on
1199// Doing these two matches in one function to ensure that the order of matching
1200// will always be the same.
1201// Try lowering MUL to MULL before trying to scalarize if needed.
1202bool matchMulv2s64(MachineInstr &MI, MachineRegisterInfo &MRI) {
1203 // Get the instructions that defined the source operand
1204 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
1205 return DstTy == LLT::fixed_vector(2, 64);
1206}
1207
// Scalarise a v2s64 G_MUL via the generic fewerElementsVector lowering, since
// AArch64 has no 64x64 vector multiply.
// NOTE(review): original lines 1209 (remaining parameters, including the
// Observer and MachineIRBuilder &B used below) and 1219 (the narrowed type
// argument to fewerElementsVector, derived from DstTy) are missing from this
// extraction.
1208void applyMulv2s64(MachineInstr &MI, MachineRegisterInfo &MRI,
1210 assert(MI.getOpcode() == TargetOpcode::G_MUL &&
1211 "Expected a G_MUL instruction");
1212
1213 // Get the instructions that defined the source operand
1214 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
1215 assert(DstTy == LLT::fixed_vector(2, 64) && "Expected v2s64 Mul");
1216 LegalizerHelper Helper(*MI.getMF(), Observer, B);
1217 Helper.fewerElementsVector(
1218 MI, 0,
1220}
1221
1222class AArch64PostLegalizerLoweringImpl : public Combiner {
1223protected:
1224 const CombinerHelper Helper;
1225 const AArch64PostLegalizerLoweringImplRuleConfig &RuleConfig;
1226 const AArch64Subtarget &STI;
1227
1228public:
1229 AArch64PostLegalizerLoweringImpl(
1230 MachineFunction &MF, CombinerInfo &CInfo, const TargetPassConfig *TPC,
1231 GISelCSEInfo *CSEInfo,
1232 const AArch64PostLegalizerLoweringImplRuleConfig &RuleConfig,
1233 const AArch64Subtarget &STI);
1234
1235 static const char *getName() { return "AArch6400PreLegalizerCombiner"; }
1236
1237 bool tryCombineAll(MachineInstr &I) const override;
1238
1239private:
1240#define GET_GICOMBINER_CLASS_MEMBERS
1241#include "AArch64GenPostLegalizeGILowering.inc"
1242#undef GET_GICOMBINER_CLASS_MEMBERS
1243};
1244
1245#define GET_GICOMBINER_IMPL
1246#include "AArch64GenPostLegalizeGILowering.inc"
1247#undef GET_GICOMBINER_IMPL
1248
// Constructor: wires the combiner base class, the helper (in pre-legalize
// mode, since this pass may create target pseudos not in the legalizer info),
// and the generated rule-config members.
// NOTE(review): original lines 1257/1259 (the
// #define/#undef GET_GICOMBINER_CONSTRUCTOR_INITS pair bracketing the
// generated member initializers) are missing from this extraction.
1249AArch64PostLegalizerLoweringImpl::AArch64PostLegalizerLoweringImpl(
1250 MachineFunction &MF, CombinerInfo &CInfo, const TargetPassConfig *TPC,
1251 GISelCSEInfo *CSEInfo,
1252 const AArch64PostLegalizerLoweringImplRuleConfig &RuleConfig,
1253 const AArch64Subtarget &STI)
1254 : Combiner(MF, CInfo, TPC, /*VT*/ nullptr, CSEInfo),
1255 Helper(Observer, B, /*IsPreLegalize*/ true), RuleConfig(RuleConfig),
1256 STI(STI),
1258#include "AArch64GenPostLegalizeGILowering.inc"
1260{
1261}
1262
// Legacy MachineFunctionPass wrapper that owns the rule configuration and
// runs the AArch64PostLegalizerLoweringImpl combiner over each function.
1263class AArch64PostLegalizerLowering : public MachineFunctionPass {
1264public:
1265 static char ID;
1266
1267 AArch64PostLegalizerLowering();
1268
1269 StringRef getPassName() const override {
1270 return "AArch64PostLegalizerLowering";
1271 }
1272
1273 bool runOnMachineFunction(MachineFunction &MF) override;
1274 void getAnalysisUsage(AnalysisUsage &AU) const override;
1275
1276private:
// Parsed once in the constructor from the command-line rule filter.
1277 AArch64PostLegalizerLoweringImplRuleConfig RuleConfig;
1278};
1279} // end anonymous namespace
1280
// Declare analysis requirements; this pass does not modify the CFG.
// NOTE(review): original lines 1282 and 1284-1285 (the
// AU.addRequired<TargetPassConfig>() / SelectionDAG-fallback usage /
// MachineFunctionPass::getAnalysisUsage(AU) calls) are missing from this
// extraction.
1281void AArch64PostLegalizerLowering::getAnalysisUsage(AnalysisUsage &AU) const {
1283 AU.setPreservesCFG();
1286}
1287
// Pass constructor: parses the -aarch64postlegalizerlowering rule filter;
// an unknown rule identifier is a fatal error.
// NOTE(review): original line 1289 (the ": MachineFunctionPass(ID) {"
// initializer line) is missing from this extraction.
1288AArch64PostLegalizerLowering::AArch64PostLegalizerLowering()
1290 if (!RuleConfig.parseCommandLineOption())
1291 report_fatal_error("Invalid rule identifier");
1292}
1293
// Run the lowering combiner over a legalized function. Lowering always runs,
// including at -O0, because it produces the target pseudos selection expects.
// NOTE(review): original line 1301 (the definition of ST, the
// AArch64Subtarget reference passed to Impl below) is missing from this
// extraction.
1294bool AArch64PostLegalizerLowering::runOnMachineFunction(MachineFunction &MF) {
1295 if (MF.getProperties().hasFailedISel())
1296 return false;
1297 assert(MF.getProperties().hasLegalized() && "Expected a legalized function?");
1298 auto *TPC = &getAnalysis<TargetPassConfig>();
1299 const Function &F = MF.getFunction();
1300
// Illegal ops are allowed: this pass deliberately creates target pseudos
// that the legalizer does not know about.
1302 CombinerInfo CInfo(/*AllowIllegalOps*/ true, /*ShouldLegalizeIllegal*/ false,
1303 /*LegalizerInfo*/ nullptr, /*OptEnabled=*/true,
1304 F.hasOptSize(), F.hasMinSize());
1305 // Disable fixed-point iteration to reduce compile-time
1306 CInfo.MaxIterations = 1;
1307 CInfo.ObserverLvl = CombinerInfo::ObserverLevel::SinglePass;
1308 // PostLegalizerCombiner performs DCE, so a full DCE pass is unnecessary.
1309 CInfo.EnableFullDCE = false;
1310 AArch64PostLegalizerLoweringImpl Impl(MF, CInfo, TPC, /*CSEInfo*/ nullptr,
1311 RuleConfig, ST);
1312 return Impl.combineMachineInstrs();
1313}
1314
// Pass registration and factory.
// NOTE(review): original lines 1319 (INITIALIZE_PASS_DEPENDENCY between the
// BEGIN/END macros) and 1325 (the
// "FunctionPass *createAArch64PostLegalizerLowering() {" signature line) are
// missing from this extraction.
1315char AArch64PostLegalizerLowering::ID = 0;
1316INITIALIZE_PASS_BEGIN(AArch64PostLegalizerLowering, DEBUG_TYPE,
1317 "Lower AArch64 MachineInstrs after legalization", false,
1318 false)
1320INITIALIZE_PASS_END(AArch64PostLegalizerLowering, DEBUG_TYPE,
1321 "Lower AArch64 MachineInstrs after legalization", false,
1322 false)
1323
1324namespace llvm {
1326 return new AArch64PostLegalizerLowering();
1327}
1328} // end namespace llvm
static bool isVShiftRImm(SDValue Op, EVT VT, bool isNarrow, int64_t &Cnt)
isVShiftRImm - Check if this is a valid build_vector for the immediate operand of a vector shift righ...
static bool isINSMask(ArrayRef< int > M, int NumInputElements, bool &DstIsLeft, int &Anomaly)
static unsigned getCmpOperandFoldingProfit(SDValue Op)
Returns how profitable it is to fold a comparison's operand's shift and/or extension operations.
This file declares the targeting of the Machinelegalizer class for AArch64.
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
#define GET_GICOMBINER_CONSTRUCTOR_INITS
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
This contains common combine transformations that may be used in a combine pass,or by the target else...
Option class for Targets to specify which operations are combined how and when.
This contains the base class for all Combiners generated by TableGen.
This contains common code to allow clients to notify changes to machine instr.
#define DEBUG_TYPE
Declares convenience wrapper classes for interpreting MachineInstr instances as specific generic oper...
IRTranslator LLVM IR MI
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
Contains matchers for matching SSA Machine Instructions.
This file declares the MachineIRBuilder class.
Register Reg
Promote Memory to Register
Definition Mem2Reg.cpp:110
static MCRegister getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
#define P(N)
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition PassSupport.h:42
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition PassSupport.h:44
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition PassSupport.h:39
static StringRef getName(Value *V)
Target-Independent Code Generator Pass Configuration Options pass.
Value * RHS
Value * LHS
BinaryOperator * Mul
APInt bitcastToAPInt() const
Definition APFloat.h:1408
Class for arbitrary precision integers.
Definition APInt.h:78
uint64_t getZExtValue() const
Get zero extended value.
Definition APInt.h:1555
unsigned logBase2() const
Definition APInt.h:1776
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
LLVM_ABI void setPreservesCFG()
This function should be called by the pass, iff they do not:
Definition Pass.cpp:270
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
This class is the base class for the comparison instructions.
Definition InstrTypes.h:664
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition InstrTypes.h:676
@ ICMP_SLT
signed less than
Definition InstrTypes.h:705
@ ICMP_SLE
signed less or equal
Definition InstrTypes.h:706
@ ICMP_UGE
unsigned greater or equal
Definition InstrTypes.h:700
@ ICMP_UGT
unsigned greater than
Definition InstrTypes.h:699
@ ICMP_SGT
signed greater than
Definition InstrTypes.h:703
@ ICMP_ULT
unsigned less than
Definition InstrTypes.h:701
@ FCMP_ORD
0 1 1 1 True if ordered (no nans)
Definition InstrTypes.h:685
@ ICMP_SGE
signed greater or equal
Definition InstrTypes.h:704
@ ICMP_ULE
unsigned less or equal
Definition InstrTypes.h:702
@ FCMP_UNO
1 0 0 0 True if unordered: isnan(X) | isnan(Y)
Definition InstrTypes.h:686
Predicate getSwappedPredicate() const
For example, EQ->EQ, SLE->SGE, ULT->UGT, OEQ->OEQ, ULE->UGE, OLT->OGT, etc.
Definition InstrTypes.h:827
Combiner implementation.
Definition Combiner.h:34
FunctionPass class - This class is used to implement most global optimizations.
Definition Pass.h:314
The CSE Analysis object.
Definition CSEInfo.h:72
Abstract class that contains various methods for clients to notify about changes.
virtual void changingInstr(MachineInstr &MI)=0
This instruction is about to be mutated in some way.
virtual void changedInstr(MachineInstr &MI)=0
This instruction was mutated in some way.
constexpr bool isScalableVector() const
Returns true if the LLT is a scalable vector.
constexpr unsigned getScalarSizeInBits() const
static constexpr LLT scalar(unsigned SizeInBits)
Get a low-level scalar or aggregate "bag of bits".
constexpr uint16_t getNumElements() const
Returns the number of elements in a vector LLT.
constexpr bool isVector() const
static constexpr LLT pointer(unsigned AddressSpace, unsigned SizeInBits)
Get a low-level pointer in the given address space.
constexpr TypeSize getSizeInBits() const
Returns the total size of the type. Must only be called on sized types.
constexpr LLT getElementType() const
Returns the vector's element type. Only valid for vector types.
constexpr ElementCount getElementCount() const
static constexpr LLT fixed_vector(unsigned NumElements, unsigned ScalarSizeInBits)
Get a low-level fixed-width vector of some number of elements and element width.
constexpr LLT changeElementCount(ElementCount EC) const
Return a vector or scalar with the same element type and the new element count.
constexpr TypeSize getSizeInBytes() const
Returns the total size of the type in bytes, i.e.
LLVM_ABI int CreateStackObject(uint64_t Size, Align Alignment, bool isSpillSlot, const AllocaInst *Alloca=nullptr, uint8_t ID=0)
Create a new statically sized stack object, returning a nonnegative identifier to represent it.
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
virtual bool runOnMachineFunction(MachineFunction &MF)=0
runOnMachineFunction - This method must be overloaded to perform the desired machine code transformat...
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
Function & getFunction()
Return the LLVM function that this machine code represents.
const MachineFunctionProperties & getProperties() const
Get the function properties.
Helper class to build MachineInstr.
MachineInstrBuilder buildNot(const DstOp &Dst, const SrcOp &Src0)
Build and insert a bitwise not, NegOne = G_CONSTANT -1 Res = G_OR Op0, NegOne.
MachineInstrBuilder buildInstr(unsigned Opcode)
Build and insert <empty> = Opcode <empty>.
void setInstrAndDebugLoc(MachineInstr &MI)
Set the insertion point to before MI, and set the debug loc to MI's loc.
MachineRegisterInfo * getMRI()
Getter for MRI.
MachineInstrBuilder buildOr(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_OR Op0, Op1.
virtual MachineInstrBuilder buildConstant(const DstOp &Res, const ConstantInt &Val)
Build and insert Res = G_CONSTANT Val.
Register getReg(unsigned Idx) const
Get the register for the operand index.
Representation of each machine instruction.
const MachineOperand & getOperand(unsigned i) const
MachineOperand class - Representation of each machine instruction operand.
Register getReg() const
getReg - Returns the register number.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
LLVM_ABI bool hasOneNonDBGUse(Register RegNo) const
hasOneNonDBGUse - Return true if there is exactly one non-Debug use of the specified register.
LLVM_ABI MachineInstr * getVRegDef(Register Reg) const
getVRegDef - Return the machine instr that defines the specified virtual register or null if none is ...
bool use_nodbg_empty(Register RegNo) const
use_nodbg_empty - Return true if there are no non-Debug instructions using the specified register.
LLT getType(Register Reg) const
Get the low-level type of Reg or LLT{} if Reg is not a generic (target independent) virtual register.
iterator_range< use_instr_nodbg_iterator > use_nodbg_instructions(Register Reg) const
LLVM_ABI Register cloneVirtualRegister(Register VReg, StringRef Name="")
Create and return a new virtual register in the function with the same attributes as the given regist...
LLVM_ABI void replaceRegWith(Register FromReg, Register ToReg)
replaceRegWith - Replace all instances of FromReg with ToReg in the machine function.
virtual StringRef getPassName() const
getPassName - Return a nice clean name for a pass.
Definition Pass.cpp:85
Wrapper class representing virtual and physical registers.
Definition Register.h:20
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Target-Independent Code Generator Pass Configuration Options.
A Use represents the edge between a Value definition and its users.
Definition Use.h:35
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
Definition TypeSize.h:165
constexpr LeafTy divideCoefficientBy(ScalarTy RHS) const
We do not provide the '/' operator here because division for polynomial types does not work in the sa...
Definition TypeSize.h:252
#define UINT64_MAX
Definition DataTypes.h:77
#define INT64_MIN
Definition DataTypes.h:74
#define INT64_MAX
Definition DataTypes.h:71
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
std::optional< RegOrConstant > getAArch64VectorSplat(const MachineInstr &MI, const MachineRegisterInfo &MRI)
constexpr bool isLegalArithImmed(const uint64_t C)
void changeVectorFCMPPredToAArch64CC(const CmpInst::Predicate P, AArch64CC::CondCode &CondCode, AArch64CC::CondCode &CondCode2, bool &Invert)
Find the AArch64 condition codes necessary to represent P for a vector floating point comparison.
bool isCMN(const MachineInstr *MaybeSub, const CmpInst::Predicate &Pred, const MachineRegisterInfo &MRI)
std::optional< int64_t > getAArch64VectorSplatScalar(const MachineInstr &MI, const MachineRegisterInfo &MRI)
void expandMOVImm(uint64_t Imm, unsigned BitSize, SmallVectorImpl< ImmInsnModel > &Insn)
Expand a MOVi32imm or MOVi64imm pseudo instruction to one or more real move-immediate instructions to...
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition CallingConv.h:24
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
operand_type_match m_Reg()
SpecificConstantMatch m_ZeroInt()
Convenience matchers for specific integer values.
ImplicitDefMatch m_GImplicitDef()
bool mi_match(Reg R, const MachineRegisterInfo &MRI, Pattern &&P)
UnaryOp_match< SrcTy, TargetOpcode::G_TRUNC > m_GTrunc(const SrcTy &Src)
Predicate getPredicate(unsigned Condition, unsigned Hint)
Return predicate consisting of specified condition and hint bits.
NodeAddr< DefNode * > Def
Definition RDFGraph.h:384
This is an optimization pass for GlobalISel generic memory operations.
Definition Types.h:26
@ Offset
Definition DWP.cpp:532
LLVM_ABI bool isBuildVectorAllZeros(const MachineInstr &MI, const MachineRegisterInfo &MRI, bool AllowUndef=false)
Return true if the specified instruction is a G_BUILD_VECTOR or G_BUILD_VECTOR_TRUNC where all of the...
Definition Utils.cpp:1502
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1739
LLVM_ABI MachineInstr * getOpcodeDef(unsigned Opcode, Register Reg, const MachineRegisterInfo &MRI)
See if Reg is defined by an single def instruction that is Opcode.
Definition Utils.cpp:652
bool isZIPMask(ArrayRef< int > M, unsigned NumElts, unsigned &WhichResultOut, unsigned &OperandOrderOut)
Return true for zip1 or zip2 masks of the form: <0, 8, 1, 9, 2, 10, 3, 11> (WhichResultOut = 0,...
@ Undef
Value of the register doesn't matter.
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
Definition MathExtras.h:284
LLVM_ABI MachineInstr * getDefIgnoringCopies(Register Reg, const MachineRegisterInfo &MRI)
Find the def instruction for Reg, folding away any trivial copies.
Definition Utils.cpp:493
FunctionPass * createAArch64PostLegalizerLowering()
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1746
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
Definition Error.cpp:163
bool isUZPMask(ArrayRef< int > M, unsigned NumElts, unsigned &WhichResultOut)
Return true for uzp1 or uzp2 masks of the form: <0, 2, 4, 6, 8, 10, 12, 14> or <1,...
bool isREVMask(ArrayRef< int > M, unsigned EltSize, unsigned NumElts, unsigned BlockSize)
isREVMask - Check if a vector shuffle corresponds to a REV instruction with the specified blocksize.
LLVM_ABI std::optional< ValueAndVReg > getAnyConstantVRegValWithLookThrough(Register VReg, const MachineRegisterInfo &MRI, bool LookThroughInstrs=true, bool LookThroughAnyExt=false)
If VReg is defined by a statically evaluable chain of instructions rooted on a G_CONSTANT or G_FCONST...
Definition Utils.cpp:438
LLVM_ABI bool isBuildVectorAllOnes(const MachineInstr &MI, const MachineRegisterInfo &MRI, bool AllowUndef=false)
Return true if the specified instruction is a G_BUILD_VECTOR or G_BUILD_VECTOR_TRUNC where all of the...
Definition Utils.cpp:1508
LLVM_ABI void getSelectionDAGFallbackAnalysisUsage(AnalysisUsage &AU)
Modify analysis usage so it preserves passes required for the SelectionDAG fallback.
Definition Utils.cpp:1205
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
LLVM_ABI std::optional< ValueAndVReg > getIConstantVRegValWithLookThrough(Register VReg, const MachineRegisterInfo &MRI, bool LookThroughInstrs=true)
If VReg is defined by a statically evaluable chain of instructions rooted on a G_CONSTANT returns its...
Definition Utils.cpp:432
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1772
bool isTRNMask(ArrayRef< int > M, unsigned NumElts, unsigned &WhichResultOut, unsigned &OperandOrderOut)
Return true for trn1 or trn2 masks of the form: <0, 8, 2, 10, 4, 12, 6, 14> (WhichResultOut = 0,...
LLVM_ABI int getSplatIndex(ArrayRef< int > Mask)
If all non-negative Mask elements are the same value, return that value.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:872
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
@ SinglePass
Enables Observer-based DCE and additional heuristics that retry combining defined and used instructio...
Matching combinators.
This class contains a discriminated union of information about pointers in memory operands,...
static LLVM_ABI MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.