1//===- AArch64InstructionSelector.cpp ----------------------------*- C++ -*-==//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8/// \file
9/// This file implements the targeting of the InstructionSelector class for
10/// AArch64.
11/// \todo This should be generated by TableGen.
12//===----------------------------------------------------------------------===//
13
15#include "AArch64InstrInfo.h"
18#include "AArch64RegisterInfo.h"
19#include "AArch64Subtarget.h"
41#include "llvm/IR/Constants.h"
44#include "llvm/IR/IntrinsicsAArch64.h"
46#include "llvm/IR/Type.h"
47#include "llvm/Pass.h"
48#include "llvm/Support/Debug.h"
50#include <optional>
51
52#define DEBUG_TYPE "aarch64-isel"
53
54using namespace llvm;
55using namespace MIPatternMatch;
56using namespace AArch64GISelUtils;
57
58namespace llvm {
59class BlockFrequencyInfo;
60class ProfileSummaryInfo;
61}
62
63namespace {
64
65#define GET_GLOBALISEL_PREDICATE_BITSET
66#include "AArch64GenGlobalISel.inc"
67#undef GET_GLOBALISEL_PREDICATE_BITSET
68
69
70class AArch64InstructionSelector : public InstructionSelector {
71public:
72 AArch64InstructionSelector(const AArch64TargetMachine &TM,
73 const AArch64Subtarget &STI,
74 const AArch64RegisterBankInfo &RBI);
75
76 bool select(MachineInstr &I) override;
77 static const char *getName() { return DEBUG_TYPE; }
78
79 void setupMF(MachineFunction &MF, GISelKnownBits *KB,
80 CodeGenCoverage *CoverageInfo, ProfileSummaryInfo *PSI,
81 BlockFrequencyInfo *BFI) override {
82 InstructionSelector::setupMF(MF, KB, CoverageInfo, PSI, BFI);
83 MIB.setMF(MF);
84
85 // hasFnAttribute() is expensive to call on every BRCOND selection, so
86 // cache it here for each run of the selector.
87 ProduceNonFlagSettingCondBr =
88 !MF.getFunction().hasFnAttribute(Attribute::SpeculativeLoadHardening);
89 MFReturnAddr = Register();
90
91 processPHIs(MF);
92 }
93
94private:
95 /// tblgen-erated 'select' implementation, used as the initial selector for
96 /// the patterns that don't require complex C++.
97 bool selectImpl(MachineInstr &I, CodeGenCoverage &CoverageInfo) const;
98
99 // A lowering phase that runs before any selection attempts.
100 // Returns true if the instruction was modified.
101 bool preISelLower(MachineInstr &I);
102
103 // An early selection function that runs before the selectImpl() call.
104 bool earlySelect(MachineInstr &I);
105
106 /// Save state that is shared between select calls, call select on \p I and
107 /// then restore the saved state. This can be used to recursively call select
108 /// within a select call.
109 bool selectAndRestoreState(MachineInstr &I);
110
111 // Do some preprocessing of G_PHIs before we begin selection.
112 void processPHIs(MachineFunction &MF);
113
114 bool earlySelectSHL(MachineInstr &I, MachineRegisterInfo &MRI);
115
116 /// Eliminate same-sized cross-bank copies into stores before selectImpl().
117 bool contractCrossBankCopyIntoStore(MachineInstr &I,
118 MachineRegisterInfo &MRI);
119
120 bool convertPtrAddToAdd(MachineInstr &I, MachineRegisterInfo &MRI);
121
122 bool selectVaStartAAPCS(MachineInstr &I, MachineFunction &MF,
123 MachineRegisterInfo &MRI) const;
124 bool selectVaStartDarwin(MachineInstr &I, MachineFunction &MF,
125 MachineRegisterInfo &MRI) const;
126
127 ///@{
128 /// Helper functions for selectCompareBranch.
129 bool selectCompareBranchFedByFCmp(MachineInstr &I, MachineInstr &FCmp,
130 MachineIRBuilder &MIB) const;
131 bool selectCompareBranchFedByICmp(MachineInstr &I, MachineInstr &ICmp,
132 MachineIRBuilder &MIB) const;
133 bool tryOptCompareBranchFedByICmp(MachineInstr &I, MachineInstr &ICmp,
134 MachineIRBuilder &MIB) const;
135 bool tryOptAndIntoCompareBranch(MachineInstr &AndInst, bool Invert,
136 MachineBasicBlock *DstMBB,
137 MachineIRBuilder &MIB) const;
138 ///@}
139
140 bool selectCompareBranch(MachineInstr &I, MachineFunction &MF,
141 MachineRegisterInfo &MRI);
142
143 bool selectVectorAshrLshr(MachineInstr &I, MachineRegisterInfo &MRI);
144 bool selectVectorSHL(MachineInstr &I, MachineRegisterInfo &MRI);
145
146 // Helper to generate an equivalent of scalar_to_vector into a new register,
147 // returned via 'Dst'.
148 MachineInstr *emitScalarToVector(unsigned EltSize,
149 const TargetRegisterClass *DstRC,
150 Register Scalar,
151 MachineIRBuilder &MIRBuilder) const;
152 /// Helper to narrow a vector that was widened by emitScalarToVector.
153 /// Copies the lowest part of a 128-bit or 64-bit vector to a 64-bit or
154 /// 32-bit vector, respectively.
155 MachineInstr *emitNarrowVector(Register DstReg, Register SrcReg,
156 MachineIRBuilder &MIRBuilder,
157 MachineRegisterInfo &MRI) const;
158
159 /// Emit a lane insert into \p DstReg, or a new vector register if
160 /// std::nullopt is provided.
161 ///
162 /// The lane inserted into is defined by \p LaneIdx. The vector source
163 /// register is given by \p SrcReg. The register containing the element is
164 /// given by \p EltReg.
165 MachineInstr *emitLaneInsert(std::optional<Register> DstReg, Register SrcReg,
166 Register EltReg, unsigned LaneIdx,
167 const RegisterBank &RB,
168 MachineIRBuilder &MIRBuilder) const;
169
170 /// Emit a sequence of instructions representing a constant \p CV for a
171 /// vector register \p Dst. (E.g. a MOV, or a load from a constant pool.)
172 ///
173 /// \returns the last instruction in the sequence on success, and nullptr
174 /// otherwise.
175 MachineInstr *emitConstantVector(Register Dst, Constant *CV,
176 MachineIRBuilder &MIRBuilder,
177 MachineRegisterInfo &MRI);
178
179 MachineInstr *tryAdvSIMDModImm8(Register Dst, unsigned DstSize, APInt Bits,
180 MachineIRBuilder &MIRBuilder);
181
182 MachineInstr *tryAdvSIMDModImm16(Register Dst, unsigned DstSize, APInt Bits,
183 MachineIRBuilder &MIRBuilder, bool Inv);
184
185 MachineInstr *tryAdvSIMDModImm32(Register Dst, unsigned DstSize, APInt Bits,
186 MachineIRBuilder &MIRBuilder, bool Inv);
187 MachineInstr *tryAdvSIMDModImm64(Register Dst, unsigned DstSize, APInt Bits,
188 MachineIRBuilder &MIRBuilder);
189 MachineInstr *tryAdvSIMDModImm321s(Register Dst, unsigned DstSize, APInt Bits,
190 MachineIRBuilder &MIRBuilder, bool Inv);
191 MachineInstr *tryAdvSIMDModImmFP(Register Dst, unsigned DstSize, APInt Bits,
192 MachineIRBuilder &MIRBuilder);
193
194 bool tryOptConstantBuildVec(MachineInstr &MI, LLT DstTy,
195 MachineRegisterInfo &MRI);
196 /// \returns true if a G_BUILD_VECTOR instruction \p MI can be selected as a
197 /// SUBREG_TO_REG.
198 bool tryOptBuildVecToSubregToReg(MachineInstr &MI, MachineRegisterInfo &MRI);
199 bool selectBuildVector(MachineInstr &I, MachineRegisterInfo &MRI);
200 bool selectMergeValues(MachineInstr &I, MachineRegisterInfo &MRI);
201 bool selectUnmergeValues(MachineInstr &I, MachineRegisterInfo &MRI);
202
203 bool selectShuffleVector(MachineInstr &I, MachineRegisterInfo &MRI);
204 bool selectExtractElt(MachineInstr &I, MachineRegisterInfo &MRI);
205 bool selectConcatVectors(MachineInstr &I, MachineRegisterInfo &MRI);
206 bool selectSplitVectorUnmerge(MachineInstr &I, MachineRegisterInfo &MRI);
207
208 /// Helper function to select vector load intrinsics like
209 /// @llvm.aarch64.neon.ld2.*, @llvm.aarch64.neon.ld4.*, etc.
210 /// \p Opc is the opcode that the selected instruction should use.
211 /// \p NumVecs is the number of vector destinations for the instruction.
212 /// \p I is the original G_INTRINSIC_W_SIDE_EFFECTS instruction.
213 bool selectVectorLoadIntrinsic(unsigned Opc, unsigned NumVecs,
214 MachineInstr &I);
215 bool selectVectorLoadLaneIntrinsic(unsigned Opc, unsigned NumVecs,
216 MachineInstr &I);
217 void selectVectorStoreIntrinsic(MachineInstr &I, unsigned NumVecs,
218 unsigned Opc);
219 bool selectVectorStoreLaneIntrinsic(MachineInstr &I, unsigned NumVecs,
220 unsigned Opc);
221 bool selectIntrinsicWithSideEffects(MachineInstr &I,
222 MachineRegisterInfo &MRI);
223 bool selectIntrinsic(MachineInstr &I, MachineRegisterInfo &MRI);
224 bool selectVectorICmp(MachineInstr &I, MachineRegisterInfo &MRI);
225 bool selectJumpTable(MachineInstr &I, MachineRegisterInfo &MRI);
226 bool selectBrJT(MachineInstr &I, MachineRegisterInfo &MRI);
227 bool selectTLSGlobalValue(MachineInstr &I, MachineRegisterInfo &MRI);
228 bool selectReduction(MachineInstr &I, MachineRegisterInfo &MRI);
229 bool selectMOPS(MachineInstr &I, MachineRegisterInfo &MRI);
230 bool selectUSMovFromExtend(MachineInstr &I, MachineRegisterInfo &MRI);
231
232 bool selectIndexedExtLoad(MachineInstr &I, MachineRegisterInfo &MRI);
233 bool selectIndexedLoad(MachineInstr &I, MachineRegisterInfo &MRI);
234 bool selectIndexedStore(GIndexedStore &I, MachineRegisterInfo &MRI);
235
236 unsigned emitConstantPoolEntry(const Constant *CPVal,
237 MachineFunction &MF) const;
238 MachineInstr *emitLoadFromConstantPool(const Constant *CPVal,
239 MachineIRBuilder &MIRBuilder) const;
240
241 // Emit a vector concat operation.
242 MachineInstr *emitVectorConcat(std::optional<Register> Dst, Register Op1,
243 Register Op2,
244 MachineIRBuilder &MIRBuilder) const;
245
246 // Emit an integer compare between LHS and RHS, which checks for Predicate.
247 MachineInstr *emitIntegerCompare(MachineOperand &LHS, MachineOperand &RHS,
248 MachineOperand &Predicate,
249 MachineIRBuilder &MIRBuilder) const;
250
251 /// Emit a floating point comparison between \p LHS and \p RHS.
252 /// \p Pred, if given, is the intended predicate to use.
253 MachineInstr *
254 emitFPCompare(Register LHS, Register RHS, MachineIRBuilder &MIRBuilder,
255 std::optional<CmpInst::Predicate> = std::nullopt) const;
256
257 MachineInstr *
258 emitInstr(unsigned Opcode, std::initializer_list<llvm::DstOp> DstOps,
259 std::initializer_list<llvm::SrcOp> SrcOps,
260 MachineIRBuilder &MIRBuilder,
261 const ComplexRendererFns &RenderFns = std::nullopt) const;
262 /// Helper function to emit an add or sub instruction.
263 ///
264 /// \p AddrModeAndSizeToOpcode must contain each of the opcode variants above
265 /// in a specific order.
266 ///
267 /// Below is an example of the expected input to \p AddrModeAndSizeToOpcode.
268 ///
269 /// \code
270 /// const std::array<std::array<unsigned, 2>, 4> Table {
271 /// {{AArch64::ADDXri, AArch64::ADDWri},
272 /// {AArch64::ADDXrs, AArch64::ADDWrs},
273 /// {AArch64::ADDXrr, AArch64::ADDWrr},
274 /// {AArch64::SUBXri, AArch64::SUBWri},
275 /// {AArch64::ADDXrx, AArch64::ADDWrx}}};
276 /// \endcode
277 ///
278 /// Each row in the table corresponds to a different addressing mode. Each
279 /// column corresponds to a different register size.
280 ///
281 /// \attention Rows must be structured as follows:
282 /// - Row 0: The ri opcode variants
283 /// - Row 1: The rs opcode variants
284 /// - Row 2: The rr opcode variants
285 /// - Row 3: The ri opcode variants for negative immediates
286 /// - Row 4: The rx opcode variants
287 ///
288 /// \attention Columns must be structured as follows:
289 /// - Column 0: The 64-bit opcode variants
290 /// - Column 1: The 32-bit opcode variants
291 ///
292 /// \p Dst is the destination register of the binop to emit.
293 /// \p LHS is the left-hand operand of the binop to emit.
294 /// \p RHS is the right-hand operand of the binop to emit.
295 MachineInstr *emitAddSub(
296 const std::array<std::array<unsigned, 2>, 5> &AddrModeAndSizeToOpcode,
297 Register Dst, MachineOperand &LHS, MachineOperand &RHS,
298 MachineIRBuilder &MIRBuilder) const;
299 MachineInstr *emitADD(Register DefReg, MachineOperand &LHS,
300 MachineOperand &RHS,
301 MachineIRBuilder &MIRBuilder) const;
302 MachineInstr *emitADDS(Register Dst, MachineOperand &LHS, MachineOperand &RHS,
303 MachineIRBuilder &MIRBuilder) const;
304 MachineInstr *emitSUBS(Register Dst, MachineOperand &LHS, MachineOperand &RHS,
305 MachineIRBuilder &MIRBuilder) const;
306 MachineInstr *emitADCS(Register Dst, MachineOperand &LHS, MachineOperand &RHS,
307 MachineIRBuilder &MIRBuilder) const;
308 MachineInstr *emitSBCS(Register Dst, MachineOperand &LHS, MachineOperand &RHS,
309 MachineIRBuilder &MIRBuilder) const;
310 MachineInstr *emitCMN(MachineOperand &LHS, MachineOperand &RHS,
311 MachineIRBuilder &MIRBuilder) const;
312 MachineInstr *emitTST(MachineOperand &LHS, MachineOperand &RHS,
313 MachineIRBuilder &MIRBuilder) const;
314 MachineInstr *emitSelect(Register Dst, Register LHS, Register RHS,
315 AArch64CC::CondCode CC,
316 MachineIRBuilder &MIRBuilder) const;
317 MachineInstr *emitExtractVectorElt(std::optional<Register> DstReg,
318 const RegisterBank &DstRB, LLT ScalarTy,
319 Register VecReg, unsigned LaneIdx,
320 MachineIRBuilder &MIRBuilder) const;
321 MachineInstr *emitCSINC(Register Dst, Register Src1, Register Src2,
322 AArch64CC::CondCode Pred,
323 MachineIRBuilder &MIRBuilder) const;
324 /// Emit a CSet for a FP compare.
325 ///
326 /// \p Dst is expected to be a 32-bit scalar register.
327 MachineInstr *emitCSetForFCmp(Register Dst, CmpInst::Predicate Pred,
328 MachineIRBuilder &MIRBuilder) const;
329
330 /// Emit an instruction that sets NZCV to the carry-in expected by \p I.
331 /// Might elide the instruction if the previous instruction already sets NZCV
332 /// correctly.
333 MachineInstr *emitCarryIn(MachineInstr &I, Register CarryReg);
334
335 /// Emit the overflow op for \p Opcode.
336 ///
337 /// \p Opcode is expected to be an overflow op's opcode, e.g. G_UADDO,
338 /// G_USUBO, etc.
339 std::pair<MachineInstr *, AArch64CC::CondCode>
340 emitOverflowOp(unsigned Opcode, Register Dst, MachineOperand &LHS,
341 MachineOperand &RHS, MachineIRBuilder &MIRBuilder) const;
342
343 bool selectOverflowOp(MachineInstr &I, MachineRegisterInfo &MRI);
344
345 /// Emit expression as a conjunction (a series of CCMP/CFCMP ops).
346 /// In some cases this is even possible with OR operations in the expression.
347 MachineInstr *emitConjunction(Register Val, AArch64CC::CondCode &OutCC,
348 MachineIRBuilder &MIB) const;
349 MachineInstr *emitConditionalComparison(Register LHS, Register RHS,
350 CmpInst::Predicate CC,
351 AArch64CC::CondCode Predicate,
352 AArch64CC::CondCode OutCC,
353 MachineIRBuilder &MIB) const;
354 MachineInstr *emitConjunctionRec(Register Val, AArch64CC::CondCode &OutCC,
355 bool Negate, Register CCOp,
356 AArch64CC::CondCode Predicate,
357 MachineIRBuilder &MIB) const;
358
359 /// Emit a TB(N)Z instruction which tests \p Bit in \p TestReg.
360 /// \p IsNegative is true if the test should be "not zero".
361 /// This will also optimize the test bit instruction when possible.
362 MachineInstr *emitTestBit(Register TestReg, uint64_t Bit, bool IsNegative,
363 MachineBasicBlock *DstMBB,
364 MachineIRBuilder &MIB) const;
365
366 /// Emit a CB(N)Z instruction which branches to \p DestMBB.
367 MachineInstr *emitCBZ(Register CompareReg, bool IsNegative,
368 MachineBasicBlock *DestMBB,
369 MachineIRBuilder &MIB) const;
370
371 // Equivalent to the i32shift_a and friends from AArch64InstrInfo.td.
372 // We use these manually instead of using the importer since it doesn't
373 // support SDNodeXForm.
374 ComplexRendererFns selectShiftA_32(const MachineOperand &Root) const;
375 ComplexRendererFns selectShiftB_32(const MachineOperand &Root) const;
376 ComplexRendererFns selectShiftA_64(const MachineOperand &Root) const;
377 ComplexRendererFns selectShiftB_64(const MachineOperand &Root) const;
378
379 ComplexRendererFns select12BitValueWithLeftShift(uint64_t Immed) const;
380 ComplexRendererFns selectArithImmed(MachineOperand &Root) const;
381 ComplexRendererFns selectNegArithImmed(MachineOperand &Root) const;
382
383 ComplexRendererFns selectAddrModeUnscaled(MachineOperand &Root,
384 unsigned Size) const;
385
386 ComplexRendererFns selectAddrModeUnscaled8(MachineOperand &Root) const {
387 return selectAddrModeUnscaled(Root, 1);
388 }
389 ComplexRendererFns selectAddrModeUnscaled16(MachineOperand &Root) const {
390 return selectAddrModeUnscaled(Root, 2);
391 }
392 ComplexRendererFns selectAddrModeUnscaled32(MachineOperand &Root) const {
393 return selectAddrModeUnscaled(Root, 4);
394 }
395 ComplexRendererFns selectAddrModeUnscaled64(MachineOperand &Root) const {
396 return selectAddrModeUnscaled(Root, 8);
397 }
398 ComplexRendererFns selectAddrModeUnscaled128(MachineOperand &Root) const {
399 return selectAddrModeUnscaled(Root, 16);
400 }
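
 // A small illustrative note (register names are hypothetical, not taken from
 // this file): the "unscaled" matchers above feed the LDUR/STUR family, which
 // takes a signed 9-bit byte offset that is not scaled by the access size.
 // For example, an 8-byte G_LOAD whose address is
 //   %off:gpr(s64) = G_CONSTANT i64 -8
 //   %addr:gpr(p0) = G_PTR_ADD %base, %off
 // can be rendered as { base = %base, offset = -8 } and selected to
 //   %val = LDURXi %base, -8
 // whereas the scaled, indexed forms below need a non-negative offset that is
 // a multiple of the access size.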
401
402 /// Helper to try to fold in a GISEL_ADD_LOW into an immediate, to be used
403 /// from complex pattern matchers like selectAddrModeIndexed().
404 ComplexRendererFns tryFoldAddLowIntoImm(MachineInstr &RootDef, unsigned Size,
405 MachineRegisterInfo &MRI) const;
406
407 ComplexRendererFns selectAddrModeIndexed(MachineOperand &Root,
408 unsigned Size) const;
409 template <int Width>
410 ComplexRendererFns selectAddrModeIndexed(MachineOperand &Root) const {
411 return selectAddrModeIndexed(Root, Width / 8);
412 }
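
 // Illustrative example (register names are hypothetical): for an 8-byte
 // access, selectAddrModeIndexed<64> can fold a constant offset that is a
 // multiple of the access size, e.g.
 //   %off:gpr(s64) = G_CONSTANT i64 16
 //   %addr:gpr(p0) = G_PTR_ADD %base, %off
 //   %val:gpr(s64) = G_LOAD %addr
 // renders as { base = %base, imm = 2 } (16 bytes / 8 bytes per element), so
 // the imported pattern can emit LDRXui %base, 2.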
413
414 bool isWorthFoldingIntoExtendedReg(MachineInstr &MI,
415 const MachineRegisterInfo &MRI) const;
416 ComplexRendererFns
417 selectAddrModeShiftedExtendXReg(MachineOperand &Root,
418 unsigned SizeInBytes) const;
419
420 /// Returns a \p ComplexRendererFns which contains a base, offset, and whether
421 /// or not a shift + extend should be folded into an addressing mode. Returns
422 /// std::nullopt when this is not profitable or possible.
423 ComplexRendererFns
424 selectExtendedSHL(MachineOperand &Root, MachineOperand &Base,
425 MachineOperand &Offset, unsigned SizeInBytes,
426 bool WantsExt) const;
427 ComplexRendererFns selectAddrModeRegisterOffset(MachineOperand &Root) const;
428 ComplexRendererFns selectAddrModeXRO(MachineOperand &Root,
429 unsigned SizeInBytes) const;
430 template <int Width>
431 ComplexRendererFns selectAddrModeXRO(MachineOperand &Root) const {
432 return selectAddrModeXRO(Root, Width / 8);
433 }
434
435 ComplexRendererFns selectAddrModeWRO(MachineOperand &Root,
436 unsigned SizeInBytes) const;
437 template <int Width>
438 ComplexRendererFns selectAddrModeWRO(MachineOperand &Root) const {
439 return selectAddrModeWRO(Root, Width / 8);
440 }
441
442 ComplexRendererFns selectShiftedRegister(MachineOperand &Root,
443 bool AllowROR = false) const;
444
445 ComplexRendererFns selectArithShiftedRegister(MachineOperand &Root) const {
446 return selectShiftedRegister(Root);
447 }
448
449 ComplexRendererFns selectLogicalShiftedRegister(MachineOperand &Root) const {
450 return selectShiftedRegister(Root, true);
451 }
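
 // Illustrative note (register numbers are arbitrary): the only difference
 // between the two wrappers above is that logical instructions also accept a
 // rotate-right shifted operand, while arithmetic ones do not:
 //   and w0, w1, w2, ror #3   // valid
 //   add w0, w1, w2, ror #3   // no such encoding; only LSL/LSR/ASR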
452
453 /// Given an extend instruction, determine the correct shift-extend type for
454 /// that instruction.
455 ///
456 /// If the instruction is going to be used in a load or store, pass
457 /// \p IsLoadStore = true.
458 AArch64_AM::ShiftExtendType
459 getExtendTypeForInst(MachineInstr &MI, MachineRegisterInfo &MRI,
460 bool IsLoadStore = false) const;
461
462 /// Move \p Reg to \p RC if \p Reg is not already on \p RC.
463 ///
464 /// \returns Either \p Reg if no change was necessary, or the new register
465 /// created by moving \p Reg.
466 ///
467 /// Note: This uses emitCopy right now.
468 Register moveScalarRegClass(Register Reg, const TargetRegisterClass &RC,
469 MachineIRBuilder &MIB) const;
470
471 ComplexRendererFns selectArithExtendedRegister(MachineOperand &Root) const;
472
473 ComplexRendererFns selectExtractHigh(MachineOperand &Root) const;
474
475 void renderTruncImm(MachineInstrBuilder &MIB, const MachineInstr &MI,
476 int OpIdx = -1) const;
477 void renderLogicalImm32(MachineInstrBuilder &MIB, const MachineInstr &I,
478 int OpIdx = -1) const;
479 void renderLogicalImm64(MachineInstrBuilder &MIB, const MachineInstr &I,
480 int OpIdx = -1) const;
481 void renderUbsanTrap(MachineInstrBuilder &MIB, const MachineInstr &MI,
482 int OpIdx) const;
483 void renderFPImm16(MachineInstrBuilder &MIB, const MachineInstr &MI,
484 int OpIdx = -1) const;
485 void renderFPImm32(MachineInstrBuilder &MIB, const MachineInstr &MI,
486 int OpIdx = -1) const;
487 void renderFPImm64(MachineInstrBuilder &MIB, const MachineInstr &MI,
488 int OpIdx = -1) const;
489 void renderFPImm32SIMDModImmType4(MachineInstrBuilder &MIB,
490 const MachineInstr &MI,
491 int OpIdx = -1) const;
492
493 // Materialize a GlobalValue or BlockAddress using a movz+movk sequence.
494 void materializeLargeCMVal(MachineInstr &I, const Value *V, unsigned OpFlags);
495
496 // Optimization methods.
497 bool tryOptSelect(GSelect &Sel);
498 bool tryOptSelectConjunction(GSelect &Sel, MachineInstr &CondMI);
499 MachineInstr *tryFoldIntegerCompare(MachineOperand &LHS, MachineOperand &RHS,
500 MachineOperand &Predicate,
501 MachineIRBuilder &MIRBuilder) const;
502
503 /// Return true if \p MI is a load or store of \p NumBytes bytes.
504 bool isLoadStoreOfNumBytes(const MachineInstr &MI, unsigned NumBytes) const;
505
506 /// Returns true if \p MI is guaranteed to have the high-half of a 64-bit
507 /// register zeroed out. In other words, the result of MI has been explicitly
508 /// zero extended.
509 bool isDef32(const MachineInstr &MI) const;
510
511 const AArch64TargetMachine &TM;
512 const AArch64Subtarget &STI;
513 const AArch64InstrInfo &TII;
514 const AArch64RegisterInfo &TRI;
515 const AArch64RegisterBankInfo &RBI;
516
517 bool ProduceNonFlagSettingCondBr = false;
518
519 // Some cached values used during selection.
520 // We use LR as a live-in register, and we keep track of it here as it can be
521 // clobbered by calls.
522 Register MFReturnAddr;
523
524 MachineIRBuilder MIB;
525
526#define GET_GLOBALISEL_PREDICATES_DECL
527#include "AArch64GenGlobalISel.inc"
528#undef GET_GLOBALISEL_PREDICATES_DECL
529
530// We declare the temporaries used by selectImpl() in the class to minimize the
531// cost of constructing placeholder values.
532#define GET_GLOBALISEL_TEMPORARIES_DECL
533#include "AArch64GenGlobalISel.inc"
534#undef GET_GLOBALISEL_TEMPORARIES_DECL
535};
536
537} // end anonymous namespace
538
539#define GET_GLOBALISEL_IMPL
540#include "AArch64GenGlobalISel.inc"
541#undef GET_GLOBALISEL_IMPL
542
543AArch64InstructionSelector::AArch64InstructionSelector(
544 const AArch64TargetMachine &TM, const AArch64Subtarget &STI,
545 const AArch64RegisterBankInfo &RBI)
546 : TM(TM), STI(STI), TII(*STI.getInstrInfo()), TRI(*STI.getRegisterInfo()),
547 RBI(RBI),
548#define GET_GLOBALISEL_PREDICATES_INIT
549#include "AArch64GenGlobalISel.inc"
550#undef GET_GLOBALISEL_PREDICATES_INIT
551#define GET_GLOBALISEL_TEMPORARIES_INIT
552#include "AArch64GenGlobalISel.inc"
553#undef GET_GLOBALISEL_TEMPORARIES_INIT
554{
555}
556
557// FIXME: This should be target-independent, inferred from the types declared
558// for each class in the bank.
559//
560/// Given a register bank, and a type, return the smallest register class that
561/// can represent that combination.
562static const TargetRegisterClass *
563getRegClassForTypeOnBank(LLT Ty, const RegisterBank &RB,
564 bool GetAllRegSet = false) {
565 if (RB.getID() == AArch64::GPRRegBankID) {
566 if (Ty.getSizeInBits() <= 32)
567 return GetAllRegSet ? &AArch64::GPR32allRegClass
568 : &AArch64::GPR32RegClass;
569 if (Ty.getSizeInBits() == 64)
570 return GetAllRegSet ? &AArch64::GPR64allRegClass
571 : &AArch64::GPR64RegClass;
572 if (Ty.getSizeInBits() == 128)
573 return &AArch64::XSeqPairsClassRegClass;
574 return nullptr;
575 }
576
577 if (RB.getID() == AArch64::FPRRegBankID) {
578 switch (Ty.getSizeInBits()) {
579 case 8:
580 return &AArch64::FPR8RegClass;
581 case 16:
582 return &AArch64::FPR16RegClass;
583 case 32:
584 return &AArch64::FPR32RegClass;
585 case 64:
586 return &AArch64::FPR64RegClass;
587 case 128:
588 return &AArch64::FPR128RegClass;
589 }
590 return nullptr;
591 }
592
593 return nullptr;
594}
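
// Illustrative usage (a sketch, not a call taken from this file): a scalar
// s64 on the GPR bank resolves to &AArch64::GPR64RegClass (or GPR64all when
// GetAllRegSet is true), while a v4s32 on the FPR bank has a total size of
// 128 bits and resolves to &AArch64::FPR128RegClass; unsupported combinations
// return nullptr.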
595
596/// Given a register bank, and size in bits, return the smallest register class
597/// that can represent that combination.
598static const TargetRegisterClass *
599getMinClassForRegBank(const RegisterBank &RB, unsigned SizeInBits,
600 bool GetAllRegSet = false) {
601 unsigned RegBankID = RB.getID();
602
603 if (RegBankID == AArch64::GPRRegBankID) {
604 if (SizeInBits <= 32)
605 return GetAllRegSet ? &AArch64::GPR32allRegClass
606 : &AArch64::GPR32RegClass;
607 if (SizeInBits == 64)
608 return GetAllRegSet ? &AArch64::GPR64allRegClass
609 : &AArch64::GPR64RegClass;
610 if (SizeInBits == 128)
611 return &AArch64::XSeqPairsClassRegClass;
612 }
613
614 if (RegBankID == AArch64::FPRRegBankID) {
615 switch (SizeInBits) {
616 default:
617 return nullptr;
618 case 8:
619 return &AArch64::FPR8RegClass;
620 case 16:
621 return &AArch64::FPR16RegClass;
622 case 32:
623 return &AArch64::FPR32RegClass;
624 case 64:
625 return &AArch64::FPR64RegClass;
626 case 128:
627 return &AArch64::FPR128RegClass;
628 }
629 }
630
631 return nullptr;
632}
633
634/// Returns the correct subregister to use for a given register class.
635 static bool getSubRegForClass(const TargetRegisterClass *RC,
636 const TargetRegisterInfo &TRI, unsigned &SubReg) {
637 switch (TRI.getRegSizeInBits(*RC)) {
638 case 8:
639 SubReg = AArch64::bsub;
640 break;
641 case 16:
642 SubReg = AArch64::hsub;
643 break;
644 case 32:
645 if (RC != &AArch64::FPR32RegClass)
646 SubReg = AArch64::sub_32;
647 else
648 SubReg = AArch64::ssub;
649 break;
650 case 64:
651 SubReg = AArch64::dsub;
652 break;
653 default:
654 LLVM_DEBUG(
655 dbgs() << "Couldn't find appropriate subregister for register class.");
656 return false;
657 }
658
659 return true;
660}
661
662/// Returns the minimum size the given register bank can hold.
663static unsigned getMinSizeForRegBank(const RegisterBank &RB) {
664 switch (RB.getID()) {
665 case AArch64::GPRRegBankID:
666 return 32;
667 case AArch64::FPRRegBankID:
668 return 8;
669 default:
670 llvm_unreachable("Tried to get minimum size for unknown register bank.");
671 }
672}
673
674/// Create a REG_SEQUENCE instruction using the registers in \p Regs.
675/// Helper function for functions like createDTuple and createQTuple.
676///
677/// \p RegClassIDs - The list of register class IDs available for some tuple of
678/// a scalar class. E.g. QQRegClassID, QQQRegClassID, QQQQRegClassID. This is
679/// expected to contain between 2 and 4 tuple classes.
680///
681/// \p SubRegs - The list of subregister classes associated with each register
682/// class ID in \p RegClassIDs. E.g., QQRegClassID should use the qsub0
683/// subregister class. The index of each subregister class is expected to
684/// correspond with the index of each register class.
685///
686/// \returns Either the destination register of REG_SEQUENCE instruction that
687/// was created, or the 0th element of \p Regs if \p Regs contains a single
688/// element.
689 static Register createTuple(ArrayRef<Register> Regs,
690 const unsigned RegClassIDs[],
691 const unsigned SubRegs[], MachineIRBuilder &MIB) {
692 unsigned NumRegs = Regs.size();
693 if (NumRegs == 1)
694 return Regs[0];
695 assert(NumRegs >= 2 && NumRegs <= 4 &&
696 "Only support between two and 4 registers in a tuple!");
697 const TargetRegisterInfo *TRI = MIB.getMF().getSubtarget().getRegisterInfo();
698 auto *DesiredClass = TRI->getRegClass(RegClassIDs[NumRegs - 2]);
699 auto RegSequence =
700 MIB.buildInstr(TargetOpcode::REG_SEQUENCE, {DesiredClass}, {});
701 for (unsigned I = 0, E = Regs.size(); I < E; ++I) {
702 RegSequence.addUse(Regs[I]);
703 RegSequence.addImm(SubRegs[I]);
704 }
705 return RegSequence.getReg(0);
706}
707
708/// Create a tuple of D-registers using the registers in \p Regs.
709 static Register createDTuple(ArrayRef<Register> Regs, MachineIRBuilder &MIB) {
710 static const unsigned RegClassIDs[] = {
711 AArch64::DDRegClassID, AArch64::DDDRegClassID, AArch64::DDDDRegClassID};
712 static const unsigned SubRegs[] = {AArch64::dsub0, AArch64::dsub1,
713 AArch64::dsub2, AArch64::dsub3};
714 return createTuple(Regs, RegClassIDs, SubRegs, MIB);
715}
716
717/// Create a tuple of Q-registers using the registers in \p Regs.
718 static Register createQTuple(ArrayRef<Register> Regs, MachineIRBuilder &MIB) {
719 static const unsigned RegClassIDs[] = {
720 AArch64::QQRegClassID, AArch64::QQQRegClassID, AArch64::QQQQRegClassID};
721 static const unsigned SubRegs[] = {AArch64::qsub0, AArch64::qsub1,
722 AArch64::qsub2, AArch64::qsub3};
723 return createTuple(Regs, RegClassIDs, SubRegs, MIB);
724}
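
// Illustrative example (virtual register names are hypothetical): when
// selecting an @llvm.aarch64.neon.st2 of two 128-bit vectors, the values are
// first grouped with createQTuple({%v0, %v1}, MIB), which builds
//   %tuple:qq = REG_SEQUENCE %v0, %subreg.qsub0, %v1, %subreg.qsub1
// so the ST2 instruction can consume a single QQ-class operand.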
725
726static std::optional<uint64_t> getImmedFromMO(const MachineOperand &Root) {
727 auto &MI = *Root.getParent();
728 auto &MBB = *MI.getParent();
729 auto &MF = *MBB.getParent();
730 auto &MRI = MF.getRegInfo();
731 uint64_t Immed;
732 if (Root.isImm())
733 Immed = Root.getImm();
734 else if (Root.isCImm())
735 Immed = Root.getCImm()->getZExtValue();
736 else if (Root.isReg()) {
737 auto ValAndVReg =
738 getIConstantVRegValWithLookThrough(Root.getReg(), MRI, true);
739 if (!ValAndVReg)
740 return std::nullopt;
741 Immed = ValAndVReg->Value.getSExtValue();
742 } else
743 return std::nullopt;
744 return Immed;
745}
746
747/// Check whether \p I is a currently unsupported binary operation:
748/// - it has an unsized type
749/// - an operand is not a vreg
750 /// - not all operands are in the same bank
751/// These are checks that should someday live in the verifier, but right now,
752/// these are mostly limitations of the aarch64 selector.
753static bool unsupportedBinOp(const MachineInstr &I,
754 const AArch64RegisterBankInfo &RBI,
755 const MachineRegisterInfo &MRI,
756 const AArch64RegisterInfo &TRI) {
757 LLT Ty = MRI.getType(I.getOperand(0).getReg());
758 if (!Ty.isValid()) {
759 LLVM_DEBUG(dbgs() << "Generic binop register should be typed\n");
760 return true;
761 }
762
763 const RegisterBank *PrevOpBank = nullptr;
764 for (auto &MO : I.operands()) {
765 // FIXME: Support non-register operands.
766 if (!MO.isReg()) {
767 LLVM_DEBUG(dbgs() << "Generic inst non-reg operands are unsupported\n");
768 return true;
769 }
770
771 // FIXME: Can generic operations have physical registers operands? If
772 // so, this will need to be taught about that, and we'll need to get the
773 // bank out of the minimal class for the register.
774 // Either way, this needs to be documented (and possibly verified).
775 if (!MO.getReg().isVirtual()) {
776 LLVM_DEBUG(dbgs() << "Generic inst has physical register operand\n");
777 return true;
778 }
779
780 const RegisterBank *OpBank = RBI.getRegBank(MO.getReg(), MRI, TRI);
781 if (!OpBank) {
782 LLVM_DEBUG(dbgs() << "Generic register has no bank or class\n");
783 return true;
784 }
785
786 if (PrevOpBank && OpBank != PrevOpBank) {
787 LLVM_DEBUG(dbgs() << "Generic inst operands have different banks\n");
788 return true;
789 }
790 PrevOpBank = OpBank;
791 }
792 return false;
793}
794
795/// Select the AArch64 opcode for the basic binary operation \p GenericOpc
796/// (such as G_OR or G_SDIV), appropriate for the register bank \p RegBankID
797/// and of size \p OpSize.
798/// \returns \p GenericOpc if the combination is unsupported.
799static unsigned selectBinaryOp(unsigned GenericOpc, unsigned RegBankID,
800 unsigned OpSize) {
801 switch (RegBankID) {
802 case AArch64::GPRRegBankID:
803 if (OpSize == 32) {
804 switch (GenericOpc) {
805 case TargetOpcode::G_SHL:
806 return AArch64::LSLVWr;
807 case TargetOpcode::G_LSHR:
808 return AArch64::LSRVWr;
809 case TargetOpcode::G_ASHR:
810 return AArch64::ASRVWr;
811 default:
812 return GenericOpc;
813 }
814 } else if (OpSize == 64) {
815 switch (GenericOpc) {
816 case TargetOpcode::G_PTR_ADD:
817 return AArch64::ADDXrr;
818 case TargetOpcode::G_SHL:
819 return AArch64::LSLVXr;
820 case TargetOpcode::G_LSHR:
821 return AArch64::LSRVXr;
822 case TargetOpcode::G_ASHR:
823 return AArch64::ASRVXr;
824 default:
825 return GenericOpc;
826 }
827 }
828 break;
829 case AArch64::FPRRegBankID:
830 switch (OpSize) {
831 case 32:
832 switch (GenericOpc) {
833 case TargetOpcode::G_FADD:
834 return AArch64::FADDSrr;
835 case TargetOpcode::G_FSUB:
836 return AArch64::FSUBSrr;
837 case TargetOpcode::G_FMUL:
838 return AArch64::FMULSrr;
839 case TargetOpcode::G_FDIV:
840 return AArch64::FDIVSrr;
841 default:
842 return GenericOpc;
843 }
844 case 64:
845 switch (GenericOpc) {
846 case TargetOpcode::G_FADD:
847 return AArch64::FADDDrr;
848 case TargetOpcode::G_FSUB:
849 return AArch64::FSUBDrr;
850 case TargetOpcode::G_FMUL:
851 return AArch64::FMULDrr;
852 case TargetOpcode::G_FDIV:
853 return AArch64::FDIVDrr;
854 case TargetOpcode::G_OR:
855 return AArch64::ORRv8i8;
856 default:
857 return GenericOpc;
858 }
859 }
860 break;
861 }
862 return GenericOpc;
863}
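
// Illustrative usage (a sketch, not a call taken from this file): for a
// 64-bit shift on the GPR bank,
//   selectBinaryOp(TargetOpcode::G_SHL, AArch64::GPRRegBankID, 64)
// yields AArch64::LSLVXr, while an unsupported query such as a 16-bit G_SHL
// simply gets the generic opcode back so the caller can bail out.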
864
865/// Select the AArch64 opcode for the G_LOAD or G_STORE operation \p GenericOpc,
866/// appropriate for the (value) register bank \p RegBankID and of memory access
867/// size \p OpSize. This returns the variant with the base+unsigned-immediate
868/// addressing mode (e.g., LDRXui).
869/// \returns \p GenericOpc if the combination is unsupported.
870static unsigned selectLoadStoreUIOp(unsigned GenericOpc, unsigned RegBankID,
871 unsigned OpSize) {
872 const bool isStore = GenericOpc == TargetOpcode::G_STORE;
873 switch (RegBankID) {
874 case AArch64::GPRRegBankID:
875 switch (OpSize) {
876 case 8:
877 return isStore ? AArch64::STRBBui : AArch64::LDRBBui;
878 case 16:
879 return isStore ? AArch64::STRHHui : AArch64::LDRHHui;
880 case 32:
881 return isStore ? AArch64::STRWui : AArch64::LDRWui;
882 case 64:
883 return isStore ? AArch64::STRXui : AArch64::LDRXui;
884 }
885 break;
886 case AArch64::FPRRegBankID:
887 switch (OpSize) {
888 case 8:
889 return isStore ? AArch64::STRBui : AArch64::LDRBui;
890 case 16:
891 return isStore ? AArch64::STRHui : AArch64::LDRHui;
892 case 32:
893 return isStore ? AArch64::STRSui : AArch64::LDRSui;
894 case 64:
895 return isStore ? AArch64::STRDui : AArch64::LDRDui;
896 case 128:
897 return isStore ? AArch64::STRQui : AArch64::LDRQui;
898 }
899 break;
900 }
901 return GenericOpc;
902}
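
// Illustrative usage (a sketch): a 32-bit G_LOAD whose value lives on the FPR
// bank maps to the unsigned-immediate form, i.e.
//   selectLoadStoreUIOp(TargetOpcode::G_LOAD, AArch64::FPRRegBankID, 32)
// returns AArch64::LDRSui, and the corresponding G_STORE returns
// AArch64::STRSui.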
903
904/// Helper function for selectCopy. Inserts a subregister copy from \p SrcReg
905/// to \p *To.
906///
907/// E.g "To = COPY SrcReg:SubReg"
908 static bool copySubReg(MachineInstr &I, MachineRegisterInfo &MRI,
909 const RegisterBankInfo &RBI, Register SrcReg,
910 const TargetRegisterClass *To, unsigned SubReg) {
911 assert(SrcReg.isValid() && "Expected a valid source register?");
912 assert(To && "Destination register class cannot be null");
913 assert(SubReg && "Expected a valid subregister");
914
915 MachineIRBuilder MIB(I);
916 auto SubRegCopy =
917 MIB.buildInstr(TargetOpcode::COPY, {To}, {}).addReg(SrcReg, 0, SubReg);
918 MachineOperand &RegOp = I.getOperand(1);
919 RegOp.setReg(SubRegCopy.getReg(0));
920
921 // It's possible that the destination register won't be constrained. Make
922 // sure that happens.
923 if (!I.getOperand(0).getReg().isPhysical())
924 RBI.constrainGenericRegister(I.getOperand(0).getReg(), *To, MRI);
925
926 return true;
927}
928
929/// Helper function to get the source and destination register classes for a
930/// copy. Returns a std::pair containing the source register class for the
931/// copy, and the destination register class for the copy. If a register class
932/// cannot be determined, then it will be nullptr.
933static std::pair<const TargetRegisterClass *, const TargetRegisterClass *>
934 getRegClassesForCopy(MachineInstr &I, const TargetInstrInfo &TII,
935 MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI,
936 const RegisterBankInfo &RBI) {
937 Register DstReg = I.getOperand(0).getReg();
938 Register SrcReg = I.getOperand(1).getReg();
939 const RegisterBank &DstRegBank = *RBI.getRegBank(DstReg, MRI, TRI);
940 const RegisterBank &SrcRegBank = *RBI.getRegBank(SrcReg, MRI, TRI);
941 unsigned DstSize = RBI.getSizeInBits(DstReg, MRI, TRI);
942 unsigned SrcSize = RBI.getSizeInBits(SrcReg, MRI, TRI);
943
944 // Special casing for cross-bank copies of s1s. We can technically represent
945 // a 1-bit value with any size of register. The minimum size for a GPR is 32
946 // bits. So, we need to put the FPR on 32 bits as well.
947 //
948 // FIXME: I'm not sure if this case holds true outside of copies. If it does,
949 // then we can pull it into the helpers that get the appropriate class for a
950 // register bank. Or make a new helper that carries along some constraint
951 // information.
952 if (SrcRegBank != DstRegBank && (DstSize == 1 && SrcSize == 1))
953 SrcSize = DstSize = 32;
954
955 return {getMinClassForRegBank(SrcRegBank, SrcSize, true),
956 getMinClassForRegBank(DstRegBank, DstSize, true)};
957}
958
959// FIXME: We need some sort of API in RBI/TRI to allow generic code to
960// constrain operands of simple instructions given a TargetRegisterClass
961// and LLT
962 static bool selectDebugInstr(MachineInstr &I, MachineRegisterInfo &MRI,
963 const RegisterBankInfo &RBI) {
964 for (MachineOperand &MO : I.operands()) {
965 if (!MO.isReg())
966 continue;
967 Register Reg = MO.getReg();
968 if (!Reg)
969 continue;
970 if (Reg.isPhysical())
971 continue;
972 LLT Ty = MRI.getType(Reg);
973 const RegClassOrRegBank &RegClassOrBank = MRI.getRegClassOrRegBank(Reg);
974 const TargetRegisterClass *RC =
975 RegClassOrBank.dyn_cast<const TargetRegisterClass *>();
976 if (!RC) {
977 const RegisterBank &RB = *RegClassOrBank.get<const RegisterBank *>();
978 RC = getRegClassForTypeOnBank(Ty, RB);
979 if (!RC) {
980 LLVM_DEBUG(
981 dbgs() << "Warning: DBG_VALUE operand has unexpected size/bank\n");
982 break;
983 }
984 }
985 RBI.constrainGenericRegister(Reg, *RC, MRI);
986 }
987
988 return true;
989}
990
991 static bool selectCopy(MachineInstr &I, const TargetInstrInfo &TII,
992 MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI,
993 const RegisterBankInfo &RBI) {
994 Register DstReg = I.getOperand(0).getReg();
995 Register SrcReg = I.getOperand(1).getReg();
996 const RegisterBank &DstRegBank = *RBI.getRegBank(DstReg, MRI, TRI);
997 const RegisterBank &SrcRegBank = *RBI.getRegBank(SrcReg, MRI, TRI);
998
999 // Find the correct register classes for the source and destination registers.
1000 const TargetRegisterClass *SrcRC;
1001 const TargetRegisterClass *DstRC;
1002 std::tie(SrcRC, DstRC) = getRegClassesForCopy(I, TII, MRI, TRI, RBI);
1003
1004 if (!DstRC) {
1005 LLVM_DEBUG(dbgs() << "Unexpected dest size "
1006 << RBI.getSizeInBits(DstReg, MRI, TRI) << '\n');
1007 return false;
1008 }
1009
1010 // Is this a copy? If so, then we may need to insert a subregister copy.
1011 if (I.isCopy()) {
1012 // Yes. Check if there's anything to fix up.
1013 if (!SrcRC) {
1014 LLVM_DEBUG(dbgs() << "Couldn't determine source register class\n");
1015 return false;
1016 }
1017
1018 unsigned SrcSize = TRI.getRegSizeInBits(*SrcRC);
1019 unsigned DstSize = TRI.getRegSizeInBits(*DstRC);
1020 unsigned SubReg;
1021
1022 // If the source bank doesn't support a subregister copy small enough,
1023 // then we first need to copy to the destination bank.
1024 if (getMinSizeForRegBank(SrcRegBank) > DstSize) {
1025 const TargetRegisterClass *DstTempRC =
1026 getMinClassForRegBank(DstRegBank, SrcSize, /* GetAllRegSet */ true);
1027 getSubRegForClass(DstRC, TRI, SubReg);
1028
1029 MachineIRBuilder MIB(I);
1030 auto Copy = MIB.buildCopy({DstTempRC}, {SrcReg});
1031 copySubReg(I, MRI, RBI, Copy.getReg(0), DstRC, SubReg);
1032 } else if (SrcSize > DstSize) {
1033 // If the source register is bigger than the destination we need to
1034 // perform a subregister copy.
1035 const TargetRegisterClass *SubRegRC =
1036 getMinClassForRegBank(SrcRegBank, DstSize, /* GetAllRegSet */ true);
1037 getSubRegForClass(SubRegRC, TRI, SubReg);
1038 copySubReg(I, MRI, RBI, SrcReg, DstRC, SubReg);
1039 } else if (DstSize > SrcSize) {
1040 // If the destination register is bigger than the source we need to do
1041 // a promotion using SUBREG_TO_REG.
1042 const TargetRegisterClass *PromotionRC =
1043 getMinClassForRegBank(SrcRegBank, DstSize, /* GetAllRegSet */ true);
1044 getSubRegForClass(SrcRC, TRI, SubReg);
1045
1046 Register PromoteReg = MRI.createVirtualRegister(PromotionRC);
1047 BuildMI(*I.getParent(), I, I.getDebugLoc(),
1048 TII.get(AArch64::SUBREG_TO_REG), PromoteReg)
1049 .addImm(0)
1050 .addUse(SrcReg)
1051 .addImm(SubReg);
1052 MachineOperand &RegOp = I.getOperand(1);
1053 RegOp.setReg(PromoteReg);
1054 }
1055
1056 // If the destination is a physical register, then there's nothing to
1057 // change, so we're done.
1058 if (DstReg.isPhysical())
1059 return true;
1060 }
1061
1062 // No need to constrain SrcReg. It will get constrained when we hit another
1063 // of its uses or defs. Copies do not have constraints.
1064 if (!RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) {
1065 LLVM_DEBUG(dbgs() << "Failed to constrain " << TII.getName(I.getOpcode())
1066 << " operand\n");
1067 return false;
1068 }
1069
1070 // If this is a GPR ZEXT that we want to just reduce down into a copy.
1071 // The sizes will be mismatched with the source < 32b but that's ok.
1072 if (I.getOpcode() == TargetOpcode::G_ZEXT) {
1073 I.setDesc(TII.get(AArch64::COPY));
1074 assert(SrcRegBank.getID() == AArch64::GPRRegBankID);
1075 return selectCopy(I, TII, MRI, TRI, RBI);
1076 }
1077
1078 I.setDesc(TII.get(AArch64::COPY));
1079 return true;
1080}
1081
1082static unsigned selectFPConvOpc(unsigned GenericOpc, LLT DstTy, LLT SrcTy) {
1083 if (!DstTy.isScalar() || !SrcTy.isScalar())
1084 return GenericOpc;
1085
1086 const unsigned DstSize = DstTy.getSizeInBits();
1087 const unsigned SrcSize = SrcTy.getSizeInBits();
1088
1089 switch (DstSize) {
1090 case 32:
1091 switch (SrcSize) {
1092 case 32:
1093 switch (GenericOpc) {
1094 case TargetOpcode::G_SITOFP:
1095 return AArch64::SCVTFUWSri;
1096 case TargetOpcode::G_UITOFP:
1097 return AArch64::UCVTFUWSri;
1098 case TargetOpcode::G_FPTOSI:
1099 return AArch64::FCVTZSUWSr;
1100 case TargetOpcode::G_FPTOUI:
1101 return AArch64::FCVTZUUWSr;
1102 default:
1103 return GenericOpc;
1104 }
1105 case 64:
1106 switch (GenericOpc) {
1107 case TargetOpcode::G_SITOFP:
1108 return AArch64::SCVTFUXSri;
1109 case TargetOpcode::G_UITOFP:
1110 return AArch64::UCVTFUXSri;
1111 case TargetOpcode::G_FPTOSI:
1112 return AArch64::FCVTZSUWDr;
1113 case TargetOpcode::G_FPTOUI:
1114 return AArch64::FCVTZUUWDr;
1115 default:
1116 return GenericOpc;
1117 }
1118 default:
1119 return GenericOpc;
1120 }
1121 case 64:
1122 switch (SrcSize) {
1123 case 32:
1124 switch (GenericOpc) {
1125 case TargetOpcode::G_SITOFP:
1126 return AArch64::SCVTFUWDri;
1127 case TargetOpcode::G_UITOFP:
1128 return AArch64::UCVTFUWDri;
1129 case TargetOpcode::G_FPTOSI:
1130 return AArch64::FCVTZSUXSr;
1131 case TargetOpcode::G_FPTOUI:
1132 return AArch64::FCVTZUUXSr;
1133 default:
1134 return GenericOpc;
1135 }
1136 case 64:
1137 switch (GenericOpc) {
1138 case TargetOpcode::G_SITOFP:
1139 return AArch64::SCVTFUXDri;
1140 case TargetOpcode::G_UITOFP:
1141 return AArch64::UCVTFUXDri;
1142 case TargetOpcode::G_FPTOSI:
1143 return AArch64::FCVTZSUXDr;
1144 case TargetOpcode::G_FPTOUI:
1145 return AArch64::FCVTZUUXDr;
1146 default:
1147 return GenericOpc;
1148 }
1149 default:
1150 return GenericOpc;
1151 }
1152 default:
1153 return GenericOpc;
1154 };
1155 return GenericOpc;
1156}
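
// Illustrative usage (a sketch): converting a 32-bit integer to a 64-bit
// float,
//   selectFPConvOpc(TargetOpcode::G_SITOFP, LLT::scalar(64), LLT::scalar(32))
// returns AArch64::SCVTFUWDri (signed W-register to D-register convert);
// vector types fall through and get the generic opcode back unchanged.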
1157
1158 MachineInstr *
1159AArch64InstructionSelector::emitSelect(Register Dst, Register True,
1160 Register False, AArch64CC::CondCode CC,
1161 MachineIRBuilder &MIB) const {
1162 MachineRegisterInfo &MRI = *MIB.getMRI();
1163 assert(RBI.getRegBank(False, MRI, TRI)->getID() ==
1164 RBI.getRegBank(True, MRI, TRI)->getID() &&
1165 "Expected both select operands to have the same regbank?");
1166 LLT Ty = MRI.getType(True);
1167 if (Ty.isVector())
1168 return nullptr;
1169 const unsigned Size = Ty.getSizeInBits();
1170 assert((Size == 32 || Size == 64) &&
1171 "Expected 32 bit or 64 bit select only?");
1172 const bool Is32Bit = Size == 32;
1173 if (RBI.getRegBank(True, MRI, TRI)->getID() != AArch64::GPRRegBankID) {
1174 unsigned Opc = Is32Bit ? AArch64::FCSELSrrr : AArch64::FCSELDrrr;
1175 auto FCSel = MIB.buildInstr(Opc, {Dst}, {True, False}).addImm(CC);
1176 constrainSelectedInstRegOperands(*FCSel, TII, TRI, RBI);
1177 return &*FCSel;
1178 }
1179
1180 // By default, we'll try and emit a CSEL.
1181 unsigned Opc = Is32Bit ? AArch64::CSELWr : AArch64::CSELXr;
1182 bool Optimized = false;
1183 auto TryFoldBinOpIntoSelect = [&Opc, Is32Bit, &CC, &MRI,
1184 &Optimized](Register &Reg, Register &OtherReg,
1185 bool Invert) {
1186 if (Optimized)
1187 return false;
1188
1189 // Attempt to fold:
1190 //
1191 // %sub = G_SUB 0, %x
1192 // %select = G_SELECT cc, %reg, %sub
1193 //
1194 // Into:
1195 // %select = CSNEG %reg, %x, cc
1196 Register MatchReg;
1197 if (mi_match(Reg, MRI, m_Neg(m_Reg(MatchReg)))) {
1198 Opc = Is32Bit ? AArch64::CSNEGWr : AArch64::CSNEGXr;
1199 Reg = MatchReg;
1200 if (Invert) {
1201 CC = AArch64CC::getInvertedCondCode(CC);
1202 std::swap(Reg, OtherReg);
1203 }
1204 return true;
1205 }
1206
1207 // Attempt to fold:
1208 //
1209 // %xor = G_XOR %x, -1
1210 // %select = G_SELECT cc, %reg, %xor
1211 //
1212 // Into:
1213 // %select = CSINV %reg, %x, cc
1214 if (mi_match(Reg, MRI, m_Not(m_Reg(MatchReg)))) {
1215 Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;
1216 Reg = MatchReg;
1217 if (Invert) {
1218 CC = AArch64CC::getInvertedCondCode(CC);
1219 std::swap(Reg, OtherReg);
1220 }
1221 return true;
1222 }
1223
1224 // Attempt to fold:
1225 //
1226 // %add = G_ADD %x, 1
1227 // %select = G_SELECT cc, %reg, %add
1228 //
1229 // Into:
1230 // %select = CSINC %reg, %x, cc
1231 if (mi_match(Reg, MRI,
1232 m_any_of(m_GAdd(m_Reg(MatchReg), m_SpecificICst(1)),
1233 m_GPtrAdd(m_Reg(MatchReg), m_SpecificICst(1))))) {
1234 Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;
1235 Reg = MatchReg;
1236 if (Invert) {
1237 CC = AArch64CC::getInvertedCondCode(CC);
1238 std::swap(Reg, OtherReg);
1239 }
1240 return true;
1241 }
1242
1243 return false;
1244 };
1245
1246 // Helper lambda which tries to use CSINC/CSINV for the instruction when its
1247 // true/false values are constants.
1248 // FIXME: All of these patterns already exist in tablegen. We should be
1249 // able to import these.
1250 auto TryOptSelectCst = [&Opc, &True, &False, &CC, Is32Bit, &MRI,
1251 &Optimized]() {
1252 if (Optimized)
1253 return false;
1254 auto TrueCst = getIConstantVRegValWithLookThrough(True, MRI);
1255 auto FalseCst = getIConstantVRegValWithLookThrough(False, MRI);
1256 if (!TrueCst && !FalseCst)
1257 return false;
1258
1259 Register ZReg = Is32Bit ? AArch64::WZR : AArch64::XZR;
1260 if (TrueCst && FalseCst) {
1261 int64_t T = TrueCst->Value.getSExtValue();
1262 int64_t F = FalseCst->Value.getSExtValue();
1263
1264 if (T == 0 && F == 1) {
1265 // G_SELECT cc, 0, 1 -> CSINC zreg, zreg, cc
1266 Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;
1267 True = ZReg;
1268 False = ZReg;
1269 return true;
1270 }
1271
1272 if (T == 0 && F == -1) {
1273 // G_SELECT cc 0, -1 -> CSINV zreg, zreg cc
1274 Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;
1275 True = ZReg;
1276 False = ZReg;
1277 return true;
1278 }
1279 }
1280
1281 if (TrueCst) {
1282 int64_t T = TrueCst->Value.getSExtValue();
1283 if (T == 1) {
1284 // G_SELECT cc, 1, f -> CSINC f, zreg, inv_cc
1285 Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;
1286 True = False;
1287 False = ZReg;
1288 CC = AArch64CC::getInvertedCondCode(CC);
1289 return true;
1290 }
1291
1292 if (T == -1) {
1293 // G_SELECT cc, -1, f -> CSINV f, zreg, inv_cc
1294 Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;
1295 True = False;
1296 False = ZReg;
1297 CC = AArch64CC::getInvertedCondCode(CC);
1298 return true;
1299 }
1300 }
1301
1302 if (FalseCst) {
1303 int64_t F = FalseCst->Value.getSExtValue();
1304 if (F == 1) {
1305 // G_SELECT cc, t, 1 -> CSINC t, zreg, cc
1306 Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;
1307 False = ZReg;
1308 return true;
1309 }
1310
1311 if (F == -1) {
1312 // G_SELECT cc, t, -1 -> CSINC t, zreg, cc
1313 Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;
1314 False = ZReg;
1315 return true;
1316 }
1317 }
1318 return false;
1319 };
1320
1321 Optimized |= TryFoldBinOpIntoSelect(False, True, /*Invert = */ false);
1322 Optimized |= TryFoldBinOpIntoSelect(True, False, /*Invert = */ true);
1323 Optimized |= TryOptSelectCst();
1324 auto SelectInst = MIB.buildInstr(Opc, {Dst}, {True, False}).addImm(CC);
1325 constrainSelectedInstRegOperands(*SelectInst, TII, TRI, RBI);
1326 return &*SelectInst;
1327}
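
// For reference, the conditional-select family used above behaves as follows
// (a summary, not code from this file):
//   CSEL  d, a, b, cc : d = cc ? a : b
//   CSINC d, a, b, cc : d = cc ? a : b + 1
//   CSINV d, a, b, cc : d = cc ? a : ~b
//   CSNEG d, a, b, cc : d = cc ? a : -b
// which is why folding a G_ADD/G_XOR/G_SUB on one operand only needs an
// opcode change, plus an inverted condition when the fold happens on the
// true side.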
1328
1329 static AArch64CC::CondCode changeICMPPredToAArch64CC(CmpInst::Predicate P) {
1330 switch (P) {
1331 default:
1332 llvm_unreachable("Unknown condition code!");
1333 case CmpInst::ICMP_NE:
1334 return AArch64CC::NE;
1335 case CmpInst::ICMP_EQ:
1336 return AArch64CC::EQ;
1337 case CmpInst::ICMP_SGT:
1338 return AArch64CC::GT;
1339 case CmpInst::ICMP_SGE:
1340 return AArch64CC::GE;
1341 case CmpInst::ICMP_SLT:
1342 return AArch64CC::LT;
1343 case CmpInst::ICMP_SLE:
1344 return AArch64CC::LE;
1345 case CmpInst::ICMP_UGT:
1346 return AArch64CC::HI;
1347 case CmpInst::ICMP_UGE:
1348 return AArch64CC::HS;
1349 case CmpInst::ICMP_ULT:
1350 return AArch64CC::LO;
1351 case CmpInst::ICMP_ULE:
1352 return AArch64CC::LS;
1353 }
1354}
1355
1356/// changeFPCCToORAArch64CC - Convert an IR fp condition code to an AArch64 CC.
1357 static void changeFPCCToORAArch64CC(CmpInst::Predicate CC,
1358 AArch64CC::CondCode &CondCode,
1359 AArch64CC::CondCode &CondCode2) {
1360 CondCode2 = AArch64CC::AL;
1361 switch (CC) {
1362 default:
1363 llvm_unreachable("Unknown FP condition!");
1364 case CmpInst::FCMP_OEQ:
1365 CondCode = AArch64CC::EQ;
1366 break;
1367 case CmpInst::FCMP_OGT:
1368 CondCode = AArch64CC::GT;
1369 break;
1370 case CmpInst::FCMP_OGE:
1371 CondCode = AArch64CC::GE;
1372 break;
1373 case CmpInst::FCMP_OLT:
1374 CondCode = AArch64CC::MI;
1375 break;
1376 case CmpInst::FCMP_OLE:
1377 CondCode = AArch64CC::LS;
1378 break;
1379 case CmpInst::FCMP_ONE:
1380 CondCode = AArch64CC::MI;
1381 CondCode2 = AArch64CC::GT;
1382 break;
1383 case CmpInst::FCMP_ORD:
1384 CondCode = AArch64CC::VC;
1385 break;
1386 case CmpInst::FCMP_UNO:
1387 CondCode = AArch64CC::VS;
1388 break;
1389 case CmpInst::FCMP_UEQ:
1390 CondCode = AArch64CC::EQ;
1391 CondCode2 = AArch64CC::VS;
1392 break;
1393 case CmpInst::FCMP_UGT:
1394 CondCode = AArch64CC::HI;
1395 break;
1396 case CmpInst::FCMP_UGE:
1397 CondCode = AArch64CC::PL;
1398 break;
1399 case CmpInst::FCMP_ULT:
1400 CondCode = AArch64CC::LT;
1401 break;
1402 case CmpInst::FCMP_ULE:
1403 CondCode = AArch64CC::LE;
1404 break;
1405 case CmpInst::FCMP_UNE:
1406 CondCode = AArch64CC::NE;
1407 break;
1408 }
1409}
1410
1411/// Convert an IR fp condition code to an AArch64 CC.
1412/// This differs from changeFPCCToAArch64CC in that it returns cond codes that
1413/// should be AND'ed instead of OR'ed.
1414 static void changeFPCCToANDAArch64CC(CmpInst::Predicate CC,
1415 AArch64CC::CondCode &CondCode,
1416 AArch64CC::CondCode &CondCode2) {
1417 CondCode2 = AArch64CC::AL;
1418 switch (CC) {
1419 default:
1420 changeFPCCToORAArch64CC(CC, CondCode, CondCode2);
1421 assert(CondCode2 == AArch64CC::AL);
1422 break;
1423 case CmpInst::FCMP_ONE:
1424 // (a one b)
1425 // == ((a olt b) || (a ogt b))
1426 // == ((a ord b) && (a une b))
1427 CondCode = AArch64CC::VC;
1428 CondCode2 = AArch64CC::NE;
1429 break;
1430 case CmpInst::FCMP_UEQ:
1431 // (a ueq b)
1432 // == ((a uno b) || (a oeq b))
1433 // == ((a ule b) && (a uge b))
1434 CondCode = AArch64CC::PL;
1435 CondCode2 = AArch64CC::LE;
1436 break;
1437 }
1438}
1439
1440/// Return a register which can be used as a bit to test in a TB(N)Z.
1441 static Register getTestBitReg(Register Reg, uint64_t &Bit, bool &Invert,
1442 MachineRegisterInfo &MRI) {
1443 assert(Reg.isValid() && "Expected valid register!");
1444 bool HasZext = false;
1445 while (MachineInstr *MI = getDefIgnoringCopies(Reg, MRI)) {
1446 unsigned Opc = MI->getOpcode();
1447
1448 if (!MI->getOperand(0).isReg() ||
1449 !MRI.hasOneNonDBGUse(MI->getOperand(0).getReg()))
1450 break;
1451
1452 // (tbz (any_ext x), b) -> (tbz x, b) if we don't use the extended bits.
1453 //
1454 // (tbz (trunc x), b) -> (tbz x, b) is always safe, because the bit number
1455 // on the truncated x is the same as the bit number on x.
1456 if (Opc == TargetOpcode::G_ANYEXT || Opc == TargetOpcode::G_ZEXT ||
1457 Opc == TargetOpcode::G_TRUNC) {
1458 if (Opc == TargetOpcode::G_ZEXT)
1459 HasZext = true;
1460
1461 Register NextReg = MI->getOperand(1).getReg();
1462 // Did we find something worth folding?
1463 if (!NextReg.isValid() || !MRI.hasOneNonDBGUse(NextReg))
1464 break;
1465
1466 // NextReg is worth folding. Keep looking.
1467 Reg = NextReg;
1468 continue;
1469 }
1470
1471 // Attempt to find a suitable operation with a constant on one side.
1472 std::optional<uint64_t> C;
1473 Register TestReg;
1474 switch (Opc) {
1475 default:
1476 break;
1477 case TargetOpcode::G_AND:
1478 case TargetOpcode::G_XOR: {
1479 TestReg = MI->getOperand(1).getReg();
1480 Register ConstantReg = MI->getOperand(2).getReg();
1481 auto VRegAndVal = getIConstantVRegValWithLookThrough(ConstantReg, MRI);
1482 if (!VRegAndVal) {
1483 // AND commutes, check the other side for a constant.
1484 // FIXME: Can we canonicalize the constant so that it's always on the
1485 // same side at some point earlier?
1486 std::swap(ConstantReg, TestReg);
1487 VRegAndVal = getIConstantVRegValWithLookThrough(ConstantReg, MRI);
1488 }
1489 if (VRegAndVal) {
1490 if (HasZext)
1491 C = VRegAndVal->Value.getZExtValue();
1492 else
1493 C = VRegAndVal->Value.getSExtValue();
1494 }
1495 break;
1496 }
1497 case TargetOpcode::G_ASHR:
1498 case TargetOpcode::G_LSHR:
1499 case TargetOpcode::G_SHL: {
1500 TestReg = MI->getOperand(1).getReg();
1501 auto VRegAndVal =
1502 getIConstantVRegValWithLookThrough(MI->getOperand(2).getReg(), MRI);
1503 if (VRegAndVal)
1504 C = VRegAndVal->Value.getSExtValue();
1505 break;
1506 }
1507 }
1508
1509 // Didn't find a constant or viable register. Bail out of the loop.
1510 if (!C || !TestReg.isValid())
1511 break;
1512
1513 // We found a suitable instruction with a constant. Check to see if we can
1514 // walk through the instruction.
1515 Register NextReg;
1516 unsigned TestRegSize = MRI.getType(TestReg).getSizeInBits();
1517 switch (Opc) {
1518 default:
1519 break;
1520 case TargetOpcode::G_AND:
1521 // (tbz (and x, m), b) -> (tbz x, b) when the b-th bit of m is set.
1522 if ((*C >> Bit) & 1)
1523 NextReg = TestReg;
1524 break;
1525 case TargetOpcode::G_SHL:
1526 // (tbz (shl x, c), b) -> (tbz x, b-c) when b-c is positive and fits in
1527 // the type of the register.
1528 if (*C <= Bit && (Bit - *C) < TestRegSize) {
1529 NextReg = TestReg;
1530 Bit = Bit - *C;
1531 }
1532 break;
1533 case TargetOpcode::G_ASHR:
1534 // (tbz (ashr x, c), b) -> (tbz x, b+c) or (tbz x, msb) if b+c is > # bits
1535 // in x
1536 NextReg = TestReg;
1537 Bit = Bit + *C;
1538 if (Bit >= TestRegSize)
1539 Bit = TestRegSize - 1;
1540 break;
1541 case TargetOpcode::G_LSHR:
1542 // (tbz (lshr x, c), b) -> (tbz x, b+c) when b + c is < # bits in x
1543 if ((Bit + *C) < TestRegSize) {
1544 NextReg = TestReg;
1545 Bit = Bit + *C;
1546 }
1547 break;
1548 case TargetOpcode::G_XOR:
1549 // We can walk through a G_XOR by inverting whether we use tbz/tbnz when
1550 // appropriate.
1551 //
1552 // e.g. If x' = xor x, c, and the b-th bit is set in c then
1553 //
1554 // tbz x', b -> tbnz x, b
1555 //
1556 // Because x' only has the b-th bit set if x does not.
1557 if ((*C >> Bit) & 1)
1558 Invert = !Invert;
1559 NextReg = TestReg;
1560 break;
1561 }
1562
1563 // Check if we found anything worth folding.
1564 if (!NextReg.isValid())
1565 return Reg;
1566 Reg = NextReg;
1567 }
1568
1569 return Reg;
1570}
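
// Illustrative walk (register names are hypothetical): when asked for bit 3
// of
//   %two:gpr(s64) = G_CONSTANT i64 2
//   %shl:gpr(s64) = G_SHL %x, %two
// the loop above steps through the shift and returns %x with Bit updated to
// 1, so the caller can emit TB(N)Z on bit 1 of %x without materializing %shl.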
1571
1572MachineInstr *AArch64InstructionSelector::emitTestBit(
1573 Register TestReg, uint64_t Bit, bool IsNegative, MachineBasicBlock *DstMBB,
1574 MachineIRBuilder &MIB) const {
1575 assert(TestReg.isValid());
1576 assert(ProduceNonFlagSettingCondBr &&
1577 "Cannot emit TB(N)Z with speculation tracking!");
1578 MachineRegisterInfo &MRI = *MIB.getMRI();
1579
1580 // Attempt to optimize the test bit by walking over instructions.
1581 TestReg = getTestBitReg(TestReg, Bit, IsNegative, MRI);
1582 LLT Ty = MRI.getType(TestReg);
1583 unsigned Size = Ty.getSizeInBits();
1584 assert(!Ty.isVector() && "Expected a scalar!");
1585 assert(Bit < 64 && "Bit is too large!");
1586
1587 // When the test register is a 64-bit register, we have to narrow to make
1588 // TBNZW work.
1589 bool UseWReg = Bit < 32;
1590 unsigned NecessarySize = UseWReg ? 32 : 64;
1591 if (Size != NecessarySize)
1592 TestReg = moveScalarRegClass(
1593 TestReg, UseWReg ? AArch64::GPR32RegClass : AArch64::GPR64RegClass,
1594 MIB);
1595
1596 static const unsigned OpcTable[2][2] = {{AArch64::TBZX, AArch64::TBNZX},
1597 {AArch64::TBZW, AArch64::TBNZW}};
1598 unsigned Opc = OpcTable[UseWReg][IsNegative];
1599 auto TestBitMI =
1600 MIB.buildInstr(Opc).addReg(TestReg).addImm(Bit).addMBB(DstMBB);
1601 constrainSelectedInstRegOperands(*TestBitMI, TII, TRI, RBI);
1602 return &*TestBitMI;
1603}
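
// Illustrative note: when the tested bit is below 32, the (possibly 64-bit)
// input is first moved to the 32-bit register class so the W-form can be
// used, e.g. testing bit 5 of an s64 value emits a copy to a GPR32 register
// followed by something like
//   TBNZW %copy, 5, %bb.dest
// while bits 32..63 keep the X-form (TBZX/TBNZX).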
1604
1605bool AArch64InstructionSelector::tryOptAndIntoCompareBranch(
1606 MachineInstr &AndInst, bool Invert, MachineBasicBlock *DstMBB,
1607 MachineIRBuilder &MIB) const {
1608 assert(AndInst.getOpcode() == TargetOpcode::G_AND && "Expected G_AND only?");
1609 // Given something like this:
1610 //
1611 // %x = ...Something...
1612 // %one = G_CONSTANT i64 1
1613 // %zero = G_CONSTANT i64 0
1614 // %and = G_AND %x, %one
1615 // %cmp = G_ICMP intpred(ne), %and, %zero
1616 // %cmp_trunc = G_TRUNC %cmp
1617 // G_BRCOND %cmp_trunc, %bb.3
1618 //
1619 // We want to try and fold the AND into the G_BRCOND and produce either a
1620 // TBNZ (when we have intpred(ne)) or a TBZ (when we have intpred(eq)).
1621 //
1622 // In this case, we'd get
1623 //
1624 // TBNZ %x %bb.3
1625 //
1626
1627 // Check if the AND has a constant on its RHS which we can use as a mask.
1628 // If it's a power of 2, then it's the same as checking a specific bit.
1629 // (e.g, ANDing with 8 == ANDing with 000...100 == testing if bit 3 is set)
1630 auto MaybeBit = getIConstantVRegValWithLookThrough(
1631 AndInst.getOperand(2).getReg(), *MIB.getMRI());
1632 if (!MaybeBit)
1633 return false;
1634
1635 int32_t Bit = MaybeBit->Value.exactLogBase2();
1636 if (Bit < 0)
1637 return false;
1638
1639 Register TestReg = AndInst.getOperand(1).getReg();
1640
1641 // Emit a TB(N)Z.
1642 emitTestBit(TestReg, Bit, Invert, DstMBB, MIB);
1643 return true;
1644}
1645
1646MachineInstr *AArch64InstructionSelector::emitCBZ(Register CompareReg,
1647 bool IsNegative,
1648 MachineBasicBlock *DestMBB,
1649 MachineIRBuilder &MIB) const {
1650 assert(ProduceNonFlagSettingCondBr && "CBZ does not set flags!");
1651 MachineRegisterInfo &MRI = *MIB.getMRI();
1652 assert(RBI.getRegBank(CompareReg, MRI, TRI)->getID() ==
1653 AArch64::GPRRegBankID &&
1654 "Expected GPRs only?");
1655 auto Ty = MRI.getType(CompareReg);
1656 unsigned Width = Ty.getSizeInBits();
1657 assert(!Ty.isVector() && "Expected scalar only?");
1658 assert(Width <= 64 && "Expected width to be at most 64?");
1659 static const unsigned OpcTable[2][2] = {{AArch64::CBZW, AArch64::CBZX},
1660 {AArch64::CBNZW, AArch64::CBNZX}};
1661 unsigned Opc = OpcTable[IsNegative][Width == 64];
1662 auto BranchMI = MIB.buildInstr(Opc, {}, {CompareReg}).addMBB(DestMBB);
1663 constrainSelectedInstRegOperands(*BranchMI, TII, TRI, RBI);
1664 return &*BranchMI;
1665}
1666
1667bool AArch64InstructionSelector::selectCompareBranchFedByFCmp(
1668 MachineInstr &I, MachineInstr &FCmp, MachineIRBuilder &MIB) const {
1669 assert(FCmp.getOpcode() == TargetOpcode::G_FCMP);
1670 assert(I.getOpcode() == TargetOpcode::G_BRCOND);
1671 // Unfortunately, the mapping of LLVM FP CC's onto AArch64 CC's isn't
1672 // totally clean. Some of them require two branches to implement.
1673 auto Pred = (CmpInst::Predicate)FCmp.getOperand(1).getPredicate();
1674 emitFPCompare(FCmp.getOperand(2).getReg(), FCmp.getOperand(3).getReg(), MIB,
1675 Pred);
1676 AArch64CC::CondCode CC1, CC2;
1677 changeFCMPPredToAArch64CC(static_cast<CmpInst::Predicate>(Pred), CC1, CC2);
1678 MachineBasicBlock *DestMBB = I.getOperand(1).getMBB();
1679 MIB.buildInstr(AArch64::Bcc, {}, {}).addImm(CC1).addMBB(DestMBB);
1680 if (CC2 != AArch64CC::AL)
1681 MIB.buildInstr(AArch64::Bcc, {}, {}).addImm(CC2).addMBB(DestMBB);
1682 I.eraseFromParent();
1683 return true;
1684}
1685
1686bool AArch64InstructionSelector::tryOptCompareBranchFedByICmp(
1687 MachineInstr &I, MachineInstr &ICmp, MachineIRBuilder &MIB) const {
1688 assert(ICmp.getOpcode() == TargetOpcode::G_ICMP);
1689 assert(I.getOpcode() == TargetOpcode::G_BRCOND);
1690 // Attempt to optimize the G_BRCOND + G_ICMP into a TB(N)Z/CB(N)Z.
1691 //
1692 // Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z
1693 // instructions will not be produced, as they are conditional branch
1694 // instructions that do not set flags.
1695 if (!ProduceNonFlagSettingCondBr)
1696 return false;
1697
1698 MachineRegisterInfo &MRI = *MIB.getMRI();
1699 MachineBasicBlock *DestMBB = I.getOperand(1).getMBB();
1700 auto Pred =
1701 static_cast<CmpInst::Predicate>(ICmp.getOperand(1).getPredicate());
1702 Register LHS = ICmp.getOperand(2).getReg();
1703 Register RHS = ICmp.getOperand(3).getReg();
1704
1705 // We're allowed to emit a TB(N)Z/CB(N)Z. Try to do that.
1706 auto VRegAndVal = getIConstantVRegValWithLookThrough(RHS, MRI);
1707 MachineInstr *AndInst = getOpcodeDef(TargetOpcode::G_AND, LHS, MRI);
1708
1709 // When we can emit a TB(N)Z, prefer that.
1710 //
1711 // Handle non-commutative condition codes first.
1712 // Note that we don't want to do this when we have a G_AND because it can
1713 // become a tst. The tst will make the test bit in the TB(N)Z redundant.
1714 if (VRegAndVal && !AndInst) {
1715 int64_t C = VRegAndVal->Value.getSExtValue();
1716
1717 // When we have a greater-than comparison, we can just test if the msb is
1718 // zero.
1719 if (C == -1 && Pred == CmpInst::ICMP_SGT) {
1720 uint64_t Bit = MRI.getType(LHS).getSizeInBits() - 1;
1721 emitTestBit(LHS, Bit, /*IsNegative = */ false, DestMBB, MIB);
1722 I.eraseFromParent();
1723 return true;
1724 }
1725
1726 // When we have a less than comparison, we can just test if the msb is not
1727 // zero.
1728 if (C == 0 && Pred == CmpInst::ICMP_SLT) {
1729 uint64_t Bit = MRI.getType(LHS).getSizeInBits() - 1;
1730 emitTestBit(LHS, Bit, /*IsNegative = */ true, DestMBB, MIB);
1731 I.eraseFromParent();
1732 return true;
1733 }
1734
1735 // Inversely, if we have a signed greater-than-or-equal comparison to zero,
1736 // we can test if the msb is zero.
1737 if (C == 0 && Pred == CmpInst::ICMP_SGE) {
1738 uint64_t Bit = MRI.getType(LHS).getSizeInBits() - 1;
1739 emitTestBit(LHS, Bit, /*IsNegative = */ false, DestMBB, MIB);
1740 I.eraseFromParent();
1741 return true;
1742 }
1743 }
1744
1745 // Attempt to handle commutative condition codes. Right now, that's only
1746 // eq/ne.
1747 if (ICmpInst::isEquality(Pred)) {
1748 if (!VRegAndVal) {
1749 std::swap(RHS, LHS);
1750 VRegAndVal = getIConstantVRegValWithLookThrough(RHS, MRI);
1751 AndInst = getOpcodeDef(TargetOpcode::G_AND, LHS, MRI);
1752 }
1753
1754 if (VRegAndVal && VRegAndVal->Value == 0) {
1755 // If there's a G_AND feeding into this branch, try to fold it away by
1756 // emitting a TB(N)Z instead.
1757 //
1758 // Note: If we have LT, then it *is* possible to fold, but it wouldn't be
1759 // beneficial. When we have an AND and LT, we need a TST/ANDS, so folding
1760 // would be redundant.
1761 if (AndInst &&
1762 tryOptAndIntoCompareBranch(
1763 *AndInst, /*Invert = */ Pred == CmpInst::ICMP_NE, DestMBB, MIB)) {
1764 I.eraseFromParent();
1765 return true;
1766 }
1767
1768 // Otherwise, try to emit a CB(N)Z instead.
1769 auto LHSTy = MRI.getType(LHS);
1770 if (!LHSTy.isVector() && LHSTy.getSizeInBits() <= 64) {
1771 emitCBZ(LHS, /*IsNegative = */ Pred == CmpInst::ICMP_NE, DestMBB, MIB);
1772 I.eraseFromParent();
1773 return true;
1774 }
1775 }
1776 }
1777
1778 return false;
1779}
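The three non-commutative folds above all reduce to a single test of the most significant bit. A minimal plain-C++ check of that reasoning, assuming two's-complement 64-bit values (not part of the selector):

#include <cassert>
#include <cstdint>

static bool msbSet(int64_t X) { return (static_cast<uint64_t>(X) >> 63) & 1; }

static void checkSignBitFolds(int64_t X) {
  assert((X > -1) == !msbSet(X)); // ICMP_SGT x, -1  ->  TBZ  x, #63
  assert((X >= 0) == !msbSet(X)); // ICMP_SGE x, 0   ->  TBZ  x, #63
  assert((X < 0) == msbSet(X));   // ICMP_SLT x, 0   ->  TBNZ x, #63
}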
1780
1781bool AArch64InstructionSelector::selectCompareBranchFedByICmp(
1782 MachineInstr &I, MachineInstr &ICmp, MachineIRBuilder &MIB) const {
1783 assert(ICmp.getOpcode() == TargetOpcode::G_ICMP);
1784 assert(I.getOpcode() == TargetOpcode::G_BRCOND);
1785 if (tryOptCompareBranchFedByICmp(I, ICmp, MIB))
1786 return true;
1787
1788 // Couldn't optimize. Emit a compare + a Bcc.
1789 MachineBasicBlock *DestMBB = I.getOperand(1).getMBB();
1790 auto PredOp = ICmp.getOperand(1);
1791 emitIntegerCompare(ICmp.getOperand(2), ICmp.getOperand(3), PredOp, MIB);
 1792 const AArch64CC::CondCode CC = changeICMPPredToAArch64CC(
 1793 static_cast<CmpInst::Predicate>(PredOp.getPredicate()));
1794 MIB.buildInstr(AArch64::Bcc, {}, {}).addImm(CC).addMBB(DestMBB);
1795 I.eraseFromParent();
1796 return true;
1797}
1798
1799bool AArch64InstructionSelector::selectCompareBranch(
 1800 MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) {
 1801 Register CondReg = I.getOperand(0).getReg();
1802 MachineInstr *CCMI = MRI.getVRegDef(CondReg);
1803 // Try to select the G_BRCOND using whatever is feeding the condition if
1804 // possible.
1805 unsigned CCMIOpc = CCMI->getOpcode();
1806 if (CCMIOpc == TargetOpcode::G_FCMP)
1807 return selectCompareBranchFedByFCmp(I, *CCMI, MIB);
1808 if (CCMIOpc == TargetOpcode::G_ICMP)
1809 return selectCompareBranchFedByICmp(I, *CCMI, MIB);
1810
1811 // Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z
1812 // instructions will not be produced, as they are conditional branch
1813 // instructions that do not set flags.
1814 if (ProduceNonFlagSettingCondBr) {
1815 emitTestBit(CondReg, /*Bit = */ 0, /*IsNegative = */ true,
1816 I.getOperand(1).getMBB(), MIB);
1817 I.eraseFromParent();
1818 return true;
1819 }
1820
1821 // Can't emit TB(N)Z/CB(N)Z. Emit a tst + bcc instead.
1822 auto TstMI =
1823 MIB.buildInstr(AArch64::ANDSWri, {LLT::scalar(32)}, {CondReg}).addImm(1);
 1824 constrainSelectedInstRegOperands(*TstMI, TII, TRI, RBI);
 1825 auto Bcc = MIB.buildInstr(AArch64::Bcc)
 1826 .addImm(AArch64CC::NE)
 1827 .addMBB(I.getOperand(1).getMBB());
1828 I.eraseFromParent();
1829 return constrainSelectedInstRegOperands(*Bcc, TII, TRI, RBI);
1830}
1831
1832/// Returns the element immediate value of a vector shift operand if found.
1833/// This needs to detect a splat-like operation, e.g. a G_BUILD_VECTOR.
1834static std::optional<int64_t> getVectorShiftImm(Register Reg,
 1835 MachineRegisterInfo &MRI) {
 1836 assert(MRI.getType(Reg).isVector() && "Expected a *vector* shift operand");
1837 MachineInstr *OpMI = MRI.getVRegDef(Reg);
1838 return getAArch64VectorSplatScalar(*OpMI, MRI);
1839}
1840
1841/// Matches and returns the shift immediate value for a SHL instruction given
1842/// a shift operand.
1843static std::optional<int64_t> getVectorSHLImm(LLT SrcTy, Register Reg,
 1844 MachineRegisterInfo &MRI) {
 1845 std::optional<int64_t> ShiftImm = getVectorShiftImm(Reg, MRI);
1846 if (!ShiftImm)
1847 return std::nullopt;
1848 // Check the immediate is in range for a SHL.
1849 int64_t Imm = *ShiftImm;
1850 if (Imm < 0)
1851 return std::nullopt;
1852 switch (SrcTy.getElementType().getSizeInBits()) {
1853 default:
1854 LLVM_DEBUG(dbgs() << "Unhandled element type for vector shift");
1855 return std::nullopt;
1856 case 8:
1857 if (Imm > 7)
1858 return std::nullopt;
1859 break;
1860 case 16:
1861 if (Imm > 15)
1862 return std::nullopt;
1863 break;
1864 case 32:
1865 if (Imm > 31)
1866 return std::nullopt;
1867 break;
1868 case 64:
1869 if (Imm > 63)
1870 return std::nullopt;
1871 break;
1872 }
1873 return Imm;
1874}
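The switch above spells out the per-element limits; as a minimal sketch (plain C++, illustrative only), the accepted range is simply 0 <= Imm < element width for the four supported element sizes:

#include <cstdint>
#include <optional>

static std::optional<int64_t> clampVectorShlImm(int64_t Imm, unsigned EltBits) {
  const bool KnownWidth =
      EltBits == 8 || EltBits == 16 || EltBits == 32 || EltBits == 64;
  if (!KnownWidth || Imm < 0 || Imm >= static_cast<int64_t>(EltBits))
    return std::nullopt; // same effect as the 7/15/31/63 limits above
  return Imm;
}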
1875
1876bool AArch64InstructionSelector::selectVectorSHL(MachineInstr &I,
 1877 MachineRegisterInfo &MRI) {
 1878 assert(I.getOpcode() == TargetOpcode::G_SHL);
1879 Register DstReg = I.getOperand(0).getReg();
1880 const LLT Ty = MRI.getType(DstReg);
1881 Register Src1Reg = I.getOperand(1).getReg();
1882 Register Src2Reg = I.getOperand(2).getReg();
1883
1884 if (!Ty.isVector())
1885 return false;
1886
1887 // Check if we have a vector of constants on RHS that we can select as the
1888 // immediate form.
1889 std::optional<int64_t> ImmVal = getVectorSHLImm(Ty, Src2Reg, MRI);
1890
1891 unsigned Opc = 0;
1892 if (Ty == LLT::fixed_vector(2, 64)) {
1893 Opc = ImmVal ? AArch64::SHLv2i64_shift : AArch64::USHLv2i64;
1894 } else if (Ty == LLT::fixed_vector(4, 32)) {
1895 Opc = ImmVal ? AArch64::SHLv4i32_shift : AArch64::USHLv4i32;
1896 } else if (Ty == LLT::fixed_vector(2, 32)) {
1897 Opc = ImmVal ? AArch64::SHLv2i32_shift : AArch64::USHLv2i32;
1898 } else if (Ty == LLT::fixed_vector(4, 16)) {
1899 Opc = ImmVal ? AArch64::SHLv4i16_shift : AArch64::USHLv4i16;
1900 } else if (Ty == LLT::fixed_vector(8, 16)) {
1901 Opc = ImmVal ? AArch64::SHLv8i16_shift : AArch64::USHLv8i16;
1902 } else if (Ty == LLT::fixed_vector(16, 8)) {
1903 Opc = ImmVal ? AArch64::SHLv16i8_shift : AArch64::USHLv16i8;
1904 } else if (Ty == LLT::fixed_vector(8, 8)) {
1905 Opc = ImmVal ? AArch64::SHLv8i8_shift : AArch64::USHLv8i8;
1906 } else {
1907 LLVM_DEBUG(dbgs() << "Unhandled G_SHL type");
1908 return false;
1909 }
1910
1911 auto Shl = MIB.buildInstr(Opc, {DstReg}, {Src1Reg});
1912 if (ImmVal)
1913 Shl.addImm(*ImmVal);
1914 else
1915 Shl.addUse(Src2Reg);
 1916 constrainSelectedInstRegOperands(*Shl, TII, TRI, RBI);
 1917 I.eraseFromParent();
1918 return true;
1919}
1920
1921bool AArch64InstructionSelector::selectVectorAshrLshr(
 1922 MachineInstr &I, MachineRegisterInfo &MRI) {
 1923 assert(I.getOpcode() == TargetOpcode::G_ASHR ||
1924 I.getOpcode() == TargetOpcode::G_LSHR);
1925 Register DstReg = I.getOperand(0).getReg();
1926 const LLT Ty = MRI.getType(DstReg);
1927 Register Src1Reg = I.getOperand(1).getReg();
1928 Register Src2Reg = I.getOperand(2).getReg();
1929
1930 if (!Ty.isVector())
1931 return false;
1932
1933 bool IsASHR = I.getOpcode() == TargetOpcode::G_ASHR;
1934
1935 // We expect the immediate case to be lowered in the PostLegalCombiner to
1936 // AArch64ISD::VASHR or AArch64ISD::VLSHR equivalents.
1937
 1938 // There is no shift-right-by-register instruction, but the shift-left-by-register
 1939 // instruction takes a signed shift amount, where negative values specify a
 1940 // right shift.
1941
1942 unsigned Opc = 0;
1943 unsigned NegOpc = 0;
1944 const TargetRegisterClass *RC =
1945 getRegClassForTypeOnBank(Ty, RBI.getRegBank(AArch64::FPRRegBankID));
1946 if (Ty == LLT::fixed_vector(2, 64)) {
1947 Opc = IsASHR ? AArch64::SSHLv2i64 : AArch64::USHLv2i64;
1948 NegOpc = AArch64::NEGv2i64;
1949 } else if (Ty == LLT::fixed_vector(4, 32)) {
1950 Opc = IsASHR ? AArch64::SSHLv4i32 : AArch64::USHLv4i32;
1951 NegOpc = AArch64::NEGv4i32;
1952 } else if (Ty == LLT::fixed_vector(2, 32)) {
1953 Opc = IsASHR ? AArch64::SSHLv2i32 : AArch64::USHLv2i32;
1954 NegOpc = AArch64::NEGv2i32;
1955 } else if (Ty == LLT::fixed_vector(4, 16)) {
1956 Opc = IsASHR ? AArch64::SSHLv4i16 : AArch64::USHLv4i16;
1957 NegOpc = AArch64::NEGv4i16;
1958 } else if (Ty == LLT::fixed_vector(8, 16)) {
1959 Opc = IsASHR ? AArch64::SSHLv8i16 : AArch64::USHLv8i16;
1960 NegOpc = AArch64::NEGv8i16;
1961 } else if (Ty == LLT::fixed_vector(16, 8)) {
1962 Opc = IsASHR ? AArch64::SSHLv16i8 : AArch64::USHLv16i8;
1963 NegOpc = AArch64::NEGv16i8;
1964 } else if (Ty == LLT::fixed_vector(8, 8)) {
1965 Opc = IsASHR ? AArch64::SSHLv8i8 : AArch64::USHLv8i8;
1966 NegOpc = AArch64::NEGv8i8;
1967 } else {
1968 LLVM_DEBUG(dbgs() << "Unhandled G_ASHR type");
1969 return false;
1970 }
1971
1972 auto Neg = MIB.buildInstr(NegOpc, {RC}, {Src2Reg});
 1973 constrainSelectedInstRegOperands(*Neg, TII, TRI, RBI);
 1974 auto SShl = MIB.buildInstr(Opc, {DstReg}, {Src1Reg, Neg});
 1975 constrainSelectedInstRegOperands(*SShl, TII, TRI, RBI);
 1976 I.eraseFromParent();
1977 return true;
1978}
1979
1980bool AArch64InstructionSelector::selectVaStartAAPCS(
 1981 MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const {
 1982 return false;
1983}
1984
1985bool AArch64InstructionSelector::selectVaStartDarwin(
 1986 MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const {
 1987 AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
 1988 Register ListReg = I.getOperand(0).getReg();
1989
1990 Register ArgsAddrReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
1991
1992 int FrameIdx = FuncInfo->getVarArgsStackIndex();
 1993 if (MF.getSubtarget<AArch64Subtarget>().isCallingConvWin64(
 1994 MF.getFunction().getCallingConv())) {
1995 FrameIdx = FuncInfo->getVarArgsGPRSize() > 0
1996 ? FuncInfo->getVarArgsGPRIndex()
1997 : FuncInfo->getVarArgsStackIndex();
1998 }
1999
2000 auto MIB =
2001 BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::ADDXri))
2002 .addDef(ArgsAddrReg)
2003 .addFrameIndex(FrameIdx)
2004 .addImm(0)
2005 .addImm(0);
2006
 2007 constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
 2008
2009 MIB = BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::STRXui))
2010 .addUse(ArgsAddrReg)
2011 .addUse(ListReg)
2012 .addImm(0)
2013 .addMemOperand(*I.memoperands_begin());
2014
 2015 constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
 2016 I.eraseFromParent();
2017 return true;
2018}
2019
2020void AArch64InstructionSelector::materializeLargeCMVal(
2021 MachineInstr &I, const Value *V, unsigned OpFlags) {
2022 MachineBasicBlock &MBB = *I.getParent();
2023 MachineFunction &MF = *MBB.getParent();
 2024 MachineRegisterInfo &MRI = MF.getRegInfo();
 2025
2026 auto MovZ = MIB.buildInstr(AArch64::MOVZXi, {&AArch64::GPR64RegClass}, {});
2027 MovZ->addOperand(MF, I.getOperand(1));
2028 MovZ->getOperand(1).setTargetFlags(OpFlags | AArch64II::MO_G0 |
 2029 AArch64II::MO_NC);
 2030 MovZ->addOperand(MF, MachineOperand::CreateImm(0));
 2031 constrainSelectedInstRegOperands(*MovZ, TII, TRI, RBI);
 2032
2033 auto BuildMovK = [&](Register SrcReg, unsigned char Flags, unsigned Offset,
2034 Register ForceDstReg) {
2035 Register DstReg = ForceDstReg
2036 ? ForceDstReg
2037 : MRI.createVirtualRegister(&AArch64::GPR64RegClass);
2038 auto MovI = MIB.buildInstr(AArch64::MOVKXi).addDef(DstReg).addUse(SrcReg);
2039 if (auto *GV = dyn_cast<GlobalValue>(V)) {
 2040 MovI->addOperand(MF, MachineOperand::CreateGA(
 2041 GV, MovZ->getOperand(1).getOffset(), Flags));
2042 } else {
2043 MovI->addOperand(
2044 MF, MachineOperand::CreateBA(cast<BlockAddress>(V),
2045 MovZ->getOperand(1).getOffset(), Flags));
2046 }
2047 MovI->addOperand(MF, MachineOperand::CreateImm(Offset));
 2048 constrainSelectedInstRegOperands(*MovI, TII, TRI, RBI);
 2049 return DstReg;
2050 };
2051 Register DstReg = BuildMovK(MovZ.getReg(0),
 2052 AArch64II::MO_G1 | AArch64II::MO_NC, 16, 0);
 2053 DstReg = BuildMovK(DstReg, AArch64II::MO_G2 | AArch64II::MO_NC, 32, 0);
2054 BuildMovK(DstReg, AArch64II::MO_G3, 48, I.getOperand(0).getReg());
2055}
2056
2057bool AArch64InstructionSelector::preISelLower(MachineInstr &I) {
2058 MachineBasicBlock &MBB = *I.getParent();
2059 MachineFunction &MF = *MBB.getParent();
 2060 MachineRegisterInfo &MRI = MF.getRegInfo();
 2061
2062 switch (I.getOpcode()) {
2063 case TargetOpcode::G_STORE: {
2064 bool Changed = contractCrossBankCopyIntoStore(I, MRI);
2065 MachineOperand &SrcOp = I.getOperand(0);
2066 if (MRI.getType(SrcOp.getReg()).isPointer()) {
2067 // Allow matching with imported patterns for stores of pointers. Unlike
2068 // G_LOAD/G_PTR_ADD, we may not have selected all users. So, emit a copy
2069 // and constrain.
2070 auto Copy = MIB.buildCopy(LLT::scalar(64), SrcOp);
2071 Register NewSrc = Copy.getReg(0);
2072 SrcOp.setReg(NewSrc);
2073 RBI.constrainGenericRegister(NewSrc, AArch64::GPR64RegClass, MRI);
2074 Changed = true;
2075 }
2076 return Changed;
2077 }
2078 case TargetOpcode::G_PTR_ADD:
2079 return convertPtrAddToAdd(I, MRI);
2080 case TargetOpcode::G_LOAD: {
2081 // For scalar loads of pointers, we try to convert the dest type from p0
2082 // to s64 so that our imported patterns can match. Like with the G_PTR_ADD
2083 // conversion, this should be ok because all users should have been
2084 // selected already, so the type doesn't matter for them.
2085 Register DstReg = I.getOperand(0).getReg();
2086 const LLT DstTy = MRI.getType(DstReg);
2087 if (!DstTy.isPointer())
2088 return false;
2089 MRI.setType(DstReg, LLT::scalar(64));
2090 return true;
2091 }
2092 case AArch64::G_DUP: {
2093 // Convert the type from p0 to s64 to help selection.
2094 LLT DstTy = MRI.getType(I.getOperand(0).getReg());
2095 if (!DstTy.isPointerVector())
2096 return false;
2097 auto NewSrc = MIB.buildCopy(LLT::scalar(64), I.getOperand(1).getReg());
2098 MRI.setType(I.getOperand(0).getReg(),
2099 DstTy.changeElementType(LLT::scalar(64)));
2100 MRI.setRegClass(NewSrc.getReg(0), &AArch64::GPR64RegClass);
2101 I.getOperand(1).setReg(NewSrc.getReg(0));
2102 return true;
2103 }
2104 case TargetOpcode::G_UITOFP:
2105 case TargetOpcode::G_SITOFP: {
2106 // If both source and destination regbanks are FPR, then convert the opcode
2107 // to G_SITOF so that the importer can select it to an fpr variant.
2108 // Otherwise, it ends up matching an fpr/gpr variant and adding a cross-bank
2109 // copy.
2110 Register SrcReg = I.getOperand(1).getReg();
2111 LLT SrcTy = MRI.getType(SrcReg);
2112 LLT DstTy = MRI.getType(I.getOperand(0).getReg());
2113 if (SrcTy.isVector() || SrcTy.getSizeInBits() != DstTy.getSizeInBits())
2114 return false;
2115
2116 if (RBI.getRegBank(SrcReg, MRI, TRI)->getID() == AArch64::FPRRegBankID) {
2117 if (I.getOpcode() == TargetOpcode::G_SITOFP)
2118 I.setDesc(TII.get(AArch64::G_SITOF));
2119 else
2120 I.setDesc(TII.get(AArch64::G_UITOF));
2121 return true;
2122 }
2123 return false;
2124 }
2125 default:
2126 return false;
2127 }
2128}
2129
2130/// This lowering tries to look for G_PTR_ADD instructions and then converts
2131/// them to a standard G_ADD with a COPY on the source.
2132///
2133/// The motivation behind this is to expose the add semantics to the imported
2134/// tablegen patterns. We shouldn't need to check for uses being loads/stores,
2135/// because the selector works bottom up, uses before defs. By the time we
2136/// end up trying to select a G_PTR_ADD, we should have already attempted to
2137/// fold this into addressing modes and were therefore unsuccessful.
2138bool AArch64InstructionSelector::convertPtrAddToAdd(
 2139 MachineInstr &I, MachineRegisterInfo &MRI) {
 2140 assert(I.getOpcode() == TargetOpcode::G_PTR_ADD && "Expected G_PTR_ADD");
2141 Register DstReg = I.getOperand(0).getReg();
2142 Register AddOp1Reg = I.getOperand(1).getReg();
2143 const LLT PtrTy = MRI.getType(DstReg);
2144 if (PtrTy.getAddressSpace() != 0)
2145 return false;
2146
2147 const LLT CastPtrTy =
2148 PtrTy.isVector() ? LLT::fixed_vector(2, 64) : LLT::scalar(64);
2149 auto PtrToInt = MIB.buildPtrToInt(CastPtrTy, AddOp1Reg);
2150 // Set regbanks on the registers.
2151 if (PtrTy.isVector())
2152 MRI.setRegBank(PtrToInt.getReg(0), RBI.getRegBank(AArch64::FPRRegBankID));
2153 else
2154 MRI.setRegBank(PtrToInt.getReg(0), RBI.getRegBank(AArch64::GPRRegBankID));
2155
2156 // Now turn the %dst(p0) = G_PTR_ADD %base, off into:
2157 // %dst(intty) = G_ADD %intbase, off
2158 I.setDesc(TII.get(TargetOpcode::G_ADD));
2159 MRI.setType(DstReg, CastPtrTy);
2160 I.getOperand(1).setReg(PtrToInt.getReg(0));
2161 if (!select(*PtrToInt)) {
2162 LLVM_DEBUG(dbgs() << "Failed to select G_PTRTOINT in convertPtrAddToAdd");
2163 return false;
2164 }
2165
2166 // Also take the opportunity here to try to do some optimization.
2167 // Try to convert this into a G_SUB if the offset is a 0-x negate idiom.
2168 Register NegatedReg;
2169 if (!mi_match(I.getOperand(2).getReg(), MRI, m_Neg(m_Reg(NegatedReg))))
2170 return true;
2171 I.getOperand(2).setReg(NegatedReg);
2172 I.setDesc(TII.get(TargetOpcode::G_SUB));
2173 return true;
2174}
2175
2176bool AArch64InstructionSelector::earlySelectSHL(MachineInstr &I,
 2177 MachineRegisterInfo &MRI) {
 2178 // We try to match the immediate variant of LSL, which is actually an alias
2179 // for a special case of UBFM. Otherwise, we fall back to the imported
2180 // selector which will match the register variant.
2181 assert(I.getOpcode() == TargetOpcode::G_SHL && "unexpected op");
2182 const auto &MO = I.getOperand(2);
2183 auto VRegAndVal = getIConstantVRegVal(MO.getReg(), MRI);
2184 if (!VRegAndVal)
2185 return false;
2186
2187 const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
2188 if (DstTy.isVector())
2189 return false;
2190 bool Is64Bit = DstTy.getSizeInBits() == 64;
2191 auto Imm1Fn = Is64Bit ? selectShiftA_64(MO) : selectShiftA_32(MO);
2192 auto Imm2Fn = Is64Bit ? selectShiftB_64(MO) : selectShiftB_32(MO);
2193
2194 if (!Imm1Fn || !Imm2Fn)
2195 return false;
2196
2197 auto NewI =
2198 MIB.buildInstr(Is64Bit ? AArch64::UBFMXri : AArch64::UBFMWri,
2199 {I.getOperand(0).getReg()}, {I.getOperand(1).getReg()});
2200
2201 for (auto &RenderFn : *Imm1Fn)
2202 RenderFn(NewI);
2203 for (auto &RenderFn : *Imm2Fn)
2204 RenderFn(NewI);
2205
2206 I.eraseFromParent();
2207 return constrainSelectedInstRegOperands(*NewI, TII, TRI, RBI);
2208}
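For reference, the immediates produced by the selectShiftA/selectShiftB renderers follow the architectural LSL-immediate alias. A minimal sketch of that arithmetic (plain C++, assuming the standard UBFM alias, Size of 32 or 64, and 0 < Shift < Size):

#include <utility>

// Maps an LSL shift amount to the (immr, imms) operands of the equivalent
// UBFM instruction.
static std::pair<unsigned, unsigned> lslToUbfmImms(unsigned Shift,
                                                   unsigned Size) {
  unsigned Immr = (Size - Shift) % Size; // rotate-right amount
  unsigned Imms = Size - 1 - Shift;      // index of the last source bit kept
  return {Immr, Imms}; // e.g. lslToUbfmImms(4, 64) == {60, 59}
}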
2209
2210bool AArch64InstructionSelector::contractCrossBankCopyIntoStore(
 2211 MachineInstr &I, MachineRegisterInfo &MRI) {
 2212 assert(I.getOpcode() == TargetOpcode::G_STORE && "Expected G_STORE");
2213 // If we're storing a scalar, it doesn't matter what register bank that
2214 // scalar is on. All that matters is the size.
2215 //
2216 // So, if we see something like this (with a 32-bit scalar as an example):
2217 //
2218 // %x:gpr(s32) = ... something ...
2219 // %y:fpr(s32) = COPY %x:gpr(s32)
2220 // G_STORE %y:fpr(s32)
2221 //
2222 // We can fix this up into something like this:
2223 //
2224 // G_STORE %x:gpr(s32)
2225 //
2226 // And then continue the selection process normally.
2227 Register DefDstReg = getSrcRegIgnoringCopies(I.getOperand(0).getReg(), MRI);
2228 if (!DefDstReg.isValid())
2229 return false;
2230 LLT DefDstTy = MRI.getType(DefDstReg);
2231 Register StoreSrcReg = I.getOperand(0).getReg();
2232 LLT StoreSrcTy = MRI.getType(StoreSrcReg);
2233
2234 // If we get something strange like a physical register, then we shouldn't
2235 // go any further.
2236 if (!DefDstTy.isValid())
2237 return false;
2238
2239 // Are the source and dst types the same size?
2240 if (DefDstTy.getSizeInBits() != StoreSrcTy.getSizeInBits())
2241 return false;
2242
2243 if (RBI.getRegBank(StoreSrcReg, MRI, TRI) ==
2244 RBI.getRegBank(DefDstReg, MRI, TRI))
2245 return false;
2246
2247 // We have a cross-bank copy, which is entering a store. Let's fold it.
2248 I.getOperand(0).setReg(DefDstReg);
2249 return true;
2250}
2251
2252bool AArch64InstructionSelector::earlySelect(MachineInstr &I) {
2253 assert(I.getParent() && "Instruction should be in a basic block!");
2254 assert(I.getParent()->getParent() && "Instruction should be in a function!");
2255
2256 MachineBasicBlock &MBB = *I.getParent();
2257 MachineFunction &MF = *MBB.getParent();
 2258 MachineRegisterInfo &MRI = MF.getRegInfo();
 2259
2260 switch (I.getOpcode()) {
2261 case AArch64::G_DUP: {
2262 // Before selecting a DUP instruction, check if it is better selected as a
2263 // MOV or load from a constant pool.
2264 Register Src = I.getOperand(1).getReg();
2265 auto ValAndVReg = getAnyConstantVRegValWithLookThrough(Src, MRI);
2266 if (!ValAndVReg)
2267 return false;
2268 LLVMContext &Ctx = MF.getFunction().getContext();
2269 Register Dst = I.getOperand(0).getReg();
 2270 auto *CV = ConstantDataVector::getSplat(
 2271 MRI.getType(Dst).getNumElements(),
2272 ConstantInt::get(Type::getIntNTy(Ctx, MRI.getType(Src).getSizeInBits()),
2273 ValAndVReg->Value));
2274 if (!emitConstantVector(Dst, CV, MIB, MRI))
2275 return false;
2276 I.eraseFromParent();
2277 return true;
2278 }
2279 case TargetOpcode::G_SEXT:
2280 // Check for i64 sext(i32 vector_extract) prior to tablegen to select SMOV
2281 // over a normal extend.
2282 if (selectUSMovFromExtend(I, MRI))
2283 return true;
2284 return false;
2285 case TargetOpcode::G_BR:
2286 return false;
2287 case TargetOpcode::G_SHL:
2288 return earlySelectSHL(I, MRI);
2289 case TargetOpcode::G_CONSTANT: {
2290 bool IsZero = false;
2291 if (I.getOperand(1).isCImm())
2292 IsZero = I.getOperand(1).getCImm()->isZero();
2293 else if (I.getOperand(1).isImm())
2294 IsZero = I.getOperand(1).getImm() == 0;
2295
2296 if (!IsZero)
2297 return false;
2298
2299 Register DefReg = I.getOperand(0).getReg();
2300 LLT Ty = MRI.getType(DefReg);
2301 if (Ty.getSizeInBits() == 64) {
2302 I.getOperand(1).ChangeToRegister(AArch64::XZR, false);
2303 RBI.constrainGenericRegister(DefReg, AArch64::GPR64RegClass, MRI);
2304 } else if (Ty.getSizeInBits() == 32) {
2305 I.getOperand(1).ChangeToRegister(AArch64::WZR, false);
2306 RBI.constrainGenericRegister(DefReg, AArch64::GPR32RegClass, MRI);
2307 } else
2308 return false;
2309
2310 I.setDesc(TII.get(TargetOpcode::COPY));
2311 return true;
2312 }
2313
2314 case TargetOpcode::G_ADD: {
2315 // Check if this is being fed by a G_ICMP on either side.
2316 //
2317 // (cmp pred, x, y) + z
2318 //
2319 // In the above case, when the cmp is true, we increment z by 1. So, we can
2320 // fold the add into the cset for the cmp by using cinc.
2321 //
2322 // FIXME: This would probably be a lot nicer in PostLegalizerLowering.
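 // For example, with a 32-bit %add = G_ADD %z, %cmp where %cmp is
 // (G_ICMP eq, %x, %y): InvCC below is the AArch64 condition for "ne", and
 // CSINC %add, %z, %z, ne produces %z when %x != %y and %z + 1 when %x == %y
 // (CSINC Rd, Rn, Rm, cond returns Rn if cond holds, otherwise Rm + 1), which
 // is exactly %z + (%x == %y ? 1 : 0).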
2323 Register AddDst = I.getOperand(0).getReg();
2324 Register AddLHS = I.getOperand(1).getReg();
2325 Register AddRHS = I.getOperand(2).getReg();
2326 // Only handle scalars.
2327 LLT Ty = MRI.getType(AddLHS);
2328 if (Ty.isVector())
2329 return false;
2330 // Since G_ICMP is modeled as ADDS/SUBS/ANDS, we can handle 32 bits or 64
2331 // bits.
2332 unsigned Size = Ty.getSizeInBits();
2333 if (Size != 32 && Size != 64)
2334 return false;
2335 auto MatchCmp = [&](Register Reg) -> MachineInstr * {
2336 if (!MRI.hasOneNonDBGUse(Reg))
2337 return nullptr;
2338 // If the LHS of the add is 32 bits, then we want to fold a 32-bit
2339 // compare.
2340 if (Size == 32)
2341 return getOpcodeDef(TargetOpcode::G_ICMP, Reg, MRI);
2342 // We model scalar compares using 32-bit destinations right now.
2343 // If it's a 64-bit compare, it'll have 64-bit sources.
2344 Register ZExt;
2345 if (!mi_match(Reg, MRI,
 2346 m_OneNonDBGUse(m_GZExt(m_OneNonDBGUse(m_Reg(ZExt))))))
 2347 return nullptr;
2348 auto *Cmp = getOpcodeDef(TargetOpcode::G_ICMP, ZExt, MRI);
2349 if (!Cmp ||
2350 MRI.getType(Cmp->getOperand(2).getReg()).getSizeInBits() != 64)
2351 return nullptr;
2352 return Cmp;
2353 };
2354 // Try to match
2355 // z + (cmp pred, x, y)
2356 MachineInstr *Cmp = MatchCmp(AddRHS);
2357 if (!Cmp) {
2358 // (cmp pred, x, y) + z
2359 std::swap(AddLHS, AddRHS);
2360 Cmp = MatchCmp(AddRHS);
2361 if (!Cmp)
2362 return false;
2363 }
2364 auto &PredOp = Cmp->getOperand(1);
2365 auto Pred = static_cast<CmpInst::Predicate>(PredOp.getPredicate());
2366 const AArch64CC::CondCode InvCC =
 2367 changeICMPPredToAArch64CC(CmpInst::getInversePredicate(Pred));
 2368 MIB.setInstrAndDebugLoc(I);
 2369 emitIntegerCompare(/*LHS=*/Cmp->getOperand(2),
2370 /*RHS=*/Cmp->getOperand(3), PredOp, MIB);
2371 emitCSINC(/*Dst=*/AddDst, /*Src =*/AddLHS, /*Src2=*/AddLHS, InvCC, MIB);
2372 I.eraseFromParent();
2373 return true;
2374 }
2375 case TargetOpcode::G_OR: {
2376 // Look for operations that take the lower `Width=Size-ShiftImm` bits of
2377 // `ShiftSrc` and insert them into the upper `Width` bits of `MaskSrc` via
2378 // shifting and masking that we can replace with a BFI (encoded as a BFM).
2379 Register Dst = I.getOperand(0).getReg();
2380 LLT Ty = MRI.getType(Dst);
2381
2382 if (!Ty.isScalar())
2383 return false;
2384
2385 unsigned Size = Ty.getSizeInBits();
2386 if (Size != 32 && Size != 64)
2387 return false;
2388
2389 Register ShiftSrc;
2390 int64_t ShiftImm;
2391 Register MaskSrc;
2392 int64_t MaskImm;
2393 if (!mi_match(
2394 Dst, MRI,
2395 m_GOr(m_OneNonDBGUse(m_GShl(m_Reg(ShiftSrc), m_ICst(ShiftImm))),
2396 m_OneNonDBGUse(m_GAnd(m_Reg(MaskSrc), m_ICst(MaskImm))))))
2397 return false;
2398
2399 if (ShiftImm > Size || ((1ULL << ShiftImm) - 1ULL) != uint64_t(MaskImm))
2400 return false;
2401
2402 int64_t Immr = Size - ShiftImm;
2403 int64_t Imms = Size - ShiftImm - 1;
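 // Worked example: for Size = 32 and ShiftImm = 8 the match requires
 // MaskImm == 0xff, and we get Immr = 24, Imms = 23, i.e.
 // BFMWri dst, masksrc, shiftsrc, 24, 23, which is the alias
 // "bfi wdst, wshiftsrc, #8, #24": the low 24 bits of ShiftSrc are inserted
 // at bit 8 and the low 8 bits of MaskSrc are kept.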
2404 unsigned Opc = Size == 32 ? AArch64::BFMWri : AArch64::BFMXri;
2405 emitInstr(Opc, {Dst}, {MaskSrc, ShiftSrc, Immr, Imms}, MIB);
2406 I.eraseFromParent();
2407 return true;
2408 }
2409 case TargetOpcode::G_FENCE: {
2410 if (I.getOperand(1).getImm() == 0)
2411 BuildMI(MBB, I, MIMetadata(I), TII.get(TargetOpcode::MEMBARRIER));
2412 else
2413 BuildMI(MBB, I, MIMetadata(I), TII.get(AArch64::DMB))
2414 .addImm(I.getOperand(0).getImm() == 4 ? 0x9 : 0xb);
2415 I.eraseFromParent();
2416 return true;
2417 }
2418 default:
2419 return false;
2420 }
2421}
2422
2423bool AArch64InstructionSelector::select(MachineInstr &I) {
2424 assert(I.getParent() && "Instruction should be in a basic block!");
2425 assert(I.getParent()->getParent() && "Instruction should be in a function!");
2426
2427 MachineBasicBlock &MBB = *I.getParent();
2428 MachineFunction &MF = *MBB.getParent();
 2429 MachineRegisterInfo &MRI = MF.getRegInfo();
 2430
2431 const AArch64Subtarget *Subtarget = &MF.getSubtarget<AArch64Subtarget>();
2432 if (Subtarget->requiresStrictAlign()) {
2433 // We don't support this feature yet.
2434 LLVM_DEBUG(dbgs() << "AArch64 GISel does not support strict-align yet\n");
2435 return false;
2436 }
2437
 2438 MIB.setInstrAndDebugLoc(I);
 2439
2440 unsigned Opcode = I.getOpcode();
2441 // G_PHI requires same handling as PHI
2442 if (!I.isPreISelOpcode() || Opcode == TargetOpcode::G_PHI) {
2443 // Certain non-generic instructions also need some special handling.
2444
2445 if (Opcode == TargetOpcode::LOAD_STACK_GUARD)
 2446 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
 2447
2448 if (Opcode == TargetOpcode::PHI || Opcode == TargetOpcode::G_PHI) {
2449 const Register DefReg = I.getOperand(0).getReg();
2450 const LLT DefTy = MRI.getType(DefReg);
2451
2452 const RegClassOrRegBank &RegClassOrBank =
2453 MRI.getRegClassOrRegBank(DefReg);
2454
2455 const TargetRegisterClass *DefRC
2456 = RegClassOrBank.dyn_cast<const TargetRegisterClass *>();
2457 if (!DefRC) {
2458 if (!DefTy.isValid()) {
2459 LLVM_DEBUG(dbgs() << "PHI operand has no type, not a gvreg?\n");
2460 return false;
2461 }
2462 const RegisterBank &RB = *RegClassOrBank.get<const RegisterBank *>();
2463 DefRC = getRegClassForTypeOnBank(DefTy, RB);
2464 if (!DefRC) {
2465 LLVM_DEBUG(dbgs() << "PHI operand has unexpected size/bank\n");
2466 return false;
2467 }
2468 }
2469
2470 I.setDesc(TII.get(TargetOpcode::PHI));
2471
2472 return RBI.constrainGenericRegister(DefReg, *DefRC, MRI);
2473 }
2474
2475 if (I.isCopy())
2476 return selectCopy(I, TII, MRI, TRI, RBI);
2477
2478 if (I.isDebugInstr())
2479 return selectDebugInstr(I, MRI, RBI);
2480
2481 return true;
2482 }
2483
2484
2485 if (I.getNumOperands() != I.getNumExplicitOperands()) {
2486 LLVM_DEBUG(
2487 dbgs() << "Generic instruction has unexpected implicit operands\n");
2488 return false;
2489 }
2490
2491 // Try to do some lowering before we start instruction selecting. These
2492 // lowerings are purely transformations on the input G_MIR and so selection
2493 // must continue after any modification of the instruction.
2494 if (preISelLower(I)) {
2495 Opcode = I.getOpcode(); // The opcode may have been modified, refresh it.
2496 }
2497
 2498 // There may be patterns that the importer can't deal with optimally but
 2499 // still selects to a suboptimal sequence, so our custom C++ selection code
 2500 // later never has a chance to work on them. Therefore, we have an early
2501 // selection attempt here to give priority to certain selection routines
2502 // over the imported ones.
2503 if (earlySelect(I))
2504 return true;
2505
2506 if (selectImpl(I, *CoverageInfo))
2507 return true;
2508
2509 LLT Ty =
2510 I.getOperand(0).isReg() ? MRI.getType(I.getOperand(0).getReg()) : LLT{};
2511
2512 switch (Opcode) {
2513 case TargetOpcode::G_SBFX:
2514 case TargetOpcode::G_UBFX: {
2515 static const unsigned OpcTable[2][2] = {
2516 {AArch64::UBFMWri, AArch64::UBFMXri},
2517 {AArch64::SBFMWri, AArch64::SBFMXri}};
2518 bool IsSigned = Opcode == TargetOpcode::G_SBFX;
2519 unsigned Size = Ty.getSizeInBits();
2520 unsigned Opc = OpcTable[IsSigned][Size == 64];
2521 auto Cst1 =
2522 getIConstantVRegValWithLookThrough(I.getOperand(2).getReg(), MRI);
2523 assert(Cst1 && "Should have gotten a constant for src 1?");
2524 auto Cst2 =
2525 getIConstantVRegValWithLookThrough(I.getOperand(3).getReg(), MRI);
2526 assert(Cst2 && "Should have gotten a constant for src 2?");
2527 auto LSB = Cst1->Value.getZExtValue();
2528 auto Width = Cst2->Value.getZExtValue();
2529 auto BitfieldInst =
2530 MIB.buildInstr(Opc, {I.getOperand(0)}, {I.getOperand(1)})
2531 .addImm(LSB)
2532 .addImm(LSB + Width - 1);
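 // Worked example: a G_UBFX with LSB = 4 and Width = 8 becomes
 // UBFMWri dst, src, 4, 11 (immr = LSB, imms = LSB + Width - 1), which the
 // assembler prints as "ubfx wdst, wsrc, #4, #8" and extracts bits [11:4].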
2533 I.eraseFromParent();
2534 return constrainSelectedInstRegOperands(*BitfieldInst, TII, TRI, RBI);
2535 }
2536 case TargetOpcode::G_BRCOND:
2537 return selectCompareBranch(I, MF, MRI);
2538
2539 case TargetOpcode::G_BRINDIRECT: {
2540 I.setDesc(TII.get(AArch64::BR));
 2541 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
 2542 }
2543
2544 case TargetOpcode::G_BRJT:
2545 return selectBrJT(I, MRI);
2546
2547 case AArch64::G_ADD_LOW: {
 2548 // This op may have been separated from its ADRP companion by the localizer
2549 // or some other code motion pass. Given that many CPUs will try to
2550 // macro fuse these operations anyway, select this into a MOVaddr pseudo
2551 // which will later be expanded into an ADRP+ADD pair after scheduling.
2552 MachineInstr *BaseMI = MRI.getVRegDef(I.getOperand(1).getReg());
2553 if (BaseMI->getOpcode() != AArch64::ADRP) {
2554 I.setDesc(TII.get(AArch64::ADDXri));
2555 I.addOperand(MachineOperand::CreateImm(0));
 2556 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
 2557 }
2558 assert(TM.getCodeModel() == CodeModel::Small &&
2559 "Expected small code model");
2560 auto Op1 = BaseMI->getOperand(1);
2561 auto Op2 = I.getOperand(2);
2562 auto MovAddr = MIB.buildInstr(AArch64::MOVaddr, {I.getOperand(0)}, {})
2563 .addGlobalAddress(Op1.getGlobal(), Op1.getOffset(),
2564 Op1.getTargetFlags())
2565 .addGlobalAddress(Op2.getGlobal(), Op2.getOffset(),
2566 Op2.getTargetFlags());
2567 I.eraseFromParent();
2568 return constrainSelectedInstRegOperands(*MovAddr, TII, TRI, RBI);
2569 }
2570
2571 case TargetOpcode::G_FCONSTANT:
2572 case TargetOpcode::G_CONSTANT: {
2573 const bool isFP = Opcode == TargetOpcode::G_FCONSTANT;
2574
2575 const LLT s8 = LLT::scalar(8);
2576 const LLT s16 = LLT::scalar(16);
2577 const LLT s32 = LLT::scalar(32);
2578 const LLT s64 = LLT::scalar(64);
2579 const LLT s128 = LLT::scalar(128);
2580 const LLT p0 = LLT::pointer(0, 64);
2581
2582 const Register DefReg = I.getOperand(0).getReg();
2583 const LLT DefTy = MRI.getType(DefReg);
2584 const unsigned DefSize = DefTy.getSizeInBits();
2585 const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI);
2586
2587 // FIXME: Redundant check, but even less readable when factored out.
2588 if (isFP) {
2589 if (Ty != s16 && Ty != s32 && Ty != s64 && Ty != s128) {
2590 LLVM_DEBUG(dbgs() << "Unable to materialize FP " << Ty
2591 << " constant, expected: " << s16 << " or " << s32
2592 << " or " << s64 << " or " << s128 << '\n');
2593 return false;
2594 }
2595
2596 if (RB.getID() != AArch64::FPRRegBankID) {
2597 LLVM_DEBUG(dbgs() << "Unable to materialize FP " << Ty
2598 << " constant on bank: " << RB
2599 << ", expected: FPR\n");
2600 return false;
2601 }
2602
2603 // The case when we have 0.0 is covered by tablegen. Reject it here so we
2604 // can be sure tablegen works correctly and isn't rescued by this code.
2605 // 0.0 is not covered by tablegen for FP128. So we will handle this
2606 // scenario in the code here.
2607 if (DefSize != 128 && I.getOperand(1).getFPImm()->isExactlyValue(0.0))
2608 return false;
2609 } else {
2610 // s32 and s64 are covered by tablegen.
2611 if (Ty != p0 && Ty != s8 && Ty != s16) {
2612 LLVM_DEBUG(dbgs() << "Unable to materialize integer " << Ty
2613 << " constant, expected: " << s32 << ", " << s64
2614 << ", or " << p0 << '\n');
2615 return false;
2616 }
2617
2618 if (RB.getID() != AArch64::GPRRegBankID) {
2619 LLVM_DEBUG(dbgs() << "Unable to materialize integer " << Ty
2620 << " constant on bank: " << RB
2621 << ", expected: GPR\n");
2622 return false;
2623 }
2624 }
2625
2626 if (isFP) {
2627 const TargetRegisterClass &FPRRC = *getRegClassForTypeOnBank(DefTy, RB);
2628 // For 16, 64, and 128b values, emit a constant pool load.
2629 switch (DefSize) {
2630 default:
2631 llvm_unreachable("Unexpected destination size for G_FCONSTANT?");
2632 case 32:
2633 case 64: {
2634 bool OptForSize = shouldOptForSize(&MF);
2635 const auto &TLI = MF.getSubtarget().getTargetLowering();
2636 // If TLI says that this fpimm is illegal, then we'll expand to a
2637 // constant pool load.
2638 if (TLI->isFPImmLegal(I.getOperand(1).getFPImm()->getValueAPF(),
2639 EVT::getFloatingPointVT(DefSize), OptForSize))
2640 break;
2641 [[fallthrough]];
2642 }
2643 case 16:
2644 case 128: {
2645 auto *FPImm = I.getOperand(1).getFPImm();
2646 auto *LoadMI = emitLoadFromConstantPool(FPImm, MIB);
2647 if (!LoadMI) {
2648 LLVM_DEBUG(dbgs() << "Failed to load double constant pool entry\n");
2649 return false;
2650 }
2651 MIB.buildCopy({DefReg}, {LoadMI->getOperand(0).getReg()});
2652 I.eraseFromParent();
2653 return RBI.constrainGenericRegister(DefReg, FPRRC, MRI);
2654 }
2655 }
2656
2657 assert((DefSize == 32 || DefSize == 64) && "Unexpected const def size");
2658 // Either emit a FMOV, or emit a copy to emit a normal mov.
2659 const Register DefGPRReg = MRI.createVirtualRegister(
2660 DefSize == 32 ? &AArch64::GPR32RegClass : &AArch64::GPR64RegClass);
2661 MachineOperand &RegOp = I.getOperand(0);
2662 RegOp.setReg(DefGPRReg);
2663 MIB.setInsertPt(MIB.getMBB(), std::next(I.getIterator()));
2664 MIB.buildCopy({DefReg}, {DefGPRReg});
2665
2666 if (!RBI.constrainGenericRegister(DefReg, FPRRC, MRI)) {
2667 LLVM_DEBUG(dbgs() << "Failed to constrain G_FCONSTANT def operand\n");
2668 return false;
2669 }
2670
2671 MachineOperand &ImmOp = I.getOperand(1);
2672 // FIXME: Is going through int64_t always correct?
2673 ImmOp.ChangeToImmediate(
 2674 ImmOp.getFPImm()->getValueAPF().bitcastToAPInt().getZExtValue());
 2675 } else if (I.getOperand(1).isCImm()) {
2676 uint64_t Val = I.getOperand(1).getCImm()->getZExtValue();
2677 I.getOperand(1).ChangeToImmediate(Val);
2678 } else if (I.getOperand(1).isImm()) {
2679 uint64_t Val = I.getOperand(1).getImm();
2680 I.getOperand(1).ChangeToImmediate(Val);
2681 }
2682
2683 const unsigned MovOpc =
2684 DefSize == 64 ? AArch64::MOVi64imm : AArch64::MOVi32imm;
2685 I.setDesc(TII.get(MovOpc));
 2686 constrainSelectedInstRegOperands(I, TII, TRI, RBI);
 2687 return true;
2688 }
2689 case TargetOpcode::G_EXTRACT: {
2690 Register DstReg = I.getOperand(0).getReg();
2691 Register SrcReg = I.getOperand(1).getReg();
2692 LLT SrcTy = MRI.getType(SrcReg);
2693 LLT DstTy = MRI.getType(DstReg);
2694 (void)DstTy;
2695 unsigned SrcSize = SrcTy.getSizeInBits();
2696
2697 if (SrcTy.getSizeInBits() > 64) {
2698 // This should be an extract of an s128, which is like a vector extract.
2699 if (SrcTy.getSizeInBits() != 128)
2700 return false;
2701 // Only support extracting 64 bits from an s128 at the moment.
2702 if (DstTy.getSizeInBits() != 64)
2703 return false;
2704
2705 unsigned Offset = I.getOperand(2).getImm();
2706 if (Offset % 64 != 0)
2707 return false;
2708
2709 // Check we have the right regbank always.
2710 const RegisterBank &SrcRB = *RBI.getRegBank(SrcReg, MRI, TRI);
2711 const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
2712 assert(SrcRB.getID() == DstRB.getID() && "Wrong extract regbank!");
2713
2714 if (SrcRB.getID() == AArch64::GPRRegBankID) {
2715 auto NewI =
2716 MIB.buildInstr(TargetOpcode::COPY, {DstReg}, {})
2717 .addUse(SrcReg, 0,
2718 Offset == 0 ? AArch64::sube64 : AArch64::subo64);
2719 constrainOperandRegClass(MF, TRI, MRI, TII, RBI, *NewI,
2720 AArch64::GPR64RegClass, NewI->getOperand(0));
2721 I.eraseFromParent();
2722 return true;
2723 }
2724
2725 // Emit the same code as a vector extract.
2726 // Offset must be a multiple of 64.
2727 unsigned LaneIdx = Offset / 64;
2728 MachineInstr *Extract = emitExtractVectorElt(
2729 DstReg, DstRB, LLT::scalar(64), SrcReg, LaneIdx, MIB);
2730 if (!Extract)
2731 return false;
2732 I.eraseFromParent();
2733 return true;
2734 }
2735
2736 I.setDesc(TII.get(SrcSize == 64 ? AArch64::UBFMXri : AArch64::UBFMWri));
2737 MachineInstrBuilder(MF, I).addImm(I.getOperand(2).getImm() +
2738 Ty.getSizeInBits() - 1);
2739
2740 if (SrcSize < 64) {
2741 assert(SrcSize == 32 && DstTy.getSizeInBits() == 16 &&
2742 "unexpected G_EXTRACT types");
 2743 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
 2744 }
2745
2746 DstReg = MRI.createGenericVirtualRegister(LLT::scalar(64));
2747 MIB.setInsertPt(MIB.getMBB(), std::next(I.getIterator()));
2748 MIB.buildInstr(TargetOpcode::COPY, {I.getOperand(0).getReg()}, {})
2749 .addReg(DstReg, 0, AArch64::sub_32);
2750 RBI.constrainGenericRegister(I.getOperand(0).getReg(),
2751 AArch64::GPR32RegClass, MRI);
2752 I.getOperand(0).setReg(DstReg);
2753
 2754 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
 2755 }
2756
2757 case TargetOpcode::G_INSERT: {
2758 LLT SrcTy = MRI.getType(I.getOperand(2).getReg());
2759 LLT DstTy = MRI.getType(I.getOperand(0).getReg());
2760 unsigned DstSize = DstTy.getSizeInBits();
2761 // Larger inserts are vectors, same-size ones should be something else by
2762 // now (split up or turned into COPYs).
2763 if (Ty.getSizeInBits() > 64 || SrcTy.getSizeInBits() > 32)
2764 return false;
2765
2766 I.setDesc(TII.get(DstSize == 64 ? AArch64::BFMXri : AArch64::BFMWri));
2767 unsigned LSB = I.getOperand(3).getImm();
2768 unsigned Width = MRI.getType(I.getOperand(2).getReg()).getSizeInBits();
2769 I.getOperand(3).setImm((DstSize - LSB) % DstSize);
2770 MachineInstrBuilder(MF, I).addImm(Width - 1);
2771
2772 if (DstSize < 64) {
2773 assert(DstSize == 32 && SrcTy.getSizeInBits() == 16 &&
2774 "unexpected G_INSERT types");
 2775 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
 2776 }
2777
2778 Register SrcReg = MRI.createGenericVirtualRegister(LLT::scalar(64));
2779 BuildMI(MBB, I.getIterator(), I.getDebugLoc(),
2780 TII.get(AArch64::SUBREG_TO_REG))
2781 .addDef(SrcReg)
2782 .addImm(0)
2783 .addUse(I.getOperand(2).getReg())
2784 .addImm(AArch64::sub_32);
2785 RBI.constrainGenericRegister(I.getOperand(2).getReg(),
2786 AArch64::GPR32RegClass, MRI);
2787 I.getOperand(2).setReg(SrcReg);
2788
 2789 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
 2790 }
2791 case TargetOpcode::G_FRAME_INDEX: {
2792 // allocas and G_FRAME_INDEX are only supported in addrspace(0).
2793 if (Ty != LLT::pointer(0, 64)) {
2794 LLVM_DEBUG(dbgs() << "G_FRAME_INDEX pointer has type: " << Ty
2795 << ", expected: " << LLT::pointer(0, 64) << '\n');
2796 return false;
2797 }
2798 I.setDesc(TII.get(AArch64::ADDXri));
2799
2800 // MOs for a #0 shifted immediate.
2801 I.addOperand(MachineOperand::CreateImm(0));
2802 I.addOperand(MachineOperand::CreateImm(0));
2803
 2804 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
 2805 }
2806
2807 case TargetOpcode::G_GLOBAL_VALUE: {
2808 const GlobalValue *GV = nullptr;
2809 unsigned OpFlags;
2810 if (I.getOperand(1).isSymbol()) {
2811 OpFlags = I.getOperand(1).getTargetFlags();
2812 // Currently only used by "RtLibUseGOT".
2813 assert(OpFlags == AArch64II::MO_GOT);
2814 } else {
2815 GV = I.getOperand(1).getGlobal();
2816 if (GV->isThreadLocal())
2817 return selectTLSGlobalValue(I, MRI);
2818 OpFlags = STI.ClassifyGlobalReference(GV, TM);
2819 }
2820
2821 if (OpFlags & AArch64II::MO_GOT) {
2822 I.setDesc(TII.get(AArch64::LOADgot));
2823 I.getOperand(1).setTargetFlags(OpFlags);
2824 } else if (TM.getCodeModel() == CodeModel::Large &&
2825 !TM.isPositionIndependent()) {
2826 // Materialize the global using movz/movk instructions.
2827 materializeLargeCMVal(I, GV, OpFlags);
2828 I.eraseFromParent();
2829 return true;
2830 } else if (TM.getCodeModel() == CodeModel::Tiny) {
2831 I.setDesc(TII.get(AArch64::ADR));
2832 I.getOperand(1).setTargetFlags(OpFlags);
2833 } else {
2834 I.setDesc(TII.get(AArch64::MOVaddr));
2835 I.getOperand(1).setTargetFlags(OpFlags | AArch64II::MO_PAGE);
2836 MachineInstrBuilder MIB(MF, I);
2837 MIB.addGlobalAddress(GV, I.getOperand(1).getOffset(),
 2838 OpFlags | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
 2839 }
 2840 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
 2841 }
2842
2843 case TargetOpcode::G_ZEXTLOAD:
2844 case TargetOpcode::G_LOAD:
2845 case TargetOpcode::G_STORE: {
2846 GLoadStore &LdSt = cast<GLoadStore>(I);
2847 bool IsZExtLoad = I.getOpcode() == TargetOpcode::G_ZEXTLOAD;
2848 LLT PtrTy = MRI.getType(LdSt.getPointerReg());
2849
2850 if (PtrTy != LLT::pointer(0, 64)) {
2851 LLVM_DEBUG(dbgs() << "Load/Store pointer has type: " << PtrTy
2852 << ", expected: " << LLT::pointer(0, 64) << '\n');
2853 return false;
2854 }
2855
2856 uint64_t MemSizeInBytes = LdSt.getMemSize().getValue();
2857 unsigned MemSizeInBits = LdSt.getMemSizeInBits().getValue();
2858 AtomicOrdering Order = LdSt.getMMO().getSuccessOrdering();
2859
2860 // Need special instructions for atomics that affect ordering.
2861 if (Order != AtomicOrdering::NotAtomic &&
2862 Order != AtomicOrdering::Unordered &&
2863 Order != AtomicOrdering::Monotonic) {
2864 assert(!isa<GZExtLoad>(LdSt));
2865 if (MemSizeInBytes > 64)
2866 return false;
2867
2868 if (isa<GLoad>(LdSt)) {
2869 static constexpr unsigned LDAPROpcodes[] = {
2870 AArch64::LDAPRB, AArch64::LDAPRH, AArch64::LDAPRW, AArch64::LDAPRX};
2871 static constexpr unsigned LDAROpcodes[] = {
2872 AArch64::LDARB, AArch64::LDARH, AArch64::LDARW, AArch64::LDARX};
2873 ArrayRef<unsigned> Opcodes =
2874 STI.hasRCPC() && Order != AtomicOrdering::SequentiallyConsistent
2875 ? LDAPROpcodes
2876 : LDAROpcodes;
2877 I.setDesc(TII.get(Opcodes[Log2_32(MemSizeInBytes)]));
2878 } else {
2879 static constexpr unsigned Opcodes[] = {AArch64::STLRB, AArch64::STLRH,
2880 AArch64::STLRW, AArch64::STLRX};
2881 Register ValReg = LdSt.getReg(0);
2882 if (MRI.getType(ValReg).getSizeInBits() == 64 && MemSizeInBits != 64) {
2883 // Emit a subreg copy of 32 bits.
2884 Register NewVal = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
2885 MIB.buildInstr(TargetOpcode::COPY, {NewVal}, {})
2886 .addReg(I.getOperand(0).getReg(), 0, AArch64::sub_32);
2887 I.getOperand(0).setReg(NewVal);
2888 }
2889 I.setDesc(TII.get(Opcodes[Log2_32(MemSizeInBytes)]));
2890 }
 2891 constrainSelectedInstRegOperands(I, TII, TRI, RBI);
 2892 return true;
2893 }
2894
2895#ifndef NDEBUG
2896 const Register PtrReg = LdSt.getPointerReg();
2897 const RegisterBank &PtrRB = *RBI.getRegBank(PtrReg, MRI, TRI);
2898 // Check that the pointer register is valid.
2899 assert(PtrRB.getID() == AArch64::GPRRegBankID &&
2900 "Load/Store pointer operand isn't a GPR");
2901 assert(MRI.getType(PtrReg).isPointer() &&
2902 "Load/Store pointer operand isn't a pointer");
2903#endif
2904
2905 const Register ValReg = LdSt.getReg(0);
2906 const LLT ValTy = MRI.getType(ValReg);
2907 const RegisterBank &RB = *RBI.getRegBank(ValReg, MRI, TRI);
2908
2909 // The code below doesn't support truncating stores, so we need to split it
2910 // again.
2911 if (isa<GStore>(LdSt) && ValTy.getSizeInBits() > MemSizeInBits) {
2912 unsigned SubReg;
2913 LLT MemTy = LdSt.getMMO().getMemoryType();
2914 auto *RC = getRegClassForTypeOnBank(MemTy, RB);
2915 if (!getSubRegForClass(RC, TRI, SubReg))
2916 return false;
2917
2918 // Generate a subreg copy.
2919 auto Copy = MIB.buildInstr(TargetOpcode::COPY, {MemTy}, {})
2920 .addReg(ValReg, 0, SubReg)
2921 .getReg(0);
2922 RBI.constrainGenericRegister(Copy, *RC, MRI);
2923 LdSt.getOperand(0).setReg(Copy);
2924 } else if (isa<GLoad>(LdSt) && ValTy.getSizeInBits() > MemSizeInBits) {
2925 // If this is an any-extending load from the FPR bank, split it into a regular
2926 // load + extend.
2927 if (RB.getID() == AArch64::FPRRegBankID) {
2928 unsigned SubReg;
2929 LLT MemTy = LdSt.getMMO().getMemoryType();
2930 auto *RC = getRegClassForTypeOnBank(MemTy, RB);
2931 if (!getSubRegForClass(RC, TRI, SubReg))
2932 return false;
2933 Register OldDst = LdSt.getReg(0);
2934 Register NewDst =
2935 MRI.createGenericVirtualRegister(LdSt.getMMO().getMemoryType());
2936 LdSt.getOperand(0).setReg(NewDst);
2937 MRI.setRegBank(NewDst, RB);
2938 // Generate a SUBREG_TO_REG to extend it.
2939 MIB.setInsertPt(MIB.getMBB(), std::next(LdSt.getIterator()));
2940 MIB.buildInstr(AArch64::SUBREG_TO_REG, {OldDst}, {})
2941 .addImm(0)
2942 .addUse(NewDst)
2943 .addImm(SubReg);
2944 auto SubRegRC = getRegClassForTypeOnBank(MRI.getType(OldDst), RB);
2945 RBI.constrainGenericRegister(OldDst, *SubRegRC, MRI);
2946 MIB.setInstr(LdSt);
2947 }
2948 }
2949
2950 // Helper lambda for partially selecting I. Either returns the original
2951 // instruction with an updated opcode, or a new instruction.
2952 auto SelectLoadStoreAddressingMode = [&]() -> MachineInstr * {
2953 bool IsStore = isa<GStore>(I);
2954 const unsigned NewOpc =
2955 selectLoadStoreUIOp(I.getOpcode(), RB.getID(), MemSizeInBits);
2956 if (NewOpc == I.getOpcode())
2957 return nullptr;
2958 // Check if we can fold anything into the addressing mode.
2959 auto AddrModeFns =
2960 selectAddrModeIndexed(I.getOperand(1), MemSizeInBytes);
2961 if (!AddrModeFns) {
2962 // Can't fold anything. Use the original instruction.
2963 I.setDesc(TII.get(NewOpc));
2964 I.addOperand(MachineOperand::CreateImm(0));
2965 return &I;
2966 }
2967
2968 // Folded something. Create a new instruction and return it.
2969 auto NewInst = MIB.buildInstr(NewOpc, {}, {}, I.getFlags());
2970 Register CurValReg = I.getOperand(0).getReg();
2971 IsStore ? NewInst.addUse(CurValReg) : NewInst.addDef(CurValReg);
2972 NewInst.cloneMemRefs(I);
2973 for (auto &Fn : *AddrModeFns)
2974 Fn(NewInst);
2975 I.eraseFromParent();
2976 return &*NewInst;
2977 };
2978
2979 MachineInstr *LoadStore = SelectLoadStoreAddressingMode();
2980 if (!LoadStore)
2981 return false;
2982
2983 // If we're storing a 0, use WZR/XZR.
2984 if (Opcode == TargetOpcode::G_STORE) {
 2985 auto CVal = getIConstantVRegValWithLookThrough(
 2986 LoadStore->getOperand(0).getReg(), MRI);
2987 if (CVal && CVal->Value == 0) {
2988 switch (LoadStore->getOpcode()) {
2989 case AArch64::STRWui:
2990 case AArch64::STRHHui:
2991 case AArch64::STRBBui:
2992 LoadStore->getOperand(0).setReg(AArch64::WZR);
2993 break;
2994 case AArch64::STRXui:
2995 LoadStore->getOperand(0).setReg(AArch64::XZR);
2996 break;
2997 }
2998 }
2999 }
3000
3001 if (IsZExtLoad || (Opcode == TargetOpcode::G_LOAD &&
3002 ValTy == LLT::scalar(64) && MemSizeInBits == 32)) {
3003 // The any/zextload from a smaller type to i32 should be handled by the
3004 // importer.
3005 if (MRI.getType(LoadStore->getOperand(0).getReg()).getSizeInBits() != 64)
3006 return false;
3007 // If we have an extending load then change the load's type to be a
3008 // narrower reg and zero_extend with SUBREG_TO_REG.
3009 Register LdReg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
3010 Register DstReg = LoadStore->getOperand(0).getReg();
3011 LoadStore->getOperand(0).setReg(LdReg);
3012
3013 MIB.setInsertPt(MIB.getMBB(), std::next(LoadStore->getIterator()));
3014 MIB.buildInstr(AArch64::SUBREG_TO_REG, {DstReg}, {})
3015 .addImm(0)
3016 .addUse(LdReg)
3017 .addImm(AArch64::sub_32);
3018 constrainSelectedInstRegOperands(*LoadStore, TII, TRI, RBI);
3019 return RBI.constrainGenericRegister(DstReg, AArch64::GPR64allRegClass,
3020 MRI);
3021 }
3022 return constrainSelectedInstRegOperands(*LoadStore, TII, TRI, RBI);
3023 }
3024
3025 case TargetOpcode::G_INDEXED_ZEXTLOAD:
3026 case TargetOpcode::G_INDEXED_SEXTLOAD:
3027 return selectIndexedExtLoad(I, MRI);
3028 case TargetOpcode::G_INDEXED_LOAD:
3029 return selectIndexedLoad(I, MRI);
3030 case TargetOpcode::G_INDEXED_STORE:
3031 return selectIndexedStore(cast<GIndexedStore>(I), MRI);
3032
3033 case TargetOpcode::G_LSHR:
3034 case TargetOpcode::G_ASHR:
3035 if (MRI.getType(I.getOperand(0).getReg()).isVector())
3036 return selectVectorAshrLshr(I, MRI);
3037 [[fallthrough]];
3038 case TargetOpcode::G_SHL:
3039 if (Opcode == TargetOpcode::G_SHL &&
3040 MRI.getType(I.getOperand(0).getReg()).isVector())
3041 return selectVectorSHL(I, MRI);
3042
 3043 // These shifts were legalized to have 64-bit shift amounts because we
 3044 // want to take advantage of the selection patterns that assume the
 3045 // immediates are s64s. However, selectBinaryOp will assume both operands
 3046 // have the same bit size.
3047 {
3048 Register SrcReg = I.getOperand(1).getReg();
3049 Register ShiftReg = I.getOperand(2).getReg();
3050 const LLT ShiftTy = MRI.getType(ShiftReg);
3051 const LLT SrcTy = MRI.getType(SrcReg);
3052 if (!SrcTy.isVector() && SrcTy.getSizeInBits() == 32 &&
3053 ShiftTy.getSizeInBits() == 64) {
3054 assert(!ShiftTy.isVector() && "unexpected vector shift ty");
3055 // Insert a subregister copy to implement a 64->32 trunc
3056 auto Trunc = MIB.buildInstr(TargetOpcode::COPY, {SrcTy}, {})
3057 .addReg(ShiftReg, 0, AArch64::sub_32);
3058 MRI.setRegBank(Trunc.getReg(0), RBI.getRegBank(AArch64::GPRRegBankID));
3059 I.getOperand(2).setReg(Trunc.getReg(0));
3060 }
3061 }
3062 [[fallthrough]];
3063 case TargetOpcode::G_OR: {
3064 // Reject the various things we don't support yet.
3065 if (unsupportedBinOp(I, RBI, MRI, TRI))
3066 return false;
3067
3068 const unsigned OpSize = Ty.getSizeInBits();
3069
3070 const Register DefReg = I.getOperand(0).getReg();
3071 const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI);
3072
3073 const unsigned NewOpc = selectBinaryOp(I.getOpcode(), RB.getID(), OpSize);
3074 if (NewOpc == I.getOpcode())
3075 return false;
3076
3077 I.setDesc(TII.get(NewOpc));
3078 // FIXME: Should the type be always reset in setDesc?
3079
3080 // Now that we selected an opcode, we need to constrain the register
3081 // operands to use appropriate classes.
 3082 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
 3083 }
3084
3085 case TargetOpcode::G_PTR_ADD: {
3086 emitADD(I.getOperand(0).getReg(), I.getOperand(1), I.getOperand(2), MIB);
3087 I.eraseFromParent();
3088 return true;
3089 }
3090
3091 case TargetOpcode::G_SADDE:
3092 case TargetOpcode::G_UADDE:
3093 case TargetOpcode::G_SSUBE:
3094 case TargetOpcode::G_USUBE:
3095 case TargetOpcode::G_SADDO:
3096 case TargetOpcode::G_UADDO:
3097 case TargetOpcode::G_SSUBO:
3098 case TargetOpcode::G_USUBO:
3099 return selectOverflowOp(I, MRI);
3100
3101 case TargetOpcode::G_PTRMASK: {
3102 Register MaskReg = I.getOperand(2).getReg();
3103 std::optional<int64_t> MaskVal = getIConstantVRegSExtVal(MaskReg, MRI);
3104 // TODO: Implement arbitrary cases
3105 if (!MaskVal || !isShiftedMask_64(*MaskVal))
3106 return false;
3107
3108 uint64_t Mask = *MaskVal;
3109 I.setDesc(TII.get(AArch64::ANDXri));
3110 I.getOperand(2).ChangeToImmediate(
 3111 AArch64_AM::encodeLogicalImmediate(Mask, 64));
 3112
 3113 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
 3114 }
3115 case TargetOpcode::G_PTRTOINT:
3116 case TargetOpcode::G_TRUNC: {
3117 const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
3118 const LLT SrcTy = MRI.getType(I.getOperand(1).getReg());
3119
3120 const Register DstReg = I.getOperand(0).getReg();
3121 const Register SrcReg = I.getOperand(1).getReg();
3122
3123 const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
3124 const RegisterBank &SrcRB = *RBI.getRegBank(SrcReg, MRI, TRI);
3125
3126 if (DstRB.getID() != SrcRB.getID()) {
3127 LLVM_DEBUG(
3128 dbgs() << "G_TRUNC/G_PTRTOINT input/output on different banks\n");
3129 return false;
3130 }
3131
3132 if (DstRB.getID() == AArch64::GPRRegBankID) {
3133 const TargetRegisterClass *DstRC = getRegClassForTypeOnBank(DstTy, DstRB);
3134 if (!DstRC)
3135 return false;
3136
3137 const TargetRegisterClass *SrcRC = getRegClassForTypeOnBank(SrcTy, SrcRB);
3138 if (!SrcRC)
3139 return false;
3140
3141 if (!RBI.constrainGenericRegister(SrcReg, *SrcRC, MRI) ||
3142 !RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) {
3143 LLVM_DEBUG(dbgs() << "Failed to constrain G_TRUNC/G_PTRTOINT\n");
3144 return false;
3145 }
3146
3147 if (DstRC == SrcRC) {
3148 // Nothing to be done
3149 } else if (Opcode == TargetOpcode::G_TRUNC && DstTy == LLT::scalar(32) &&
3150 SrcTy == LLT::scalar(64)) {
3151 llvm_unreachable("TableGen can import this case");
3152 return false;
3153 } else if (DstRC == &AArch64::GPR32RegClass &&
3154 SrcRC == &AArch64::GPR64RegClass) {
3155 I.getOperand(1).setSubReg(AArch64::sub_32);
3156 } else {
3157 LLVM_DEBUG(
3158 dbgs() << "Unhandled mismatched classes in G_TRUNC/G_PTRTOINT\n");
3159 return false;
3160 }
3161
3162 I.setDesc(TII.get(TargetOpcode::COPY));
3163 return true;
3164 } else if (DstRB.getID() == AArch64::FPRRegBankID) {
3165 if (DstTy == LLT::fixed_vector(4, 16) &&
3166 SrcTy == LLT::fixed_vector(4, 32)) {
3167 I.setDesc(TII.get(AArch64::XTNv4i16));
 3168 constrainSelectedInstRegOperands(I, TII, TRI, RBI);
 3169 return true;
3170 }
3171
3172 if (!SrcTy.isVector() && SrcTy.getSizeInBits() == 128) {
3173 MachineInstr *Extract = emitExtractVectorElt(
3174 DstReg, DstRB, LLT::scalar(DstTy.getSizeInBits()), SrcReg, 0, MIB);
3175 if (!Extract)
3176 return false;
3177 I.eraseFromParent();
3178 return true;
3179 }
3180
3181 // We might have a vector G_PTRTOINT, in which case just emit a COPY.
3182 if (Opcode == TargetOpcode::G_PTRTOINT) {
3183 assert(DstTy.isVector() && "Expected an FPR ptrtoint to be a vector");
3184 I.setDesc(TII.get(TargetOpcode::COPY));
3185 return selectCopy(I, TII, MRI, TRI, RBI);
3186 }
3187 }
3188
3189 return false;
3190 }
3191
3192 case TargetOpcode::G_ANYEXT: {
3193 if (selectUSMovFromExtend(I, MRI))
3194 return true;
3195
3196 const Register DstReg = I.getOperand(0).getReg();
3197 const Register SrcReg = I.getOperand(1).getReg();
3198
3199 const RegisterBank &RBDst = *RBI.getRegBank(DstReg, MRI, TRI);
3200 if (RBDst.getID() != AArch64::GPRRegBankID) {
3201 LLVM_DEBUG(dbgs() << "G_ANYEXT on bank: " << RBDst
3202 << ", expected: GPR\n");
3203 return false;
3204 }
3205
3206 const RegisterBank &RBSrc = *RBI.getRegBank(SrcReg, MRI, TRI);
3207 if (RBSrc.getID() != AArch64::GPRRegBankID) {
3208 LLVM_DEBUG(dbgs() << "G_ANYEXT on bank: " << RBSrc
3209 << ", expected: GPR\n");
3210 return false;
3211 }
3212
3213 const unsigned DstSize = MRI.getType(DstReg).getSizeInBits();
3214
3215 if (DstSize == 0) {
3216 LLVM_DEBUG(dbgs() << "G_ANYEXT operand has no size, not a gvreg?\n");
3217 return false;
3218 }
3219
3220 if (DstSize != 64 && DstSize > 32) {
3221 LLVM_DEBUG(dbgs() << "G_ANYEXT to size: " << DstSize
3222 << ", expected: 32 or 64\n");
3223 return false;
3224 }
3225 // At this point G_ANYEXT is just like a plain COPY, but we need
3226 // to explicitly form the 64-bit value if any.
3227 if (DstSize > 32) {
3228 Register ExtSrc = MRI.createVirtualRegister(&AArch64::GPR64allRegClass);
3229 BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::SUBREG_TO_REG))
3230 .addDef(ExtSrc)
3231 .addImm(0)
3232 .addUse(SrcReg)
3233 .addImm(AArch64::sub_32);
3234 I.getOperand(1).setReg(ExtSrc);
3235 }
3236 return selectCopy(I, TII, MRI, TRI, RBI);
3237 }
3238
3239 case TargetOpcode::G_ZEXT:
3240 case TargetOpcode::G_SEXT_INREG:
3241 case TargetOpcode::G_SEXT: {
3242 if (selectUSMovFromExtend(I, MRI))
3243 return true;
3244
3245 unsigned Opcode = I.getOpcode();
3246 const bool IsSigned = Opcode != TargetOpcode::G_ZEXT;
3247 const Register DefReg = I.getOperand(0).getReg();
3248 Register SrcReg = I.getOperand(1).getReg();
3249 const LLT DstTy = MRI.getType(DefReg);
3250 const LLT SrcTy = MRI.getType(SrcReg);
3251 unsigned DstSize = DstTy.getSizeInBits();
3252 unsigned SrcSize = SrcTy.getSizeInBits();
3253
3254 // SEXT_INREG has the same src reg size as dst, the size of the value to be
3255 // extended is encoded in the imm.
3256 if (Opcode == TargetOpcode::G_SEXT_INREG)
3257 SrcSize = I.getOperand(2).getImm();
3258
3259 if (DstTy.isVector())
3260 return false; // Should be handled by imported patterns.
3261
3262 assert((*RBI.getRegBank(DefReg, MRI, TRI)).getID() ==
3263 AArch64::GPRRegBankID &&
3264 "Unexpected ext regbank");
3265
3266 MachineInstr *ExtI;
3267
3268 // First, check whether we're extending the result of a load with a dest type
3269 // smaller than 32 bits; if so, this zext is redundant. GPR32 is the smallest
3270 // GPR register on AArch64 and all loads which are smaller automatically
3271 // zero-extend the upper bits. E.g.
3272 // %v(s8) = G_LOAD %p, :: (load 1)
3273 // %v2(s32) = G_ZEXT %v(s8)
3274 if (!IsSigned) {
3275 auto *LoadMI = getOpcodeDef(TargetOpcode::G_LOAD, SrcReg, MRI);
3276 bool IsGPR =
3277 RBI.getRegBank(SrcReg, MRI, TRI)->getID() == AArch64::GPRRegBankID;
3278 if (LoadMI && IsGPR) {
3279 const MachineMemOperand *MemOp = *LoadMI->memoperands_begin();
3280 unsigned BytesLoaded = MemOp->getSize().getValue();
3281 if (BytesLoaded < 4 && SrcTy.getSizeInBytes() == BytesLoaded)
3282 return selectCopy(I, TII, MRI, TRI, RBI);
3283 }
3284
3285 // For the 32-bit -> 64-bit case, we can emit a mov (ORRWrs)
3286 // + SUBREG_TO_REG.
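// A sketch of what gets emitted here (vreg names are illustrative):
//   %tmp:gpr32 = ORRWrs $wzr, %src, 0
//   %dst:gpr64 = SUBREG_TO_REG 0, %tmp, %subreg.sub_32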
3287 if (IsGPR && SrcSize == 32 && DstSize == 64) {
3288 Register SubregToRegSrc =
3289 MRI.createVirtualRegister(&AArch64::GPR32RegClass);
3290 const Register ZReg = AArch64::WZR;
3291 MIB.buildInstr(AArch64::ORRWrs, {SubregToRegSrc}, {ZReg, SrcReg})
3292 .addImm(0);
3293
3294 MIB.buildInstr(AArch64::SUBREG_TO_REG, {DefReg}, {})
3295 .addImm(0)
3296 .addUse(SubregToRegSrc)
3297 .addImm(AArch64::sub_32);
3298
3299 if (!RBI.constrainGenericRegister(DefReg, AArch64::GPR64RegClass,
3300 MRI)) {
3301 LLVM_DEBUG(dbgs() << "Failed to constrain G_ZEXT destination\n");
3302 return false;
3303 }
3304
3305 if (!RBI.constrainGenericRegister(SrcReg, AArch64::GPR32RegClass,
3306 MRI)) {
3307 LLVM_DEBUG(dbgs() << "Failed to constrain G_ZEXT source\n");
3308 return false;
3309 }
3310
3311 I.eraseFromParent();
3312 return true;
3313 }
3314 }
3315
3316 if (DstSize == 64) {
3317 if (Opcode != TargetOpcode::G_SEXT_INREG) {
3318 // FIXME: Can we avoid manually doing this?
3319 if (!RBI.constrainGenericRegister(SrcReg, AArch64::GPR32RegClass,
3320 MRI)) {
3321 LLVM_DEBUG(dbgs() << "Failed to constrain " << TII.getName(Opcode)
3322 << " operand\n");
3323 return false;
3324 }
3325 SrcReg = MIB.buildInstr(AArch64::SUBREG_TO_REG,
3326 {&AArch64::GPR64RegClass}, {})
3327 .addImm(0)
3328 .addUse(SrcReg)
3329 .addImm(AArch64::sub_32)
3330 .getReg(0);
3331 }
3332
3333 ExtI = MIB.buildInstr(IsSigned ? AArch64::SBFMXri : AArch64::UBFMXri,
3334 {DefReg}, {SrcReg})
3335 .addImm(0)
3336 .addImm(SrcSize - 1);
3337 } else if (DstSize <= 32) {
3338 ExtI = MIB.buildInstr(IsSigned ? AArch64::SBFMWri : AArch64::UBFMWri,
3339 {DefReg}, {SrcReg})
3340 .addImm(0)
3341 .addImm(SrcSize - 1);
3342 } else {
3343 return false;
3344 }
3345
3346 constrainSelectedInstRegOperands(*ExtI, TII, TRI, RBI);
3347 I.eraseFromParent();
3348 return true;
3349 }
3350
3351 case TargetOpcode::G_SITOFP:
3352 case TargetOpcode::G_UITOFP:
3353 case TargetOpcode::G_FPTOSI:
3354 case TargetOpcode::G_FPTOUI: {
3355 const LLT DstTy = MRI.getType(I.getOperand(0).getReg()),
3356 SrcTy = MRI.getType(I.getOperand(1).getReg());
3357 const unsigned NewOpc = selectFPConvOpc(Opcode, DstTy, SrcTy);
3358 if (NewOpc == Opcode)
3359 return false;
3360
3361 I.setDesc(TII.get(NewOpc));
3362 constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3363 I.setFlags(MachineInstr::NoFPExcept);
3364
3365 return true;
3366 }
3367
3368 case TargetOpcode::G_FREEZE:
3369 return selectCopy(I, TII, MRI, TRI, RBI);
3370
3371 case TargetOpcode::G_INTTOPTR:
3372 // The importer is currently unable to import pointer types since they
3373 // didn't exist in SelectionDAG.
3374 return selectCopy(I, TII, MRI, TRI, RBI);
3375
3376 case TargetOpcode::G_BITCAST:
3377 // Imported SelectionDAG rules can handle every bitcast except those that
3378 // bitcast from a type to the same type. Ideally, these shouldn't occur
3379 // but we might not run an optimizer that deletes them. The other exception
3380 // is bitcasts involving pointer types, as SelectionDAG has no knowledge
3381 // of them.
3382 return selectCopy(I, TII, MRI, TRI, RBI);
3383
3384 case TargetOpcode::G_SELECT: {
3385 auto &Sel = cast<GSelect>(I);
3386 const Register CondReg = Sel.getCondReg();
3387 const Register TReg = Sel.getTrueReg();
3388 const Register FReg = Sel.getFalseReg();
3389
3390 if (tryOptSelect(Sel))
3391 return true;
3392
3393 // Make sure to use an unused vreg instead of wzr, so that the peephole
3394 // optimizations will be able to optimize these.
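// Roughly (illustrative names): %dead:gpr32 = ANDSWri %cond, 1 sets NZCV from
// bit 0 of the condition, and the select below is then emitted on NE.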
3395 Register DeadVReg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
3396 auto TstMI = MIB.buildInstr(AArch64::ANDSWri, {DeadVReg}, {CondReg})
3397 .addImm(AArch64_AM::encodeLogicalImmediate(1, 32));
3398 constrainSelectedInstRegOperands(*TstMI, TII, TRI, RBI);
3399 if (!emitSelect(Sel.getReg(0), TReg, FReg, AArch64CC::NE, MIB))
3400 return false;
3401 Sel.eraseFromParent();
3402 return true;
3403 }
3404 case TargetOpcode::G_ICMP: {
3405 if (Ty.isVector())
3406 return selectVectorICmp(I, MRI);
3407
3408 if (Ty != LLT::scalar(32)) {
3409 LLVM_DEBUG(dbgs() << "G_ICMP result has type: " << Ty
3410 << ", expected: " << LLT::scalar(32) << '\n');
3411 return false;
3412 }
3413
3414 auto Pred = static_cast<CmpInst::Predicate>(I.getOperand(1).getPredicate());
3415 const AArch64CC::CondCode InvCC =
3416 changeICMPPredToAArch64CC(CmpInst::getInversePredicate(Pred));
3417 emitIntegerCompare(I.getOperand(2), I.getOperand(3), I.getOperand(1), MIB);
3418 emitCSINC(/*Dst=*/I.getOperand(0).getReg(), /*Src1=*/AArch64::WZR,
3419 /*Src2=*/AArch64::WZR, InvCC, MIB);
3420 I.eraseFromParent();
3421 return true;
3422 }
3423
3424 case TargetOpcode::G_FCMP: {
3425 CmpInst::Predicate Pred =
3426 static_cast<CmpInst::Predicate>(I.getOperand(1).getPredicate());
3427 if (!emitFPCompare(I.getOperand(2).getReg(), I.getOperand(3).getReg(), MIB,
3428 Pred) ||
3429 !emitCSetForFCmp(I.getOperand(0).getReg(), Pred, MIB))
3430 return false;
3431 I.eraseFromParent();
3432 return true;
3433 }
3434 case TargetOpcode::G_VASTART:
3435 return STI.isTargetDarwin() ? selectVaStartDarwin(I, MF, MRI)
3436 : selectVaStartAAPCS(I, MF, MRI);
3437 case TargetOpcode::G_INTRINSIC:
3438 return selectIntrinsic(I, MRI);
3439 case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS:
3440 return selectIntrinsicWithSideEffects(I, MRI);
3441 case TargetOpcode::G_IMPLICIT_DEF: {
3442 I.setDesc(TII.get(TargetOpcode::IMPLICIT_DEF));
3443 const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
3444 const Register DstReg = I.getOperand(0).getReg();
3445 const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
3446 const TargetRegisterClass *DstRC = getRegClassForTypeOnBank(DstTy, DstRB);
3447 RBI.constrainGenericRegister(DstReg, *DstRC, MRI);
3448 return true;
3449 }
3450 case TargetOpcode::G_BLOCK_ADDR: {
3451 if (TM.getCodeModel() == CodeModel::Large && !TM.isPositionIndependent()) {
3452 materializeLargeCMVal(I, I.getOperand(1).getBlockAddress(), 0);
3453 I.eraseFromParent();
3454 return true;
3455 } else {
3456 I.setDesc(TII.get(AArch64::MOVaddrBA));
3457 auto MovMI = BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::MOVaddrBA),
3458 I.getOperand(0).getReg())
3459 .addBlockAddress(I.getOperand(1).getBlockAddress(),
3460 /* Offset */ 0, AArch64II::MO_PAGE)
3461 .addBlockAddress(
3462 I.getOperand(1).getBlockAddress(), /* Offset */ 0,
3463 AArch64II::MO_NC | AArch64II::MO_PAGEOFF);
3464 I.eraseFromParent();
3465 return constrainSelectedInstRegOperands(*MovMI, TII, TRI, RBI);
3466 }
3467 }
3468 case AArch64::G_DUP: {
3469 // When the scalar of G_DUP is an s8/s16 gpr, they can't be selected by
3470 // imported patterns. Do it manually here. Avoiding generating s16 gpr is
3471 // difficult because at RBS we may end up pessimizing the fpr case if we
3472 // decided to add an anyextend to fix this. Manual selection is the most
3473 // robust solution for now.
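// For example, a gpr-bank s8 scalar dup'd into an <8 x s8> vector is selected
// to DUPv8i8gpr below; the other handled shapes map to their DUPv*gpr twins.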
3474 if (RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI)->getID() !=
3475 AArch64::GPRRegBankID)
3476 return false; // We expect the fpr regbank case to be imported.
3477 LLT VecTy = MRI.getType(I.getOperand(0).getReg());
3478 if (VecTy == LLT::fixed_vector(8, 8))
3479 I.setDesc(TII.get(AArch64::DUPv8i8gpr));
3480 else if (VecTy == LLT::fixed_vector(16, 8))
3481 I.setDesc(TII.get(AArch64::DUPv16i8gpr));
3482 else if (VecTy == LLT::fixed_vector(4, 16))
3483 I.setDesc(TII.get(AArch64::DUPv4i16gpr));
3484 else if (VecTy == LLT::fixed_vector(8, 16))
3485 I.setDesc(TII.get(AArch64::DUPv8i16gpr));
3486 else
3487 return false;
3488 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3489 }
3490 case TargetOpcode::G_BUILD_VECTOR:
3491 return selectBuildVector(I, MRI);
3492 case TargetOpcode::G_MERGE_VALUES:
3493 return selectMergeValues(I, MRI);
3494 case TargetOpcode::G_UNMERGE_VALUES:
3495 return selectUnmergeValues(I, MRI);
3496 case TargetOpcode::G_SHUFFLE_VECTOR:
3497 return selectShuffleVector(I, MRI);
3498 case TargetOpcode::G_EXTRACT_VECTOR_ELT:
3499 return selectExtractElt(I, MRI);
3500 case TargetOpcode::G_CONCAT_VECTORS:
3501 return selectConcatVectors(I, MRI);
3502 case TargetOpcode::G_JUMP_TABLE:
3503 return selectJumpTable(I, MRI);
3504 case TargetOpcode::G_MEMCPY:
3505 case TargetOpcode::G_MEMCPY_INLINE:
3506 case TargetOpcode::G_MEMMOVE:
3507 case TargetOpcode::G_MEMSET:
3508 assert(STI.hasMOPS() && "Shouldn't get here without +mops feature");
3509 return selectMOPS(I, MRI);
3510 }
3511
3512 return false;
3513}
3514
3515bool AArch64InstructionSelector::selectAndRestoreState(MachineInstr &I) {
3516 MachineIRBuilderState OldMIBState = MIB.getState();
3517 bool Success = select(I);
3518 MIB.setState(OldMIBState);
3519 return Success;
3520}
3521
3522 bool AArch64InstructionSelector::selectMOPS(MachineInstr &GI,
3523 MachineRegisterInfo &MRI) {
3524 unsigned Mopcode;
3525 switch (GI.getOpcode()) {
3526 case TargetOpcode::G_MEMCPY:
3527 case TargetOpcode::G_MEMCPY_INLINE:
3528 Mopcode = AArch64::MOPSMemoryCopyPseudo;
3529 break;
3530 case TargetOpcode::G_MEMMOVE:
3531 Mopcode = AArch64::MOPSMemoryMovePseudo;
3532 break;
3533 case TargetOpcode::G_MEMSET:
3534 // For tagged memset see llvm.aarch64.mops.memset.tag
3535 Mopcode = AArch64::MOPSMemorySetPseudo;
3536 break;
3537 }
3538
3539 auto &DstPtr = GI.getOperand(0);
3540 auto &SrcOrVal = GI.getOperand(1);
3541 auto &Size = GI.getOperand(2);
3542
3543 // Create copies of the registers that can be clobbered.
3544 const Register DstPtrCopy = MRI.cloneVirtualRegister(DstPtr.getReg());
3545 const Register SrcValCopy = MRI.cloneVirtualRegister(SrcOrVal.getReg());
3546 const Register SizeCopy = MRI.cloneVirtualRegister(Size.getReg());
3547
3548 const bool IsSet = Mopcode == AArch64::MOPSMemorySetPseudo;
3549 const auto &SrcValRegClass =
3550 IsSet ? AArch64::GPR64RegClass : AArch64::GPR64commonRegClass;
3551
3552 // Constrain to specific registers
3553 RBI.constrainGenericRegister(DstPtrCopy, AArch64::GPR64commonRegClass, MRI);
3554 RBI.constrainGenericRegister(SrcValCopy, SrcValRegClass, MRI);
3555 RBI.constrainGenericRegister(SizeCopy, AArch64::GPR64RegClass, MRI);
3556
3557 MIB.buildCopy(DstPtrCopy, DstPtr);
3558 MIB.buildCopy(SrcValCopy, SrcOrVal);
3559 MIB.buildCopy(SizeCopy, Size);
3560
3561 // New instruction uses the copied registers because it must update them.
3562 // The defs are not used since they don't exist in G_MEM*. They are still
3563 // tied.
3564 // Note: order of operands is different from G_MEMSET, G_MEMCPY, G_MEMMOVE
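// Sketch for G_MEMSET (operand names as used below):
//   DefDstPtr, DefSize = MOPSMemorySetPseudo DstPtrCopy, SizeCopy, SrcValCopy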
3565 Register DefDstPtr = MRI.createVirtualRegister(&AArch64::GPR64commonRegClass);
3566 Register DefSize = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
3567 if (IsSet) {
3568 MIB.buildInstr(Mopcode, {DefDstPtr, DefSize},
3569 {DstPtrCopy, SizeCopy, SrcValCopy});
3570 } else {
3571 Register DefSrcPtr = MRI.createVirtualRegister(&SrcValRegClass);
3572 MIB.buildInstr(Mopcode, {DefDstPtr, DefSrcPtr, DefSize},
3573 {DstPtrCopy, SrcValCopy, SizeCopy});
3574 }
3575
3576 GI.eraseFromParent();
3577 return true;
3578}
3579
3580 bool AArch64InstructionSelector::selectBrJT(MachineInstr &I,
3581 MachineRegisterInfo &MRI) {
3582 assert(I.getOpcode() == TargetOpcode::G_BRJT && "Expected G_BRJT");
3583 Register JTAddr = I.getOperand(0).getReg();
3584 unsigned JTI = I.getOperand(1).getIndex();
3585 Register Index = I.getOperand(2).getReg();
3586
3587 Register TargetReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
3588 Register ScratchReg = MRI.createVirtualRegister(&AArch64::GPR64spRegClass);
3589
3590 MF->getInfo<AArch64FunctionInfo>()->setJumpTableEntryInfo(JTI, 4, nullptr);
3591 auto JumpTableInst = MIB.buildInstr(AArch64::JumpTableDest32,
3592 {TargetReg, ScratchReg}, {JTAddr, Index})
3593 .addJumpTableIndex(JTI);
3594 // Save the jump table info.
3595 MIB.buildInstr(TargetOpcode::JUMP_TABLE_DEBUG_INFO, {},
3596 {static_cast<int64_t>(JTI)});
3597 // Build the indirect branch.
3598 MIB.buildInstr(AArch64::BR, {}, {TargetReg});
3599 I.eraseFromParent();
3600 return constrainSelectedInstRegOperands(*JumpTableInst, TII, TRI, RBI);
3601}
3602
3603 bool AArch64InstructionSelector::selectJumpTable(MachineInstr &I,
3604 MachineRegisterInfo &MRI) {
3605 assert(I.getOpcode() == TargetOpcode::G_JUMP_TABLE && "Expected jump table");
3606 assert(I.getOperand(1).isJTI() && "Jump table op should have a JTI!");
3607
3608 Register DstReg = I.getOperand(0).getReg();
3609 unsigned JTI = I.getOperand(1).getIndex();
3610 // We generate a MOVaddrJT which will get expanded to an ADRP + ADD later.
3611 auto MovMI =
3612 MIB.buildInstr(AArch64::MOVaddrJT, {DstReg}, {})
3613 .addJumpTableIndex(JTI, AArch64II::MO_PAGE)
3614 .addJumpTableIndex(JTI, AArch64II::MO_NC | AArch64II::MO_PAGEOFF);
3615 I.eraseFromParent();
3616 return constrainSelectedInstRegOperands(*MovMI, TII, TRI, RBI);
3617}
3618
3619 bool AArch64InstructionSelector::selectTLSGlobalValue(
3620 MachineInstr &I, MachineRegisterInfo &MRI) {
3621 if (!STI.isTargetMachO())
3622 return false;
3623 MachineFunction &MF = *I.getParent()->getParent();
3624 MF.getFrameInfo().setAdjustsStack(true);
3625
3626 const auto &GlobalOp = I.getOperand(1);
3627 assert(GlobalOp.getOffset() == 0 &&
3628 "Shouldn't have an offset on TLS globals!");
3629 const GlobalValue &GV = *GlobalOp.getGlobal();
3630
3631 auto LoadGOT =
3632 MIB.buildInstr(AArch64::LOADgot, {&AArch64::GPR64commonRegClass}, {})
3633 .addGlobalAddress(&GV, 0, AArch64II::MO_TLS);
3634
3635 auto Load = MIB.buildInstr(AArch64::LDRXui, {&AArch64::GPR64commonRegClass},
3636 {LoadGOT.getReg(0)})
3637 .addImm(0);
3638
3639 MIB.buildCopy(Register(AArch64::X0), LoadGOT.getReg(0));
3640 // TLS calls preserve all registers except those that absolutely must be
3641 // trashed: X0 (it takes an argument), LR (it's a call) and NZCV (let's not be
3642 // silly).
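// The overall sequence is roughly: LOADgot of the GV with MO_TLS to get the
// TLS descriptor, LDRXui to load the resolver function from it, a copy of the
// descriptor into X0, a BLR of the resolver, and the result copied out of X0.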
3643 MIB.buildInstr(getBLRCallOpcode(MF), {}, {Load})
3644 .addUse(AArch64::X0, RegState::Implicit)
3645 .addDef(AArch64::X0, RegState::Implicit)
3646 .addRegMask(TRI.getTLSCallPreservedMask());
3647
3648 MIB.buildCopy(I.getOperand(0).getReg(), Register(AArch64::X0));
3649 RBI.constrainGenericRegister(I.getOperand(0).getReg(), AArch64::GPR64RegClass,
3650 MRI);
3651 I.eraseFromParent();
3652 return true;
3653}
3654
3655 bool AArch64InstructionSelector::selectVectorICmp(
3656 MachineInstr &I, MachineRegisterInfo &MRI) {
3657 Register DstReg = I.getOperand(0).getReg();
3658 LLT DstTy = MRI.getType(DstReg);
3659 Register SrcReg = I.getOperand(2).getReg();
3660 Register Src2Reg = I.getOperand(3).getReg();
3661 LLT SrcTy = MRI.getType(SrcReg);
3662
3663 unsigned SrcEltSize = SrcTy.getElementType().getSizeInBits();
3664 unsigned NumElts = DstTy.getNumElements();
3665
3666 // First index is element size, 0 == 8b, 1 == 16b, 2 == 32b, 3 == 64b
3667 // Second index is num elts, 0 == v2, 1 == v4, 2 == v8, 3 == v16
3668 // Third index is cc opcode:
3669 // 0 == eq
3670 // 1 == ugt
3671 // 2 == uge
3672 // 3 == ult
3673 // 4 == ule
3674 // 5 == sgt
3675 // 6 == sge
3676 // 7 == slt
3677 // 8 == sle
3678 // ne is done by negating 'eq' result.
3679
3680 // The table below assumes that for some comparisons the operands will be
3681 // commuted.
3682 // ult op == commute + ugt op
3683 // ule op == commute + uge op
3684 // slt op == commute + sgt op
3685 // sle op == commute + sge op
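// For example, icmp ult %a, %b is selected (roughly) as CMHI %b, %a with the
// operands swapped, since ult is ugt with the operands commuted.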
3686 unsigned PredIdx = 0;
3687 bool SwapOperands = false;
3688 CmpInst::Predicate Pred = (CmpInst::Predicate)I.getOperand(1).getPredicate();
3689 switch (Pred) {
3690 case CmpInst::ICMP_NE:
3691 case CmpInst::ICMP_EQ:
3692 PredIdx = 0;
3693 break;
3694 case CmpInst::ICMP_UGT:
3695 PredIdx = 1;
3696 break;
3697 case CmpInst::ICMP_UGE:
3698 PredIdx = 2;
3699 break;
3700 case CmpInst::ICMP_ULT:
3701 PredIdx = 3;
3702 SwapOperands = true;
3703 break;
3704 case CmpInst::ICMP_ULE:
3705 PredIdx = 4;
3706 SwapOperands = true;
3707 break;
3708 case CmpInst::ICMP_SGT:
3709 PredIdx = 5;
3710 break;
3711 case CmpInst::ICMP_SGE:
3712 PredIdx = 6;
3713 break;
3714 case CmpInst::ICMP_SLT:
3715 PredIdx = 7;
3716 SwapOperands = true;
3717 break;
3718 case CmpInst::ICMP_SLE:
3719 PredIdx = 8;
3720 SwapOperands = true;
3721 break;
3722 default:
3723 llvm_unreachable("Unhandled icmp predicate");
3724 return false;
3725 }
3726
3727 // This table obviously should be tablegen'd when we have our GISel native
3728 // tablegen selector.
3729
3730 static const unsigned OpcTable[4][4][9] = {
3731 {
3732 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3733 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3734 0 /* invalid */},
3735 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3736 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3737 0 /* invalid */},
3738 {AArch64::CMEQv8i8, AArch64::CMHIv8i8, AArch64::CMHSv8i8,
3739 AArch64::CMHIv8i8, AArch64::CMHSv8i8, AArch64::CMGTv8i8,
3740 AArch64::CMGEv8i8, AArch64::CMGTv8i8, AArch64::CMGEv8i8},
3741 {AArch64::CMEQv16i8, AArch64::CMHIv16i8, AArch64::CMHSv16i8,
3742 AArch64::CMHIv16i8, AArch64::CMHSv16i8, AArch64::CMGTv16i8,
3743 AArch64::CMGEv16i8, AArch64::CMGTv16i8, AArch64::CMGEv16i8}
3744 },
3745 {
3746 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3747 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3748 0 /* invalid */},
3749 {AArch64::CMEQv4i16, AArch64::CMHIv4i16, AArch64::CMHSv4i16,
3750 AArch64::CMHIv4i16, AArch64::CMHSv4i16, AArch64::CMGTv4i16,
3751 AArch64::CMGEv4i16, AArch64::CMGTv4i16, AArch64::CMGEv4i16},
3752 {AArch64::CMEQv8i16, AArch64::CMHIv8i16, AArch64::CMHSv8i16,
3753 AArch64::CMHIv8i16, AArch64::CMHSv8i16, AArch64::CMGTv8i16,
3754 AArch64::CMGEv8i16, AArch64::CMGTv8i16, AArch64::CMGEv8i16},
3755 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3756 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3757 0 /* invalid */}
3758 },
3759 {
3760 {AArch64::CMEQv2i32, AArch64::CMHIv2i32, AArch64::CMHSv2i32,
3761 AArch64::CMHIv2i32, AArch64::CMHSv2i32, AArch64::CMGTv2i32,
3762 AArch64::CMGEv2i32, AArch64::CMGTv2i32, AArch64::CMGEv2i32},
3763 {AArch64::CMEQv4i32, AArch64::CMHIv4i32, AArch64::CMHSv4i32,
3764 AArch64::CMHIv4i32, AArch64::CMHSv4i32, AArch64::CMGTv4i32,
3765 AArch64::CMGEv4i32, AArch64::CMGTv4i32, AArch64::CMGEv4i32},
3766 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3767 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3768 0 /* invalid */},
3769 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3770 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3771 0 /* invalid */}
3772 },
3773 {
3774 {AArch64::CMEQv2i64, AArch64::CMHIv2i64, AArch64::CMHSv2i64,
3775 AArch64::CMHIv2i64, AArch64::CMHSv2i64, AArch64::CMGTv2i64,
3776 AArch64::CMGEv2i64, AArch64::CMGTv2i64, AArch64::CMGEv2i64},
3777 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3778 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3779 0 /* invalid */},
3780 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3781 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3782 0 /* invalid */},
3783 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3784 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3785 0 /* invalid */}
3786 },
3787 };
3788 unsigned EltIdx = Log2_32(SrcEltSize / 8);
3789 unsigned NumEltsIdx = Log2_32(NumElts / 2);
3790 unsigned Opc = OpcTable[EltIdx][NumEltsIdx][PredIdx];
3791 if (!Opc) {
3792 LLVM_DEBUG(dbgs() << "Could not map G_ICMP to cmp opcode");
3793 return false;
3794 }
3795
3796 const RegisterBank &VecRB = *RBI.getRegBank(SrcReg, MRI, TRI);
3797 const TargetRegisterClass *SrcRC =
3798 getRegClassForTypeOnBank(SrcTy, VecRB, true);
3799 if (!SrcRC) {
3800 LLVM_DEBUG(dbgs() << "Could not determine source register class.\n");
3801 return false;
3802 }
3803
3804 unsigned NotOpc = Pred == ICmpInst::ICMP_NE ? AArch64::NOTv8i8 : 0;
3805 if (SrcTy.getSizeInBits() == 128)
3806 NotOpc = NotOpc ? AArch64::NOTv16i8 : 0;
3807
3808 if (SwapOperands)
3809 std::swap(SrcReg, Src2Reg);
3810
3811 auto Cmp = MIB.buildInstr(Opc, {SrcRC}, {SrcReg, Src2Reg});
3812 constrainSelectedInstRegOperands(*Cmp, TII, TRI, RBI);
3813
3814 // Invert if we had a 'ne' cc.
3815 if (NotOpc) {
3816 Cmp = MIB.buildInstr(NotOpc, {DstReg}, {Cmp});
3817 constrainSelectedInstRegOperands(*Cmp, TII, TRI, RBI);
3818 } else {
3819 MIB.buildCopy(DstReg, Cmp.getReg(0));
3820 }
3821 RBI.constrainGenericRegister(DstReg, *SrcRC, MRI);
3822 I.eraseFromParent();
3823 return true;
3824}
3825
3826MachineInstr *AArch64InstructionSelector::emitScalarToVector(
3827 unsigned EltSize, const TargetRegisterClass *DstRC, Register Scalar,
3828 MachineIRBuilder &MIRBuilder) const {
3829 auto Undef = MIRBuilder.buildInstr(TargetOpcode::IMPLICIT_DEF, {DstRC}, {});
3830
3831 auto BuildFn = [&](unsigned SubregIndex) {
3832 auto Ins =
3833 MIRBuilder
3834 .buildInstr(TargetOpcode::INSERT_SUBREG, {DstRC}, {Undef, Scalar})
3835 .addImm(SubregIndex);
3836 constrainSelectedInstRegOperands(*Undef, TII, TRI, RBI);
3837 constrainSelectedInstRegOperands(*Ins, TII, TRI, RBI);
3838 return &*Ins;
3839 };
3840
3841 switch (EltSize) {
3842 case 8:
3843 return BuildFn(AArch64::bsub);
3844 case 16:
3845 return BuildFn(AArch64::hsub);
3846 case 32:
3847 return BuildFn(AArch64::ssub);
3848 case 64:
3849 return BuildFn(AArch64::dsub);
3850 default:
3851 return nullptr;
3852 }
3853}
3854
3855 MachineInstr *
3856 AArch64InstructionSelector::emitNarrowVector(Register DstReg, Register SrcReg,
3857 MachineIRBuilder &MIB,
3858 MachineRegisterInfo &MRI) const {
3859 LLT DstTy = MRI.getType(DstReg);
3860 const TargetRegisterClass *RC =
3861 getRegClassForTypeOnBank(DstTy, *RBI.getRegBank(SrcReg, MRI, TRI));
3862 if (RC != &AArch64::FPR32RegClass && RC != &AArch64::FPR64RegClass) {
3863 LLVM_DEBUG(dbgs() << "Unsupported register class!\n");
3864 return nullptr;
3865 }
3866 unsigned SubReg = 0;
3867 if (!getSubRegForClass(RC, TRI, SubReg))
3868 return nullptr;
3869 if (SubReg != AArch64::ssub && SubReg != AArch64::dsub) {
3870 LLVM_DEBUG(dbgs() << "Unsupported destination size! ("
3871 << DstTy.getSizeInBits() << ")\n");
3872 return nullptr;
3873 }
3874 auto Copy = MIB.buildInstr(TargetOpcode::COPY, {DstReg}, {})
3875 .addReg(SrcReg, 0, SubReg);
3876 RBI.constrainGenericRegister(DstReg, *RC, MRI);
3877 return Copy;
3878}
3879
3880 bool AArch64InstructionSelector::selectMergeValues(
3881 MachineInstr &I, MachineRegisterInfo &MRI) {
3882 assert(I.getOpcode() == TargetOpcode::G_MERGE_VALUES && "unexpected opcode");
3883 const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
3884 const LLT SrcTy = MRI.getType(I.getOperand(1).getReg());
3885 assert(!DstTy.isVector() && !SrcTy.isVector() && "invalid merge operation");
3886 const RegisterBank &RB = *RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI);
3887
3888 if (I.getNumOperands() != 3)
3889 return false;
3890
3891 // Merging 2 s64s into an s128.
3892 if (DstTy == LLT::scalar(128)) {
3893 if (SrcTy.getSizeInBits() != 64)
3894 return false;
3895 Register DstReg = I.getOperand(0).getReg();
3896 Register Src1Reg = I.getOperand(1).getReg();
3897 Register Src2Reg = I.getOperand(2).getReg();
3898 auto Tmp = MIB.buildInstr(TargetOpcode::IMPLICIT_DEF, {DstTy}, {});
3899 MachineInstr *InsMI = emitLaneInsert(std::nullopt, Tmp.getReg(0), Src1Reg,
3900 /* LaneIdx */ 0, RB, MIB);
3901 if (!InsMI)
3902 return false;
3903 MachineInstr *Ins2MI = emitLaneInsert(DstReg, InsMI->getOperand(0).getReg(),
3904 Src2Reg, /* LaneIdx */ 1, RB, MIB);
3905 if (!Ins2MI)
3906 return false;
3907 constrainSelectedInstRegOperands(*InsMI, TII, TRI, RBI);
3908 constrainSelectedInstRegOperands(*Ins2MI, TII, TRI, RBI);
3909 I.eraseFromParent();
3910 return true;
3911 }
3912
3913 if (RB.getID() != AArch64::GPRRegBankID)
3914 return false;
3915
3916 if (DstTy.getSizeInBits() != 64 || SrcTy.getSizeInBits() != 32)
3917 return false;
3918
3919 auto *DstRC = &AArch64::GPR64RegClass;
3920 Register SubToRegDef = MRI.createVirtualRegister(DstRC);
3921 MachineInstr &SubRegMI = *BuildMI(*I.getParent(), I, I.getDebugLoc(),
3922 TII.get(TargetOpcode::SUBREG_TO_REG))
3923 .addDef(SubToRegDef)
3924 .addImm(0)
3925 .addUse(I.getOperand(1).getReg())
3926 .addImm(AArch64::sub_32);
3927 Register SubToRegDef2 = MRI.createVirtualRegister(DstRC);
3928 // Need to anyext the second scalar before we can use bfm
3929 MachineInstr &SubRegMI2 = *BuildMI(*I.getParent(), I, I.getDebugLoc(),
3930 TII.get(TargetOpcode::SUBREG_TO_REG))
3931 .addDef(SubToRegDef2)
3932 .addImm(0)
3933 .addUse(I.getOperand(2).getReg())
3934 .addImm(AArch64::sub_32);
3935 MachineInstr &BFM =
3936 *BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::BFMXri))
3937 .addDef(I.getOperand(0).getReg())
3938 .addUse(SubToRegDef)
3939 .addUse(SubToRegDef2)
3940 .addImm(32)
3941 .addImm(31);
3942 constrainSelectedInstRegOperands(SubRegMI, TII, TRI, RBI);
3943 constrainSelectedInstRegOperands(SubRegMI2, TII, TRI, RBI);
3944 constrainSelectedInstRegOperands(BFM, TII, TRI, RBI);
3945 I.eraseFromParent();
3946 return true;
3947}
3948
3949static bool getLaneCopyOpcode(unsigned &CopyOpc, unsigned &ExtractSubReg,
3950 const unsigned EltSize) {
3951 // Choose a lane copy opcode and subregister based off of the size of the
3952 // vector's elements.
3953 switch (EltSize) {
3954 case 8:
3955 CopyOpc = AArch64::DUPi8;
3956 ExtractSubReg = AArch64::bsub;
3957 break;
3958 case 16:
3959 CopyOpc = AArch64::DUPi16;
3960 ExtractSubReg = AArch64::hsub;
3961 break;
3962 case 32:
3963 CopyOpc = AArch64::DUPi32;
3964 ExtractSubReg = AArch64::ssub;
3965 break;
3966 case 64:
3967 CopyOpc = AArch64::DUPi64;
3968 ExtractSubReg = AArch64::dsub;
3969 break;
3970 default:
3971 // Unknown size, bail out.
3972 LLVM_DEBUG(dbgs() << "Elt size '" << EltSize << "' unsupported.\n");
3973 return false;
3974 }
3975 return true;
3976}
3977
3978MachineInstr *AArch64InstructionSelector::emitExtractVectorElt(
3979 std::optional<Register> DstReg, const RegisterBank &DstRB, LLT ScalarTy,
3980 Register VecReg, unsigned LaneIdx, MachineIRBuilder &MIRBuilder) const {
3981 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
3982 unsigned CopyOpc = 0;
3983 unsigned ExtractSubReg = 0;
3984 if (!getLaneCopyOpcode(CopyOpc, ExtractSubReg, ScalarTy.getSizeInBits())) {
3985 LLVM_DEBUG(
3986 dbgs() << "Couldn't determine lane copy opcode for instruction.\n");
3987 return nullptr;
3988 }
3989
3990 const TargetRegisterClass *DstRC =
3991 getRegClassForTypeOnBank(ScalarTy, DstRB, true);
3992 if (!DstRC) {
3993 LLVM_DEBUG(dbgs() << "Could not determine destination register class.\n");
3994 return nullptr;
3995 }
3996
3997 const RegisterBank &VecRB = *RBI.getRegBank(VecReg, MRI, TRI);
3998 const LLT &VecTy = MRI.getType(VecReg);
3999 const TargetRegisterClass *VecRC =
4000 getRegClassForTypeOnBank(VecTy, VecRB, true);
4001 if (!VecRC) {
4002 LLVM_DEBUG(dbgs() << "Could not determine source register class.\n");
4003 return nullptr;
4004 }
4005
4006 // The register that we're going to copy into.
4007 Register InsertReg = VecReg;
4008 if (!DstReg)
4009 DstReg = MRI.createVirtualRegister(DstRC);
4010 // If the lane index is 0, we just use a subregister COPY.
4011 if (LaneIdx == 0) {
4012 auto Copy = MIRBuilder.buildInstr(TargetOpcode::COPY, {*DstReg}, {})
4013 .addReg(VecReg, 0, ExtractSubReg);
4014 RBI.constrainGenericRegister(*DstReg, *DstRC, MRI);
4015 return &*Copy;
4016 }
4017
4018 // Lane copies require 128-bit wide registers. If we're dealing with an
4019 // unpacked vector, then we need to move up to that width. Insert an implicit
4020 // def and a subregister insert to get us there.
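// Sketch for a 64-bit source vector (illustrative vregs):
//   %wide:fpr128 = INSERT_SUBREG (IMPLICIT_DEF), %vec, dsub
// and the DUPi* lane copy below then reads the requested lane from %wide.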
4021 if (VecTy.getSizeInBits() != 128) {
4022 MachineInstr *ScalarToVector = emitScalarToVector(
4023 VecTy.getSizeInBits(), &AArch64::FPR128RegClass, VecReg, MIRBuilder);
4024 if (!ScalarToVector)
4025 return nullptr;
4026 InsertReg = ScalarToVector->getOperand(0).getReg();
4027 }
4028
4029 MachineInstr *LaneCopyMI =
4030 MIRBuilder.buildInstr(CopyOpc, {*DstReg}, {InsertReg}).addImm(LaneIdx);
4031 constrainSelectedInstRegOperands(*LaneCopyMI, TII, TRI, RBI);
4032
4033 // Make sure that we actually constrain the initial copy.
4034 RBI.constrainGenericRegister(*DstReg, *DstRC, MRI);
4035 return LaneCopyMI;
4036}
4037
4038 bool AArch64InstructionSelector::selectExtractElt(
4039 MachineInstr &I, MachineRegisterInfo &MRI) {
4040 assert(I.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT &&
4041 "unexpected opcode!");
4042 Register DstReg = I.getOperand(0).getReg();
4043 const LLT NarrowTy = MRI.getType(DstReg);
4044 const Register SrcReg = I.getOperand(1).getReg();
4045 const LLT WideTy = MRI.getType(SrcReg);
4046 (void)WideTy;
4047 assert(WideTy.getSizeInBits() >= NarrowTy.getSizeInBits() &&
4048 "source register size too small!");
4049 assert(!NarrowTy.isVector() && "cannot extract vector into vector!");
4050
4051 // Need the lane index to determine the correct copy opcode.
4052 MachineOperand &LaneIdxOp = I.getOperand(2);
4053 assert(LaneIdxOp.isReg() && "Lane index operand was not a register?");
4054
4055 if (RBI.getRegBank(DstReg, MRI, TRI)->getID() != AArch64::FPRRegBankID) {
4056 LLVM_DEBUG(dbgs() << "Cannot extract into GPR.\n");
4057 return false;
4058 }
4059
4060 // Find the index to extract from.
4061 auto VRegAndVal = getIConstantVRegValWithLookThrough(LaneIdxOp.getReg(), MRI);
4062 if (!VRegAndVal)
4063 return false;
4064 unsigned LaneIdx = VRegAndVal->Value.getSExtValue();
4065
4066
4067 const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
4068 MachineInstr *Extract = emitExtractVectorElt(DstReg, DstRB, NarrowTy, SrcReg,
4069 LaneIdx, MIB);
4070 if (!Extract)
4071 return false;
4072
4073 I.eraseFromParent();
4074 return true;
4075}
4076
4077 bool AArch64InstructionSelector::selectSplitVectorUnmerge(
4078 MachineInstr &I, MachineRegisterInfo &MRI) {
4079 unsigned NumElts = I.getNumOperands() - 1;
4080 Register SrcReg = I.getOperand(NumElts).getReg();
4081 const LLT NarrowTy = MRI.getType(I.getOperand(0).getReg());
4082 const LLT SrcTy = MRI.getType(SrcReg);
4083
4084 assert(NarrowTy.isVector() && "Expected an unmerge into vectors");
4085 if (SrcTy.getSizeInBits() > 128) {
4086 LLVM_DEBUG(dbgs() << "Unexpected vector type for vec split unmerge");
4087 return false;
4088 }
4089
4090 // We implement a split vector operation by treating the sub-vectors as
4091 // scalars and extracting them.
4092 const RegisterBank &DstRB =
4093 *RBI.getRegBank(I.getOperand(0).getReg(), MRI, TRI);
4094 for (unsigned OpIdx = 0; OpIdx < NumElts; ++OpIdx) {
4095 Register Dst = I.getOperand(OpIdx).getReg();
4096 MachineInstr *Extract =
4097 emitExtractVectorElt(Dst, DstRB, NarrowTy, SrcReg, OpIdx, MIB);
4098 if (!Extract)
4099 return false;
4100 }
4101 I.eraseFromParent();
4102 return true;
4103}
4104
4105 bool AArch64InstructionSelector::selectUnmergeValues(MachineInstr &I,
4106 MachineRegisterInfo &MRI) {
4107 assert(I.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
4108 "unexpected opcode");
4109
4110 // TODO: Handle unmerging into GPRs and from scalars to scalars.
4111 if (RBI.getRegBank(I.getOperand(0).getReg(), MRI, TRI)->getID() !=
4112 AArch64::FPRRegBankID ||
4113 RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI)->getID() !=
4114 AArch64::FPRRegBankID) {
4115 LLVM_DEBUG(dbgs() << "Unmerging vector-to-gpr and scalar-to-scalar "
4116 "currently unsupported.\n");
4117 return false;
4118 }
4119
4120 // The last operand is the vector source register, and every other operand is
4121 // a register to unpack into.
4122 unsigned NumElts = I.getNumOperands() - 1;
4123 Register SrcReg = I.getOperand(NumElts).getReg();
4124 const LLT NarrowTy = MRI.getType(I.getOperand(0).getReg());
4125 const LLT WideTy = MRI.getType(SrcReg);
4126 (void)WideTy;
4127 assert((WideTy.isVector() || WideTy.getSizeInBits() == 128) &&
4128 "can only unmerge from vector or s128 types!");
4129 assert(WideTy.getSizeInBits() > NarrowTy.getSizeInBits() &&
4130 "source register size too small!");
4131
4132 if (!NarrowTy.isScalar())
4133 return selectSplitVectorUnmerge(I, MRI);
4134
4135 // Choose a lane copy opcode and subregister based off of the size of the
4136 // vector's elements.
4137 unsigned CopyOpc = 0;
4138 unsigned ExtractSubReg = 0;
4139 if (!getLaneCopyOpcode(CopyOpc, ExtractSubReg, NarrowTy.getSizeInBits()))
4140 return false;
4141
4142 // Set up for the lane copies.
4143 MachineBasicBlock &MBB = *I.getParent();
4144
4145 // Stores the registers we'll be copying from.
4146 SmallVector<Register, 4> InsertRegs;
4147
4148 // We'll use the first register twice, so we only need NumElts-1 registers.
4149 unsigned NumInsertRegs = NumElts - 1;
4150
4151 // If our elements fit into exactly 128 bits, then we can copy from the source
4152 // directly. Otherwise, we need to do a bit of setup with some subregister
4153 // inserts.
4154 if (NarrowTy.getSizeInBits() * NumElts == 128) {
4155 InsertRegs = SmallVector<Register, 4>(NumInsertRegs, SrcReg);
4156 } else {
4157 // No. We have to perform subregister inserts. For each insert, create an
4158 // implicit def and a subregister insert, and save the register we create.
4159 const TargetRegisterClass *RC = getRegClassForTypeOnBank(
4160 LLT::fixed_vector(NumElts, WideTy.getScalarSizeInBits()),
4161 *RBI.getRegBank(SrcReg, MRI, TRI));
4162 unsigned SubReg = 0;
4163 bool Found = getSubRegForClass(RC, TRI, SubReg);
4164 (void)Found;
4165 assert(Found && "expected to find last operand's subreg idx");
4166 for (unsigned Idx = 0; Idx < NumInsertRegs; ++Idx) {
4167 Register ImpDefReg = MRI.createVirtualRegister(&AArch64::FPR128RegClass);
4168 MachineInstr &ImpDefMI =
4169 *BuildMI(MBB, I, I.getDebugLoc(), TII.get(TargetOpcode::IMPLICIT_DEF),
4170 ImpDefReg);
4171
4172 // Now, create the subregister insert from SrcReg.
4173 Register InsertReg = MRI.createVirtualRegister(&AArch64::FPR128RegClass);
4174 MachineInstr &InsMI =
4175 *BuildMI(MBB, I, I.getDebugLoc(),
4176 TII.get(TargetOpcode::INSERT_SUBREG), InsertReg)
4177 .addUse(ImpDefReg)
4178 .addUse(SrcReg)
4179 .addImm(SubReg);
4180
4181 constrainSelectedInstRegOperands(ImpDefMI, TII, TRI, RBI);
4182 constrainSelectedInstRegOperands(InsMI, TII, TRI, RBI);
4183
4184 // Save the register so that we can copy from it after.
4185 InsertRegs.push_back(InsertReg);
4186 }
4187 }
4188
4189 // Now that we've created any necessary subregister inserts, we can
4190 // create the copies.
4191 //
4192 // Perform the first copy separately as a subregister copy.
4193 Register CopyTo = I.getOperand(0).getReg();
4194 auto FirstCopy = MIB.buildInstr(TargetOpcode::COPY, {CopyTo}, {})
4195 .addReg(InsertRegs[0], 0, ExtractSubReg);
4196 constrainSelectedInstRegOperands(*FirstCopy, TII, TRI, RBI);
4197
4198 // Now, perform the remaining copies as vector lane copies.
4199 unsigned LaneIdx = 1;
4200 for (Register InsReg : InsertRegs) {
4201 Register CopyTo = I.getOperand(LaneIdx).getReg();
4202 MachineInstr &CopyInst =
4203 *BuildMI(MBB, I, I.getDebugLoc(), TII.get(CopyOpc), CopyTo)
4204 .addUse(InsReg)
4205 .addImm(LaneIdx);
4206 constrainSelectedInstRegOperands(CopyInst, TII, TRI, RBI);
4207 ++LaneIdx;
4208 }
4209
4210 // Separately constrain the first copy's destination. Because of the
4211 // limitation in constrainOperandRegClass, we can't guarantee that this will
4212 // actually be constrained. So, do it ourselves using the second operand.
4213 const TargetRegisterClass *RC =
4214 MRI.getRegClassOrNull(I.getOperand(1).getReg());
4215 if (!RC) {
4216 LLVM_DEBUG(dbgs() << "Couldn't constrain copy destination.\n");
4217 return false;
4218 }
4219
4220 RBI.constrainGenericRegister(CopyTo, *RC, MRI);
4221 I.eraseFromParent();
4222 return true;
4223}
4224
4225 bool AArch64InstructionSelector::selectConcatVectors(
4226 MachineInstr &I, MachineRegisterInfo &MRI) {
4227 assert(I.getOpcode() == TargetOpcode::G_CONCAT_VECTORS &&
4228 "Unexpected opcode");
4229 Register Dst = I.getOperand(0).getReg();
4230 Register Op1 = I.getOperand(1).getReg();
4231 Register Op2 = I.getOperand(2).getReg();
4232 MachineInstr *ConcatMI = emitVectorConcat(Dst, Op1, Op2, MIB);
4233 if (!ConcatMI)
4234 return false;
4235 I.eraseFromParent();
4236 return true;
4237}
4238
4239unsigned
4240AArch64InstructionSelector::emitConstantPoolEntry(const Constant *CPVal,
4241 MachineFunction &MF) const {
4242 Type *CPTy = CPVal->getType();
4243 Align Alignment = MF.getDataLayout().getPrefTypeAlign(CPTy);
4244
4245 MachineConstantPool *MCP = MF.getConstantPool();
4246 return MCP->getConstantPoolIndex(CPVal, Alignment);
4247}
4248
4249MachineInstr *AArch64InstructionSelector::emitLoadFromConstantPool(
4250 const Constant *CPVal, MachineIRBuilder &MIRBuilder) const {
4251 const TargetRegisterClass *RC;
4252 unsigned Opc;
4253 bool IsTiny = TM.getCodeModel() == CodeModel::Tiny;
4254 unsigned Size = MIRBuilder.getDataLayout().getTypeStoreSize(CPVal->getType());
4255 switch (Size) {
4256 case 16:
4257 RC = &AArch64::FPR128RegClass;
4258 Opc = IsTiny ? AArch64::LDRQl : AArch64::LDRQui;
4259 break;
4260 case 8:
4261 RC = &AArch64::FPR64RegClass;
4262 Opc = IsTiny ? AArch64::LDRDl : AArch64::LDRDui;
4263 break;
4264 case 4:
4265 RC = &AArch64::FPR32RegClass;
4266 Opc = IsTiny ? AArch64::LDRSl : AArch64::LDRSui;
4267 break;
4268 case 2:
4269 RC = &AArch64::FPR16RegClass;
4270 Opc = AArch64::LDRHui;
4271 break;
4272 default:
4273 LLVM_DEBUG(dbgs() << "Could not load from constant pool of type "
4274 << *CPVal->getType());
4275 return nullptr;
4276 }
4277
4278 MachineInstr *LoadMI = nullptr;
4279 auto &MF = MIRBuilder.getMF();
4280 unsigned CPIdx = emitConstantPoolEntry(CPVal, MF);
4281 if (IsTiny && (Size == 16 || Size == 8 || Size == 4)) {
4282 // Use load(literal) for tiny code model.
4283 LoadMI = &*MIRBuilder.buildInstr(Opc, {RC}, {}).addConstantPoolIndex(CPIdx);
4284 } else {
4285 auto Adrp =
4286 MIRBuilder.buildInstr(AArch64::ADRP, {&AArch64::GPR64RegClass}, {})
4287 .addConstantPoolIndex(CPIdx, 0, AArch64II::MO_PAGE);
4288
4289 LoadMI = &*MIRBuilder.buildInstr(Opc, {RC}, {Adrp})
4290 .addConstantPoolIndex(
4291 CPIdx, 0, AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
4292
4293 constrainSelectedInstRegOperands(*Adrp, TII, TRI, RBI);
4294 }
4295
4296 MachinePointerInfo PtrInfo = MachinePointerInfo::getConstantPool(MF);
4297 LoadMI->addMemOperand(MF, MF.getMachineMemOperand(PtrInfo,
4298 MachineMemOperand::MOLoad,
4299 Size, Align(Size)));
4300 constrainSelectedInstRegOperands(*LoadMI, TII, TRI, RBI);
4301 return LoadMI;
4302}
4303
4304 /// Return an <Opcode, SubregIndex> pair to do a vector elt insert of a given
4305/// size and RB.
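/// For example, a 32-bit element on the GPR bank yields {INSvi32gpr, ssub},
/// while the same element size on the FPR bank yields {INSvi32lane, ssub}.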
4306static std::pair<unsigned, unsigned>
4307getInsertVecEltOpInfo(const RegisterBank &RB, unsigned EltSize) {
4308 unsigned Opc, SubregIdx;
4309 if (RB.getID() == AArch64::GPRRegBankID) {
4310 if (EltSize == 8) {
4311 Opc = AArch64::INSvi8gpr;
4312 SubregIdx = AArch64::bsub;
4313 } else if (EltSize == 16) {
4314 Opc = AArch64::INSvi16gpr;
4315 SubregIdx = AArch64::ssub;
4316 } else if (EltSize == 32) {
4317 Opc = AArch64::INSvi32gpr;
4318 SubregIdx = AArch64::ssub;
4319 } else if (EltSize == 64) {
4320 Opc = AArch64::INSvi64gpr;
4321 SubregIdx = AArch64::dsub;
4322 } else {
4323 llvm_unreachable("invalid elt size!");
4324 }
4325 } else {
4326 if (EltSize == 8) {
4327 Opc = AArch64::INSvi8lane;
4328 SubregIdx = AArch64::bsub;
4329 } else if (EltSize == 16) {
4330 Opc = AArch64::INSvi16lane;
4331 SubregIdx = AArch64::hsub;
4332 } else if (EltSize == 32) {
4333 Opc = AArch64::INSvi32lane;
4334 SubregIdx = AArch64::ssub;
4335 } else if (EltSize == 64) {
4336 Opc = AArch64::INSvi64lane;
4337 SubregIdx = AArch64::dsub;
4338 } else {
4339 llvm_unreachable("invalid elt size!");
4340 }
4341 }
4342 return std::make_pair(Opc, SubregIdx);
4343}
4344
4345MachineInstr *AArch64InstructionSelector::emitInstr(
4346 unsigned Opcode, std::initializer_list<llvm::DstOp> DstOps,
4347 std::initializer_list<llvm::SrcOp> SrcOps, MachineIRBuilder &MIRBuilder,
4348 const ComplexRendererFns &RenderFns) const {
4349 assert(Opcode && "Expected an opcode?");
4350 assert(!isPreISelGenericOpcode(Opcode) &&
4351 "Function should only be used to produce selected instructions!");
4352 auto MI = MIRBuilder.buildInstr(Opcode, DstOps, SrcOps);
4353 if (RenderFns)
4354 for (auto &Fn : *RenderFns)
4355 Fn(MI);
4356 constrainSelectedInstRegOperands(*MI, TII, TRI, RBI);
4357 return &*MI;
4358}
4359
4360MachineInstr *AArch64InstructionSelector::emitAddSub(
4361 const std::array<std::array<unsigned, 2>, 5> &AddrModeAndSizeToOpcode,
4362 Register Dst, MachineOperand &LHS, MachineOperand &RHS,
4363 MachineIRBuilder &MIRBuilder) const {
4364 MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
4365 assert(LHS.isReg() && RHS.isReg() && "Expected register operands?");
4366 auto Ty = MRI.getType(LHS.getReg());
4367 assert(!Ty.isVector() && "Expected a scalar or pointer?");
4368 unsigned Size = Ty.getSizeInBits();
4369 assert((Size == 32 || Size == 64) && "Expected a 32-bit or 64-bit type only");
4370 bool Is32Bit = Size == 32;
4371
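// The rows of AddrModeAndSizeToOpcode used below are: [0] immediate (ri),
// [1] shifted register (rs), [2] register-register (rr), [3] the opposite
// operation with a negated immediate, and [4] extended register (rx).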
4372 // INSTRri form with positive arithmetic immediate.
4373 if (auto Fns = selectArithImmed(RHS))
4374 return emitInstr(AddrModeAndSizeToOpcode[0][Is32Bit], {Dst}, {LHS},
4375 MIRBuilder, Fns);
4376
4377 // INSTRri form with negative arithmetic immediate.
4378 if (auto Fns = selectNegArithImmed(RHS))
4379 return emitInstr(AddrModeAndSizeToOpcode[3][Is32Bit], {Dst}, {LHS},
4380 MIRBuilder, Fns);
4381
4382 // INSTRrx form.
4383 if (auto Fns = selectArithExtendedRegister(RHS))
4384 return emitInstr(AddrModeAndSizeToOpcode[4][Is32Bit], {Dst}, {LHS},
4385 MIRBuilder, Fns);
4386
4387 // INSTRrs form.
4388 if (auto Fns = selectShiftedRegister(RHS))
4389 return emitInstr(AddrModeAndSizeToOpcode[1][Is32Bit], {Dst}, {LHS},
4390 MIRBuilder, Fns);
4391 return emitInstr(AddrModeAndSizeToOpcode[2][Is32Bit], {Dst}, {LHS, RHS},
4392 MIRBuilder);
4393}
4394
4395 MachineInstr *
4396 AArch64InstructionSelector::emitADD(Register DefReg, MachineOperand &LHS,
4397 MachineOperand &RHS,
4398 MachineIRBuilder &MIRBuilder) const {
4399 const std::array<std::array<unsigned, 2>, 5> OpcTable{
4400 {{AArch64::ADDXri, AArch64::ADDWri},
4401 {AArch64::ADDXrs, AArch64::ADDWrs},
4402 {AArch64::ADDXrr, AArch64::ADDWrr},
4403 {AArch64::SUBXri, AArch64::SUBWri},
4404 {AArch64::ADDXrx, AArch64::ADDWrx}}};
4405 return emitAddSub(OpcTable, DefReg, LHS, RHS, MIRBuilder);
4406}
4407
4408 MachineInstr *
4409 AArch64InstructionSelector::emitADDS(Register Dst, MachineOperand &LHS,
4410 MachineOperand &RHS,
4411 MachineIRBuilder &MIRBuilder) const {
4412 const std::array<std::array<unsigned, 2>, 5> OpcTable{
4413 {{AArch64::ADDSXri, AArch64::ADDSWri},
4414 {AArch64::ADDSXrs, AArch64::ADDSWrs},
4415 {AArch64::ADDSXrr, AArch64::ADDSWrr},
4416 {AArch64::SUBSXri, AArch64::SUBSWri},
4417 {AArch64::ADDSXrx, AArch64::ADDSWrx}}};
4418 return emitAddSub(OpcTable, Dst, LHS, RHS, MIRBuilder);
4419}
4420
4421 MachineInstr *
4422 AArch64InstructionSelector::emitSUBS(Register Dst, MachineOperand &LHS,
4423 MachineOperand &RHS,
4424 MachineIRBuilder &MIRBuilder) const {
4425 const std::array<std::array<unsigned, 2>, 5> OpcTable{
4426 {{AArch64::SUBSXri, AArch64::SUBSWri},
4427 {AArch64::SUBSXrs, AArch64::SUBSWrs},
4428 {AArch64::SUBSXrr, AArch64::SUBSWrr},
4429 {AArch64::ADDSXri, AArch64::ADDSWri},
4430 {AArch64::SUBSXrx, AArch64::SUBSWrx}}};
4431 return emitAddSub(OpcTable, Dst, LHS, RHS, MIRBuilder);
4432}
4433
4434 MachineInstr *
4435 AArch64InstructionSelector::emitADCS(Register Dst, MachineOperand &LHS,
4436 MachineOperand &RHS,
4437 MachineIRBuilder &MIRBuilder) const {
4438 assert(LHS.isReg() && RHS.isReg() && "Expected register operands?");
4439 MachineRegisterInfo *MRI = MIRBuilder.getMRI();
4440 bool Is32Bit = (MRI->getType(LHS.getReg()).getSizeInBits() == 32);
4441 static const unsigned OpcTable[2] = {AArch64::ADCSXr, AArch64::ADCSWr};
4442 return emitInstr(OpcTable[Is32Bit], {Dst}, {LHS, RHS}, MIRBuilder);
4443}
4444
4445 MachineInstr *
4446 AArch64InstructionSelector::emitSBCS(Register Dst, MachineOperand &LHS,
4447 MachineOperand &RHS,
4448 MachineIRBuilder &MIRBuilder) const {
4449 assert(LHS.isReg() && RHS.isReg() && "Expected register operands?");
4450 MachineRegisterInfo *MRI = MIRBuilder.getMRI();
4451 bool Is32Bit = (MRI->getType(LHS.getReg()).getSizeInBits() == 32);
4452 static const unsigned OpcTable[2] = {AArch64::SBCSXr, AArch64::SBCSWr};
4453 return emitInstr(OpcTable[Is32Bit], {Dst}, {LHS, RHS}, MIRBuilder);
4454}
4455
4456 MachineInstr *
4457 AArch64InstructionSelector::emitCMN(MachineOperand &LHS, MachineOperand &RHS,
4458 MachineIRBuilder &MIRBuilder) const {
4459 MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
4460 bool Is32Bit = (MRI.getType(LHS.getReg()).getSizeInBits() == 32);
4461 auto RC = Is32Bit ? &AArch64::GPR32RegClass : &AArch64::GPR64RegClass;
4462 return emitADDS(MRI.createVirtualRegister(RC), LHS, RHS, MIRBuilder);
4463}
4464
4465 MachineInstr *
4466 AArch64InstructionSelector::emitTST(MachineOperand &LHS, MachineOperand &RHS,
4467 MachineIRBuilder &MIRBuilder) const {
4468 assert(LHS.isReg() && RHS.isReg() && "Expected register operands?");
4469 MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
4470 LLT Ty = MRI.getType(LHS.getReg());
4471 unsigned RegSize = Ty.getSizeInBits();
4472 bool Is32Bit = (RegSize == 32);
4473 const unsigned OpcTable[3][2] = {{AArch64::ANDSXri, AArch64::ANDSWri},
4474 {AArch64::ANDSXrs, AArch64::ANDSWrs},
4475 {AArch64::ANDSXrr, AArch64::ANDSWrr}};
4476 // ANDS needs a logical immediate for its immediate form. Check if we can
4477 // fold one in.
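// For example, a test against 0xff can be selected as ANDSWri with the encoded
// logical immediate instead of materializing 0xff in a register (sketch).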
4478 if (auto ValAndVReg = getIConstantVRegValWithLookThrough(RHS.getReg(), MRI)) {
4479 int64_t Imm = ValAndVReg->Value.getSExtValue();
4480
4481 if (AArch64_AM::isLogicalImmediate(Imm, RegSize)) {
4482 auto TstMI = MIRBuilder.buildInstr(OpcTable[0][Is32Bit], {Ty}, {LHS});
4483 TstMI.addImm(AArch64_AM::encodeLogicalImmediate(Imm, RegSize));
4484 constrainSelectedInstRegOperands(*TstMI, TII, TRI, RBI);
4485 return &*TstMI;
4486 }
4487 }
4488
4489 if (auto Fns = selectLogicalShiftedRegister(RHS))
4490 return emitInstr(OpcTable[1][Is32Bit], {Ty}, {LHS}, MIRBuilder, Fns);
4491 return emitInstr(OpcTable[2][Is32Bit], {Ty}, {LHS, RHS}, MIRBuilder);
4492}
4493
4494MachineInstr *AArch64InstructionSelector::emitIntegerCompare(
4495 MachineOperand &LHS, MachineOperand &RHS, MachineOperand &Predicate,
4496 MachineIRBuilder &MIRBuilder) const {
4497 assert(LHS.isReg() && RHS.isReg() && "Expected LHS and RHS to be registers!");
4498 assert(Predicate.isPredicate() && "Expected predicate?");
4499 MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
4500 LLT CmpTy = MRI.getType(LHS.getReg());
4501 assert(!CmpTy.isVector() && "Expected scalar or pointer");
4502 unsigned Size = CmpTy.getSizeInBits();
4503 (void)Size;
4504 assert((Size == 32 || Size == 64) && "Expected a 32-bit or 64-bit LHS/RHS?");
4505 // Fold the compare into a cmn or tst if possible.
4506 if (auto FoldCmp = tryFoldIntegerCompare(LHS, RHS, Predicate, MIRBuilder))
4507 return FoldCmp;
4508 auto Dst = MRI.cloneVirtualRegister(LHS.getReg());
4509 return emitSUBS(Dst, LHS, RHS, MIRBuilder);
4510}
4511
4512MachineInstr *AArch64InstructionSelector::emitCSetForFCmp(
4513 Register Dst, CmpInst::Predicate Pred, MachineIRBuilder &MIRBuilder) const {
4514 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
4515#ifndef NDEBUG
4516 LLT Ty = MRI.getType(Dst);
4517 assert(!Ty.isVector() && Ty.getSizeInBits() == 32 &&
4518 "Expected a 32-bit scalar register?");
4519#endif
4520 const Register ZReg = AArch64::WZR;
4521 AArch64CC::CondCode CC1, CC2;
4522 changeFCMPPredToAArch64CC(Pred, CC1, CC2);
4523 auto InvCC1 = AArch64CC::getInvertedCondCode(CC1);
4524 if (CC2 == AArch64CC::AL)
4525 return emitCSINC(/*Dst=*/Dst, /*Src1=*/ZReg, /*Src2=*/ZReg, InvCC1,
4526 MIRBuilder);
4527 const TargetRegisterClass *RC = &AArch64::GPR32RegClass;
4528 Register Def1Reg = MRI.createVirtualRegister(RC);
4529 Register Def2Reg = MRI.createVirtualRegister(RC);
4530 auto InvCC2 = AArch64CC::getInvertedCondCode(CC2);
4531 emitCSINC(/*Dst=*/Def1Reg, /*Src1=*/ZReg, /*Src2=*/ZReg, InvCC1, MIRBuilder);
4532 emitCSINC(/*Dst=*/Def2Reg, /*Src1=*/ZReg, /*Src2=*/ZReg, InvCC2, MIRBuilder);
4533 auto OrMI = MIRBuilder.buildInstr(AArch64::ORRWrr, {Dst}, {Def1Reg, Def2Reg});
4534 constrainSelectedInstRegOperands(*OrMI, TII, TRI, RBI);
4535 return &*OrMI;
4536}
4537
4538MachineInstr *AArch64InstructionSelector::emitFPCompare(
4539 Register LHS, Register RHS, MachineIRBuilder &MIRBuilder,
4540 std::optional<CmpInst::Predicate> Pred) const {
4541 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
4542 LLT Ty = MRI.getType(LHS);
4543 if (Ty.isVector())
4544 return nullptr;
4545 unsigned OpSize = Ty.getSizeInBits();
4546 assert(OpSize == 16 || OpSize == 32 || OpSize == 64);
4547
4548 // If this is a compare against +0.0, then we don't have
4549 // to explicitly materialize a constant.
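// For example, an s32 compare against +0.0 uses FCMPSri below, which takes a
// single register operand and compares it against zero.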
4550 const ConstantFP *FPImm = getConstantFPVRegVal(RHS, MRI);
4551 bool ShouldUseImm = FPImm && (FPImm->isZero() && !FPImm->isNegative());
4552
4553 auto IsEqualityPred = [](CmpInst::Predicate P) {
4554 return P == CmpInst::FCMP_OEQ || P == CmpInst::FCMP_ONE ||
4555 P == CmpInst::FCMP_UEQ || P == CmpInst::FCMP_UNE;
4556 };
4557 if (!ShouldUseImm && Pred && IsEqualityPred(*Pred)) {
4558 // Try commutating the operands.
4559 const ConstantFP *LHSImm = getConstantFPVRegVal(LHS, MRI);
4560 if (LHSImm && (LHSImm->isZero() && !LHSImm->isNegative())) {
4561 ShouldUseImm = true;
4562 std::swap(LHS, RHS);
4563 }
4564 }
4565 unsigned CmpOpcTbl[2][3] = {
4566 {AArch64::FCMPHrr, AArch64::FCMPSrr, AArch64::FCMPDrr},
4567 {AArch64::FCMPHri, AArch64::FCMPSri, AArch64::FCMPDri}};
4568 unsigned CmpOpc =
4569 CmpOpcTbl[ShouldUseImm][OpSize == 16 ? 0 : (OpSize == 32 ? 1 : 2)];
4570
4571 // Partially build the compare. Decide if we need to add a use for the
4572 // third operand based off whether or not we're comparing against 0.0.
4573 auto CmpMI = MIRBuilder.buildInstr(CmpOpc).addUse(LHS);
4574 CmpMI.setMIFlags(MachineInstr::NoFPExcept);
4575 if (!ShouldUseImm)
4576 CmpMI.addUse(RHS);
4577 constrainSelectedInstRegOperands(*CmpMI, TII, TRI, RBI);
4578 return &*CmpMI;
4579}
4580
4581MachineInstr *AArch64InstructionSelector::emitVectorConcat(
4582 std::optional<Register> Dst, Register Op1, Register Op2,
4583 MachineIRBuilder &MIRBuilder) const {
4584 // We implement a vector concat by:
4585 // 1. Use scalar_to_vector to insert the lower vector into the larger dest
4586 // 2. Insert the upper vector into the destination's upper element
4587 // TODO: some of this code is common with G_BUILD_VECTOR handling.
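// Sketch for two 64-bit operands (illustrative vregs): widen %op1 and %op2 to
// 128 bits via emitScalarToVector, then %dst = INSvi64lane %wide1, 1, %wide2, 0,
// so lane 1 of the result comes from lane 0 of %op2.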
4588 MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
4589
4590 const LLT Op1Ty = MRI.getType(Op1);
4591 const LLT Op2Ty = MRI.getType(Op2);
4592
4593 if (Op1Ty != Op2Ty) {
4594 LLVM_DEBUG(dbgs() << "Could not do vector concat of differing vector tys");
4595 return nullptr;
4596 }
4597 assert(Op1Ty.isVector() && "Expected a vector for vector concat");
4598
4599 if (Op1Ty.getSizeInBits() >= 128) {
4600 LLVM_DEBUG(dbgs() << "Vector concat not supported for full size vectors");
4601 return nullptr;
4602 }
4603
4604 // At the moment we just support 64 bit vector concats.
4605 if (Op1Ty.getSizeInBits() != 64) {
4606 LLVM_DEBUG(dbgs() << "Vector concat only supported for 64b vectors");
4607 return nullptr;
4608 }
4609
4610 const LLT ScalarTy = LLT::scalar(Op1Ty.getSizeInBits());
4611 const RegisterBank &FPRBank = *RBI.getRegBank(Op1, MRI, TRI);
4612 const TargetRegisterClass *DstRC =
4613 getRegClassForTypeOnBank(Op1Ty.multiplyElements(2), FPRBank);
4614
4615 MachineInstr *WidenedOp1 =
4616 emitScalarToVector(ScalarTy.getSizeInBits(), DstRC, Op1, MIRBuilder);
4617 MachineInstr *WidenedOp2 =
4618 emitScalarToVector(ScalarTy.getSizeInBits(), DstRC, Op2, MIRBuilder);
4619 if (!WidenedOp1 || !WidenedOp2) {
4620 LLVM_DEBUG(dbgs() << "Could not emit a vector from scalar value");
4621 return nullptr;
4622 }
4623
4624 // Now do the insert of the upper element.
4625 unsigned InsertOpc, InsSubRegIdx;
4626 std::tie(InsertOpc, InsSubRegIdx) =
4627 getInsertVecEltOpInfo(FPRBank, ScalarTy.getSizeInBits());
4628
4629 if (!Dst)
4630 Dst = MRI.createVirtualRegister(DstRC);
4631 auto InsElt =
4632 MIRBuilder
4633 .buildInstr(InsertOpc, {*Dst}, {WidenedOp1->getOperand(0).getReg()})
4634 .addImm(1) /* Lane index */
4635 .addUse(WidenedOp2->getOperand(0).getReg())
4636 .addImm(0);
4637 constrainSelectedInstRegOperands(*InsElt, TII, TRI, RBI);
4638 return &*InsElt;
4639}
4640
4641 MachineInstr *
4642 AArch64InstructionSelector::emitCSINC(Register Dst, Register Src1,
4643 Register Src2, AArch64CC::CondCode Pred,
4644 MachineIRBuilder &MIRBuilder) const {
4645 auto &MRI = *MIRBuilder.getMRI();
4646 const RegClassOrRegBank &RegClassOrBank = MRI.getRegClassOrRegBank(Dst);
4647 // If we used a register class, then this won't necessarily have an LLT.
4648 // Compute the size based off whether or not we have a class or bank.
4649 unsigned Size;
4650 if (const auto *RC = RegClassOrBank.dyn_cast<const TargetRegisterClass *>())
4651 Size = TRI.getRegSizeInBits(*RC);
4652 else
4653 Size = MRI.getType(Dst).getSizeInBits();
4654 // Some opcodes use s1.
4655 assert(Size <= 64 && "Expected 64 bits or less only!");
4656 static const unsigned OpcTable[2] = {AArch64::CSINCWr, AArch64::CSINCXr};
4657 unsigned Opc = OpcTable[Size == 64];
4658 auto CSINC = MIRBuilder.buildInstr(Opc, {Dst}, {Src1, Src2}).addImm(Pred);
4659 constrainSelectedInstRegOperands(*CSINC, TII, TRI, RBI);
4660 return &*CSINC;
4661}
4662
4663MachineInstr *AArch64InstructionSelector::emitCarryIn(MachineInstr &I,
4664 Register CarryReg) {
4665 MachineRegisterInfo *MRI = MIB.getMRI();
4666 unsigned Opcode = I.getOpcode();
4667
4668 // If the instruction is a SUB, we need to negate the carry,
4669 // because borrowing is indicated by carry-flag == 0.
4670 bool NeedsNegatedCarry =
4671 (Opcode == TargetOpcode::G_USUBE || Opcode == TargetOpcode::G_SSUBE);
4672
4673 // If the previous instruction will already produce the correct carry, do not
4674 // emit a carry generating instruction. E.g. for G_UADDE/G_USUBE sequences
4675 // generated during legalization of wide add/sub. This optimization depends on
4676 // these sequences not being interrupted by other instructions.
4677 // We have to select the previous instruction before the carry-using
4678 // instruction is deleted by the calling function, otherwise the previous
4679 // instruction might become dead and would get deleted.
4680 MachineInstr *SrcMI = MRI->getVRegDef(CarryReg);
4681 if (SrcMI == I.getPrevNode()) {
4682 if (auto *CarrySrcMI = dyn_cast<GAddSubCarryOut>(SrcMI)) {
4683 bool ProducesNegatedCarry = CarrySrcMI->isSub();
4684 if (NeedsNegatedCarry == ProducesNegatedCarry &&
4685 CarrySrcMI->isUnsigned() &&
4686 CarrySrcMI->getCarryOutReg() == CarryReg &&
4687 selectAndRestoreState(*SrcMI))
4688 return nullptr;
4689 }
4690 }
4691
4692 Register DeadReg = MRI->createVirtualRegister(&AArch64::GPR32RegClass);
4693
4694 if (NeedsNegatedCarry) {
4695 // (0 - Carry) sets !C in NZCV when Carry == 1
4696 Register ZReg = AArch64::WZR;
4697 return emitInstr(AArch64::SUBSWrr, {DeadReg}, {ZReg, CarryReg}, MIB);
4698 }
4699
4700 // (Carry - 1) sets !C in NZCV when Carry == 0
4701 auto Fns = select12BitValueWithLeftShift(1);
4702 return emitInstr(AArch64::SUBSWri, {DeadReg}, {CarryReg}, MIB, Fns);
4703}
4704
4705 bool AArch64InstructionSelector::selectOverflowOp(MachineInstr &I,
4706 MachineRegisterInfo &MRI) {
4707 auto &CarryMI = cast<GAddSubCarryOut>(I);
4708
4709 if (auto *CarryInMI = dyn_cast<GAddSubCarryInOut>(&I)) {
4710 // Set NZCV carry according to carry-in VReg
4711 emitCarryIn(I, CarryInMI->getCarryInReg());
4712 }
4713
4714 // Emit the operation and get the correct condition code.
4715 auto OpAndCC = emitOverflowOp(I.getOpcode(), CarryMI.getDstReg(),
4716 CarryMI.getLHS(), CarryMI.getRHS(), MIB);
4717
4718 Register CarryOutReg = CarryMI.getCarryOutReg();
4719
4720 // Don't convert carry-out to VReg if it is never used
4721 if (!MRI.use_nodbg_empty(CarryOutReg)) {
4722 // Now, put the overflow result in the register given by the first operand
4723 // to the overflow op. CSINC increments the result when the predicate is
4724 // false, so to get the increment when it's true, we need to use the
4725 // inverse. In this case, we want to increment when carry is set.
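// In other words, CarryOut = CSINC wzr, wzr, invert(cc), which evaluates to 1
// when the overflow condition cc holds and to 0 otherwise.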
4726 Register ZReg = AArch64::WZR;
4727 emitCSINC(/*Dst=*/CarryOutReg, /*Src1=*/ZReg, /*Src2=*/ZReg,
4728 getInvertedCondCode(OpAndCC.second), MIB);
4729 }
4730
4731 I.eraseFromParent();
4732 return true;
4733}
4734
4735std::pair<MachineInstr *, AArch64CC::CondCode>
4736AArch64InstructionSelector::emitOverflowOp(unsigned Opcode, Register Dst,
4737 MachineOperand &LHS,
4738 MachineOperand &RHS,
4739 MachineIRBuilder &MIRBuilder) const {
4740 switch (Opcode) {
4741 default:
4742 llvm_unreachable("Unexpected opcode!");
4743 case TargetOpcode::G_SADDO:
4744 return std::make_pair(emitADDS(Dst, LHS, RHS, MIRBuilder), AArch64CC::VS);
4745 case TargetOpcode::G_UADDO:
4746 return std::make_pair(emitADDS(Dst, LHS, RHS, MIRBuilder), AArch64CC::HS);
4747 case TargetOpcode::G_SSUBO:
4748 return std::make_pair(emitSUBS(Dst, LHS, RHS, MIRBuilder), AArch64CC::VS);
4749 case TargetOpcode::G_USUBO:
4750 return std::make_pair(emitSUBS(Dst, LHS, RHS, MIRBuilder), AArch64CC::LO);
4751 case TargetOpcode::G_SADDE:
4752 return std::make_pair(emitADCS(Dst, LHS, RHS, MIRBuilder), AArch64CC::VS);
4753 case TargetOpcode::G_UADDE:
4754 return std::make_pair(emitADCS(Dst, LHS, RHS, MIRBuilder), AArch64CC::HS);
4755 case TargetOpcode::G_SSUBE:
4756 return std::make_pair(emitSBCS(Dst, LHS, RHS, MIRBuilder), AArch64CC::VS);
4757 case TargetOpcode::G_USUBE:
4758 return std::make_pair(emitSBCS(Dst, LHS, RHS, MIRBuilder), AArch64CC::LO);
4759 }
4760}
4761
4762/// Returns true if @p Val is a tree of AND/OR/CMP operations that can be
4763/// expressed as a conjunction.
4764/// \param CanNegate Set to true if we can negate the whole sub-tree just by
4765/// changing the conditions on the CMP tests.
4766/// (this means we can call emitConjunctionRec() with
4767/// Negate==true on this sub-tree)
4768/// \param MustBeFirst Set to true if this subtree needs to be negated and we
4769/// cannot do the negation naturally. We are required to
4770/// emit the subtree first in this case.
4771 /// \param WillNegate Is true if we are called when the result of this
4772/// subexpression must be negated. This happens when the
4773/// outer expression is an OR. We can use this fact to know
4774/// that we have a double negation (or (or ...) ...) that
4775/// can be implemented for free.
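/// For example, (or (icmp ...), (and (icmp ...), (icmp ...))) is such a tree,
/// provided each intermediate value has a single non-debug use: the leaves are
/// compares and the interior nodes are G_AND/G_OR.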
4776static bool canEmitConjunction(Register Val, bool &CanNegate, bool &MustBeFirst,
4777 bool WillNegate, MachineRegisterInfo &MRI,
4778 unsigned Depth = 0) {
4779 if (!MRI.hasOneNonDBGUse(Val))
4780 return false;
4781 MachineInstr *ValDef = MRI.getVRegDef(Val);
4782 unsigned Opcode = ValDef->getOpcode();
4783 if (isa<GAnyCmp>(ValDef)) {
4784 CanNegate = true;
4785 MustBeFirst = false;
4786 return true;
4787 }
4788 // Protect against exponential runtime and stack overflow.
4789 if (Depth > 6)
4790 return false;
4791 if (Opcode == TargetOpcode::G_AND || Opcode == TargetOpcode::G_OR) {
4792 bool IsOR = Opcode == TargetOpcode::G_OR;
4793 Register O0 = ValDef->getOperand(1).getReg();
4794 Register O1 = ValDef->getOperand(2).getReg();
4795 bool CanNegateL;
4796 bool MustBeFirstL;
4797 if (!canEmitConjunction(O0, CanNegateL, MustBeFirstL, IsOR, MRI, Depth + 1))
4798 return false;
4799 bool CanNegateR;
4800 bool MustBeFirstR;
4801 if (!canEmitConjunction(O1, CanNegateR, MustBeFirstR, IsOR, MRI, Depth + 1))
4802 return false;
4803
4804 if (MustBeFirstL && MustBeFirstR)
4805 return false;
4806
4807 if (IsOR) {
4808 // For an OR expression we need to be able to naturally negate at least
4809 // one side or we cannot do the transformation at all.
4810 if (!CanNegateL && !CanNegateR)
4811 return false;
4812 // If the result of the OR will be negated and we can naturally negate
4813 // the leaves, then this sub-tree as a whole negates naturally.
4814 CanNegate = WillNegate && CanNegateL && CanNegateR;
4815 // If we cannot naturally negate the whole sub-tree, then this must be
4816 // emitted first.
4817 MustBeFirst = !CanNegate;
4818 } else {
4819 assert(Opcode == TargetOpcode::G_AND && "Must be G_AND");
4820 // We cannot naturally negate an AND operation.
4821 CanNegate = false;
4822 MustBeFirst = MustBeFirstL || MustBeFirstR;
4823 }
4824 return true;
4825 }
4826 return false;
4827}
4828
4829MachineInstr *AArch64InstructionSelector::emitConditionalComparison(
4830 Register LHS, Register RHS, CmpInst::Predicate CC,
4831 AArch64CC::CondCode Predicate, AArch64CC::CondCode OutCC,
4832 MachineIRBuilder &MIB) const {
4833 // TODO: emit CMN as an optimization.
4834 auto &MRI = *MIB.getMRI();
4835 LLT OpTy = MRI.getType(LHS);
4836 unsigned CCmpOpc;
4837 std::optional<ValueAndVReg> C;
4838 if (CmpInst::isIntPredicate(CC)) {
4839 assert(OpTy.getSizeInBits() == 32 || OpTy.getSizeInBits() == 64);
4840 C = getIConstantVRegValWithLookThrough(RHS, MRI);
4841 if (C && C->Value.ult(32))
4842 CCmpOpc = OpTy.getSizeInBits() == 32 ? AArch64::CCMPWi : AArch64::CCMPXi;
4843 else
4844 CCmpOpc = OpTy.getSizeInBits() == 32 ? AArch64::CCMPWr : AArch64::CCMPXr;
4845 } else {
4846 assert(OpTy.getSizeInBits() == 16 || OpTy.getSizeInBits() == 32 ||
4847 OpTy.getSizeInBits() == 64);
4848 switch (OpTy.getSizeInBits()) {
4849 case 16:
4850 assert(STI.hasFullFP16() && "Expected Full FP16 for fp16 comparisons");
4851 CCmpOpc = AArch64::FCCMPHrr;
4852 break;
4853 case 32:
4854 CCmpOpc = AArch64::FCCMPSrr;
4855 break;
4856 case 64:
4857 CCmpOpc = AArch64::FCCMPDrr;
4858 break;
4859 default:
4860 return nullptr;
4861 }
4862 }
4863 AArch64CC::CondCode InvOutCC = AArch64CC::getInvertedCondCode(OutCC);
4864 unsigned NZCV = AArch64CC::getNZCVToSatisfyCondCode(InvOutCC);
4865 auto CCmp =
4866 MIB.buildInstr(CCmpOpc, {}, {LHS});
4867 if (CCmpOpc == AArch64::CCMPWi || CCmpOpc == AArch64::CCMPXi)
4868 CCmp.addImm(C->Value.getZExtValue());
4869 else
4870 CCmp.addReg(RHS);
4871 CCmp.addImm(NZCV).addImm(Predicate);
4872 constrainSelectedInstRegOperands(*CCmp, TII, TRI, RBI);
4873 return &*CCmp;
4874}
4875
4876MachineInstr *AArch64InstructionSelector::emitConjunctionRec(
4877 Register Val, AArch64CC::CondCode &OutCC, bool Negate, Register CCOp,
4878 AArch64CC::CondCode Predicate, MachineIRBuilder &MIB) const {
4879 // We're at a tree leaf, produce a conditional comparison operation.
4880 auto &MRI = *MIB.getMRI();
4881 MachineInstr *ValDef = MRI.getVRegDef(Val);
4882 unsigned Opcode = ValDef->getOpcode();
4883 if (auto *Cmp = dyn_cast<GAnyCmp>(ValDef)) {
4884 Register LHS = Cmp->getLHSReg();
4885 Register RHS = Cmp->getRHSReg();
4886 CmpInst::Predicate CC = Cmp->getCond();
4887 if (Negate)
4888 CC = CmpInst::getInversePredicate(CC);
4889 if (isa<GICmp>(Cmp)) {
4890 OutCC = changeICMPPredToAArch64CC(CC);
4891 } else {
4892 // Handle special FP cases.
4893 AArch64CC::CondCode ExtraCC;
4894 changeFPCCToANDAArch64CC(CC, OutCC, ExtraCC);
4895 // Some floating point conditions can't be tested with a single condition
4896 // code. Construct an additional comparison in this case.
4897 if (ExtraCC != AArch64CC::AL) {
4898 MachineInstr *ExtraCmp;
4899 if (!CCOp)
4900 ExtraCmp = emitFPCompare(LHS, RHS, MIB, CC);
4901 else
4902 ExtraCmp =
4903 emitConditionalComparison(LHS, RHS, CC, Predicate, ExtraCC, MIB);
4904 CCOp = ExtraCmp->getOperand(0).getReg();
4905 Predicate = ExtraCC;
4906 }
4907 }
4908
4909 // Produce a normal comparison if we are first in the chain
4910 if (!CCOp) {
4911 auto Dst = MRI.cloneVirtualRegister(LHS);
4912 if (isa<GICmp>(Cmp))
4913 return emitSUBS(Dst, Cmp->getOperand(2), Cmp->getOperand(3), MIB);
4914 return emitFPCompare(Cmp->getOperand(2).getReg(),
4915 Cmp->getOperand(3).getReg(), MIB);
4916 }
4917 // Otherwise produce a ccmp.
4918 return emitConditionalComparison(LHS, RHS, CC, Predicate, OutCC, MIB);
4919 }
4920 assert(MRI.hasOneNonDBGUse(Val) && "Valid conjunction/disjunction tree");
4921
4922 bool IsOR = Opcode == TargetOpcode::G_OR;
4923
4924 Register LHS = ValDef->getOperand(1).getReg();
4925 bool CanNegateL;
4926 bool MustBeFirstL;
4927 bool ValidL = canEmitConjunction(LHS, CanNegateL, MustBeFirstL, IsOR, MRI);
4928 assert(ValidL && "Valid conjunction/disjunction tree");
4929 (void)ValidL;
4930
4931 Register RHS = ValDef->getOperand(2).getReg();
4932 bool CanNegateR;
4933 bool MustBeFirstR;
4934 bool ValidR = canEmitConjunction(RHS, CanNegateR, MustBeFirstR, IsOR, MRI);
4935 assert(ValidR && "Valid conjunction/disjunction tree");
4936 (void)ValidR;
4937
4938 // Swap sub-tree that must come first to the right side.
4939 if (MustBeFirstL) {
4940 assert(!MustBeFirstR && "Valid conjunction/disjunction tree");
4941 std::swap(LHS, RHS);
4942 std::swap(CanNegateL, CanNegateR);
4943 std::swap(MustBeFirstL, MustBeFirstR);
4944 }
4945
4946 bool NegateR;
4947 bool NegateAfterR;
4948 bool NegateL;
4949 bool NegateAfterAll;
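// For a G_OR we rely on De Morgan's law: (a or b) == !(!a and !b). The left
// sub-tree is always negated, the right sub-tree is negated directly when
// possible (otherwise its condition code is inverted afterwards), and the
// final condition code is inverted unless the caller already asked for the
// negated result.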
4950 if (Opcode == TargetOpcode::G_OR) {
4951 // Swap the sub-tree that we can negate naturally to the left.
4952 if (!CanNegateL) {
4953 assert(CanNegateR && "at least one side must be negatable");
4954 assert(!MustBeFirstR && "invalid conjunction/disjunction tree");
4955 assert(!Negate);
4956 std::swap(LHS, RHS);
4957 NegateR = false;
4958 NegateAfterR = true;
4959 } else {
4960 // Negate the left sub-tree if possible, otherwise negate the result.
4961 NegateR = CanNegateR;
4962 NegateAfterR = !CanNegateR;
4963 }
4964 NegateL = true;
4965 NegateAfterAll = !Negate;
4966 } else {
4967 assert(Opcode == TargetOpcode::G_AND &&
4968 "Valid conjunction/disjunction tree");
4969 assert(!Negate && "Valid conjunction/disjunction tree");
4970
4971 NegateL = false;
4972 NegateR = false;
4973 NegateAfterR = false;
4974 NegateAfterAll = false;
4975 }
4976
4977 // Emit sub-trees.
4978 AArch64CC::CondCode RHSCC;
4979 MachineInstr *CmpR =
4980 emitConjunctionRec(RHS, RHSCC, NegateR, CCOp, Predicate, MIB);
4981 if (NegateAfterR)
4982 RHSCC = AArch64CC::getInvertedCondCode(RHSCC);
4983 MachineInstr *CmpL = emitConjunctionRec(
4984 LHS, OutCC, NegateL, CmpR->getOperand(0).getReg(), RHSCC, MIB);
4985 if (NegateAfterAll)
4986 OutCC = AArch64CC::getInvertedCondCode(OutCC);
4987 return CmpL;
4988}
4989
4990MachineInstr *AArch64InstructionSelector::emitConjunction(
4991 Register Val, AArch64CC::CondCode &OutCC, MachineIRBuilder &MIB) const {
4992 bool DummyCanNegate;
4993 bool DummyMustBeFirst;
4994 if (!canEmitConjunction(Val, DummyCanNegate, DummyMustBeFirst, false,
4995 *MIB.getMRI()))
4996 return nullptr;
4997 return emitConjunctionRec(Val, OutCC, false, Register(), AArch64CC::AL, MIB);
4998}
4999
5000bool AArch64InstructionSelector::tryOptSelectConjunction(GSelect &SelI,
5001 MachineInstr &CondMI) {
5002 AArch64CC::CondCode AArch64CC;
5003 MachineInstr *ConjMI = emitConjunction(SelI.getCondReg(), AArch64CC, MIB);
5004 if (!ConjMI)
5005 return false;
5006
5007 emitSelect(SelI.getReg(0), SelI.getTrueReg(), SelI.getFalseReg(), AArch64CC, MIB);
5008 SelI.eraseFromParent();
5009 return true;
5010}
5011
5012bool AArch64InstructionSelector::tryOptSelect(GSelect &I) {
5013 MachineRegisterInfo &MRI = *MIB.getMRI();
5014 // We want to recognize this pattern:
5015 //
5016 // $z = G_FCMP pred, $x, $y
5017 // ...
5018 // $w = G_SELECT $z, $a, $b
5019 //
5020 // Where the value of $z is *only* ever used by the G_SELECT (possibly with
5021 // some copies/truncs in between).
5022 //
5023 // If we see this, then we can emit something like this:
5024 //
5025 // fcmp $x, $y
5026 // fcsel $w, $a, $b, pred
5027 //
5028 // Rather than emitting both of the rather long sequences in the standard
5029 // G_FCMP/G_SELECT select methods.
5030
5031 // First, check if the condition is defined by a compare.
5032 MachineInstr *CondDef = MRI.getVRegDef(I.getOperand(1).getReg());
5033
5034 // We can only fold if all of the defs have one use.
5035 Register CondDefReg = CondDef->getOperand(0).getReg();
5036 if (!MRI.hasOneNonDBGUse(CondDefReg)) {
5037 // Unless it's another select.
5038 for (const MachineInstr &UI : MRI.use_nodbg_instructions(CondDefReg)) {
5039 if (CondDef == &UI)
5040 continue;
5041 if (UI.getOpcode() != TargetOpcode::G_SELECT)
5042 return false;
5043 }
5044 }
5045
5046 // Is the condition defined by a compare?
5047 unsigned CondOpc = CondDef->getOpcode();
5048 if (CondOpc != TargetOpcode::G_ICMP && CondOpc != TargetOpcode::G_FCMP) {
5049 if (tryOptSelectConjunction(I, *CondDef))
5050 return true;
5051 return false;
5052 }
5053
5054 AArch64CC::CondCode CondCode;
5055 if (CondOpc == TargetOpcode::G_ICMP) {
5056 auto Pred =
5057 static_cast<CmpInst::Predicate>(CondDef->getOperand(1).getPredicate());
5058 CondCode = changeICMPPredToAArch64CC(Pred);
5059 emitIntegerCompare(CondDef->getOperand(2), CondDef->getOperand(3),
5060 CondDef->getOperand(1), MIB);
5061 } else {
5062 // Get the condition code for the select.
5063 auto Pred =
5064 static_cast<CmpInst::Predicate>(CondDef->getOperand(1).getPredicate());
5065 AArch64CC::CondCode CondCode2;
5066 changeFCMPPredToAArch64CC(Pred, CondCode, CondCode2);
5067
5068 // changeFCMPPredToAArch64CC sets CondCode2 to AL when we require two
5069 // instructions to emit the comparison.
5070 // TODO: Handle FCMP_UEQ and FCMP_ONE. After that, this check will be
5071 // unnecessary.
5072 if (CondCode2 != AArch64CC::AL)
5073 return false;
5074
5075 if (!emitFPCompare(CondDef->getOperand(2).getReg(),
5076 CondDef->getOperand(3).getReg(), MIB)) {
5077 LLVM_DEBUG(dbgs() << "Couldn't emit compare for select!\n");
5078 return false;
5079 }
5080 }
5081
5082 // Emit the select.
5083 emitSelect(I.getOperand(0).getReg(), I.getOperand(2).getReg(),
5084 I.getOperand(3).getReg(), CondCode, MIB);
5085 I.eraseFromParent();
5086 return true;
5087}
5088
5089MachineInstr *AArch64InstructionSelector::tryFoldIntegerCompare(
5090 MachineOperand &LHS, MachineOperand &RHS, MachineOperand &Predicate,
5091 MachineIRBuilder &MIRBuilder) const {
5092 assert(LHS.isReg() && RHS.isReg() && Predicate.isPredicate() &&
5093 "Unexpected MachineOperand");
5094 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
5095 // We want to find this sort of thing:
5096 // x = G_SUB 0, y
5097 // G_ICMP z, x
5098 //
5099 // In this case, we can fold the G_SUB into the G_ICMP using a CMN instead.
5100 // e.g:
5101 //
5102 // cmn z, y
5103
5104 // Check if the RHS or LHS of the G_ICMP is defined by a SUB
5105 MachineInstr *LHSDef = getDefIgnoringCopies(LHS.getReg(), MRI);
5106 MachineInstr *RHSDef = getDefIgnoringCopies(RHS.getReg(), MRI);
5107 auto P = static_cast<CmpInst::Predicate>(Predicate.getPredicate());
5108 // Given this:
5109 //
5110 // x = G_SUB 0, y
5111 // G_ICMP x, z
5112 //
5113 // Produce this:
5114 //
5115 // cmn y, z
5116 if (isCMN(LHSDef, P, MRI))
5117 return emitCMN(LHSDef->getOperand(2), RHS, MIRBuilder);
5118
5119 // Same idea here, but with the RHS of the compare instead:
5120 //
5121 // Given this:
5122 //
5123 // x = G_SUB 0, y
5124 // G_ICMP z, x
5125 //
5126 // Produce this:
5127 //
5128 // cmn z, y
5129 if (isCMN(RHSDef, P, MRI))
5130 return emitCMN(LHS, RHSDef->getOperand(2), MIRBuilder);
5131
5132 // Given this:
5133 //
5134 // z = G_AND x, y
5135 // G_ICMP z, 0
5136 //
5137 // Produce this if the compare is signed:
5138 //
5139 // tst x, y
5140 if (!CmpInst::isUnsigned(P) && LHSDef &&
5141 LHSDef->getOpcode() == TargetOpcode::G_AND) {
5142 // Make sure that the RHS is 0.
5143 auto ValAndVReg = getIConstantVRegValWithLookThrough(RHS.getReg(), MRI);
5144 if (!ValAndVReg || ValAndVReg->Value != 0)
5145 return nullptr;
5146
5147 return emitTST(LHSDef->getOperand(1),
5148 LHSDef->getOperand(2), MIRBuilder);
5149 }
5150
5151 return nullptr;
5152}
5153
5154bool AArch64InstructionSelector::selectShuffleVector(
5155 MachineInstr &I, MachineRegisterInfo &MRI) {
5156 const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
5157 Register Src1Reg = I.getOperand(1).getReg();
5158 const LLT Src1Ty = MRI.getType(Src1Reg);
5159 Register Src2Reg = I.getOperand(2).getReg();
5160 const LLT Src2Ty = MRI.getType(Src2Reg);
5161 ArrayRef<int> Mask = I.getOperand(3).getShuffleMask();
5162
5163 MachineBasicBlock &MBB = *I.getParent();
5164 MachineFunction &MF = *MBB.getParent();
5165 LLVMContext &Ctx = MF.getFunction().getContext();
5166
5167 // G_SHUFFLE_VECTOR is weird in that the source operands can be scalars, if
5168 // it originated from a <1 x T> type. Those should have been lowered into
5169 // G_BUILD_VECTOR earlier.
5170 if (!Src1Ty.isVector() || !Src2Ty.isVector()) {
5171 LLVM_DEBUG(dbgs() << "Could not select a \"scalar\" G_SHUFFLE_VECTOR\n");
5172 return false;
5173 }
5174
5175 unsigned BytesPerElt = DstTy.getElementType().getSizeInBits() / 8;
5176
5177 SmallVector<Constant *, 64> CstIdxs;
5178 for (int Val : Mask) {
5179 // For now, we'll just assume any undef indexes to be 0. This should be
5180 // optimized in the future, e.g. to select DUP etc.
5181 Val = Val < 0 ? 0 : Val;
5182 for (unsigned Byte = 0; Byte < BytesPerElt; ++Byte) {
5183 unsigned Offset = Byte + Val * BytesPerElt;
5184 CstIdxs.emplace_back(ConstantInt::get(Type::getInt8Ty(Ctx), Offset));
5185 }
5186 }
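// For example, with <4 x s32> sources (BytesPerElt == 4) a mask entry of 5
// expands to byte indices 20, 21, 22, 23, i.e. the second element of the
// second source once the two sources are concatenated for TBL.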
5187
5188 // Use a constant pool to load the index vector for TBL.
5189 Constant *CPVal = ConstantVector::get(CstIdxs);
5190 MachineInstr *IndexLoad = emitLoadFromConstantPool(CPVal, MIB);
5191 if (!IndexLoad) {
5192 LLVM_DEBUG(dbgs() << "Could not load from a constant pool");
5193 return false;
5194 }
5195
5196 if (DstTy.getSizeInBits() != 128) {
5197 assert(DstTy.getSizeInBits() == 64 && "Unexpected shuffle result ty");
5198 // This case can be done with TBL1.
5199 MachineInstr *Concat =
5200 emitVectorConcat(std::nullopt, Src1Reg, Src2Reg, MIB);
5201 if (!Concat) {
5202 LLVM_DEBUG(dbgs() << "Could not do vector concat for tbl1");
5203 return false;
5204 }
5205
5206 // The constant pool load will be 64 bits, so we need to convert it to an FPR128 reg.
5207 IndexLoad = emitScalarToVector(64, &AArch64::FPR128RegClass,
5208 IndexLoad->getOperand(0).getReg(), MIB);
5209
5210 auto TBL1 = MIB.buildInstr(
5211 AArch64::TBLv16i8One, {&AArch64::FPR128RegClass},
5212 {Concat->getOperand(0).getReg(), IndexLoad->getOperand(0).getReg()});
5213 constrainSelectedInstRegOperands(*TBL1.getInstr(), TII, TRI, RBI);
5214
5215 auto Copy =
5216 MIB.buildInstr(TargetOpcode::COPY, {I.getOperand(0).getReg()}, {})
5217 .addReg(TBL1.getReg(0), 0, AArch64::dsub);
5218 RBI.constrainGenericRegister(Copy.getReg(0), AArch64::FPR64RegClass, MRI);
5219 I.eraseFromParent();
5220 return true;
5221 }
5222
5223 // For TBL2 we need to emit a REG_SEQUENCE to tie together two consecutive
5224 // Q registers for regalloc.
5225 SmallVector<Register, 2> Regs = {Src1Reg, Src2Reg};
5226 auto RegSeq = createQTuple(Regs, MIB);
5227 auto TBL2 = MIB.buildInstr(AArch64::TBLv16i8Two, {I.getOperand(0)},
5228 {RegSeq, IndexLoad->getOperand(0)});
5229 constrainSelectedInstRegOperands(*TBL2.getInstr(), TII, TRI, RBI);
5230 I.eraseFromParent();
5231 return true;
5232}
5233
5234MachineInstr *AArch64InstructionSelector::emitLaneInsert(
5235 std::optional<Register> DstReg, Register SrcReg, Register EltReg,
5236 unsigned LaneIdx, const RegisterBank &RB,
5237 MachineIRBuilder &MIRBuilder) const {
5238 MachineInstr *InsElt = nullptr;
5239 const TargetRegisterClass *DstRC = &AArch64::FPR128RegClass;
5240 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
5241
5242 // Create a register to define with the insert if one wasn't passed in.
5243 if (!DstReg)
5244 DstReg = MRI.createVirtualRegister(DstRC);
5245
5246 unsigned EltSize = MRI.getType(EltReg).getSizeInBits();
5247 unsigned Opc = getInsertVecEltOpInfo(RB, EltSize).first;
5248
5249 if (RB.getID() == AArch64::FPRRegBankID) {
5250 auto InsSub = emitScalarToVector(EltSize, DstRC, EltReg, MIRBuilder);
5251 InsElt = MIRBuilder.buildInstr(Opc, {*DstReg}, {SrcReg})
5252 .addImm(LaneIdx)
5253 .addUse(InsSub->getOperand(0).getReg())
5254 .addImm(0);
5255 } else {
5256 InsElt = MIRBuilder.buildInstr(Opc, {*DstReg}, {SrcReg})
5257 .addImm(LaneIdx)
5258 .addUse(EltReg);
5259 }
5260
5261 constrainSelectedInstRegOperands(*InsElt, TII, TRI, RBI);
5262 return InsElt;
5263}
5264
5265bool AArch64InstructionSelector::selectUSMovFromExtend(
5266 MachineInstr &MI, MachineRegisterInfo &MRI) {
5267 if (MI.getOpcode() != TargetOpcode::G_SEXT &&
5268 MI.getOpcode() != TargetOpcode::G_ZEXT &&
5269 MI.getOpcode() != TargetOpcode::G_ANYEXT)
5270 return false;
5271 bool IsSigned = MI.getOpcode() == TargetOpcode::G_SEXT;
5272 const Register DefReg = MI.getOperand(0).getReg();
5273 const LLT DstTy = MRI.getType(DefReg);
5274 unsigned DstSize = DstTy.getSizeInBits();
5275
5276 if (DstSize != 32 && DstSize != 64)
5277 return false;
5278
5279 MachineInstr *Extract = getOpcodeDef(TargetOpcode::G_EXTRACT_VECTOR_ELT,
5280 MI.getOperand(1).getReg(), MRI);
5281 int64_t Lane;
5282 if (!Extract || !mi_match(Extract->getOperand(2).getReg(), MRI, m_ICst(Lane)))
5283 return false;
5284 Register Src0 = Extract->getOperand(1).getReg();
5285
5286 const LLT &VecTy = MRI.getType(Src0);
5287
5288 if (VecTy.getSizeInBits() != 128) {
5289 const MachineInstr *ScalarToVector = emitScalarToVector(
5290 VecTy.getSizeInBits(), &AArch64::FPR128RegClass, Src0, MIB);
5291 assert(ScalarToVector && "Didn't expect emitScalarToVector to fail!");
5292 Src0 = ScalarToVector->getOperand(0).getReg();
5293 }
5294
5295 unsigned Opcode;
5296 if (DstSize == 64 && VecTy.getScalarSizeInBits() == 32)
5297 Opcode = IsSigned ? AArch64::SMOVvi32to64 : AArch64::UMOVvi32;
5298 else if (DstSize == 64 && VecTy.getScalarSizeInBits() == 16)
5299 Opcode = IsSigned ? AArch64::SMOVvi16to64 : AArch64::UMOVvi16;
5300 else if (DstSize == 64 && VecTy.getScalarSizeInBits() == 8)
5301 Opcode = IsSigned ? AArch64::SMOVvi8to64 : AArch64::UMOVvi8;
5302 else if (DstSize == 32 && VecTy.getScalarSizeInBits() == 16)
5303 Opcode = IsSigned ? AArch64::SMOVvi16to32 : AArch64::UMOVvi16;
5304 else if (DstSize == 32 && VecTy.getScalarSizeInBits() == 8)
5305 Opcode = IsSigned ? AArch64::SMOVvi8to32 : AArch64::UMOVvi8;
5306 else
5307 llvm_unreachable("Unexpected type combo for S/UMov!");
5308
5309 // We may need to generate one of these, depending on the type and sign of the
5310 // input:
5311 // DstReg = SMOV Src0, Lane;
5312 // NewReg = UMOV Src0, Lane; DstReg = SUBREG_TO_REG NewReg, sub_32;
5313 MachineInstr *ExtI = nullptr;
5314 if (DstSize == 64 && !IsSigned) {
5315 Register NewReg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
5316 MIB.buildInstr(Opcode, {NewReg}, {Src0}).addImm(Lane);
5317 ExtI = MIB.buildInstr(AArch64::SUBREG_TO_REG, {DefReg}, {})
5318 .addImm(0)
5319 .addUse(NewReg)
5320 .addImm(AArch64::sub_32);
5321 RBI.constrainGenericRegister(DefReg, AArch64::GPR64RegClass, MRI);
5322 } else
5323 ExtI = MIB.buildInstr(Opcode, {DefReg}, {Src0}).addImm(Lane);
5324
5325 constrainSelectedInstRegOperands(*ExtI, TII, TRI, RBI);
5326 MI.eraseFromParent();
5327 return true;
5328}
5329
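// The tryAdvSIMDModImm* helpers below try to materialize a splat constant
// with a single MOVI/MVNI/FMOV (AdvSIMD modified immediate) instruction; each
// returns nullptr if the bit pattern cannot be encoded, and emitConstantVector
// then falls back to a constant-pool load.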
5330MachineInstr *AArch64InstructionSelector::tryAdvSIMDModImm8(
5331 Register Dst, unsigned DstSize, APInt Bits, MachineIRBuilder &Builder) {
5332 unsigned int Op;
5333 if (DstSize == 128) {
5334 if (Bits.getHiBits(64) != Bits.getLoBits(64))
5335 return nullptr;
5336 Op = AArch64::MOVIv16b_ns;
5337 } else {
5338 Op = AArch64::MOVIv8b_ns;
5339 }
5340
5341 uint64_t Val = Bits.zextOrTrunc(64).getZExtValue();
5342
5343 if (AArch64_AM::isAdvSIMDModImmType9(Val)) {
5344 Val = AArch64_AM::encodeAdvSIMDModImmType9(Val);
5345 auto Mov = Builder.buildInstr(Op, {Dst}, {}).addImm(Val);
5346 constrainSelectedInstRegOperands(*Mov, TII, TRI, RBI);
5347 return &*Mov;
5348 }
5349 return nullptr;
5350}
5351
5352MachineInstr *AArch64InstructionSelector::tryAdvSIMDModImm16(
5353 Register Dst, unsigned DstSize, APInt Bits, MachineIRBuilder &Builder,
5354 bool Inv) {
5355
5356 unsigned int Op;
5357 if (DstSize == 128) {
5358 if (Bits.getHiBits(64) != Bits.getLoBits(64))
5359 return nullptr;
5360 Op = Inv ? AArch64::MVNIv8i16 : AArch64::MOVIv8i16;
5361 } else {
5362 Op = Inv ? AArch64::MVNIv4i16 : AArch64::MOVIv4i16;
5363 }
5364
5365 uint64_t Val = Bits.zextOrTrunc(64).getZExtValue();
5366 uint64_t Shift;
5367
5368 if (AArch64_AM::isAdvSIMDModImmType5(Val)) {
5369 Val = AArch64_AM::encodeAdvSIMDModImmType5(Val);
5370 Shift = 0;
5371 } else if (AArch64_AM::isAdvSIMDModImmType6(Val)) {
5372 Val = AArch64_AM::encodeAdvSIMDModImmType6(Val);
5373 Shift = 8;
5374 } else
5375 return nullptr;
5376
5377 auto Mov = Builder.buildInstr(Op, {Dst}, {}).addImm(Val).addImm(Shift);
5378 constrainSelectedInstRegOperands(*Mov, TII, TRI, RBI);
5379 return &*Mov;
5380}
5381
5382MachineInstr *AArch64InstructionSelector::tryAdvSIMDModImm32(
5383 Register Dst, unsigned DstSize, APInt Bits, MachineIRBuilder &Builder,
5384 bool Inv) {
5385
5386 unsigned int Op;
5387 if (DstSize == 128) {
5388 if (Bits.getHiBits(64) != Bits.getLoBits(64))
5389 return nullptr;
5390 Op = Inv ? AArch64::MVNIv4i32 : AArch64::MOVIv4i32;
5391 } else {
5392 Op = Inv ? AArch64::MVNIv2i32 : AArch64::MOVIv2i32;
5393 }
5394
5395 uint64_t Val = Bits.zextOrTrunc(64).getZExtValue();
5396 uint64_t Shift;
5397
5398 if ((AArch64_AM::isAdvSIMDModImmType1(Val))) {
5399 Val = AArch64_AM::encodeAdvSIMDModImmType1(Val);
5400 Shift = 0;
5401 } else if ((AArch64_AM::isAdvSIMDModImmType2(Val))) {
5402 Val = AArch64_AM::encodeAdvSIMDModImmType2(Val);
5403 Shift = 8;
5404 } else if ((AArch64_AM::isAdvSIMDModImmType3(Val))) {
5405 Val = AArch64_AM::encodeAdvSIMDModImmType3(Val);
5406 Shift = 16;
5407 } else if ((AArch64_AM::isAdvSIMDModImmType4(Val))) {
5408 Val = AArch64_AM::encodeAdvSIMDModImmType4(Val);
5409 Shift = 24;
5410 } else
5411 return nullptr;
5412
5413 auto Mov = Builder.buildInstr(Op, {Dst}, {}).addImm(Val).addImm(Shift);
5414 constrainSelectedInstRegOperands(*Mov, TII, TRI, RBI);
5415 return &*Mov;
5416}
5417
5418MachineInstr *AArch64InstructionSelector::tryAdvSIMDModImm64(
5419 Register Dst, unsigned DstSize, APInt Bits, MachineIRBuilder &Builder) {
5420
5421 unsigned int Op;
5422 if (DstSize == 128) {
5423 if (Bits.getHiBits(64) != Bits.getLoBits(64))
5424 return nullptr;
5425 Op = AArch64::MOVIv2d_ns;
5426 } else {
5427 Op = AArch64::MOVID;
5428 }
5429
5430 uint64_t Val = Bits.zextOrTrunc(64).getZExtValue();
5431 if (AArch64_AM::isAdvSIMDModImmType10(Val)) {
5432 Val = AArch64_AM::encodeAdvSIMDModImmType10(Val);
5433 auto Mov = Builder.buildInstr(Op, {Dst}, {}).addImm(Val);
5434 constrainSelectedInstRegOperands(*Mov, TII, TRI, RBI);
5435 return &*Mov;
5436 }
5437 return nullptr;
5438}
5439
5440MachineInstr *AArch64InstructionSelector::tryAdvSIMDModImm321s(
5441 Register Dst, unsigned DstSize, APInt Bits, MachineIRBuilder &Builder,
5442 bool Inv) {
5443
5444 unsigned int Op;
5445 if (DstSize == 128) {
5446 if (Bits.getHiBits(64) != Bits.getLoBits(64))
5447 return nullptr;
5448 Op = Inv ? AArch64::MVNIv4s_msl : AArch64::MOVIv4s_msl;
5449 } else {
5450 Op = Inv ? AArch64::MVNIv2s_msl : AArch64::MOVIv2s_msl;
5451 }
5452
5453 uint64_t Val = Bits.zextOrTrunc(64).getZExtValue();
5454 uint64_t Shift;
5455
5456 if (AArch64_AM::isAdvSIMDModImmType7(Val)) {
5457 Val = AArch64_AM::encodeAdvSIMDModImmType7(Val);
5458 Shift = 264;
5459 } else if (AArch64_AM::isAdvSIMDModImmType8(Val)) {
5460 Val = AArch64_AM::encodeAdvSIMDModImmType8(Val);
5461 Shift = 272;
5462 } else
5463 return nullptr;
5464
5465 auto Mov = Builder.buildInstr(Op, {Dst}, {}).addImm(Val).addImm(Shift);
5466 constrainSelectedInstRegOperands(*Mov, TII, TRI, RBI);
5467 return &*Mov;
5468}
5469
5470MachineInstr *AArch64InstructionSelector::tryAdvSIMDModImmFP(
5471 Register Dst, unsigned DstSize, APInt Bits, MachineIRBuilder &Builder) {
5472
5473 unsigned int Op;
5474 bool IsWide = false;
5475 if (DstSize == 128) {
5476 if (Bits.getHiBits(64) != Bits.getLoBits(64))
5477 return nullptr;
5478 Op = AArch64::FMOVv4f32_ns;
5479 IsWide = true;
5480 } else {
5481 Op = AArch64::FMOVv2f32_ns;
5482 }
5483
5484 uint64_t Val = Bits.zextOrTrunc(64).getZExtValue();
5485
5486 if (AArch64_AM::isAdvSIMDModImmType11(Val)) {
5487 Val = AArch64_AM::encodeAdvSIMDModImmType11(Val);
5488 } else if (IsWide && AArch64_AM::isAdvSIMDModImmType12(Val)) {
5489 Val = AArch64_AM::encodeAdvSIMDModImmType12(Val);
5490 Op = AArch64::FMOVv2f64_ns;
5491 } else
5492 return nullptr;
5493
5494 auto Mov = Builder.buildInstr(Op, {Dst}, {}).addImm(Val);
5495 constrainSelectedInstRegOperands(*Mov, TII, TRI, RBI);
5496 return &*Mov;
5497}
5498
5499bool AArch64InstructionSelector::selectIndexedExtLoad(
5500 MachineInstr &MI, MachineRegisterInfo &MRI) {
5501 auto &ExtLd = cast<GIndexedAnyExtLoad>(MI);
5502 Register Dst = ExtLd.getDstReg();
5503 Register WriteBack = ExtLd.getWritebackReg();
5504 Register Base = ExtLd.getBaseReg();
5505 Register Offset = ExtLd.getOffsetReg();
5506 LLT Ty = MRI.getType(Dst);
5507 assert(Ty.getSizeInBits() <= 64); // Only for scalar GPRs.
5508 unsigned MemSizeBits = ExtLd.getMMO().getMemoryType().getSizeInBits();
5509 bool IsPre = ExtLd.isPre();
5510 bool IsSExt = isa<GIndexedSExtLoad>(ExtLd);
5511 bool InsertIntoXReg = false;
5512 bool IsDst64 = Ty.getSizeInBits() == 64;
5513
5514 unsigned Opc = 0;
5515 LLT NewLdDstTy;
5516 LLT s32 = LLT::scalar(32);
5517 LLT s64 = LLT::scalar(64);
5518
5519 if (MemSizeBits == 8) {
5520 if (IsSExt) {
5521 if (IsDst64)
5522 Opc = IsPre ? AArch64::LDRSBXpre : AArch64::LDRSBXpost;
5523 else
5524 Opc = IsPre ? AArch64::LDRSBWpre : AArch64::LDRSBWpost;
5525 NewLdDstTy = IsDst64 ? s64 : s32;
5526 } else {
5527 Opc = IsPre ? AArch64::LDRBBpre : AArch64::LDRBBpost;
5528 InsertIntoXReg = IsDst64;
5529 NewLdDstTy = s32;
5530 }
5531 } else if (MemSizeBits == 16) {
5532 if (IsSExt) {
5533 if (IsDst64)
5534 Opc = IsPre ? AArch64::LDRSHXpre : AArch64::LDRSHXpost;
5535 else
5536 Opc = IsPre ? AArch64::LDRSHWpre : AArch64::LDRSHWpost;
5537 NewLdDstTy = IsDst64 ? s64 : s32;
5538 } else {
5539 Opc = IsPre ? AArch64::LDRHHpre : AArch64::LDRHHpost;
5540 InsertIntoXReg = IsDst64;
5541 NewLdDstTy = s32;
5542 }
5543 } else if (MemSizeBits == 32) {
5544 if (IsSExt) {
5545 Opc = IsPre ? AArch64::LDRSWpre : AArch64::LDRSWpost;
5546 NewLdDstTy = s64;
5547 } else {
5548 Opc = IsPre ? AArch64::LDRWpre : AArch64::LDRWpost;
5549 InsertIntoXReg = IsDst64;
5550 NewLdDstTy = s32;
5551 }
5552 } else {
5553 llvm_unreachable("Unexpected size for indexed load");
5554 }
5555
5556 if (RBI.getRegBank(Dst, MRI, TRI)->getID() == AArch64::FPRRegBankID)
5557 return false; // We should be on gpr.
5558
5559 auto Cst = getIConstantVRegVal(Offset, MRI);
5560 if (!Cst)
5561 return false; // Shouldn't happen, but just in case.
5562
5563 auto LdMI = MIB.buildInstr(Opc, {WriteBack, NewLdDstTy}, {Base})
5564 .addImm(Cst->getSExtValue());
5565 LdMI.cloneMemRefs(ExtLd);
5566 constrainSelectedInstRegOperands(*LdMI, TII, TRI, RBI);
5567 // Make sure to select the load with the MemTy as the dest type, and then
5568 // insert into X reg if needed.
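// For example, a post-indexed zero/any-extending 8-bit load into a 64-bit
// destination uses LDRBBpost, whose loaded value is a 32-bit GPR, followed by
// a SUBREG_TO_REG that widens it into the 64-bit destination.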
5569 if (InsertIntoXReg) {
5570 // Generate a SUBREG_TO_REG.
5571 auto SubToReg = MIB.buildInstr(TargetOpcode::SUBREG_TO_REG, {Dst}, {})
5572 .addImm(0)
5573 .addUse(LdMI.getReg(1))
5574 .addImm(AArch64::sub_32);
5575 RBI.constrainGenericRegister(SubToReg.getReg(0), AArch64::GPR64RegClass,
5576 MRI);
5577 } else {
5578 auto Copy = MIB.buildCopy(Dst, LdMI.getReg(1));
5579 selectCopy(*Copy, TII, MRI, TRI, RBI);
5580 }
5581 MI.eraseFromParent();
5582
5583 return true;
5584}
5585
5586bool AArch64InstructionSelector::selectIndexedLoad(MachineInstr &MI,
5587 MachineRegisterInfo &MRI) {
5588 auto &Ld = cast<GIndexedLoad>(MI);
5589 Register Dst = Ld.getDstReg();
5590 Register WriteBack = Ld.getWritebackReg();
5591 Register Base = Ld.getBaseReg();
5592 Register Offset = Ld.getOffsetReg();
5593 assert(MRI.getType(Dst).getSizeInBits() <= 128 &&
5594 "Unexpected type for indexed load");
5595 unsigned MemSize = Ld.getMMO().getMemoryType().getSizeInBytes();
5596
5597 if (MemSize < MRI.getType(Dst).getSizeInBytes())
5598 return selectIndexedExtLoad(MI, MRI);
5599
5600 unsigned Opc = 0;
5601 if (Ld.isPre()) {
5602 static constexpr unsigned GPROpcodes[] = {
5603 AArch64::LDRBBpre, AArch64::LDRHHpre, AArch64::LDRWpre,
5604 AArch64::LDRXpre};
5605 static constexpr unsigned FPROpcodes[] = {
5606 AArch64::LDRBpre, AArch64::LDRHpre, AArch64::LDRSpre, AArch64::LDRDpre,
5607 AArch64::LDRQpre};
5608 if (RBI.getRegBank(Dst, MRI, TRI)->getID() == AArch64::FPRRegBankID)
5609 Opc = FPROpcodes[Log2_32(MemSize)];
5610 else
5611 Opc = GPROpcodes[Log2_32(MemSize)];
5612 } else {
5613 static constexpr unsigned GPROpcodes[] = {
5614 AArch64::LDRBBpost, AArch64::LDRHHpost, AArch64::LDRWpost,
5615 AArch64::LDRXpost};
5616 static constexpr unsigned FPROpcodes[] = {
5617 AArch64::LDRBpost, AArch64::LDRHpost, AArch64::LDRSpost,
5618 AArch64::LDRDpost, AArch64::LDRQpost};
5619 if (RBI.getRegBank(Dst, MRI, TRI)->getID() == AArch64::FPRRegBankID)
5620 Opc = FPROpcodes[Log2_32(MemSize)];
5621 else
5622 Opc = GPROpcodes[Log2_32(MemSize)];
5623 }
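// Both opcode tables above are indexed by log2 of the access size in bytes
// (1, 2, 4 or 8, plus 16 for the FPR table).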
5624 auto Cst = getIConstantVRegVal(Offset, MRI);
5625 if (!Cst)
5626 return false; // Shouldn't happen, but just in case.
5627 auto LdMI =
5628 MIB.buildInstr(Opc, {WriteBack, Dst}, {Base}).addImm(Cst->getSExtValue());
5629 LdMI.cloneMemRefs(Ld);
5630 constrainSelectedInstRegOperands(*LdMI, TII, TRI, RBI);
5631 MI.eraseFromParent();
5632 return true;
5633}
5634
5635bool AArch64InstructionSelector::selectIndexedStore(GIndexedStore &I,
5636 MachineRegisterInfo &MRI) {
5637 Register Dst = I.getWritebackReg();
5638 Register Val = I.getValueReg();
5639 Register Base = I.getBaseReg();
5640 Register Offset = I.getOffsetReg();
5641 LLT ValTy = MRI.getType(Val);
5642 assert(ValTy.getSizeInBits() <= 128 && "Unexpected type for indexed store");
5643
5644 unsigned Opc = 0;
5645 if (I.isPre()) {
5646 static constexpr unsigned GPROpcodes[] = {
5647 AArch64::STRBBpre, AArch64::STRHHpre, AArch64::STRWpre,
5648 AArch64::STRXpre};
5649 static constexpr unsigned FPROpcodes[] = {
5650 AArch64::STRBpre, AArch64::STRHpre, AArch64::STRSpre, AArch64::STRDpre,
5651 AArch64::STRQpre};
5652
5653 if (RBI.getRegBank(Val, MRI, TRI)->getID() == AArch64::FPRRegBankID)
5654 Opc = FPROpcodes[Log2_32(ValTy.getSizeInBytes())];
5655 else
5656 Opc = GPROpcodes[Log2_32(ValTy.getSizeInBytes())];
5657 } else {
5658 static constexpr unsigned GPROpcodes[] = {
5659 AArch64::STRBBpost, AArch64::STRHHpost, AArch64::STRWpost,
5660 AArch64::STRXpost};
5661 static constexpr unsigned FPROpcodes[] = {
5662 AArch64::STRBpost, AArch64::STRHpost, AArch64::STRSpost,
5663 AArch64::STRDpost, AArch64::STRQpost};
5664
5665 if (RBI.getRegBank(Val, MRI, TRI)->getID() == AArch64::FPRRegBankID)
5666 Opc = FPROpcodes[Log2_32(ValTy.getSizeInBytes())];
5667 else
5668 Opc = GPROpcodes[Log2_32(ValTy.getSizeInBytes())];
5669 }
5670
5671 auto Cst = getIConstantVRegVal(Offset, MRI);
5672 if (!Cst)
5673 return false; // Shouldn't happen, but just in case.
5674 auto Str =
5675 MIB.buildInstr(Opc, {Dst}, {Val, Base}).addImm(Cst->getSExtValue());
5676 Str.cloneMemRefs(I);
5677 constrainSelectedInstRegOperands(*Str, TII, TRI, RBI);
5678 I.eraseFromParent();
5679 return true;
5680}
5681
5682MachineInstr *
5683AArch64InstructionSelector::emitConstantVector(Register Dst, Constant *CV,
5684 MachineIRBuilder &MIRBuilder,
5685 MachineRegisterInfo &MRI) {
5686 LLT DstTy = MRI.getType(Dst);
5687 unsigned DstSize = DstTy.getSizeInBits();
5688 if (CV->isNullValue()) {
5689 if (DstSize == 128) {
5690 auto Mov =
5691 MIRBuilder.buildInstr(AArch64::MOVIv2d_ns, {Dst}, {}).addImm(0);
5692 constrainSelectedInstRegOperands(*Mov, TII, TRI, RBI);
5693 return &*Mov;
5694 }
5695
5696 if (DstSize == 64) {
5697 auto Mov =
5698 MIRBuilder
5699 .buildInstr(AArch64::MOVIv2d_ns, {&AArch64::FPR128RegClass}, {})
5700 .addImm(0);
5701 auto Copy = MIRBuilder.buildInstr(TargetOpcode::COPY, {Dst}, {})
5702 .addReg(Mov.getReg(0), 0, AArch64::dsub);
5703 RBI.constrainGenericRegister(Dst, AArch64::FPR64RegClass, MRI);
5704 return &*Copy;
5705 }
5706 }
5707
5708 if (CV->getSplatValue()) {
5709 APInt DefBits = APInt::getSplat(DstSize, CV->getUniqueInteger());
5710 auto TryMOVIWithBits = [&](APInt DefBits) -> MachineInstr * {
5711 MachineInstr *NewOp;
5712 bool Inv = false;
5713 if ((NewOp = tryAdvSIMDModImm64(Dst, DstSize, DefBits, MIRBuilder)) ||
5714 (NewOp =
5715 tryAdvSIMDModImm32(Dst, DstSize, DefBits, MIRBuilder, Inv)) ||
5716 (NewOp =
5717 tryAdvSIMDModImm321s(Dst, DstSize, DefBits, MIRBuilder, Inv)) ||
5718 (NewOp =
5719 tryAdvSIMDModImm16(Dst, DstSize, DefBits, MIRBuilder, Inv)) ||
5720 (NewOp = tryAdvSIMDModImm8(Dst, DstSize, DefBits, MIRBuilder)) ||
5721 (NewOp = tryAdvSIMDModImmFP(Dst, DstSize, DefBits, MIRBuilder)))
5722 return NewOp;
5723
5724 DefBits = ~DefBits;
5725 Inv = true;
5726 if ((NewOp =
5727 tryAdvSIMDModImm32(Dst, DstSize, DefBits, MIRBuilder, Inv)) ||
5728 (NewOp =
5729 tryAdvSIMDModImm321s(Dst, DstSize, DefBits, MIRBuilder, Inv)) ||
5730 (NewOp = tryAdvSIMDModImm16(Dst, DstSize, DefBits, MIRBuilder, Inv)))
5731 return NewOp;
5732 return nullptr;
5733 };
5734
5735 if (auto *NewOp = TryMOVIWithBits(DefBits))
5736 return NewOp;
5737
5738 // See if a fneg of the constant can be materialized with a MOVI, etc
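// For example, a v2f64 splat of -0.0 (only the sign bits set) has no modified
// immediate encoding, but clearing the per-element sign bits gives +0.0, which
// a MOVI can build and an FNEG then turns back into the original constant.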
5739 auto TryWithFNeg = [&](APInt DefBits, int NumBits,
5740 unsigned NegOpc) -> MachineInstr * {
5741 // FNegate each sub-element of the constant
5742 APInt Neg = APInt::getHighBitsSet(NumBits, 1).zext(DstSize);
5743 APInt NegBits(DstSize, 0);
5744 unsigned NumElts = DstSize / NumBits;
5745 for (unsigned i = 0; i < NumElts; i++)
5746 NegBits |= Neg << (NumBits * i);
5747 NegBits = DefBits ^ NegBits;
5748
5749 // Try to create the new constants with MOVI, and if so generate a fneg
5750 // for it.
5751 if (auto *NewOp = TryMOVIWithBits(NegBits)) {
5752 Register NewDst = MRI.createVirtualRegister(&AArch64::FPR128RegClass);
5753 NewOp->getOperand(0).setReg(NewDst);
5754 return MIRBuilder.buildInstr(NegOpc, {Dst}, {NewDst});
5755 }
5756 return nullptr;
5757 };
5758 MachineInstr *R;
5759 if ((R = TryWithFNeg(DefBits, 32, AArch64::FNEGv4f32)) ||
5760 (R = TryWithFNeg(DefBits, 64, AArch64::FNEGv2f64)) ||
5761 (STI.hasFullFP16() &&
5762 (R = TryWithFNeg(DefBits, 16, AArch64::FNEGv8f16))))
5763 return R;
5764 }
5765
5766 auto *CPLoad = emitLoadFromConstantPool(CV, MIRBuilder);
5767 if (!CPLoad) {
5768 LLVM_DEBUG(dbgs() << "Could not generate cp load for constant vector!");
5769 return nullptr;
5770 }
5771
5772 auto Copy = MIRBuilder.buildCopy(Dst, CPLoad->getOperand(0));
5773 RBI.constrainGenericRegister(
5774 Dst, *MRI.getRegClass(CPLoad->getOperand(0).getReg()), MRI);
5775 return &*Copy;
5776}
5777
5778bool AArch64InstructionSelector::tryOptConstantBuildVec(
5779 MachineInstr &I, LLT DstTy, MachineRegisterInfo &MRI) {
5780 assert(I.getOpcode() == TargetOpcode::G_BUILD_VECTOR);
5781 unsigned DstSize = DstTy.getSizeInBits();
5782 assert(DstSize <= 128 && "Unexpected build_vec type!");
5783 if (DstSize < 32)
5784 return false;
5785 // Check if we're building a constant vector, in which case we want to
5786 // generate a constant pool load instead of a vector insert sequence.
5787 SmallVector<Constant *, 16> Csts;
5788 for (unsigned Idx = 1; Idx < I.getNumOperands(); ++Idx) {
5789 // Try to find G_CONSTANT or G_FCONSTANT
5790 auto *OpMI =
5791 getOpcodeDef(TargetOpcode::G_CONSTANT, I.getOperand(Idx).getReg(), MRI);
5792 if (OpMI)
5793 Csts.emplace_back(
5794 const_cast<ConstantInt *>(OpMI->getOperand(1).getCImm()));
5795 else if ((OpMI = getOpcodeDef(TargetOpcode::G_FCONSTANT,
5796 I.getOperand(Idx).getReg(), MRI)))
5797 Csts.emplace_back(
5798 const_cast<ConstantFP *>(OpMI->getOperand(1).getFPImm()));
5799 else
5800 return false;
5801 }
5802 Constant *CV = ConstantVector::get(Csts);
5803 if (!emitConstantVector(I.getOperand(0).getReg(), CV, MIB, MRI))
5804 return false;
5805 I.eraseFromParent();
5806 return true;
5807}
5808
5809bool AArch64InstructionSelector::tryOptBuildVecToSubregToReg(
5810 MachineInstr &I, MachineRegisterInfo &MRI) {
5811 // Given:
5812 // %vec = G_BUILD_VECTOR %elt, %undef, %undef, ... %undef
5813 //
5814 // Select the G_BUILD_VECTOR as a SUBREG_TO_REG from %elt.
5815 Register Dst = I.getOperand(0).getReg();
5816 Register EltReg = I.getOperand(1).getReg();
5817 LLT EltTy = MRI.getType(EltReg);
5818 // If the destination vector isn't on the same bank as its elements, then
5819 // this can't be a SUBREG_TO_REG.
5820 const RegisterBank &EltRB = *RBI.getRegBank(EltReg, MRI, TRI);
5821 const RegisterBank &DstRB = *RBI.getRegBank(Dst, MRI, TRI);
5822 if (EltRB != DstRB)
5823 return false;
5824 if (any_of(drop_begin(I.operands(), 2), [&MRI](const MachineOperand &Op) {
5825 return !getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, Op.getReg(), MRI);
5826 }))
5827 return false;
5828 unsigned SubReg;
5829 const TargetRegisterClass *EltRC = getRegClassForTypeOnBank(EltTy, EltRB);
5830 if (!EltRC)
5831 return false;
5832 const TargetRegisterClass *DstRC =
5833 getRegClassForTypeOnBank(MRI.getType(Dst), DstRB);
5834 if (!DstRC)
5835 return false;
5836 if (!getSubRegForClass(EltRC, TRI, SubReg))
5837 return false;
5838 auto SubregToReg = MIB.buildInstr(AArch64::SUBREG_TO_REG, {Dst}, {})
5839 .addImm(0)
5840 .addUse(EltReg)
5841 .addImm(SubReg);
5842 I.eraseFromParent();
5843 constrainSelectedInstRegOperands(*SubregToReg, TII, TRI, RBI);
5844 return RBI.constrainGenericRegister(Dst, *DstRC, MRI);
5845}
5846
5847bool AArch64InstructionSelector::selectBuildVector(MachineInstr &I,
5848 MachineRegisterInfo &MRI) {
5849 assert(I.getOpcode() == TargetOpcode::G_BUILD_VECTOR);
5850 // Until we port more of the optimized selections, for now just use a vector
5851 // insert sequence.
5852 const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
5853 const LLT EltTy = MRI.getType(I.getOperand(1).getReg());
5854 unsigned EltSize = EltTy.getSizeInBits();
5855
5856 if (tryOptConstantBuildVec(I, DstTy, MRI))
5857 return true;
5858 if (tryOptBuildVecToSubregToReg(I, MRI))
5859 return true;
5860
5861 if (EltSize != 8 && EltSize != 16 && EltSize != 32 && EltSize != 64)
5862 return false; // Don't support all element types yet.
5863 const RegisterBank &RB = *RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI);
5864
5865 const TargetRegisterClass *DstRC = &AArch64::FPR128RegClass;
5866 MachineInstr *ScalarToVec =
5867 emitScalarToVector(DstTy.getElementType().getSizeInBits(), DstRC,
5868 I.getOperand(1).getReg(), MIB);
5869 if (!ScalarToVec)
5870 return false;
5871
5872 Register DstVec = ScalarToVec->getOperand(0).getReg();
5873 unsigned DstSize = DstTy.getSizeInBits();
5874
5875 // Keep track of the last MI we inserted. Later on, we might be able to save
5876 // a copy using it.
5877 MachineInstr *PrevMI = ScalarToVec;
5878 for (unsigned i = 2, e = DstSize / EltSize + 1; i < e; ++i) {
5879 // Note that if we don't do a subregister copy, we can end up making an
5880 // extra register.
5881 Register OpReg = I.getOperand(i).getReg();
5882 // Do not emit inserts for undefs
5883 if (!getOpcodeDef<GImplicitDef>(OpReg, MRI)) {
5884 PrevMI = &*emitLaneInsert(std::nullopt, DstVec, OpReg, i - 1, RB, MIB);
5885 DstVec = PrevMI->getOperand(0).getReg();
5886 }
5887 }
5888
5889 // If DstTy's size in bits is less than 128, then emit a subregister copy
5890 // from DstVec to the last register we've defined.
5891 if (DstSize < 128) {
5892 // Force this to be FPR using the destination vector.
5893 const TargetRegisterClass *RC =
5894 getRegClassForTypeOnBank(DstTy, *RBI.getRegBank(DstVec, MRI, TRI));
5895 if (!RC)
5896 return false;
5897 if (RC != &AArch64::FPR32RegClass && RC != &AArch64::FPR64RegClass) {
5898 LLVM_DEBUG(dbgs() << "Unsupported register class!\n");
5899 return false;
5900 }
5901
5902 unsigned SubReg = 0;
5903 if (!getSubRegForClass(RC, TRI, SubReg))
5904 return false;
5905 if (SubReg != AArch64::ssub && SubReg != AArch64::dsub) {
5906 LLVM_DEBUG(dbgs() << "Unsupported destination size! (" << DstSize
5907 << ")\n");
5908 return false;
5909 }
5910
5911 Register Reg = MRI.createVirtualRegister(RC);
5912 Register DstReg = I.getOperand(0).getReg();
5913
5914 MIB.buildInstr(TargetOpcode::COPY, {DstReg}, {}).addReg(DstVec, 0, SubReg);
5915 MachineOperand &RegOp = I.getOperand(1);
5916 RegOp.setReg(Reg);
5917 RBI.constrainGenericRegister(DstReg, *RC, MRI);
5918 } else {
5919 // We either have a vector with all elements (except the first one) undef or
5920 // at least one non-undef non-first element. In the first case, we need to
5921 // constrain the output register ourselves as we may have generated an
5922 // INSERT_SUBREG operation which is a generic operation for which the
5923 // output regclass cannot be automatically chosen.
5924 //
5925 // In the second case, there is no need to do this as it may generate an
5926 // instruction like INSvi32gpr where the regclass can be automatically
5927 // chosen.
5928 //
5929 // Also, we save a copy by re-using the destination register on the final
5930 // insert.
5931 PrevMI->getOperand(0).setReg(I.getOperand(0).getReg());
5932 constrainSelectedInstRegOperands(*PrevMI, TII, TRI, RBI);
5933
5934 Register DstReg = PrevMI->getOperand(0).getReg();
5935 if (PrevMI == ScalarToVec && DstReg.isVirtual()) {
5936 const TargetRegisterClass *RC =
5937 getRegClassForTypeOnBank(DstTy, *RBI.getRegBank(DstVec, MRI, TRI));
5938 RBI.constrainGenericRegister(DstReg, *RC, MRI);
5939 }
5940 }
5941
5942 I.eraseFromParent();
5943 return true;
5944}
5945
5946bool AArch64InstructionSelector::selectVectorLoadIntrinsic(unsigned Opc,
5947 unsigned NumVecs,
5948 MachineInstr &I) {
5949 assert(I.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS);
5950 assert(Opc && "Expected an opcode?");
5951 assert(NumVecs > 1 && NumVecs < 5 && "Only support 2, 3, or 4 vectors");
5952 auto &MRI = *MIB.getMRI();
5953 LLT Ty = MRI.getType(I.getOperand(0).getReg());
5954 unsigned Size = Ty.getSizeInBits();
5955 assert((Size == 64 || Size == 128) &&
5956 "Destination must be 64 bits or 128 bits?");
5957 unsigned SubReg = Size == 64 ? AArch64::dsub0 : AArch64::qsub0;
5958 auto Ptr = I.getOperand(I.getNumOperands() - 1).getReg();
5959 assert(MRI.getType(Ptr).isPointer() && "Expected a pointer type?");
5960 auto Load = MIB.buildInstr(Opc, {Ty}, {Ptr});
5961 Load.cloneMemRefs(I);
5962 constrainSelectedInstRegOperands(*Load, TII, TRI, RBI);
5963 Register SelectedLoadDst = Load->getOperand(0).getReg();
5964 for (unsigned Idx = 0; Idx < NumVecs; ++Idx) {
5965 auto Vec = MIB.buildInstr(TargetOpcode::COPY, {I.getOperand(Idx)}, {})
5966 .addReg(SelectedLoadDst, 0, SubReg + Idx);
5967 // Emit the subreg copies and immediately select them.
5968 // FIXME: We should refactor our copy code into an emitCopy helper and
5969 // clean up uses of this pattern elsewhere in the selector.
5970 selectCopy(*Vec, TII, MRI, TRI, RBI);
5971 }
5972 return true;
5973}
5974
5975bool AArch64InstructionSelector::selectVectorLoadLaneIntrinsic(
5976 unsigned Opc, unsigned NumVecs, MachineInstr &I) {
5977 assert(I.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS);
5978 assert(Opc && "Expected an opcode?");
5979 assert(NumVecs > 1 && NumVecs < 5 && "Only support 2, 3, or 4 vectors");
5980 auto &MRI = *MIB.getMRI();
5981 LLT Ty = MRI.getType(I.getOperand(0).getReg());
5982 bool Narrow = Ty.getSizeInBits() == 64;
5983
5984 auto FirstSrcRegIt = I.operands_begin() + NumVecs + 1;
5985 SmallVector<Register, 4> Regs(NumVecs);
5986 std::transform(FirstSrcRegIt, FirstSrcRegIt + NumVecs, Regs.begin(),
5987 [](auto MO) { return MO.getReg(); });
5988
5989 if (Narrow) {
5990 transform(Regs, Regs.begin(), [this](Register Reg) {
5991 return emitScalarToVector(64, &AArch64::FPR128RegClass, Reg, MIB)
5992 ->getOperand(0)
5993 .getReg();
5994 });
5995 Ty = Ty.multiplyElements(2);
5996 }
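// The lane load instructions operate on 128-bit (Q) register tuples, so
// 64-bit sources are widened above and the results are narrowed back to 64
// bits via emitNarrowVector after the subregister copies below.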
5997
5998 Register Tuple = createQTuple(Regs, MIB);
5999 auto LaneNo = getIConstantVRegVal((FirstSrcRegIt + NumVecs)->getReg(), MRI);
6000 if (!LaneNo)
6001 return false;
6002
6003 Register Ptr = (FirstSrcRegIt + NumVecs + 1)->getReg();
6004 auto Load = MIB.buildInstr(Opc, {Ty}, {})
6005 .addReg(Tuple)
6006 .addImm(LaneNo->getZExtValue())
6007 .addReg(Ptr);
6008 Load.cloneMemRefs(I);
6009 constrainSelectedInstRegOperands(*Load, TII, TRI, RBI);
6010 Register SelectedLoadDst = Load->getOperand(0).getReg();
6011 unsigned SubReg = AArch64::qsub0;
6012 for (unsigned Idx = 0; Idx < NumVecs; ++Idx) {
6013 auto Vec = MIB.buildInstr(TargetOpcode::COPY,
6014 {Narrow ? DstOp(&AArch64::FPR128RegClass)
6015 : DstOp(I.getOperand(Idx).getReg())},
6016 {})
6017 .addReg(SelectedLoadDst, 0, SubReg + Idx);
6018 Register WideReg = Vec.getReg(0);
6019 // Emit the subreg copies and immediately select them.
6020 selectCopy(*Vec, TII, MRI, TRI, RBI);
6021 if (Narrow &&
6022 !emitNarrowVector(I.getOperand(Idx).getReg(), WideReg, MIB, MRI))
6023 return false;
6024 }
6025 return true;
6026}
6027
6028void AArch64InstructionSelector::selectVectorStoreIntrinsic(MachineInstr &I,
6029 unsigned NumVecs,
6030 unsigned Opc) {
6031 MachineRegisterInfo &MRI = I.getParent()->getParent()->getRegInfo();
6032 LLT Ty = MRI.getType(I.getOperand(1).getReg());
6033 Register Ptr = I.getOperand(1 + NumVecs).getReg();
6034
6035 SmallVector<Register, 2> Regs(NumVecs);
6036 std::transform(I.operands_begin() + 1, I.operands_begin() + 1 + NumVecs,
6037 Regs.begin(), [](auto MO) { return MO.getReg(); });
6038
6039 Register Tuple = Ty.getSizeInBits() == 128 ? createQTuple(Regs, MIB)
6040 : createDTuple(Regs, MIB);
6041 auto Store = MIB.buildInstr(Opc, {}, {Tuple, Ptr});
6042 Store.cloneMemRefs(I);
6043 constrainSelectedInstRegOperands(*Store, TII, TRI, RBI);
6044}
6045
6046bool AArch64InstructionSelector::selectVectorStoreLaneIntrinsic(
6047 MachineInstr &I, unsigned NumVecs, unsigned Opc) {
6048 MachineRegisterInfo &MRI = I.getParent()->getParent()->getRegInfo();
6049 LLT Ty = MRI.getType(I.getOperand(1).getReg());
6050 bool Narrow = Ty.getSizeInBits() == 64;
6051
6052 SmallVector<Register, 2> Regs(NumVecs);
6053 std::transform(I.operands_begin() + 1, I.operands_begin() + 1 + NumVecs,
6054 Regs.begin(), [](auto MO) { return MO.getReg(); });
6055
6056 if (Narrow)
6057 transform(Regs, Regs.begin(), [this](Register Reg) {
6058 return emitScalarToVector(64, &AArch64::FPR128RegClass, Reg, MIB)
6059 ->getOperand(0)
6060 .getReg();
6061 });
6062
6063 Register Tuple = createQTuple(Regs, MIB);
6064
6065 auto LaneNo = getIConstantVRegVal(I.getOperand(1 + NumVecs).getReg(), MRI);
6066 if (!LaneNo)
6067 return false;
6068 Register Ptr = I.getOperand(1 + NumVecs + 1).getReg();
6069 auto Store = MIB.buildInstr(Opc, {}, {})
6070 .addReg(Tuple)
6071 .addImm(LaneNo->getZExtValue())
6072 .addReg(Ptr);
6073 Store.cloneMemRefs(I);
6074 constrainSelectedInstRegOperands(*Store, TII, TRI, RBI);
6075 return true;
6076}
6077
6078bool AArch64InstructionSelector::selectIntrinsicWithSideEffects(
6079 MachineInstr &I, MachineRegisterInfo &MRI) {
6080 // Find the intrinsic ID.
6081 unsigned IntrinID = cast<GIntrinsic>(I).getIntrinsicID();
6082
6083 const LLT S8 = LLT::scalar(8);
6084 const LLT S16 = LLT::scalar(16);
6085 const LLT S32 = LLT::scalar(32);
6086 const LLT S64 = LLT::scalar(64);
6087 const LLT P0 = LLT::pointer(0, 64);
6088 // Select the instruction.
6089 switch (IntrinID) {
6090 default:
6091 return false;
6092 case Intrinsic::aarch64_ldxp:
6093 case Intrinsic::aarch64_ldaxp: {
6094 auto NewI = MIB.buildInstr(
6095 IntrinID == Intrinsic::aarch64_ldxp ? AArch64::LDXPX : AArch64::LDAXPX,
6096 {I.getOperand(0).getReg(), I.getOperand(1).getReg()},
6097 {I.getOperand(3)});
6098 NewI.cloneMemRefs(I);
6099 constrainSelectedInstRegOperands(*NewI, TII, TRI, RBI);
6100 break;
6101 }
6102 case Intrinsic::aarch64_neon_ld1x2: {
6103 LLT Ty = MRI.getType(I.getOperand(0).getReg());
6104 unsigned Opc = 0;
6105 if (Ty == LLT::fixed_vector(8, S8))
6106 Opc = AArch64::LD1Twov8b;
6107 else if (Ty == LLT::fixed_vector(16, S8))
6108 Opc = AArch64::LD1Twov16b;
6109 else if (Ty == LLT::fixed_vector(4, S16))
6110 Opc = AArch64::LD1Twov4h;
6111 else if (Ty == LLT::fixed_vector(8, S16))
6112 Opc = AArch64::LD1Twov8h;
6113 else if (Ty == LLT::fixed_vector(2, S32))
6114 Opc = AArch64::LD1Twov2s;
6115 else if (Ty == LLT::fixed_vector(4, S32))
6116 Opc = AArch64::LD1Twov4s;
6117 else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
6118 Opc = AArch64::LD1Twov2d;
6119 else if (Ty == S64 || Ty == P0)
6120 Opc = AArch64::LD1Twov1d;
6121 else
6122 llvm_unreachable("Unexpected type for ld1x2!");
6123 selectVectorLoadIntrinsic(Opc, 2, I);
6124 break;
6125 }
6126 case Intrinsic::aarch64_neon_ld1x3: {
6127 LLT Ty = MRI.getType(I.getOperand(0).getReg());
6128 unsigned Opc = 0;
6129 if (Ty == LLT::fixed_vector(8, S8))
6130 Opc = AArch64::LD1Threev8b;
6131 else if (Ty == LLT::fixed_vector(16, S8))
6132 Opc = AArch64::LD1Threev16b;
6133 else if (Ty == LLT::fixed_vector(4, S16))
6134 Opc = AArch64::LD1Threev4h;
6135 else if (Ty == LLT::fixed_vector(8, S16))
6136 Opc = AArch64::LD1Threev8h;
6137 else if (Ty == LLT::fixed_vector(2, S32))
6138 Opc = AArch64::LD1Threev2s;
6139 else if (Ty == LLT::fixed_vector(4, S32))
6140 Opc = AArch64::LD1Threev4s;
6141 else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
6142 Opc = AArch64::LD1Threev2d;
6143 else if (Ty == S64 || Ty == P0)
6144 Opc = AArch64::LD1Threev1d;
6145 else
6146 llvm_unreachable("Unexpected type for ld1x3!");
6147 selectVectorLoadIntrinsic(Opc, 3, I);
6148 break;
6149 }
6150 case Intrinsic::aarch64_neon_ld1x4: {
6151 LLT Ty = MRI.getType(I.getOperand(0).getReg());
6152 unsigned Opc = 0;
6153 if (Ty == LLT::fixed_vector(8, S8))
6154 Opc = AArch64::LD1Fourv8b;
6155 else if (Ty == LLT::fixed_vector(16, S8))
6156 Opc = AArch64::LD1Fourv16b;
6157 else if (Ty == LLT::fixed_vector(4, S16))
6158 Opc = AArch64::LD1Fourv4h;
6159 else if (Ty == LLT::fixed_vector(8, S16))
6160 Opc = AArch64::LD1Fourv8h;
6161 else if (Ty == LLT::fixed_vector(2, S32))
6162 Opc = AArch64::LD1Fourv2s;
6163 else if (Ty == LLT::fixed_vector(4, S32))
6164 Opc = AArch64::LD1Fourv4s;
6165 else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
6166 Opc = AArch64::LD1Fourv2d;
6167 else if (Ty == S64 || Ty == P0)
6168 Opc = AArch64::LD1Fourv1d;
6169 else
6170 llvm_unreachable("Unexpected type for ld1x4!");
6171 selectVectorLoadIntrinsic(Opc, 4, I);
6172 break;
6173 }
6174 case Intrinsic::aarch64_neon_ld2: {
6175 LLT Ty = MRI.getType(I.getOperand(0).getReg());
6176 unsigned Opc = 0;
6177 if (Ty == LLT::fixed_vector(8, S8))
6178 Opc = AArch64::LD2Twov8b;
6179 else if (Ty == LLT::fixed_vector(16, S8))
6180 Opc = AArch64::LD2Twov16b;
6181 else if (Ty == LLT::fixed_vector(4, S16))
6182 Opc = AArch64::LD2Twov4h;
6183 else if (Ty == LLT::fixed_vector(8, S16))
6184 Opc = AArch64::LD2Twov8h;
6185 else if (Ty == LLT::fixed_vector(2, S32))
6186 Opc = AArch64::LD2Twov2s;
6187 else if (Ty == LLT::fixed_vector(4, S32))
6188 Opc = AArch64::LD2Twov4s;
6189 else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
6190 Opc = AArch64::LD2Twov2d;
6191 else if (Ty == S64 || Ty == P0)
6192 Opc = AArch64::LD1Twov1d;
6193 else
6194 llvm_unreachable("Unexpected type for ld2!");
6195 selectVectorLoadIntrinsic(Opc, 2, I);
6196 break;
6197 }
6198 case Intrinsic::aarch64_neon_ld2lane: {
6199 LLT Ty = MRI.getType(I.getOperand(0).getReg());
6200 unsigned Opc;
6201 if (Ty == LLT::fixed_vector(8, S8) || Ty == LLT::fixed_vector(16, S8))
6202 Opc = AArch64::LD2i8;
6203 else if (Ty == LLT::fixed_vector(4, S16) || Ty == LLT::fixed_vector(8, S16))
6204 Opc = AArch64::LD2i16;
6205 else if (Ty == LLT::fixed_vector(2, S32) || Ty == LLT::fixed_vector(4, S32))
6206 Opc = AArch64::LD2i32;
6207 else if (Ty == LLT::fixed_vector(2, S64) ||
6208 Ty == LLT::fixed_vector(2, P0) || Ty == S64 || Ty == P0)
6209 Opc = AArch64::LD2i64;
6210 else
6211 llvm_unreachable("Unexpected type for ld2lane!");
6212 if (!selectVectorLoadLaneIntrinsic(Opc, 2, I))
6213 return false;
6214 break;
6215 }
6216 case Intrinsic::aarch64_neon_ld2r: {
6217 LLT Ty = MRI.getType(I.getOperand(0).getReg());
6218 unsigned Opc = 0;
6219 if (Ty == LLT::fixed_vector(8, S8))
6220 Opc = AArch64::LD2Rv8b;
6221 else if (Ty == LLT::fixed_vector(16, S8))
6222 Opc = AArch64::LD2Rv16b;
6223 else if (Ty == LLT::fixed_vector(4, S16))
6224 Opc = AArch64::LD2Rv4h;
6225 else if (Ty == LLT::fixed_vector(8, S16))
6226 Opc = AArch64::LD2Rv8h;
6227 else if (Ty == LLT::fixed_vector(2, S32))
6228 Opc = AArch64::LD2Rv2s;
6229 else if (Ty == LLT::fixed_vector(4, S32))
6230 Opc = AArch64::LD2Rv4s;
6231 else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
6232 Opc = AArch64::LD2Rv2d;
6233 else if (Ty == S64 || Ty == P0)
6234 Opc = AArch64::LD2Rv1d;
6235 else
6236 llvm_unreachable("Unexpected type for ld2r!");
6237 selectVectorLoadIntrinsic(Opc, 2, I);
6238 break;
6239 }
6240 case Intrinsic::aarch64_neon_ld3: {
6241 LLT Ty = MRI.getType(I.getOperand(0).getReg());
6242 unsigned Opc = 0;
6243 if (Ty == LLT::fixed_vector(8, S8))
6244 Opc = AArch64::LD3Threev8b;
6245 else if (Ty == LLT::fixed_vector(16, S8))
6246 Opc = AArch64::LD3Threev16b;
6247 else if (Ty == LLT::fixed_vector(4, S16))
6248 Opc = AArch64::LD3Threev4h;
6249 else if (Ty == LLT::fixed_vector(8, S16))
6250 Opc = AArch64::LD3Threev8h;
6251 else if (Ty == LLT::fixed_vector(2, S32))
6252 Opc = AArch64::LD3Threev2s;
6253 else if (Ty == LLT::fixed_vector(4, S32))
6254 Opc = AArch64::LD3Threev4s;
6255 else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
6256 Opc = AArch64::LD3Threev2d;
6257 else if (Ty == S64 || Ty == P0)
6258 Opc = AArch64::LD1Threev1d;
6259 else
6260 llvm_unreachable("Unexpected type for ld3!");
6261 selectVectorLoadIntrinsic(Opc, 3, I);
6262 break;
6263 }
6264 case Intrinsic::aarch64_neon_ld3lane: {
6265 LLT Ty = MRI.getType(I.getOperand(0).getReg());
6266 unsigned Opc;
6267 if (Ty == LLT::fixed_vector(8, S8) || Ty == LLT::fixed_vector(16, S8))
6268 Opc = AArch64::LD3i8;
6269 else if (Ty == LLT::fixed_vector(4, S16) || Ty == LLT::fixed_vector(8, S16))
6270 Opc = AArch64::LD3i16;
6271 else if (Ty == LLT::fixed_vector(2, S32) || Ty == LLT::fixed_vector(4, S32))
6272 Opc = AArch64::LD3i32;
6273 else if (Ty == LLT::fixed_vector(2, S64) ||
6274 Ty == LLT::fixed_vector(2, P0) || Ty == S64 || Ty == P0)
6275 Opc = AArch64::LD3i64;
6276 else
6277 llvm_unreachable("Unexpected type for ld3lane!");
6278 if (!selectVectorLoadLaneIntrinsic(Opc, 3, I))
6279 return false;
6280 break;
6281 }
6282 case Intrinsic::aarch64_neon_ld3r: {
6283 LLT Ty = MRI.getType(I.getOperand(0).getReg());
6284 unsigned Opc = 0;
6285 if (Ty == LLT::fixed_vector(8, S8))
6286 Opc = AArch64::LD3Rv8b;
6287 else if (Ty == LLT::fixed_vector(16, S8))
6288 Opc = AArch64::LD3Rv16b;
6289 else if (Ty == LLT::fixed_vector(4, S16))
6290 Opc = AArch64::LD3Rv4h;
6291 else if (Ty == LLT::fixed_vector(8, S16))
6292 Opc = AArch64::LD3Rv8h;
6293 else if (Ty == LLT::fixed_vector(2, S32))
6294 Opc = AArch64::LD3Rv2s;
6295 else if (Ty == LLT::fixed_vector(4, S32))
6296 Opc = AArch64::LD3Rv4s;
6297 else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
6298 Opc = AArch64::LD3Rv2d;
6299 else if (Ty == S64 || Ty == P0)
6300 Opc = AArch64::LD3Rv1d;
6301 else
6302 llvm_unreachable("Unexpected type for ld3r!");
6303 selectVectorLoadIntrinsic(Opc, 3, I);
6304 break;
6305 }
6306 case Intrinsic::aarch64_neon_ld4: {
6307 LLT Ty = MRI.getType(I.getOperand(0).getReg());
6308 unsigned Opc = 0;
6309 if (Ty == LLT::fixed_vector(8, S8))
6310 Opc = AArch64::LD4Fourv8b;
6311 else if (Ty == LLT::fixed_vector(16, S8))
6312 Opc = AArch64::LD4Fourv16b;
6313 else if (Ty == LLT::fixed_vector(4, S16))
6314 Opc = AArch64::LD4Fourv4h;
6315 else if (Ty == LLT::fixed_vector(8, S16))
6316 Opc = AArch64::LD4Fourv8h;
6317 else if (Ty == LLT::fixed_vector(2, S32))
6318 Opc = AArch64::LD4Fourv2s;
6319 else if (Ty == LLT::fixed_vector(4, S32))
6320 Opc = AArch64::LD4Fourv4s;
6321 else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
6322 Opc = AArch64::LD4Fourv2d;
6323 else if (Ty == S64 || Ty == P0)
6324 Opc = AArch64::LD1Fourv1d;
6325 else
6326 llvm_unreachable("Unexpected type for ld4!");
6327 selectVectorLoadIntrinsic(Opc, 4, I);
6328 break;
6329 }
6330 case Intrinsic::aarch64_neon_ld4lane: {
6331 LLT Ty = MRI.getType(I.getOperand(0).getReg());
6332 unsigned Opc;
6333 if (Ty == LLT::fixed_vector(8, S8) || Ty == LLT::fixed_vector(16, S8))
6334 Opc = AArch64::LD4i8;
6335 else if (Ty == LLT::fixed_vector(4, S16) || Ty == LLT::fixed_vector(8, S16))
6336 Opc = AArch64::LD4i16;
6337 else if (Ty == LLT::fixed_vector(2, S32) || Ty == LLT::fixed_vector(4, S32))
6338 Opc = AArch64::LD4i32;
6339 else if (Ty == LLT::fixed_vector(2, S64) ||
6340 Ty == LLT::fixed_vector(2, P0) || Ty == S64 || Ty == P0)
6341 Opc = AArch64::LD4i64;
6342 else
6343 llvm_unreachable("Unexpected type for ld4lane!");
6344 if (!selectVectorLoadLaneIntrinsic(Opc, 4, I))
6345 return false;
6346 break;
6347 }
6348 case Intrinsic::aarch64_neon_ld4r: {
6349 LLT Ty = MRI.getType(I.getOperand(0).getReg());
6350 unsigned Opc = 0;
6351 if (Ty == LLT::fixed_vector(8, S8))
6352 Opc = AArch64::LD4Rv8b;
6353 else if (Ty == LLT::fixed_vector(16, S8))
6354 Opc = AArch64::LD4Rv16b;
6355 else if (Ty == LLT::fixed_vector(4, S16))
6356 Opc = AArch64::LD4Rv4h;
6357 else if (Ty == LLT::fixed_vector(8, S16))
6358 Opc = AArch64::LD4Rv8h;
6359 else if (Ty == LLT::fixed_vector(2, S32))
6360 Opc = AArch64::LD4Rv2s;
6361 else if (Ty == LLT::fixed_vector(4, S32))
6362 Opc = AArch64::LD4Rv4s;
6363 else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
6364 Opc = AArch64::LD4Rv2d;
6365 else if (Ty == S64 || Ty == P0)
6366 Opc = AArch64::LD4Rv1d;
6367 else
6368 llvm_unreachable("Unexpected type for ld4r!");
6369 selectVectorLoadIntrinsic(Opc, 4, I);
6370 break;
6371 }
6372 case Intrinsic::aarch64_neon_st1x2: {
6373 LLT Ty = MRI.getType(I.getOperand(1).getReg());
6374 unsigned Opc;
6375 if (Ty == LLT::fixed_vector(8, S8))
6376 Opc = AArch64::ST1Twov8b;
6377 else if (Ty == LLT::fixed_vector(16, S8))
6378 Opc = AArch64::ST1Twov16b;
6379 else if (Ty == LLT::fixed_vector(4, S16))
6380 Opc = AArch64::ST1Twov4h;
6381 else if (Ty == LLT::fixed_vector(8, S16))
6382 Opc = AArch64::ST1Twov8h;
6383 else if (Ty == LLT::fixed_vector(2, S32))
6384 Opc = AArch64::ST1Twov2s;
6385 else if (Ty == LLT::fixed_vector(4, S32))
6386 Opc = AArch64::ST1Twov4s;
6387 else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
6388 Opc = AArch64::ST1Twov2d;
6389 else if (Ty == S64 || Ty == P0)
6390 Opc = AArch64::ST1Twov1d;
6391 else
6392 llvm_unreachable("Unexpected type for st1x2!");
6393 selectVectorStoreIntrinsic(I, 2, Opc);
6394 break;
6395 }
6396 case Intrinsic::aarch64_neon_st1x3: {
6397 LLT Ty = MRI.getType(I.getOperand(1).getReg());
6398 unsigned Opc;
6399 if (Ty == LLT::fixed_vector(8, S8))
6400 Opc = AArch64::ST1Threev8b;
6401 else if (Ty == LLT::fixed_vector(16, S8))
6402 Opc = AArch64::ST1Threev16b;
6403 else if (Ty == LLT::fixed_vector(4, S16))
6404 Opc = AArch64::ST1Threev4h;
6405 else if (Ty == LLT::fixed_vector(8, S16))
6406 Opc = AArch64::ST1Threev8h;
6407 else if (Ty == LLT::fixed_vector(2, S32))
6408 Opc = AArch64::ST1Threev2s;
6409 else if (Ty == LLT::fixed_vector(4, S32))
6410 Opc = AArch64::ST1Threev4s;
6411 else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
6412 Opc = AArch64::ST1Threev2d;
6413 else if (Ty == S64 || Ty == P0)
6414 Opc = AArch64::ST1Threev1d;
6415 else
6416 llvm_unreachable("Unexpected type for st1x3!");
6417 selectVectorStoreIntrinsic(I, 3, Opc);
6418 break;
6419 }
6420 case Intrinsic::aarch64_neon_st1x4: {
6421 LLT Ty = MRI.getType(I.getOperand(1).getReg());
6422 unsigned Opc;
6423 if (Ty == LLT::fixed_vector(8, S8))
6424 Opc = AArch64::ST1Fourv8b;
6425 else if (Ty == LLT::fixed_vector(16, S8))
6426 Opc = AArch64::ST1Fourv16b;
6427 else if (Ty == LLT::fixed_vector(4, S16))
6428 Opc = AArch64::ST1Fourv4h;
6429 else if (Ty == LLT::fixed_vector(8, S16))
6430 Opc = AArch64::ST1Fourv8h;
6431 else if (Ty == LLT::fixed_vector(2, S32))
6432 Opc = AArch64::ST1Fourv2s;
6433 else if (Ty == LLT::fixed_vector(4, S32))
6434 Opc = AArch64::ST1Fourv4s;
6435 else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
6436 Opc = AArch64::ST1Fourv2d;
6437 else if (Ty == S64 || Ty == P0)
6438 Opc = AArch64::ST1Fourv1d;
6439 else
6440 llvm_unreachable("Unexpected type for st1x4!");
6441 selectVectorStoreIntrinsic(I, 4, Opc);
6442 break;
6443 }
6444 case Intrinsic::aarch64_neon_st2: {
6445 LLT Ty = MRI.getType(I.getOperand(1).getReg());
6446 unsigned Opc;
6447 if (Ty == LLT::fixed_vector(8, S8))
6448 Opc = AArch64::ST2Twov8b;
6449 else if (Ty == LLT::fixed_vector(16, S8))
6450 Opc = AArch64::ST2Twov16b;
6451 else if (Ty == LLT::fixed_vector(4, S16))
6452 Opc = AArch64::ST2Twov4h;
6453 else if (Ty == LLT::fixed_vector(8, S16))
6454 Opc = AArch64::ST2Twov8h;
6455 else if (Ty == LLT::fixed_vector(2, S32))
6456 Opc = AArch64::ST2Twov2s;
6457 else if (Ty == LLT::fixed_vector(4, S32))
6458 Opc = AArch64::ST2Twov4s;
6459 else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
6460 Opc = AArch64::ST2Twov2d;
6461 else if (Ty == S64 || Ty == P0)
6462 Opc = AArch64::ST1Twov1d;
6463 else
6464 llvm_unreachable("Unexpected type for st2!");
6465 selectVectorStoreIntrinsic(I, 2, Opc);
6466 break;
6467 }
6468 case Intrinsic::aarch64_neon_st3: {
6469 LLT Ty = MRI.getType(I.getOperand(1).getReg());
6470 unsigned Opc;
6471 if (Ty == LLT::fixed_vector(8, S8))
6472 Opc = AArch64::ST3Threev8b;
6473 else if (Ty == LLT::fixed_vector(16, S8))
6474 Opc = AArch64::ST3Threev16b;
6475 else if (Ty == LLT::fixed_vector(4, S16))
6476 Opc = AArch64::ST3Threev4h;
6477 else if (Ty == LLT::fixed_vector(8, S16))
6478 Opc = AArch64::ST3Threev8h;
6479 else if (Ty == LLT::fixed_vector(2, S32))
6480 Opc = AArch64::ST3Threev2s;
6481 else if (Ty == LLT::fixed_vector(4, S32))
6482 Opc = AArch64::ST3Threev4s;
6483 else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
6484 Opc = AArch64::ST3Threev2d;
6485 else if (Ty == S64 || Ty == P0)
6486 Opc = AArch64::ST1Threev1d;
6487 else
6488 llvm_unreachable("Unexpected type for st3!");
6489 selectVectorStoreIntrinsic(I, 3, Opc);
6490 break;
6491 }
6492 case Intrinsic::aarch64_neon_st4: {
6493 LLT Ty = MRI.getType(I.getOperand(1).getReg());
6494 unsigned Opc;
6495 if (Ty == LLT::fixed_vector(8, S8))
6496 Opc = AArch64::ST4Fourv8b;
6497 else if (Ty == LLT::fixed_vector(16, S8))
6498 Opc = AArch64::ST4Fourv16b;
6499 else if (Ty == LLT::fixed_vector(4, S16))
6500 Opc = AArch64::ST4Fourv4h;
6501 else if (Ty == LLT::fixed_vector(8, S16))
6502 Opc = AArch64::ST4Fourv8h;
6503 else if (Ty == LLT::fixed_vector(2, S32))
6504 Opc = AArch64::ST4Fourv2s;
6505 else if (Ty == LLT::fixed_vector(4, S32))
6506 Opc = AArch64::ST4Fourv4s;
6507 else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
6508 Opc = AArch64::ST4Fourv2d;
6509 else if (Ty == S64 || Ty == P0)
6510 Opc = AArch64::ST1Fourv1d;
6511 else
6512 llvm_unreachable("Unexpected type for st4!");
6513 selectVectorStoreIntrinsic(I, 4, Opc);
6514 break;
6515 }
6516 case Intrinsic::aarch64_neon_st2lane: {
6517 LLT Ty = MRI.getType(I.getOperand(1).getReg());
6518 unsigned Opc;
6519 if (Ty == LLT::fixed_vector(8, S8) || Ty == LLT::fixed_vector(16, S8))
6520 Opc = AArch64::ST2i8;
6521 else if (Ty == LLT::fixed_vector(4, S16) || Ty == LLT::fixed_vector(8, S16))
6522 Opc = AArch64::ST2i16;
6523 else if (Ty == LLT::fixed_vector(2, S32) || Ty == LLT::fixed_vector(4, S32))
6524 Opc = AArch64::ST2i32;
6525 else if (Ty == LLT::fixed_vector(2, S64) ||
6526 Ty == LLT::fixed_vector(2, P0) || Ty == S64 || Ty == P0)
6527 Opc = AArch64::ST2i64;
6528 else
6529 llvm_unreachable("Unexpected type for st2lane!");
6530 if (!selectVectorStoreLaneIntrinsic(I, 2, Opc))
6531 return false;
6532 break;
6533 }
6534 case Intrinsic::aarch64_neon_st3lane: {
6535 LLT Ty = MRI.getType(I.getOperand(1).getReg());
6536 unsigned Opc;
6537 if (Ty == LLT::fixed_vector(8, S8) || Ty == LLT::fixed_vector(16, S8))
6538 Opc = AArch64::ST3i8;
6539 else if (Ty == LLT::fixed_vector(4, S16) || Ty == LLT::fixed_vector(8, S16))
6540 Opc = AArch64::ST3i16;
6541 else if (Ty == LLT::fixed_vector(2, S32) || Ty == LLT::fixed_vector(4, S32))
6542 Opc = AArch64::ST3i32;
6543 else if (Ty == LLT::fixed_vector(2, S64) ||
6544 Ty == LLT::fixed_vector(2, P0) || Ty == S64 || Ty == P0)
6545 Opc = AArch64::ST3i64;
6546 else
6547 llvm_unreachable("Unexpected type for st3lane!");
6548 if (!selectVectorStoreLaneIntrinsic(I, 3, Opc))
6549 return false;
6550 break;
6551 }
6552 case Intrinsic::aarch64_neon_st4lane: {
6553 LLT Ty = MRI.getType(I.getOperand(1).getReg());
6554 unsigned Opc;
6555 if (Ty == LLT::fixed_vector(8, S8) || Ty == LLT::fixed_vector(16, S8))
6556 Opc = AArch64::ST4i8;
6557 else if (Ty == LLT::fixed_vector(4, S16) || Ty == LLT::fixed_vector(8, S16))
6558 Opc = AArch64::ST4i16;
6559 else if (Ty == LLT::fixed_vector(2, S32) || Ty == LLT::fixed_vector(4, S32))
6560 Opc = AArch64::ST4i32;
6561 else if (Ty == LLT::fixed_vector(2, S64) ||
6562 Ty == LLT::fixed_vector(2, P0) || Ty == S64 || Ty == P0)
6563 Opc = AArch64::ST4i64;
6564 else
6565 llvm_unreachable("Unexpected type for st4lane!");
6566 if (!selectVectorStoreLaneIntrinsic(I, 4, Opc))
6567 return false;
6568 break;
6569 }
6570 case Intrinsic::aarch64_mops_memset_tag: {
6571 // Transform
6572 // %dst:gpr(p0) = \
6573 // G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aarch64.mops.memset.tag),
6574 // \ %dst:gpr(p0), %val:gpr(s64), %n:gpr(s64)
6575 // where %dst is updated, into
6576 // (%Rd:GPR64common, %Rn:GPR64) = \
6577 // MOPSMemorySetTaggingPseudo \
6578 // %Rd:GPR64common, %Rn:GPR64, %Rm:GPR64
6579 // where Rd and Rn are tied.
6580 // It is expected that %val has been extended to s64 in legalization.
6581 // Note that the order of the size/value operands is swapped.
6582
6583 Register DstDef = I.getOperand(0).getReg();
6584 // I.getOperand(1) is the intrinsic function
6585 Register DstUse = I.getOperand(2).getReg();
6586 Register ValUse = I.getOperand(3).getReg();
6587 Register SizeUse = I.getOperand(4).getReg();
6588
6589 // MOPSMemorySetTaggingPseudo has two defs; the intrinsic call has only one.
6590 // Therefore an additional virtual register is required for the updated size
6591 // operand. This value is not accessible via the semantics of the intrinsic.
6592 Register SizeDef = MRI.createGenericVirtualRegister(LLT::scalar(64));
6593
6594 auto Memset = MIB.buildInstr(AArch64::MOPSMemorySetTaggingPseudo,
6595 {DstDef, SizeDef}, {DstUse, SizeUse, ValUse});
6596 Memset.cloneMemRefs(I);
6598 break;
6599 }
6600 }
6601
6602 I.eraseFromParent();
6603 return true;
6604}
6605
6606 bool AArch64InstructionSelector::selectIntrinsic(MachineInstr &I,
6607 MachineRegisterInfo &MRI) {
6608 unsigned IntrinID = cast<GIntrinsic>(I).getIntrinsicID();
6609
6610 switch (IntrinID) {
6611 default:
6612 break;
6613 case Intrinsic::aarch64_crypto_sha1h: {
6614 Register DstReg = I.getOperand(0).getReg();
6615 Register SrcReg = I.getOperand(2).getReg();
6616
6617 // FIXME: Should this be an assert?
6618 if (MRI.getType(DstReg).getSizeInBits() != 32 ||
6619 MRI.getType(SrcReg).getSizeInBits() != 32)
6620 return false;
6621
6622 // The operation has to happen on FPRs. Set up some new FPR registers for
6623 // the source and destination if they are on GPRs.
6624 if (RBI.getRegBank(SrcReg, MRI, TRI)->getID() != AArch64::FPRRegBankID) {
6625 SrcReg = MRI.createVirtualRegister(&AArch64::FPR32RegClass);
6626 MIB.buildCopy({SrcReg}, {I.getOperand(2)});
6627
6628 // Make sure the copy ends up getting constrained properly.
6629 RBI.constrainGenericRegister(I.getOperand(2).getReg(),
6630 AArch64::GPR32RegClass, MRI);
6631 }
6632
6633 if (RBI.getRegBank(DstReg, MRI, TRI)->getID() != AArch64::FPRRegBankID)
6634 DstReg = MRI.createVirtualRegister(&AArch64::FPR32RegClass);
6635
6636 // Actually insert the instruction.
6637 auto SHA1Inst = MIB.buildInstr(AArch64::SHA1Hrr, {DstReg}, {SrcReg});
6638 constrainSelectedInstRegOperands(*SHA1Inst, TII, TRI, RBI);
6639
6640 // Did we create a new register for the destination?
6641 if (DstReg != I.getOperand(0).getReg()) {
6642 // Yep. Copy the result of the instruction back into the original
6643 // destination.
6644 MIB.buildCopy({I.getOperand(0)}, {DstReg});
6645 RBI.constrainGenericRegister(I.getOperand(0).getReg(),
6646 AArch64::GPR32RegClass, MRI);
6647 }
6648
6649 I.eraseFromParent();
6650 return true;
6651 }
6652 case Intrinsic::frameaddress:
6653 case Intrinsic::returnaddress: {
6654 MachineFunction &MF = *I.getParent()->getParent();
6655 MachineFrameInfo &MFI = MF.getFrameInfo();
6656
6657 unsigned Depth = I.getOperand(2).getImm();
6658 Register DstReg = I.getOperand(0).getReg();
6659 RBI.constrainGenericRegister(DstReg, AArch64::GPR64RegClass, MRI);
6660
6661 if (Depth == 0 && IntrinID == Intrinsic::returnaddress) {
6662 if (!MFReturnAddr) {
6663 // Insert the copy from LR/X30 into the entry block, before it can be
6664 // clobbered by anything.
6665 MFI.setReturnAddressIsTaken(true);
6666 MFReturnAddr = getFunctionLiveInPhysReg(
6667 MF, TII, AArch64::LR, AArch64::GPR64RegClass, I.getDebugLoc());
6668 }
6669
6670 if (STI.hasPAuth()) {
6671 MIB.buildInstr(AArch64::XPACI, {DstReg}, {MFReturnAddr});
6672 } else {
6673 MIB.buildCopy({Register(AArch64::LR)}, {MFReturnAddr});
6674 MIB.buildInstr(AArch64::XPACLRI);
6675 MIB.buildCopy({DstReg}, {Register(AArch64::LR)});
6676 }
6677
6678 I.eraseFromParent();
6679 return true;
6680 }
6681
6682 MFI.setFrameAddressIsTaken(true);
6683 Register FrameAddr(AArch64::FP);
6684 while (Depth--) {
6685 Register NextFrame = MRI.createVirtualRegister(&AArch64::GPR64spRegClass);
6686 auto Ldr =
6687 MIB.buildInstr(AArch64::LDRXui, {NextFrame}, {FrameAddr}).addImm(0);
6689 FrameAddr = NextFrame;
6690 }
6691
6692 if (IntrinID == Intrinsic::frameaddress)
6693 MIB.buildCopy({DstReg}, {FrameAddr});
6694 else {
6695 MFI.setReturnAddressIsTaken(true);
6696
6697 if (STI.hasPAuth()) {
6698 Register TmpReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
6699 MIB.buildInstr(AArch64::LDRXui, {TmpReg}, {FrameAddr}).addImm(1);
6700 MIB.buildInstr(AArch64::XPACI, {DstReg}, {TmpReg});
6701 } else {
6702 MIB.buildInstr(AArch64::LDRXui, {Register(AArch64::LR)}, {FrameAddr})
6703 .addImm(1);
6704 MIB.buildInstr(AArch64::XPACLRI);
6705 MIB.buildCopy({DstReg}, {Register(AArch64::LR)});
6706 }
6707 }
6708
6709 I.eraseFromParent();
6710 return true;
6711 }
6712 case Intrinsic::swift_async_context_addr:
6713 auto Sub = MIB.buildInstr(AArch64::SUBXri, {I.getOperand(0).getReg()},
6714 {Register(AArch64::FP)})
6715 .addImm(8)
6716 .addImm(0);
6718
6720 MF->getInfo<AArch64FunctionInfo>()->setHasSwiftAsyncContext(true);
6721 I.eraseFromParent();
6722 return true;
6723 }
6724 return false;
6725}
6726
6728AArch64InstructionSelector::selectShiftA_32(const MachineOperand &Root) const {
6729 auto MaybeImmed = getImmedFromMO(Root);
6730 if (MaybeImmed == std::nullopt || *MaybeImmed > 31)
6731 return std::nullopt;
6732 uint64_t Enc = (32 - *MaybeImmed) & 0x1f;
6733 return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Enc); }}};
6734}
6735
6737AArch64InstructionSelector::selectShiftB_32(const MachineOperand &Root) const {
6738 auto MaybeImmed = getImmedFromMO(Root);
6739 if (MaybeImmed == std::nullopt || *MaybeImmed > 31)
6740 return std::nullopt;
6741 uint64_t Enc = 31 - *MaybeImmed;
6742 return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Enc); }}};
6743}
6744
6746AArch64InstructionSelector::selectShiftA_64(const MachineOperand &Root) const {
6747 auto MaybeImmed = getImmedFromMO(Root);
6748 if (MaybeImmed == std::nullopt || *MaybeImmed > 63)
6749 return std::nullopt;
6750 uint64_t Enc = (64 - *MaybeImmed) & 0x3f;
6751 return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Enc); }}};
6752}
6753
6755AArch64InstructionSelector::selectShiftB_64(const MachineOperand &Root) const {
6756 auto MaybeImmed = getImmedFromMO(Root);
6757 if (MaybeImmed == std::nullopt || *MaybeImmed > 63)
6758 return std::nullopt;
6759 uint64_t Enc = 63 - *MaybeImmed;
6760 return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Enc); }}};
6761}
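// Worked example (illustrative, not from the upstream source): these four
// renderers produce the immr/imms-style operands used by the constant-shift
// patterns. For a 32-bit left shift by 5, selectShiftA_32 renders
// (32 - 5) & 0x1f = 27 and selectShiftB_32 renders 31 - 5 = 26, matching the
// "ubfm w0, w1, #27, #26" encoding of "lsl w0, w1, #5". The 64-bit variants
// do the same arithmetic modulo 64.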
6762
6763/// Helper to select an immediate value that can be represented as a 12-bit
6764/// value shifted left by either 0 or 12. If it is possible to do so, return
6765/// the immediate and shift value. If not, return std::nullopt.
6766///
6767/// Used by selectArithImmed and selectNegArithImmed.
6769AArch64InstructionSelector::select12BitValueWithLeftShift(
6770 uint64_t Immed) const {
6771 unsigned ShiftAmt;
6772 if (Immed >> 12 == 0) {
6773 ShiftAmt = 0;
6774 } else if ((Immed & 0xfff) == 0 && Immed >> 24 == 0) {
6775 ShiftAmt = 12;
6776 Immed = Immed >> 12;
6777 } else
6778 return std::nullopt;
6779
6780 unsigned ShVal = AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftAmt);
6781 return {{
6782 [=](MachineInstrBuilder &MIB) { MIB.addImm(Immed); },
6783 [=](MachineInstrBuilder &MIB) { MIB.addImm(ShVal); },
6784 }};
6785}
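// Worked example (illustrative): Immed = 0x123     -> {0x123, LSL #0}
//                                Immed = 0x123000  -> {0x123, LSL #12}
//                                Immed = 0x123001  -> std::nullopt (low 12
//                                                     bits set and value
//                                                     wider than 12 bits)
//                                Immed = 0x1234000 -> std::nullopt (shifted
//                                                     value needs 13 bits)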
6786
6787/// SelectArithImmed - Select an immediate value that can be represented as
6788/// a 12-bit value shifted left by either 0 or 12. If so, return true with
6789/// Val set to the 12-bit value and Shift set to the shifter operand.
6791AArch64InstructionSelector::selectArithImmed(MachineOperand &Root) const {
6792 // This function is called from the addsub_shifted_imm ComplexPattern,
6793 // which lists [imm] as the list of opcodes it's interested in; however,
6794 // we still need to check whether the operand is actually an immediate
6795 // here because the ComplexPattern opcode list is only used in
6796 // root-level opcode matching.
6797 auto MaybeImmed = getImmedFromMO(Root);
6798 if (MaybeImmed == std::nullopt)
6799 return std::nullopt;
6800 return select12BitValueWithLeftShift(*MaybeImmed);
6801}
6802
6803/// SelectNegArithImmed - As above, but negates the value before trying to
6804/// select it.
6806AArch64InstructionSelector::selectNegArithImmed(MachineOperand &Root) const {
6807 // We need a register here, because we need to know if we have a 64 or 32
6808 // bit immediate.
6809 if (!Root.isReg())
6810 return std::nullopt;
6811 auto MaybeImmed = getImmedFromMO(Root);
6812 if (MaybeImmed == std::nullopt)
6813 return std::nullopt;
6814 uint64_t Immed = *MaybeImmed;
6815
6816 // This negation is almost always valid, but "cmp wN, #0" and "cmn wN, #0"
6817 // have the opposite effect on the C flag, so this pattern mustn't match under
6818 // those circumstances.
6819 if (Immed == 0)
6820 return std::nullopt;
6821
6822 // Check whether the root is a 32-bit or a 64-bit type, since that
6823 // determines how the immediate is negated.
6825 if (MRI.getType(Root.getReg()).getSizeInBits() == 32)
6826 Immed = ~((uint32_t)Immed) + 1;
6827 else
6828 Immed = ~Immed + 1ULL;
6829
6830 if (Immed & 0xFFFFFFFFFF000000ULL)
6831 return std::nullopt;
6832
6833 Immed &= 0xFFFFFFULL;
6834 return select12BitValueWithLeftShift(Immed);
6835}
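// Worked example (illustrative): for a 32-bit root with Immed = -5
// (0xfffffffb), the negation above yields 5, which is encoded as {5, LSL #0}.
// This is what allows, e.g., a compare against #-5 to be selected as
// "cmn w0, #5". Immed = 0 is rejected because "cmp wN, #0" and "cmn wN, #0"
// set the C flag differently.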
6836
6837/// Return true if it is worth folding MI into an extended register. That is,
6838/// if it's safe to pull it into the addressing mode of a load or store as a
6839/// shift.
6840bool AArch64InstructionSelector::isWorthFoldingIntoExtendedReg(
6841 MachineInstr &MI, const MachineRegisterInfo &MRI) const {
6842 // Always fold if there is one use, or if we're optimizing for size.
6843 Register DefReg = MI.getOperand(0).getReg();
6844 if (MRI.hasOneNonDBGUse(DefReg) ||
6845 MI.getParent()->getParent()->getFunction().hasOptSize())
6846 return true;
6847
6848 // FIXME: Consider checking HasAddrLSLSlow14 and HasALULSLFast as
6849 // appropriate.
6850
6851 // We have a fastpath, so folding a shift in and potentially computing it
6852 // many times may be beneficial. Check if this is only used in memory ops.
6853 // If it is, then we should fold.
6854 return all_of(MRI.use_nodbg_instructions(DefReg),
6855 [](MachineInstr &Use) { return Use.mayLoadOrStore(); });
6856}
6857
6858 static bool isSignExtendShiftType(AArch64_AM::ShiftExtendType Type) {
6859 switch (Type) {
6860 case AArch64_AM::SXTB:
6861 case AArch64_AM::SXTH:
6862 case AArch64_AM::SXTW:
6863 return true;
6864 default:
6865 return false;
6866 }
6867}
6868
6869 InstructionSelector::ComplexRendererFns
6870 AArch64InstructionSelector::selectExtendedSHL(
6871 MachineOperand &Root, MachineOperand &Base, MachineOperand &Offset,
6872 unsigned SizeInBytes, bool WantsExt) const {
6873 assert(Base.isReg() && "Expected base to be a register operand");
6874 assert(Offset.isReg() && "Expected offset to be a register operand");
6875
6877 MachineInstr *OffsetInst = MRI.getVRegDef(Offset.getReg());
6878
6879 unsigned OffsetOpc = OffsetInst->getOpcode();
6880 bool LookedThroughZExt = false;
6881 if (OffsetOpc != TargetOpcode::G_SHL && OffsetOpc != TargetOpcode::G_MUL) {
6882 // Try to look through a ZEXT.
6883 if (OffsetOpc != TargetOpcode::G_ZEXT || !WantsExt)
6884 return std::nullopt;
6885
6886 OffsetInst = MRI.getVRegDef(OffsetInst->getOperand(1).getReg());
6887 OffsetOpc = OffsetInst->getOpcode();
6888 LookedThroughZExt = true;
6889
6890 if (OffsetOpc != TargetOpcode::G_SHL && OffsetOpc != TargetOpcode::G_MUL)
6891 return std::nullopt;
6892 }
6893 // Make sure that the memory op is a valid size.
6894 int64_t LegalShiftVal = Log2_32(SizeInBytes);
6895 if (LegalShiftVal == 0)
6896 return std::nullopt;
6897 if (!isWorthFoldingIntoExtendedReg(*OffsetInst, MRI))
6898 return std::nullopt;
6899
6900 // Now, try to find the specific G_CONSTANT. Start by assuming that the
6901 // register we will offset is the LHS, and the register containing the
6902 // constant is the RHS.
6903 Register OffsetReg = OffsetInst->getOperand(1).getReg();
6904 Register ConstantReg = OffsetInst->getOperand(2).getReg();
6905 auto ValAndVReg = getIConstantVRegValWithLookThrough(ConstantReg, MRI);
6906 if (!ValAndVReg) {
6907 // We didn't get a constant on the RHS. If the opcode is a shift, then
6908 // we're done.
6909 if (OffsetOpc == TargetOpcode::G_SHL)
6910 return std::nullopt;
6911
6912 // If we have a G_MUL, we can use either register. Try looking at the RHS.
6913 std::swap(OffsetReg, ConstantReg);
6914 ValAndVReg = getIConstantVRegValWithLookThrough(ConstantReg, MRI);
6915 if (!ValAndVReg)
6916 return std::nullopt;
6917 }
6918
6919 // The value must fit into 3 bits, and must be positive. Make sure that is
6920 // true.
6921 int64_t ImmVal = ValAndVReg->Value.getSExtValue();
6922
6923 // Since we're going to pull this into a shift, the constant value must be
6924 // a power of 2. If we got a multiply, then we need to check this.
6925 if (OffsetOpc == TargetOpcode::G_MUL) {
6926 if (!llvm::has_single_bit<uint32_t>(ImmVal))
6927 return std::nullopt;
6928
6929 // Got a power of 2. So, the amount we'll shift is the log base-2 of that.
6930 ImmVal = Log2_32(ImmVal);
6931 }
6932
6933 if ((ImmVal & 0x7) != ImmVal)
6934 return std::nullopt;
6935
6936 // We are only allowed to shift by LegalShiftVal. This shift value is built
6937 // into the instruction, so we can't just use whatever we want.
6938 if (ImmVal != LegalShiftVal)
6939 return std::nullopt;
6940
6941 unsigned SignExtend = 0;
6942 if (WantsExt) {
6943 // Check if the offset is defined by an extend, unless we looked through a
6944 // G_ZEXT earlier.
6945 if (!LookedThroughZExt) {
6946 MachineInstr *ExtInst = getDefIgnoringCopies(OffsetReg, MRI);
6947 auto Ext = getExtendTypeForInst(*ExtInst, MRI, true);
6948 if (Ext == AArch64_AM::InvalidShiftExtend)
6949 return std::nullopt;
6950
6951 SignExtend = isSignExtendShiftType(Ext) ? 1 : 0;
6952 // We only support SXTW for signed extension here.
6953 if (SignExtend && Ext != AArch64_AM::SXTW)
6954 return std::nullopt;
6955 OffsetReg = ExtInst->getOperand(1).getReg();
6956 }
6957
6958 // Need a 32-bit wide register here.
6959 MachineIRBuilder MIB(*MRI.getVRegDef(Root.getReg()));
6960 OffsetReg = moveScalarRegClass(OffsetReg, AArch64::GPR32RegClass, MIB);
6961 }
6962
6963 // We can use the LHS of the GEP as the base, and the LHS of the shift as an
6964 // offset. Signify that we are shifting by setting the shift flag to 1.
6965 return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(Base.getReg()); },
6966 [=](MachineInstrBuilder &MIB) { MIB.addUse(OffsetReg); },
6967 [=](MachineInstrBuilder &MIB) {
6968 // Need to add both immediates here to make sure that they are both
6969 // added to the instruction.
6970 MIB.addImm(SignExtend);
6971 MIB.addImm(1);
6972 }}};
6973}
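// Illustrative MIR example (hypothetical values): with SizeInBytes = 8
// (LegalShiftVal = 3) and an offset defined as
//   %c:gpr(s64) = G_CONSTANT i64 8
//   %off:gpr(s64) = G_MUL %idx:gpr(s64), %c
// the multiply is treated as a shift by log2(8) = 3, which matches the legal
// shift, so the rendered operands are {Base, %idx, sign-extend = 0,
// shifted = 1}, i.e. the "[base, idx, lsl #3]" form. A multiply by 12 (not a
// power of two) or a shift amount other than 3 is rejected.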
6974
6975/// This is used for computing addresses like this:
6976///
6977/// ldr x1, [x2, x3, lsl #3]
6978///
6979/// Where x2 is the base register, and x3 is an offset register. The shift-left
6980/// is a constant value specific to this load instruction. That is, we'll never
6981/// see anything other than a 3 here (which corresponds to the size of the
6982/// element being loaded.)
6984AArch64InstructionSelector::selectAddrModeShiftedExtendXReg(
6985 MachineOperand &Root, unsigned SizeInBytes) const {
6986 if (!Root.isReg())
6987 return std::nullopt;
6989
6990 // We want to find something like this:
6991 //
6992 // val = G_CONSTANT LegalShiftVal
6993 // shift = G_SHL off_reg val
6994 // ptr = G_PTR_ADD base_reg shift
6995 // x = G_LOAD ptr
6996 //
6997 // And fold it into this addressing mode:
6998 //
6999 // ldr x, [base_reg, off_reg, lsl #LegalShiftVal]
7000
7001 // Check if we can find the G_PTR_ADD.
7002 MachineInstr *PtrAdd =
7003 getOpcodeDef(TargetOpcode::G_PTR_ADD, Root.getReg(), MRI);
7004 if (!PtrAdd || !isWorthFoldingIntoExtendedReg(*PtrAdd, MRI))
7005 return std::nullopt;
7006
7007 // Now, try to match an opcode which will match our specific offset.
7008 // We want a G_SHL or a G_MUL.
7009 MachineInstr *OffsetInst =
7010 getDefIgnoringCopies(PtrAdd->getOperand(2).getReg(), MRI);
7011 return selectExtendedSHL(Root, PtrAdd->getOperand(1),
7012 OffsetInst->getOperand(0), SizeInBytes,
7013 /*WantsExt=*/false);
7014}
7015
7016/// This is used for computing addresses like this:
7017///
7018/// ldr x1, [x2, x3]
7019///
7020/// Where x2 is the base register, and x3 is an offset register.
7021///
7022 /// When it is possible (or profitable) to fold a G_PTR_ADD into the address
7023 /// calculation, this will do so. Otherwise, it will return std::nullopt.
7025AArch64InstructionSelector::selectAddrModeRegisterOffset(
7026 MachineOperand &Root) const {
7028
7029 // We need a GEP.
7030 MachineInstr *Gep = MRI.getVRegDef(Root.getReg());
7031 if (Gep->getOpcode() != TargetOpcode::G_PTR_ADD)
7032 return std::nullopt;
7033
7034 // If this is used more than once, let's not bother folding.
7035 // TODO: Check if they are memory ops. If they are, then we can still fold
7036 // without having to recompute anything.
7037 if (!MRI.hasOneNonDBGUse(Gep->getOperand(0).getReg()))
7038 return std::nullopt;
7039
7040 // Base is the GEP's LHS, offset is its RHS.
7041 return {{[=](MachineInstrBuilder &MIB) {
7042 MIB.addUse(Gep->getOperand(1).getReg());
7043 },
7044 [=](MachineInstrBuilder &MIB) {
7045 MIB.addUse(Gep->getOperand(2).getReg());
7046 },
7047 [=](MachineInstrBuilder &MIB) {
7048 // Need to add both immediates here to make sure that they are both
7049 // added to the instruction.
7050 MIB.addImm(0);
7051 MIB.addImm(0);
7052 }}};
7053}
7054
7055/// This is intended to be equivalent to selectAddrModeXRO in
7056/// AArch64ISelDAGtoDAG. It's used for selecting X register offset loads.
7058AArch64InstructionSelector::selectAddrModeXRO(MachineOperand &Root,
7059 unsigned SizeInBytes) const {
7061 if (!Root.isReg())
7062 return std::nullopt;
7063 MachineInstr *PtrAdd =
7064 getOpcodeDef(TargetOpcode::G_PTR_ADD, Root.getReg(), MRI);
7065 if (!PtrAdd)
7066 return std::nullopt;
7067
7068 // Check for immediates which cannot be encoded in the [base + imm]
7069 // addressing mode, and can't be encoded in an add/sub. If this happens, we'll
7070 // end up with code like:
7071 //
7072 // mov x0, wide
7073 // add x1 base, x0
7074 // ldr x2, [x1, x0]
7075 //
7076 // In this situation, we can use the [base, xreg] addressing mode to save an
7077 // add/sub:
7078 //
7079 // mov x0, wide
7080 // ldr x2, [base, x0]
7081 auto ValAndVReg =
7082 getIConstantVRegValWithLookThrough(PtrAdd->getOperand(2).getReg(), MRI);
7083 if (ValAndVReg) {
7084 unsigned Scale = Log2_32(SizeInBytes);
7085 int64_t ImmOff = ValAndVReg->Value.getSExtValue();
7086
7087 // Skip immediates that can be selected in the load/store addressing
7088 // mode.
7089 if (ImmOff % SizeInBytes == 0 && ImmOff >= 0 &&
7090 ImmOff < (0x1000 << Scale))
7091 return std::nullopt;
7092
7093 // Helper lambda to decide whether or not it is preferable to emit an add.
7094 auto isPreferredADD = [](int64_t ImmOff) {
7095 // Constants in [0x0, 0xfff] can be encoded in an add.
7096 if ((ImmOff & 0xfffffffffffff000LL) == 0x0LL)
7097 return true;
7098
7099 // Can it be encoded in an add lsl #12?
7100 if ((ImmOff & 0xffffffffff000fffLL) != 0x0LL)
7101 return false;
7102
7103 // It can be encoded in an add lsl #12, but we may not want to. If it is
7104 // possible to select this as a single movz, then prefer that. A single
7105 // movz is faster than an add with a shift.
7106 return (ImmOff & 0xffffffffff00ffffLL) != 0x0LL &&
7107 (ImmOff & 0xffffffffffff0fffLL) != 0x0LL;
7108 };
7109
7110 // If the immediate can be encoded in a single add/sub, then bail out.
7111 if (isPreferredADD(ImmOff) || isPreferredADD(-ImmOff))
7112 return std::nullopt;
7113 }
7114
7115 // Try to fold shifts into the addressing mode.
7116 auto AddrModeFns = selectAddrModeShiftedExtendXReg(Root, SizeInBytes);
7117 if (AddrModeFns)
7118 return AddrModeFns;
7119
7120 // If that doesn't work, see if it's possible to fold in registers from
7121 // a GEP.
7122 return selectAddrModeRegisterOffset(Root);
7123}
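// Worked example (illustrative): with SizeInBytes = 8 and a G_PTR_ADD whose
// offset is the constant 0x123456, the offset is neither valid for the scaled
// [base + imm] form (not an in-range multiple of 8) nor cheap to materialize
// with a single add/add-lsl-12/movz, so the register-offset form
// "ldr x2, [base, x0]" can be used instead. A constant like 0x800 is rejected
// here because it already fits the scaled immediate form (0x800 / 8 = 0x100).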
7124
7125/// This is used for computing addresses like this:
7126///
7127/// ldr x0, [xBase, wOffset, sxtw #LegalShiftVal]
7128///
7129/// Where we have a 64-bit base register, a 32-bit offset register, and an
7130/// extend (which may or may not be signed).
7132AArch64InstructionSelector::selectAddrModeWRO(MachineOperand &Root,
7133 unsigned SizeInBytes) const {
7135
7136 MachineInstr *PtrAdd =
7137 getOpcodeDef(TargetOpcode::G_PTR_ADD, Root.getReg(), MRI);
7138 if (!PtrAdd || !isWorthFoldingIntoExtendedReg(*PtrAdd, MRI))
7139 return std::nullopt;
7140
7141 MachineOperand &LHS = PtrAdd->getOperand(1);
7142 MachineOperand &RHS = PtrAdd->getOperand(2);
7143 MachineInstr *OffsetInst = getDefIgnoringCopies(RHS.getReg(), MRI);
7144
7145 // The first case is the same as selectAddrModeXRO, except we need an extend.
7146 // In this case, we try to find a shift and extend, and fold them into the
7147 // addressing mode.
7148 //
7149 // E.g.
7150 //
7151 // off_reg = G_Z/S/ANYEXT ext_reg
7152 // val = G_CONSTANT LegalShiftVal
7153 // shift = G_SHL off_reg val
7154 // ptr = G_PTR_ADD base_reg shift
7155 // x = G_LOAD ptr
7156 //
7157 // In this case we can get a load like this:
7158 //
7159 // ldr x0, [base_reg, ext_reg, sxtw #LegalShiftVal]
7160 auto ExtendedShl = selectExtendedSHL(Root, LHS, OffsetInst->getOperand(0),
7161 SizeInBytes, /*WantsExt=*/true);
7162 if (ExtendedShl)
7163 return ExtendedShl;
7164
7165 // There was no shift. We can try and fold a G_Z/S/ANYEXT in alone though.
7166 //
7167 // e.g.
7168 // ldr something, [base_reg, ext_reg, sxtw]
7169 if (!isWorthFoldingIntoExtendedReg(*OffsetInst, MRI))
7170 return std::nullopt;
7171
7172 // Check if this is an extend. We'll get an extend type if it is.
7173 AArch64_AM::ShiftExtendType Ext =
7174 getExtendTypeForInst(*OffsetInst, MRI, /*IsLoadStore=*/true);
7175 if (Ext == AArch64_AM::InvalidShiftExtend)
7176 return std::nullopt;
7177
7178 // Need a 32-bit wide register.
7179 MachineIRBuilder MIB(*PtrAdd);
7180 Register ExtReg = moveScalarRegClass(OffsetInst->getOperand(1).getReg(),
7181 AArch64::GPR32RegClass, MIB);
7182 unsigned SignExtend = Ext == AArch64_AM::SXTW;
7183
7184 // Base is LHS, offset is ExtReg.
7185 return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(LHS.getReg()); },
7186 [=](MachineInstrBuilder &MIB) { MIB.addUse(ExtReg); },
7187 [=](MachineInstrBuilder &MIB) {
7188 MIB.addImm(SignExtend);
7189 MIB.addImm(0);
7190 }}};
7191}
7192
7193/// Select a "register plus unscaled signed 9-bit immediate" address. This
7194/// should only match when there is an offset that is not valid for a scaled
7195/// immediate addressing mode. The "Size" argument is the size in bytes of the
7196/// memory reference, which is needed here to know what is valid for a scaled
7197/// immediate.
7199AArch64InstructionSelector::selectAddrModeUnscaled(MachineOperand &Root,
7200 unsigned Size) const {
7201 MachineRegisterInfo &MRI =
7202 Root.getParent()->getParent()->getParent()->getRegInfo();
7203
7204 if (!Root.isReg())
7205 return std::nullopt;
7206
7207 if (!isBaseWithConstantOffset(Root, MRI))
7208 return std::nullopt;
7209
7210 MachineInstr *RootDef = MRI.getVRegDef(Root.getReg());
7211
7212 MachineOperand &OffImm = RootDef->getOperand(2);
7213 if (!OffImm.isReg())
7214 return std::nullopt;
7215 MachineInstr *RHS = MRI.getVRegDef(OffImm.getReg());
7216 if (RHS->getOpcode() != TargetOpcode::G_CONSTANT)
7217 return std::nullopt;
7218 int64_t RHSC;
7219 MachineOperand &RHSOp1 = RHS->getOperand(1);
7220 if (!RHSOp1.isCImm() || RHSOp1.getCImm()->getBitWidth() > 64)
7221 return std::nullopt;
7222 RHSC = RHSOp1.getCImm()->getSExtValue();
7223
7224 if (RHSC >= -256 && RHSC < 256) {
7225 MachineOperand &Base = RootDef->getOperand(1);
7226 return {{
7227 [=](MachineInstrBuilder &MIB) { MIB.add(Base); },
7228 [=](MachineInstrBuilder &MIB) { MIB.addImm(RHSC); },
7229 }};
7230 }
7231 return std::nullopt;
7232}
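// Worked example (illustrative): for a 4-byte access with
//   %off:gpr(s64) = G_CONSTANT i64 -17
//   %p:gpr(p0) = G_PTR_ADD %base, %off
// the offset lies in [-256, 255] and is not valid for the scaled form, so this
// returns {%base, -17}, which matches the unscaled (ldur/stur-style) patterns.
// An offset of 4096 is outside the signed 9-bit range and yields std::nullopt.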
7233
7235AArch64InstructionSelector::tryFoldAddLowIntoImm(MachineInstr &RootDef,
7236 unsigned Size,
7237 MachineRegisterInfo &MRI) const {
7238 if (RootDef.getOpcode() != AArch64::G_ADD_LOW)
7239 return std::nullopt;
7240 MachineInstr &Adrp = *MRI.getVRegDef(RootDef.getOperand(1).getReg());
7241 if (Adrp.getOpcode() != AArch64::ADRP)
7242 return std::nullopt;
7243
7244 // TODO: add heuristics like isWorthFoldingADDlow() from SelectionDAG.
7245 auto Offset = Adrp.getOperand(1).getOffset();
7246 if (Offset % Size != 0)
7247 return std::nullopt;
7248
7249 auto GV = Adrp.getOperand(1).getGlobal();
7250 if (GV->isThreadLocal())
7251 return std::nullopt;
7252
7253 auto &MF = *RootDef.getParent()->getParent();
7254 if (GV->getPointerAlignment(MF.getDataLayout()) < Size)
7255 return std::nullopt;
7256
7257 unsigned OpFlags = STI.ClassifyGlobalReference(GV, MF.getTarget());
7258 MachineIRBuilder MIRBuilder(RootDef);
7259 Register AdrpReg = Adrp.getOperand(0).getReg();
7260 return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(AdrpReg); },
7261 [=](MachineInstrBuilder &MIB) {
7262 MIB.addGlobalAddress(GV, Offset,
7263 OpFlags | AArch64II::MO_PAGEOFF |
7265 }}};
7266}
7267
7268/// Select a "register plus scaled unsigned 12-bit immediate" address. The
7269/// "Size" argument is the size in bytes of the memory reference, which
7270/// determines the scale.
7272AArch64InstructionSelector::selectAddrModeIndexed(MachineOperand &Root,
7273 unsigned Size) const {
7274 MachineFunction &MF = *Root.getParent()->getParent()->getParent();
7276
7277 if (!Root.isReg())
7278 return std::nullopt;
7279
7280 MachineInstr *RootDef = MRI.getVRegDef(Root.getReg());
7281 if (RootDef->getOpcode() == TargetOpcode::G_FRAME_INDEX) {
7282 return {{
7283 [=](MachineInstrBuilder &MIB) { MIB.add(RootDef->getOperand(1)); },
7284 [=](MachineInstrBuilder &MIB) { MIB.addImm(0); },
7285 }};
7286 }
7287
7289 // Check if we can fold in the ADD of small code model ADRP + ADD address.
7290 if (CM == CodeModel::Small) {
7291 auto OpFns = tryFoldAddLowIntoImm(*RootDef, Size, MRI);
7292 if (OpFns)
7293 return OpFns;
7294 }
7295
7296 if (isBaseWithConstantOffset(Root, MRI)) {
7297 MachineOperand &LHS = RootDef->getOperand(1);
7298 MachineOperand &RHS = RootDef->getOperand(2);
7299 MachineInstr *LHSDef = MRI.getVRegDef(LHS.getReg());
7300 MachineInstr *RHSDef = MRI.getVRegDef(RHS.getReg());
7301
7302 int64_t RHSC = (int64_t)RHSDef->getOperand(1).getCImm()->getZExtValue();
7303 unsigned Scale = Log2_32(Size);
7304 if ((RHSC & (Size - 1)) == 0 && RHSC >= 0 && RHSC < (0x1000 << Scale)) {
7305 if (LHSDef->getOpcode() == TargetOpcode::G_FRAME_INDEX)
7306 return {{
7307 [=](MachineInstrBuilder &MIB) { MIB.add(LHSDef->getOperand(1)); },
7308 [=](MachineInstrBuilder &MIB) { MIB.addImm(RHSC >> Scale); },
7309 }};
7310
7311 return {{
7312 [=](MachineInstrBuilder &MIB) { MIB.add(LHS); },
7313 [=](MachineInstrBuilder &MIB) { MIB.addImm(RHSC >> Scale); },
7314 }};
7315 }
7316 }
7317
7318 // Before falling back to our general case, check if the unscaled
7319 // instructions can handle this. If so, that's preferable.
7320 if (selectAddrModeUnscaled(Root, Size))
7321 return std::nullopt;
7322
7323 return {{
7324 [=](MachineInstrBuilder &MIB) { MIB.add(Root); },
7325 [=](MachineInstrBuilder &MIB) { MIB.addImm(0); },
7326 }};
7327}
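// Worked example (illustrative): for an 8-byte load where the root is
//   %p:gpr(p0) = G_PTR_ADD %base, %c   (%c = G_CONSTANT i64 32)
// the constant is non-negative, a multiple of 8 and below 0x1000 << 3, so the
// rendered operands are {%base, 32 >> 3 = 4}, i.e. the scaled
// "ldr x0, [base, #32]" form. For an offset such as -16 the scaled checks
// fail, selectAddrModeUnscaled succeeds, and this renderer returns
// std::nullopt so the unscaled pattern can match instead.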
7328
7329/// Given a shift instruction, return the correct shift type for that
7330/// instruction.
7331 static AArch64_AM::ShiftExtendType getShiftTypeForInst(MachineInstr &MI) {
7332 switch (MI.getOpcode()) {
7333 default:
7334 return AArch64_AM::InvalidShiftExtend;
7335 case TargetOpcode::G_SHL:
7336 return AArch64_AM::LSL;
7337 case TargetOpcode::G_LSHR:
7338 return AArch64_AM::LSR;
7339 case TargetOpcode::G_ASHR:
7340 return AArch64_AM::ASR;
7341 case TargetOpcode::G_ROTR:
7342 return AArch64_AM::ROR;
7343 }
7344}
7345
7346/// Select a "shifted register" operand. If the value is not shifted, set the
7347/// shift operand to a default value of "lsl 0".
7349AArch64InstructionSelector::selectShiftedRegister(MachineOperand &Root,
7350 bool AllowROR) const {
7351 if (!Root.isReg())
7352 return std::nullopt;
7353 MachineRegisterInfo &MRI =
7354 Root.getParent()->getParent()->getParent()->getRegInfo();
7355
7356 // Check if the operand is defined by an instruction which corresponds to
7357 // a ShiftExtendType. E.g. a G_SHL, G_LSHR, etc.
7358 MachineInstr *ShiftInst = MRI.getVRegDef(Root.getReg());
7359 AArch64_AM::ShiftExtendType ShType = getShiftTypeForInst(*ShiftInst);
7360 if (ShType == AArch64_AM::InvalidShiftExtend)
7361 return std::nullopt;
7362 if (ShType == AArch64_AM::ROR && !AllowROR)
7363 return std::nullopt;
7364 if (!isWorthFoldingIntoExtendedReg(*ShiftInst, MRI))
7365 return std::nullopt;
7366
7367 // Need an immediate on the RHS.
7368 MachineOperand &ShiftRHS = ShiftInst->getOperand(2);
7369 auto Immed = getImmedFromMO(ShiftRHS);
7370 if (!Immed)
7371 return std::nullopt;
7372
7373 // We have something that we can fold. Fold in the shift's LHS and RHS into
7374 // the instruction.
7375 MachineOperand &ShiftLHS = ShiftInst->getOperand(1);
7376 Register ShiftReg = ShiftLHS.getReg();
7377
7378 unsigned NumBits = MRI.getType(ShiftReg).getSizeInBits();
7379 unsigned Val = *Immed & (NumBits - 1);
7380 unsigned ShiftVal = AArch64_AM::getShifterImm(ShType, Val);
7381
7382 return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(ShiftReg); },
7383 [=](MachineInstrBuilder &MIB) { MIB.addImm(ShiftVal); }}};
7384}
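// Illustrative MIR example (hypothetical values):
//   %c:gpr(s64) = G_CONSTANT i64 3
//   %sh:gpr(s64) = G_SHL %x:gpr(s64), %c
//   %d:gpr(s64) = G_ADD %y:gpr(s64), %sh
// When the G_SHL is worth folding, this renders {%x, lsl #3} for the ADD's
// shifted-register operand, so the result can be selected as something like
// "add x0, x1, x2, lsl #3" rather than a separate shift plus add.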
7385
7386AArch64_AM::ShiftExtendType AArch64InstructionSelector::getExtendTypeForInst(
7387 MachineInstr &MI, MachineRegisterInfo &MRI, bool IsLoadStore) const {
7388 unsigned Opc = MI.getOpcode();
7389
7390 // Handle explicit extend instructions first.
7391 if (Opc == TargetOpcode::G_SEXT || Opc == TargetOpcode::G_SEXT_INREG) {
7392 unsigned Size;
7393 if (Opc == TargetOpcode::G_SEXT)
7394 Size = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
7395 else
7396 Size = MI.getOperand(2).getImm();
7397 assert(Size != 64 && "Extend from 64 bits?");
7398 switch (Size) {
7399 case 8:
7400 return IsLoadStore ? AArch64_AM::InvalidShiftExtend : AArch64_AM::SXTB;
7401 case 16:
7402 return IsLoadStore ? AArch64_AM::InvalidShiftExtend : AArch64_AM::SXTH;
7403 case 32:
7404 return AArch64_AM::SXTW;
7405 default:
7406 return AArch64_AM::InvalidShiftExtend;
7407 }
7408 }
7409
7410 if (Opc == TargetOpcode::G_ZEXT || Opc == TargetOpcode::G_ANYEXT) {
7411 unsigned Size = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
7412 assert(Size != 64 && "Extend from 64 bits?");
7413 switch (Size) {
7414 case 8:
7415 return IsLoadStore ? AArch64_AM::InvalidShiftExtend : AArch64_AM::UXTB;
7416 case 16:
7417 return IsLoadStore ? AArch64_AM::InvalidShiftExtend : AArch64_AM::UXTH;
7418 case 32:
7419 return AArch64_AM::UXTW;
7420 default:
7421 return AArch64_AM::InvalidShiftExtend;
7422 }
7423 }
7424
7425 // Don't have an explicit extend. Try to handle a G_AND with a constant mask
7426 // on the RHS.
7427 if (Opc != TargetOpcode::G_AND)
7428 return AArch64_AM::InvalidShiftExtend;
7429
7430 std::optional<uint64_t> MaybeAndMask = getImmedFromMO(MI.getOperand(2));
7431 if (!MaybeAndMask)
7432 return AArch64_AM::InvalidShiftExtend;
7433 uint64_t AndMask = *MaybeAndMask;
7434 switch (AndMask) {
7435 default:
7436 return AArch64_AM::InvalidShiftExtend;
7437 case 0xFF:
7438 return !IsLoadStore ? AArch64_AM::UXTB : AArch64_AM::InvalidShiftExtend;
7439 case 0xFFFF:
7440 return !IsLoadStore ? AArch64_AM::UXTH : AArch64_AM::InvalidShiftExtend;
7441 case 0xFFFFFFFF:
7442 return AArch64_AM::UXTW;
7443 }
7444}
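// Illustrative examples: a G_AND with mask 0xFF maps to UXTB (or to
// InvalidShiftExtend when IsLoadStore is true), a G_SEXT from s32 maps to SXTW
// in both cases, and a G_ZEXT from s16 maps to UXTH only when it is not being
// used as a load/store extend.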
7445
7446Register AArch64InstructionSelector::moveScalarRegClass(
7447 Register Reg, const TargetRegisterClass &RC, MachineIRBuilder &MIB) const {
7448 MachineRegisterInfo &MRI = *MIB.getMRI();
7449 auto Ty = MRI.getType(Reg);
7450 assert(!Ty.isVector() && "Expected scalars only!");
7451 if (Ty.getSizeInBits() == TRI.getRegSizeInBits(RC))
7452 return Reg;
7453
7454 // Create a copy and immediately select it.
7455 // FIXME: We should have an emitCopy function?
7456 auto Copy = MIB.buildCopy({&RC}, {Reg});
7457 selectCopy(*Copy, TII, MRI, TRI, RBI);
7458 return Copy.getReg(0);
7459}
7460
7461/// Select an "extended register" operand. This operand folds in an extend
7462/// followed by an optional left shift.
7464AArch64InstructionSelector::selectArithExtendedRegister(
7465 MachineOperand &Root) const {
7466 if (!Root.isReg())
7467 return std::nullopt;
7468 MachineRegisterInfo &MRI =
7469 Root.getParent()->getParent()->getParent()->getRegInfo();
7470
7471 uint64_t ShiftVal = 0;
7472 Register ExtReg;
7473 AArch64_AM::ShiftExtendType Ext;
7474 MachineInstr *RootDef = getDefIgnoringCopies(Root.getReg(), MRI);
7475 if (!RootDef)
7476 return std::nullopt;
7477
7478 if (!isWorthFoldingIntoExtendedReg(*RootDef, MRI))
7479 return std::nullopt;
7480
7481 // Check if we can fold a shift and an extend.
7482 if (RootDef->getOpcode() == TargetOpcode::G_SHL) {
7483 // Look for a constant on the RHS of the shift.
7484 MachineOperand &RHS = RootDef->getOperand(2);
7485 std::optional<uint64_t> MaybeShiftVal = getImmedFromMO(RHS);
7486 if (!MaybeShiftVal)
7487 return std::nullopt;
7488 ShiftVal = *MaybeShiftVal;
7489 if (ShiftVal > 4)
7490 return std::nullopt;
7491 // Look for a valid extend instruction on the LHS of the shift.
7492 MachineOperand &LHS = RootDef->getOperand(1);
7493 MachineInstr *ExtDef = getDefIgnoringCopies(LHS.getReg(), MRI);
7494 if (!ExtDef)
7495 return std::nullopt;
7496 Ext = getExtendTypeForInst(*ExtDef, MRI);
7497 if (Ext == AArch64_AM::InvalidShiftExtend)
7498 return std::nullopt;
7499 ExtReg = ExtDef->getOperand(1).getReg();
7500 } else {
7501 // Didn't get a shift. Try just folding an extend.
7502 Ext = getExtendTypeForInst(*RootDef, MRI);
7503 if (Ext == AArch64_AM::InvalidShiftExtend)
7504 return std::nullopt;
7505 ExtReg = RootDef->getOperand(1).getReg();
7506
7507 // If we have a 32 bit instruction which zeroes out the high half of a
7508 // register, we get an implicit zero extend for free. Check if we have one.
7509 // FIXME: We actually emit the extend right now even though we don't have
7510 // to.
7511 if (Ext == AArch64_AM::UXTW && MRI.getType(ExtReg).getSizeInBits() == 32) {
7512 MachineInstr *ExtInst = MRI.getVRegDef(ExtReg);
7513 if (isDef32(*ExtInst))
7514 return std::nullopt;
7515 }
7516 }
7517
7518 // We require a GPR32 here. Narrow the ExtReg if needed using a subregister
7519 // copy.
7520 MachineIRBuilder MIB(*RootDef);
7521 ExtReg = moveScalarRegClass(ExtReg, AArch64::GPR32RegClass, MIB);
7522
7523 return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(ExtReg); },
7524 [=](MachineInstrBuilder &MIB) {
7525 MIB.addImm(getArithExtendImm(Ext, ShiftVal));
7526 }}};
7527}
7528
7530AArch64InstructionSelector::selectExtractHigh(MachineOperand &Root) const {
7531 if (!Root.isReg())
7532 return std::nullopt;
7533 MachineRegisterInfo &MRI =
7534 Root.getParent()->getParent()->getParent()->getRegInfo();
7535
7536 auto Extract = getDefSrcRegIgnoringCopies(Root.getReg(), MRI);
7537 while (Extract && Extract->MI->getOpcode() == TargetOpcode::G_BITCAST &&
7538 STI.isLittleEndian())
7539 Extract =
7540 getDefSrcRegIgnoringCopies(Extract->MI->getOperand(1).getReg(), MRI);
7541 if (!Extract)
7542 return std::nullopt;
7543
7544 if (Extract->MI->getOpcode() == TargetOpcode::G_UNMERGE_VALUES) {
7545 if (Extract->Reg == Extract->MI->getOperand(1).getReg()) {
7546 Register ExtReg = Extract->MI->getOperand(2).getReg();
7547 return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(ExtReg); }}};
7548 }
7549 }
7550 if (Extract->MI->getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT) {
7551 LLT SrcTy = MRI.getType(Extract->MI->getOperand(1).getReg());
7553 Extract->MI->getOperand(2).getReg(), MRI);
7554 if (LaneIdx && SrcTy == LLT::fixed_vector(2, 64) &&
7555 LaneIdx->Value.getSExtValue() == 1) {
7556 Register ExtReg = Extract->MI->getOperand(1).getReg();
7557 return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(ExtReg); }}};
7558 }
7559 }
7560
7561 return std::nullopt;
7562}
7563
7564void AArch64InstructionSelector::renderTruncImm(MachineInstrBuilder &MIB,
7565 const MachineInstr &MI,
7566 int OpIdx) const {
7567 const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
7568 assert(MI.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 &&
7569 "Expected G_CONSTANT");
7570 std::optional<int64_t> CstVal =
7571 getIConstantVRegSExtVal(MI.getOperand(0).getReg(), MRI);
7572 assert(CstVal && "Expected constant value");
7573 MIB.addImm(*CstVal);
7574}
7575
7576void AArch64InstructionSelector::renderLogicalImm32(
7577 MachineInstrBuilder &MIB, const MachineInstr &I, int OpIdx) const {
7578 assert(I.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 &&
7579 "Expected G_CONSTANT");
7580 uint64_t CstVal = I.getOperand(1).getCImm()->getZExtValue();
7582 MIB.addImm(Enc);
7583}
7584
7585void AArch64InstructionSelector::renderLogicalImm64(
7586 MachineInstrBuilder &MIB, const MachineInstr &I, int OpIdx) const {
7587 assert(I.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 &&
7588 "Expected G_CONSTANT");
7589 uint64_t CstVal = I.getOperand(1).getCImm()->getZExtValue();
7591 MIB.addImm(Enc);
7592}
7593
7594void AArch64InstructionSelector::renderUbsanTrap(MachineInstrBuilder &MIB,
7595 const MachineInstr &MI,
7596 int OpIdx) const {
7597 assert(MI.getOpcode() == TargetOpcode::G_UBSANTRAP && OpIdx == 0 &&
7598 "Expected G_UBSANTRAP");
7599 MIB.addImm(MI.getOperand(0).getImm() | ('U' << 8));
7600}
7601
7602void AArch64InstructionSelector::renderFPImm16(MachineInstrBuilder &MIB,
7603 const MachineInstr &MI,
7604 int OpIdx) const {
7605 assert(MI.getOpcode() == TargetOpcode::G_FCONSTANT && OpIdx == -1 &&
7606 "Expected G_FCONSTANT");
7607 MIB.addImm(
7608 AArch64_AM::getFP16Imm(MI.getOperand(1).getFPImm()->getValueAPF()));
7609}
7610
7611void AArch64InstructionSelector::renderFPImm32(MachineInstrBuilder &MIB,
7612 const MachineInstr &MI,
7613 int OpIdx) const {
7614 assert(MI.getOpcode() == TargetOpcode::G_FCONSTANT && OpIdx == -1 &&
7615 "Expected G_FCONSTANT");
7616 MIB.addImm(
7617 AArch64_AM::getFP32Imm(MI.getOperand(1).getFPImm()->getValueAPF()));
7618}
7619
7620void AArch64InstructionSelector::renderFPImm64(MachineInstrBuilder &MIB,
7621 const MachineInstr &MI,
7622 int OpIdx) const {
7623 assert(MI.getOpcode() == TargetOpcode::G_FCONSTANT && OpIdx == -1 &&
7624 "Expected G_FCONSTANT");
7625 MIB.addImm(
7626 AArch64_AM::getFP64Imm(MI.getOperand(1).getFPImm()->getValueAPF()));
7627}
7628
7629void AArch64InstructionSelector::renderFPImm32SIMDModImmType4(
7630 MachineInstrBuilder &MIB, const MachineInstr &MI, int OpIdx) const {
7631 assert(MI.getOpcode() == TargetOpcode::G_FCONSTANT && OpIdx == -1 &&
7632 "Expected G_FCONSTANT");
7634 .getFPImm()
7635 ->getValueAPF()
7636 .bitcastToAPInt()
7637 .getZExtValue()));
7638}
7639
7640bool AArch64InstructionSelector::isLoadStoreOfNumBytes(
7641 const MachineInstr &MI, unsigned NumBytes) const {
7642 if (!MI.mayLoadOrStore())
7643 return false;
7644 assert(MI.hasOneMemOperand() &&
7645 "Expected load/store to have only one mem op!");
7646 return (*MI.memoperands_begin())->getSize() == NumBytes;
7647}
7648
7649bool AArch64InstructionSelector::isDef32(const MachineInstr &MI) const {
7650 const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
7651 if (MRI.getType(MI.getOperand(0).getReg()).getSizeInBits() != 32)
7652 return false;
7653
7654 // Only return true if we know the operation will zero-out the high half of
7655 // the 64-bit register. Truncates can be subregister copies, which don't
7656 // zero out the high bits. Copies and other copy-like instructions can be
7657 // fed by truncates, or could be lowered as subregister copies.
7658 switch (MI.getOpcode()) {
7659 default:
7660 return true;
7661 case TargetOpcode::COPY:
7662 case TargetOpcode::G_BITCAST:
7663 case TargetOpcode::G_TRUNC:
7664 case TargetOpcode::G_PHI:
7665 return false;
7666 }
7667}
7668
7669
7670// Perform fixups on the given PHI instruction's operands to force them all
7671// to be the same as the destination regbank.
7672 static void fixupPHIOpBanks(MachineInstr &MI, MachineRegisterInfo &MRI,
7673 const AArch64RegisterBankInfo &RBI) {
7674 assert(MI.getOpcode() == TargetOpcode::G_PHI && "Expected a G_PHI");
7675 Register DstReg = MI.getOperand(0).getReg();
7676 const RegisterBank *DstRB = MRI.getRegBankOrNull(DstReg);
7677 assert(DstRB && "Expected PHI dst to have regbank assigned");
7678 MachineIRBuilder MIB(MI);
7679
7680 // Go through each operand and ensure it has the same regbank.
7681 for (MachineOperand &MO : llvm::drop_begin(MI.operands())) {
7682 if (!MO.isReg())
7683 continue;
7684 Register OpReg = MO.getReg();
7685 const RegisterBank *RB = MRI.getRegBankOrNull(OpReg);
7686 if (RB != DstRB) {
7687 // Insert a cross-bank copy.
7688 auto *OpDef = MRI.getVRegDef(OpReg);
7689 const LLT &Ty = MRI.getType(OpReg);
7690 MachineBasicBlock &OpDefBB = *OpDef->getParent();
7691
7692 // Any instruction we insert must appear after all PHIs in the block
7693 // for the block to be valid MIR.
7694 MachineBasicBlock::iterator InsertPt = std::next(OpDef->getIterator());
7695 if (InsertPt != OpDefBB.end() && InsertPt->isPHI())
7696 InsertPt = OpDefBB.getFirstNonPHI();
7697 MIB.setInsertPt(*OpDef->getParent(), InsertPt);
7698 auto Copy = MIB.buildCopy(Ty, OpReg);
7699 MRI.setRegBank(Copy.getReg(0), *DstRB);
7700 MO.setReg(Copy.getReg(0));
7701 }
7702 }
7703}
7704
7705void AArch64InstructionSelector::processPHIs(MachineFunction &MF) {
7706 // We're looking for PHIs; build a list first so we don't invalidate iterators.
7709 for (auto &BB : MF) {
7710 for (auto &MI : BB) {
7711 if (MI.getOpcode() == TargetOpcode::G_PHI)
7712 Phis.emplace_back(&MI);
7713 }
7714 }
7715
7716 for (auto *MI : Phis) {
7717 // We need to do some work here if the operand types are < 16 bit and they
7718 // are split across fpr/gpr banks. Since all types <32b on gpr
7719 // end up being assigned gpr32 regclasses, we can end up with PHIs here
7720 // which try to select between a gpr32 and an fpr16. Ideally RBS shouldn't
7721 // be selecting heterogeneous regbanks for operands if possible, but we
7722 // still need to be able to deal with it here.
7723 //
7724 // To fix this, if we have a gpr-bank operand < 32b in size and at least
7725 // one other operand is on the fpr bank, then we add cross-bank copies
7726 // to homogenize the operand banks. For simplicity the bank that we choose
7727 // to settle on is whatever bank the def operand has. For example:
7728 //
7729 // %endbb:
7730 // %dst:gpr(s16) = G_PHI %in1:gpr(s16), %bb1, %in2:fpr(s16), %bb2
7731 // =>
7732 // %bb2:
7733 // ...
7734 // %in2_copy:gpr(s16) = COPY %in2:fpr(s16)
7735 // ...
7736 // %endbb:
7737 // %dst:gpr(s16) = G_PHI %in1:gpr(s16), %bb1, %in2_copy:gpr(s16), %bb2
7738 bool HasGPROp = false, HasFPROp = false;
7739 for (const MachineOperand &MO : llvm::drop_begin(MI->operands())) {
7740 if (!MO.isReg())
7741 continue;
7742 const LLT &Ty = MRI.getType(MO.getReg());
7743 if (!Ty.isValid() || !Ty.isScalar())
7744 break;
7745 if (Ty.getSizeInBits() >= 32)
7746 break;
7747 const RegisterBank *RB = MRI.getRegBankOrNull(MO.getReg());
7748 // If for some reason we don't have a regbank yet, don't try anything.
7749 if (!RB)
7750 break;
7751
7752 if (RB->getID() == AArch64::GPRRegBankID)
7753 HasGPROp = true;
7754 else
7755 HasFPROp = true;
7756 }
7757 // We have heterogeneous regbanks, so fix them up.
7758 if (HasGPROp && HasFPROp)
7759 fixupPHIOpBanks(*MI, MRI, RBI);
7760 }
7761}
7762
7763namespace llvm {
7764 InstructionSelector *
7765 createAArch64InstructionSelector(const AArch64TargetMachine &TM,
7766 AArch64Subtarget &Subtarget,
7767 AArch64RegisterBankInfo &RBI) {
7768 return new AArch64InstructionSelector(TM, Subtarget, RBI);
7769}
7770}
unsigned SubReg
unsigned const MachineRegisterInfo * MRI
#define Success
static bool isPreferredADD(int64_t ImmOff)
static SDValue emitConditionalComparison(SDValue LHS, SDValue RHS, ISD::CondCode CC, SDValue CCOp, AArch64CC::CondCode Predicate, AArch64CC::CondCode OutCC, const SDLoc &DL, SelectionDAG &DAG)
can be transformed to: not (and (not (and (setCC (cmp C)) (setCD (cmp D)))) (and (not (setCA (cmp A))...
static SDValue tryAdvSIMDModImm16(unsigned NewOp, SDValue Op, SelectionDAG &DAG, const APInt &Bits, const SDValue *LHS=nullptr)
static SDValue tryAdvSIMDModImmFP(unsigned NewOp, SDValue Op, SelectionDAG &DAG, const APInt &Bits)
static SDValue tryAdvSIMDModImm64(unsigned NewOp, SDValue Op, SelectionDAG &DAG, const APInt &Bits)
static SDValue tryAdvSIMDModImm8(unsigned NewOp, SDValue Op, SelectionDAG &DAG, const APInt &Bits)
static SDValue emitConjunctionRec(SelectionDAG &DAG, SDValue Val, AArch64CC::CondCode &OutCC, bool Negate, SDValue CCOp, AArch64CC::CondCode Predicate)
Emit conjunction or disjunction tree with the CMP/FCMP followed by a chain of CCMP/CFCMP ops.
static SDValue tryAdvSIMDModImm321s(unsigned NewOp, SDValue Op, SelectionDAG &DAG, const APInt &Bits)
static bool isCMN(SDValue Op, ISD::CondCode CC)
static SDValue tryAdvSIMDModImm32(unsigned NewOp, SDValue Op, SelectionDAG &DAG, const APInt &Bits, const SDValue *LHS=nullptr)
static SDValue emitConjunction(SelectionDAG &DAG, SDValue Val, AArch64CC::CondCode &OutCC)
Emit expression as a conjunction (a series of CCMP/CFCMP ops).
static unsigned selectFPConvOpc(unsigned GenericOpc, LLT DstTy, LLT SrcTy)
#define GET_GLOBALISEL_PREDICATES_INIT
static std::pair< const TargetRegisterClass *, const TargetRegisterClass * > getRegClassesForCopy(MachineInstr &I, const TargetInstrInfo &TII, MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI, const RegisterBankInfo &RBI)
Helper function to get the source and destination register classes for a copy.
#define GET_GLOBALISEL_TEMPORARIES_INIT
static void changeFPCCToANDAArch64CC(CmpInst::Predicate CC, AArch64CC::CondCode &CondCode, AArch64CC::CondCode &CondCode2)
Convert an IR fp condition code to an AArch64 CC.
static Register getTestBitReg(Register Reg, uint64_t &Bit, bool &Invert, MachineRegisterInfo &MRI)
Return a register which can be used as a bit to test in a TB(N)Z.
static unsigned getMinSizeForRegBank(const RegisterBank &RB)
Returns the minimum size the given register bank can hold.
static std::optional< int64_t > getVectorShiftImm(Register Reg, MachineRegisterInfo &MRI)
Returns the element immediate value of a vector shift operand if found.
static unsigned selectLoadStoreUIOp(unsigned GenericOpc, unsigned RegBankID, unsigned OpSize)
Select the AArch64 opcode for the G_LOAD or G_STORE operation GenericOpc, appropriate for the (value)...
static AArch64CC::CondCode changeICMPPredToAArch64CC(CmpInst::Predicate P)
static unsigned selectBinaryOp(unsigned GenericOpc, unsigned RegBankID, unsigned OpSize)
Select the AArch64 opcode for the basic binary operation GenericOpc (such as G_OR or G_SDIV),...
static bool getSubRegForClass(const TargetRegisterClass *RC, const TargetRegisterInfo &TRI, unsigned &SubReg)
Returns the correct subregister to use for a given register class.
static bool selectCopy(MachineInstr &I, const TargetInstrInfo &TII, MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI, const RegisterBankInfo &RBI)
static bool copySubReg(MachineInstr &I, MachineRegisterInfo &MRI, const RegisterBankInfo &RBI, Register SrcReg, const TargetRegisterClass *To, unsigned SubReg)
Helper function for selectCopy.
static const TargetRegisterClass * getMinClassForRegBank(const RegisterBank &RB, unsigned SizeInBits, bool GetAllRegSet=false)
Given a register bank, and size in bits, return the smallest register class that can represent that c...
static Register createDTuple(ArrayRef< Register > Regs, MachineIRBuilder &MIB)
Create a tuple of D-registers using the registers in Regs.
static void fixupPHIOpBanks(MachineInstr &MI, MachineRegisterInfo &MRI, const AArch64RegisterBankInfo &RBI)
static bool selectDebugInstr(MachineInstr &I, MachineRegisterInfo &MRI, const RegisterBankInfo &RBI)
static bool isSignExtendShiftType(AArch64_AM::ShiftExtendType Type)
static AArch64_AM::ShiftExtendType getShiftTypeForInst(MachineInstr &MI)
Given a shift instruction, return the correct shift type for that instruction.
static bool unsupportedBinOp(const MachineInstr &I, const AArch64RegisterBankInfo &RBI, const MachineRegisterInfo &MRI, const AArch64RegisterInfo &TRI)
Check whether I is a currently unsupported binary operation:
static bool canEmitConjunction(Register Val, bool &CanNegate, bool &MustBeFirst, bool WillNegate, MachineRegisterInfo &MRI, unsigned Depth=0)
Returns true if Val is a tree of AND/OR/CMP operations that can be expressed as a conjunction.
static bool getLaneCopyOpcode(unsigned &CopyOpc, unsigned &ExtractSubReg, const unsigned EltSize)
static Register createQTuple(ArrayRef< Register > Regs, MachineIRBuilder &MIB)
Create a tuple of Q-registers using the registers in Regs.
static std::optional< uint64_t > getImmedFromMO(const MachineOperand &Root)
static std::pair< unsigned, unsigned > getInsertVecEltOpInfo(const RegisterBank &RB, unsigned EltSize)
Return an <Opcode, SubregIndex> pair to do an vector elt insert of a given size and RB.
static Register createTuple(ArrayRef< Register > Regs, const unsigned RegClassIDs[], const unsigned SubRegs[], MachineIRBuilder &MIB)
Create a REG_SEQUENCE instruction using the registers in Regs.
static std::optional< int64_t > getVectorSHLImm(LLT SrcTy, Register Reg, MachineRegisterInfo &MRI)
Matches and returns the shift immediate value for a SHL instruction given a shift operand.
static void changeFPCCToORAArch64CC(CmpInst::Predicate CC, AArch64CC::CondCode &CondCode, AArch64CC::CondCode &CondCode2)
changeFPCCToORAArch64CC - Convert an IR fp condition code to an AArch64 CC.
unsigned RegSize
This file declares the targeting of the RegisterBankInfo class for AArch64.
MachineBasicBlock & MBB
static const LLT S64
static const LLT S32
static const LLT S16
static const LLT S8
static bool isStore(int Opcode)
static bool selectMergeValues(MachineInstrBuilder &MIB, const ARMBaseInstrInfo &TII, MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI, const RegisterBankInfo &RBI)
static bool selectUnmergeValues(MachineInstrBuilder &MIB, const ARMBaseInstrInfo &TII, MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI, const RegisterBankInfo &RBI)
This file contains the declarations for the subclasses of Constant, which represent the different fla...
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
#define LLVM_DEBUG(X)
Definition: Debug.h:101
This file contains constants used for implementing Dwarf debug support.
uint64_t Size
#define DEBUG_TYPE
Declares convenience wrapper classes for interpreting MachineInstr instances as specific generic oper...
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
static void emitLoadFromConstantPool(Register DstReg, const Constant *ConstVal, MachineIRBuilder &MIRBuilder)
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
Contains matchers for matching SSA Machine Instructions.
This file declares the MachineConstantPool class which is an abstract constant pool to keep track of ...
This file declares the MachineIRBuilder class.
unsigned const TargetRegisterInfo * TRI
unsigned Reg
static unsigned getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
#define P(N)
const char LLVMTargetMachineRef TM
static StringRef getName(Value *V)
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
static constexpr int Concat[]
Value * RHS
Value * LHS
AArch64FunctionInfo - This class is derived from MachineFunctionInfo and contains private AArch64-spe...
This class provides the information for the target register banks.
bool isCallingConvWin64(CallingConv::ID CC) const
APInt bitcastToAPInt() const
Definition: APFloat.h:1210
Class for arbitrary precision integers.
Definition: APInt.h:76
APInt zext(unsigned width) const
Zero extend to a new width.
Definition: APInt.cpp:981
uint64_t getZExtValue() const
Get zero extended value.
Definition: APInt.h:1491
static APInt getSplat(unsigned NewLen, const APInt &V)
Return a value containing V broadcasted over NewLen bits.
Definition: APInt.cpp:620
static APInt getHighBitsSet(unsigned numBits, unsigned hiBitsSet)
Constructs an APInt value that has the top hiBitsSet bits set.
Definition: APInt.h:274
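A minimal sketch (illustrative values, not code from this file) of how the APInt helpers above compose:

#include "llvm/ADT/APInt.h"
using namespace llvm;

// Build a 64-bit splat of an 8-bit pattern, then mask off its top 16 bits.
static uint64_t apintSketch() {
  APInt Byte(8, 0xAB);                          // 8-bit value 0xAB
  APInt Splat = APInt::getSplat(64, Byte);      // 0xABABABABABABABAB
  APInt HiMask = APInt::getHighBitsSet(64, 16); // top 16 bits set
  APInt Wide = Byte.zext(64);                   // zero-extend to 64 bits
  return ((Splat & HiMask) | Wide).getZExtValue();
}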
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:165
BlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate IR basic block frequen...
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition: InstrTypes.h:993
@ FCMP_OEQ
0 0 0 1 True if ordered and equal
Definition: InstrTypes.h:996
@ ICMP_SLT
signed less than
Definition: InstrTypes.h:1022
@ ICMP_SLE
signed less or equal
Definition: InstrTypes.h:1023
@ FCMP_OLT
0 1 0 0 True if ordered and less than
Definition: InstrTypes.h:999
@ FCMP_ULE
1 1 0 1 True if unordered, less than, or equal
Definition: InstrTypes.h:1008
@ FCMP_OGT
0 0 1 0 True if ordered and greater than
Definition: InstrTypes.h:997
@ FCMP_OGE
0 0 1 1 True if ordered and greater than or equal
Definition: InstrTypes.h:998
@ ICMP_UGE
unsigned greater or equal
Definition: InstrTypes.h:1017
@ ICMP_UGT
unsigned greater than
Definition: InstrTypes.h:1016
@ ICMP_SGT
signed greater than
Definition: InstrTypes.h:1020
@ FCMP_ULT
1 1 0 0 True if unordered or less than
Definition: InstrTypes.h:1007
@ FCMP_ONE
0 1 1 0 True if ordered and operands are unequal
Definition: InstrTypes.h:1001
@ FCMP_UEQ
1 0 0 1 True if unordered or equal
Definition: InstrTypes.h:1004
@ ICMP_ULT
unsigned less than
Definition: InstrTypes.h:1018
@ FCMP_UGT
1 0 1 0 True if unordered or greater than
Definition: InstrTypes.h:1005
@ FCMP_OLE
0 1 0 1 True if ordered and less than or equal
Definition: InstrTypes.h:1000
@ FCMP_ORD
0 1 1 1 True if ordered (no nans)
Definition: InstrTypes.h:1002
@ ICMP_EQ
equal
Definition: InstrTypes.h:1014
@ ICMP_NE
not equal
Definition: InstrTypes.h:1015
@ ICMP_SGE
signed greater or equal
Definition: InstrTypes.h:1021
@ FCMP_UNE
1 1 1 0 True if unordered or not equal
Definition: InstrTypes.h:1009
@ ICMP_ULE
unsigned less or equal
Definition: InstrTypes.h:1019
@ FCMP_UGE
1 0 1 1 True if unordered, greater than, or equal
Definition: InstrTypes.h:1006
@ FCMP_UNO
1 0 0 0 True if unordered: isnan(X) | isnan(Y)
Definition: InstrTypes.h:1003
Predicate getInversePredicate() const
For example, EQ -> NE, UGT -> ULE, SLT -> SGE, OEQ -> UNE, UGT -> OLE, OLT -> UGE,...
Definition: InstrTypes.h:1129
bool isIntPredicate() const
Definition: InstrTypes.h:1123
bool isUnsigned() const
Definition: InstrTypes.h:1271
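A small hypothetical helper (an assumed policy, not the selector's actual rule) showing how the predicate queries above are typically combined:

#include "llvm/IR/InstrTypes.h"
using namespace llvm;

// Invert unsigned integer predicates; leave everything else untouched.
static CmpInst::Predicate invertIfUnsigned(CmpInst::Predicate P) {
  if (CmpInst::isIntPredicate(P) && CmpInst::isUnsigned(P))
    return CmpInst::getInversePredicate(P); // e.g. ICMP_UGT -> ICMP_ULE
  return P;
}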
static Constant * getSplat(unsigned NumElts, Constant *Elt)
Return a ConstantVector with the specified constant in each element.
Definition: Constants.cpp:2958
ConstantFP - Floating Point Values [float, double].
Definition: Constants.h:268
const APFloat & getValueAPF() const
Definition: Constants.h:311
bool isNegative() const
Return true if the sign bit is set.
Definition: Constants.h:318
bool isZero() const
Return true if the value is positive or negative zero.
Definition: Constants.h:315
This is the shared class of boolean and integer constants.
Definition: Constants.h:80
int64_t getSExtValue() const
Return the constant as a 64-bit integer value after it has been sign extended as appropriate for the ...
Definition: Constants.h:160
unsigned getBitWidth() const
getBitWidth - Return the scalar bitwidth of this constant.
Definition: Constants.h:148
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
Definition: Constants.h:154
static Constant * get(ArrayRef< Constant * > V)
Definition: Constants.cpp:1398
This is an important base class in LLVM.
Definition: Constant.h:41
Constant * getSplatValue(bool AllowPoison=false) const
If all elements of the vector constant have the same value, return that value.
Definition: Constants.cpp:1699
const APInt & getUniqueInteger() const
If C is a constant integer then return its value, otherwise C must be a vector of constant integers,...
Definition: Constants.cpp:1758
bool isNullValue() const
Return true if this is the value that would be returned by getNullValue.
Definition: Constants.cpp:90
This class represents an Operation in the Expression.
TypeSize getTypeStoreSize(Type *Ty) const
Returns the maximum number of bytes that may be overwritten by storing the specified type.
Definition: DataLayout.h:472
Align getPrefTypeAlign(Type *Ty) const
Returns the preferred stack/global alignment for the specified type.
Definition: DataLayout.cpp:874
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this functio...
Definition: Function.h:263
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
Definition: Function.cpp:356
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition: Function.cpp:675
std::optional< SmallVector< std::function< void(MachineInstrBuilder &)>, 4 > > ComplexRendererFns
Represents indexed stores.
Represents any type of generic load or store.
Register getPointerReg() const
Get the source register of the pointer value.
MachineMemOperand & getMMO() const
Get the MachineMemOperand on this instruction.
LocationSize getMemSize() const
Returns the size in bytes of the memory access.
LocationSize getMemSizeInBits() const
Returns the size in bits of the memory access.
Represents a G_SELECT.
Register getCondReg() const
Register getFalseReg() const
Register getTrueReg() const
Register getReg(unsigned Idx) const
Access the Idx'th operand as a register and return it.
bool isThreadLocal() const
If the value is "Thread Local", its value isn't shared by the threads.
Definition: GlobalValue.h:263
bool isEquality() const
Return true if this predicate is either EQ or NE.
constexpr unsigned getScalarSizeInBits() const
Definition: LowLevelType.h:267
constexpr bool isScalar() const
Definition: LowLevelType.h:146
constexpr LLT changeElementType(LLT NewEltTy) const
If this type is a vector, return a vector with the same number of elements but the new element type.
Definition: LowLevelType.h:214
constexpr LLT multiplyElements(int Factor) const
Produce a vector type that is Factor times bigger, preserving the element type.
Definition: LowLevelType.h:254
constexpr bool isPointerVector() const
Definition: LowLevelType.h:152
static constexpr LLT scalar(unsigned SizeInBits)
Get a low-level scalar or aggregate "bag of bits".
Definition: LowLevelType.h:42
constexpr bool isValid() const
Definition: LowLevelType.h:145
constexpr uint16_t getNumElements() const
Returns the number of elements in a vector LLT.
Definition: LowLevelType.h:159
constexpr bool isVector() const
Definition: LowLevelType.h:148
static constexpr LLT pointer(unsigned AddressSpace, unsigned SizeInBits)
Get a low-level pointer in the given address space.
Definition: LowLevelType.h:57
constexpr TypeSize getSizeInBits() const
Returns the total size of the type. Must only be called on sized types.
Definition: LowLevelType.h:193
constexpr bool isPointer() const
Definition: LowLevelType.h:149
constexpr LLT getElementType() const
Returns the vector's element type. Only valid for vector types.
Definition: LowLevelType.h:290
constexpr unsigned getAddressSpace() const
Definition: LowLevelType.h:280
static constexpr LLT fixed_vector(unsigned NumElements, unsigned ScalarSizeInBits)
Get a low-level fixed-width vector of some number of elements and element width.
Definition: LowLevelType.h:100
constexpr TypeSize getSizeInBytes() const
Returns the total size of the type in bytes, i.e.
Definition: LowLevelType.h:203
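The LLT constructors and queries above are easiest to see side by side; a minimal sketch (include path assumed for a recent LLVM tree):

#include "llvm/CodeGenTypes/LowLevelType.h"
using namespace llvm;

// Construct a few low-level types and exercise the common queries.
static void lltSketch() {
  LLT S64 = LLT::scalar(64);            // 64-bit scalar
  LLT V4S32 = LLT::fixed_vector(4, 32); // <4 x s32>
  LLT P0 = LLT::pointer(0, 64);         // 64-bit pointer in addrspace 0

  (void)S64.getSizeInBits();            // 64 bits
  (void)V4S32.getNumElements();         // 4
  (void)V4S32.getElementType();         // s32
  (void)P0.isPointer();                 // true
}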
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:67
TypeSize getValue() const
Set of metadata that should be preserved when using BuildMI().
iterator getFirstNonPHI()
Returns a pointer to the first instruction in this block that is not a PHINode instruction.
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
The MachineConstantPool class keeps track of constants referenced by a function which must be spilled...
unsigned getConstantPoolIndex(const Constant *C, Align Alignment)
getConstantPoolIndex - Create a new entry in the constant pool or return an existing one.
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
void setFrameAddressIsTaken(bool T)
void setReturnAddressIsTaken(bool s)
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
const DataLayout & getDataLayout() const
Return the DataLayout attached to the Module associated to this MF.
Function & getFunction()
Return the LLVM function that this machine code represents.
const LLVMTargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
MachineConstantPool * getConstantPool()
getConstantPool - Return the constant pool object for the current function.
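A sketch of the getMachineMemOperand factory above, allocating a load MMO for a constant-pool access (the 64-bit type and 8-byte alignment are chosen purely for illustration):

#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGenTypes/LowLevelType.h"
#include "llvm/Support/Alignment.h"
using namespace llvm;

// Create a MachineMemOperand describing a 64-bit load from the constant pool.
static MachineMemOperand *makeCPLoadMMO(MachineFunction &MF) {
  return MF.getMachineMemOperand(MachinePointerInfo::getConstantPool(MF),
                                 MachineMemOperand::MOLoad, LLT::scalar(64),
                                 Align(8));
}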
Helper class to build MachineInstr.
void setInsertPt(MachineBasicBlock &MBB, MachineBasicBlock::iterator II)
Set the insertion point before the specified position.
void setInstr(MachineInstr &MI)
Set the insertion point to before MI.
MachineInstrBuilder buildInstr(unsigned Opcode)
Build and insert <empty> = Opcode <empty>.
MachineFunction & getMF()
Getter for the function we currently build.
void setInstrAndDebugLoc(MachineInstr &MI)
Set the insertion point to before MI, and set the debug loc to MI's loc.
const MachineBasicBlock & getMBB() const
Getter for the basic block we currently build.
MachineRegisterInfo * getMRI()
Getter for MRI.
MachineIRBuilderState & getState()
Getter for the State.
MachineInstrBuilder buildCopy(const DstOp &Res, const SrcOp &Op)
Build and insert Res = COPY Op.
const DataLayout & getDataLayout() const
void setState(const MachineIRBuilderState &NewState)
Setter for the State.
MachineInstrBuilder buildPtrToInt(const DstOp &Dst, const SrcOp &Src)
Build and insert a G_PTRTOINT instruction.
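A hypothetical fragment combining the MachineIRBuilder calls above: insert a COPY and a G_PTRTOINT in front of an existing instruction (register creation via MachineRegisterInfo is assumed; Dst is assumed to be a pointer-typed vreg):

#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
using namespace llvm;

// Rebuild "Dst = COPY PtrSrc" followed by a pointer-to-integer conversion.
static void buildSketch(MachineIRBuilder &MIB, MachineInstr &MI,
                        Register Dst, Register PtrSrc) {
  MIB.setInstrAndDebugLoc(MI);            // insert before MI, reuse its DebugLoc
  auto Copy = MIB.buildCopy(Dst, PtrSrc); // Dst = COPY PtrSrc
  Register Tmp =
      MIB.getMRI()->createGenericVirtualRegister(LLT::scalar(64));
  MIB.buildPtrToInt(Tmp, Copy.getReg(0)); // Tmp = G_PTRTOINT Dst
}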
Register getReg(unsigned Idx) const
Get the register for the operand index.
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & addBlockAddress(const BlockAddress *BA, int64_t Offset=0, unsigned TargetFlags=0) const
const MachineInstrBuilder & addFrameIndex(int Idx) const
const MachineInstrBuilder & addRegMask(const uint32_t *Mask) const
const MachineInstrBuilder & addGlobalAddress(const GlobalValue *GV, int64_t Offset=0, unsigned TargetFlags=0) const
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addJumpTableIndex(unsigned Idx, unsigned TargetFlags=0) const
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
const MachineInstrBuilder & cloneMemRefs(const MachineInstr &OtherMI) const
const MachineInstrBuilder & addUse(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register use operand.
const MachineInstrBuilder & setMIFlags(unsigned Flags) const
const MachineInstrBuilder & addMemOperand(MachineMemOperand *MMO) const
const MachineInstrBuilder & addDef(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register definition operand.
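The add* helpers above are usually chained; a sketch building an AArch64 ADDXri with explicit def/use/immediate operands (backend-local header and opcode availability assumed):

#include "AArch64InstrInfo.h" // assumed: backend-local header providing AArch64::ADDXri
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
using namespace llvm;

// Dst = ADDXri Src, #42, lsl #0
static MachineInstrBuilder buildAddImm(MachineIRBuilder &MIB,
                                       Register Dst, Register Src) {
  return MIB.buildInstr(AArch64::ADDXri)
      .addDef(Dst)
      .addUse(Src)
      .addImm(42)  // unshifted 12-bit immediate
      .addImm(0);  // shift amount (0 or 12)
}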
Representation of each machine instruction.
Definition: MachineInstr.h:69
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
Definition: MachineInstr.h:558
const MachineBasicBlock * getParent() const
Definition: MachineInstr.h:341
void addOperand(MachineFunction &MF, const MachineOperand &Op)
Add the specified operand to the instruction.
const MachineFunction * getMF() const
Return the function that contains the basic block that this instruction belongs to.
void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:568
void addMemOperand(MachineFunction &MF, MachineMemOperand *MO)
Add a MachineMemOperand to the machine instruction.
A description of a memory reference used in the backend.
LLT getMemoryType() const
Return the memory type of the memory reference.
@ MOLoad
The memory access reads data.
AtomicOrdering getSuccessOrdering() const
Return the atomic ordering requirements for this memory operation.
MachineOperand class - Representation of each machine instruction operand.
const GlobalValue * getGlobal() const
const ConstantInt * getCImm() const
bool isCImm() const
isCImm - Test if this is a MO_CImmediate operand.
int64_t getImm() const
bool isReg() const
isReg - Tests if this is a MO_Register operand.
void setReg(Register Reg)
Change the register this operand corresponds to.
bool isImm() const
isImm - Tests if this is a MO_Immediate operand.
void ChangeToImmediate(int64_t ImmVal, unsigned TargetFlags=0)
ChangeToImmediate - Replace this operand with a new immediate operand of the specified value.
MachineInstr * getParent()
getParent - Return the instruction that this operand belongs to.
static MachineOperand CreateImm(int64_t Val)
Register getReg() const
getReg - Returns the register number.
static MachineOperand CreateGA(const GlobalValue *GV, int64_t Offset, unsigned TargetFlags=0)
static MachineOperand CreateBA(const BlockAddress *BA, int64_t Offset, unsigned TargetFlags=0)
const ConstantFP * getFPImm() const
unsigned getPredicate() const
int64_t getOffset() const
Return the offset from the symbol in this operand.
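A short sketch of the MachineOperand accessors and mutators above (the folding policy is hypothetical):

#include "llvm/CodeGen/MachineOperand.h"
using namespace llvm;

// Rewrite a register operand in place as an immediate, or read back an
// existing immediate; otherwise fall back to Val.
static int64_t regToImm(MachineOperand &MO, int64_t Val) {
  if (MO.isReg()) {
    MO.ChangeToImmediate(Val); // drops the register, installs Val
    return Val;
  }
  return MO.isImm() ? MO.getImm() : Val;
}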
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
T get() const
Returns the value of the specified pointer type.
Definition: PointerUnion.h:155
T dyn_cast() const
Returns the current pointer if it is of the specified pointer type, otherwise returns null.
Definition: PointerUnion.h:162
Analysis providing profile information.
Holds all the information related to register banks.
static const TargetRegisterClass * constrainGenericRegister(Register Reg, const TargetRegisterClass &RC, MachineRegisterInfo &MRI)
Constrain the (possibly generic) virtual register Reg to RC.
const RegisterBank & getRegBank(unsigned ID)
Get the register bank identified by ID.
TypeSize getSizeInBits(Register Reg, const MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI) const
Get the size in bits of Reg.
This class implements the register bank concept.
Definition: RegisterBank.h:28
unsigned getID() const
Get the identifier of this register bank.
Definition: RegisterBank.h:45
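An illustrative check built from the register-bank queries above (the 64-bit limit is an arbitrary example, not a rule from this file):

#include "llvm/CodeGen/RegisterBank.h"
#include "llvm/CodeGen/RegisterBankInfo.h"
using namespace llvm;

// True if RB has the wanted ID and Reg is at most 64 bits wide.
static bool isSmallRegOnBank(Register Reg, const RegisterBank &RB,
                             unsigned WantedID, const RegisterBankInfo &RBI,
                             const MachineRegisterInfo &MRI,
                             const TargetRegisterInfo &TRI) {
  if (RB.getID() != WantedID)
    return false;
  return RBI.getSizeInBits(Reg, MRI, TRI).getFixedValue() <= 64;
}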
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
constexpr bool isValid() const
Definition: Register.h:116
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
Definition: Register.h:91
constexpr bool isPhysical() const
Return true if the specified register number is in the physical register namespace.
Definition: Register.h:95
This class represents the LLVM 'select' instruction.
reference emplace_back(ArgTypes &&... Args)
Definition: SmallVector.h:950
void push_back(const T &Elt)
Definition: SmallVector.h:426
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1209
Register getReg() const
TargetInstrInfo - Interface to description of machine instruction set.
CodeModel::Model getCodeModel() const
Returns the code model.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
virtual const TargetRegisterInfo * getRegisterInfo() const
getRegisterInfo - If register information is available, return it.
virtual const TargetLowering * getTargetLowering() const
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
static IntegerType * getIntNTy(LLVMContext &C, unsigned N)
static IntegerType * getInt8Ty(LLVMContext &C)
A Use represents the edge between a Value definition and its users.
Definition: Use.h:43
LLVM Value Representation.
Definition: Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
Align getPointerAlignment(const DataLayout &DL) const
Returns an alignment of the pointer value.
Definition: Value.cpp:926
self_iterator getIterator()
Definition: ilist_node.h:109
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
static CondCode getInvertedCondCode(CondCode Code)
static unsigned getNZCVToSatisfyCondCode(CondCode Code)
Given a condition code, return NZCV flags that would satisfy that condition.
void changeFCMPPredToAArch64CC(const CmpInst::Predicate P, AArch64CC::CondCode &CondCode, AArch64CC::CondCode &CondCode2)
Find the AArch64 condition codes necessary to represent P for a scalar floating point comparison.
std::optional< int64_t > getAArch64VectorSplatScalar(const MachineInstr &MI, const MachineRegisterInfo &MRI)
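A sketch of the condition-code helpers above, translating an IR fcmp predicate and then inverting the primary AArch64 condition (backend-local include paths and namespace placement are assumptions):

#include "AArch64GlobalISelUtils.h" // assumed backend-local header
#include "Utils/AArch64BaseInfo.h"  // assumed: declares the AArch64CC helpers
#include "llvm/IR/InstrTypes.h"
using namespace llvm;
using namespace AArch64GISelUtils;

// Return the inverted primary condition code for an IR fp predicate.
static AArch64CC::CondCode invertedPrimaryCC(CmpInst::Predicate P) {
  AArch64CC::CondCode CC1, CC2;
  changeFCMPPredToAArch64CC(P, CC1, CC2); // CC2 is only needed for two-code cases
  return AArch64CC::getInvertedCondCode(CC1);
}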
@ MO_NC
MO_NC - Indicates whether the linker is expected to check the symbol reference for overflow.
@ MO_G1
MO_G1 - A symbol operand with this flag (granule 1) represents the bits 16-31 of a 64-bit address,...
@ MO_PAGEOFF
MO_PAGEOFF - A symbol operand with this flag represents the offset of that symbol within a 4K page.
@ MO_GOT
MO_GOT - This flag indicates that a symbol operand represents the address of the GOT entry for the sy...
@ MO_G0
MO_G0 - A symbol operand with this flag (granule 0) represents the bits 0-15 of a 64-bit address,...
@ MO_PAGE
MO_PAGE - A symbol operand with this flag represents the pc-relative offset of the 4K page containing...
@ MO_TLS
MO_TLS - Indicates that the operand being accessed is some kind of thread-local symbol.
@ MO_G2
MO_G2 - A symbol operand with this flag (granule 2) represents the bits 32-47 of a 64-bit address,...
@ MO_G3
MO_G3 - A symbol operand with this flag (granule 3) represents the high 16-bits of a 64-bit address,...
static bool isLogicalImmediate(uint64_t imm, unsigned regSize)
isLogicalImmediate - Return true if the immediate is valid for a logical immediate instruction of the...
static uint8_t encodeAdvSIMDModImmType2(uint64_t Imm)
static bool isAdvSIMDModImmType9(uint64_t Imm)
static bool isAdvSIMDModImmType4(uint64_t Imm)
static bool isAdvSIMDModImmType5(uint64_t Imm)
static int getFP32Imm(const APInt &Imm)
getFP32Imm - Return an 8-bit floating-point version of the 32-bit floating-point value.
static uint8_t encodeAdvSIMDModImmType7(uint64_t Imm)
static uint8_t encodeAdvSIMDModImmType12(uint64_t Imm)
static uint8_t encodeAdvSIMDModImmType10(uint64_t Imm)
static uint8_t encodeAdvSIMDModImmType9(uint64_t Imm)
static uint64_t encodeLogicalImmediate(uint64_t imm, unsigned regSize)
encodeLogicalImmediate - Return the encoded immediate value for a logical immediate instruction of th...
static bool isAdvSIMDModImmType7(uint64_t Imm)
static uint8_t encodeAdvSIMDModImmType5(uint64_t Imm)
static int getFP64Imm(const APInt &Imm)
getFP64Imm - Return an 8-bit floating-point version of the 64-bit floating-point value.
static bool isAdvSIMDModImmType10(uint64_t Imm)
static int getFP16Imm(const APInt &Imm)
getFP16Imm - Return an 8-bit floating-point version of the 16-bit floating-point value.
static uint8_t encodeAdvSIMDModImmType8(uint64_t Imm)
static bool isAdvSIMDModImmType12(uint64_t Imm)
static uint8_t encodeAdvSIMDModImmType11(uint64_t Imm)
static bool isAdvSIMDModImmType11(uint64_t Imm)
static uint8_t encodeAdvSIMDModImmType6(uint64_t Imm)
static bool isAdvSIMDModImmType8(uint64_t Imm)
static uint8_t encodeAdvSIMDModImmType4(uint64_t Imm)
static unsigned getShifterImm(AArch64_AM::ShiftExtendType ST, unsigned Imm)
getShifterImm - Encode the shift type and amount: imm: 6-bit shift amount shifter: 000 ==> lsl 001 ==...
static bool isAdvSIMDModImmType6(uint64_t Imm)
static uint8_t encodeAdvSIMDModImmType1(uint64_t Imm)
static uint8_t encodeAdvSIMDModImmType3(uint64_t Imm)
static bool isAdvSIMDModImmType2(uint64_t Imm)
static bool isAdvSIMDModImmType3(uint64_t Imm)
static bool isAdvSIMDModImmType1(uint64_t Imm)
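A sketch using the logical-immediate helpers above (include path as used inside the AArch64 backend; whether a given constant is representable depends on the register size):

#include "MCTargetDesc/AArch64AddressingModes.h" // assumed backend-local path
#include <cstdint>

// Encode Imm for a RegSize-bit logical instruction if it is representable.
static bool tryEncodeLogicalImm(uint64_t Imm, unsigned RegSize,
                                uint64_t &Encoding) {
  if (!llvm::AArch64_AM::isLogicalImmediate(Imm, RegSize))
    return false;
  Encoding = llvm::AArch64_AM::encodeLogicalImmediate(Imm, RegSize);
  return true;
}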
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:121
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out,...
Definition: ISDOpcodes.h:1529
operand_type_match m_Reg()
SpecificConstantMatch m_SpecificICst(int64_t RequestedValue)
Matches a constant equal to RequestedValue.
UnaryOp_match< SrcTy, TargetOpcode::G_ZEXT > m_GZExt(const SrcTy &Src)
ConstantMatch< APInt > m_ICst(APInt &Cst)
BinaryOp_match< LHS, RHS, TargetOpcode::G_ADD, true > m_GAdd(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, TargetOpcode::G_OR, true > m_GOr(const LHS &L, const RHS &R)
BinaryOp_match< SpecificConstantMatch, SrcTy, TargetOpcode::G_SUB > m_Neg(const SrcTy &&Src)
Matches a register negated by a G_SUB.
OneNonDBGUse_match< SubPat > m_OneNonDBGUse(const SubPat &SP)
BinaryOp_match< SrcTy, SpecificConstantMatch, TargetOpcode::G_XOR, true > m_Not(const SrcTy &&Src)
Matches a register not-ed by a G_XOR.
bool mi_match(Reg R, const MachineRegisterInfo &MRI, Pattern &&P)
BinaryOp_match< LHS, RHS, TargetOpcode::G_PTR_ADD, false > m_GPtrAdd(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, TargetOpcode::G_SHL, false > m_GShl(const LHS &L, const RHS &R)
Or< Preds... > m_any_of(Preds &&... preds)
BinaryOp_match< LHS, RHS, TargetOpcode::G_AND, true > m_GAnd(const LHS &L, const RHS &R)
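A sketch of the MIPatternMatch combinators above: recognise "Dst = G_ADD X, constant" and bind both pieces (the helper name is hypothetical):

#include "llvm/ADT/APInt.h"
#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
using namespace llvm;
using namespace MIPatternMatch;

// True if Dst is defined by a G_ADD of a register and an integer constant.
static bool matchAddOfConstant(Register Dst, const MachineRegisterInfo &MRI,
                               Register &X, APInt &Cst) {
  return mi_match(Dst, MRI, m_GAdd(m_Reg(X), m_ICst(Cst)));
}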
Predicate
Predicate - These are "(BI << 5) | BO" for various predicates.
Definition: PPCPredicates.h:26
@ Implicit
Not emitted register (e.g. carry, or temporary result).
@ Undef
Value of the register doesn't matter.
Reg
All possible values of the reg field in the ModR/M byte.
constexpr double e
Definition: MathExtras.h:31
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
Register getFunctionLiveInPhysReg(MachineFunction &MF, const TargetInstrInfo &TII, MCRegister PhysReg, const TargetRegisterClass &RC, const DebugLoc &DL, LLT RegTy=LLT())
Return a virtual register corresponding to the incoming argument register PhysReg.
Definition: Utils.cpp:882
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition: STLExtras.h:329
@ Offset
Definition: DWP.cpp:456
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1722
Register constrainOperandRegClass(const MachineFunction &MF, const TargetRegisterInfo &TRI, MachineRegisterInfo &MRI, const TargetInstrInfo &TII, const RegisterBankInfo &RBI, MachineInstr &InsertPt, const TargetRegisterClass &RegClass, MachineOperand &RegMO)
Constrain the Register operand OpIdx, so that it is now constrained to the TargetRegisterClass passed...
Definition: Utils.cpp:54
MachineInstr * getOpcodeDef(unsigned Opcode, Register Reg, const MachineRegisterInfo &MRI)
See if Reg is defined by an single def instruction that is Opcode.
Definition: Utils.cpp:625
const ConstantFP * getConstantFPVRegVal(Register VReg, const MachineRegisterInfo &MRI)
Definition: Utils.cpp:438
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
std::optional< APInt > getIConstantVRegVal(Register VReg, const MachineRegisterInfo &MRI)
If VReg is defined by a G_CONSTANT, return the corresponding value.
Definition: Utils.cpp:293
bool constrainSelectedInstRegOperands(MachineInstr &I, const TargetInstrInfo &TII, const TargetRegisterInfo &TRI, const RegisterBankInfo &RBI)
Mutate the newly-selected instruction I to constrain its (possibly generic) virtual register operands...
Definition: Utils.cpp:153
bool isPreISelGenericOpcode(unsigned Opcode)
Check whether the given Opcode is a generic opcode that is not supposed to appear after ISel.
Definition: TargetOpcodes.h:30
unsigned getBLRCallOpcode(const MachineFunction &MF)
Return opcode to be used for indirect calls.
MachineInstr * getDefIgnoringCopies(Register Reg, const MachineRegisterInfo &MRI)
Find the def instruction for Reg, folding away any trivial copies.
Definition: Utils.cpp:465
std::optional< int64_t > getIConstantVRegSExtVal(Register VReg, const MachineRegisterInfo &MRI)
If VReg is defined by a G_CONSTANT that fits in int64_t, returns it.
Definition: Utils.cpp:305
constexpr bool isShiftedMask_64(uint64_t Value)
Return true if the argument contains a non-empty sequence of ones with the remainder zero (64 bit ver...
Definition: MathExtras.h:269
bool shouldOptForSize(const MachineBasicBlock &MBB, ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI)
Returns true if the given block should be optimized for size.
Definition: Utils.cpp:1609
OutputIt transform(R &&Range, OutputIt d_first, UnaryFunction F)
Wrapper function around std::transform to apply a function to a range and store the result elsewhere.
Definition: STLExtras.h:1928
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1729
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition: MathExtras.h:324
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
std::optional< ValueAndVReg > getAnyConstantVRegValWithLookThrough(Register VReg, const MachineRegisterInfo &MRI, bool LookThroughInstrs=true, bool LookThroughAnyExt=false)
If VReg is defined by a statically evaluable chain of instructions rooted on a G_CONSTANT or G_FCONST...
Definition: Utils.cpp:419
AtomicOrdering
Atomic ordering for LLVM's memory model.
InstructionSelector * createAArch64InstructionSelector(const AArch64TargetMachine &, AArch64Subtarget &, AArch64RegisterBankInfo &)
DWARFExpression::Operation Op
std::optional< ValueAndVReg > getIConstantVRegValWithLookThrough(Register VReg, const MachineRegisterInfo &MRI, bool LookThroughInstrs=true)
If VReg is defined by a statically evaluable chain of instructions rooted on a G_CONSTANT returns its...
Definition: Utils.cpp:413
std::optional< DefinitionAndSourceRegister > getDefSrcRegIgnoringCopies(Register Reg, const MachineRegisterInfo &MRI)
Find the def instruction for Reg, and underlying value Register folding away any copies.
Definition: Utils.cpp:446
Register getSrcRegIgnoringCopies(Register Reg, const MachineRegisterInfo &MRI)
Find the source register for Reg, folding away any trivial copies.
Definition: Utils.cpp:472
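A sketch combining the GlobalISel utility functions above: look through trivial copies and test for a zero G_CONSTANT (the zero check is just an example):

#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
using namespace llvm;

// True if Reg, after folding trivial copies, is a G_CONSTANT equal to zero.
static bool isZeroConstant(Register Reg, const MachineRegisterInfo &MRI) {
  Register Src = getSrcRegIgnoringCopies(Reg, MRI);
  if (auto Val = getIConstantVRegSExtVal(Src, MRI))
    return *Val == 0;
  return false;
}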
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:860
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
static EVT getFloatingPointVT(unsigned BitWidth)
Returns the EVT that represents a floating-point type with the given number of bits.
Definition: ValueTypes.h:58
Class which stores all the state required in a MachineIRBuilder.
This class contains a discriminated union of information about pointers in memory operands,...
static MachinePointerInfo getConstantPool(MachineFunction &MF)
Return a MachinePointerInfo record that refers to the constant pool.