1//===- AArch64InstructionSelector.cpp ----------------------------*- C++ -*-==//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8/// \file
9/// This file implements the targeting of the InstructionSelector class for
10/// AArch64.
11/// \todo This should be generated by TableGen.
12//===----------------------------------------------------------------------===//
13
15#include "AArch64InstrInfo.h"
18#include "AArch64RegisterInfo.h"
19#include "AArch64Subtarget.h"
42#include "llvm/IR/Constants.h"
45#include "llvm/IR/IntrinsicsAArch64.h"
46#include "llvm/IR/Type.h"
47#include "llvm/Pass.h"
48#include "llvm/Support/Debug.h"
50#include <optional>
51
52#define DEBUG_TYPE "aarch64-isel"
53
54using namespace llvm;
55using namespace MIPatternMatch;
56using namespace AArch64GISelUtils;
57
58namespace llvm {
59class BlockFrequencyInfo;
60class ProfileSummaryInfo;
61}
62
63namespace {
64
65#define GET_GLOBALISEL_PREDICATE_BITSET
66#include "AArch64GenGlobalISel.inc"
67#undef GET_GLOBALISEL_PREDICATE_BITSET
68
69
70class AArch64InstructionSelector : public InstructionSelector {
71public:
72 AArch64InstructionSelector(const AArch64TargetMachine &TM,
73 const AArch64Subtarget &STI,
74 const AArch64RegisterBankInfo &RBI);
75
76 bool select(MachineInstr &I) override;
77 static const char *getName() { return DEBUG_TYPE; }
78
79 void setupMF(MachineFunction &MF, GISelValueTracking *VT,
80 CodeGenCoverage *CoverageInfo, ProfileSummaryInfo *PSI,
81 BlockFrequencyInfo *BFI) override {
82 InstructionSelector::setupMF(MF, VT, CoverageInfo, PSI, BFI);
83 MIB.setMF(MF);
84
85 // hasFnAttribute() is expensive to call on every BRCOND selection, so
86 // cache it here for each run of the selector.
87 ProduceNonFlagSettingCondBr =
88 !MF.getFunction().hasFnAttribute(Attribute::SpeculativeLoadHardening);
89 MFReturnAddr = Register();
90
91 processPHIs(MF);
92 }
93
94private:
95 /// tblgen-erated 'select' implementation, used as the initial selector for
96 /// the patterns that don't require complex C++.
97 bool selectImpl(MachineInstr &I, CodeGenCoverage &CoverageInfo) const;
98
99 // A lowering phase that runs before any selection attempts.
100 // Returns true if the instruction was modified.
101 bool preISelLower(MachineInstr &I);
102
103 // An early selection function that runs before the selectImpl() call.
104 bool earlySelect(MachineInstr &I);
105
106 /// Save state that is shared between select calls, call select on \p I and
107 /// then restore the saved state. This can be used to recursively call select
108 /// within a select call.
109 bool selectAndRestoreState(MachineInstr &I);
110
111 // Do some preprocessing of G_PHIs before we begin selection.
112 void processPHIs(MachineFunction &MF);
113
114 bool earlySelectSHL(MachineInstr &I, MachineRegisterInfo &MRI);
115
116 /// Eliminate same-sized cross-bank copies into stores before selectImpl().
117 bool contractCrossBankCopyIntoStore(MachineInstr &I,
118 MachineRegisterInfo &MRI);
119
120 bool convertPtrAddToAdd(MachineInstr &I, MachineRegisterInfo &MRI);
121
122 bool selectVaStartAAPCS(MachineInstr &I, MachineFunction &MF,
123 MachineRegisterInfo &MRI) const;
124 bool selectVaStartDarwin(MachineInstr &I, MachineFunction &MF,
125 MachineRegisterInfo &MRI) const;
126
127 ///@{
128 /// Helper functions for selectCompareBranch.
129 bool selectCompareBranchFedByFCmp(MachineInstr &I, MachineInstr &FCmp,
130 MachineIRBuilder &MIB) const;
131 bool selectCompareBranchFedByICmp(MachineInstr &I, MachineInstr &ICmp,
132 MachineIRBuilder &MIB) const;
133 bool tryOptCompareBranchFedByICmp(MachineInstr &I, MachineInstr &ICmp,
134 MachineIRBuilder &MIB) const;
135 bool tryOptAndIntoCompareBranch(MachineInstr &AndInst, bool Invert,
136 MachineBasicBlock *DstMBB,
137 MachineIRBuilder &MIB) const;
138 ///@}
139
140 bool selectCompareBranch(MachineInstr &I, MachineFunction &MF,
141 MachineRegisterInfo &MRI);
142
143 bool selectVectorAshrLshr(MachineInstr &I, MachineRegisterInfo &MRI);
144 bool selectVectorSHL(MachineInstr &I, MachineRegisterInfo &MRI);
145
146 // Helper to generate an equivalent of scalar_to_vector into a new register,
147 // returned via 'Dst'.
148 MachineInstr *emitScalarToVector(unsigned EltSize,
149 const TargetRegisterClass *DstRC,
150 Register Scalar,
151 MachineIRBuilder &MIRBuilder) const;
152 /// Helper to narrow vector that was widened by emitScalarToVector.
153 /// Copy the lowest part of a 128-bit or 64-bit vector to a 64-bit or 32-bit
154 /// vector, respectively.
155 MachineInstr *emitNarrowVector(Register DstReg, Register SrcReg,
156 MachineIRBuilder &MIRBuilder,
157 MachineRegisterInfo &MRI) const;
158
159 /// Emit a lane insert into \p DstReg, or a new vector register if
160 /// std::nullopt is provided.
161 ///
162 /// The lane inserted into is defined by \p LaneIdx. The vector source
163 /// register is given by \p SrcReg. The register containing the element is
164 /// given by \p EltReg.
165 MachineInstr *emitLaneInsert(std::optional<Register> DstReg, Register SrcReg,
166 Register EltReg, unsigned LaneIdx,
167 const RegisterBank &RB,
168 MachineIRBuilder &MIRBuilder) const;
169
170 /// Emit a sequence of instructions representing a constant \p CV for a
171 /// vector register \p Dst. (E.g. a MOV, or a load from a constant pool.)
172 ///
173 /// \returns the last instruction in the sequence on success, and nullptr
174 /// otherwise.
175 MachineInstr *emitConstantVector(Register Dst, Constant *CV,
176 MachineIRBuilder &MIRBuilder,
177 MachineRegisterInfo &MRI);
178
179 MachineInstr *tryAdvSIMDModImm8(Register Dst, unsigned DstSize, APInt Bits,
180 MachineIRBuilder &MIRBuilder);
181
182 MachineInstr *tryAdvSIMDModImm16(Register Dst, unsigned DstSize, APInt Bits,
183 MachineIRBuilder &MIRBuilder, bool Inv);
184
185 MachineInstr *tryAdvSIMDModImm32(Register Dst, unsigned DstSize, APInt Bits,
186 MachineIRBuilder &MIRBuilder, bool Inv);
187 MachineInstr *tryAdvSIMDModImm64(Register Dst, unsigned DstSize, APInt Bits,
188 MachineIRBuilder &MIRBuilder);
189 MachineInstr *tryAdvSIMDModImm321s(Register Dst, unsigned DstSize, APInt Bits,
190 MachineIRBuilder &MIRBuilder, bool Inv);
191 MachineInstr *tryAdvSIMDModImmFP(Register Dst, unsigned DstSize, APInt Bits,
192 MachineIRBuilder &MIRBuilder);
193
194 bool tryOptConstantBuildVec(MachineInstr &MI, LLT DstTy,
195 MachineRegisterInfo &MRI);
196 /// \returns true if a G_BUILD_VECTOR instruction \p MI can be selected as a
197 /// SUBREG_TO_REG.
198 bool tryOptBuildVecToSubregToReg(MachineInstr &MI, MachineRegisterInfo &MRI);
199 bool selectBuildVector(MachineInstr &I, MachineRegisterInfo &MRI);
202
203 bool selectShuffleVector(MachineInstr &I, MachineRegisterInfo &MRI);
204 bool selectExtractElt(MachineInstr &I, MachineRegisterInfo &MRI);
205 bool selectConcatVectors(MachineInstr &I, MachineRegisterInfo &MRI);
206 bool selectSplitVectorUnmerge(MachineInstr &I, MachineRegisterInfo &MRI);
207
208 /// Helper function to select vector load intrinsics like
209 /// @llvm.aarch64.neon.ld2.*, @llvm.aarch64.neon.ld4.*, etc.
210 /// \p Opc is the opcode that the selected instruction should use.
211 /// \p NumVecs is the number of vector destinations for the instruction.
212 /// \p I is the original G_INTRINSIC_W_SIDE_EFFECTS instruction.
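 /// A rough usage sketch (the opcode below is illustrative only):
 /// \code
 ///   // e.g. an @llvm.aarch64.neon.ld2 intrinsic producing two vectors:
 ///   selectVectorLoadIntrinsic(AArch64::LD2Twov4s, /*NumVecs=*/2, I);
 /// \endcode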
213 bool selectVectorLoadIntrinsic(unsigned Opc, unsigned NumVecs,
214 MachineInstr &I);
215 bool selectVectorLoadLaneIntrinsic(unsigned Opc, unsigned NumVecs,
216 MachineInstr &I);
217 void selectVectorStoreIntrinsic(MachineInstr &I, unsigned NumVecs,
218 unsigned Opc);
219 bool selectVectorStoreLaneIntrinsic(MachineInstr &I, unsigned NumVecs,
220 unsigned Opc);
221 bool selectIntrinsicWithSideEffects(MachineInstr &I,
222 MachineRegisterInfo &MRI);
223 bool selectIntrinsic(MachineInstr &I, MachineRegisterInfo &MRI);
224 bool selectJumpTable(MachineInstr &I, MachineRegisterInfo &MRI);
225 bool selectBrJT(MachineInstr &I, MachineRegisterInfo &MRI);
226 bool selectTLSGlobalValue(MachineInstr &I, MachineRegisterInfo &MRI);
227 bool selectPtrAuthGlobalValue(MachineInstr &I,
228 MachineRegisterInfo &MRI) const;
229 bool selectReduction(MachineInstr &I, MachineRegisterInfo &MRI);
230 bool selectMOPS(MachineInstr &I, MachineRegisterInfo &MRI);
231 bool selectUSMovFromExtend(MachineInstr &I, MachineRegisterInfo &MRI);
232 void SelectTable(MachineInstr &I, MachineRegisterInfo &MRI, unsigned NumVecs,
233 unsigned Opc1, unsigned Opc2, bool isExt);
234
235 bool selectIndexedExtLoad(MachineInstr &I, MachineRegisterInfo &MRI);
236 bool selectIndexedLoad(MachineInstr &I, MachineRegisterInfo &MRI);
237 bool selectIndexedStore(GIndexedStore &I, MachineRegisterInfo &MRI);
238
239 unsigned emitConstantPoolEntry(const Constant *CPVal,
240 MachineFunction &MF) const;
241 MachineInstr *emitLoadFromConstantPool(const Constant *CPVal,
242 MachineIRBuilder &MIRBuilder) const;
243
244 // Emit a vector concat operation.
245 MachineInstr *emitVectorConcat(std::optional<Register> Dst, Register Op1,
246 Register Op2,
247 MachineIRBuilder &MIRBuilder) const;
248
249 // Emit an integer compare between LHS and RHS, which checks for Predicate.
250 MachineInstr *emitIntegerCompare(MachineOperand &LHS, MachineOperand &RHS,
251 MachineOperand &Predicate,
252 MachineIRBuilder &MIRBuilder) const;
253
254 /// Emit a floating point comparison between \p LHS and \p RHS.
255 /// \p Pred if given is the intended predicate to use.
256 MachineInstr *
257 emitFPCompare(Register LHS, Register RHS, MachineIRBuilder &MIRBuilder,
258 std::optional<CmpInst::Predicate> = std::nullopt) const;
259
260 MachineInstr *
261 emitInstr(unsigned Opcode, std::initializer_list<llvm::DstOp> DstOps,
262 std::initializer_list<llvm::SrcOp> SrcOps,
263 MachineIRBuilder &MIRBuilder,
264 const ComplexRendererFns &RenderFns = std::nullopt) const;
265 /// Helper function to emit an add or sub instruction.
266 ///
267 /// \p AddrModeAndSizeToOpcode must contain the opcode variants for each
268 /// addressing mode and register size, in the specific order described below.
269 ///
270 /// Below is an example of the expected input to \p AddrModeAndSizeToOpcode.
271 ///
272 /// \code
273 /// const std::array<std::array<unsigned, 2>, 5> Table {
274 /// {{AArch64::ADDXri, AArch64::ADDWri},
275 /// {AArch64::ADDXrs, AArch64::ADDWrs},
276 /// {AArch64::ADDXrr, AArch64::ADDWrr},
277 /// {AArch64::SUBXri, AArch64::SUBWri},
278 /// {AArch64::ADDXrx, AArch64::ADDWrx}}};
279 /// \endcode
280 ///
281 /// Each row in the table corresponds to a different addressing mode. Each
282 /// column corresponds to a different register size.
283 ///
284 /// \attention Rows must be structured as follows:
285 /// - Row 0: The ri opcode variants
286 /// - Row 1: The rs opcode variants
287 /// - Row 2: The rr opcode variants
288 /// - Row 3: The ri opcode variants for negative immediates
289 /// - Row 4: The rx opcode variants
290 ///
291 /// \attention Columns must be structured as follows:
292 /// - Column 0: The 64-bit opcode variants
293 /// - Column 1: The 32-bit opcode variants
294 ///
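/// For example, using the table above, AddrModeAndSizeToOpcode[1][1] is the
/// 32-bit rs variant (AArch64::ADDWrs), and AddrModeAndSizeToOpcode[3][0] is
/// the 64-bit ri variant used for negative immediates (AArch64::SUBXri).
///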
295 /// \p Dst is the destination register of the binop to emit.
296 /// \p LHS is the left-hand operand of the binop to emit.
297 /// \p RHS is the right-hand operand of the binop to emit.
298 MachineInstr *emitAddSub(
299 const std::array<std::array<unsigned, 2>, 5> &AddrModeAndSizeToOpcode,
300 Register Dst, MachineOperand &LHS, MachineOperand &RHS,
301 MachineIRBuilder &MIRBuilder) const;
302 MachineInstr *emitADD(Register DefReg, MachineOperand &LHS,
303 MachineOperand &RHS,
304 MachineIRBuilder &MIRBuilder) const;
305 MachineInstr *emitADDS(Register Dst, MachineOperand &LHS, MachineOperand &RHS,
306 MachineIRBuilder &MIRBuilder) const;
307 MachineInstr *emitSUBS(Register Dst, MachineOperand &LHS, MachineOperand &RHS,
308 MachineIRBuilder &MIRBuilder) const;
309 MachineInstr *emitADCS(Register Dst, MachineOperand &LHS, MachineOperand &RHS,
310 MachineIRBuilder &MIRBuilder) const;
311 MachineInstr *emitSBCS(Register Dst, MachineOperand &LHS, MachineOperand &RHS,
312 MachineIRBuilder &MIRBuilder) const;
313 MachineInstr *emitCMN(MachineOperand &LHS, MachineOperand &RHS,
314 MachineIRBuilder &MIRBuilder) const;
315 MachineInstr *emitTST(MachineOperand &LHS, MachineOperand &RHS,
316 MachineIRBuilder &MIRBuilder) const;
317 MachineInstr *emitSelect(Register Dst, Register LHS, Register RHS,
318 AArch64CC::CondCode CC,
319 MachineIRBuilder &MIRBuilder) const;
320 MachineInstr *emitExtractVectorElt(std::optional<Register> DstReg,
321 const RegisterBank &DstRB, LLT ScalarTy,
322 Register VecReg, unsigned LaneIdx,
323 MachineIRBuilder &MIRBuilder) const;
324 MachineInstr *emitCSINC(Register Dst, Register Src1, Register Src2,
325 AArch64CC::CondCode Pred,
326 MachineIRBuilder &MIRBuilder) const;
327 /// Emit a CSet for a FP compare.
328 ///
329 /// \p Dst is expected to be a 32-bit scalar register.
330 MachineInstr *emitCSetForFCmp(Register Dst, CmpInst::Predicate Pred,
331 MachineIRBuilder &MIRBuilder) const;
332
333 /// Emit an instruction that sets NZCV to the carry-in expected by \p I.
334 /// Might elide the instruction if the previous instruction already sets NZCV
335 /// correctly.
336 MachineInstr *emitCarryIn(MachineInstr &I, Register CarryReg);
337
338 /// Emit the overflow op for \p Opcode.
339 ///
340 /// \p Opcode is expected to be an overflow op's opcode, e.g. G_UADDO,
341 /// G_USUBO, etc.
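 /// For example (roughly), a G_UADDO is emitted as a flag-setting ADDS, and the
 /// returned condition code (e.g. AArch64CC::HS for the unsigned-add carry)
 /// selects the overflow bit.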
342 std::pair<MachineInstr *, AArch64CC::CondCode>
343 emitOverflowOp(unsigned Opcode, Register Dst, MachineOperand &LHS,
344 MachineOperand &RHS, MachineIRBuilder &MIRBuilder) const;
345
346 bool selectOverflowOp(MachineInstr &I, MachineRegisterInfo &MRI);
347
348 /// Emit expression as a conjunction (a series of CCMP/CFCMP ops).
349 /// In some cases this is even possible with OR operations in the expression.
350 MachineInstr *emitConjunction(Register Val, AArch64CC::CondCode &OutCC,
351 MachineIRBuilder &MIB) const;
352 MachineInstr *emitConditionalComparison(Register LHS, Register RHS,
353 CmpInst::Predicate CC,
354 AArch64CC::CondCode Predicate,
355 AArch64CC::CondCode OutCC,
356 MachineIRBuilder &MIB) const;
357 MachineInstr *emitConjunctionRec(Register Val, AArch64CC::CondCode &OutCC,
358 bool Negate, Register CCOp,
359 AArch64CC::CondCode Predicate,
360 MachineIRBuilder &MIB) const;
361
362 /// Emit a TB(N)Z instruction which tests \p Bit in \p TestReg.
363 /// \p IsNegative is true if the test should be "not zero".
364 /// This will also optimize the test bit instruction when possible.
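 /// For example (illustrative MIR), testing bit 3 of %reg and branching to
 /// %bb.1 when that bit is not zero emits:
 /// \code
 ///   TBNZW %reg, 3, %bb.1
 /// \endcode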
365 MachineInstr *emitTestBit(Register TestReg, uint64_t Bit, bool IsNegative,
366 MachineBasicBlock *DstMBB,
367 MachineIRBuilder &MIB) const;
368
369 /// Emit a CB(N)Z instruction which branches to \p DestMBB.
370 MachineInstr *emitCBZ(Register CompareReg, bool IsNegative,
371 MachineBasicBlock *DestMBB,
372 MachineIRBuilder &MIB) const;
373
374 // Equivalent to the i32shift_a and friends from AArch64InstrInfo.td.
375 // We use these manually instead of using the importer since it doesn't
376 // support SDNodeXForm.
377 ComplexRendererFns selectShiftA_32(const MachineOperand &Root) const;
378 ComplexRendererFns selectShiftB_32(const MachineOperand &Root) const;
379 ComplexRendererFns selectShiftA_64(const MachineOperand &Root) const;
380 ComplexRendererFns selectShiftB_64(const MachineOperand &Root) const;
381
382 ComplexRendererFns select12BitValueWithLeftShift(uint64_t Immed) const;
383 ComplexRendererFns selectArithImmed(MachineOperand &Root) const;
384 ComplexRendererFns selectNegArithImmed(MachineOperand &Root) const;
385
386 ComplexRendererFns selectAddrModeUnscaled(MachineOperand &Root,
387 unsigned Size) const;
388
389 ComplexRendererFns selectAddrModeUnscaled8(MachineOperand &Root) const {
390 return selectAddrModeUnscaled(Root, 1);
391 }
392 ComplexRendererFns selectAddrModeUnscaled16(MachineOperand &Root) const {
393 return selectAddrModeUnscaled(Root, 2);
394 }
395 ComplexRendererFns selectAddrModeUnscaled32(MachineOperand &Root) const {
396 return selectAddrModeUnscaled(Root, 4);
397 }
398 ComplexRendererFns selectAddrModeUnscaled64(MachineOperand &Root) const {
399 return selectAddrModeUnscaled(Root, 8);
400 }
401 ComplexRendererFns selectAddrModeUnscaled128(MachineOperand &Root) const {
402 return selectAddrModeUnscaled(Root, 16);
403 }
404
405 /// Helper to try to fold in a GISEL_ADD_LOW into an immediate, to be used
406 /// from complex pattern matchers like selectAddrModeIndexed().
407 ComplexRendererFns tryFoldAddLowIntoImm(MachineInstr &RootDef, unsigned Size,
408 MachineRegisterInfo &MRI) const;
409
410 ComplexRendererFns selectAddrModeIndexed(MachineOperand &Root,
411 unsigned Size) const;
412 template <int Width>
413 ComplexRendererFns selectAddrModeIndexed(MachineOperand &Root) const {
414 return selectAddrModeIndexed(Root, Width / 8);
415 }
416
417 std::optional<bool>
418 isWorthFoldingIntoAddrMode(MachineInstr &MI,
419 const MachineRegisterInfo &MRI) const;
420
421 bool isWorthFoldingIntoExtendedReg(MachineInstr &MI,
422 const MachineRegisterInfo &MRI,
423 bool IsAddrOperand) const;
424 ComplexRendererFns
425 selectAddrModeShiftedExtendXReg(MachineOperand &Root,
426 unsigned SizeInBytes) const;
427
428 /// Returns a \p ComplexRendererFns which contains a base, offset, and whether
429 /// or not a shift + extend should be folded into an addressing mode. Returns
430 /// std::nullopt when this is not profitable or possible.
431 ComplexRendererFns
432 selectExtendedSHL(MachineOperand &Root, MachineOperand &Base,
433 MachineOperand &Offset, unsigned SizeInBytes,
434 bool WantsExt) const;
435 ComplexRendererFns selectAddrModeRegisterOffset(MachineOperand &Root) const;
436 ComplexRendererFns selectAddrModeXRO(MachineOperand &Root,
437 unsigned SizeInBytes) const;
438 template <int Width>
439 ComplexRendererFns selectAddrModeXRO(MachineOperand &Root) const {
440 return selectAddrModeXRO(Root, Width / 8);
441 }
442
443 ComplexRendererFns selectAddrModeWRO(MachineOperand &Root,
444 unsigned SizeInBytes) const;
445 template <int Width>
446 ComplexRendererFns selectAddrModeWRO(MachineOperand &Root) const {
447 return selectAddrModeWRO(Root, Width / 8);
448 }
449
450 ComplexRendererFns selectShiftedRegister(MachineOperand &Root,
451 bool AllowROR = false) const;
452
453 ComplexRendererFns selectArithShiftedRegister(MachineOperand &Root) const {
454 return selectShiftedRegister(Root);
455 }
456
457 ComplexRendererFns selectLogicalShiftedRegister(MachineOperand &Root) const {
458 return selectShiftedRegister(Root, true);
459 }
460
461 /// Given an extend instruction, determine the correct shift-extend type for
462 /// that instruction.
463 ///
464 /// If the instruction is going to be used in a load or store, pass
465 /// \p IsLoadStore = true.
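 ///
 /// For example (roughly), a G_ZEXT from 8 bits maps to UXTB and a G_SEXT from
 /// 16 bits maps to SXTH.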
466 AArch64_AM::ShiftExtendType
467 getExtendTypeForInst(MachineInstr &MI, MachineRegisterInfo &MRI,
468 bool IsLoadStore = false) const;
469
470 /// Move \p Reg to \p RC if \p Reg is not already on \p RC.
471 ///
472 /// \returns Either \p Reg if no change was necessary, or the new register
473 /// created by moving \p Reg.
474 ///
475 /// Note: This uses emitCopy right now.
476 Register moveScalarRegClass(Register Reg, const TargetRegisterClass &RC,
477 MachineIRBuilder &MIB) const;
478
479 ComplexRendererFns selectArithExtendedRegister(MachineOperand &Root) const;
480
481 ComplexRendererFns selectExtractHigh(MachineOperand &Root) const;
482
483 void renderTruncImm(MachineInstrBuilder &MIB, const MachineInstr &MI,
484 int OpIdx = -1) const;
485 void renderLogicalImm32(MachineInstrBuilder &MIB, const MachineInstr &I,
486 int OpIdx = -1) const;
487 void renderLogicalImm64(MachineInstrBuilder &MIB, const MachineInstr &I,
488 int OpIdx = -1) const;
489 void renderUbsanTrap(MachineInstrBuilder &MIB, const MachineInstr &MI,
490 int OpIdx) const;
491 void renderFPImm16(MachineInstrBuilder &MIB, const MachineInstr &MI,
492 int OpIdx = -1) const;
493 void renderFPImm32(MachineInstrBuilder &MIB, const MachineInstr &MI,
494 int OpIdx = -1) const;
495 void renderFPImm64(MachineInstrBuilder &MIB, const MachineInstr &MI,
496 int OpIdx = -1) const;
497 void renderFPImm32SIMDModImmType4(MachineInstrBuilder &MIB,
498 const MachineInstr &MI,
499 int OpIdx = -1) const;
500
501 // Materialize a GlobalValue or BlockAddress using a movz+movk sequence.
502 void materializeLargeCMVal(MachineInstr &I, const Value *V, unsigned OpFlags);
503
504 // Optimization methods.
505 bool tryOptSelect(GSelect &Sel);
506 bool tryOptSelectConjunction(GSelect &Sel, MachineInstr &CondMI);
507 MachineInstr *tryFoldIntegerCompare(MachineOperand &LHS, MachineOperand &RHS,
508 MachineOperand &Predicate,
509 MachineIRBuilder &MIRBuilder) const;
510
511 /// Return true if \p MI is a load or store of \p NumBytes bytes.
512 bool isLoadStoreOfNumBytes(const MachineInstr &MI, unsigned NumBytes) const;
513
514 /// Returns true if \p MI is guaranteed to have the high-half of a 64-bit
515 /// register zeroed out. In other words, the result of MI has been explicitly
516 /// zero extended.
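 /// For example, AArch64 W-register instructions (e.g. a 32-bit ADDWrr)
 /// implicitly zero bits [63:32] of the corresponding X register.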
517 bool isDef32(const MachineInstr &MI) const;
518
519 const AArch64TargetMachine &TM;
520 const AArch64Subtarget &STI;
521 const AArch64InstrInfo &TII;
522 const AArch64RegisterInfo &TRI;
523 const AArch64RegisterBankInfo &RBI;
524
525 bool ProduceNonFlagSettingCondBr = false;
526
527 // Some cached values used during selection.
528 // We use LR as a live-in register, and we keep track of it here as it can be
529 // clobbered by calls.
530 Register MFReturnAddr;
531
532 MachineIRBuilder MIB;
533
534#define GET_GLOBALISEL_PREDICATES_DECL
535#include "AArch64GenGlobalISel.inc"
536#undef GET_GLOBALISEL_PREDICATES_DECL
537
538// We declare the temporaries used by selectImpl() in the class to minimize the
539// cost of constructing placeholder values.
540#define GET_GLOBALISEL_TEMPORARIES_DECL
541#include "AArch64GenGlobalISel.inc"
542#undef GET_GLOBALISEL_TEMPORARIES_DECL
543};
544
545} // end anonymous namespace
546
547#define GET_GLOBALISEL_IMPL
548#include "AArch64GenGlobalISel.inc"
549#undef GET_GLOBALISEL_IMPL
550
551AArch64InstructionSelector::AArch64InstructionSelector(
552 const AArch64TargetMachine &TM, const AArch64Subtarget &STI,
553 const AArch64RegisterBankInfo &RBI)
554 : TM(TM), STI(STI), TII(*STI.getInstrInfo()), TRI(*STI.getRegisterInfo()),
555 RBI(RBI),
556#define GET_GLOBALISEL_PREDICATES_INIT
557#include "AArch64GenGlobalISel.inc"
558#undef GET_GLOBALISEL_PREDICATES_INIT
559#define GET_GLOBALISEL_TEMPORARIES_INIT
560#include "AArch64GenGlobalISel.inc"
561#undef GET_GLOBALISEL_TEMPORARIES_INIT
562{
563}
564
565// FIXME: This should be target-independent, inferred from the types declared
566// for each class in the bank.
567//
568/// Given a register bank, and a type, return the smallest register class that
569/// can represent that combination.
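/// For example, a 32-bit scalar on the GPR bank maps to GPR32 (or GPR32all
/// when \p GetAllRegSet is true), and a 64-bit type on the FPR bank maps to
/// FPR64.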
570static const TargetRegisterClass *
571getRegClassForTypeOnBank(LLT Ty, const RegisterBank &RB,
572 bool GetAllRegSet = false) {
573 if (RB.getID() == AArch64::GPRRegBankID) {
574 if (Ty.getSizeInBits() <= 32)
575 return GetAllRegSet ? &AArch64::GPR32allRegClass
576 : &AArch64::GPR32RegClass;
577 if (Ty.getSizeInBits() == 64)
578 return GetAllRegSet ? &AArch64::GPR64allRegClass
579 : &AArch64::GPR64RegClass;
580 if (Ty.getSizeInBits() == 128)
581 return &AArch64::XSeqPairsClassRegClass;
582 return nullptr;
583 }
584
585 if (RB.getID() == AArch64::FPRRegBankID) {
586 switch (Ty.getSizeInBits()) {
587 case 8:
588 return &AArch64::FPR8RegClass;
589 case 16:
590 return &AArch64::FPR16RegClass;
591 case 32:
592 return &AArch64::FPR32RegClass;
593 case 64:
594 return &AArch64::FPR64RegClass;
595 case 128:
596 return &AArch64::FPR128RegClass;
597 }
598 return nullptr;
599 }
600
601 return nullptr;
602}
603
604/// Given a register bank, and size in bits, return the smallest register class
605/// that can represent that combination.
606static const TargetRegisterClass *
607getMinClassForRegBank(const RegisterBank &RB, TypeSize SizeInBits,
608 bool GetAllRegSet = false) {
609 if (SizeInBits.isScalable()) {
610 assert(RB.getID() == AArch64::FPRRegBankID &&
611 "Expected FPR regbank for scalable type size");
612 return &AArch64::ZPRRegClass;
613 }
614
615 unsigned RegBankID = RB.getID();
616
617 if (RegBankID == AArch64::GPRRegBankID) {
618 assert(!SizeInBits.isScalable() && "Unexpected scalable register size");
619 if (SizeInBits <= 32)
620 return GetAllRegSet ? &AArch64::GPR32allRegClass
621 : &AArch64::GPR32RegClass;
622 if (SizeInBits == 64)
623 return GetAllRegSet ? &AArch64::GPR64allRegClass
624 : &AArch64::GPR64RegClass;
625 if (SizeInBits == 128)
626 return &AArch64::XSeqPairsClassRegClass;
627 }
628
629 if (RegBankID == AArch64::FPRRegBankID) {
630 if (SizeInBits.isScalable()) {
631 assert(SizeInBits == TypeSize::getScalable(128) &&
632 "Unexpected scalable register size");
633 return &AArch64::ZPRRegClass;
634 }
635
636 switch (SizeInBits) {
637 default:
638 return nullptr;
639 case 8:
640 return &AArch64::FPR8RegClass;
641 case 16:
642 return &AArch64::FPR16RegClass;
643 case 32:
644 return &AArch64::FPR32RegClass;
645 case 64:
646 return &AArch64::FPR64RegClass;
647 case 128:
648 return &AArch64::FPR128RegClass;
649 }
650 }
651
652 return nullptr;
653}
654
655/// Returns the correct subregister to use for a given register class.
656static bool getSubRegForClass(const TargetRegisterClass *RC,
657 const TargetRegisterInfo &TRI, unsigned &SubReg) {
658 switch (TRI.getRegSizeInBits(*RC)) {
659 case 8:
660 SubReg = AArch64::bsub;
661 break;
662 case 16:
663 SubReg = AArch64::hsub;
664 break;
665 case 32:
666 if (RC != &AArch64::FPR32RegClass)
667 SubReg = AArch64::sub_32;
668 else
669 SubReg = AArch64::ssub;
670 break;
671 case 64:
672 SubReg = AArch64::dsub;
673 break;
674 default:
675 LLVM_DEBUG(
676 dbgs() << "Couldn't find appropriate subregister for register class.");
677 return false;
678 }
679
680 return true;
681}
682
683/// Returns the minimum size the given register bank can hold.
684static unsigned getMinSizeForRegBank(const RegisterBank &RB) {
685 switch (RB.getID()) {
686 case AArch64::GPRRegBankID:
687 return 32;
688 case AArch64::FPRRegBankID:
689 return 8;
690 default:
691 llvm_unreachable("Tried to get minimum size for unknown register bank.");
692 }
693}
694
695/// Create a REG_SEQUENCE instruction using the registers in \p Regs.
696/// Helper function for functions like createDTuple and createQTuple.
697///
698/// \p RegClassIDs - The list of register class IDs available for some tuple of
699/// a scalar class. E.g. QQRegClassID, QQQRegClassID, QQQQRegClassID. This is
700/// expected to contain between 2 and 4 tuple classes.
701///
702/// \p SubRegs - The list of subregister classes associated with each register
703/// class ID in \p RegClassIDs. E.g., QQRegClassID should use the qsub0
704/// subregister class. The index of each subregister class is expected to
705/// correspond with the index of each register class.
706///
707/// \returns Either the destination register of REG_SEQUENCE instruction that
708/// was created, or the 0th element of \p Regs if \p Regs contains a single
709/// element.
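/// For example (illustrative MIR), combining two Q registers yields roughly:
/// \code
///   %tuple:qq = REG_SEQUENCE %reg0, %subreg.qsub0, %reg1, %subreg.qsub1
/// \endcode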
710static Register createTuple(ArrayRef<Register> Regs,
711 const unsigned RegClassIDs[],
712 const unsigned SubRegs[], MachineIRBuilder &MIB) {
713 unsigned NumRegs = Regs.size();
714 if (NumRegs == 1)
715 return Regs[0];
716 assert(NumRegs >= 2 && NumRegs <= 4 &&
717 "Only support between two and 4 registers in a tuple!");
718 const TargetRegisterInfo *TRI = MIB.getMF().getSubtarget().getRegisterInfo();
719 auto *DesiredClass = TRI->getRegClass(RegClassIDs[NumRegs - 2]);
720 auto RegSequence =
721 MIB.buildInstr(TargetOpcode::REG_SEQUENCE, {DesiredClass}, {});
722 for (unsigned I = 0, E = Regs.size(); I < E; ++I) {
723 RegSequence.addUse(Regs[I]);
724 RegSequence.addImm(SubRegs[I]);
725 }
726 return RegSequence.getReg(0);
727}
728
729/// Create a tuple of D-registers using the registers in \p Regs.
730static Register createDTuple(ArrayRef<Register> Regs, MachineIRBuilder &MIB) {
731 static const unsigned RegClassIDs[] = {
732 AArch64::DDRegClassID, AArch64::DDDRegClassID, AArch64::DDDDRegClassID};
733 static const unsigned SubRegs[] = {AArch64::dsub0, AArch64::dsub1,
734 AArch64::dsub2, AArch64::dsub3};
735 return createTuple(Regs, RegClassIDs, SubRegs, MIB);
736}
737
738/// Create a tuple of Q-registers using the registers in \p Regs.
739static Register createQTuple(ArrayRef<Register> Regs, MachineIRBuilder &MIB) {
740 static const unsigned RegClassIDs[] = {
741 AArch64::QQRegClassID, AArch64::QQQRegClassID, AArch64::QQQQRegClassID};
742 static const unsigned SubRegs[] = {AArch64::qsub0, AArch64::qsub1,
743 AArch64::qsub2, AArch64::qsub3};
744 return createTuple(Regs, RegClassIDs, SubRegs, MIB);
745}
746
747static std::optional<uint64_t> getImmedFromMO(const MachineOperand &Root) {
748 auto &MI = *Root.getParent();
749 auto &MBB = *MI.getParent();
750 auto &MF = *MBB.getParent();
751 auto &MRI = MF.getRegInfo();
752 uint64_t Immed;
753 if (Root.isImm())
754 Immed = Root.getImm();
755 else if (Root.isCImm())
756 Immed = Root.getCImm()->getZExtValue();
757 else if (Root.isReg()) {
758 auto ValAndVReg =
759 getIConstantVRegValWithLookThrough(Root.getReg(), MRI, true);
760 if (!ValAndVReg)
761 return std::nullopt;
762 Immed = ValAndVReg->Value.getSExtValue();
763 } else
764 return std::nullopt;
765 return Immed;
766}
767
768/// Check whether \p I is a currently unsupported binary operation:
769/// - it has an unsized type
770/// - an operand is not a vreg
771/// - its operands are not all in the same bank
772/// These are checks that should someday live in the verifier, but right now,
773/// these are mostly limitations of the aarch64 selector.
774static bool unsupportedBinOp(const MachineInstr &I,
775 const AArch64RegisterBankInfo &RBI,
776 const MachineRegisterInfo &MRI,
777 const AArch64RegisterInfo &TRI) {
778 LLT Ty = MRI.getType(I.getOperand(0).getReg());
779 if (!Ty.isValid()) {
780 LLVM_DEBUG(dbgs() << "Generic binop register should be typed\n");
781 return true;
782 }
783
784 const RegisterBank *PrevOpBank = nullptr;
785 for (auto &MO : I.operands()) {
786 // FIXME: Support non-register operands.
787 if (!MO.isReg()) {
788 LLVM_DEBUG(dbgs() << "Generic inst non-reg operands are unsupported\n");
789 return true;
790 }
791
792 // FIXME: Can generic operations have physical register operands? If
793 // so, this will need to be taught about that, and we'll need to get the
794 // bank out of the minimal class for the register.
795 // Either way, this needs to be documented (and possibly verified).
796 if (!MO.getReg().isVirtual()) {
797 LLVM_DEBUG(dbgs() << "Generic inst has physical register operand\n");
798 return true;
799 }
800
801 const RegisterBank *OpBank = RBI.getRegBank(MO.getReg(), MRI, TRI);
802 if (!OpBank) {
803 LLVM_DEBUG(dbgs() << "Generic register has no bank or class\n");
804 return true;
805 }
806
807 if (PrevOpBank && OpBank != PrevOpBank) {
808 LLVM_DEBUG(dbgs() << "Generic inst operands have different banks\n");
809 return true;
810 }
811 PrevOpBank = OpBank;
812 }
813 return false;
814}
815
816/// Select the AArch64 opcode for the basic binary operation \p GenericOpc
817/// (such as G_OR or G_SDIV), appropriate for the register bank \p RegBankID
818/// and of size \p OpSize.
819/// \returns \p GenericOpc if the combination is unsupported.
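/// For example, G_SHL on the 32-bit GPR bank selects to AArch64::LSLVWr, and
/// G_FADD on the 64-bit FPR bank selects to AArch64::FADDDrr.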
820static unsigned selectBinaryOp(unsigned GenericOpc, unsigned RegBankID,
821 unsigned OpSize) {
822 switch (RegBankID) {
823 case AArch64::GPRRegBankID:
824 if (OpSize == 32) {
825 switch (GenericOpc) {
826 case TargetOpcode::G_SHL:
827 return AArch64::LSLVWr;
828 case TargetOpcode::G_LSHR:
829 return AArch64::LSRVWr;
830 case TargetOpcode::G_ASHR:
831 return AArch64::ASRVWr;
832 default:
833 return GenericOpc;
834 }
835 } else if (OpSize == 64) {
836 switch (GenericOpc) {
837 case TargetOpcode::G_PTR_ADD:
838 return AArch64::ADDXrr;
839 case TargetOpcode::G_SHL:
840 return AArch64::LSLVXr;
841 case TargetOpcode::G_LSHR:
842 return AArch64::LSRVXr;
843 case TargetOpcode::G_ASHR:
844 return AArch64::ASRVXr;
845 default:
846 return GenericOpc;
847 }
848 }
849 break;
850 case AArch64::FPRRegBankID:
851 switch (OpSize) {
852 case 32:
853 switch (GenericOpc) {
854 case TargetOpcode::G_FADD:
855 return AArch64::FADDSrr;
856 case TargetOpcode::G_FSUB:
857 return AArch64::FSUBSrr;
858 case TargetOpcode::G_FMUL:
859 return AArch64::FMULSrr;
860 case TargetOpcode::G_FDIV:
861 return AArch64::FDIVSrr;
862 default:
863 return GenericOpc;
864 }
865 case 64:
866 switch (GenericOpc) {
867 case TargetOpcode::G_FADD:
868 return AArch64::FADDDrr;
869 case TargetOpcode::G_FSUB:
870 return AArch64::FSUBDrr;
871 case TargetOpcode::G_FMUL:
872 return AArch64::FMULDrr;
873 case TargetOpcode::G_FDIV:
874 return AArch64::FDIVDrr;
875 case TargetOpcode::G_OR:
876 return AArch64::ORRv8i8;
877 default:
878 return GenericOpc;
879 }
880 }
881 break;
882 }
883 return GenericOpc;
884}
885
886/// Select the AArch64 opcode for the G_LOAD or G_STORE operation \p GenericOpc,
887/// appropriate for the (value) register bank \p RegBankID and of memory access
888/// size \p OpSize. This returns the variant with the base+unsigned-immediate
889/// addressing mode (e.g., LDRXui).
890/// \returns \p GenericOpc if the combination is unsupported.
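/// For example, a 64-bit G_LOAD on the GPR bank selects to AArch64::LDRXui,
/// and a 128-bit G_STORE on the FPR bank selects to AArch64::STRQui.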
891static unsigned selectLoadStoreUIOp(unsigned GenericOpc, unsigned RegBankID,
892 unsigned OpSize) {
893 const bool isStore = GenericOpc == TargetOpcode::G_STORE;
894 switch (RegBankID) {
895 case AArch64::GPRRegBankID:
896 switch (OpSize) {
897 case 8:
898 return isStore ? AArch64::STRBBui : AArch64::LDRBBui;
899 case 16:
900 return isStore ? AArch64::STRHHui : AArch64::LDRHHui;
901 case 32:
902 return isStore ? AArch64::STRWui : AArch64::LDRWui;
903 case 64:
904 return isStore ? AArch64::STRXui : AArch64::LDRXui;
905 }
906 break;
907 case AArch64::FPRRegBankID:
908 switch (OpSize) {
909 case 8:
910 return isStore ? AArch64::STRBui : AArch64::LDRBui;
911 case 16:
912 return isStore ? AArch64::STRHui : AArch64::LDRHui;
913 case 32:
914 return isStore ? AArch64::STRSui : AArch64::LDRSui;
915 case 64:
916 return isStore ? AArch64::STRDui : AArch64::LDRDui;
917 case 128:
918 return isStore ? AArch64::STRQui : AArch64::LDRQui;
919 }
920 break;
921 }
922 return GenericOpc;
923}
924
925/// Helper function for selectCopy. Inserts a subregister copy from \p SrcReg
926/// to \p *To.
927///
928/// E.g "To = COPY SrcReg:SubReg"
929static bool copySubReg(MachineInstr &I, MachineRegisterInfo &MRI,
930 const RegisterBankInfo &RBI, Register SrcReg,
931 const TargetRegisterClass *To, unsigned SubReg) {
932 assert(SrcReg.isValid() && "Expected a valid source register?");
933 assert(To && "Destination register class cannot be null");
934 assert(SubReg && "Expected a valid subregister");
935
936 MachineIRBuilder MIB(I);
937 auto SubRegCopy =
938 MIB.buildInstr(TargetOpcode::COPY, {To}, {}).addReg(SrcReg, 0, SubReg);
939 MachineOperand &RegOp = I.getOperand(1);
940 RegOp.setReg(SubRegCopy.getReg(0));
941
942 // It's possible that the destination register won't be constrained. Make
943 // sure that happens.
944 if (!I.getOperand(0).getReg().isPhysical())
945 RBI.constrainGenericRegister(I.getOperand(0).getReg(), *To, MRI);
946
947 return true;
948}
949
950/// Helper function to get the source and destination register classes for a
951/// copy. Returns a std::pair containing the source register class for the
952/// copy, and the destination register class for the copy. If a register class
953/// cannot be determined, then it will be nullptr.
954static std::pair<const TargetRegisterClass *, const TargetRegisterClass *>
955getRegClassesForCopy(MachineInstr &I, const TargetInstrInfo &TII,
956 MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI,
957 const RegisterBankInfo &RBI) {
958 Register DstReg = I.getOperand(0).getReg();
959 Register SrcReg = I.getOperand(1).getReg();
960 const RegisterBank &DstRegBank = *RBI.getRegBank(DstReg, MRI, TRI);
961 const RegisterBank &SrcRegBank = *RBI.getRegBank(SrcReg, MRI, TRI);
962
963 TypeSize DstSize = RBI.getSizeInBits(DstReg, MRI, TRI);
964 TypeSize SrcSize = RBI.getSizeInBits(SrcReg, MRI, TRI);
965
966 // Special casing for cross-bank copies of s1s. We can technically represent
967 // a 1-bit value with any size of register. The minimum size for a GPR is 32
968 // bits. So, we need to put the FPR on 32 bits as well.
969 //
970 // FIXME: I'm not sure if this case holds true outside of copies. If it does,
971 // then we can pull it into the helpers that get the appropriate class for a
972 // register bank. Or make a new helper that carries along some constraint
973 // information.
974 if (SrcRegBank != DstRegBank &&
975 (DstSize == TypeSize::getFixed(1) && SrcSize == TypeSize::getFixed(1)))
976 SrcSize = DstSize = TypeSize::getFixed(32);
977
978 return {getMinClassForRegBank(SrcRegBank, SrcSize, true),
979 getMinClassForRegBank(DstRegBank, DstSize, true)};
980}
981
982// FIXME: We need some sort of API in RBI/TRI to allow generic code to
983// constrain operands of simple instructions given a TargetRegisterClass
984// and LLT
985static bool selectDebugInstr(MachineInstr &I, MachineRegisterInfo &MRI,
986 const RegisterBankInfo &RBI) {
987 for (MachineOperand &MO : I.operands()) {
988 if (!MO.isReg())
989 continue;
990 Register Reg = MO.getReg();
991 if (!Reg)
992 continue;
993 if (Reg.isPhysical())
994 continue;
995 LLT Ty = MRI.getType(Reg);
996 const RegClassOrRegBank &RegClassOrBank = MRI.getRegClassOrRegBank(Reg);
997 const TargetRegisterClass *RC =
998 dyn_cast_if_present<const TargetRegisterClass *>(RegClassOrBank);
999 if (!RC) {
1000 const RegisterBank &RB = *cast<const RegisterBank *>(RegClassOrBank);
1001 RC = getRegClassForTypeOnBank(Ty, RB);
1002 if (!RC) {
1003 LLVM_DEBUG(
1004 dbgs() << "Warning: DBG_VALUE operand has unexpected size/bank\n");
1005 break;
1006 }
1007 }
1008 RBI.constrainGenericRegister(Reg, *RC, MRI);
1009 }
1010
1011 return true;
1012}
1013
1014static bool selectCopy(MachineInstr &I, const TargetInstrInfo &TII,
1015 MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI,
1016 const RegisterBankInfo &RBI) {
1017 Register DstReg = I.getOperand(0).getReg();
1018 Register SrcReg = I.getOperand(1).getReg();
1019 const RegisterBank &DstRegBank = *RBI.getRegBank(DstReg, MRI, TRI);
1020 const RegisterBank &SrcRegBank = *RBI.getRegBank(SrcReg, MRI, TRI);
1021
1022 // Find the correct register classes for the source and destination registers.
1023 const TargetRegisterClass *SrcRC;
1024 const TargetRegisterClass *DstRC;
1025 std::tie(SrcRC, DstRC) = getRegClassesForCopy(I, TII, MRI, TRI, RBI);
1026
1027 if (!DstRC) {
1028 LLVM_DEBUG(dbgs() << "Unexpected dest size "
1029 << RBI.getSizeInBits(DstReg, MRI, TRI) << '\n');
1030 return false;
1031 }
1032
1033 // Is this a copy? If so, then we may need to insert a subregister copy.
1034 if (I.isCopy()) {
1035 // Yes. Check if there's anything to fix up.
1036 if (!SrcRC) {
1037 LLVM_DEBUG(dbgs() << "Couldn't determine source register class\n");
1038 return false;
1039 }
1040
1041 const TypeSize SrcSize = TRI.getRegSizeInBits(*SrcRC);
1042 const TypeSize DstSize = TRI.getRegSizeInBits(*DstRC);
1043 unsigned SubReg;
1044
1045 // If the source bank doesn't support a subregister copy small enough,
1046 // then we first need to copy to the destination bank.
1047 if (getMinSizeForRegBank(SrcRegBank) > DstSize) {
1048 const TargetRegisterClass *DstTempRC =
1049 getMinClassForRegBank(DstRegBank, SrcSize, /* GetAllRegSet */ true);
1050 getSubRegForClass(DstRC, TRI, SubReg);
1051
1052 MachineIRBuilder MIB(I);
1053 auto Copy = MIB.buildCopy({DstTempRC}, {SrcReg});
1054 copySubReg(I, MRI, RBI, Copy.getReg(0), DstRC, SubReg);
1055 } else if (SrcSize > DstSize) {
1056 // If the source register is bigger than the destination we need to
1057 // perform a subregister copy.
1058 const TargetRegisterClass *SubRegRC =
1059 getMinClassForRegBank(SrcRegBank, DstSize, /* GetAllRegSet */ true);
1060 getSubRegForClass(SubRegRC, TRI, SubReg);
1061 copySubReg(I, MRI, RBI, SrcReg, DstRC, SubReg);
1062 } else if (DstSize > SrcSize) {
1063 // If the destination register is bigger than the source we need to do
1064 // a promotion using SUBREG_TO_REG.
1065 const TargetRegisterClass *PromotionRC =
1066 getMinClassForRegBank(SrcRegBank, DstSize, /* GetAllRegSet */ true);
1067 getSubRegForClass(SrcRC, TRI, SubReg);
1068
1069 Register PromoteReg = MRI.createVirtualRegister(PromotionRC);
1070 BuildMI(*I.getParent(), I, I.getDebugLoc(),
1071 TII.get(AArch64::SUBREG_TO_REG), PromoteReg)
1072 .addImm(0)
1073 .addUse(SrcReg)
1074 .addImm(SubReg);
1075 MachineOperand &RegOp = I.getOperand(1);
1076 RegOp.setReg(PromoteReg);
1077 }
1078
1079 // If the destination is a physical register, then there's nothing to
1080 // change, so we're done.
1081 if (DstReg.isPhysical())
1082 return true;
1083 }
1084
1085 // No need to constrain SrcReg. It will get constrained when we hit another
1086 // of its uses or defs. Copies do not have constraints.
1087 if (!RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) {
1088 LLVM_DEBUG(dbgs() << "Failed to constrain " << TII.getName(I.getOpcode())
1089 << " operand\n");
1090 return false;
1091 }
1092
1093 // If this is a GPR ZEXT, we just want to reduce it down into a copy.
1094 // The sizes will be mismatched with the source being < 32 bits, but that's OK.
1095 if (I.getOpcode() == TargetOpcode::G_ZEXT) {
1096 I.setDesc(TII.get(AArch64::COPY));
1097 assert(SrcRegBank.getID() == AArch64::GPRRegBankID);
1098 return selectCopy(I, TII, MRI, TRI, RBI);
1099 }
1100
1101 I.setDesc(TII.get(AArch64::COPY));
1102 return true;
1103}
1104
1105MachineInstr *
1106AArch64InstructionSelector::emitSelect(Register Dst, Register True,
1107 Register False, AArch64CC::CondCode CC,
1108 MachineIRBuilder &MIB) const {
1109 MachineRegisterInfo &MRI = *MIB.getMRI();
1110 assert(RBI.getRegBank(False, MRI, TRI)->getID() ==
1111 RBI.getRegBank(True, MRI, TRI)->getID() &&
1112 "Expected both select operands to have the same regbank?");
1113 LLT Ty = MRI.getType(True);
1114 if (Ty.isVector())
1115 return nullptr;
1116 const unsigned Size = Ty.getSizeInBits();
1117 assert((Size == 32 || Size == 64) &&
1118 "Expected 32 bit or 64 bit select only?");
1119 const bool Is32Bit = Size == 32;
1120 if (RBI.getRegBank(True, MRI, TRI)->getID() != AArch64::GPRRegBankID) {
1121 unsigned Opc = Is32Bit ? AArch64::FCSELSrrr : AArch64::FCSELDrrr;
1122 auto FCSel = MIB.buildInstr(Opc, {Dst}, {True, False}).addImm(CC);
1123 constrainSelectedInstRegOperands(*FCSel, TII, TRI, RBI);
1124 return &*FCSel;
1125 }
1126
1127 // By default, we'll try and emit a CSEL.
1128 unsigned Opc = Is32Bit ? AArch64::CSELWr : AArch64::CSELXr;
1129 bool Optimized = false;
1130 auto TryFoldBinOpIntoSelect = [&Opc, Is32Bit, &CC, &MRI,
1131 &Optimized](Register &Reg, Register &OtherReg,
1132 bool Invert) {
1133 if (Optimized)
1134 return false;
1135
1136 // Attempt to fold:
1137 //
1138 // %sub = G_SUB 0, %x
1139 // %select = G_SELECT cc, %reg, %sub
1140 //
1141 // Into:
1142 // %select = CSNEG %reg, %x, cc
1143 Register MatchReg;
1144 if (mi_match(Reg, MRI, m_Neg(m_Reg(MatchReg)))) {
1145 Opc = Is32Bit ? AArch64::CSNEGWr : AArch64::CSNEGXr;
1146 Reg = MatchReg;
1147 if (Invert) {
1148 CC = AArch64CC::getInvertedCondCode(CC);
1149 std::swap(Reg, OtherReg);
1150 }
1151 return true;
1152 }
1153
1154 // Attempt to fold:
1155 //
1156 // %xor = G_XOR %x, -1
1157 // %select = G_SELECT cc, %reg, %xor
1158 //
1159 // Into:
1160 // %select = CSINV %reg, %x, cc
1161 if (mi_match(Reg, MRI, m_Not(m_Reg(MatchReg)))) {
1162 Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;
1163 Reg = MatchReg;
1164 if (Invert) {
1165 CC = AArch64CC::getInvertedCondCode(CC);
1166 std::swap(Reg, OtherReg);
1167 }
1168 return true;
1169 }
1170
1171 // Attempt to fold:
1172 //
1173 // %add = G_ADD %x, 1
1174 // %select = G_SELECT cc, %reg, %add
1175 //
1176 // Into:
1177 // %select = CSINC %reg, %x, cc
1178 if (mi_match(Reg, MRI,
1179 m_any_of(m_GAdd(m_Reg(MatchReg), m_SpecificICst(1)),
1180 m_GPtrAdd(m_Reg(MatchReg), m_SpecificICst(1))))) {
1181 Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;
1182 Reg = MatchReg;
1183 if (Invert) {
1184 CC = AArch64CC::getInvertedCondCode(CC);
1185 std::swap(Reg, OtherReg);
1186 }
1187 return true;
1188 }
1189
1190 return false;
1191 };
1192
1193 // Helper lambda which tries to use CSINC/CSINV for the instruction when its
1194 // true/false values are constants.
1195 // FIXME: All of these patterns already exist in tablegen. We should be
1196 // able to import these.
1197 auto TryOptSelectCst = [&Opc, &True, &False, &CC, Is32Bit, &MRI,
1198 &Optimized]() {
1199 if (Optimized)
1200 return false;
1201 auto TrueCst = getIConstantVRegValWithLookThrough(True, MRI);
1202 auto FalseCst = getIConstantVRegValWithLookThrough(False, MRI);
1203 if (!TrueCst && !FalseCst)
1204 return false;
1205
1206 Register ZReg = Is32Bit ? AArch64::WZR : AArch64::XZR;
1207 if (TrueCst && FalseCst) {
1208 int64_t T = TrueCst->Value.getSExtValue();
1209 int64_t F = FalseCst->Value.getSExtValue();
1210
1211 if (T == 0 && F == 1) {
1212 // G_SELECT cc, 0, 1 -> CSINC zreg, zreg, cc
1213 Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;
1214 True = ZReg;
1215 False = ZReg;
1216 return true;
1217 }
1218
1219 if (T == 0 && F == -1) {
1220 // G_SELECT cc 0, -1 -> CSINV zreg, zreg cc
1221 Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;
1222 True = ZReg;
1223 False = ZReg;
1224 return true;
1225 }
1226 }
1227
1228 if (TrueCst) {
1229 int64_t T = TrueCst->Value.getSExtValue();
1230 if (T == 1) {
1231 // G_SELECT cc, 1, f -> CSINC f, zreg, inv_cc
1232 Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;
1233 True = False;
1234 False = ZReg;
1235 CC = AArch64CC::getInvertedCondCode(CC);
1236 return true;
1237 }
1238
1239 if (T == -1) {
1240 // G_SELECT cc, -1, f -> CSINV f, zreg, inv_cc
1241 Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;
1242 True = False;
1243 False = ZReg;
1244 CC = AArch64CC::getInvertedCondCode(CC);
1245 return true;
1246 }
1247 }
1248
1249 if (FalseCst) {
1250 int64_t F = FalseCst->Value.getSExtValue();
1251 if (F == 1) {
1252 // G_SELECT cc, t, 1 -> CSINC t, zreg, cc
1253 Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;
1254 False = ZReg;
1255 return true;
1256 }
1257
1258 if (F == -1) {
1259 // G_SELECT cc, t, -1 -> CSINC t, zreg, cc
1260 Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;
1261 False = ZReg;
1262 return true;
1263 }
1264 }
1265 return false;
1266 };
1267
1268 Optimized |= TryFoldBinOpIntoSelect(False, True, /*Invert = */ false);
1269 Optimized |= TryFoldBinOpIntoSelect(True, False, /*Invert = */ true);
1270 Optimized |= TryOptSelectCst();
1271 auto SelectInst = MIB.buildInstr(Opc, {Dst}, {True, False}).addImm(CC);
1272 constrainSelectedInstRegOperands(*SelectInst, TII, TRI, RBI);
1273 return &*SelectInst;
1274}
1275
1276static AArch64CC::CondCode changeICMPPredToAArch64CC(CmpInst::Predicate P,
1277 Register RHS = Register(),
1278 MachineRegisterInfo *MRI = nullptr) {
1279 switch (P) {
1280 default:
1281 llvm_unreachable("Unknown condition code!");
1282 case CmpInst::ICMP_NE:
1283 return AArch64CC::NE;
1284 case CmpInst::ICMP_EQ:
1285 return AArch64CC::EQ;
1286 case CmpInst::ICMP_SGT:
1287 return AArch64CC::GT;
1288 case CmpInst::ICMP_SGE:
1289 if (RHS && MRI) {
1290 auto ValAndVReg = getIConstantVRegValWithLookThrough(RHS, *MRI);
1291 if (ValAndVReg && ValAndVReg->Value == 0)
1292 return AArch64CC::PL;
1293 }
1294 return AArch64CC::GE;
1295 case CmpInst::ICMP_SLT:
1296 if (RHS && MRI) {
1297 auto ValAndVReg = getIConstantVRegValWithLookThrough(RHS, *MRI);
1298 if (ValAndVReg && ValAndVReg->Value == 0)
1299 return AArch64CC::MI;
1300 }
1301 return AArch64CC::LT;
1302 case CmpInst::ICMP_SLE:
1303 return AArch64CC::LE;
1304 case CmpInst::ICMP_UGT:
1305 return AArch64CC::HI;
1306 case CmpInst::ICMP_UGE:
1307 return AArch64CC::HS;
1308 case CmpInst::ICMP_ULT:
1309 return AArch64CC::LO;
1310 case CmpInst::ICMP_ULE:
1311 return AArch64CC::LS;
1312 }
1313}
1314
1315/// changeFPCCToORAArch64CC - Convert an IR fp condition code to an AArch64 CC.
1316static void changeFPCCToORAArch64CC(CmpInst::Predicate CC,
1317 AArch64CC::CondCode &CondCode,
1318 AArch64CC::CondCode &CondCode2) {
1319 CondCode2 = AArch64CC::AL;
1320 switch (CC) {
1321 default:
1322 llvm_unreachable("Unknown FP condition!");
1323 case CmpInst::FCMP_OEQ:
1324 CondCode = AArch64CC::EQ;
1325 break;
1326 case CmpInst::FCMP_OGT:
1327 CondCode = AArch64CC::GT;
1328 break;
1329 case CmpInst::FCMP_OGE:
1330 CondCode = AArch64CC::GE;
1331 break;
1332 case CmpInst::FCMP_OLT:
1333 CondCode = AArch64CC::MI;
1334 break;
1335 case CmpInst::FCMP_OLE:
1336 CondCode = AArch64CC::LS;
1337 break;
1338 case CmpInst::FCMP_ONE:
1339 CondCode = AArch64CC::MI;
1340 CondCode2 = AArch64CC::GT;
1341 break;
1342 case CmpInst::FCMP_ORD:
1343 CondCode = AArch64CC::VC;
1344 break;
1345 case CmpInst::FCMP_UNO:
1346 CondCode = AArch64CC::VS;
1347 break;
1348 case CmpInst::FCMP_UEQ:
1349 CondCode = AArch64CC::EQ;
1350 CondCode2 = AArch64CC::VS;
1351 break;
1352 case CmpInst::FCMP_UGT:
1353 CondCode = AArch64CC::HI;
1354 break;
1355 case CmpInst::FCMP_UGE:
1356 CondCode = AArch64CC::PL;
1357 break;
1358 case CmpInst::FCMP_ULT:
1359 CondCode = AArch64CC::LT;
1360 break;
1361 case CmpInst::FCMP_ULE:
1362 CondCode = AArch64CC::LE;
1363 break;
1364 case CmpInst::FCMP_UNE:
1365 CondCode = AArch64CC::NE;
1366 break;
1367 }
1368}
1369
1370/// Convert an IR fp condition code to an AArch64 CC.
1371/// This differs from changeFPCCToORAArch64CC in that it returns cond codes
1372/// that should be AND'ed together instead of OR'ed.
1373static void changeFCMPPredToAArch64CC(const CmpInst::Predicate CC,
1374 AArch64CC::CondCode &CondCode,
1375 AArch64CC::CondCode &CondCode2) {
1376 CondCode2 = AArch64CC::AL;
1377 switch (CC) {
1378 default:
1379 changeFPCCToORAArch64CC(CC, CondCode, CondCode2);
1380 assert(CondCode2 == AArch64CC::AL);
1381 break;
1382 case CmpInst::FCMP_ONE:
1383 // (a one b)
1384 // == ((a olt b) || (a ogt b))
1385 // == ((a ord b) && (a une b))
1386 CondCode = AArch64CC::VC;
1387 CondCode2 = AArch64CC::NE;
1388 break;
1389 case CmpInst::FCMP_UEQ:
1390 // (a ueq b)
1391 // == ((a uno b) || (a oeq b))
1392 // == ((a ule b) && (a uge b))
1393 CondCode = AArch64CC::PL;
1394 CondCode2 = AArch64CC::LE;
1395 break;
1396 }
1397}
1398
1399/// Return a register which can be used as a bit to test in a TB(N)Z.
1400static Register getTestBitReg(Register Reg, uint64_t &Bit, bool &Invert,
1401 MachineRegisterInfo &MRI) {
1402 assert(Reg.isValid() && "Expected valid register!");
1403 bool HasZext = false;
1404 while (MachineInstr *MI = getDefIgnoringCopies(Reg, MRI)) {
1405 unsigned Opc = MI->getOpcode();
1406
1407 if (!MI->getOperand(0).isReg() ||
1408 !MRI.hasOneNonDBGUse(MI->getOperand(0).getReg()))
1409 break;
1410
1411 // (tbz (any_ext x), b) -> (tbz x, b) if we don't use the extended bits.
1412 //
1413 // (tbz (trunc x), b) -> (tbz x, b) is always safe, because the bit number
1414 // on the truncated x is the same as the bit number on x.
1415 if (Opc == TargetOpcode::G_ANYEXT || Opc == TargetOpcode::G_ZEXT ||
1416 Opc == TargetOpcode::G_TRUNC) {
1417 if (Opc == TargetOpcode::G_ZEXT)
1418 HasZext = true;
1419
1420 Register NextReg = MI->getOperand(1).getReg();
1421 // Did we find something worth folding?
1422 if (!NextReg.isValid() || !MRI.hasOneNonDBGUse(NextReg))
1423 break;
1424
1425 // NextReg is worth folding. Keep looking.
1426 Reg = NextReg;
1427 continue;
1428 }
1429
1430 // Attempt to find a suitable operation with a constant on one side.
1431 std::optional<uint64_t> C;
1432 Register TestReg;
1433 switch (Opc) {
1434 default:
1435 break;
1436 case TargetOpcode::G_AND:
1437 case TargetOpcode::G_XOR: {
1438 TestReg = MI->getOperand(1).getReg();
1439 Register ConstantReg = MI->getOperand(2).getReg();
1440 auto VRegAndVal = getIConstantVRegValWithLookThrough(ConstantReg, MRI);
1441 if (!VRegAndVal) {
1442 // AND commutes, check the other side for a constant.
1443 // FIXME: Can we canonicalize the constant so that it's always on the
1444 // same side at some point earlier?
1445 std::swap(ConstantReg, TestReg);
1446 VRegAndVal = getIConstantVRegValWithLookThrough(ConstantReg, MRI);
1447 }
1448 if (VRegAndVal) {
1449 if (HasZext)
1450 C = VRegAndVal->Value.getZExtValue();
1451 else
1452 C = VRegAndVal->Value.getSExtValue();
1453 }
1454 break;
1455 }
1456 case TargetOpcode::G_ASHR:
1457 case TargetOpcode::G_LSHR:
1458 case TargetOpcode::G_SHL: {
1459 TestReg = MI->getOperand(1).getReg();
1460 auto VRegAndVal =
1461 getIConstantVRegValWithLookThrough(MI->getOperand(2).getReg(), MRI);
1462 if (VRegAndVal)
1463 C = VRegAndVal->Value.getSExtValue();
1464 break;
1465 }
1466 }
1467
1468 // Didn't find a constant or viable register. Bail out of the loop.
1469 if (!C || !TestReg.isValid())
1470 break;
1471
1472 // We found a suitable instruction with a constant. Check to see if we can
1473 // walk through the instruction.
1474 Register NextReg;
1475 unsigned TestRegSize = MRI.getType(TestReg).getSizeInBits();
1476 switch (Opc) {
1477 default:
1478 break;
1479 case TargetOpcode::G_AND:
1480 // (tbz (and x, m), b) -> (tbz x, b) when the b-th bit of m is set.
1481 if ((*C >> Bit) & 1)
1482 NextReg = TestReg;
1483 break;
1484 case TargetOpcode::G_SHL:
1485 // (tbz (shl x, c), b) -> (tbz x, b-c) when b-c is positive and fits in
1486 // the type of the register.
1487 if (*C <= Bit && (Bit - *C) < TestRegSize) {
1488 NextReg = TestReg;
1489 Bit = Bit - *C;
1490 }
1491 break;
1492 case TargetOpcode::G_ASHR:
1493 // (tbz (ashr x, c), b) -> (tbz x, b+c) or (tbz x, msb) if b+c is > # bits
1494 // in x
1495 NextReg = TestReg;
1496 Bit = Bit + *C;
1497 if (Bit >= TestRegSize)
1498 Bit = TestRegSize - 1;
1499 break;
1500 case TargetOpcode::G_LSHR:
1501 // (tbz (lshr x, c), b) -> (tbz x, b+c) when b + c is < # bits in x
1502 if ((Bit + *C) < TestRegSize) {
1503 NextReg = TestReg;
1504 Bit = Bit + *C;
1505 }
1506 break;
1507 case TargetOpcode::G_XOR:
1508 // We can walk through a G_XOR by inverting whether we use tbz/tbnz when
1509 // appropriate.
1510 //
1511 // e.g. If x' = xor x, c, and the b-th bit is set in c then
1512 //
1513 // tbz x', b -> tbnz x, b
1514 //
1515 // Because x' only has the b-th bit set if x does not.
1516 if ((*C >> Bit) & 1)
1517 Invert = !Invert;
1518 NextReg = TestReg;
1519 break;
1520 }
1521
1522 // Check if we found anything worth folding.
1523 if (!NextReg.isValid())
1524 return Reg;
1525 Reg = NextReg;
1526 }
1527
1528 return Reg;
1529}
1530
1531MachineInstr *AArch64InstructionSelector::emitTestBit(
1532 Register TestReg, uint64_t Bit, bool IsNegative, MachineBasicBlock *DstMBB,
1533 MachineIRBuilder &MIB) const {
1534 assert(TestReg.isValid());
1535 assert(ProduceNonFlagSettingCondBr &&
1536 "Cannot emit TB(N)Z with speculation tracking!");
1537 MachineRegisterInfo &MRI = *MIB.getMRI();
1538
1539 // Attempt to optimize the test bit by walking over instructions.
1540 TestReg = getTestBitReg(TestReg, Bit, IsNegative, MRI);
1541 LLT Ty = MRI.getType(TestReg);
1542 unsigned Size = Ty.getSizeInBits();
1543 assert(!Ty.isVector() && "Expected a scalar!");
1544 assert(Bit < 64 && "Bit is too large!");
1545
1546 // When the test register is a 64-bit register, we have to narrow to make
1547 // TBNZW work.
1548 bool UseWReg = Bit < 32;
1549 unsigned NecessarySize = UseWReg ? 32 : 64;
1550 if (Size != NecessarySize)
1551 TestReg = moveScalarRegClass(
1552 TestReg, UseWReg ? AArch64::GPR32RegClass : AArch64::GPR64RegClass,
1553 MIB);
1554
1555 static const unsigned OpcTable[2][2] = {{AArch64::TBZX, AArch64::TBNZX},
1556 {AArch64::TBZW, AArch64::TBNZW}};
1557 unsigned Opc = OpcTable[UseWReg][IsNegative];
1558 auto TestBitMI =
1559 MIB.buildInstr(Opc).addReg(TestReg).addImm(Bit).addMBB(DstMBB);
1560 constrainSelectedInstRegOperands(*TestBitMI, TII, TRI, RBI);
1561 return &*TestBitMI;
1562}
1563
1564bool AArch64InstructionSelector::tryOptAndIntoCompareBranch(
1565 MachineInstr &AndInst, bool Invert, MachineBasicBlock *DstMBB,
1566 MachineIRBuilder &MIB) const {
1567 assert(AndInst.getOpcode() == TargetOpcode::G_AND && "Expected G_AND only?");
1568 // Given something like this:
1569 //
1570 // %x = ...Something...
1571 // %one = G_CONSTANT i64 1
1572 // %zero = G_CONSTANT i64 0
1573 // %and = G_AND %x, %one
1574 // %cmp = G_ICMP intpred(ne), %and, %zero
1575 // %cmp_trunc = G_TRUNC %cmp
1576 // G_BRCOND %cmp_trunc, %bb.3
1577 //
1578 // We want to try and fold the AND into the G_BRCOND and produce either a
1579 // TBNZ (when we have intpred(ne)) or a TBZ (when we have intpred(eq)).
1580 //
1581 // In this case, we'd get
1582 //
1583 // TBNZ %x %bb.3
1584 //
1585
1586 // Check if the AND has a constant on its RHS which we can use as a mask.
1587 // If it's a power of 2, then it's the same as checking a specific bit.
1588 // (e.g, ANDing with 8 == ANDing with 000...100 == testing if bit 3 is set)
1589 auto MaybeBit = getIConstantVRegValWithLookThrough(
1590 AndInst.getOperand(2).getReg(), *MIB.getMRI());
1591 if (!MaybeBit)
1592 return false;
1593
1594 int32_t Bit = MaybeBit->Value.exactLogBase2();
1595 if (Bit < 0)
1596 return false;
1597
1598 Register TestReg = AndInst.getOperand(1).getReg();
1599
1600 // Emit a TB(N)Z.
1601 emitTestBit(TestReg, Bit, Invert, DstMBB, MIB);
1602 return true;
1603}
1604
1605MachineInstr *AArch64InstructionSelector::emitCBZ(Register CompareReg,
1606 bool IsNegative,
1607 MachineBasicBlock *DestMBB,
1608 MachineIRBuilder &MIB) const {
1609 assert(ProduceNonFlagSettingCondBr && "CBZ does not set flags!");
1610 MachineRegisterInfo &MRI = *MIB.getMRI();
1611 assert(RBI.getRegBank(CompareReg, MRI, TRI)->getID() ==
1612 AArch64::GPRRegBankID &&
1613 "Expected GPRs only?");
1614 auto Ty = MRI.getType(CompareReg);
1615 unsigned Width = Ty.getSizeInBits();
1616 assert(!Ty.isVector() && "Expected scalar only?");
1617 assert(Width <= 64 && "Expected width to be at most 64?");
1618 static const unsigned OpcTable[2][2] = {{AArch64::CBZW, AArch64::CBZX},
1619 {AArch64::CBNZW, AArch64::CBNZX}};
1620 unsigned Opc = OpcTable[IsNegative][Width == 64];
1621 auto BranchMI = MIB.buildInstr(Opc, {}, {CompareReg}).addMBB(DestMBB);
1622 constrainSelectedInstRegOperands(*BranchMI, TII, TRI, RBI);
1623 return &*BranchMI;
1624}
1625
1626bool AArch64InstructionSelector::selectCompareBranchFedByFCmp(
1627 MachineInstr &I, MachineInstr &FCmp, MachineIRBuilder &MIB) const {
1628 assert(FCmp.getOpcode() == TargetOpcode::G_FCMP);
1629 assert(I.getOpcode() == TargetOpcode::G_BRCOND);
1630 // Unfortunately, the mapping of LLVM FP CC's onto AArch64 CC's isn't
1631 // totally clean. Some of them require two branches to implement.
1632 auto Pred = (CmpInst::Predicate)FCmp.getOperand(1).getPredicate();
1633 emitFPCompare(FCmp.getOperand(2).getReg(), FCmp.getOperand(3).getReg(), MIB,
1634 Pred);
1635 AArch64CC::CondCode CC1, CC2;
1636 changeFCMPPredToAArch64CC(Pred, CC1, CC2);
1637 MachineBasicBlock *DestMBB = I.getOperand(1).getMBB();
1638 MIB.buildInstr(AArch64::Bcc, {}, {}).addImm(CC1).addMBB(DestMBB);
1639 if (CC2 != AArch64CC::AL)
1640 MIB.buildInstr(AArch64::Bcc, {}, {}).addImm(CC2).addMBB(DestMBB);
1641 I.eraseFromParent();
1642 return true;
1643}
1644
1645bool AArch64InstructionSelector::tryOptCompareBranchFedByICmp(
1646 MachineInstr &I, MachineInstr &ICmp, MachineIRBuilder &MIB) const {
1647 assert(ICmp.getOpcode() == TargetOpcode::G_ICMP);
1648 assert(I.getOpcode() == TargetOpcode::G_BRCOND);
1649 // Attempt to optimize the G_BRCOND + G_ICMP into a TB(N)Z/CB(N)Z.
1650 //
1651 // Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z
1652 // instructions will not be produced, as they are conditional branch
1653 // instructions that do not set flags.
1654 if (!ProduceNonFlagSettingCondBr)
1655 return false;
1656
1657 MachineRegisterInfo &MRI = *MIB.getMRI();
1658 MachineBasicBlock *DestMBB = I.getOperand(1).getMBB();
1659 auto Pred =
1660 static_cast<CmpInst::Predicate>(ICmp.getOperand(1).getPredicate());
1661 Register LHS = ICmp.getOperand(2).getReg();
1662 Register RHS = ICmp.getOperand(3).getReg();
1663
1664 // We're allowed to emit a TB(N)Z/CB(N)Z. Try to do that.
1665 auto VRegAndVal = getIConstantVRegValWithLookThrough(RHS, MRI);
1666 MachineInstr *AndInst = getOpcodeDef(TargetOpcode::G_AND, LHS, MRI);
1667
1668 // When we can emit a TB(N)Z, prefer that.
1669 //
1670 // Handle non-commutative condition codes first.
1671 // Note that we don't want to do this when we have a G_AND because it can
1672 // become a tst. The tst will make the test bit in the TB(N)Z redundant.
1673 if (VRegAndVal && !AndInst) {
1674 int64_t C = VRegAndVal->Value.getSExtValue();
1675
1676 // When we have a greater-than comparison, we can just test if the msb is
1677 // zero.
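// For example (s64): a G_BRCOND fed by "G_ICMP sgt %x, -1" can be selected
// as "tbz %x, #63, %bb", since %x > -1 holds exactly when the sign bit is
// clear.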
1678 if (C == -1 && Pred == CmpInst::ICMP_SGT) {
1679 uint64_t Bit = MRI.getType(LHS).getSizeInBits() - 1;
1680 emitTestBit(LHS, Bit, /*IsNegative = */ false, DestMBB, MIB);
1681 I.eraseFromParent();
1682 return true;
1683 }
1684
1685 // When we have a less-than comparison, we can just test if the msb is not
1686 // zero.
1687 if (C == 0 && Pred == CmpInst::ICMP_SLT) {
1688 uint64_t Bit = MRI.getType(LHS).getSizeInBits() - 1;
1689 emitTestBit(LHS, Bit, /*IsNegative = */ true, DestMBB, MIB);
1690 I.eraseFromParent();
1691 return true;
1692 }
1693
1694 // Inversely, if we have a signed greater-than-or-equal comparison to zero,
1695 // we can test if the msb is zero.
1696 if (C == 0 && Pred == CmpInst::ICMP_SGE) {
1697 uint64_t Bit = MRI.getType(LHS).getSizeInBits() - 1;
1698 emitTestBit(LHS, Bit, /*IsNegative = */ false, DestMBB, MIB);
1699 I.eraseFromParent();
1700 return true;
1701 }
1702 }
1703
1704 // Attempt to handle commutative condition codes. Right now, that's only
1705 // eq/ne.
1706 if (ICmpInst::isEquality(Pred)) {
1707 if (!VRegAndVal) {
1708 std::swap(RHS, LHS);
1709 VRegAndVal = getIConstantVRegValWithLookThrough(RHS, MRI);
1710 AndInst = getOpcodeDef(TargetOpcode::G_AND, LHS, MRI);
1711 }
1712
1713 if (VRegAndVal && VRegAndVal->Value == 0) {
1714 // If there's a G_AND feeding into this branch, try to fold it away by
1715 // emitting a TB(N)Z instead.
1716 //
1717 // Note: If we have LT, then it *is* possible to fold, but it wouldn't be
1718 // beneficial. When we have an AND and LT, we need a TST/ANDS, so folding
1719 // would be redundant.
1720 if (AndInst &&
1721 tryOptAndIntoCompareBranch(
1722 *AndInst, /*Invert = */ Pred == CmpInst::ICMP_NE, DestMBB, MIB)) {
1723 I.eraseFromParent();
1724 return true;
1725 }
1726
1727 // Otherwise, try to emit a CB(N)Z instead.
1728 auto LHSTy = MRI.getType(LHS);
1729 if (!LHSTy.isVector() && LHSTy.getSizeInBits() <= 64) {
1730 emitCBZ(LHS, /*IsNegative = */ Pred == CmpInst::ICMP_NE, DestMBB, MIB);
1731 I.eraseFromParent();
1732 return true;
1733 }
1734 }
1735 }
1736
1737 return false;
1738}
1739
1740bool AArch64InstructionSelector::selectCompareBranchFedByICmp(
1741 MachineInstr &I, MachineInstr &ICmp, MachineIRBuilder &MIB) const {
1742 assert(ICmp.getOpcode() == TargetOpcode::G_ICMP);
1743 assert(I.getOpcode() == TargetOpcode::G_BRCOND);
1744 if (tryOptCompareBranchFedByICmp(I, ICmp, MIB))
1745 return true;
1746
1747 // Couldn't optimize. Emit a compare + a Bcc.
1748 MachineBasicBlock *DestMBB = I.getOperand(1).getMBB();
1749 auto &PredOp = ICmp.getOperand(1);
1750 emitIntegerCompare(ICmp.getOperand(2), ICmp.getOperand(3), PredOp, MIB);
1751 const AArch64CC::CondCode CC = changeICMPPredToAArch64CC(
1752 static_cast<CmpInst::Predicate>(PredOp.getPredicate()),
1753 ICmp.getOperand(3).getReg(), MIB.getMRI());
1754 MIB.buildInstr(AArch64::Bcc, {}, {}).addImm(CC).addMBB(DestMBB);
1755 I.eraseFromParent();
1756 return true;
1757}
1758
1759bool AArch64InstructionSelector::selectCompareBranch(
1760 MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) {
1761 Register CondReg = I.getOperand(0).getReg();
1762 MachineInstr *CCMI = MRI.getVRegDef(CondReg);
1763 // Try to select the G_BRCOND using whatever is feeding the condition if
1764 // possible.
1765 unsigned CCMIOpc = CCMI->getOpcode();
1766 if (CCMIOpc == TargetOpcode::G_FCMP)
1767 return selectCompareBranchFedByFCmp(I, *CCMI, MIB);
1768 if (CCMIOpc == TargetOpcode::G_ICMP)
1769 return selectCompareBranchFedByICmp(I, *CCMI, MIB);
1770
1771 // Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z
1772 // instructions will not be produced, as they are conditional branch
1773 // instructions that do not set flags.
1774 if (ProduceNonFlagSettingCondBr) {
1775 emitTestBit(CondReg, /*Bit = */ 0, /*IsNegative = */ true,
1776 I.getOperand(1).getMBB(), MIB);
1777 I.eraseFromParent();
1778 return true;
1779 }
1780
1781 // Can't emit TB(N)Z/CB(N)Z. Emit a tst + bcc instead.
1782 auto TstMI =
1783 MIB.buildInstr(AArch64::ANDSWri, {LLT::scalar(32)}, {CondReg}).addImm(1);
1784 constrainSelectedInstRegOperands(*TstMI, TII, TRI, RBI);
1785 auto Bcc = MIB.buildInstr(AArch64::Bcc)
1786 .addImm(AArch64CC::NE)
1787 .addMBB(I.getOperand(1).getMBB());
1788 I.eraseFromParent();
1789 return constrainSelectedInstRegOperands(*Bcc, TII, TRI, RBI);
1790}
1791
1792/// Returns the element immediate value of a vector shift operand if found.
1793/// This needs to detect a splat-like operation, e.g. a G_BUILD_VECTOR.
1794static std::optional<int64_t> getVectorShiftImm(Register Reg,
1795 MachineRegisterInfo &MRI) {
1796 assert(MRI.getType(Reg).isVector() && "Expected a *vector* shift operand");
1797 MachineInstr *OpMI = MRI.getVRegDef(Reg);
1798 return getAArch64VectorSplatScalar(*OpMI, MRI);
1799}
1800
1801/// Matches and returns the shift immediate value for a SHL instruction given
1802/// a shift operand.
1803static std::optional<int64_t> getVectorSHLImm(LLT SrcTy, Register Reg,
1804 MachineRegisterInfo &MRI) {
1805 std::optional<int64_t> ShiftImm = getVectorShiftImm(Reg, MRI);
1806 if (!ShiftImm)
1807 return std::nullopt;
1808 // Check the immediate is in range for a SHL.
1809 int64_t Imm = *ShiftImm;
1810 if (Imm < 0)
1811 return std::nullopt;
1812 switch (SrcTy.getElementType().getSizeInBits()) {
1813 default:
1814 LLVM_DEBUG(dbgs() << "Unhandled element type for vector shift");
1815 return std::nullopt;
1816 case 8:
1817 if (Imm > 7)
1818 return std::nullopt;
1819 break;
1820 case 16:
1821 if (Imm > 15)
1822 return std::nullopt;
1823 break;
1824 case 32:
1825 if (Imm > 31)
1826 return std::nullopt;
1827 break;
1828 case 64:
1829 if (Imm > 63)
1830 return std::nullopt;
1831 break;
1832 }
1833 return Imm;
1834}
1835
1836bool AArch64InstructionSelector::selectVectorSHL(MachineInstr &I,
1837 MachineRegisterInfo &MRI) {
1838 assert(I.getOpcode() == TargetOpcode::G_SHL);
1839 Register DstReg = I.getOperand(0).getReg();
1840 const LLT Ty = MRI.getType(DstReg);
1841 Register Src1Reg = I.getOperand(1).getReg();
1842 Register Src2Reg = I.getOperand(2).getReg();
1843
1844 if (!Ty.isVector())
1845 return false;
1846
1847 // Check if we have a vector of constants on RHS that we can select as the
1848 // immediate form.
1849 std::optional<int64_t> ImmVal = getVectorSHLImm(Ty, Src2Reg, MRI);
1850
1851 unsigned Opc = 0;
1852 if (Ty == LLT::fixed_vector(2, 64)) {
1853 Opc = ImmVal ? AArch64::SHLv2i64_shift : AArch64::USHLv2i64;
1854 } else if (Ty == LLT::fixed_vector(4, 32)) {
1855 Opc = ImmVal ? AArch64::SHLv4i32_shift : AArch64::USHLv4i32;
1856 } else if (Ty == LLT::fixed_vector(2, 32)) {
1857 Opc = ImmVal ? AArch64::SHLv2i32_shift : AArch64::USHLv2i32;
1858 } else if (Ty == LLT::fixed_vector(4, 16)) {
1859 Opc = ImmVal ? AArch64::SHLv4i16_shift : AArch64::USHLv4i16;
1860 } else if (Ty == LLT::fixed_vector(8, 16)) {
1861 Opc = ImmVal ? AArch64::SHLv8i16_shift : AArch64::USHLv8i16;
1862 } else if (Ty == LLT::fixed_vector(16, 8)) {
1863 Opc = ImmVal ? AArch64::SHLv16i8_shift : AArch64::USHLv16i8;
1864 } else if (Ty == LLT::fixed_vector(8, 8)) {
1865 Opc = ImmVal ? AArch64::SHLv8i8_shift : AArch64::USHLv8i8;
1866 } else {
1867 LLVM_DEBUG(dbgs() << "Unhandled G_SHL type");
1868 return false;
1869 }
1870
1871 auto Shl = MIB.buildInstr(Opc, {DstReg}, {Src1Reg});
1872 if (ImmVal)
1873 Shl.addImm(*ImmVal);
1874 else
1875 Shl.addUse(Src2Reg);
1876 constrainSelectedInstRegOperands(*Shl, TII, TRI, RBI);
1877 I.eraseFromParent();
1878 return true;
1879}
1880
1881bool AArch64InstructionSelector::selectVectorAshrLshr(
1882 MachineInstr &I, MachineRegisterInfo &MRI) {
1883 assert(I.getOpcode() == TargetOpcode::G_ASHR ||
1884 I.getOpcode() == TargetOpcode::G_LSHR);
1885 Register DstReg = I.getOperand(0).getReg();
1886 const LLT Ty = MRI.getType(DstReg);
1887 Register Src1Reg = I.getOperand(1).getReg();
1888 Register Src2Reg = I.getOperand(2).getReg();
1889
1890 if (!Ty.isVector())
1891 return false;
1892
1893 bool IsASHR = I.getOpcode() == TargetOpcode::G_ASHR;
1894
1895 // We expect the immediate case to be lowered in the PostLegalCombiner to
1896 // AArch64ISD::VASHR or AArch64ISD::VLSHR equivalents.
1897
1898 // There is no shift-right-by-register instruction, but the shift-left-by-
1899 // register instruction takes a signed shift amount, where negative amounts
1900 // specify a right shift.
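// For example, a v4i32 G_ASHR by a register amount ends up roughly as:
//   neg  v1.4s, v1.4s
//   sshl v0.4s, v0.4s, v1.4s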
1901
1902 unsigned Opc = 0;
1903 unsigned NegOpc = 0;
1904 const TargetRegisterClass *RC =
1905 getRegClassForTypeOnBank(Ty, RBI.getRegBank(AArch64::FPRRegBankID));
1906 if (Ty == LLT::fixed_vector(2, 64)) {
1907 Opc = IsASHR ? AArch64::SSHLv2i64 : AArch64::USHLv2i64;
1908 NegOpc = AArch64::NEGv2i64;
1909 } else if (Ty == LLT::fixed_vector(4, 32)) {
1910 Opc = IsASHR ? AArch64::SSHLv4i32 : AArch64::USHLv4i32;
1911 NegOpc = AArch64::NEGv4i32;
1912 } else if (Ty == LLT::fixed_vector(2, 32)) {
1913 Opc = IsASHR ? AArch64::SSHLv2i32 : AArch64::USHLv2i32;
1914 NegOpc = AArch64::NEGv2i32;
1915 } else if (Ty == LLT::fixed_vector(4, 16)) {
1916 Opc = IsASHR ? AArch64::SSHLv4i16 : AArch64::USHLv4i16;
1917 NegOpc = AArch64::NEGv4i16;
1918 } else if (Ty == LLT::fixed_vector(8, 16)) {
1919 Opc = IsASHR ? AArch64::SSHLv8i16 : AArch64::USHLv8i16;
1920 NegOpc = AArch64::NEGv8i16;
1921 } else if (Ty == LLT::fixed_vector(16, 8)) {
1922 Opc = IsASHR ? AArch64::SSHLv16i8 : AArch64::USHLv16i8;
1923 NegOpc = AArch64::NEGv16i8;
1924 } else if (Ty == LLT::fixed_vector(8, 8)) {
1925 Opc = IsASHR ? AArch64::SSHLv8i8 : AArch64::USHLv8i8;
1926 NegOpc = AArch64::NEGv8i8;
1927 } else {
1928 LLVM_DEBUG(dbgs() << "Unhandled G_ASHR type");
1929 return false;
1930 }
1931
1932 auto Neg = MIB.buildInstr(NegOpc, {RC}, {Src2Reg});
1933 constrainSelectedInstRegOperands(*Neg, TII, TRI, RBI);
1934 auto SShl = MIB.buildInstr(Opc, {DstReg}, {Src1Reg, Neg});
1935 constrainSelectedInstRegOperands(*SShl, TII, TRI, RBI);
1936 I.eraseFromParent();
1937 return true;
1938}
1939
1940bool AArch64InstructionSelector::selectVaStartAAPCS(
1941 MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const {
1942
1943 if (STI.isCallingConvWin64(MF.getFunction().getCallingConv(),
1944 MF.getFunction().isVarArg()))
1945 return false;
1946
1947 // The layout of the va_list struct is specified in the AArch64 Procedure Call
1948 // Standard, section 10.1.5.
1949
1950 const AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
1951 const unsigned PtrSize = STI.isTargetILP32() ? 4 : 8;
1952 const auto *PtrRegClass =
1953 STI.isTargetILP32() ? &AArch64::GPR32RegClass : &AArch64::GPR64RegClass;
1954
1955 const MCInstrDesc &MCIDAddAddr =
1956 TII.get(STI.isTargetILP32() ? AArch64::ADDWri : AArch64::ADDXri);
1957 const MCInstrDesc &MCIDStoreAddr =
1958 TII.get(STI.isTargetILP32() ? AArch64::STRWui : AArch64::STRXui);
1959
1960 /*
1961 * typedef struct va_list {
1962 * void * stack; // next stack param
1963 * void * gr_top; // end of GP arg reg save area
1964 * void * vr_top; // end of FP/SIMD arg reg save area
1965 * int gr_offs; // offset from gr_top to next GP register arg
1966 * int vr_offs; // offset from vr_top to next FP/SIMD register arg
1967 * } va_list;
1968 */
1969 const auto VAList = I.getOperand(0).getReg();
1970
1971 // Our current offset in bytes from the va_list struct (VAList).
1972 unsigned OffsetBytes = 0;
1973
1974 // Helper function to store (FrameIndex + Imm) to VAList at offset OffsetBytes
1975 // and increment OffsetBytes by PtrSize.
1976 const auto PushAddress = [&](const int FrameIndex, const int64_t Imm) {
1977 const Register Top = MRI.createVirtualRegister(PtrRegClass);
1978 auto MIB = BuildMI(*I.getParent(), I, I.getDebugLoc(), MCIDAddAddr)
1979 .addDef(Top)
1980 .addFrameIndex(FrameIndex)
1981 .addImm(Imm)
1982 .addImm(0);
1983 constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
1984
1985 const auto *MMO = *I.memoperands_begin();
1986 MIB = BuildMI(*I.getParent(), I, I.getDebugLoc(), MCIDStoreAddr)
1987 .addUse(Top)
1988 .addUse(VAList)
1989 .addImm(OffsetBytes / PtrSize)
1990 .addMemOperand(MF.getMachineMemOperand(
1991 MMO->getPointerInfo().getWithOffset(OffsetBytes),
1992 MachineMemOperand::MOStore, PtrSize, MMO->getBaseAlign()));
1993 constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
1994
1995 OffsetBytes += PtrSize;
1996 };
1997
1998 // void* stack at offset 0
1999 PushAddress(FuncInfo->getVarArgsStackIndex(), 0);
2000
2001 // void* gr_top at offset 8 (4 on ILP32)
2002 const unsigned GPRSize = FuncInfo->getVarArgsGPRSize();
2003 PushAddress(FuncInfo->getVarArgsGPRIndex(), GPRSize);
2004
2005 // void* vr_top at offset 16 (8 on ILP32)
2006 const unsigned FPRSize = FuncInfo->getVarArgsFPRSize();
2007 PushAddress(FuncInfo->getVarArgsFPRIndex(), FPRSize);
2008
2009 // Helper function to store a 4-byte integer constant to VAList at offset
2010 // OffsetBytes, and increment OffsetBytes by 4.
2011 const auto PushIntConstant = [&](const int32_t Value) {
2012 constexpr int IntSize = 4;
2013 const Register Temp = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
2014 auto MIB =
2015 BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::MOVi32imm))
2016 .addDef(Temp)
2017 .addImm(Value);
2018 constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
2019
2020 const auto *MMO = *I.memoperands_begin();
2021 MIB = BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::STRWui))
2022 .addUse(Temp)
2023 .addUse(VAList)
2024 .addImm(OffsetBytes / IntSize)
2025 .addMemOperand(MF.getMachineMemOperand(
2026 MMO->getPointerInfo().getWithOffset(OffsetBytes),
2027 MachineMemOperand::MOStore, IntSize, MMO->getBaseAlign()));
2028 constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
2029 OffsetBytes += IntSize;
2030 };
2031
2032 // int gr_offs at offset 24 (12 on ILP32)
2033 PushIntConstant(-static_cast<int32_t>(GPRSize));
2034
2035 // int vr_offs at offset 28 (16 on ILP32)
2036 PushIntConstant(-static_cast<int32_t>(FPRSize));
2037
2038 assert(OffsetBytes == (STI.isTargetILP32() ? 20 : 32) && "Unexpected offset");
2039
2040 I.eraseFromParent();
2041 return true;
2042}
2043
2044bool AArch64InstructionSelector::selectVaStartDarwin(
2045 MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const {
2046 AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
2047 Register ListReg = I.getOperand(0).getReg();
2048
2049 Register ArgsAddrReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
2050
2051 int FrameIdx = FuncInfo->getVarArgsStackIndex();
2052 if (MF.getSubtarget<AArch64Subtarget>().isCallingConvWin64(
2053 MF.getFunction().getCallingConv(), MF.getFunction().isVarArg())) {
2054 FrameIdx = FuncInfo->getVarArgsGPRSize() > 0
2055 ? FuncInfo->getVarArgsGPRIndex()
2056 : FuncInfo->getVarArgsStackIndex();
2057 }
2058
2059 auto MIB =
2060 BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::ADDXri))
2061 .addDef(ArgsAddrReg)
2062 .addFrameIndex(FrameIdx)
2063 .addImm(0)
2064 .addImm(0);
2065
2066 constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
2067
2068 MIB = BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::STRXui))
2069 .addUse(ArgsAddrReg)
2070 .addUse(ListReg)
2071 .addImm(0)
2072 .addMemOperand(*I.memoperands_begin());
2073
2074 constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
2075 I.eraseFromParent();
2076 return true;
2077}
2078
2079void AArch64InstructionSelector::materializeLargeCMVal(
2080 MachineInstr &I, const Value *V, unsigned OpFlags) {
2081 MachineBasicBlock &MBB = *I.getParent();
2082 MachineFunction &MF = *MBB.getParent();
2083 MachineRegisterInfo &MRI = MF.getRegInfo();
2084
2085 auto MovZ = MIB.buildInstr(AArch64::MOVZXi, {&AArch64::GPR64RegClass}, {});
2086 MovZ->addOperand(MF, I.getOperand(1));
2087 MovZ->getOperand(1).setTargetFlags(OpFlags | AArch64II::MO_G0 |
2088 AArch64II::MO_NC);
2089 MovZ->addOperand(MF, MachineOperand::CreateImm(0));
2090 constrainSelectedInstRegOperands(*MovZ, TII, TRI, RBI);
2091
2092 auto BuildMovK = [&](Register SrcReg, unsigned char Flags, unsigned Offset,
2093 Register ForceDstReg) {
2094 Register DstReg = ForceDstReg
2095 ? ForceDstReg
2096 : MRI.createVirtualRegister(&AArch64::GPR64RegClass);
2097 auto MovI = MIB.buildInstr(AArch64::MOVKXi).addDef(DstReg).addUse(SrcReg);
2098 if (auto *GV = dyn_cast<GlobalValue>(V)) {
2099 MovI->addOperand(MF, MachineOperand::CreateGA(
2100 GV, MovZ->getOperand(1).getOffset(), Flags));
2101 } else {
2102 MovI->addOperand(
2103 MF, MachineOperand::CreateBA(cast<BlockAddress>(V),
2104 MovZ->getOperand(1).getOffset(), Flags));
2105 }
2106 MovI->addOperand(MF, MachineOperand::CreateImm(Offset));
2107 constrainSelectedInstRegOperands(*MovI, TII, TRI, RBI);
2108 return DstReg;
2109 };
2110 Register DstReg = BuildMovK(MovZ.getReg(0),
2111 AArch64II::MO_G1 | AArch64II::MO_NC, 16, 0);
2112 DstReg = BuildMovK(DstReg, AArch64II::MO_G2 | AArch64II::MO_NC, 32, 0);
2113 BuildMovK(DstReg, AArch64II::MO_G3, 48, I.getOperand(0).getReg());
2114}
2115
2116bool AArch64InstructionSelector::preISelLower(MachineInstr &I) {
2117 MachineBasicBlock &MBB = *I.getParent();
2118 MachineFunction &MF = *MBB.getParent();
2119 MachineRegisterInfo &MRI = MF.getRegInfo();
2120
2121 switch (I.getOpcode()) {
2122 case TargetOpcode::G_STORE: {
2123 bool Changed = contractCrossBankCopyIntoStore(I, MRI);
2124 MachineOperand &SrcOp = I.getOperand(0);
2125 if (MRI.getType(SrcOp.getReg()).isPointer()) {
2126 // Allow matching with imported patterns for stores of pointers. Unlike
2127 // G_LOAD/G_PTR_ADD, we may not have selected all users. So, emit a copy
2128 // and constrain.
2129 auto Copy = MIB.buildCopy(LLT::scalar(64), SrcOp);
2130 Register NewSrc = Copy.getReg(0);
2131 SrcOp.setReg(NewSrc);
2132 RBI.constrainGenericRegister(NewSrc, AArch64::GPR64RegClass, MRI);
2133 Changed = true;
2134 }
2135 return Changed;
2136 }
2137 case TargetOpcode::G_PTR_ADD: {
2138 // If Checked Pointer Arithmetic (FEAT_CPA) is present, preserve the pointer
2139 // arithmetic semantics instead of falling back to regular arithmetic.
2140 const auto &TL = STI.getTargetLowering();
2141 if (TL->shouldPreservePtrArith(MF.getFunction(), EVT()))
2142 return false;
2143 return convertPtrAddToAdd(I, MRI);
2144 }
2145 case TargetOpcode::G_LOAD: {
2146 // For scalar loads of pointers, we try to convert the dest type from p0
2147 // to s64 so that our imported patterns can match. Like with the G_PTR_ADD
2148 // conversion, this should be ok because all users should have been
2149 // selected already, so the type doesn't matter for them.
2150 Register DstReg = I.getOperand(0).getReg();
2151 const LLT DstTy = MRI.getType(DstReg);
2152 if (!DstTy.isPointer())
2153 return false;
2154 MRI.setType(DstReg, LLT::scalar(64));
2155 return true;
2156 }
2157 case AArch64::G_DUP: {
2158 // Convert the type from p0 to s64 to help selection.
2159 LLT DstTy = MRI.getType(I.getOperand(0).getReg());
2160 if (!DstTy.isPointerVector())
2161 return false;
2162 auto NewSrc = MIB.buildCopy(LLT::scalar(64), I.getOperand(1).getReg());
2163 MRI.setType(I.getOperand(0).getReg(),
2164 DstTy.changeElementType(LLT::scalar(64)));
2165 MRI.setRegClass(NewSrc.getReg(0), &AArch64::GPR64RegClass);
2166 I.getOperand(1).setReg(NewSrc.getReg(0));
2167 return true;
2168 }
2169 case AArch64::G_INSERT_VECTOR_ELT: {
2170 // Convert the type from p0 to s64 to help selection.
2171 LLT DstTy = MRI.getType(I.getOperand(0).getReg());
2172 LLT SrcVecTy = MRI.getType(I.getOperand(1).getReg());
2173 if (!SrcVecTy.isPointerVector())
2174 return false;
2175 auto NewSrc = MIB.buildCopy(LLT::scalar(64), I.getOperand(2).getReg());
2176 MRI.setType(I.getOperand(1).getReg(),
2177 DstTy.changeElementType(LLT::scalar(64)));
2178 MRI.setType(I.getOperand(0).getReg(),
2179 DstTy.changeElementType(LLT::scalar(64)));
2180 MRI.setRegClass(NewSrc.getReg(0), &AArch64::GPR64RegClass);
2181 I.getOperand(2).setReg(NewSrc.getReg(0));
2182 return true;
2183 }
2184 case TargetOpcode::G_UITOFP:
2185 case TargetOpcode::G_SITOFP: {
2186 // If both source and destination regbanks are FPR, then convert the opcode
2187 // to G_SITOF so that the importer can select it to an fpr variant.
2188 // Otherwise, it ends up matching an fpr/gpr variant and adding a cross-bank
2189 // copy.
2190 Register SrcReg = I.getOperand(1).getReg();
2191 LLT SrcTy = MRI.getType(SrcReg);
2192 LLT DstTy = MRI.getType(I.getOperand(0).getReg());
2193 if (SrcTy.isVector() || SrcTy.getSizeInBits() != DstTy.getSizeInBits())
2194 return false;
2195
2196 if (RBI.getRegBank(SrcReg, MRI, TRI)->getID() == AArch64::FPRRegBankID) {
2197 if (I.getOpcode() == TargetOpcode::G_SITOFP)
2198 I.setDesc(TII.get(AArch64::G_SITOF));
2199 else
2200 I.setDesc(TII.get(AArch64::G_UITOF));
2201 return true;
2202 }
2203 return false;
2204 }
2205 default:
2206 return false;
2207 }
2208}
2209
2210/// This lowering tries to look for G_PTR_ADD instructions and then converts
2211/// them to a standard G_ADD with a COPY on the source.
2212///
2213/// The motivation behind this is to expose the add semantics to the imported
2214/// tablegen patterns. We shouldn't need to check for uses being loads/stores,
2215/// because the selector works bottom up, uses before defs. By the time we
2216/// end up trying to select a G_PTR_ADD, we should already have attempted to
2217/// fold it into an addressing mode and failed.
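/// A rough sketch of the rewrite:
///   %dst:_(p0) = G_PTR_ADD %base(p0), %off(s64)
/// becomes
///   %cast:_(s64) = G_PTRTOINT %base(p0)
///   %dst:_(s64) = G_ADD %cast, %off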
2218bool AArch64InstructionSelector::convertPtrAddToAdd(
2219 MachineInstr &I, MachineRegisterInfo &MRI) {
2220 assert(I.getOpcode() == TargetOpcode::G_PTR_ADD && "Expected G_PTR_ADD");
2221 Register DstReg = I.getOperand(0).getReg();
2222 Register AddOp1Reg = I.getOperand(1).getReg();
2223 const LLT PtrTy = MRI.getType(DstReg);
2224 if (PtrTy.getAddressSpace() != 0)
2225 return false;
2226
2227 const LLT CastPtrTy =
2228 PtrTy.isVector() ? LLT::fixed_vector(2, 64) : LLT::scalar(64);
2229 auto PtrToInt = MIB.buildPtrToInt(CastPtrTy, AddOp1Reg);
2230 // Set regbanks on the registers.
2231 if (PtrTy.isVector())
2232 MRI.setRegBank(PtrToInt.getReg(0), RBI.getRegBank(AArch64::FPRRegBankID));
2233 else
2234 MRI.setRegBank(PtrToInt.getReg(0), RBI.getRegBank(AArch64::GPRRegBankID));
2235
2236 // Now turn the %dst(p0) = G_PTR_ADD %base, off into:
2237 // %dst(intty) = G_ADD %intbase, off
2238 I.setDesc(TII.get(TargetOpcode::G_ADD));
2239 MRI.setType(DstReg, CastPtrTy);
2240 I.getOperand(1).setReg(PtrToInt.getReg(0));
2241 if (!select(*PtrToInt)) {
2242 LLVM_DEBUG(dbgs() << "Failed to select G_PTRTOINT in convertPtrAddToAdd");
2243 return false;
2244 }
2245
2246 // Also take the opportunity here to try to do some optimization.
2247 // Try to convert this into a G_SUB if the offset is a 0-x negate idiom.
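// For example, if the offset was defined as "%off = G_SUB 0, %n", the G_ADD
// formed above is rewritten to "%dst = G_SUB %intbase, %n" instead.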
2248 Register NegatedReg;
2249 if (!mi_match(I.getOperand(2).getReg(), MRI, m_Neg(m_Reg(NegatedReg))))
2250 return true;
2251 I.getOperand(2).setReg(NegatedReg);
2252 I.setDesc(TII.get(TargetOpcode::G_SUB));
2253 return true;
2254}
2255
2256bool AArch64InstructionSelector::earlySelectSHL(MachineInstr &I,
2257 MachineRegisterInfo &MRI) {
2258 // We try to match the immediate variant of LSL, which is actually an alias
2259 // for a special case of UBFM. Otherwise, we fall back to the imported
2260 // selector which will match the register variant.
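// For example, for 64-bit values "lsl x0, x1, #4" is the same operation as
// "ubfm x0, x1, #60, #59" (immr = 64 - shift, imms = 63 - shift).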
2261 assert(I.getOpcode() == TargetOpcode::G_SHL && "unexpected op");
2262 const auto &MO = I.getOperand(2);
2263 auto VRegAndVal = getIConstantVRegVal(MO.getReg(), MRI);
2264 if (!VRegAndVal)
2265 return false;
2266
2267 const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
2268 if (DstTy.isVector())
2269 return false;
2270 bool Is64Bit = DstTy.getSizeInBits() == 64;
2271 auto Imm1Fn = Is64Bit ? selectShiftA_64(MO) : selectShiftA_32(MO);
2272 auto Imm2Fn = Is64Bit ? selectShiftB_64(MO) : selectShiftB_32(MO);
2273
2274 if (!Imm1Fn || !Imm2Fn)
2275 return false;
2276
2277 auto NewI =
2278 MIB.buildInstr(Is64Bit ? AArch64::UBFMXri : AArch64::UBFMWri,
2279 {I.getOperand(0).getReg()}, {I.getOperand(1).getReg()});
2280
2281 for (auto &RenderFn : *Imm1Fn)
2282 RenderFn(NewI);
2283 for (auto &RenderFn : *Imm2Fn)
2284 RenderFn(NewI);
2285
2286 I.eraseFromParent();
2287 return constrainSelectedInstRegOperands(*NewI, TII, TRI, RBI);
2288}
2289
2290bool AArch64InstructionSelector::contractCrossBankCopyIntoStore(
2291 MachineInstr &I, MachineRegisterInfo &MRI) {
2292 assert(I.getOpcode() == TargetOpcode::G_STORE && "Expected G_STORE");
2293 // If we're storing a scalar, it doesn't matter what register bank that
2294 // scalar is on. All that matters is the size.
2295 //
2296 // So, if we see something like this (with a 32-bit scalar as an example):
2297 //
2298 // %x:gpr(s32) = ... something ...
2299 // %y:fpr(s32) = COPY %x:gpr(s32)
2300 // G_STORE %y:fpr(s32)
2301 //
2302 // We can fix this up into something like this:
2303 //
2304 // G_STORE %x:gpr(s32)
2305 //
2306 // And then continue the selection process normally.
2307 Register DefDstReg = getSrcRegIgnoringCopies(I.getOperand(0).getReg(), MRI);
2308 if (!DefDstReg.isValid())
2309 return false;
2310 LLT DefDstTy = MRI.getType(DefDstReg);
2311 Register StoreSrcReg = I.getOperand(0).getReg();
2312 LLT StoreSrcTy = MRI.getType(StoreSrcReg);
2313
2314 // If we get something strange like a physical register, then we shouldn't
2315 // go any further.
2316 if (!DefDstTy.isValid())
2317 return false;
2318
2319 // Are the source and dst types the same size?
2320 if (DefDstTy.getSizeInBits() != StoreSrcTy.getSizeInBits())
2321 return false;
2322
2323 if (RBI.getRegBank(StoreSrcReg, MRI, TRI) ==
2324 RBI.getRegBank(DefDstReg, MRI, TRI))
2325 return false;
2326
2327 // We have a cross-bank copy, which is entering a store. Let's fold it.
2328 I.getOperand(0).setReg(DefDstReg);
2329 return true;
2330}
2331
2332bool AArch64InstructionSelector::earlySelect(MachineInstr &I) {
2333 assert(I.getParent() && "Instruction should be in a basic block!");
2334 assert(I.getParent()->getParent() && "Instruction should be in a function!");
2335
2336 MachineBasicBlock &MBB = *I.getParent();
2337 MachineFunction &MF = *MBB.getParent();
2338 MachineRegisterInfo &MRI = MF.getRegInfo();
2339
2340 switch (I.getOpcode()) {
2341 case AArch64::G_DUP: {
2342 // Before selecting a DUP instruction, check if it is better selected as a
2343 // MOV or load from a constant pool.
2344 Register Src = I.getOperand(1).getReg();
2345 auto ValAndVReg = getAnyConstantVRegValWithLookThrough(Src, MRI);
2346 if (!ValAndVReg)
2347 return false;
2348 LLVMContext &Ctx = MF.getFunction().getContext();
2349 Register Dst = I.getOperand(0).getReg();
2350 auto *CV = ConstantDataVector::getSplat(
2351 MRI.getType(Dst).getNumElements(),
2352 ConstantInt::get(
2353 Type::getIntNTy(Ctx, MRI.getType(Dst).getScalarSizeInBits()),
2354 ValAndVReg->Value.trunc(MRI.getType(Dst).getScalarSizeInBits())));
2355 if (!emitConstantVector(Dst, CV, MIB, MRI))
2356 return false;
2357 I.eraseFromParent();
2358 return true;
2359 }
2360 case TargetOpcode::G_SEXT:
2361 // Check for i64 sext(i32 vector_extract) prior to tablegen to select SMOV
2362 // over a normal extend.
2363 if (selectUSMovFromExtend(I, MRI))
2364 return true;
2365 return false;
2366 case TargetOpcode::G_BR:
2367 return false;
2368 case TargetOpcode::G_SHL:
2369 return earlySelectSHL(I, MRI);
2370 case TargetOpcode::G_CONSTANT: {
2371 bool IsZero = false;
2372 if (I.getOperand(1).isCImm())
2373 IsZero = I.getOperand(1).getCImm()->isZero();
2374 else if (I.getOperand(1).isImm())
2375 IsZero = I.getOperand(1).getImm() == 0;
2376
2377 if (!IsZero)
2378 return false;
2379
2380 Register DefReg = I.getOperand(0).getReg();
2381 LLT Ty = MRI.getType(DefReg);
2382 if (Ty.getSizeInBits() == 64) {
2383 I.getOperand(1).ChangeToRegister(AArch64::XZR, false);
2384 RBI.constrainGenericRegister(DefReg, AArch64::GPR64RegClass, MRI);
2385 } else if (Ty.getSizeInBits() == 32) {
2386 I.getOperand(1).ChangeToRegister(AArch64::WZR, false);
2387 RBI.constrainGenericRegister(DefReg, AArch64::GPR32RegClass, MRI);
2388 } else
2389 return false;
2390
2391 I.setDesc(TII.get(TargetOpcode::COPY));
2392 return true;
2393 }
2394
2395 case TargetOpcode::G_ADD: {
2396 // Check if this is being fed by a G_ICMP on either side.
2397 //
2398 // (cmp pred, x, y) + z
2399 //
2400 // In the above case, when the cmp is true, we increment z by 1. So, we can
2401 // fold the add into the cset for the cmp by using cinc.
2402 //
2403 // FIXME: This would probably be a lot nicer in PostLegalizerLowering.
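// For example, "%d = G_ADD %z, (G_ICMP eq, %x, %y)" can become a compare of
// %x and %y followed by "cinc %d, %z, eq", i.e. a CSINC of %z with itself
// under the inverted condition.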
2404 Register AddDst = I.getOperand(0).getReg();
2405 Register AddLHS = I.getOperand(1).getReg();
2406 Register AddRHS = I.getOperand(2).getReg();
2407 // Only handle scalars.
2408 LLT Ty = MRI.getType(AddLHS);
2409 if (Ty.isVector())
2410 return false;
2411 // Since G_ICMP is modeled as ADDS/SUBS/ANDS, we can handle 32 bits or 64
2412 // bits.
2413 unsigned Size = Ty.getSizeInBits();
2414 if (Size != 32 && Size != 64)
2415 return false;
2416 auto MatchCmp = [&](Register Reg) -> MachineInstr * {
2417 if (!MRI.hasOneNonDBGUse(Reg))
2418 return nullptr;
2419 // If the LHS of the add is 32 bits, then we want to fold a 32-bit
2420 // compare.
2421 if (Size == 32)
2422 return getOpcodeDef(TargetOpcode::G_ICMP, Reg, MRI);
2423 // We model scalar compares using 32-bit destinations right now.
2424 // If it's a 64-bit compare, it'll have 64-bit sources.
2425 Register ZExt;
2426 if (!mi_match(Reg, MRI,
2427 m_OneNonDBGUse(m_GZExt(m_OneNonDBGUse(m_Reg(ZExt))))))
2428 return nullptr;
2429 auto *Cmp = getOpcodeDef(TargetOpcode::G_ICMP, ZExt, MRI);
2430 if (!Cmp ||
2431 MRI.getType(Cmp->getOperand(2).getReg()).getSizeInBits() != 64)
2432 return nullptr;
2433 return Cmp;
2434 };
2435 // Try to match
2436 // z + (cmp pred, x, y)
2437 MachineInstr *Cmp = MatchCmp(AddRHS);
2438 if (!Cmp) {
2439 // (cmp pred, x, y) + z
2440 std::swap(AddLHS, AddRHS);
2441 Cmp = MatchCmp(AddRHS);
2442 if (!Cmp)
2443 return false;
2444 }
2445 auto &PredOp = Cmp->getOperand(1);
2447 emitIntegerCompare(/*LHS=*/Cmp->getOperand(2),
2448 /*RHS=*/Cmp->getOperand(3), PredOp, MIB);
2449 auto Pred = static_cast<CmpInst::Predicate>(PredOp.getPredicate());
2450 const AArch64CC::CondCode InvCC = changeICMPPredToAArch64CC(
2451 CmpInst::getInversePredicate(Pred), Cmp->getOperand(3).getReg(), &MRI);
2452 emitCSINC(/*Dst=*/AddDst, /*Src =*/AddLHS, /*Src2=*/AddLHS, InvCC, MIB);
2453 I.eraseFromParent();
2454 return true;
2455 }
2456 case TargetOpcode::G_OR: {
2457 // Look for operations that take the lower `Width=Size-ShiftImm` bits of
2458 // `ShiftSrc` and insert them into the upper `Width` bits of `MaskSrc` via
2459 // shifting and masking that we can replace with a BFI (encoded as a BFM).
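// For example, with 32-bit operands, ShiftImm = 16 and MaskImm = 0xffff:
//   %dst = G_OR (G_SHL %a, 16), (G_AND %b, 0xffff)
// is a BFI of %a into the top 16 bits of %b, encoded below as a BFM with
// immr = Size - ShiftImm and imms = Size - ShiftImm - 1.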
2460 Register Dst = I.getOperand(0).getReg();
2461 LLT Ty = MRI.getType(Dst);
2462
2463 if (!Ty.isScalar())
2464 return false;
2465
2466 unsigned Size = Ty.getSizeInBits();
2467 if (Size != 32 && Size != 64)
2468 return false;
2469
2470 Register ShiftSrc;
2471 int64_t ShiftImm;
2472 Register MaskSrc;
2473 int64_t MaskImm;
2474 if (!mi_match(
2475 Dst, MRI,
2476 m_GOr(m_OneNonDBGUse(m_GShl(m_Reg(ShiftSrc), m_ICst(ShiftImm))),
2477 m_OneNonDBGUse(m_GAnd(m_Reg(MaskSrc), m_ICst(MaskImm))))))
2478 return false;
2479
2480 if (ShiftImm > Size || ((1ULL << ShiftImm) - 1ULL) != uint64_t(MaskImm))
2481 return false;
2482
2483 int64_t Immr = Size - ShiftImm;
2484 int64_t Imms = Size - ShiftImm - 1;
2485 unsigned Opc = Size == 32 ? AArch64::BFMWri : AArch64::BFMXri;
2486 emitInstr(Opc, {Dst}, {MaskSrc, ShiftSrc, Immr, Imms}, MIB);
2487 I.eraseFromParent();
2488 return true;
2489 }
2490 case TargetOpcode::G_FENCE: {
2491 if (I.getOperand(1).getImm() == 0)
2492 BuildMI(MBB, I, MIMetadata(I), TII.get(TargetOpcode::MEMBARRIER));
2493 else
2494 BuildMI(MBB, I, MIMetadata(I), TII.get(AArch64::DMB))
2495 .addImm(I.getOperand(0).getImm() == 4 ? 0x9 : 0xb);
2496 I.eraseFromParent();
2497 return true;
2498 }
2499 default:
2500 return false;
2501 }
2502}
2503
2504bool AArch64InstructionSelector::select(MachineInstr &I) {
2505 assert(I.getParent() && "Instruction should be in a basic block!");
2506 assert(I.getParent()->getParent() && "Instruction should be in a function!");
2507
2508 MachineBasicBlock &MBB = *I.getParent();
2509 MachineFunction &MF = *MBB.getParent();
2510 MachineRegisterInfo &MRI = MF.getRegInfo();
2511
2512 const AArch64Subtarget *Subtarget = &MF.getSubtarget<AArch64Subtarget>();
2513 if (Subtarget->requiresStrictAlign()) {
2514 // We don't support this feature yet.
2515 LLVM_DEBUG(dbgs() << "AArch64 GISel does not support strict-align yet\n");
2516 return false;
2517 }
2518
2519 MIB.setInstrAndDebugLoc(I);
2520
2521 unsigned Opcode = I.getOpcode();
2522 // G_PHI requires same handling as PHI
2523 if (!I.isPreISelOpcode() || Opcode == TargetOpcode::G_PHI) {
2524 // Certain non-generic instructions also need some special handling.
2525
2526 if (Opcode == TargetOpcode::LOAD_STACK_GUARD)
2527 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2528
2529 if (Opcode == TargetOpcode::PHI || Opcode == TargetOpcode::G_PHI) {
2530 const Register DefReg = I.getOperand(0).getReg();
2531 const LLT DefTy = MRI.getType(DefReg);
2532
2533 const RegClassOrRegBank &RegClassOrBank =
2534 MRI.getRegClassOrRegBank(DefReg);
2535
2536 const TargetRegisterClass *DefRC =
2537 dyn_cast_if_present<const TargetRegisterClass *>(RegClassOrBank);
2538 if (!DefRC) {
2539 if (!DefTy.isValid()) {
2540 LLVM_DEBUG(dbgs() << "PHI operand has no type, not a gvreg?\n");
2541 return false;
2542 }
2543 const RegisterBank &RB = *cast<const RegisterBank *>(RegClassOrBank);
2544 DefRC = getRegClassForTypeOnBank(DefTy, RB);
2545 if (!DefRC) {
2546 LLVM_DEBUG(dbgs() << "PHI operand has unexpected size/bank\n");
2547 return false;
2548 }
2549 }
2550
2551 I.setDesc(TII.get(TargetOpcode::PHI));
2552
2553 return RBI.constrainGenericRegister(DefReg, *DefRC, MRI);
2554 }
2555
2556 if (I.isCopy())
2557 return selectCopy(I, TII, MRI, TRI, RBI);
2558
2559 if (I.isDebugInstr())
2560 return selectDebugInstr(I, MRI, RBI);
2561
2562 return true;
2563 }
2564
2565
2566 if (I.getNumOperands() != I.getNumExplicitOperands()) {
2567 LLVM_DEBUG(
2568 dbgs() << "Generic instruction has unexpected implicit operands\n");
2569 return false;
2570 }
2571
2572 // Try to do some lowering before we start instruction selecting. These
2573 // lowerings are purely transformations on the input G_MIR and so selection
2574 // must continue after any modification of the instruction.
2575 if (preISelLower(I)) {
2576 Opcode = I.getOpcode(); // The opcode may have been modified, refresh it.
2577 }
2578
2579 // There may be patterns where the importer can't deal with them optimally,
2580 // but does select it to a suboptimal sequence so our custom C++ selection
2581 // code later never has a chance to work on it. Therefore, we have an early
2582 // selection attempt here to give priority to certain selection routines
2583 // over the imported ones.
2584 if (earlySelect(I))
2585 return true;
2586
2587 if (selectImpl(I, *CoverageInfo))
2588 return true;
2589
2590 LLT Ty =
2591 I.getOperand(0).isReg() ? MRI.getType(I.getOperand(0).getReg()) : LLT{};
2592
2593 switch (Opcode) {
2594 case TargetOpcode::G_SBFX:
2595 case TargetOpcode::G_UBFX: {
2596 static const unsigned OpcTable[2][2] = {
2597 {AArch64::UBFMWri, AArch64::UBFMXri},
2598 {AArch64::SBFMWri, AArch64::SBFMXri}};
2599 bool IsSigned = Opcode == TargetOpcode::G_SBFX;
2600 unsigned Size = Ty.getSizeInBits();
2601 unsigned Opc = OpcTable[IsSigned][Size == 64];
2602 auto Cst1 =
2603 getIConstantVRegValWithLookThrough(I.getOperand(2).getReg(), MRI);
2604 assert(Cst1 && "Should have gotten a constant for src 1?");
2605 auto Cst2 =
2606 getIConstantVRegValWithLookThrough(I.getOperand(3).getReg(), MRI);
2607 assert(Cst2 && "Should have gotten a constant for src 2?");
2608 auto LSB = Cst1->Value.getZExtValue();
2609 auto Width = Cst2->Value.getZExtValue();
2610 auto BitfieldInst =
2611 MIB.buildInstr(Opc, {I.getOperand(0)}, {I.getOperand(1)})
2612 .addImm(LSB)
2613 .addImm(LSB + Width - 1);
2614 I.eraseFromParent();
2615 return constrainSelectedInstRegOperands(*BitfieldInst, TII, TRI, RBI);
2616 }
2617 case TargetOpcode::G_BRCOND:
2618 return selectCompareBranch(I, MF, MRI);
2619
2620 case TargetOpcode::G_BRINDIRECT: {
2621 const Function &Fn = MF.getFunction();
2622 if (std::optional<uint16_t> BADisc =
2623 STI.getPtrAuthBlockAddressDiscriminatorIfEnabled(Fn)) {
2624 auto MI = MIB.buildInstr(AArch64::BRA, {}, {I.getOperand(0).getReg()});
2625 MI.addImm(AArch64PACKey::IA);
2626 MI.addImm(*BADisc);
2627 MI.addReg(/*AddrDisc=*/AArch64::XZR);
2628 I.eraseFromParent();
2629 return constrainSelectedInstRegOperands(*MI, TII, TRI, RBI);
2630 }
2631 I.setDesc(TII.get(AArch64::BR));
2632 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2633 }
2634
2635 case TargetOpcode::G_BRJT:
2636 return selectBrJT(I, MRI);
2637
2638 case AArch64::G_ADD_LOW: {
2639 // This op may have been separated from its ADRP companion by the localizer
2640 // or some other code motion pass. Given that many CPUs will try to
2641 // macro-fuse these operations anyway, select this into a MOVaddr pseudo
2642 // which will later be expanded into an ADRP+ADD pair after scheduling.
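// The MOVaddr pseudo is eventually expanded back into the usual pair, roughly:
//   adrp x0, sym
//   add  x0, x0, :lo12:sym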
2643 MachineInstr *BaseMI = MRI.getVRegDef(I.getOperand(1).getReg());
2644 if (BaseMI->getOpcode() != AArch64::ADRP) {
2645 I.setDesc(TII.get(AArch64::ADDXri));
2646 I.addOperand(MachineOperand::CreateImm(0));
2647 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2648 }
2649 assert(TM.getCodeModel() == CodeModel::Small &&
2650 "Expected small code model");
2651 auto Op1 = BaseMI->getOperand(1);
2652 auto Op2 = I.getOperand(2);
2653 auto MovAddr = MIB.buildInstr(AArch64::MOVaddr, {I.getOperand(0)}, {})
2654 .addGlobalAddress(Op1.getGlobal(), Op1.getOffset(),
2655 Op1.getTargetFlags())
2656 .addGlobalAddress(Op2.getGlobal(), Op2.getOffset(),
2657 Op2.getTargetFlags());
2658 I.eraseFromParent();
2659 return constrainSelectedInstRegOperands(*MovAddr, TII, TRI, RBI);
2660 }
2661
2662 case TargetOpcode::G_FCONSTANT:
2663 case TargetOpcode::G_CONSTANT: {
2664 const bool isFP = Opcode == TargetOpcode::G_FCONSTANT;
2665
2666 const LLT s8 = LLT::scalar(8);
2667 const LLT s16 = LLT::scalar(16);
2668 const LLT s32 = LLT::scalar(32);
2669 const LLT s64 = LLT::scalar(64);
2670 const LLT s128 = LLT::scalar(128);
2671 const LLT p0 = LLT::pointer(0, 64);
2672
2673 const Register DefReg = I.getOperand(0).getReg();
2674 const LLT DefTy = MRI.getType(DefReg);
2675 const unsigned DefSize = DefTy.getSizeInBits();
2676 const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI);
2677
2678 // FIXME: Redundant check, but even less readable when factored out.
2679 if (isFP) {
2680 if (Ty != s16 && Ty != s32 && Ty != s64 && Ty != s128) {
2681 LLVM_DEBUG(dbgs() << "Unable to materialize FP " << Ty
2682 << " constant, expected: " << s16 << " or " << s32
2683 << " or " << s64 << " or " << s128 << '\n');
2684 return false;
2685 }
2686
2687 if (RB.getID() != AArch64::FPRRegBankID) {
2688 LLVM_DEBUG(dbgs() << "Unable to materialize FP " << Ty
2689 << " constant on bank: " << RB
2690 << ", expected: FPR\n");
2691 return false;
2692 }
2693
2694 // The case when we have 0.0 is covered by tablegen. Reject it here so we
2695 // can be sure tablegen works correctly and isn't rescued by this code.
2696 // 0.0 is not covered by tablegen for FP128. So we will handle this
2697 // scenario in the code here.
2698 if (DefSize != 128 && I.getOperand(1).getFPImm()->isExactlyValue(0.0))
2699 return false;
2700 } else {
2701 // s32 and s64 are covered by tablegen.
2702 if (Ty != p0 && Ty != s8 && Ty != s16) {
2703 LLVM_DEBUG(dbgs() << "Unable to materialize integer " << Ty
2704 << " constant, expected: " << s32 << ", " << s64
2705 << ", or " << p0 << '\n');
2706 return false;
2707 }
2708
2709 if (RB.getID() != AArch64::GPRRegBankID) {
2710 LLVM_DEBUG(dbgs() << "Unable to materialize integer " << Ty
2711 << " constant on bank: " << RB
2712 << ", expected: GPR\n");
2713 return false;
2714 }
2715 }
2716
2717 if (isFP) {
2718 const TargetRegisterClass &FPRRC = *getRegClassForTypeOnBank(DefTy, RB);
2719 // For 16, 64, and 128b values, emit a constant pool load.
2720 switch (DefSize) {
2721 default:
2722 llvm_unreachable("Unexpected destination size for G_FCONSTANT?");
2723 case 32:
2724 case 64: {
2725 bool OptForSize = shouldOptForSize(&MF);
2726 const auto &TLI = MF.getSubtarget().getTargetLowering();
2727 // If TLI says that this fpimm is illegal, then we'll expand to a
2728 // constant pool load.
2729 if (TLI->isFPImmLegal(I.getOperand(1).getFPImm()->getValueAPF(),
2730 EVT::getFloatingPointVT(DefSize), OptForSize))
2731 break;
2732 [[fallthrough]];
2733 }
2734 case 16:
2735 case 128: {
2736 auto *FPImm = I.getOperand(1).getFPImm();
2737 auto *LoadMI = emitLoadFromConstantPool(FPImm, MIB);
2738 if (!LoadMI) {
2739 LLVM_DEBUG(dbgs() << "Failed to load double constant pool entry\n");
2740 return false;
2741 }
2742 MIB.buildCopy({DefReg}, {LoadMI->getOperand(0).getReg()});
2743 I.eraseFromParent();
2744 return RBI.constrainGenericRegister(DefReg, FPRRC, MRI);
2745 }
2746 }
2747
2748 assert((DefSize == 32 || DefSize == 64) && "Unexpected const def size");
2749 // Either emit a FMOV, or emit a copy to emit a normal mov.
2750 const Register DefGPRReg = MRI.createVirtualRegister(
2751 DefSize == 32 ? &AArch64::GPR32RegClass : &AArch64::GPR64RegClass);
2752 MachineOperand &RegOp = I.getOperand(0);
2753 RegOp.setReg(DefGPRReg);
2754 MIB.setInsertPt(MIB.getMBB(), std::next(I.getIterator()));
2755 MIB.buildCopy({DefReg}, {DefGPRReg});
2756
2757 if (!RBI.constrainGenericRegister(DefReg, FPRRC, MRI)) {
2758 LLVM_DEBUG(dbgs() << "Failed to constrain G_FCONSTANT def operand\n");
2759 return false;
2760 }
2761
2762 MachineOperand &ImmOp = I.getOperand(1);
2763 // FIXME: Is going through int64_t always correct?
2764 ImmOp.ChangeToImmediate(
2765 ImmOp.getFPImm()->getValueAPF().bitcastToAPInt().getZExtValue());
2766 } else if (I.getOperand(1).isCImm()) {
2767 uint64_t Val = I.getOperand(1).getCImm()->getZExtValue();
2768 I.getOperand(1).ChangeToImmediate(Val);
2769 } else if (I.getOperand(1).isImm()) {
2770 uint64_t Val = I.getOperand(1).getImm();
2771 I.getOperand(1).ChangeToImmediate(Val);
2772 }
2773
2774 const unsigned MovOpc =
2775 DefSize == 64 ? AArch64::MOVi64imm : AArch64::MOVi32imm;
2776 I.setDesc(TII.get(MovOpc));
2777 constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2778 return true;
2779 }
2780 case TargetOpcode::G_EXTRACT: {
2781 Register DstReg = I.getOperand(0).getReg();
2782 Register SrcReg = I.getOperand(1).getReg();
2783 LLT SrcTy = MRI.getType(SrcReg);
2784 LLT DstTy = MRI.getType(DstReg);
2785 (void)DstTy;
2786 unsigned SrcSize = SrcTy.getSizeInBits();
2787
2788 if (SrcTy.getSizeInBits() > 64) {
2789 // This should be an extract of an s128, which is like a vector extract.
2790 if (SrcTy.getSizeInBits() != 128)
2791 return false;
2792 // Only support extracting 64 bits from an s128 at the moment.
2793 if (DstTy.getSizeInBits() != 64)
2794 return false;
2795
2796 unsigned Offset = I.getOperand(2).getImm();
2797 if (Offset % 64 != 0)
2798 return false;
2799
2800 // Check we have the right regbank always.
2801 const RegisterBank &SrcRB = *RBI.getRegBank(SrcReg, MRI, TRI);
2802 const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
2803 assert(SrcRB.getID() == DstRB.getID() && "Wrong extract regbank!");
2804
2805 if (SrcRB.getID() == AArch64::GPRRegBankID) {
2806 auto NewI =
2807 MIB.buildInstr(TargetOpcode::COPY, {DstReg}, {})
2808 .addUse(SrcReg, 0,
2809 Offset == 0 ? AArch64::sube64 : AArch64::subo64);
2810 constrainOperandRegClass(MF, TRI, MRI, TII, RBI, *NewI,
2811 AArch64::GPR64RegClass, NewI->getOperand(0));
2812 I.eraseFromParent();
2813 return true;
2814 }
2815
2816 // Emit the same code as a vector extract.
2817 // Offset must be a multiple of 64.
2818 unsigned LaneIdx = Offset / 64;
2819 MachineInstr *Extract = emitExtractVectorElt(
2820 DstReg, DstRB, LLT::scalar(64), SrcReg, LaneIdx, MIB);
2821 if (!Extract)
2822 return false;
2823 I.eraseFromParent();
2824 return true;
2825 }
2826
2827 I.setDesc(TII.get(SrcSize == 64 ? AArch64::UBFMXri : AArch64::UBFMWri));
2828 MachineInstrBuilder(MF, I).addImm(I.getOperand(2).getImm() +
2829 Ty.getSizeInBits() - 1);
2830
2831 if (SrcSize < 64) {
2832 assert(SrcSize == 32 && DstTy.getSizeInBits() == 16 &&
2833 "unexpected G_EXTRACT types");
2834 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2835 }
2836
2837 DstReg = MRI.createGenericVirtualRegister(LLT::scalar(64));
2838 MIB.setInsertPt(MIB.getMBB(), std::next(I.getIterator()));
2839 MIB.buildInstr(TargetOpcode::COPY, {I.getOperand(0).getReg()}, {})
2840 .addReg(DstReg, 0, AArch64::sub_32);
2841 RBI.constrainGenericRegister(I.getOperand(0).getReg(),
2842 AArch64::GPR32RegClass, MRI);
2843 I.getOperand(0).setReg(DstReg);
2844
2845 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2846 }
2847
2848 case TargetOpcode::G_INSERT: {
2849 LLT SrcTy = MRI.getType(I.getOperand(2).getReg());
2850 LLT DstTy = MRI.getType(I.getOperand(0).getReg());
2851 unsigned DstSize = DstTy.getSizeInBits();
2852 // Larger inserts are vectors; same-size ones should be something else by
2853 // now (split up or turned into COPYs).
2854 if (Ty.getSizeInBits() > 64 || SrcTy.getSizeInBits() > 32)
2855 return false;
2856
2857 I.setDesc(TII.get(DstSize == 64 ? AArch64::BFMXri : AArch64::BFMWri));
2858 unsigned LSB = I.getOperand(3).getImm();
2859 unsigned Width = MRI.getType(I.getOperand(2).getReg()).getSizeInBits();
2860 I.getOperand(3).setImm((DstSize - LSB) % DstSize);
2861 MachineInstrBuilder(MF, I).addImm(Width - 1);
2862
2863 if (DstSize < 64) {
2864 assert(DstSize == 32 && SrcTy.getSizeInBits() == 16 &&
2865 "unexpected G_INSERT types");
2866 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2867 }
2868
2869 Register SrcReg = MRI.createGenericVirtualRegister(LLT::scalar(64));
2870 BuildMI(MBB, I.getIterator(), I.getDebugLoc(),
2871 TII.get(AArch64::SUBREG_TO_REG))
2872 .addDef(SrcReg)
2873 .addImm(0)
2874 .addUse(I.getOperand(2).getReg())
2875 .addImm(AArch64::sub_32);
2876 RBI.constrainGenericRegister(I.getOperand(2).getReg(),
2877 AArch64::GPR32RegClass, MRI);
2878 I.getOperand(2).setReg(SrcReg);
2879
2880 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2881 }
2882 case TargetOpcode::G_FRAME_INDEX: {
2883 // allocas and G_FRAME_INDEX are only supported in addrspace(0).
2884 if (Ty != LLT::pointer(0, 64)) {
2885 LLVM_DEBUG(dbgs() << "G_FRAME_INDEX pointer has type: " << Ty
2886 << ", expected: " << LLT::pointer(0, 64) << '\n');
2887 return false;
2888 }
2889 I.setDesc(TII.get(AArch64::ADDXri));
2890
2891 // MOs for a #0 shifted immediate.
2892 I.addOperand(MachineOperand::CreateImm(0));
2893 I.addOperand(MachineOperand::CreateImm(0));
2894
2895 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2896 }
2897
2898 case TargetOpcode::G_GLOBAL_VALUE: {
2899 const GlobalValue *GV = nullptr;
2900 unsigned OpFlags;
2901 if (I.getOperand(1).isSymbol()) {
2902 OpFlags = I.getOperand(1).getTargetFlags();
2903 // Currently only used by "RtLibUseGOT".
2904 assert(OpFlags == AArch64II::MO_GOT);
2905 } else {
2906 GV = I.getOperand(1).getGlobal();
2907 if (GV->isThreadLocal()) {
2908 // We don't support instructions with emulated TLS variables yet
2909 if (TM.useEmulatedTLS())
2910 return false;
2911 return selectTLSGlobalValue(I, MRI);
2912 }
2913 OpFlags = STI.ClassifyGlobalReference(GV, TM);
2914 }
2915
2916 if (OpFlags & AArch64II::MO_GOT) {
2917 bool IsGOTSigned = MF.getInfo<AArch64FunctionInfo>()->hasELFSignedGOT();
2918 I.setDesc(TII.get(IsGOTSigned ? AArch64::LOADgotAUTH : AArch64::LOADgot));
2919 I.getOperand(1).setTargetFlags(OpFlags);
2920 I.addImplicitDefUseOperands(MF);
2921 } else if (TM.getCodeModel() == CodeModel::Large &&
2922 !TM.isPositionIndependent()) {
2923 // Materialize the global using movz/movk instructions.
2924 materializeLargeCMVal(I, GV, OpFlags);
2925 I.eraseFromParent();
2926 return true;
2927 } else if (TM.getCodeModel() == CodeModel::Tiny) {
2928 I.setDesc(TII.get(AArch64::ADR));
2929 I.getOperand(1).setTargetFlags(OpFlags);
2930 } else {
2931 I.setDesc(TII.get(AArch64::MOVaddr));
2932 I.getOperand(1).setTargetFlags(OpFlags | AArch64II::MO_PAGE);
2933 MachineInstrBuilder MIB(MF, I);
2934 MIB.addGlobalAddress(GV, I.getOperand(1).getOffset(),
2935 OpFlags | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
2936 }
2937 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2938 }
2939
2940 case TargetOpcode::G_PTRAUTH_GLOBAL_VALUE:
2941 return selectPtrAuthGlobalValue(I, MRI);
2942
2943 case TargetOpcode::G_ZEXTLOAD:
2944 case TargetOpcode::G_LOAD:
2945 case TargetOpcode::G_STORE: {
2946 GLoadStore &LdSt = cast<GLoadStore>(I);
2947 bool IsZExtLoad = I.getOpcode() == TargetOpcode::G_ZEXTLOAD;
2948 LLT PtrTy = MRI.getType(LdSt.getPointerReg());
2949
2950 // Can only handle AddressSpace 0, 64-bit pointers.
2951 if (PtrTy != LLT::pointer(0, 64)) {
2952 return false;
2953 }
2954
2955 uint64_t MemSizeInBytes = LdSt.getMemSize().getValue();
2956 unsigned MemSizeInBits = LdSt.getMemSizeInBits().getValue();
2957 AtomicOrdering Order = LdSt.getMMO().getSuccessOrdering();
2958
2959 // Need special instructions for atomics that affect ordering.
2960 if (isStrongerThanMonotonic(Order)) {
2961 assert(!isa<GZExtLoad>(LdSt));
2962 assert(MemSizeInBytes <= 8 &&
2963 "128-bit atomics should already be custom-legalized");
2964
2965 if (isa<GLoad>(LdSt)) {
2966 static constexpr unsigned LDAPROpcodes[] = {
2967 AArch64::LDAPRB, AArch64::LDAPRH, AArch64::LDAPRW, AArch64::LDAPRX};
2968 static constexpr unsigned LDAROpcodes[] = {
2969 AArch64::LDARB, AArch64::LDARH, AArch64::LDARW, AArch64::LDARX};
2970 ArrayRef<unsigned> Opcodes =
2971 STI.hasRCPC() && Order != AtomicOrdering::SequentiallyConsistent
2972 ? LDAPROpcodes
2973 : LDAROpcodes;
2974 I.setDesc(TII.get(Opcodes[Log2_32(MemSizeInBytes)]));
2975 } else {
2976 static constexpr unsigned Opcodes[] = {AArch64::STLRB, AArch64::STLRH,
2977 AArch64::STLRW, AArch64::STLRX};
2978 Register ValReg = LdSt.getReg(0);
2979 if (MRI.getType(ValReg).getSizeInBits() == 64 && MemSizeInBits != 64) {
2980 // Emit a subreg copy of 32 bits.
2981 Register NewVal = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
2982 MIB.buildInstr(TargetOpcode::COPY, {NewVal}, {})
2983 .addReg(I.getOperand(0).getReg(), 0, AArch64::sub_32);
2984 I.getOperand(0).setReg(NewVal);
2985 }
2986 I.setDesc(TII.get(Opcodes[Log2_32(MemSizeInBytes)]));
2987 }
2988 constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2989 return true;
2990 }
2991
2992#ifndef NDEBUG
2993 const Register PtrReg = LdSt.getPointerReg();
2994 const RegisterBank &PtrRB = *RBI.getRegBank(PtrReg, MRI, TRI);
2995 // Check that the pointer register is valid.
2996 assert(PtrRB.getID() == AArch64::GPRRegBankID &&
2997 "Load/Store pointer operand isn't a GPR");
2998 assert(MRI.getType(PtrReg).isPointer() &&
2999 "Load/Store pointer operand isn't a pointer");
3000#endif
3001
3002 const Register ValReg = LdSt.getReg(0);
3003 const RegisterBank &RB = *RBI.getRegBank(ValReg, MRI, TRI);
3004 LLT ValTy = MRI.getType(ValReg);
3005
3006 // The code below doesn't support truncating stores, so we need to split it
3007 // again.
3008 if (isa<GStore>(LdSt) && ValTy.getSizeInBits() > MemSizeInBits) {
3009 unsigned SubReg;
3010 LLT MemTy = LdSt.getMMO().getMemoryType();
3011 auto *RC = getRegClassForTypeOnBank(MemTy, RB);
3012 if (!getSubRegForClass(RC, TRI, SubReg))
3013 return false;
3014
3015 // Generate a subreg copy.
3016 auto Copy = MIB.buildInstr(TargetOpcode::COPY, {MemTy}, {})
3017 .addReg(ValReg, 0, SubReg)
3018 .getReg(0);
3019 RBI.constrainGenericRegister(Copy, *RC, MRI);
3020 LdSt.getOperand(0).setReg(Copy);
3021 } else if (isa<GLoad>(LdSt) && ValTy.getSizeInBits() > MemSizeInBits) {
3022 // If this is an any-extending load from the FPR bank, split it into a regular
3023 // load + extend.
3024 if (RB.getID() == AArch64::FPRRegBankID) {
3025 unsigned SubReg;
3026 LLT MemTy = LdSt.getMMO().getMemoryType();
3027 auto *RC = getRegClassForTypeOnBank(MemTy, RB);
3028 if (!getSubRegForClass(RC, TRI, SubReg))
3029 return false;
3030 Register OldDst = LdSt.getReg(0);
3031 Register NewDst =
3032 MRI.createGenericVirtualRegister(LdSt.getMMO().getMemoryType());
3033 LdSt.getOperand(0).setReg(NewDst);
3034 MRI.setRegBank(NewDst, RB);
3035 // Generate a SUBREG_TO_REG to extend it.
3036 MIB.setInsertPt(MIB.getMBB(), std::next(LdSt.getIterator()));
3037 MIB.buildInstr(AArch64::SUBREG_TO_REG, {OldDst}, {})
3038 .addImm(0)
3039 .addUse(NewDst)
3040 .addImm(SubReg);
3041 auto SubRegRC = getRegClassForTypeOnBank(MRI.getType(OldDst), RB);
3042 RBI.constrainGenericRegister(OldDst, *SubRegRC, MRI);
3043 MIB.setInstr(LdSt);
3044 ValTy = MemTy; // This is no longer an extending load.
3045 }
3046 }
3047
3048 // Helper lambda for partially selecting I. Either returns the original
3049 // instruction with an updated opcode, or a new instruction.
3050 auto SelectLoadStoreAddressingMode = [&]() -> MachineInstr * {
3051 bool IsStore = isa<GStore>(I);
3052 const unsigned NewOpc =
3053 selectLoadStoreUIOp(I.getOpcode(), RB.getID(), MemSizeInBits);
3054 if (NewOpc == I.getOpcode())
3055 return nullptr;
3056 // Check if we can fold anything into the addressing mode.
3057 auto AddrModeFns =
3058 selectAddrModeIndexed(I.getOperand(1), MemSizeInBytes);
3059 if (!AddrModeFns) {
3060 // Can't fold anything. Use the original instruction.
3061 I.setDesc(TII.get(NewOpc));
3062 I.addOperand(MachineOperand::CreateImm(0));
3063 return &I;
3064 }
3065
3066 // Folded something. Create a new instruction and return it.
3067 auto NewInst = MIB.buildInstr(NewOpc, {}, {}, I.getFlags());
3068 Register CurValReg = I.getOperand(0).getReg();
3069 IsStore ? NewInst.addUse(CurValReg) : NewInst.addDef(CurValReg);
3070 NewInst.cloneMemRefs(I);
3071 for (auto &Fn : *AddrModeFns)
3072 Fn(NewInst);
3073 I.eraseFromParent();
3074 return &*NewInst;
3075 };
3076
3077 MachineInstr *LoadStore = SelectLoadStoreAddressingMode();
3078 if (!LoadStore)
3079 return false;
3080
3081 // If we're storing a 0, use WZR/XZR.
3082 if (Opcode == TargetOpcode::G_STORE) {
3083 auto CVal = getIConstantVRegValWithLookThrough(
3084 LoadStore->getOperand(0).getReg(), MRI);
3085 if (CVal && CVal->Value == 0) {
3086 switch (LoadStore->getOpcode()) {
3087 case AArch64::STRWui:
3088 case AArch64::STRHHui:
3089 case AArch64::STRBBui:
3090 LoadStore->getOperand(0).setReg(AArch64::WZR);
3091 break;
3092 case AArch64::STRXui:
3093 LoadStore->getOperand(0).setReg(AArch64::XZR);
3094 break;
3095 }
3096 }
3097 }
3098
3099 if (IsZExtLoad || (Opcode == TargetOpcode::G_LOAD &&
3100 ValTy == LLT::scalar(64) && MemSizeInBits == 32)) {
3101 // The any/zextload from a smaller type to i32 should be handled by the
3102 // importer.
3103 if (MRI.getType(LoadStore->getOperand(0).getReg()).getSizeInBits() != 64)
3104 return false;
3105 // If we have an extending load then change the load's type to be a
3106 // narrower reg and zero_extend with SUBREG_TO_REG.
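// For example, a 32-bit G_ZEXTLOAD into an s64 becomes a plain 32-bit load
// into a fresh GPR32 followed by a SUBREG_TO_REG into the 64-bit destination;
// the 32-bit load already zeroes the upper 32 bits.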
3107 Register LdReg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
3108 Register DstReg = LoadStore->getOperand(0).getReg();
3109 LoadStore->getOperand(0).setReg(LdReg);
3110
3111 MIB.setInsertPt(MIB.getMBB(), std::next(LoadStore->getIterator()));
3112 MIB.buildInstr(AArch64::SUBREG_TO_REG, {DstReg}, {})
3113 .addImm(0)
3114 .addUse(LdReg)
3115 .addImm(AArch64::sub_32);
3116 constrainSelectedInstRegOperands(*LoadStore, TII, TRI, RBI);
3117 return RBI.constrainGenericRegister(DstReg, AArch64::GPR64allRegClass,
3118 MRI);
3119 }
3120 return constrainSelectedInstRegOperands(*LoadStore, TII, TRI, RBI);
3121 }
3122
3123 case TargetOpcode::G_INDEXED_ZEXTLOAD:
3124 case TargetOpcode::G_INDEXED_SEXTLOAD:
3125 return selectIndexedExtLoad(I, MRI);
3126 case TargetOpcode::G_INDEXED_LOAD:
3127 return selectIndexedLoad(I, MRI);
3128 case TargetOpcode::G_INDEXED_STORE:
3129 return selectIndexedStore(cast<GIndexedStore>(I), MRI);
3130
3131 case TargetOpcode::G_LSHR:
3132 case TargetOpcode::G_ASHR:
3133 if (MRI.getType(I.getOperand(0).getReg()).isVector())
3134 return selectVectorAshrLshr(I, MRI);
3135 [[fallthrough]];
3136 case TargetOpcode::G_SHL:
3137 if (Opcode == TargetOpcode::G_SHL &&
3138 MRI.getType(I.getOperand(0).getReg()).isVector())
3139 return selectVectorSHL(I, MRI);
3140
3141 // These shifts were legalized to have 64 bit shift amounts because we
3142 // want to take advantage of the selection patterns that assume the
3143 // immediates are s64s, however, selectBinaryOp will assume both operands
3144 // will have the same bit size.
3145 {
3146 Register SrcReg = I.getOperand(1).getReg();
3147 Register ShiftReg = I.getOperand(2).getReg();
3148 const LLT ShiftTy = MRI.getType(ShiftReg);
3149 const LLT SrcTy = MRI.getType(SrcReg);
3150 if (!SrcTy.isVector() && SrcTy.getSizeInBits() == 32 &&
3151 ShiftTy.getSizeInBits() == 64) {
3152 assert(!ShiftTy.isVector() && "unexpected vector shift ty");
3153 // Insert a subregister copy to implement a 64->32 trunc
3154 auto Trunc = MIB.buildInstr(TargetOpcode::COPY, {SrcTy}, {})
3155 .addReg(ShiftReg, 0, AArch64::sub_32);
3156 MRI.setRegBank(Trunc.getReg(0), RBI.getRegBank(AArch64::GPRRegBankID));
3157 I.getOperand(2).setReg(Trunc.getReg(0));
3158 }
3159 }
3160 [[fallthrough]];
3161 case TargetOpcode::G_OR: {
3162 // Reject the various things we don't support yet.
3163 if (unsupportedBinOp(I, RBI, MRI, TRI))
3164 return false;
3165
3166 const unsigned OpSize = Ty.getSizeInBits();
3167
3168 const Register DefReg = I.getOperand(0).getReg();
3169 const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI);
3170
3171 const unsigned NewOpc = selectBinaryOp(I.getOpcode(), RB.getID(), OpSize);
3172 if (NewOpc == I.getOpcode())
3173 return false;
3174
3175 I.setDesc(TII.get(NewOpc));
3176 // FIXME: Should the type be always reset in setDesc?
3177
3178 // Now that we selected an opcode, we need to constrain the register
3179 // operands to use appropriate classes.
3180 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3181 }
3182
3183 case TargetOpcode::G_PTR_ADD: {
3184 emitADD(I.getOperand(0).getReg(), I.getOperand(1), I.getOperand(2), MIB);
3185 I.eraseFromParent();
3186 return true;
3187 }
3188
3189 case TargetOpcode::G_SADDE:
3190 case TargetOpcode::G_UADDE:
3191 case TargetOpcode::G_SSUBE:
3192 case TargetOpcode::G_USUBE:
3193 case TargetOpcode::G_SADDO:
3194 case TargetOpcode::G_UADDO:
3195 case TargetOpcode::G_SSUBO:
3196 case TargetOpcode::G_USUBO:
3197 return selectOverflowOp(I, MRI);
3198
3199 case TargetOpcode::G_PTRMASK: {
3200 Register MaskReg = I.getOperand(2).getReg();
3201 std::optional<int64_t> MaskVal = getIConstantVRegSExtVal(MaskReg, MRI);
3202 // TODO: Implement arbitrary cases
3203 if (!MaskVal || !isShiftedMask_64(*MaskVal))
3204 return false;
3205
3206 uint64_t Mask = *MaskVal;
3207 I.setDesc(TII.get(AArch64::ANDXri));
3208 I.getOperand(2).ChangeToImmediate(
3209 AArch64_AM::encodeLogicalImmediate(Mask, 64));
3210
3211 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3212 }
3213 case TargetOpcode::G_PTRTOINT:
3214 case TargetOpcode::G_TRUNC: {
3215 const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
3216 const LLT SrcTy = MRI.getType(I.getOperand(1).getReg());
3217
3218 const Register DstReg = I.getOperand(0).getReg();
3219 const Register SrcReg = I.getOperand(1).getReg();
3220
3221 const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
3222 const RegisterBank &SrcRB = *RBI.getRegBank(SrcReg, MRI, TRI);
3223
3224 if (DstRB.getID() != SrcRB.getID()) {
3225 LLVM_DEBUG(
3226 dbgs() << "G_TRUNC/G_PTRTOINT input/output on different banks\n");
3227 return false;
3228 }
3229
3230 if (DstRB.getID() == AArch64::GPRRegBankID) {
3231 const TargetRegisterClass *DstRC = getRegClassForTypeOnBank(DstTy, DstRB);
3232 if (!DstRC)
3233 return false;
3234
3235 const TargetRegisterClass *SrcRC = getRegClassForTypeOnBank(SrcTy, SrcRB);
3236 if (!SrcRC)
3237 return false;
3238
3239 if (!RBI.constrainGenericRegister(SrcReg, *SrcRC, MRI) ||
3240 !RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) {
3241 LLVM_DEBUG(dbgs() << "Failed to constrain G_TRUNC/G_PTRTOINT\n");
3242 return false;
3243 }
3244
3245 if (DstRC == SrcRC) {
3246 // Nothing to be done
3247 } else if (Opcode == TargetOpcode::G_TRUNC && DstTy == LLT::scalar(32) &&
3248 SrcTy == LLT::scalar(64)) {
3249 llvm_unreachable("TableGen can import this case");
3250 return false;
3251 } else if (DstRC == &AArch64::GPR32RegClass &&
3252 SrcRC == &AArch64::GPR64RegClass) {
3253 I.getOperand(1).setSubReg(AArch64::sub_32);
3254 } else {
3255 LLVM_DEBUG(
3256 dbgs() << "Unhandled mismatched classes in G_TRUNC/G_PTRTOINT\n");
3257 return false;
3258 }
3259
3260 I.setDesc(TII.get(TargetOpcode::COPY));
3261 return true;
3262 } else if (DstRB.getID() == AArch64::FPRRegBankID) {
3263 if (DstTy == LLT::fixed_vector(4, 16) &&
3264 SrcTy == LLT::fixed_vector(4, 32)) {
3265 I.setDesc(TII.get(AArch64::XTNv4i16));
3266 constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3267 return true;
3268 }
3269
3270 if (!SrcTy.isVector() && SrcTy.getSizeInBits() == 128) {
3271 MachineInstr *Extract = emitExtractVectorElt(
3272 DstReg, DstRB, LLT::scalar(DstTy.getSizeInBits()), SrcReg, 0, MIB);
3273 if (!Extract)
3274 return false;
3275 I.eraseFromParent();
3276 return true;
3277 }
3278
3279 // We might have a vector G_PTRTOINT, in which case just emit a COPY.
3280 if (Opcode == TargetOpcode::G_PTRTOINT) {
3281 assert(DstTy.isVector() && "Expected an FPR ptrtoint to be a vector");
3282 I.setDesc(TII.get(TargetOpcode::COPY));
3283 return selectCopy(I, TII, MRI, TRI, RBI);
3284 }
3285 }
3286
3287 return false;
3288 }
3289
3290 case TargetOpcode::G_ANYEXT: {
3291 if (selectUSMovFromExtend(I, MRI))
3292 return true;
3293
3294 const Register DstReg = I.getOperand(0).getReg();
3295 const Register SrcReg = I.getOperand(1).getReg();
3296
3297 const RegisterBank &RBDst = *RBI.getRegBank(DstReg, MRI, TRI);
3298 if (RBDst.getID() != AArch64::GPRRegBankID) {
3299 LLVM_DEBUG(dbgs() << "G_ANYEXT on bank: " << RBDst
3300 << ", expected: GPR\n");
3301 return false;
3302 }
3303
3304 const RegisterBank &RBSrc = *RBI.getRegBank(SrcReg, MRI, TRI);
3305 if (RBSrc.getID() != AArch64::GPRRegBankID) {
3306 LLVM_DEBUG(dbgs() << "G_ANYEXT on bank: " << RBSrc
3307 << ", expected: GPR\n");
3308 return false;
3309 }
3310
3311 const unsigned DstSize = MRI.getType(DstReg).getSizeInBits();
3312
3313 if (DstSize == 0) {
3314 LLVM_DEBUG(dbgs() << "G_ANYEXT operand has no size, not a gvreg?\n");
3315 return false;
3316 }
3317
3318 if (DstSize != 64 && DstSize > 32) {
3319 LLVM_DEBUG(dbgs() << "G_ANYEXT to size: " << DstSize
3320 << ", expected: 32 or 64\n");
3321 return false;
3322 }
3323 // At this point G_ANYEXT is just like a plain COPY, but we need
3324 // to explicitly form the 64-bit value if the destination is 64 bits.
3325 if (DstSize > 32) {
3326 Register ExtSrc = MRI.createVirtualRegister(&AArch64::GPR64allRegClass);
3327 BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::SUBREG_TO_REG))
3328 .addDef(ExtSrc)
3329 .addImm(0)
3330 .addUse(SrcReg)
3331 .addImm(AArch64::sub_32);
3332 I.getOperand(1).setReg(ExtSrc);
3333 }
3334 return selectCopy(I, TII, MRI, TRI, RBI);
3335 }
3336
3337 case TargetOpcode::G_ZEXT:
3338 case TargetOpcode::G_SEXT_INREG:
3339 case TargetOpcode::G_SEXT: {
3340 if (selectUSMovFromExtend(I, MRI))
3341 return true;
3342
3343 unsigned Opcode = I.getOpcode();
3344 const bool IsSigned = Opcode != TargetOpcode::G_ZEXT;
3345 const Register DefReg = I.getOperand(0).getReg();
3346 Register SrcReg = I.getOperand(1).getReg();
3347 const LLT DstTy = MRI.getType(DefReg);
3348 const LLT SrcTy = MRI.getType(SrcReg);
3349 unsigned DstSize = DstTy.getSizeInBits();
3350 unsigned SrcSize = SrcTy.getSizeInBits();
3351
3352 // SEXT_INREG has the same src reg size as dst, the size of the value to be
3353 // extended is encoded in the imm.
3354 if (Opcode == TargetOpcode::G_SEXT_INREG)
3355 SrcSize = I.getOperand(2).getImm();
3356
3357 if (DstTy.isVector())
3358 return false; // Should be handled by imported patterns.
3359
3360 assert((*RBI.getRegBank(DefReg, MRI, TRI)).getID() ==
3361 AArch64::GPRRegBankID &&
3362 "Unexpected ext regbank");
3363
3364 MachineInstr *ExtI;
3365
3366 // First check if we're extending the result of a load with a dest type
3367 // smaller than 32 bits; in that case this zext is redundant. GPR32 is the
3368 // smallest GPR register class on AArch64, and all narrower loads
3369 // automatically zero-extend the upper bits. E.g.
3370 // %v(s8) = G_LOAD %p, :: (load 1)
3371 // %v2(s32) = G_ZEXT %v(s8)
3372 if (!IsSigned) {
3373 auto *LoadMI = getOpcodeDef(TargetOpcode::G_LOAD, SrcReg, MRI);
3374 bool IsGPR =
3375 RBI.getRegBank(SrcReg, MRI, TRI)->getID() == AArch64::GPRRegBankID;
3376 if (LoadMI && IsGPR) {
3377 const MachineMemOperand *MemOp = *LoadMI->memoperands_begin();
3378 unsigned BytesLoaded = MemOp->getSize().getValue();
3379 if (BytesLoaded < 4 && SrcTy.getSizeInBytes() == BytesLoaded)
3380 return selectCopy(I, TII, MRI, TRI, RBI);
3381 }
3382
3383 // For the 32-bit -> 64-bit case, we can emit a mov (ORRWrs)
3384 // + SUBREG_TO_REG.
3385 if (IsGPR && SrcSize == 32 && DstSize == 64) {
3386 Register SubregToRegSrc =
3387 MRI.createVirtualRegister(&AArch64::GPR32RegClass);
3388 const Register ZReg = AArch64::WZR;
3389 MIB.buildInstr(AArch64::ORRWrs, {SubregToRegSrc}, {ZReg, SrcReg})
3390 .addImm(0);
3391
3392 MIB.buildInstr(AArch64::SUBREG_TO_REG, {DefReg}, {})
3393 .addImm(0)
3394 .addUse(SubregToRegSrc)
3395 .addImm(AArch64::sub_32);
3396
3397 if (!RBI.constrainGenericRegister(DefReg, AArch64::GPR64RegClass,
3398 MRI)) {
3399 LLVM_DEBUG(dbgs() << "Failed to constrain G_ZEXT destination\n");
3400 return false;
3401 }
3402
3403 if (!RBI.constrainGenericRegister(SrcReg, AArch64::GPR32RegClass,
3404 MRI)) {
3405 LLVM_DEBUG(dbgs() << "Failed to constrain G_ZEXT source\n");
3406 return false;
3407 }
3408
3409 I.eraseFromParent();
3410 return true;
3411 }
3412 }
3413
3414 if (DstSize == 64) {
3415 if (Opcode != TargetOpcode::G_SEXT_INREG) {
3416 // FIXME: Can we avoid manually doing this?
3417 if (!RBI.constrainGenericRegister(SrcReg, AArch64::GPR32RegClass,
3418 MRI)) {
3419 LLVM_DEBUG(dbgs() << "Failed to constrain " << TII.getName(Opcode)
3420 << " operand\n");
3421 return false;
3422 }
3423 SrcReg = MIB.buildInstr(AArch64::SUBREG_TO_REG,
3424 {&AArch64::GPR64RegClass}, {})
3425 .addImm(0)
3426 .addUse(SrcReg)
3427 .addImm(AArch64::sub_32)
3428 .getReg(0);
3429 }
3430
3431 ExtI = MIB.buildInstr(IsSigned ? AArch64::SBFMXri : AArch64::UBFMXri,
3432 {DefReg}, {SrcReg})
3433 .addImm(0)
3434 .addImm(SrcSize - 1);
3435 } else if (DstSize <= 32) {
3436 ExtI = MIB.buildInstr(IsSigned ? AArch64::SBFMWri : AArch64::UBFMWri,
3437 {DefReg}, {SrcReg})
3438 .addImm(0)
3439 .addImm(SrcSize - 1);
3440 } else {
3441 return false;
3442 }
3443
3444 constrainSelectedInstRegOperands(*ExtI, TII, TRI, RBI);
3445 I.eraseFromParent();
3446 return true;
3447 }
3448
3449 case TargetOpcode::G_FREEZE:
3450 return selectCopy(I, TII, MRI, TRI, RBI);
3451
3452 case TargetOpcode::G_INTTOPTR:
3453 // The importer is currently unable to import pointer types since they
3454 // didn't exist in SelectionDAG.
3455 return selectCopy(I, TII, MRI, TRI, RBI);
3456
3457 case TargetOpcode::G_BITCAST:
3458 // Imported SelectionDAG rules can handle every bitcast except those that
3459 // bitcast from a type to the same type. Ideally, these shouldn't occur
3460 // but we might not run an optimizer that deletes them. The other exception
3461 // is bitcasts involving pointer types, as SelectionDAG has no knowledge
3462 // of them.
3463 return selectCopy(I, TII, MRI, TRI, RBI);
3464
3465 case TargetOpcode::G_SELECT: {
3466 auto &Sel = cast<GSelect>(I);
3467 const Register CondReg = Sel.getCondReg();
3468 const Register TReg = Sel.getTrueReg();
3469 const Register FReg = Sel.getFalseReg();
3470
3471 if (tryOptSelect(Sel))
3472 return true;
3473
3474 // Make sure to use an unused vreg instead of wzr, so that the peephole
3475 // optimizations will be able to optimize these.
3476 Register DeadVReg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
3477 auto TstMI = MIB.buildInstr(AArch64::ANDSWri, {DeadVReg}, {CondReg})
3478 .addImm(AArch64_AM::encodeLogicalImmediate(1, 32));
3479 constrainSelectedInstRegOperands(*TstMI, TII, TRI, RBI);
3480 if (!emitSelect(Sel.getReg(0), TReg, FReg, AArch64CC::NE, MIB))
3481 return false;
3482 Sel.eraseFromParent();
3483 return true;
3484 }
3485 case TargetOpcode::G_ICMP: {
3486 if (Ty.isVector())
3487 return false;
3488
3489 if (Ty != LLT::scalar(32)) {
3490 LLVM_DEBUG(dbgs() << "G_ICMP result has type: " << Ty
3491 << ", expected: " << LLT::scalar(32) << '\n');
3492 return false;
3493 }
3494
3495 auto &PredOp = I.getOperand(1);
3496 emitIntegerCompare(I.getOperand(2), I.getOperand(3), PredOp, MIB);
3497 auto Pred = static_cast<CmpInst::Predicate>(PredOp.getPredicate());
3498 const AArch64CC::CondCode InvCC = changeICMPPredToAArch64CC(
3499 CmpInst::getInversePredicate(Pred), I.getOperand(3).getReg(), &MRI);
3500 emitCSINC(/*Dst=*/I.getOperand(0).getReg(), /*Src1=*/AArch64::WZR,
3501 /*Src2=*/AArch64::WZR, InvCC, MIB);
3502 I.eraseFromParent();
3503 return true;
3504 }
3505
3506 case TargetOpcode::G_FCMP: {
3507 CmpInst::Predicate Pred =
3508 static_cast<CmpInst::Predicate>(I.getOperand(1).getPredicate());
3509 if (!emitFPCompare(I.getOperand(2).getReg(), I.getOperand(3).getReg(), MIB,
3510 Pred) ||
3511 !emitCSetForFCmp(I.getOperand(0).getReg(), Pred, MIB))
3512 return false;
3513 I.eraseFromParent();
3514 return true;
3515 }
3516 case TargetOpcode::G_VASTART:
3517 return STI.isTargetDarwin() ? selectVaStartDarwin(I, MF, MRI)
3518 : selectVaStartAAPCS(I, MF, MRI);
3519 case TargetOpcode::G_INTRINSIC:
3520 return selectIntrinsic(I, MRI);
3521 case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS:
3522 return selectIntrinsicWithSideEffects(I, MRI);
3523 case TargetOpcode::G_IMPLICIT_DEF: {
3524 I.setDesc(TII.get(TargetOpcode::IMPLICIT_DEF));
3525 const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
3526 const Register DstReg = I.getOperand(0).getReg();
3527 const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
3528 const TargetRegisterClass *DstRC = getRegClassForTypeOnBank(DstTy, DstRB);
3529 RBI.constrainGenericRegister(DstReg, *DstRC, MRI);
3530 return true;
3531 }
3532 case TargetOpcode::G_BLOCK_ADDR: {
3533 Function *BAFn = I.getOperand(1).getBlockAddress()->getFunction();
3534 if (std::optional<uint16_t> BADisc =
3535 STI.getPtrAuthBlockAddressDiscriminatorIfEnabled(*BAFn)) {
3536 MIB.buildInstr(TargetOpcode::IMPLICIT_DEF, {AArch64::X16}, {});
3537 MIB.buildInstr(TargetOpcode::IMPLICIT_DEF, {AArch64::X17}, {});
3538 MIB.buildInstr(AArch64::MOVaddrPAC)
3539 .addBlockAddress(I.getOperand(1).getBlockAddress())
3540 .addImm(AArch64PACKey::IA)
3541 .addReg(/*AddrDisc=*/AArch64::XZR)
3542 .addImm(*BADisc)
3543 .constrainAllUses(TII, TRI, RBI);
3544 MIB.buildCopy(I.getOperand(0).getReg(), Register(AArch64::X16));
3545 RBI.constrainGenericRegister(I.getOperand(0).getReg(),
3546 AArch64::GPR64RegClass, MRI);
3547 I.eraseFromParent();
3548 return true;
3549 }
3550 if (TM.getCodeModel() == CodeModel::Large && !TM.isPositionIndependent()) {
3551 materializeLargeCMVal(I, I.getOperand(1).getBlockAddress(), 0);
3552 I.eraseFromParent();
3553 return true;
3554 } else {
3555 I.setDesc(TII.get(AArch64::MOVaddrBA));
3556 auto MovMI = BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::MOVaddrBA),
3557 I.getOperand(0).getReg())
3558 .addBlockAddress(I.getOperand(1).getBlockAddress(),
3559 /* Offset */ 0, AArch64II::MO_PAGE)
3560 .addBlockAddress(
3561 I.getOperand(1).getBlockAddress(), /* Offset */ 0,
3562 AArch64II::MO_NC | AArch64II::MO_PAGEOFF);
3563 I.eraseFromParent();
3564 return constrainSelectedInstRegOperands(*MovMI, TII, TRI, RBI);
3565 }
3566 }
3567 case AArch64::G_DUP: {
3568 // When the scalar operand of G_DUP is an s8/s16 GPR, it can't be selected
3569 // by the imported patterns, so do it manually here. Avoiding the s16 GPR in
3570 // the first place is difficult because at RegBankSelect we may end up
3571 // pessimizing the FPR case if we decided to add an anyextend to fix this.
3572 // Manual selection is the most robust solution for now.
3573 if (RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI)->getID() !=
3574 AArch64::GPRRegBankID)
3575 return false; // We expect the fpr regbank case to be imported.
3576 LLT VecTy = MRI.getType(I.getOperand(0).getReg());
3577 if (VecTy == LLT::fixed_vector(8, 8))
3578 I.setDesc(TII.get(AArch64::DUPv8i8gpr));
3579 else if (VecTy == LLT::fixed_vector(16, 8))
3580 I.setDesc(TII.get(AArch64::DUPv16i8gpr));
3581 else if (VecTy == LLT::fixed_vector(4, 16))
3582 I.setDesc(TII.get(AArch64::DUPv4i16gpr));
3583 else if (VecTy == LLT::fixed_vector(8, 16))
3584 I.setDesc(TII.get(AArch64::DUPv8i16gpr));
3585 else
3586 return false;
3587 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3588 }
3589 case TargetOpcode::G_BUILD_VECTOR:
3590 return selectBuildVector(I, MRI);
3591 case TargetOpcode::G_MERGE_VALUES:
3592 return selectMergeValues(I, MRI);
3593 case TargetOpcode::G_UNMERGE_VALUES:
3594 return selectUnmergeValues(I, MRI);
3595 case TargetOpcode::G_SHUFFLE_VECTOR:
3596 return selectShuffleVector(I, MRI);
3597 case TargetOpcode::G_EXTRACT_VECTOR_ELT:
3598 return selectExtractElt(I, MRI);
3599 case TargetOpcode::G_CONCAT_VECTORS:
3600 return selectConcatVectors(I, MRI);
3601 case TargetOpcode::G_JUMP_TABLE:
3602 return selectJumpTable(I, MRI);
3603 case TargetOpcode::G_MEMCPY:
3604 case TargetOpcode::G_MEMCPY_INLINE:
3605 case TargetOpcode::G_MEMMOVE:
3606 case TargetOpcode::G_MEMSET:
3607 assert(STI.hasMOPS() && "Shouldn't get here without +mops feature");
3608 return selectMOPS(I, MRI);
3609 }
3610
3611 return false;
3612}
3613
3614bool AArch64InstructionSelector::selectAndRestoreState(MachineInstr &I) {
3615 MachineIRBuilderState OldMIBState = MIB.getState();
3616 bool Success = select(I);
3617 MIB.setState(OldMIBState);
3618 return Success;
3619}
3620
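// Note on the MOPS lowering below: G_MEMCPY/G_MEMMOVE/G_MEMSET only reach this
// selector when the subtarget has FEAT_MOPS. Each is turned into a single MOPS
// pseudo that is expanded much later into the three-instruction
// prologue/main/epilogue sequence (e.g. CPYFP/CPYFM/CPYFE for a plain memcpy).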
3621bool AArch64InstructionSelector::selectMOPS(MachineInstr &GI,
3622 MachineRegisterInfo &MRI) {
3623 unsigned Mopcode;
3624 switch (GI.getOpcode()) {
3625 case TargetOpcode::G_MEMCPY:
3626 case TargetOpcode::G_MEMCPY_INLINE:
3627 Mopcode = AArch64::MOPSMemoryCopyPseudo;
3628 break;
3629 case TargetOpcode::G_MEMMOVE:
3630 Mopcode = AArch64::MOPSMemoryMovePseudo;
3631 break;
3632 case TargetOpcode::G_MEMSET:
3633 // For tagged memset see llvm.aarch64.mops.memset.tag
3634 Mopcode = AArch64::MOPSMemorySetPseudo;
3635 break;
3636 }
3637
3638 auto &DstPtr = GI.getOperand(0);
3639 auto &SrcOrVal = GI.getOperand(1);
3640 auto &Size = GI.getOperand(2);
3641
3642 // Create copies of the registers that can be clobbered.
3643 const Register DstPtrCopy = MRI.cloneVirtualRegister(DstPtr.getReg());
3644 const Register SrcValCopy = MRI.cloneVirtualRegister(SrcOrVal.getReg());
3645 const Register SizeCopy = MRI.cloneVirtualRegister(Size.getReg());
3646
3647 const bool IsSet = Mopcode == AArch64::MOPSMemorySetPseudo;
3648 const auto &SrcValRegClass =
3649 IsSet ? AArch64::GPR64RegClass : AArch64::GPR64commonRegClass;
3650
3651 // Constrain to specific registers
3652 RBI.constrainGenericRegister(DstPtrCopy, AArch64::GPR64commonRegClass, MRI);
3653 RBI.constrainGenericRegister(SrcValCopy, SrcValRegClass, MRI);
3654 RBI.constrainGenericRegister(SizeCopy, AArch64::GPR64RegClass, MRI);
3655
3656 MIB.buildCopy(DstPtrCopy, DstPtr);
3657 MIB.buildCopy(SrcValCopy, SrcOrVal);
3658 MIB.buildCopy(SizeCopy, Size);
3659
3660 // The new instruction uses the copied registers because it must update them.
3661 // The defs are not used since they don't exist in G_MEM*; they are still
3662 // tied.
3663 // Note: order of operands is different from G_MEMSET, G_MEMCPY, G_MEMMOVE
3664 Register DefDstPtr = MRI.createVirtualRegister(&AArch64::GPR64commonRegClass);
3665 Register DefSize = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
3666 if (IsSet) {
3667 MIB.buildInstr(Mopcode, {DefDstPtr, DefSize},
3668 {DstPtrCopy, SizeCopy, SrcValCopy});
3669 } else {
3670 Register DefSrcPtr = MRI.createVirtualRegister(&SrcValRegClass);
3671 MIB.buildInstr(Mopcode, {DefDstPtr, DefSrcPtr, DefSize},
3672 {DstPtrCopy, SrcValCopy, SizeCopy});
3673 }
3674
3675 GI.eraseFromParent();
3676 return true;
3677}
3678
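// G_BRJT lowering: in the common case the JumpTableDest32 pseudo below does the
// table-address plus 4-byte-entry arithmetic and feeds an indirect BR. Under
// "aarch64-jump-table-hardening" the dispatch is instead kept as a BR_JumpTable
// pseudo (with the index pinned in X16) and expanded later as one unit.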
3679bool AArch64InstructionSelector::selectBrJT(MachineInstr &I,
3680 MachineRegisterInfo &MRI) {
3681 assert(I.getOpcode() == TargetOpcode::G_BRJT && "Expected G_BRJT");
3682 Register JTAddr = I.getOperand(0).getReg();
3683 unsigned JTI = I.getOperand(1).getIndex();
3684 Register Index = I.getOperand(2).getReg();
3685
3686 MF->getInfo<AArch64FunctionInfo>()->setJumpTableEntryInfo(JTI, 4, nullptr);
3687
3688 // With aarch64-jump-table-hardening, we only expand the jump table dispatch
3689 // sequence later, to guarantee the integrity of the intermediate values.
3690 if (MF->getFunction().hasFnAttribute("aarch64-jump-table-hardening")) {
3691 CodeModel::Model CM = TM.getCodeModel();
3692 if (STI.isTargetMachO()) {
3693 if (CM != CodeModel::Small && CM != CodeModel::Large)
3694 report_fatal_error("Unsupported code-model for hardened jump-table");
3695 } else {
3696 // Note that COFF support would likely also need JUMP_TABLE_DEBUG_INFO.
3697 assert(STI.isTargetELF() &&
3698 "jump table hardening only supported on MachO/ELF");
3699 if (CM != CodeModel::Small)
3700 report_fatal_error("Unsupported code-model for hardened jump-table");
3701 }
3702
3703 MIB.buildCopy({AArch64::X16}, I.getOperand(2).getReg());
3704 MIB.buildInstr(AArch64::BR_JumpTable)
3705 .addJumpTableIndex(I.getOperand(1).getIndex());
3706 I.eraseFromParent();
3707 return true;
3708 }
3709
3710 Register TargetReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
3711 Register ScratchReg = MRI.createVirtualRegister(&AArch64::GPR64spRegClass);
3712
3713 auto JumpTableInst = MIB.buildInstr(AArch64::JumpTableDest32,
3714 {TargetReg, ScratchReg}, {JTAddr, Index})
3715 .addJumpTableIndex(JTI);
3716 // Save the jump table info.
3717 MIB.buildInstr(TargetOpcode::JUMP_TABLE_DEBUG_INFO, {},
3718 {static_cast<int64_t>(JTI)});
3719 // Build the indirect branch.
3720 MIB.buildInstr(AArch64::BR, {}, {TargetReg});
3721 I.eraseFromParent();
3722 return constrainSelectedInstRegOperands(*JumpTableInst, TII, TRI, RBI);
3723}
3724
3725bool AArch64InstructionSelector::selectJumpTable(MachineInstr &I,
3726 MachineRegisterInfo &MRI) {
3727 assert(I.getOpcode() == TargetOpcode::G_JUMP_TABLE && "Expected jump table");
3728 assert(I.getOperand(1).isJTI() && "Jump table op should have a JTI!");
3729
3730 Register DstReg = I.getOperand(0).getReg();
3731 unsigned JTI = I.getOperand(1).getIndex();
3732 // We generate a MOVaddrJT which will get expanded to an ADRP + ADD later.
3733 auto MovMI =
3734 MIB.buildInstr(AArch64::MOVaddrJT, {DstReg}, {})
3735 .addJumpTableIndex(JTI, AArch64II::MO_PAGE)
3736 .addJumpTableIndex(JTI, AArch64II::MO_NC | AArch64II::MO_PAGEOFF);
3737 I.eraseFromParent();
3738 return constrainSelectedInstRegOperands(*MovMI, TII, TRI, RBI);
3739}
3740
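// Darwin TLS access: load the TLV descriptor address from the GOT, load the
// accessor function pointer from the descriptor's first word, and call it with
// X0 pointing at the descriptor; the variable's address comes back in X0.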
3741bool AArch64InstructionSelector::selectTLSGlobalValue(
3742 MachineInstr &I, MachineRegisterInfo &MRI) {
3743 if (!STI.isTargetMachO())
3744 return false;
3745 MachineFunction &MF = *I.getParent()->getParent();
3746 MF.getFrameInfo().setAdjustsStack(true);
3747
3748 const auto &GlobalOp = I.getOperand(1);
3749 assert(GlobalOp.getOffset() == 0 &&
3750 "Shouldn't have an offset on TLS globals!");
3751 const GlobalValue &GV = *GlobalOp.getGlobal();
3752
3753 auto LoadGOT =
3754 MIB.buildInstr(AArch64::LOADgot, {&AArch64::GPR64commonRegClass}, {})
3755 .addGlobalAddress(&GV, 0, AArch64II::MO_TLS);
3756
3757 auto Load = MIB.buildInstr(AArch64::LDRXui, {&AArch64::GPR64commonRegClass},
3758 {LoadGOT.getReg(0)})
3759 .addImm(0);
3760
3761 MIB.buildCopy(Register(AArch64::X0), LoadGOT.getReg(0));
3762 // TLS calls preserve all registers except those that absolutely must be
3763 // trashed: X0 (it takes an argument), LR (it's a call) and NZCV (let's not be
3764 // silly).
3765 unsigned Opcode = getBLRCallOpcode(MF);
3766
3767 // With ptrauth-calls, the tlv access thunk pointer is authenticated (IA, 0).
3768 if (MF.getFunction().hasFnAttribute("ptrauth-calls")) {
3769 assert(Opcode == AArch64::BLR);
3770 Opcode = AArch64::BLRAAZ;
3771 }
3772
3773 MIB.buildInstr(Opcode, {}, {Load})
3774 .addUse(AArch64::X0, RegState::Implicit)
3775 .addDef(AArch64::X0, RegState::Implicit)
3776 .addRegMask(TRI.getTLSCallPreservedMask());
3777
3778 MIB.buildCopy(I.getOperand(0).getReg(), Register(AArch64::X0));
3779 RBI.constrainGenericRegister(I.getOperand(0).getReg(), AArch64::GPR64RegClass,
3780 MRI);
3781 I.eraseFromParent();
3782 return true;
3783}
3784
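// emitScalarToVector places a scalar in lane 0 of a vector register class by
// building an IMPLICIT_DEF of the wide class and INSERT_SUBREG-ing the scalar
// into the matching bsub/hsub/ssub/dsub subregister.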
3785MachineInstr *AArch64InstructionSelector::emitScalarToVector(
3786 unsigned EltSize, const TargetRegisterClass *DstRC, Register Scalar,
3787 MachineIRBuilder &MIRBuilder) const {
3788 auto Undef = MIRBuilder.buildInstr(TargetOpcode::IMPLICIT_DEF, {DstRC}, {});
3789
3790 auto BuildFn = [&](unsigned SubregIndex) {
3791 auto Ins =
3792 MIRBuilder
3793 .buildInstr(TargetOpcode::INSERT_SUBREG, {DstRC}, {Undef, Scalar})
3794 .addImm(SubregIndex);
3795 constrainSelectedInstRegOperands(*Undef, TII, TRI, RBI);
3796 constrainSelectedInstRegOperands(*Ins, TII, TRI, RBI);
3797 return &*Ins;
3798 };
3799
3800 switch (EltSize) {
3801 case 8:
3802 return BuildFn(AArch64::bsub);
3803 case 16:
3804 return BuildFn(AArch64::hsub);
3805 case 32:
3806 return BuildFn(AArch64::ssub);
3807 case 64:
3808 return BuildFn(AArch64::dsub);
3809 default:
3810 return nullptr;
3811 }
3812}
3813
3814MachineInstr *
3815AArch64InstructionSelector::emitNarrowVector(Register DstReg, Register SrcReg,
3816 MachineIRBuilder &MIB,
3817 MachineRegisterInfo &MRI) const {
3818 LLT DstTy = MRI.getType(DstReg);
3819 const TargetRegisterClass *RC =
3820 getRegClassForTypeOnBank(DstTy, *RBI.getRegBank(SrcReg, MRI, TRI));
3821 if (RC != &AArch64::FPR32RegClass && RC != &AArch64::FPR64RegClass) {
3822 LLVM_DEBUG(dbgs() << "Unsupported register class!\n");
3823 return nullptr;
3824 }
3825 unsigned SubReg = 0;
3826 if (!getSubRegForClass(RC, TRI, SubReg))
3827 return nullptr;
3828 if (SubReg != AArch64::ssub && SubReg != AArch64::dsub) {
3829 LLVM_DEBUG(dbgs() << "Unsupported destination size! ("
3830 << DstTy.getSizeInBits() << "\n");
3831 return nullptr;
3832 }
3833 auto Copy = MIB.buildInstr(TargetOpcode::COPY, {DstReg}, {})
3834 .addReg(SrcReg, 0, SubReg);
3835 RBI.constrainGenericRegister(DstReg, *RC, MRI);
3836 return Copy;
3837}
3838
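// selectMergeValues handles two scalar G_MERGE_VALUES shapes: two s64s become
// an s128 via lane inserts, and two s32 GPRs become an s64 by widening each
// with SUBREG_TO_REG and BFM-inserting the second operand into bits [63:32].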
3839bool AArch64InstructionSelector::selectMergeValues(
3840 MachineInstr &I, MachineRegisterInfo &MRI) {
3841 assert(I.getOpcode() == TargetOpcode::G_MERGE_VALUES && "unexpected opcode");
3842 const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
3843 const LLT SrcTy = MRI.getType(I.getOperand(1).getReg());
3844 assert(!DstTy.isVector() && !SrcTy.isVector() && "invalid merge operation");
3845 const RegisterBank &RB = *RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI);
3846
3847 if (I.getNumOperands() != 3)
3848 return false;
3849
3850 // Merging 2 s64s into an s128.
3851 if (DstTy == LLT::scalar(128)) {
3852 if (SrcTy.getSizeInBits() != 64)
3853 return false;
3854 Register DstReg = I.getOperand(0).getReg();
3855 Register Src1Reg = I.getOperand(1).getReg();
3856 Register Src2Reg = I.getOperand(2).getReg();
3857 auto Tmp = MIB.buildInstr(TargetOpcode::IMPLICIT_DEF, {DstTy}, {});
3858 MachineInstr *InsMI = emitLaneInsert(std::nullopt, Tmp.getReg(0), Src1Reg,
3859 /* LaneIdx */ 0, RB, MIB);
3860 if (!InsMI)
3861 return false;
3862 MachineInstr *Ins2MI = emitLaneInsert(DstReg, InsMI->getOperand(0).getReg(),
3863 Src2Reg, /* LaneIdx */ 1, RB, MIB);
3864 if (!Ins2MI)
3865 return false;
3866 constrainSelectedInstRegOperands(*InsMI, TII, TRI, RBI);
3867 constrainSelectedInstRegOperands(*Ins2MI, TII, TRI, RBI);
3868 I.eraseFromParent();
3869 return true;
3870 }
3871
3872 if (RB.getID() != AArch64::GPRRegBankID)
3873 return false;
3874
3875 if (DstTy.getSizeInBits() != 64 || SrcTy.getSizeInBits() != 32)
3876 return false;
3877
3878 auto *DstRC = &AArch64::GPR64RegClass;
3879 Register SubToRegDef = MRI.createVirtualRegister(DstRC);
3880 MachineInstr &SubRegMI = *BuildMI(*I.getParent(), I, I.getDebugLoc(),
3881 TII.get(TargetOpcode::SUBREG_TO_REG))
3882 .addDef(SubToRegDef)
3883 .addImm(0)
3884 .addUse(I.getOperand(1).getReg())
3885 .addImm(AArch64::sub_32);
3886 Register SubToRegDef2 = MRI.createVirtualRegister(DstRC);
3887 // Need to anyext the second scalar before we can use bfm
3888 MachineInstr &SubRegMI2 = *BuildMI(*I.getParent(), I, I.getDebugLoc(),
3889 TII.get(TargetOpcode::SUBREG_TO_REG))
3890 .addDef(SubToRegDef2)
3891 .addImm(0)
3892 .addUse(I.getOperand(2).getReg())
3893 .addImm(AArch64::sub_32);
3894 MachineInstr &BFM =
3895 *BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::BFMXri))
3896 .addDef(I.getOperand(0).getReg())
3897 .addUse(SubToRegDef)
3898 .addUse(SubToRegDef2)
3899 .addImm(32)
3900 .addImm(31);
3901 constrainSelectedInstRegOperands(SubRegMI, TII, TRI, RBI);
3902 constrainSelectedInstRegOperands(SubRegMI2, TII, TRI, RBI);
3903 constrainSelectedInstRegOperands(BFM, TII, TRI, RBI);
3904 I.eraseFromParent();
3905 return true;
3906}
3907
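// Lane copies below use the scalar DUP (element) instructions (DUPi8/16/32/64),
// which copy one lane of a 128-bit vector into a scalar FPR; lane 0 extractions
// are instead done with a plain subregister COPY.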
3908static bool getLaneCopyOpcode(unsigned &CopyOpc, unsigned &ExtractSubReg,
3909 const unsigned EltSize) {
3910 // Choose a lane copy opcode and subregister based off of the size of the
3911 // vector's elements.
3912 switch (EltSize) {
3913 case 8:
3914 CopyOpc = AArch64::DUPi8;
3915 ExtractSubReg = AArch64::bsub;
3916 break;
3917 case 16:
3918 CopyOpc = AArch64::DUPi16;
3919 ExtractSubReg = AArch64::hsub;
3920 break;
3921 case 32:
3922 CopyOpc = AArch64::DUPi32;
3923 ExtractSubReg = AArch64::ssub;
3924 break;
3925 case 64:
3926 CopyOpc = AArch64::DUPi64;
3927 ExtractSubReg = AArch64::dsub;
3928 break;
3929 default:
3930 // Unknown size, bail out.
3931 LLVM_DEBUG(dbgs() << "Elt size '" << EltSize << "' unsupported.\n");
3932 return false;
3933 }
3934 return true;
3935}
3936
3937MachineInstr *AArch64InstructionSelector::emitExtractVectorElt(
3938 std::optional<Register> DstReg, const RegisterBank &DstRB, LLT ScalarTy,
3939 Register VecReg, unsigned LaneIdx, MachineIRBuilder &MIRBuilder) const {
3940 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
3941 unsigned CopyOpc = 0;
3942 unsigned ExtractSubReg = 0;
3943 if (!getLaneCopyOpcode(CopyOpc, ExtractSubReg, ScalarTy.getSizeInBits())) {
3944 LLVM_DEBUG(
3945 dbgs() << "Couldn't determine lane copy opcode for instruction.\n");
3946 return nullptr;
3947 }
3948
3949 const TargetRegisterClass *DstRC =
3950 getRegClassForTypeOnBank(ScalarTy, DstRB, true);
3951 if (!DstRC) {
3952 LLVM_DEBUG(dbgs() << "Could not determine destination register class.\n");
3953 return nullptr;
3954 }
3955
3956 const RegisterBank &VecRB = *RBI.getRegBank(VecReg, MRI, TRI);
3957 const LLT &VecTy = MRI.getType(VecReg);
3958 const TargetRegisterClass *VecRC =
3959 getRegClassForTypeOnBank(VecTy, VecRB, true);
3960 if (!VecRC) {
3961 LLVM_DEBUG(dbgs() << "Could not determine source register class.\n");
3962 return nullptr;
3963 }
3964
3965 // The register that we're going to copy into.
3966 Register InsertReg = VecReg;
3967 if (!DstReg)
3968 DstReg = MRI.createVirtualRegister(DstRC);
3969 // If the lane index is 0, we just use a subregister COPY.
3970 if (LaneIdx == 0) {
3971 auto Copy = MIRBuilder.buildInstr(TargetOpcode::COPY, {*DstReg}, {})
3972 .addReg(VecReg, 0, ExtractSubReg);
3973 RBI.constrainGenericRegister(*DstReg, *DstRC, MRI);
3974 return &*Copy;
3975 }
3976
3977 // Lane copies require 128-bit wide registers. If we're dealing with an
3978 // unpacked vector, then we need to move up to that width. Insert an implicit
3979 // def and a subregister insert to get us there.
3980 if (VecTy.getSizeInBits() != 128) {
3981 MachineInstr *ScalarToVector = emitScalarToVector(
3982 VecTy.getSizeInBits(), &AArch64::FPR128RegClass, VecReg, MIRBuilder);
3983 if (!ScalarToVector)
3984 return nullptr;
3985 InsertReg = ScalarToVector->getOperand(0).getReg();
3986 }
3987
3988 MachineInstr *LaneCopyMI =
3989 MIRBuilder.buildInstr(CopyOpc, {*DstReg}, {InsertReg}).addImm(LaneIdx);
3990 constrainSelectedInstRegOperands(*LaneCopyMI, TII, TRI, RBI);
3991
3992 // Make sure that we actually constrain the initial copy.
3993 RBI.constrainGenericRegister(*DstReg, *DstRC, MRI);
3994 return LaneCopyMI;
3995}
3996
3997bool AArch64InstructionSelector::selectExtractElt(
3998 MachineInstr &I, MachineRegisterInfo &MRI) {
3999 assert(I.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT &&
4000 "unexpected opcode!");
4001 Register DstReg = I.getOperand(0).getReg();
4002 const LLT NarrowTy = MRI.getType(DstReg);
4003 const Register SrcReg = I.getOperand(1).getReg();
4004 const LLT WideTy = MRI.getType(SrcReg);
4005 (void)WideTy;
4006 assert(WideTy.getSizeInBits() >= NarrowTy.getSizeInBits() &&
4007 "source register size too small!");
4008 assert(!NarrowTy.isVector() && "cannot extract vector into vector!");
4009
4010 // Need the lane index to determine the correct copy opcode.
4011 MachineOperand &LaneIdxOp = I.getOperand(2);
4012 assert(LaneIdxOp.isReg() && "Lane index operand was not a register?");
4013
4014 if (RBI.getRegBank(DstReg, MRI, TRI)->getID() != AArch64::FPRRegBankID) {
4015 LLVM_DEBUG(dbgs() << "Cannot extract into GPR.\n");
4016 return false;
4017 }
4018
4019 // Find the index to extract from.
4020 auto VRegAndVal = getIConstantVRegValWithLookThrough(LaneIdxOp.getReg(), MRI);
4021 if (!VRegAndVal)
4022 return false;
4023 unsigned LaneIdx = VRegAndVal->Value.getSExtValue();
4024
4025
4026 const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
4027 MachineInstr *Extract = emitExtractVectorElt(DstReg, DstRB, NarrowTy, SrcReg,
4028 LaneIdx, MIB);
4029 if (!Extract)
4030 return false;
4031
4032 I.eraseFromParent();
4033 return true;
4034}
4035
4036bool AArch64InstructionSelector::selectSplitVectorUnmerge(
4037 MachineInstr &I, MachineRegisterInfo &MRI) {
4038 unsigned NumElts = I.getNumOperands() - 1;
4039 Register SrcReg = I.getOperand(NumElts).getReg();
4040 const LLT NarrowTy = MRI.getType(I.getOperand(0).getReg());
4041 const LLT SrcTy = MRI.getType(SrcReg);
4042
4043 assert(NarrowTy.isVector() && "Expected an unmerge into vectors");
4044 if (SrcTy.getSizeInBits() > 128) {
4045 LLVM_DEBUG(dbgs() << "Unexpected vector type for vec split unmerge");
4046 return false;
4047 }
4048
4049 // We implement a split vector operation by treating the sub-vectors as
4050 // scalars and extracting them.
4051 const RegisterBank &DstRB =
4052 *RBI.getRegBank(I.getOperand(0).getReg(), MRI, TRI);
4053 for (unsigned OpIdx = 0; OpIdx < NumElts; ++OpIdx) {
4054 Register Dst = I.getOperand(OpIdx).getReg();
4055 MachineInstr *Extract =
4056 emitExtractVectorElt(Dst, DstRB, NarrowTy, SrcReg, OpIdx, MIB);
4057 if (!Extract)
4058 return false;
4059 }
4060 I.eraseFromParent();
4061 return true;
4062}
4063
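// For example (sketch): unmerging a v4s32 held in an FPR into four s32 values
// emits an ssub subregister COPY for element 0 and DUPi32 lane copies for
// elements 1-3, widening the source to 128 bits first if necessary.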
4064bool AArch64InstructionSelector::selectUnmergeValues(MachineInstr &I,
4065 MachineRegisterInfo &MRI) {
4066 assert(I.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
4067 "unexpected opcode");
4068
4069 // TODO: Handle unmerging into GPRs and from scalars to scalars.
4070 if (RBI.getRegBank(I.getOperand(0).getReg(), MRI, TRI)->getID() !=
4071 AArch64::FPRRegBankID ||
4072 RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI)->getID() !=
4073 AArch64::FPRRegBankID) {
4074 LLVM_DEBUG(dbgs() << "Unmerging vector-to-gpr and scalar-to-scalar "
4075 "currently unsupported.\n");
4076 return false;
4077 }
4078
4079 // The last operand is the vector source register, and every other operand is
4080 // a register to unpack into.
4081 unsigned NumElts = I.getNumOperands() - 1;
4082 Register SrcReg = I.getOperand(NumElts).getReg();
4083 const LLT NarrowTy = MRI.getType(I.getOperand(0).getReg());
4084 const LLT WideTy = MRI.getType(SrcReg);
4085 (void)WideTy;
4086 assert((WideTy.isVector() || WideTy.getSizeInBits() == 128) &&
4087 "can only unmerge from vector or s128 types!");
4088 assert(WideTy.getSizeInBits() > NarrowTy.getSizeInBits() &&
4089 "source register size too small!");
4090
4091 if (!NarrowTy.isScalar())
4092 return selectSplitVectorUnmerge(I, MRI);
4093
4094 // Choose a lane copy opcode and subregister based off of the size of the
4095 // vector's elements.
4096 unsigned CopyOpc = 0;
4097 unsigned ExtractSubReg = 0;
4098 if (!getLaneCopyOpcode(CopyOpc, ExtractSubReg, NarrowTy.getSizeInBits()))
4099 return false;
4100
4101 // Set up for the lane copies.
4102 MachineBasicBlock &MBB = *I.getParent();
4103
4104 // Stores the registers we'll be copying from.
4105 SmallVector<Register, 4> InsertRegs;
4106
4107 // We'll use the first register twice, so we only need NumElts-1 registers.
4108 unsigned NumInsertRegs = NumElts - 1;
4109
4110 // If our elements fit into exactly 128 bits, then we can copy from the source
4111 // directly. Otherwise, we need to do a bit of setup with some subregister
4112 // inserts.
4113 if (NarrowTy.getSizeInBits() * NumElts == 128) {
4114 InsertRegs = SmallVector<Register, 4>(NumInsertRegs, SrcReg);
4115 } else {
4116 // No. We have to perform subregister inserts. For each insert, create an
4117 // implicit def and a subregister insert, and save the register we create.
4118 const TargetRegisterClass *RC = getRegClassForTypeOnBank(
4119 LLT::fixed_vector(NumElts, WideTy.getScalarSizeInBits()),
4120 *RBI.getRegBank(SrcReg, MRI, TRI));
4121 unsigned SubReg = 0;
4122 bool Found = getSubRegForClass(RC, TRI, SubReg);
4123 (void)Found;
4124 assert(Found && "expected to find last operand's subreg idx");
4125 for (unsigned Idx = 0; Idx < NumInsertRegs; ++Idx) {
4126 Register ImpDefReg = MRI.createVirtualRegister(&AArch64::FPR128RegClass);
4127 MachineInstr &ImpDefMI =
4128 *BuildMI(MBB, I, I.getDebugLoc(), TII.get(TargetOpcode::IMPLICIT_DEF),
4129 ImpDefReg);
4130
4131 // Now, create the subregister insert from SrcReg.
4132 Register InsertReg = MRI.createVirtualRegister(&AArch64::FPR128RegClass);
4133 MachineInstr &InsMI =
4134 *BuildMI(MBB, I, I.getDebugLoc(),
4135 TII.get(TargetOpcode::INSERT_SUBREG), InsertReg)
4136 .addUse(ImpDefReg)
4137 .addUse(SrcReg)
4138 .addImm(SubReg);
4139
4140 constrainSelectedInstRegOperands(ImpDefMI, TII, TRI, RBI);
4141 constrainSelectedInstRegOperands(InsMI, TII, TRI, RBI);
4142
4143 // Save the register so that we can copy from it after.
4144 InsertRegs.push_back(InsertReg);
4145 }
4146 }
4147
4148 // Now that we've created any necessary subregister inserts, we can
4149 // create the copies.
4150 //
4151 // Perform the first copy separately as a subregister copy.
4152 Register CopyTo = I.getOperand(0).getReg();
4153 auto FirstCopy = MIB.buildInstr(TargetOpcode::COPY, {CopyTo}, {})
4154 .addReg(InsertRegs[0], 0, ExtractSubReg);
4155 constrainSelectedInstRegOperands(*FirstCopy, TII, TRI, RBI);
4156
4157 // Now, perform the remaining copies as vector lane copies.
4158 unsigned LaneIdx = 1;
4159 for (Register InsReg : InsertRegs) {
4160 Register CopyTo = I.getOperand(LaneIdx).getReg();
4161 MachineInstr &CopyInst =
4162 *BuildMI(MBB, I, I.getDebugLoc(), TII.get(CopyOpc), CopyTo)
4163 .addUse(InsReg)
4164 .addImm(LaneIdx);
4165 constrainSelectedInstRegOperands(CopyInst, TII, TRI, RBI);
4166 ++LaneIdx;
4167 }
4168
4169 // Separately constrain the first copy's destination. Because of the
4170 // limitation in constrainOperandRegClass, we can't guarantee that this will
4171 // actually be constrained. So, do it ourselves using the second operand.
4172 const TargetRegisterClass *RC =
4173 MRI.getRegClassOrNull(I.getOperand(1).getReg());
4174 if (!RC) {
4175 LLVM_DEBUG(dbgs() << "Couldn't constrain copy destination.\n");
4176 return false;
4177 }
4178
4179 RBI.constrainGenericRegister(CopyTo, *RC, MRI);
4180 I.eraseFromParent();
4181 return true;
4182}
4183
4184bool AArch64InstructionSelector::selectConcatVectors(
4185 MachineInstr &I, MachineRegisterInfo &MRI) {
4186 assert(I.getOpcode() == TargetOpcode::G_CONCAT_VECTORS &&
4187 "Unexpected opcode");
4188 Register Dst = I.getOperand(0).getReg();
4189 Register Op1 = I.getOperand(1).getReg();
4190 Register Op2 = I.getOperand(2).getReg();
4191 MachineInstr *ConcatMI = emitVectorConcat(Dst, Op1, Op2, MIB);
4192 if (!ConcatMI)
4193 return false;
4194 I.eraseFromParent();
4195 return true;
4196}
4197
4198unsigned
4199AArch64InstructionSelector::emitConstantPoolEntry(const Constant *CPVal,
4200 MachineFunction &MF) const {
4201 Type *CPTy = CPVal->getType();
4202 Align Alignment = MF.getDataLayout().getPrefTypeAlign(CPTy);
4203
4204 MachineConstantPool *MCP = MF.getConstantPool();
4205 return MCP->getConstantPoolIndex(CPVal, Alignment);
4206}
4207
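// Constant-pool loads are materialized as ADRP + LDR{Q,D,S,H}ui with
// MO_PAGE/MO_PAGEOFF relocations, except under the tiny code model where the
// 16/8/4-byte cases use the PC-relative LDR (literal) forms instead.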
4208MachineInstr *AArch64InstructionSelector::emitLoadFromConstantPool(
4209 const Constant *CPVal, MachineIRBuilder &MIRBuilder) const {
4210 const TargetRegisterClass *RC;
4211 unsigned Opc;
4212 bool IsTiny = TM.getCodeModel() == CodeModel::Tiny;
4213 unsigned Size = MIRBuilder.getDataLayout().getTypeStoreSize(CPVal->getType());
4214 switch (Size) {
4215 case 16:
4216 RC = &AArch64::FPR128RegClass;
4217 Opc = IsTiny ? AArch64::LDRQl : AArch64::LDRQui;
4218 break;
4219 case 8:
4220 RC = &AArch64::FPR64RegClass;
4221 Opc = IsTiny ? AArch64::LDRDl : AArch64::LDRDui;
4222 break;
4223 case 4:
4224 RC = &AArch64::FPR32RegClass;
4225 Opc = IsTiny ? AArch64::LDRSl : AArch64::LDRSui;
4226 break;
4227 case 2:
4228 RC = &AArch64::FPR16RegClass;
4229 Opc = AArch64::LDRHui;
4230 break;
4231 default:
4232 LLVM_DEBUG(dbgs() << "Could not load from constant pool of type "
4233 << *CPVal->getType());
4234 return nullptr;
4235 }
4236
4237 MachineInstr *LoadMI = nullptr;
4238 auto &MF = MIRBuilder.getMF();
4239 unsigned CPIdx = emitConstantPoolEntry(CPVal, MF);
4240 if (IsTiny && (Size == 16 || Size == 8 || Size == 4)) {
4241 // Use load(literal) for tiny code model.
4242 LoadMI = &*MIRBuilder.buildInstr(Opc, {RC}, {}).addConstantPoolIndex(CPIdx);
4243 } else {
4244 auto Adrp =
4245 MIRBuilder.buildInstr(AArch64::ADRP, {&AArch64::GPR64RegClass}, {})
4246 .addConstantPoolIndex(CPIdx, 0, AArch64II::MO_PAGE);
4247
4248 LoadMI = &*MIRBuilder.buildInstr(Opc, {RC}, {Adrp})
4249 .addConstantPoolIndex(
4250 CPIdx, 0, AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
4251
4252 constrainSelectedInstRegOperands(*Adrp, TII, TRI, RBI);
4253 }
4254
4255 MachinePointerInfo PtrInfo = MachinePointerInfo::getConstantPool(MF);
4256 LoadMI->addMemOperand(MF, MF.getMachineMemOperand(PtrInfo,
4257 MachineMemOperand::MOLoad,
4258 Size, Align(Size)));
4259 constrainSelectedInstRegOperands(*LoadMI, TII, TRI, RBI);
4260 return LoadMI;
4261}
4262
4263/// Return an <Opcode, SubregIndex> pair to do a vector elt insert of a given
4264/// size and RB.
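/// The *gpr opcodes are INS (general), taking the scalar from a GPR, while the
/// *lane opcodes are INS (element), copying a lane of another vector register.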
4265static std::pair<unsigned, unsigned>
4266getInsertVecEltOpInfo(const RegisterBank &RB, unsigned EltSize) {
4267 unsigned Opc, SubregIdx;
4268 if (RB.getID() == AArch64::GPRRegBankID) {
4269 if (EltSize == 8) {
4270 Opc = AArch64::INSvi8gpr;
4271 SubregIdx = AArch64::bsub;
4272 } else if (EltSize == 16) {
4273 Opc = AArch64::INSvi16gpr;
4274 SubregIdx = AArch64::ssub;
4275 } else if (EltSize == 32) {
4276 Opc = AArch64::INSvi32gpr;
4277 SubregIdx = AArch64::ssub;
4278 } else if (EltSize == 64) {
4279 Opc = AArch64::INSvi64gpr;
4280 SubregIdx = AArch64::dsub;
4281 } else {
4282 llvm_unreachable("invalid elt size!");
4283 }
4284 } else {
4285 if (EltSize == 8) {
4286 Opc = AArch64::INSvi8lane;
4287 SubregIdx = AArch64::bsub;
4288 } else if (EltSize == 16) {
4289 Opc = AArch64::INSvi16lane;
4290 SubregIdx = AArch64::hsub;
4291 } else if (EltSize == 32) {
4292 Opc = AArch64::INSvi32lane;
4293 SubregIdx = AArch64::ssub;
4294 } else if (EltSize == 64) {
4295 Opc = AArch64::INSvi64lane;
4296 SubregIdx = AArch64::dsub;
4297 } else {
4298 llvm_unreachable("invalid elt size!");
4299 }
4300 }
4301 return std::make_pair(Opc, SubregIdx);
4302}
4303
4304MachineInstr *AArch64InstructionSelector::emitInstr(
4305 unsigned Opcode, std::initializer_list<llvm::DstOp> DstOps,
4306 std::initializer_list<llvm::SrcOp> SrcOps, MachineIRBuilder &MIRBuilder,
4307 const ComplexRendererFns &RenderFns) const {
4308 assert(Opcode && "Expected an opcode?");
4309 assert(!isPreISelGenericOpcode(Opcode) &&
4310 "Function should only be used to produce selected instructions!");
4311 auto MI = MIRBuilder.buildInstr(Opcode, DstOps, SrcOps);
4312 if (RenderFns)
4313 for (auto &Fn : *RenderFns)
4314 Fn(MI);
4315 constrainSelectedInstRegOperands(*MI, TII, TRI, RBI);
4316 return &*MI;
4317}
4318
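// The opcode tables passed to emitAddSub are indexed as
// [addressing form][Is32Bit]: rows are {ri, rs, rr, negated-ri, rx} and each
// row holds the 64-bit (X) opcode at [0] and the 32-bit (W) opcode at [1].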
4319MachineInstr *AArch64InstructionSelector::emitAddSub(
4320 const std::array<std::array<unsigned, 2>, 5> &AddrModeAndSizeToOpcode,
4321 Register Dst, MachineOperand &LHS, MachineOperand &RHS,
4322 MachineIRBuilder &MIRBuilder) const {
4323 MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
4324 assert(LHS.isReg() && RHS.isReg() && "Expected register operands?");
4325 auto Ty = MRI.getType(LHS.getReg());
4326 assert(!Ty.isVector() && "Expected a scalar or pointer?");
4327 unsigned Size = Ty.getSizeInBits();
4328 assert((Size == 32 || Size == 64) && "Expected a 32-bit or 64-bit type only");
4329 bool Is32Bit = Size == 32;
4330
4331 // INSTRri form with positive arithmetic immediate.
4332 if (auto Fns = selectArithImmed(RHS))
4333 return emitInstr(AddrModeAndSizeToOpcode[0][Is32Bit], {Dst}, {LHS},
4334 MIRBuilder, Fns);
4335
4336 // INSTRri form with negative arithmetic immediate.
4337 if (auto Fns = selectNegArithImmed(RHS))
4338 return emitInstr(AddrModeAndSizeToOpcode[3][Is32Bit], {Dst}, {LHS},
4339 MIRBuilder, Fns);
4340
4341 // INSTRrx form.
4342 if (auto Fns = selectArithExtendedRegister(RHS))
4343 return emitInstr(AddrModeAndSizeToOpcode[4][Is32Bit], {Dst}, {LHS},
4344 MIRBuilder, Fns);
4345
4346 // INSTRrs form.
4347 if (auto Fns = selectShiftedRegister(RHS))
4348 return emitInstr(AddrModeAndSizeToOpcode[1][Is32Bit], {Dst}, {LHS},
4349 MIRBuilder, Fns);
4350 return emitInstr(AddrModeAndSizeToOpcode[2][Is32Bit], {Dst}, {LHS, RHS},
4351 MIRBuilder);
4352}
4353
4354MachineInstr *
4355AArch64InstructionSelector::emitADD(Register DefReg, MachineOperand &LHS,
4356 MachineOperand &RHS,
4357 MachineIRBuilder &MIRBuilder) const {
4358 const std::array<std::array<unsigned, 2>, 5> OpcTable{
4359 {{AArch64::ADDXri, AArch64::ADDWri},
4360 {AArch64::ADDXrs, AArch64::ADDWrs},
4361 {AArch64::ADDXrr, AArch64::ADDWrr},
4362 {AArch64::SUBXri, AArch64::SUBWri},
4363 {AArch64::ADDXrx, AArch64::ADDWrx}}};
4364 return emitAddSub(OpcTable, DefReg, LHS, RHS, MIRBuilder);
4365}
4366
4367MachineInstr *
4368AArch64InstructionSelector::emitADDS(Register Dst, MachineOperand &LHS,
4369 MachineOperand &RHS,
4370 MachineIRBuilder &MIRBuilder) const {
4371 const std::array<std::array<unsigned, 2>, 5> OpcTable{
4372 {{AArch64::ADDSXri, AArch64::ADDSWri},
4373 {AArch64::ADDSXrs, AArch64::ADDSWrs},
4374 {AArch64::ADDSXrr, AArch64::ADDSWrr},
4375 {AArch64::SUBSXri, AArch64::SUBSWri},
4376 {AArch64::ADDSXrx, AArch64::ADDSWrx}}};
4377 return emitAddSub(OpcTable, Dst, LHS, RHS, MIRBuilder);
4378}
4379
4380MachineInstr *
4381AArch64InstructionSelector::emitSUBS(Register Dst, MachineOperand &LHS,
4382 MachineOperand &RHS,
4383 MachineIRBuilder &MIRBuilder) const {
4384 const std::array<std::array<unsigned, 2>, 5> OpcTable{
4385 {{AArch64::SUBSXri, AArch64::SUBSWri},
4386 {AArch64::SUBSXrs, AArch64::SUBSWrs},
4387 {AArch64::SUBSXrr, AArch64::SUBSWrr},
4388 {AArch64::ADDSXri, AArch64::ADDSWri},
4389 {AArch64::SUBSXrx, AArch64::SUBSWrx}}};
4390 return emitAddSub(OpcTable, Dst, LHS, RHS, MIRBuilder);
4391}
4392
4393MachineInstr *
4394AArch64InstructionSelector::emitADCS(Register Dst, MachineOperand &LHS,
4395 MachineOperand &RHS,
4396 MachineIRBuilder &MIRBuilder) const {
4397 assert(LHS.isReg() && RHS.isReg() && "Expected register operands?");
4398 MachineRegisterInfo *MRI = MIRBuilder.getMRI();
4399 bool Is32Bit = (MRI->getType(LHS.getReg()).getSizeInBits() == 32);
4400 static const unsigned OpcTable[2] = {AArch64::ADCSXr, AArch64::ADCSWr};
4401 return emitInstr(OpcTable[Is32Bit], {Dst}, {LHS, RHS}, MIRBuilder);
4402}
4403
4404MachineInstr *
4405AArch64InstructionSelector::emitSBCS(Register Dst, MachineOperand &LHS,
4406 MachineOperand &RHS,
4407 MachineIRBuilder &MIRBuilder) const {
4408 assert(LHS.isReg() && RHS.isReg() && "Expected register operands?");
4409 MachineRegisterInfo *MRI = MIRBuilder.getMRI();
4410 bool Is32Bit = (MRI->getType(LHS.getReg()).getSizeInBits() == 32);
4411 static const unsigned OpcTable[2] = {AArch64::SBCSXr, AArch64::SBCSWr};
4412 return emitInstr(OpcTable[Is32Bit], {Dst}, {LHS, RHS}, MIRBuilder);
4413}
4414
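// CMN is just ADDS with a discarded destination; a scratch vreg stands in for
// the zero register here so the flag-setting add can still be constrained
// normally.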
4415MachineInstr *
4416AArch64InstructionSelector::emitCMN(MachineOperand &LHS, MachineOperand &RHS,
4417 MachineIRBuilder &MIRBuilder) const {
4418 MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
4419 bool Is32Bit = (MRI.getType(LHS.getReg()).getSizeInBits() == 32);
4420 auto RC = Is32Bit ? &AArch64::GPR32RegClass : &AArch64::GPR64RegClass;
4421 return emitADDS(MRI.createVirtualRegister(RC), LHS, RHS, MIRBuilder);
4422}
4423
4424MachineInstr *
4425AArch64InstructionSelector::emitTST(MachineOperand &LHS, MachineOperand &RHS,
4426 MachineIRBuilder &MIRBuilder) const {
4427 assert(LHS.isReg() && RHS.isReg() && "Expected register operands?");
4428 MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
4429 LLT Ty = MRI.getType(LHS.getReg());
4430 unsigned RegSize = Ty.getSizeInBits();
4431 bool Is32Bit = (RegSize == 32);
4432 const unsigned OpcTable[3][2] = {{AArch64::ANDSXri, AArch64::ANDSWri},
4433 {AArch64::ANDSXrs, AArch64::ANDSWrs},
4434 {AArch64::ANDSXrr, AArch64::ANDSWrr}};
4435 // ANDS needs a logical immediate for its immediate form. Check if we can
4436 // fold one in.
4437 if (auto ValAndVReg = getIConstantVRegValWithLookThrough(RHS.getReg(), MRI)) {
4438 int64_t Imm = ValAndVReg->Value.getSExtValue();
4439
4440 if (AArch64_AM::isLogicalImmediate(Imm, RegSize)) {
4441 auto TstMI = MIRBuilder.buildInstr(OpcTable[0][Is32Bit], {Ty}, {LHS});
4442 TstMI.addImm(AArch64_AM::encodeLogicalImmediate(Imm, RegSize));
4443 constrainSelectedInstRegOperands(*TstMI, TII, TRI, RBI);
4444 return &*TstMI;
4445 }
4446 }
4447
4448 if (auto Fns = selectLogicalShiftedRegister(RHS))
4449 return emitInstr(OpcTable[1][Is32Bit], {Ty}, {LHS}, MIRBuilder, Fns);
4450 return emitInstr(OpcTable[2][Is32Bit], {Ty}, {LHS, RHS}, MIRBuilder);
4451}
4452
4453MachineInstr *AArch64InstructionSelector::emitIntegerCompare(
4454 MachineOperand &LHS, MachineOperand &RHS, MachineOperand &Predicate,
4455 MachineIRBuilder &MIRBuilder) const {
4456 assert(LHS.isReg() && RHS.isReg() && "Expected LHS and RHS to be registers!");
4457 assert(Predicate.isPredicate() && "Expected predicate?");
4458 MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
4459 LLT CmpTy = MRI.getType(LHS.getReg());
4460 assert(!CmpTy.isVector() && "Expected scalar or pointer");
4461 unsigned Size = CmpTy.getSizeInBits();
4462 (void)Size;
4463 assert((Size == 32 || Size == 64) && "Expected a 32-bit or 64-bit LHS/RHS?");
4464 // Fold the compare into a cmn or tst if possible.
4465 if (auto FoldCmp = tryFoldIntegerCompare(LHS, RHS, Predicate, MIRBuilder))
4466 return FoldCmp;
4467 auto Dst = MRI.cloneVirtualRegister(LHS.getReg());
4468 return emitSUBS(Dst, LHS, RHS, MIRBuilder);
4469}
4470
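// Some FP predicates (e.g. ONE, UEQ) map to a pair of AArch64 condition codes;
// in that case two CSINC-based CSETs are emitted and ORed together, otherwise
// a single CSET of the inverted code suffices.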
4471MachineInstr *AArch64InstructionSelector::emitCSetForFCmp(
4472 Register Dst, CmpInst::Predicate Pred, MachineIRBuilder &MIRBuilder) const {
4473 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
4474#ifndef NDEBUG
4475 LLT Ty = MRI.getType(Dst);
4476 assert(!Ty.isVector() && Ty.getSizeInBits() == 32 &&
4477 "Expected a 32-bit scalar register?");
4478#endif
4479 const Register ZReg = AArch64::WZR;
4480 AArch64CC::CondCode CC1, CC2;
4481 changeFCMPPredToAArch64CC(Pred, CC1, CC2);
4482 auto InvCC1 = AArch64CC::getInvertedCondCode(CC1);
4483 if (CC2 == AArch64CC::AL)
4484 return emitCSINC(/*Dst=*/Dst, /*Src1=*/ZReg, /*Src2=*/ZReg, InvCC1,
4485 MIRBuilder);
4486 const TargetRegisterClass *RC = &AArch64::GPR32RegClass;
4487 Register Def1Reg = MRI.createVirtualRegister(RC);
4488 Register Def2Reg = MRI.createVirtualRegister(RC);
4489 auto InvCC2 = AArch64CC::getInvertedCondCode(CC2);
4490 emitCSINC(/*Dst=*/Def1Reg, /*Src1=*/ZReg, /*Src2=*/ZReg, InvCC1, MIRBuilder);
4491 emitCSINC(/*Dst=*/Def2Reg, /*Src1=*/ZReg, /*Src2=*/ZReg, InvCC2, MIRBuilder);
4492 auto OrMI = MIRBuilder.buildInstr(AArch64::ORRWrr, {Dst}, {Def1Reg, Def2Reg});
4493 constrainSelectedInstRegOperands(*OrMI, TII, TRI, RBI);
4494 return &*OrMI;
4495}
4496
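// Compares against +0.0 use the immediate FCMP forms (e.g. FCMPSri is
// "FCMP Sn, #0.0"); for pure (in)equality predicates the operands may be
// swapped first to expose a zero on the RHS.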
4497MachineInstr *AArch64InstructionSelector::emitFPCompare(
4498 Register LHS, Register RHS, MachineIRBuilder &MIRBuilder,
4499 std::optional<CmpInst::Predicate> Pred) const {
4500 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
4501 LLT Ty = MRI.getType(LHS);
4502 if (Ty.isVector())
4503 return nullptr;
4504 unsigned OpSize = Ty.getSizeInBits();
4505 assert(OpSize == 16 || OpSize == 32 || OpSize == 64);
4506
4507 // If this is a compare against +0.0, then we don't have
4508 // to explicitly materialize a constant.
4509 const ConstantFP *FPImm = getConstantFPVRegVal(RHS, MRI);
4510 bool ShouldUseImm = FPImm && (FPImm->isZero() && !FPImm->isNegative());
4511
4512 auto IsEqualityPred = [](CmpInst::Predicate P) {
4513 return P == CmpInst::FCMP_OEQ || P == CmpInst::FCMP_ONE ||
4514 P == CmpInst::FCMP_UEQ || P == CmpInst::FCMP_UNE;
4515 };
4516 if (!ShouldUseImm && Pred && IsEqualityPred(*Pred)) {
4517 // Try commutating the operands.
4518 const ConstantFP *LHSImm = getConstantFPVRegVal(LHS, MRI);
4519 if (LHSImm && (LHSImm->isZero() && !LHSImm->isNegative())) {
4520 ShouldUseImm = true;
4521 std::swap(LHS, RHS);
4522 }
4523 }
4524 unsigned CmpOpcTbl[2][3] = {
4525 {AArch64::FCMPHrr, AArch64::FCMPSrr, AArch64::FCMPDrr},
4526 {AArch64::FCMPHri, AArch64::FCMPSri, AArch64::FCMPDri}};
4527 unsigned CmpOpc =
4528 CmpOpcTbl[ShouldUseImm][OpSize == 16 ? 0 : (OpSize == 32 ? 1 : 2)];
4529
4530 // Partially build the compare. Decide if we need to add a use for the
4531 // third operand based off whether or not we're comparing against 0.0.
4532 auto CmpMI = MIRBuilder.buildInstr(CmpOpc).addUse(LHS);
4533 CmpMI.setMIFlags(MachineInstr::NoFPExcept);
4534 if (!ShouldUseImm)
4535 CmpMI.addUse(RHS);
4536 constrainSelectedInstRegOperands(*CmpMI, TII, TRI, RBI);
4537 return &*CmpMI;
4538}
4539
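// Concatenation of two 64-bit vectors (sketch): the first operand is widened
// into a 128-bit register as lane 0 via emitScalarToVector, then an INSvi64lane
// moves the second operand's low 64 bits into lane 1.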
4540MachineInstr *AArch64InstructionSelector::emitVectorConcat(
4541 std::optional<Register> Dst, Register Op1, Register Op2,
4542 MachineIRBuilder &MIRBuilder) const {
4543 // We implement a vector concat by:
4544 // 1. Use scalar_to_vector to insert the lower vector into the larger dest
4545 // 2. Insert the upper vector into the destination's upper element
4546 // TODO: some of this code is common with G_BUILD_VECTOR handling.
4547 MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
4548
4549 const LLT Op1Ty = MRI.getType(Op1);
4550 const LLT Op2Ty = MRI.getType(Op2);
4551
4552 if (Op1Ty != Op2Ty) {
4553 LLVM_DEBUG(dbgs() << "Could not do vector concat of differing vector tys");
4554 return nullptr;
4555 }
4556 assert(Op1Ty.isVector() && "Expected a vector for vector concat");
4557
4558 if (Op1Ty.getSizeInBits() >= 128) {
4559 LLVM_DEBUG(dbgs() << "Vector concat not supported for full size vectors");
4560 return nullptr;
4561 }
4562
4563 // At the moment we just support 64 bit vector concats.
4564 if (Op1Ty.getSizeInBits() != 64) {
4565 LLVM_DEBUG(dbgs() << "Vector concat supported for 64b vectors");
4566 return nullptr;
4567 }
4568
4569 const LLT ScalarTy = LLT::scalar(Op1Ty.getSizeInBits());
4570 const RegisterBank &FPRBank = *RBI.getRegBank(Op1, MRI, TRI);
4571 const TargetRegisterClass *DstRC =
4572 getRegClassForTypeOnBank(Op1Ty.multiplyElements(2), FPRBank);
4573
4574 MachineInstr *WidenedOp1 =
4575 emitScalarToVector(ScalarTy.getSizeInBits(), DstRC, Op1, MIRBuilder);
4576 MachineInstr *WidenedOp2 =
4577 emitScalarToVector(ScalarTy.getSizeInBits(), DstRC, Op2, MIRBuilder);
4578 if (!WidenedOp1 || !WidenedOp2) {
4579 LLVM_DEBUG(dbgs() << "Could not emit a vector from scalar value");
4580 return nullptr;
4581 }
4582
4583 // Now do the insert of the upper element.
4584 unsigned InsertOpc, InsSubRegIdx;
4585 std::tie(InsertOpc, InsSubRegIdx) =
4586 getInsertVecEltOpInfo(FPRBank, ScalarTy.getSizeInBits());
4587
4588 if (!Dst)
4589 Dst = MRI.createVirtualRegister(DstRC);
4590 auto InsElt =
4591 MIRBuilder
4592 .buildInstr(InsertOpc, {*Dst}, {WidenedOp1->getOperand(0).getReg()})
4593 .addImm(1) /* Lane index */
4594 .addUse(WidenedOp2->getOperand(0).getReg())
4595 .addImm(0);
4596 constrainSelectedInstRegOperands(*InsElt, TII, TRI, RBI);
4597 return &*InsElt;
4598}
4599
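// CSINC Dst, Src1, Src2, cc selects Src1 when cc holds and Src2 + 1 otherwise;
// with both sources tied to the zero register and an inverted condition this is
// the standard CSET idiom used throughout this file.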
4600MachineInstr *
4601AArch64InstructionSelector::emitCSINC(Register Dst, Register Src1,
4602 Register Src2, AArch64CC::CondCode Pred,
4603 MachineIRBuilder &MIRBuilder) const {
4604 auto &MRI = *MIRBuilder.getMRI();
4605 const RegClassOrRegBank &RegClassOrBank = MRI.getRegClassOrRegBank(Dst);
4606 // If we used a register class, then this won't necessarily have an LLT.
4607 // Compute the size based off whether or not we have a class or bank.
4608 unsigned Size;
4609 if (const auto *RC = dyn_cast<const TargetRegisterClass *>(RegClassOrBank))
4610 Size = TRI.getRegSizeInBits(*RC);
4611 else
4612 Size = MRI.getType(Dst).getSizeInBits();
4613 // Some opcodes use s1.
4614 assert(Size <= 64 && "Expected 64 bits or less only!");
4615 static const unsigned OpcTable[2] = {AArch64::CSINCWr, AArch64::CSINCXr};
4616 unsigned Opc = OpcTable[Size == 64];
4617 auto CSINC = MIRBuilder.buildInstr(Opc, {Dst}, {Src1, Src2}).addImm(Pred);
4618 constrainSelectedInstRegOperands(*CSINC, TII, TRI, RBI);
4619 return &*CSINC;
4620}
4621
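// AArch64 reports borrow as an inverted carry flag (C == 0 means a borrow
// occurred), so SBCS-style consumers below need the negated carry while
// ADCS-style consumers can take the incoming carry value as-is.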
4622MachineInstr *AArch64InstructionSelector::emitCarryIn(MachineInstr &I,
4623 Register CarryReg) {
4624 MachineRegisterInfo *MRI = MIB.getMRI();
4625 unsigned Opcode = I.getOpcode();
4626
4627 // If the instruction is a SUB, we need to negate the carry,
4628 // because borrowing is indicated by carry-flag == 0.
4629 bool NeedsNegatedCarry =
4630 (Opcode == TargetOpcode::G_USUBE || Opcode == TargetOpcode::G_SSUBE);
4631
4632 // If the previous instruction will already produce the correct carry, do not
4633 // emit a carry generating instruction. E.g. for G_UADDE/G_USUBE sequences
4634 // generated during legalization of wide add/sub. This optimization depends on
4635 // these sequences not being interrupted by other instructions.
4636 // We have to select the previous instruction before the carry-using
4637 // instruction is deleted by the calling function, otherwise the previous
4638 // instruction might become dead and would get deleted.
4639 MachineInstr *SrcMI = MRI->getVRegDef(CarryReg);
4640 if (SrcMI == I.getPrevNode()) {
4641 if (auto *CarrySrcMI = dyn_cast<GAddSubCarryOut>(SrcMI)) {
4642 bool ProducesNegatedCarry = CarrySrcMI->isSub();
4643 if (NeedsNegatedCarry == ProducesNegatedCarry &&
4644 CarrySrcMI->isUnsigned() &&
4645 CarrySrcMI->getCarryOutReg() == CarryReg &&
4646 selectAndRestoreState(*SrcMI))
4647 return nullptr;
4648 }
4649 }
4650
4651 Register DeadReg = MRI->createVirtualRegister(&AArch64::GPR32RegClass);
4652
4653 if (NeedsNegatedCarry) {
4654 // (0 - Carry) sets !C in NZCV when Carry == 1
4655 Register ZReg = AArch64::WZR;
4656 return emitInstr(AArch64::SUBSWrr, {DeadReg}, {ZReg, CarryReg}, MIB);
4657 }
4658
4659 // (Carry - 1) sets !C in NZCV when Carry == 0
4660 auto Fns = select12BitValueWithLeftShift(1);
4661 return emitInstr(AArch64::SUBSWri, {DeadReg}, {CarryReg}, MIB, Fns);
4662}
4663
4664bool AArch64InstructionSelector::selectOverflowOp(MachineInstr &I,
4665 MachineRegisterInfo &MRI) {
4666 auto &CarryMI = cast<GAddSubCarryOut>(I);
4667
4668 if (auto *CarryInMI = dyn_cast<GAddSubCarryInOut>(&I)) {
4669 // Set NZCV carry according to carry-in VReg
4670 emitCarryIn(I, CarryInMI->getCarryInReg());
4671 }
4672
4673 // Emit the operation and get the correct condition code.
4674 auto OpAndCC = emitOverflowOp(I.getOpcode(), CarryMI.getDstReg(),
4675 CarryMI.getLHS(), CarryMI.getRHS(), MIB);
4676
4677 Register CarryOutReg = CarryMI.getCarryOutReg();
4678
4679 // Don't convert carry-out to VReg if it is never used
4680 if (!MRI.use_nodbg_empty(CarryOutReg)) {
4681 // Now, put the overflow result in the register given by the first operand
4682 // to the overflow op. CSINC increments the result when the predicate is
4683 // false, so to get the increment when it's true, we need to use the
4684 // inverse. In this case, we want to increment when carry is set.
4685 Register ZReg = AArch64::WZR;
4686 emitCSINC(/*Dst=*/CarryOutReg, /*Src1=*/ZReg, /*Src2=*/ZReg,
4687 getInvertedCondCode(OpAndCC.second), MIB);
4688 }
4689
4690 I.eraseFromParent();
4691 return true;
4692}
4693
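// Condition codes returned below: VS flags signed overflow, HS (carry set)
// flags an unsigned add carry-out, and LO (carry clear) flags an unsigned
// subtract borrow.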
4694std::pair<MachineInstr *, AArch64CC::CondCode>
4695AArch64InstructionSelector::emitOverflowOp(unsigned Opcode, Register Dst,
4696 MachineOperand &LHS,
4697 MachineOperand &RHS,
4698 MachineIRBuilder &MIRBuilder) const {
4699 switch (Opcode) {
4700 default:
4701 llvm_unreachable("Unexpected opcode!");
4702 case TargetOpcode::G_SADDO:
4703 return std::make_pair(emitADDS(Dst, LHS, RHS, MIRBuilder), AArch64CC::VS);
4704 case TargetOpcode::G_UADDO:
4705 return std::make_pair(emitADDS(Dst, LHS, RHS, MIRBuilder), AArch64CC::HS);
4706 case TargetOpcode::G_SSUBO:
4707 return std::make_pair(emitSUBS(Dst, LHS, RHS, MIRBuilder), AArch64CC::VS);
4708 case TargetOpcode::G_USUBO:
4709 return std::make_pair(emitSUBS(Dst, LHS, RHS, MIRBuilder), AArch64CC::LO);
4710 case TargetOpcode::G_SADDE:
4711 return std::make_pair(emitADCS(Dst, LHS, RHS, MIRBuilder), AArch64CC::VS);
4712 case TargetOpcode::G_UADDE:
4713 return std::make_pair(emitADCS(Dst, LHS, RHS, MIRBuilder), AArch64CC::HS);
4714 case TargetOpcode::G_SSUBE:
4715 return std::make_pair(emitSBCS(Dst, LHS, RHS, MIRBuilder), AArch64CC::VS);
4716 case TargetOpcode::G_USUBE:
4717 return std::make_pair(emitSBCS(Dst, LHS, RHS, MIRBuilder), AArch64CC::LO);
4718 }
4719}
4720
4721/// Returns true if @p Val is a tree of AND/OR/CMP operations that can be
4722/// expressed as a conjunction.
4723/// \param CanNegate Set to true if we can negate the whole sub-tree just by
4724/// changing the conditions on the CMP tests.
4725/// (this means we can call emitConjunctionRec() with
4726/// Negate==true on this sub-tree)
4727/// \param MustBeFirst Set to true if this subtree needs to be negated and we
4728/// cannot do the negation naturally. We are required to
4729/// emit the subtree first in this case.
4730/// \param WillNegate Is true if we are called when the result of this
4731/// subexpression must be negated. This happens when the
4732/// outer expression is an OR. We can use this fact to know
4733/// that we have a double negation (or (or ...) ...) that
4734/// can be implemented for free.
4735static bool canEmitConjunction(Register Val, bool &CanNegate, bool &MustBeFirst,
4736 bool WillNegate, MachineRegisterInfo &MRI,
4737 unsigned Depth = 0) {
4738 if (!MRI.hasOneNonDBGUse(Val))
4739 return false;
4740 MachineInstr *ValDef = MRI.getVRegDef(Val);
4741 unsigned Opcode = ValDef->getOpcode();
4742 if (isa<GAnyCmp>(ValDef)) {
4743 CanNegate = true;
4744 MustBeFirst = false;
4745 return true;
4746 }
4747 // Protect against exponential runtime and stack overflow.
4748 if (Depth > 6)
4749 return false;
4750 if (Opcode == TargetOpcode::G_AND || Opcode == TargetOpcode::G_OR) {
4751 bool IsOR = Opcode == TargetOpcode::G_OR;
4752 Register O0 = ValDef->getOperand(1).getReg();
4753 Register O1 = ValDef->getOperand(2).getReg();
4754 bool CanNegateL;
4755 bool MustBeFirstL;
4756 if (!canEmitConjunction(O0, CanNegateL, MustBeFirstL, IsOR, MRI, Depth + 1))
4757 return false;
4758 bool CanNegateR;
4759 bool MustBeFirstR;
4760 if (!canEmitConjunction(O1, CanNegateR, MustBeFirstR, IsOR, MRI, Depth + 1))
4761 return false;
4762
4763 if (MustBeFirstL && MustBeFirstR)
4764 return false;
4765
4766 if (IsOR) {
4767 // For an OR expression we need to be able to naturally negate at least
4768 // one side or we cannot do the transformation at all.
4769 if (!CanNegateL && !CanNegateR)
4770 return false;
4771 // If the result of the OR will be negated and we can naturally negate
4772 // the leaves, then this sub-tree as a whole negates naturally.
4773 CanNegate = WillNegate && CanNegateL && CanNegateR;
4774 // If we cannot naturally negate the whole sub-tree, then this must be
4775 // emitted first.
4776 MustBeFirst = !CanNegate;
4777 } else {
4778 assert(Opcode == TargetOpcode::G_AND && "Must be G_AND");
4779 // We cannot naturally negate an AND operation.
4780 CanNegate = false;
4781 MustBeFirst = MustBeFirstL || MustBeFirstR;
4782 }
4783 return true;
4784 }
4785 return false;
4786}
4787
4788MachineInstr *AArch64InstructionSelector::emitConditionalComparison(
4789 Register LHS, Register RHS, CmpInst::Predicate CC,
4790 AArch64CC::CondCode Predicate, AArch64CC::CondCode OutCC,
4791 MachineIRBuilder &MIB) const {
4792 auto &MRI = *MIB.getMRI();
4793 LLT OpTy = MRI.getType(LHS);
4794 unsigned CCmpOpc;
4795 std::optional<ValueAndVReg> C;
4796 if (CmpInst::isIntPredicate(CC)) {
4797 assert(OpTy.getSizeInBits() == 32 || OpTy.getSizeInBits() == 64);
4798 C = getIConstantVRegValWithLookThrough(RHS, MRI);
4799 if (!C || C->Value.sgt(31) || C->Value.slt(-31))
4800 CCmpOpc = OpTy.getSizeInBits() == 32 ? AArch64::CCMPWr : AArch64::CCMPXr;
4801 else if (C->Value.ule(31))
4802 CCmpOpc = OpTy.getSizeInBits() == 32 ? AArch64::CCMPWi : AArch64::CCMPXi;
4803 else
4804 CCmpOpc = OpTy.getSizeInBits() == 32 ? AArch64::CCMNWi : AArch64::CCMNXi;
4805 } else {
4806 assert(OpTy.getSizeInBits() == 16 || OpTy.getSizeInBits() == 32 ||
4807 OpTy.getSizeInBits() == 64);
4808 switch (OpTy.getSizeInBits()) {
4809 case 16:
4810 assert(STI.hasFullFP16() && "Expected Full FP16 for fp16 comparisons");
4811 CCmpOpc = AArch64::FCCMPHrr;
4812 break;
4813 case 32:
4814 CCmpOpc = AArch64::FCCMPSrr;
4815 break;
4816 case 64:
4817 CCmpOpc = AArch64::FCCMPDrr;
4818 break;
4819 default:
4820 return nullptr;
4821 }
4822 }
4823 AArch64CC::CondCode InvOutCC = AArch64CC::getInvertedCondCode(OutCC);
4824 unsigned NZCV = AArch64CC::getNZCVToSatisfyCondCode(InvOutCC);
4825 auto CCmp =
4826 MIB.buildInstr(CCmpOpc, {}, {LHS});
4827 if (CCmpOpc == AArch64::CCMPWi || CCmpOpc == AArch64::CCMPXi)
4828 CCmp.addImm(C->Value.getZExtValue());
4829 else if (CCmpOpc == AArch64::CCMNWi || CCmpOpc == AArch64::CCMNXi)
4830 CCmp.addImm(C->Value.abs().getZExtValue());
4831 else
4832 CCmp.addReg(RHS);
4833 CCmp.addImm(NZCV).addImm(Predicate);
4834 constrainSelectedInstRegOperands(*CCmp, TII, TRI, RBI);
4835 return &*CCmp;
4836}
4837
4838MachineInstr *AArch64InstructionSelector::emitConjunctionRec(
4839 Register Val, AArch64CC::CondCode &OutCC, bool Negate, Register CCOp,
4840 AArch64CC::CondCode Predicate, MachineIRBuilder &MIB) const {
4841 // We're at a tree leaf, produce a conditional comparison operation.
4842 auto &MRI = *MIB.getMRI();
4843 MachineInstr *ValDef = MRI.getVRegDef(Val);
4844 unsigned Opcode = ValDef->getOpcode();
4845 if (auto *Cmp = dyn_cast<GAnyCmp>(ValDef)) {
4846 Register LHS = Cmp->getLHSReg();
4847 Register RHS = Cmp->getRHSReg();
4848 CmpInst::Predicate CC = Cmp->getCond();
4849 if (Negate)
4850 CC = CmpInst::getInversePredicate(CC);
4851 if (isa<GICmp>(Cmp)) {
4852 OutCC = changeICMPPredToAArch64CC(CC, RHS, MIB.getMRI());
4853 } else {
4854 // Handle special FP cases.
4855 AArch64CC::CondCode ExtraCC;
4856 changeFPCCToANDAArch64CC(CC, OutCC, ExtraCC);
4857 // Some floating point conditions can't be tested with a single condition
4858 // code. Construct an additional comparison in this case.
4859 if (ExtraCC != AArch64CC::AL) {
4860 MachineInstr *ExtraCmp;
4861 if (!CCOp)
4862 ExtraCmp = emitFPCompare(LHS, RHS, MIB, CC);
4863 else
4864 ExtraCmp =
4865 emitConditionalComparison(LHS, RHS, CC, Predicate, ExtraCC, MIB);
4866 CCOp = ExtraCmp->getOperand(0).getReg();
4867 Predicate = ExtraCC;
4868 }
4869 }
4870
4871 // Produce a normal comparison if we are first in the chain
4872 if (!CCOp) {
4873 auto Dst = MRI.cloneVirtualRegister(LHS);
4874 if (isa<GICmp>(Cmp))
4875 return emitSUBS(Dst, Cmp->getOperand(2), Cmp->getOperand(3), MIB);
4876 return emitFPCompare(Cmp->getOperand(2).getReg(),
4877 Cmp->getOperand(3).getReg(), MIB);
4878 }
4879 // Otherwise produce a ccmp.
4880 return emitConditionalComparison(LHS, RHS, CC, Predicate, OutCC, MIB);
4881 }
4882 assert(MRI.hasOneNonDBGUse(Val) && "Valid conjunction/disjunction tree");
4883
4884 bool IsOR = Opcode == TargetOpcode::G_OR;
4885
4886 Register LHS = ValDef->getOperand(1).getReg();
4887 bool CanNegateL;
4888 bool MustBeFirstL;
4889 bool ValidL = canEmitConjunction(LHS, CanNegateL, MustBeFirstL, IsOR, MRI);
4890 assert(ValidL && "Valid conjunction/disjunction tree");
4891 (void)ValidL;
4892
4893 Register RHS = ValDef->getOperand(2).getReg();
4894 bool CanNegateR;
4895 bool MustBeFirstR;
4896 bool ValidR = canEmitConjunction(RHS, CanNegateR, MustBeFirstR, IsOR, MRI);
4897 assert(ValidR && "Valid conjunction/disjunction tree");
4898 (void)ValidR;
4899
4900 // Swap sub-tree that must come first to the right side.
4901 if (MustBeFirstL) {
4902 assert(!MustBeFirstR && "Valid conjunction/disjunction tree");
4903 std::swap(LHS, RHS);
4904 std::swap(CanNegateL, CanNegateR);
4905 std::swap(MustBeFirstL, MustBeFirstR);
4906 }
4907
4908 bool NegateR;
4909 bool NegateAfterR;
4910 bool NegateL;
4911 bool NegateAfterAll;
4912 if (Opcode == TargetOpcode::G_OR) {
4913 // Swap the sub-tree that we can negate naturally to the left.
4914 if (!CanNegateL) {
4915 assert(CanNegateR && "at least one side must be negatable");
4916 assert(!MustBeFirstR && "invalid conjunction/disjunction tree");
4917 assert(!Negate);
4918 std::swap(LHS, RHS);
4919 NegateR = false;
4920 NegateAfterR = true;
4921 } else {
4922 // Negate the left sub-tree if possible, otherwise negate the result.
4923 NegateR = CanNegateR;
4924 NegateAfterR = !CanNegateR;
4925 }
4926 NegateL = true;
4927 NegateAfterAll = !Negate;
4928 } else {
4929 assert(Opcode == TargetOpcode::G_AND &&
4930 "Valid conjunction/disjunction tree");
4931 assert(!Negate && "Valid conjunction/disjunction tree");
4932
4933 NegateL = false;
4934 NegateR = false;
4935 NegateAfterR = false;
4936 NegateAfterAll = false;
4937 }
4938
4939 // Emit sub-trees.
4940 AArch64CC::CondCode RHSCC;
4941 MachineInstr *CmpR =
4942 emitConjunctionRec(RHS, RHSCC, NegateR, CCOp, Predicate, MIB);
4943 if (NegateAfterR)
4944 RHSCC = AArch64CC::getInvertedCondCode(RHSCC);
4945 MachineInstr *CmpL = emitConjunctionRec(
4946 LHS, OutCC, NegateL, CmpR->getOperand(0).getReg(), RHSCC, MIB);
4947 if (NegateAfterAll)
4948 OutCC = AArch64CC::getInvertedCondCode(OutCC);
4949 return CmpL;
4950}
4951
4952MachineInstr *AArch64InstructionSelector::emitConjunction(
4953 Register Val, AArch64CC::CondCode &OutCC, MachineIRBuilder &MIB) const {
4954 bool DummyCanNegate;
4955 bool DummyMustBeFirst;
4956 if (!canEmitConjunction(Val, DummyCanNegate, DummyMustBeFirst, false,
4957 *MIB.getMRI()))
4958 return nullptr;
4959 return emitConjunctionRec(Val, OutCC, false, Register(), AArch64CC::AL, MIB);
4960}
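// Illustrative example (register names are placeholders): for
//   %c:gpr(s1) = G_ICMP intpred(eq), %x, %y
//   %d:gpr(s1) = G_FCMP floatpred(olt), %a, %b
//   %e:gpr(s1) = G_AND %c, %d
// emitConjunction(%e, OutCC, MIB) emits roughly
//   fcmp %a, %b              ; right leaf becomes an ordinary compare
//   ccmp %x, %y, #0, mi      ; left leaf predicated on "olt" (mi); NZCV=#0
//                            ; fails the final EQ test when "mi" is false
// and sets OutCC to EQ, which the caller then feeds into a CSEL/CSINC or a
// conditional branch.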
4961
4962bool AArch64InstructionSelector::tryOptSelectConjunction(GSelect &SelI,
4963 MachineInstr &CondMI) {
4964 AArch64CC::CondCode AArch64CC;
4965 MachineInstr *ConjMI = emitConjunction(SelI.getCondReg(), AArch64CC, MIB);
4966 if (!ConjMI)
4967 return false;
4968
4969 emitSelect(SelI.getReg(0), SelI.getTrueReg(), SelI.getFalseReg(), AArch64CC, MIB);
4970 SelI.eraseFromParent();
4971 return true;
4972}
4973
4974bool AArch64InstructionSelector::tryOptSelect(GSelect &I) {
4975 MachineRegisterInfo &MRI = *MIB.getMRI();
4976 // We want to recognize this pattern:
4977 //
4978 // $z = G_FCMP pred, $x, $y
4979 // ...
4980 // $w = G_SELECT $z, $a, $b
4981 //
4982 // Where the value of $z is *only* ever used by the G_SELECT (possibly with
4983 // some copies/truncs in between.)
4984 //
4985 // If we see this, then we can emit something like this:
4986 //
4987 // fcmp $x, $y
4988 // fcsel $w, $a, $b, pred
4989 //
4990 // Rather than emitting both of the rather long sequences in the standard
4991 // G_FCMP/G_SELECT select methods.
4992
4993 // First, check if the condition is defined by a compare.
4994 MachineInstr *CondDef = MRI.getVRegDef(I.getOperand(1).getReg());
4995
4996 // We can only fold if all of the defs have one use.
4997 Register CondDefReg = CondDef->getOperand(0).getReg();
4998 if (!MRI.hasOneNonDBGUse(CondDefReg)) {
4999 // Unless it's another select.
5000 for (const MachineInstr &UI : MRI.use_nodbg_instructions(CondDefReg)) {
5001 if (CondDef == &UI)
5002 continue;
5003 if (UI.getOpcode() != TargetOpcode::G_SELECT)
5004 return false;
5005 }
5006 }
5007
5008 // Is the condition defined by a compare?
5009 unsigned CondOpc = CondDef->getOpcode();
5010 if (CondOpc != TargetOpcode::G_ICMP && CondOpc != TargetOpcode::G_FCMP) {
5011 if (tryOptSelectConjunction(I, *CondDef))
5012 return true;
5013 return false;
5014 }
5015
5016 AArch64CC::CondCode CondCode;
5017 if (CondOpc == TargetOpcode::G_ICMP) {
5018 auto &PredOp = CondDef->getOperand(1);
5019 emitIntegerCompare(CondDef->getOperand(2), CondDef->getOperand(3), PredOp,
5020 MIB);
5021 auto Pred = static_cast<CmpInst::Predicate>(PredOp.getPredicate());
5022 CondCode =
5023 changeICMPPredToAArch64CC(Pred, CondDef->getOperand(3).getReg(), &MRI);
5024 } else {
5025 // Get the condition code for the select.
5026 auto Pred =
5027 static_cast<CmpInst::Predicate>(CondDef->getOperand(1).getPredicate());
5028 AArch64CC::CondCode CondCode2;
5029 changeFCMPPredToAArch64CC(Pred, CondCode, CondCode2);
5030
5031 // changeFCMPPredToAArch64CC sets CondCode2 to AL when we require two
5032 // instructions to emit the comparison.
5033 // TODO: Handle FCMP_UEQ and FCMP_ONE. After that, this check will be
5034 // unnecessary.
5035 if (CondCode2 != AArch64CC::AL)
5036 return false;
5037
5038 if (!emitFPCompare(CondDef->getOperand(2).getReg(),
5039 CondDef->getOperand(3).getReg(), MIB)) {
5040 LLVM_DEBUG(dbgs() << "Couldn't emit compare for select!\n");
5041 return false;
5042 }
5043 }
5044
5045 // Emit the select.
5046 emitSelect(I.getOperand(0).getReg(), I.getOperand(2).getReg(),
5047 I.getOperand(3).getReg(), CondCode, MIB);
5048 I.eraseFromParent();
5049 return true;
5050}
5051
5052MachineInstr *AArch64InstructionSelector::tryFoldIntegerCompare(
5053 MachineOperand &LHS, MachineOperand &RHS, MachineOperand &Predicate,
5054 MachineIRBuilder &MIRBuilder) const {
5055 assert(LHS.isReg() && RHS.isReg() && Predicate.isPredicate() &&
5056 "Unexpected MachineOperand");
5057 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
5058 // We want to find this sort of thing:
5059 // x = G_SUB 0, y
5060 // G_ICMP z, x
5061 //
5062 // In this case, we can fold the G_SUB into the G_ICMP using a CMN instead.
5063 // e.g:
5064 //
5065 // cmn z, y
5066
5067 // Check if the RHS or LHS of the G_ICMP is defined by a SUB
5068 MachineInstr *LHSDef = getDefIgnoringCopies(LHS.getReg(), MRI);
5069 MachineInstr *RHSDef = getDefIgnoringCopies(RHS.getReg(), MRI);
5070 auto P = static_cast<CmpInst::Predicate>(Predicate.getPredicate());
5071
5072 // Given this:
5073 //
5074 // x = G_SUB 0, y
5075 // G_ICMP z, x
5076 //
5077 // Produce this:
5078 //
5079 // cmn z, y
5080 if (isCMN(RHSDef, P, MRI))
5081 return emitCMN(LHS, RHSDef->getOperand(2), MIRBuilder);
5082
5083 // Same idea here, but with the LHS of the compare instead:
5084 //
5085 // Given this:
5086 //
5087 // x = G_SUB 0, y
5088 // G_ICMP x, z
5089 //
5090 // Produce this:
5091 //
5092 // cmn y, z
5093 //
5094 // But be careful! We need to swap the predicate!
5095 if (isCMN(LHSDef, P, MRI)) {
5096 if (!CmpInst::isEquality(P)) {
5097 P = CmpInst::getSwappedPredicate(P);
5098 Predicate = MachineOperand::CreatePredicate(P);
5099 }
5100 return emitCMN(LHSDef->getOperand(2), RHS, MIRBuilder);
5101 }
5102
5103 // Given this:
5104 //
5105 // z = G_AND x, y
5106 // G_ICMP z, 0
5107 //
5108 // Produce this if the compare is signed:
5109 //
5110 // tst x, y
5111 if (!CmpInst::isUnsigned(P) && LHSDef &&
5112 LHSDef->getOpcode() == TargetOpcode::G_AND) {
5113 // Make sure that the RHS is 0.
5114 auto ValAndVReg = getIConstantVRegValWithLookThrough(RHS.getReg(), MRI);
5115 if (!ValAndVReg || ValAndVReg->Value != 0)
5116 return nullptr;
5117
5118 return emitTST(LHSDef->getOperand(1),
5119 LHSDef->getOperand(2), MIRBuilder);
5120 }
5121
5122 return nullptr;
5123}
5124
5125bool AArch64InstructionSelector::selectShuffleVector(
5126 MachineInstr &I, MachineRegisterInfo &MRI) {
5127 const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
5128 Register Src1Reg = I.getOperand(1).getReg();
5129 const LLT Src1Ty = MRI.getType(Src1Reg);
5130 Register Src2Reg = I.getOperand(2).getReg();
5131 const LLT Src2Ty = MRI.getType(Src2Reg);
5132 ArrayRef<int> Mask = I.getOperand(3).getShuffleMask();
5133
5134 MachineBasicBlock &MBB = *I.getParent();
5135 MachineFunction &MF = *MBB.getParent();
5136 LLVMContext &Ctx = MF.getFunction().getContext();
5137
5138 // G_SHUFFLE_VECTOR is weird in that the source operands can be scalars, if
5139 // it originated from a <1 x T> type. Those should have been lowered into
5140 // G_BUILD_VECTOR earlier.
5141 if (!Src1Ty.isVector() || !Src2Ty.isVector()) {
5142 LLVM_DEBUG(dbgs() << "Could not select a \"scalar\" G_SHUFFLE_VECTOR\n");
5143 return false;
5144 }
5145
5146 unsigned BytesPerElt = DstTy.getElementType().getSizeInBits() / 8;
5147
5149 for (int Val : Mask) {
5150 // For now, we'll just assume any undef indexes to be 0. This should be
5151 // optimized in the future, e.g. to select DUP etc.
5152 Val = Val < 0 ? 0 : Val;
5153 for (unsigned Byte = 0; Byte < BytesPerElt; ++Byte) {
5154 unsigned Offset = Byte + Val * BytesPerElt;
5155 CstIdxs.emplace_back(ConstantInt::get(Type::getInt8Ty(Ctx), Offset));
5156 }
5157 }
5158
5159 // Use a constant pool to load the index vector for TBL.
5160 Constant *CPVal = ConstantVector::get(CstIdxs);
5161 MachineInstr *IndexLoad = emitLoadFromConstantPool(CPVal, MIB);
5162 if (!IndexLoad) {
5163 LLVM_DEBUG(dbgs() << "Could not load from a constant pool");
5164 return false;
5165 }
5166
5167 if (DstTy.getSizeInBits() != 128) {
5168 assert(DstTy.getSizeInBits() == 64 && "Unexpected shuffle result ty");
5169 // This case can be done with TBL1.
5170 MachineInstr *Concat =
5171 emitVectorConcat(std::nullopt, Src1Reg, Src2Reg, MIB);
5172 if (!Concat) {
5173 LLVM_DEBUG(dbgs() << "Could not do vector concat for tbl1");
5174 return false;
5175 }
5176
5177 // The constant pool load will be 64 bits, so we need to convert it to an FPR128 reg.
5178 IndexLoad = emitScalarToVector(64, &AArch64::FPR128RegClass,
5179 IndexLoad->getOperand(0).getReg(), MIB);
5180
5181 auto TBL1 = MIB.buildInstr(
5182 AArch64::TBLv16i8One, {&AArch64::FPR128RegClass},
5183 {Concat->getOperand(0).getReg(), IndexLoad->getOperand(0).getReg()});
5184 constrainSelectedInstRegOperands(*TBL1, TII, TRI, RBI);
5185 
5186 auto Copy =
5187 MIB.buildInstr(TargetOpcode::COPY, {I.getOperand(0).getReg()}, {})
5188 .addReg(TBL1.getReg(0), 0, AArch64::dsub);
5189 RBI.constrainGenericRegister(Copy.getReg(0), AArch64::FPR64RegClass, MRI);
5190 I.eraseFromParent();
5191 return true;
5192 }
5193
5194 // For TBL2 we need to emit a REG_SEQUENCE to tie together two consecutive
5195 // Q registers for regalloc.
5196 SmallVector<Register, 2> Regs = {Src1Reg, Src2Reg};
5197 auto RegSeq = createQTuple(Regs, MIB);
5198 auto TBL2 = MIB.buildInstr(AArch64::TBLv16i8Two, {I.getOperand(0)},
5199 {RegSeq, IndexLoad->getOperand(0)});
5200 constrainSelectedInstRegOperands(*TBL2, TII, TRI, RBI);
5201 I.eraseFromParent();
5202 return true;
5203}
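// Worked example (illustrative) for the index vector built above: shuffling
// two <4 x s32> sources with mask <1, 0, 7, 6> gives BytesPerElt = 4 and
// CstIdxs = {4,5,6,7, 0,1,2,3, 28,29,30,31, 24,25,26,27}, i.e. byte offsets
// into the 32-byte {Src1,Src2} table consumed by TBLv16i8Two.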
5204
5205MachineInstr *AArch64InstructionSelector::emitLaneInsert(
5206 std::optional<Register> DstReg, Register SrcReg, Register EltReg,
5207 unsigned LaneIdx, const RegisterBank &RB,
5208 MachineIRBuilder &MIRBuilder) const {
5209 MachineInstr *InsElt = nullptr;
5210 const TargetRegisterClass *DstRC = &AArch64::FPR128RegClass;
5211 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
5212
5213 // Create a register to define with the insert if one wasn't passed in.
5214 if (!DstReg)
5215 DstReg = MRI.createVirtualRegister(DstRC);
5216
5217 unsigned EltSize = MRI.getType(EltReg).getSizeInBits();
5218 unsigned Opc = getInsertVecEltOpInfo(RB, EltSize).first;
5219
5220 if (RB.getID() == AArch64::FPRRegBankID) {
5221 auto InsSub = emitScalarToVector(EltSize, DstRC, EltReg, MIRBuilder);
5222 InsElt = MIRBuilder.buildInstr(Opc, {*DstReg}, {SrcReg})
5223 .addImm(LaneIdx)
5224 .addUse(InsSub->getOperand(0).getReg())
5225 .addImm(0);
5226 } else {
5227 InsElt = MIRBuilder.buildInstr(Opc, {*DstReg}, {SrcReg})
5228 .addImm(LaneIdx)
5229 .addUse(EltReg);
5230 }
5231
5232 constrainSelectedInstRegOperands(*InsElt, TII, TRI, RBI);
5233 return InsElt;
5234}
5235
5236bool AArch64InstructionSelector::selectUSMovFromExtend(
5237 MachineInstr &MI, MachineRegisterInfo &MRI) {
5238 if (MI.getOpcode() != TargetOpcode::G_SEXT &&
5239 MI.getOpcode() != TargetOpcode::G_ZEXT &&
5240 MI.getOpcode() != TargetOpcode::G_ANYEXT)
5241 return false;
5242 bool IsSigned = MI.getOpcode() == TargetOpcode::G_SEXT;
5243 const Register DefReg = MI.getOperand(0).getReg();
5244 const LLT DstTy = MRI.getType(DefReg);
5245 unsigned DstSize = DstTy.getSizeInBits();
5246
5247 if (DstSize != 32 && DstSize != 64)
5248 return false;
5249
5250 MachineInstr *Extract = getOpcodeDef(TargetOpcode::G_EXTRACT_VECTOR_ELT,
5251 MI.getOperand(1).getReg(), MRI);
5252 int64_t Lane;
5253 if (!Extract || !mi_match(Extract->getOperand(2).getReg(), MRI, m_ICst(Lane)))
5254 return false;
5255 Register Src0 = Extract->getOperand(1).getReg();
5256
5257 const LLT VecTy = MRI.getType(Src0);
5258 if (VecTy.isScalableVector())
5259 return false;
5260
5261 if (VecTy.getSizeInBits() != 128) {
5262 const MachineInstr *ScalarToVector = emitScalarToVector(
5263 VecTy.getSizeInBits(), &AArch64::FPR128RegClass, Src0, MIB);
5264 assert(ScalarToVector && "Didn't expect emitScalarToVector to fail!");
5265 Src0 = ScalarToVector->getOperand(0).getReg();
5266 }
5267
5268 unsigned Opcode;
5269 if (DstSize == 64 && VecTy.getScalarSizeInBits() == 32)
5270 Opcode = IsSigned ? AArch64::SMOVvi32to64 : AArch64::UMOVvi32;
5271 else if (DstSize == 64 && VecTy.getScalarSizeInBits() == 16)
5272 Opcode = IsSigned ? AArch64::SMOVvi16to64 : AArch64::UMOVvi16;
5273 else if (DstSize == 64 && VecTy.getScalarSizeInBits() == 8)
5274 Opcode = IsSigned ? AArch64::SMOVvi8to64 : AArch64::UMOVvi8;
5275 else if (DstSize == 32 && VecTy.getScalarSizeInBits() == 16)
5276 Opcode = IsSigned ? AArch64::SMOVvi16to32 : AArch64::UMOVvi16;
5277 else if (DstSize == 32 && VecTy.getScalarSizeInBits() == 8)
5278 Opcode = IsSigned ? AArch64::SMOVvi8to32 : AArch64::UMOVvi8;
5279 else
5280 llvm_unreachable("Unexpected type combo for S/UMov!");
5281
5282 // We may need to generate one of these, depending on the type and sign of the
5283 // input:
5284 // DstReg = SMOV Src0, Lane;
5285 // NewReg = UMOV Src0, Lane; DstReg = SUBREG_TO_REG NewReg, sub_32;
5286 MachineInstr *ExtI = nullptr;
5287 if (DstSize == 64 && !IsSigned) {
5288 Register NewReg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
5289 MIB.buildInstr(Opcode, {NewReg}, {Src0}).addImm(Lane);
5290 ExtI = MIB.buildInstr(AArch64::SUBREG_TO_REG, {DefReg}, {})
5291 .addImm(0)
5292 .addUse(NewReg)
5293 .addImm(AArch64::sub_32);
5294 RBI.constrainGenericRegister(DefReg, AArch64::GPR64RegClass, MRI);
5295 } else
5296 ExtI = MIB.buildInstr(Opcode, {DefReg}, {Src0}).addImm(Lane);
5297
5298 constrainSelectedInstRegOperands(*ExtI, TII, TRI, RBI);
5299 MI.eraseFromParent();
5300 return true;
5301}
5302
5303MachineInstr *AArch64InstructionSelector::tryAdvSIMDModImm8(
5304 Register Dst, unsigned DstSize, APInt Bits, MachineIRBuilder &Builder) {
5305 unsigned int Op;
5306 if (DstSize == 128) {
5307 if (Bits.getHiBits(64) != Bits.getLoBits(64))
5308 return nullptr;
5309 Op = AArch64::MOVIv16b_ns;
5310 } else {
5311 Op = AArch64::MOVIv8b_ns;
5312 }
5313
5314 uint64_t Val = Bits.zextOrTrunc(64).getZExtValue();
5315
5316 if (AArch64_AM::isAdvSIMDModImmType9(Val)) {
5317 Val = AArch64_AM::encodeAdvSIMDModImmType9(Val);
5318 auto Mov = Builder.buildInstr(Op, {Dst}, {}).addImm(Val);
5319 constrainSelectedInstRegOperands(*Mov, TII, TRI, RBI);
5320 return &*Mov;
5321 }
5322 return nullptr;
5323}
5324
5325MachineInstr *AArch64InstructionSelector::tryAdvSIMDModImm16(
5326 Register Dst, unsigned DstSize, APInt Bits, MachineIRBuilder &Builder,
5327 bool Inv) {
5328
5329 unsigned int Op;
5330 if (DstSize == 128) {
5331 if (Bits.getHiBits(64) != Bits.getLoBits(64))
5332 return nullptr;
5333 Op = Inv ? AArch64::MVNIv8i16 : AArch64::MOVIv8i16;
5334 } else {
5335 Op = Inv ? AArch64::MVNIv4i16 : AArch64::MOVIv4i16;
5336 }
5337
5338 uint64_t Val = Bits.zextOrTrunc(64).getZExtValue();
5339 uint64_t Shift;
5340
5341 if (AArch64_AM::isAdvSIMDModImmType5(Val)) {
5342 Val = AArch64_AM::encodeAdvSIMDModImmType5(Val);
5343 Shift = 0;
5344 } else if (AArch64_AM::isAdvSIMDModImmType6(Val)) {
5345 Val = AArch64_AM::encodeAdvSIMDModImmType6(Val);
5346 Shift = 8;
5347 } else
5348 return nullptr;
5349
5350 auto Mov = Builder.buildInstr(Op, {Dst}, {}).addImm(Val).addImm(Shift);
5351 constrainSelectedInstRegOperands(*Mov, TII, TRI, RBI);
5352 return &*Mov;
5353}
5354
5355MachineInstr *AArch64InstructionSelector::tryAdvSIMDModImm32(
5356 Register Dst, unsigned DstSize, APInt Bits, MachineIRBuilder &Builder,
5357 bool Inv) {
5358
5359 unsigned int Op;
5360 if (DstSize == 128) {
5361 if (Bits.getHiBits(64) != Bits.getLoBits(64))
5362 return nullptr;
5363 Op = Inv ? AArch64::MVNIv4i32 : AArch64::MOVIv4i32;
5364 } else {
5365 Op = Inv ? AArch64::MVNIv2i32 : AArch64::MOVIv2i32;
5366 }
5367
5368 uint64_t Val = Bits.zextOrTrunc(64).getZExtValue();
5369 uint64_t Shift;
5370
5371 if ((AArch64_AM::isAdvSIMDModImmType1(Val))) {
5372 Val = AArch64_AM::encodeAdvSIMDModImmType1(Val);
5373 Shift = 0;
5374 } else if ((AArch64_AM::isAdvSIMDModImmType2(Val))) {
5375 Val = AArch64_AM::encodeAdvSIMDModImmType2(Val);
5376 Shift = 8;
5377 } else if ((AArch64_AM::isAdvSIMDModImmType3(Val))) {
5378 Val = AArch64_AM::encodeAdvSIMDModImmType3(Val);
5379 Shift = 16;
5380 } else if ((AArch64_AM::isAdvSIMDModImmType4(Val))) {
5381 Val = AArch64_AM::encodeAdvSIMDModImmType4(Val);
5382 Shift = 24;
5383 } else
5384 return nullptr;
5385
5386 auto Mov = Builder.buildInstr(Op, {Dst}, {}).addImm(Val).addImm(Shift);
5387 constrainSelectedInstRegOperands(*Mov, TII, TRI, RBI);
5388 return &*Mov;
5389}
5390
5391MachineInstr *AArch64InstructionSelector::tryAdvSIMDModImm64(
5392 Register Dst, unsigned DstSize, APInt Bits, MachineIRBuilder &Builder) {
5393
5394 unsigned int Op;
5395 if (DstSize == 128) {
5396 if (Bits.getHiBits(64) != Bits.getLoBits(64))
5397 return nullptr;
5398 Op = AArch64::MOVIv2d_ns;
5399 } else {
5400 Op = AArch64::MOVID;
5401 }
5402
5403 uint64_t Val = Bits.zextOrTrunc(64).getZExtValue();
5404 if (AArch64_AM::isAdvSIMDModImmType10(Val)) {
5405 Val = AArch64_AM::encodeAdvSIMDModImmType10(Val);
5406 auto Mov = Builder.buildInstr(Op, {Dst}, {}).addImm(Val);
5407 constrainSelectedInstRegOperands(*Mov, TII, TRI, RBI);
5408 return &*Mov;
5409 }
5410 return nullptr;
5411}
5412
5413MachineInstr *AArch64InstructionSelector::tryAdvSIMDModImm321s(
5414 Register Dst, unsigned DstSize, APInt Bits, MachineIRBuilder &Builder,
5415 bool Inv) {
5416
5417 unsigned int Op;
5418 if (DstSize == 128) {
5419 if (Bits.getHiBits(64) != Bits.getLoBits(64))
5420 return nullptr;
5421 Op = Inv ? AArch64::MVNIv4s_msl : AArch64::MOVIv4s_msl;
5422 } else {
5423 Op = Inv ? AArch64::MVNIv2s_msl : AArch64::MOVIv2s_msl;
5424 }
5425
5426 uint64_t Val = Bits.zextOrTrunc(64).getZExtValue();
5427 uint64_t Shift;
5428
5429 if (AArch64_AM::isAdvSIMDModImmType7(Val)) {
5430 Val = AArch64_AM::encodeAdvSIMDModImmType7(Val);
5431 Shift = 264;
5432 } else if (AArch64_AM::isAdvSIMDModImmType8(Val)) {
5433 Val = AArch64_AM::encodeAdvSIMDModImmType8(Val);
5434 Shift = 272;
5435 } else
5436 return nullptr;
5437
5438 auto Mov = Builder.buildInstr(Op, {Dst}, {}).addImm(Val).addImm(Shift);
5439 constrainSelectedInstRegOperands(*Mov, TII, TRI, RBI);
5440 return &*Mov;
5441}
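// Note: the Shift values 264 (0x108) and 272 (0x110) used above are not plain
// bit counts; they appear to be the packed shifter forms of "msl #8" and
// "msl #16" (matching AArch64_AM::getShifterImm(AArch64_AM::MSL, 8/16)) that
// the MOVI/MVNI ".msl" instructions take as their shift operand.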
5442
5443MachineInstr *AArch64InstructionSelector::tryAdvSIMDModImmFP(
5444 Register Dst, unsigned DstSize, APInt Bits, MachineIRBuilder &Builder) {
5445
5446 unsigned int Op;
5447 bool IsWide = false;
5448 if (DstSize == 128) {
5449 if (Bits.getHiBits(64) != Bits.getLoBits(64))
5450 return nullptr;
5451 Op = AArch64::FMOVv4f32_ns;
5452 IsWide = true;
5453 } else {
5454 Op = AArch64::FMOVv2f32_ns;
5455 }
5456
5457 uint64_t Val = Bits.zextOrTrunc(64).getZExtValue();
5458
5459 if (AArch64_AM::isAdvSIMDModImmType11(Val)) {
5460 Val = AArch64_AM::encodeAdvSIMDModImmType11(Val);
5461 } else if (IsWide && AArch64_AM::isAdvSIMDModImmType12(Val)) {
5462 Val = AArch64_AM::encodeAdvSIMDModImmType12(Val);
5463 Op = AArch64::FMOVv2f64_ns;
5464 } else
5465 return nullptr;
5466
5467 auto Mov = Builder.buildInstr(Op, {Dst}, {}).addImm(Val);
5468 constrainSelectedInstRegOperands(*Mov, TII, TRI, RBI);
5469 return &*Mov;
5470}
5471
5472bool AArch64InstructionSelector::selectIndexedExtLoad(
5473 MachineInstr &MI, MachineRegisterInfo &MRI) {
5474 auto &ExtLd = cast<GIndexedAnyExtLoad>(MI);
5475 Register Dst = ExtLd.getDstReg();
5476 Register WriteBack = ExtLd.getWritebackReg();
5477 Register Base = ExtLd.getBaseReg();
5478 Register Offset = ExtLd.getOffsetReg();
5479 LLT Ty = MRI.getType(Dst);
5480 assert(Ty.getSizeInBits() <= 64); // Only for scalar GPRs.
5481 unsigned MemSizeBits = ExtLd.getMMO().getMemoryType().getSizeInBits();
5482 bool IsPre = ExtLd.isPre();
5483 bool IsSExt = isa<GIndexedSExtLoad>(ExtLd);
5484 unsigned InsertIntoSubReg = 0;
5485 bool IsDst64 = Ty.getSizeInBits() == 64;
5486
5487 // ZExt/SExt should be on gpr but can handle extload and zextload of fpr, so
5488 // long as they are scalar.
5489 bool IsFPR = RBI.getRegBank(Dst, MRI, TRI)->getID() == AArch64::FPRRegBankID;
5490 if ((IsSExt && IsFPR) || Ty.isVector())
5491 return false;
5492
5493 unsigned Opc = 0;
5494 LLT NewLdDstTy;
5495 LLT s32 = LLT::scalar(32);
5496 LLT s64 = LLT::scalar(64);
5497
5498 if (MemSizeBits == 8) {
5499 if (IsSExt) {
5500 if (IsDst64)
5501 Opc = IsPre ? AArch64::LDRSBXpre : AArch64::LDRSBXpost;
5502 else
5503 Opc = IsPre ? AArch64::LDRSBWpre : AArch64::LDRSBWpost;
5504 NewLdDstTy = IsDst64 ? s64 : s32;
5505 } else if (IsFPR) {
5506 Opc = IsPre ? AArch64::LDRBpre : AArch64::LDRBpost;
5507 InsertIntoSubReg = AArch64::bsub;
5508 NewLdDstTy = LLT::scalar(MemSizeBits);
5509 } else {
5510 Opc = IsPre ? AArch64::LDRBBpre : AArch64::LDRBBpost;
5511 InsertIntoSubReg = IsDst64 ? AArch64::sub_32 : 0;
5512 NewLdDstTy = s32;
5513 }
5514 } else if (MemSizeBits == 16) {
5515 if (IsSExt) {
5516 if (IsDst64)
5517 Opc = IsPre ? AArch64::LDRSHXpre : AArch64::LDRSHXpost;
5518 else
5519 Opc = IsPre ? AArch64::LDRSHWpre : AArch64::LDRSHWpost;
5520 NewLdDstTy = IsDst64 ? s64 : s32;
5521 } else if (IsFPR) {
5522 Opc = IsPre ? AArch64::LDRHpre : AArch64::LDRHpost;
5523 InsertIntoSubReg = AArch64::hsub;
5524 NewLdDstTy = LLT::scalar(MemSizeBits);
5525 } else {
5526 Opc = IsPre ? AArch64::LDRHHpre : AArch64::LDRHHpost;
5527 InsertIntoSubReg = IsDst64 ? AArch64::sub_32 : 0;
5528 NewLdDstTy = s32;
5529 }
5530 } else if (MemSizeBits == 32) {
5531 if (IsSExt) {
5532 Opc = IsPre ? AArch64::LDRSWpre : AArch64::LDRSWpost;
5533 NewLdDstTy = s64;
5534 } else if (IsFPR) {
5535 Opc = IsPre ? AArch64::LDRSpre : AArch64::LDRSpost;
5536 InsertIntoSubReg = AArch64::ssub;
5537 NewLdDstTy = LLT::scalar(MemSizeBits);
5538 } else {
5539 Opc = IsPre ? AArch64::LDRWpre : AArch64::LDRWpost;
5540 InsertIntoSubReg = IsDst64 ? AArch64::sub_32 : 0;
5541 NewLdDstTy = s32;
5542 }
5543 } else {
5544 llvm_unreachable("Unexpected size for indexed load");
5545 }
5546
5547 auto Cst = getIConstantVRegVal(Offset, MRI);
5548 if (!Cst)
5549 return false; // Shouldn't happen, but just in case.
5550
5551 auto LdMI = MIB.buildInstr(Opc, {WriteBack, NewLdDstTy}, {Base})
5552 .addImm(Cst->getSExtValue());
5553 LdMI.cloneMemRefs(ExtLd);
5554 constrainSelectedInstRegOperands(*LdMI, TII, TRI, RBI);
5555 // Make sure to select the load with the MemTy as the dest type, and then
5556 // insert into a larger reg if needed.
5557 if (InsertIntoSubReg) {
5558 // Generate a SUBREG_TO_REG.
5559 auto SubToReg = MIB.buildInstr(TargetOpcode::SUBREG_TO_REG, {Dst}, {})
5560 .addImm(0)
5561 .addUse(LdMI.getReg(1))
5562 .addImm(InsertIntoSubReg);
5563 RBI.constrainGenericRegister(
5564 SubToReg.getReg(0),
5565 *getRegClassForTypeOnBank(MRI.getType(Dst),
5566 *RBI.getRegBank(Dst, MRI, TRI)),
5567 MRI);
5568 } else {
5569 auto Copy = MIB.buildCopy(Dst, LdMI.getReg(1));
5570 selectCopy(*Copy, TII, MRI, TRI, RBI);
5571 }
5572 MI.eraseFromParent();
5573
5574 return true;
5575}
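// Illustrative example (names are placeholders): a pre-indexed sign-extending
// byte load such as
//   %val:gpr(s32), %wb:gpr(p0) = G_INDEXED_SEXTLOAD %base(p0), %off(s64), 1
// with a constant %off selects roughly to
//   %wb:gpr64sp, %narrow:gpr32 = LDRSBWpre %base, #off
//   %val = COPY %narrow
// while the 64-bit-destination and FPR cases take the SUBREG_TO_REG path.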
5576
5577bool AArch64InstructionSelector::selectIndexedLoad(MachineInstr &MI,
5578 MachineRegisterInfo &MRI) {
5579 auto &Ld = cast<GIndexedLoad>(MI);
5580 Register Dst = Ld.getDstReg();
5581 Register WriteBack = Ld.getWritebackReg();
5582 Register Base = Ld.getBaseReg();
5583 Register Offset = Ld.getOffsetReg();
5584 assert(MRI.getType(Dst).getSizeInBits() <= 128 &&
5585 "Unexpected type for indexed load");
5586 unsigned MemSize = Ld.getMMO().getMemoryType().getSizeInBytes();
5587
5588 if (MemSize < MRI.getType(Dst).getSizeInBytes())
5589 return selectIndexedExtLoad(MI, MRI);
5590
5591 unsigned Opc = 0;
5592 if (Ld.isPre()) {
5593 static constexpr unsigned GPROpcodes[] = {
5594 AArch64::LDRBBpre, AArch64::LDRHHpre, AArch64::LDRWpre,
5595 AArch64::LDRXpre};
5596 static constexpr unsigned FPROpcodes[] = {
5597 AArch64::LDRBpre, AArch64::LDRHpre, AArch64::LDRSpre, AArch64::LDRDpre,
5598 AArch64::LDRQpre};
5599 if (RBI.getRegBank(Dst, MRI, TRI)->getID() == AArch64::FPRRegBankID)
5600 Opc = FPROpcodes[Log2_32(MemSize)];
5601 else
5602 Opc = GPROpcodes[Log2_32(MemSize)];
5603 } else {
5604 static constexpr unsigned GPROpcodes[] = {
5605 AArch64::LDRBBpost, AArch64::LDRHHpost, AArch64::LDRWpost,
5606 AArch64::LDRXpost};
5607 static constexpr unsigned FPROpcodes[] = {
5608 AArch64::LDRBpost, AArch64::LDRHpost, AArch64::LDRSpost,
5609 AArch64::LDRDpost, AArch64::LDRQpost};
5610 if (RBI.getRegBank(Dst, MRI, TRI)->getID() == AArch64::FPRRegBankID)
5611 Opc = FPROpcodes[Log2_32(MemSize)];
5612 else
5613 Opc = GPROpcodes[Log2_32(MemSize)];
5614 }
5615 auto Cst = getIConstantVRegVal(Offset, MRI);
5616 if (!Cst)
5617 return false; // Shouldn't happen, but just in case.
5618 auto LdMI =
5619 MIB.buildInstr(Opc, {WriteBack, Dst}, {Base}).addImm(Cst->getSExtValue());
5620 LdMI.cloneMemRefs(Ld);
5621 constrainSelectedInstRegOperands(*LdMI, TII, TRI, RBI);
5622 MI.eraseFromParent();
5623 return true;
5624}
5625
5626bool AArch64InstructionSelector::selectIndexedStore(GIndexedStore &I,
5627 MachineRegisterInfo &MRI) {
5628 Register Dst = I.getWritebackReg();
5629 Register Val = I.getValueReg();
5630 Register Base = I.getBaseReg();
5631 Register Offset = I.getOffsetReg();
5632 LLT ValTy = MRI.getType(Val);
5633 assert(ValTy.getSizeInBits() <= 128 && "Unexpected type for indexed store");
5634
5635 unsigned Opc = 0;
5636 if (I.isPre()) {
5637 static constexpr unsigned GPROpcodes[] = {
5638 AArch64::STRBBpre, AArch64::STRHHpre, AArch64::STRWpre,
5639 AArch64::STRXpre};
5640 static constexpr unsigned FPROpcodes[] = {
5641 AArch64::STRBpre, AArch64::STRHpre, AArch64::STRSpre, AArch64::STRDpre,
5642 AArch64::STRQpre};
5643
5644 if (RBI.getRegBank(Val, MRI, TRI)->getID() == AArch64::FPRRegBankID)
5645 Opc = FPROpcodes[Log2_32(ValTy.getSizeInBytes())];
5646 else
5647 Opc = GPROpcodes[Log2_32(ValTy.getSizeInBytes())];
5648 } else {
5649 static constexpr unsigned GPROpcodes[] = {
5650 AArch64::STRBBpost, AArch64::STRHHpost, AArch64::STRWpost,
5651 AArch64::STRXpost};
5652 static constexpr unsigned FPROpcodes[] = {
5653 AArch64::STRBpost, AArch64::STRHpost, AArch64::STRSpost,
5654 AArch64::STRDpost, AArch64::STRQpost};
5655
5656 if (RBI.getRegBank(Val, MRI, TRI)->getID() == AArch64::FPRRegBankID)
5657 Opc = FPROpcodes[Log2_32(ValTy.getSizeInBytes())];
5658 else
5659 Opc = GPROpcodes[Log2_32(ValTy.getSizeInBytes())];
5660 }
5661
5662 auto Cst = getIConstantVRegVal(Offset, MRI);
5663 if (!Cst)
5664 return false; // Shouldn't happen, but just in case.
5665 auto Str =
5666 MIB.buildInstr(Opc, {Dst}, {Val, Base}).addImm(Cst->getSExtValue());
5667 Str.cloneMemRefs(I);
5668 constrainSelectedInstRegOperands(*Str, TII, TRI, RBI);
5669 I.eraseFromParent();
5670 return true;
5671}
5672
5673MachineInstr *
5674AArch64InstructionSelector::emitConstantVector(Register Dst, Constant *CV,
5675 MachineIRBuilder &MIRBuilder,
5676 MachineRegisterInfo &MRI) {
5677 LLT DstTy = MRI.getType(Dst);
5678 unsigned DstSize = DstTy.getSizeInBits();
5679 if (CV->isNullValue()) {
5680 if (DstSize == 128) {
5681 auto Mov =
5682 MIRBuilder.buildInstr(AArch64::MOVIv2d_ns, {Dst}, {}).addImm(0);
5683 constrainSelectedInstRegOperands(*Mov, TII, TRI, RBI);
5684 return &*Mov;
5685 }
5686
5687 if (DstSize == 64) {
5688 auto Mov =
5689 MIRBuilder
5690 .buildInstr(AArch64::MOVIv2d_ns, {&AArch64::FPR128RegClass}, {})
5691 .addImm(0);
5692 auto Copy = MIRBuilder.buildInstr(TargetOpcode::COPY, {Dst}, {})
5693 .addReg(Mov.getReg(0), 0, AArch64::dsub);
5694 RBI.constrainGenericRegister(Dst, AArch64::FPR64RegClass, MRI);
5695 return &*Copy;
5696 }
5697 }
5698
5699 if (Constant *SplatValue = CV->getSplatValue()) {
5700 APInt SplatValueAsInt =
5701 isa<ConstantFP>(SplatValue)
5702 ? cast<ConstantFP>(SplatValue)->getValueAPF().bitcastToAPInt()
5703 : SplatValue->getUniqueInteger();
5704 APInt DefBits = APInt::getSplat(
5705 DstSize, SplatValueAsInt.trunc(DstTy.getScalarSizeInBits()));
5706 auto TryMOVIWithBits = [&](APInt DefBits) -> MachineInstr * {
5707 MachineInstr *NewOp;
5708 bool Inv = false;
5709 if ((NewOp = tryAdvSIMDModImm64(Dst, DstSize, DefBits, MIRBuilder)) ||
5710 (NewOp =
5711 tryAdvSIMDModImm32(Dst, DstSize, DefBits, MIRBuilder, Inv)) ||
5712 (NewOp =
5713 tryAdvSIMDModImm321s(Dst, DstSize, DefBits, MIRBuilder, Inv)) ||
5714 (NewOp =
5715 tryAdvSIMDModImm16(Dst, DstSize, DefBits, MIRBuilder, Inv)) ||
5716 (NewOp = tryAdvSIMDModImm8(Dst, DstSize, DefBits, MIRBuilder)) ||
5717 (NewOp = tryAdvSIMDModImmFP(Dst, DstSize, DefBits, MIRBuilder)))
5718 return NewOp;
5719
5720 DefBits = ~DefBits;
5721 Inv = true;
5722 if ((NewOp =
5723 tryAdvSIMDModImm32(Dst, DstSize, DefBits, MIRBuilder, Inv)) ||
5724 (NewOp =
5725 tryAdvSIMDModImm321s(Dst, DstSize, DefBits, MIRBuilder, Inv)) ||
5726 (NewOp = tryAdvSIMDModImm16(Dst, DstSize, DefBits, MIRBuilder, Inv)))
5727 return NewOp;
5728 return nullptr;
5729 };
5730
5731 if (auto *NewOp = TryMOVIWithBits(DefBits))
5732 return NewOp;
5733
5734 // See if a fneg of the constant can be materialized with a MOVI, etc
5735 auto TryWithFNeg = [&](APInt DefBits, int NumBits,
5736 unsigned NegOpc) -> MachineInstr * {
5737 // FNegate each sub-element of the constant
5738 APInt Neg = APInt::getHighBitsSet(NumBits, 1).zext(DstSize);
5739 APInt NegBits(DstSize, 0);
5740 unsigned NumElts = DstSize / NumBits;
5741 for (unsigned i = 0; i < NumElts; i++)
5742 NegBits |= Neg << (NumBits * i);
5743 NegBits = DefBits ^ NegBits;
5744
5745 // Try to create the new constants with MOVI, and if so generate a fneg
5746 // for it.
5747 if (auto *NewOp = TryMOVIWithBits(NegBits)) {
5748 Register NewDst = MRI.createVirtualRegister(&AArch64::FPR128RegClass);
5749 NewOp->getOperand(0).setReg(NewDst);
5750 return MIRBuilder.buildInstr(NegOpc, {Dst}, {NewDst});
5751 }
5752 return nullptr;
5753 };
5754 MachineInstr *R;
5755 if ((R = TryWithFNeg(DefBits, 32, AArch64::FNEGv4f32)) ||
5756 (R = TryWithFNeg(DefBits, 64, AArch64::FNEGv2f64)) ||
5757 (STI.hasFullFP16() &&
5758 (R = TryWithFNeg(DefBits, 16, AArch64::FNEGv8f16))))
5759 return R;
5760 }
5761
5762 auto *CPLoad = emitLoadFromConstantPool(CV, MIRBuilder);
5763 if (!CPLoad) {
5764 LLVM_DEBUG(dbgs() << "Could not generate cp load for constant vector!");
5765 return nullptr;
5766 }
5767
5768 auto Copy = MIRBuilder.buildCopy(Dst, CPLoad->getOperand(0));
5769 RBI.constrainGenericRegister(
5770 Dst, *MRI.getRegClass(CPLoad->getOperand(0).getReg()), MRI);
5771 return &*Copy;
5772}
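// Illustrative example of the FNEG path above: a <2 x s64> splat of -0.0
// (bit pattern 0x8000000000000000 per lane) has no direct MOVI/MVNI/FMOV
// encoding, but flipping the per-element sign bits yields all-zero bits, so
// it can be materialized roughly as
//   %t:fpr128 = MOVIv2d_ns 0
//   %dst      = FNEGv2f64 %t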
5773
5774bool AArch64InstructionSelector::tryOptConstantBuildVec(
5775 MachineInstr &I, LLT DstTy, MachineRegisterInfo &MRI) {
5776 assert(I.getOpcode() == TargetOpcode::G_BUILD_VECTOR);
5777 unsigned DstSize = DstTy.getSizeInBits();
5778 assert(DstSize <= 128 && "Unexpected build_vec type!");
5779 if (DstSize < 32)
5780 return false;
5781 // Check if we're building a constant vector, in which case we want to
5782 // generate a constant pool load instead of a vector insert sequence.
5783 SmallVector<Constant *, 16> Csts;
5784 for (unsigned Idx = 1; Idx < I.getNumOperands(); ++Idx) {
5785 // Try to find G_CONSTANT or G_FCONSTANT
5786 auto *OpMI =
5787 getOpcodeDef(TargetOpcode::G_CONSTANT, I.getOperand(Idx).getReg(), MRI);
5788 if (OpMI)
5789 Csts.emplace_back(
5790 const_cast<ConstantInt *>(OpMI->getOperand(1).getCImm()));
5791 else if ((OpMI = getOpcodeDef(TargetOpcode::G_FCONSTANT,
5792 I.getOperand(Idx).getReg(), MRI)))
5793 Csts.emplace_back(
5794 const_cast<ConstantFP *>(OpMI->getOperand(1).getFPImm()));
5795 else
5796 return false;
5797 }
5798 Constant *CV = ConstantVector::get(Csts);
5799 if (!emitConstantVector(I.getOperand(0).getReg(), CV, MIB, MRI))
5800 return false;
5801 I.eraseFromParent();
5802 return true;
5803}
5804
5805bool AArch64InstructionSelector::tryOptBuildVecToSubregToReg(
5806 MachineInstr &I, MachineRegisterInfo &MRI) {
5807 // Given:
5808 // %vec = G_BUILD_VECTOR %elt, %undef, %undef, ... %undef
5809 //
5810 // Select the G_BUILD_VECTOR as a SUBREG_TO_REG from %elt.
5811 Register Dst = I.getOperand(0).getReg();
5812 Register EltReg = I.getOperand(1).getReg();
5813 LLT EltTy = MRI.getType(EltReg);
5814 // If the index isn't on the same bank as its elements, then this can't be a
5815 // SUBREG_TO_REG.
5816 const RegisterBank &EltRB = *RBI.getRegBank(EltReg, MRI, TRI);
5817 const RegisterBank &DstRB = *RBI.getRegBank(Dst, MRI, TRI);
5818 if (EltRB != DstRB)
5819 return false;
5820 if (any_of(drop_begin(I.operands(), 2), [&MRI](const MachineOperand &Op) {
5821 return !getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, Op.getReg(), MRI);
5822 }))
5823 return false;
5824 unsigned SubReg;
5825 const TargetRegisterClass *EltRC = getRegClassForTypeOnBank(EltTy, EltRB);
5826 if (!EltRC)
5827 return false;
5828 const TargetRegisterClass *DstRC =
5829 getRegClassForTypeOnBank(MRI.getType(Dst), DstRB);
5830 if (!DstRC)
5831 return false;
5832 if (!getSubRegForClass(EltRC, TRI, SubReg))
5833 return false;
5834 auto SubregToReg = MIB.buildInstr(AArch64::SUBREG_TO_REG, {Dst}, {})
5835 .addImm(0)
5836 .addUse(EltReg)
5837 .addImm(SubReg);
5838 I.eraseFromParent();
5839 constrainSelectedInstRegOperands(*SubregToReg, TII, TRI, RBI);
5840 return RBI.constrainGenericRegister(Dst, *DstRC, MRI);
5841}
5842
5843bool AArch64InstructionSelector::selectBuildVector(MachineInstr &I,
5844 MachineRegisterInfo &MRI) {
5845 assert(I.getOpcode() == TargetOpcode::G_BUILD_VECTOR);
5846 // Until we port more of the optimized selections, for now just use a vector
5847 // insert sequence.
5848 const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
5849 const LLT EltTy = MRI.getType(I.getOperand(1).getReg());
5850 unsigned EltSize = EltTy.getSizeInBits();
5851
5852 if (tryOptConstantBuildVec(I, DstTy, MRI))
5853 return true;
5854 if (tryOptBuildVecToSubregToReg(I, MRI))
5855 return true;
5856
5857 if (EltSize != 8 && EltSize != 16 && EltSize != 32 && EltSize != 64)
5858 return false; // Don't support all element types yet.
5859 const RegisterBank &RB = *RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI);
5860
5861 const TargetRegisterClass *DstRC = &AArch64::FPR128RegClass;
5862 MachineInstr *ScalarToVec =
5863 emitScalarToVector(DstTy.getElementType().getSizeInBits(), DstRC,
5864 I.getOperand(1).getReg(), MIB);
5865 if (!ScalarToVec)
5866 return false;
5867
5868 Register DstVec = ScalarToVec->getOperand(0).getReg();
5869 unsigned DstSize = DstTy.getSizeInBits();
5870
5871 // Keep track of the last MI we inserted. Later on, we might be able to save
5872 // a copy using it.
5873 MachineInstr *PrevMI = ScalarToVec;
5874 for (unsigned i = 2, e = DstSize / EltSize + 1; i < e; ++i) {
5875 // Note that if we don't do a subregister copy, we can end up making an
5876 // extra register.
5877 Register OpReg = I.getOperand(i).getReg();
5878 // Do not emit inserts for undefs
5879 if (!getOpcodeDef<GImplicitDef>(OpReg, MRI)) {
5880 PrevMI = &*emitLaneInsert(std::nullopt, DstVec, OpReg, i - 1, RB, MIB);
5881 DstVec = PrevMI->getOperand(0).getReg();
5882 }
5883 }
5884
5885 // If DstTy's size in bits is less than 128, then emit a subregister copy
5886 // from DstVec to the last register we've defined.
5887 if (DstSize < 128) {
5888 // Force this to be FPR using the destination vector.
5889 const TargetRegisterClass *RC =
5890 getRegClassForTypeOnBank(DstTy, *RBI.getRegBank(DstVec, MRI, TRI));
5891 if (!RC)
5892 return false;
5893 if (RC != &AArch64::FPR32RegClass && RC != &AArch64::FPR64RegClass) {
5894 LLVM_DEBUG(dbgs() << "Unsupported register class!\n");
5895 return false;
5896 }
5897
5898 unsigned SubReg = 0;
5899 if (!getSubRegForClass(RC, TRI, SubReg))
5900 return false;
5901 if (SubReg != AArch64::ssub && SubReg != AArch64::dsub) {
5902 LLVM_DEBUG(dbgs() << "Unsupported destination size! (" << DstSize
5903 << "\n");
5904 return false;
5905 }
5906
5907 Register Reg = MRI.createVirtualRegister(RC);
5908 Register DstReg = I.getOperand(0).getReg();
5909
5910 MIB.buildInstr(TargetOpcode::COPY, {DstReg}, {}).addReg(DstVec, 0, SubReg);
5911 MachineOperand &RegOp = I.getOperand(1);
5912 RegOp.setReg(Reg);
5913 RBI.constrainGenericRegister(DstReg, *RC, MRI);
5914 } else {
5915 // We either have a vector with all elements (except the first one) undef or
5916 // at least one non-undef non-first element. In the first case, we need to
5917 // constrain the output register ourselves as we may have generated an
5918 // INSERT_SUBREG operation which is a generic operation for which the
5919 // output regclass cannot be automatically chosen.
5920 //
5921 // In the second case, there is no need to do this as it may generate an
5922 // instruction like INSvi32gpr where the regclass can be automatically
5923 // chosen.
5924 //
5925 // Also, we save a copy by re-using the destination register on the final
5926 // insert.
5927 PrevMI->getOperand(0).setReg(I.getOperand(0).getReg());
5928 constrainSelectedInstRegOperands(*PrevMI, TII, TRI, RBI);
5929 
5930 Register DstReg = PrevMI->getOperand(0).getReg();
5931 if (PrevMI == ScalarToVec && DstReg.isVirtual()) {
5932 const TargetRegisterClass *RC =
5933 getRegClassForTypeOnBank(DstTy, *RBI.getRegBank(DstVec, MRI, TRI));
5934 RBI.constrainGenericRegister(DstReg, *RC, MRI);
5935 }
5936 }
5937
5938 I.eraseFromParent();
5939 return true;
5940}
5941
5942bool AArch64InstructionSelector::selectVectorLoadIntrinsic(unsigned Opc,
5943 unsigned NumVecs,
5944 MachineInstr &I) {
5945 assert(I.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS);
5946 assert(Opc && "Expected an opcode?");
5947 assert(NumVecs > 1 && NumVecs < 5 && "Only support 2, 3, or 4 vectors");
5948 auto &MRI = *MIB.getMRI();
5949 LLT Ty = MRI.getType(I.getOperand(0).getReg());
5950 unsigned Size = Ty.getSizeInBits();
5951 assert((Size == 64 || Size == 128) &&
5952 "Destination must be 64 bits or 128 bits?");
5953 unsigned SubReg = Size == 64 ? AArch64::dsub0 : AArch64::qsub0;
5954 auto Ptr = I.getOperand(I.getNumOperands() - 1).getReg();
5955 assert(MRI.getType(Ptr).isPointer() && "Expected a pointer type?");
5956 auto Load = MIB.buildInstr(Opc, {Ty}, {Ptr});
5957 Load.cloneMemRefs(I);
5958 constrainSelectedInstRegOperands(*Load, TII, TRI, RBI);
5959 Register SelectedLoadDst = Load->getOperand(0).getReg();
5960 for (unsigned Idx = 0; Idx < NumVecs; ++Idx) {
5961 auto Vec = MIB.buildInstr(TargetOpcode::COPY, {I.getOperand(Idx)}, {})
5962 .addReg(SelectedLoadDst, 0, SubReg + Idx);
5963 // Emit the subreg copies and immediately select them.
5964 // FIXME: We should refactor our copy code into an emitCopy helper and
5965 // clean up uses of this pattern elsewhere in the selector.
5966 selectCopy(*Vec, TII, MRI, TRI, RBI);
5967 }
5968 return true;
5969}
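// Illustrative example (register names are placeholders): an
// @llvm.aarch64.neon.ld1x2 returning two <4 x s32> values selects LD1Twov4s,
// and the COPY loop above then splits the tuple def:
//   %tuple:qq = LD1Twov4s %ptr
//   %res0 = COPY %tuple.qsub0
//   %res1 = COPY %tuple.qsub1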
5970
5971bool AArch64InstructionSelector::selectVectorLoadLaneIntrinsic(
5972 unsigned Opc, unsigned NumVecs, MachineInstr &I) {
5973 assert(I.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS);
5974 assert(Opc && "Expected an opcode?");
5975 assert(NumVecs > 1 && NumVecs < 5 && "Only support 2, 3, or 4 vectors");
5976 auto &MRI = *MIB.getMRI();
5977 LLT Ty = MRI.getType(I.getOperand(0).getReg());
5978 bool Narrow = Ty.getSizeInBits() == 64;
5979
5980 auto FirstSrcRegIt = I.operands_begin() + NumVecs + 1;
5981 SmallVector<Register, 4> Regs(NumVecs);
5982 std::transform(FirstSrcRegIt, FirstSrcRegIt + NumVecs, Regs.begin(),
5983 [](auto MO) { return MO.getReg(); });
5984
5985 if (Narrow) {
5986 transform(Regs, Regs.begin(), [this](Register Reg) {
5987 return emitScalarToVector(64, &AArch64::FPR128RegClass, Reg, MIB)
5988 ->getOperand(0)
5989 .getReg();
5990 });
5991 Ty = Ty.multiplyElements(2);
5992 }
5993
5994 Register Tuple = createQTuple(Regs, MIB);
5995 auto LaneNo = getIConstantVRegVal((FirstSrcRegIt + NumVecs)->getReg(), MRI);
5996 if (!LaneNo)
5997 return false;
5998
5999 Register Ptr = (FirstSrcRegIt + NumVecs + 1)->getReg();
6000 auto Load = MIB.buildInstr(Opc, {Ty}, {})
6001 .addReg(Tuple)
6002 .addImm(LaneNo->getZExtValue())
6003 .addReg(Ptr);
6004 Load.cloneMemRefs(I);
6005 constrainSelectedInstRegOperands(*Load, TII, TRI, RBI);
6006 Register SelectedLoadDst = Load->getOperand(0).getReg();
6007 unsigned SubReg = AArch64::qsub0;
6008 for (unsigned Idx = 0; Idx < NumVecs; ++Idx) {
6009 auto Vec = MIB.buildInstr(TargetOpcode::COPY,
6010 {Narrow ? DstOp(&AArch64::FPR128RegClass)
6011 : DstOp(I.getOperand(Idx).getReg())},
6012 {})
6013 .addReg(SelectedLoadDst, 0, SubReg + Idx);
6014 Register WideReg = Vec.getReg(0);
6015 // Emit the subreg copies and immediately select them.
6016 selectCopy(*Vec, TII, MRI, TRI, RBI);
6017 if (Narrow &&
6018 !emitNarrowVector(I.getOperand(Idx).getReg(), WideReg, MIB, MRI))
6019 return false;
6020 }
6021 return true;
6022}
6023
6024void AArch64InstructionSelector::selectVectorStoreIntrinsic(MachineInstr &I,
6025 unsigned NumVecs,
6026 unsigned Opc) {
6027 MachineRegisterInfo &MRI = I.getParent()->getParent()->getRegInfo();
6028 LLT Ty = MRI.getType(I.getOperand(1).getReg());
6029 Register Ptr = I.getOperand(1 + NumVecs).getReg();
6030
6031 SmallVector<Register, 2> Regs(NumVecs);
6032 std::transform(I.operands_begin() + 1, I.operands_begin() + 1 + NumVecs,
6033 Regs.begin(), [](auto MO) { return MO.getReg(); });
6034
6035 Register Tuple = Ty.getSizeInBits() == 128 ? createQTuple(Regs, MIB)
6036 : createDTuple(Regs, MIB);
6037 auto Store = MIB.buildInstr(Opc, {}, {Tuple, Ptr});
6038 Store.cloneMemRefs(I);
6039 constrainSelectedInstRegOperands(*Store, TII, TRI, RBI);
6040}
6041
6042bool AArch64InstructionSelector::selectVectorStoreLaneIntrinsic(
6043 MachineInstr &I, unsigned NumVecs, unsigned Opc) {
6044 MachineRegisterInfo &MRI = I.getParent()->getParent()->getRegInfo();
6045 LLT Ty = MRI.getType(I.getOperand(1).getReg());
6046 bool Narrow = Ty.getSizeInBits() == 64;
6047
6048 SmallVector<Register, 2> Regs(NumVecs);
6049 std::transform(I.operands_begin() + 1, I.operands_begin() + 1 + NumVecs,
6050 Regs.begin(), [](auto MO) { return MO.getReg(); });
6051
6052 if (Narrow)
6053 transform(Regs, Regs.begin(), [this](Register Reg) {
6054 return emitScalarToVector(64, &AArch64::FPR128RegClass, Reg, MIB)
6055 ->getOperand(0)
6056 .getReg();
6057 });
6058
6059 Register Tuple = createQTuple(Regs, MIB);
6060
6061 auto LaneNo = getIConstantVRegVal(I.getOperand(1 + NumVecs).getReg(), MRI);
6062 if (!LaneNo)
6063 return false;
6064 Register Ptr = I.getOperand(1 + NumVecs + 1).getReg();
6065 auto Store = MIB.buildInstr(Opc, {}, {})
6066 .addReg(Tuple)
6067 .addImm(LaneNo->getZExtValue())
6068 .addReg(Ptr);
6069 Store.cloneMemRefs(I);
6070 constrainSelectedInstRegOperands(*Store, TII, TRI, RBI);
6071 return true;
6072}
6073
6074bool AArch64InstructionSelector::selectIntrinsicWithSideEffects(
6075 MachineInstr &I, MachineRegisterInfo &MRI) {
6076 // Find the intrinsic ID.
6077 unsigned IntrinID = cast<GIntrinsic>(I).getIntrinsicID();
6078
6079 const LLT S8 = LLT::scalar(8);
6080 const LLT S16 = LLT::scalar(16);
6081 const LLT S32 = LLT::scalar(32);
6082 const LLT S64 = LLT::scalar(64);
6083 const LLT P0 = LLT::pointer(0, 64);
6084 // Select the instruction.
6085 switch (IntrinID) {
6086 default:
6087 return false;
6088 case Intrinsic::aarch64_ldxp:
6089 case Intrinsic::aarch64_ldaxp: {
6090 auto NewI = MIB.buildInstr(
6091 IntrinID == Intrinsic::aarch64_ldxp ? AArch64::LDXPX : AArch64::LDAXPX,
6092 {I.getOperand(0).getReg(), I.getOperand(1).getReg()},
6093 {I.getOperand(3)});
6094 NewI.cloneMemRefs(I);
6095 constrainSelectedInstRegOperands(*NewI, TII, TRI, RBI);
6096 break;
6097 }
6098 case Intrinsic::aarch64_neon_ld1x2: {
6099 LLT Ty = MRI.getType(I.getOperand(0).getReg());
6100 unsigned Opc = 0;
6101 if (Ty == LLT::fixed_vector(8, S8))
6102 Opc = AArch64::LD1Twov8b;
6103 else if (Ty == LLT::fixed_vector(16, S8))
6104 Opc = AArch64::LD1Twov16b;
6105 else if (Ty == LLT::fixed_vector(4, S16))
6106 Opc = AArch64::LD1Twov4h;
6107 else if (Ty == LLT::fixed_vector(8, S16))
6108 Opc = AArch64::LD1Twov8h;
6109 else if (Ty == LLT::fixed_vector(2, S32))
6110 Opc = AArch64::LD1Twov2s;
6111 else if (Ty == LLT::fixed_vector(4, S32))
6112 Opc = AArch64::LD1Twov4s;
6113 else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
6114 Opc = AArch64::LD1Twov2d;
6115 else if (Ty == S64 || Ty == P0)
6116 Opc = AArch64::LD1Twov1d;
6117 else
6118 llvm_unreachable("Unexpected type for ld1x2!");
6119 selectVectorLoadIntrinsic(Opc, 2, I);
6120 break;
6121 }
6122 case Intrinsic::aarch64_neon_ld1x3: {
6123 LLT Ty = MRI.getType(I.getOperand(0).getReg());
6124 unsigned Opc = 0;
6125 if (Ty == LLT::fixed_vector(8, S8))
6126 Opc = AArch64::LD1Threev8b;
6127 else if (Ty == LLT::fixed_vector(16, S8))
6128 Opc = AArch64::LD1Threev16b;
6129 else if (Ty == LLT::fixed_vector(4, S16))
6130 Opc = AArch64::LD1Threev4h;
6131 else if (Ty == LLT::fixed_vector(8, S16))
6132 Opc = AArch64::LD1Threev8h;
6133 else if (Ty == LLT::fixed_vector(2, S32))
6134 Opc = AArch64::LD1Threev2s;
6135 else if (Ty == LLT::fixed_vector(4, S32))
6136 Opc = AArch64::LD1Threev4s;
6137 else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
6138 Opc = AArch64::LD1Threev2d;
6139 else if (Ty == S64 || Ty == P0)
6140 Opc = AArch64::LD1Threev1d;
6141 else
6142 llvm_unreachable("Unexpected type for ld1x3!");
6143 selectVectorLoadIntrinsic(Opc, 3, I);
6144 break;
6145 }
6146 case Intrinsic::aarch64_neon_ld1x4: {
6147 LLT Ty = MRI.getType(I.getOperand(0).getReg());
6148 unsigned Opc = 0;
6149 if (Ty == LLT::fixed_vector(8, S8))
6150 Opc = AArch64::LD1Fourv8b;
6151 else if (Ty == LLT::fixed_vector(16, S8))
6152 Opc = AArch64::LD1Fourv16b;
6153 else if (Ty == LLT::fixed_vector(4, S16))
6154 Opc = AArch64::LD1Fourv4h;
6155 else if (Ty == LLT::fixed_vector(8, S16))
6156 Opc = AArch64::LD1Fourv8h;
6157 else if (Ty == LLT::fixed_vector(2, S32))
6158 Opc = AArch64::LD1Fourv2s;
6159 else if (Ty == LLT::fixed_vector(4, S32))
6160 Opc = AArch64::LD1Fourv4s;
6161 else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
6162 Opc = AArch64::LD1Fourv2d;
6163 else if (Ty == S64 || Ty == P0)
6164 Opc = AArch64::LD1Fourv1d;
6165 else
6166 llvm_unreachable("Unexpected type for ld1x4!");
6167 selectVectorLoadIntrinsic(Opc, 4, I);
6168 break;
6169 }
6170 case Intrinsic::aarch64_neon_ld2: {
6171 LLT Ty = MRI.getType(I.getOperand(0).getReg());
6172 unsigned Opc = 0;
6173 if (Ty == LLT::fixed_vector(8, S8))
6174 Opc = AArch64::LD2Twov8b;
6175 else if (Ty == LLT::fixed_vector(16, S8))
6176 Opc = AArch64::LD2Twov16b;
6177 else if (Ty == LLT::fixed_vector(4, S16))
6178 Opc = AArch64::LD2Twov4h;
6179 else if (Ty == LLT::fixed_vector(8, S16))
6180 Opc = AArch64::LD2Twov8h;
6181 else if (Ty == LLT::fixed_vector(2, S32))
6182 Opc = AArch64::LD2Twov2s;
6183 else if (Ty == LLT::fixed_vector(4, S32))
6184 Opc = AArch64::LD2Twov4s;
6185 else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
6186 Opc = AArch64::LD2Twov2d;
6187 else if (Ty == S64 || Ty == P0)
6188 Opc = AArch64::LD1Twov1d;
6189 else
6190 llvm_unreachable("Unexpected type for ld2!");
6191 selectVectorLoadIntrinsic(Opc, 2, I);
6192 break;
6193 }
6194 case Intrinsic::aarch64_neon_ld2lane: {
6195 LLT Ty = MRI.getType(I.getOperand(0).getReg());
6196 unsigned Opc;
6197 if (Ty == LLT::fixed_vector(8, S8) || Ty == LLT::fixed_vector(16, S8))
6198 Opc = AArch64::LD2i8;
6199 else if (Ty == LLT::fixed_vector(4, S16) || Ty == LLT::fixed_vector(8, S16))
6200 Opc = AArch64::LD2i16;
6201 else if (Ty == LLT::fixed_vector(2, S32) || Ty == LLT::fixed_vector(4, S32))
6202 Opc = AArch64::LD2i32;
6203 else if (Ty == LLT::fixed_vector(2, S64) ||
6204 Ty == LLT::fixed_vector(2, P0) || Ty == S64 || Ty == P0)
6205 Opc = AArch64::LD2i64;
6206 else
6207 llvm_unreachable("Unexpected type for ld2lane!");
6208 if (!selectVectorLoadLaneIntrinsic(Opc, 2, I))
6209 return false;
6210 break;
6211 }
6212 case Intrinsic::aarch64_neon_ld2r: {
6213 LLT Ty = MRI.getType(I.getOperand(0).getReg());
6214 unsigned Opc = 0;
6215 if (Ty == LLT::fixed_vector(8, S8))
6216 Opc = AArch64::LD2Rv8b;
6217 else if (Ty == LLT::fixed_vector(16, S8))
6218 Opc = AArch64::LD2Rv16b;
6219 else if (Ty == LLT::fixed_vector(4, S16))
6220 Opc = AArch64::LD2Rv4h;
6221 else if (Ty == LLT::fixed_vector(8, S16))
6222 Opc = AArch64::LD2Rv8h;
6223 else if (Ty == LLT::fixed_vector(2, S32))
6224 Opc = AArch64::LD2Rv2s;
6225 else if (Ty == LLT::fixed_vector(4, S32))
6226 Opc = AArch64::LD2Rv4s;
6227 else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
6228 Opc = AArch64::LD2Rv2d;
6229 else if (Ty == S64 || Ty == P0)
6230 Opc = AArch64::LD2Rv1d;
6231 else
6232 llvm_unreachable("Unexpected type for ld2r!");
6233 selectVectorLoadIntrinsic(Opc, 2, I);
6234 break;
6235 }
6236 case Intrinsic::aarch64_neon_ld3: {
6237 LLT Ty = MRI.getType(I.getOperand(0).getReg());
6238 unsigned Opc = 0;
6239 if (Ty == LLT::fixed_vector(8, S8))
6240 Opc = AArch64::LD3Threev8b;
6241 else if (Ty == LLT::fixed_vector(16, S8))
6242 Opc = AArch64::LD3Threev16b;
6243 else if (Ty == LLT::fixed_vector(4, S16))
6244 Opc = AArch64::LD3Threev4h;
6245 else if (Ty == LLT::fixed_vector(8, S16))
6246 Opc = AArch64::LD3Threev8h;
6247 else if (Ty == LLT::fixed_vector(2, S32))
6248 Opc = AArch64::LD3Threev2s;
6249 else if (Ty == LLT::fixed_vector(4, S32))
6250 Opc = AArch64::LD3Threev4s;
6251 else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
6252 Opc = AArch64::LD3Threev2d;
6253 else if (Ty == S64 || Ty == P0)
6254 Opc = AArch64::LD1Threev1d;
6255 else
6256 llvm_unreachable("Unexpected type for ld3!");
6257 selectVectorLoadIntrinsic(Opc, 3, I);
6258 break;
6259 }
6260 case Intrinsic::aarch64_neon_ld3lane: {
6261 LLT Ty = MRI.getType(I.getOperand(0).getReg());
6262 unsigned Opc;
6263 if (Ty == LLT::fixed_vector(8, S8) || Ty == LLT::fixed_vector(16, S8))
6264 Opc = AArch64::LD3i8;
6265 else if (Ty == LLT::fixed_vector(4, S16) || Ty == LLT::fixed_vector(8, S16))
6266 Opc = AArch64::LD3i16;
6267 else if (Ty == LLT::fixed_vector(2, S32) || Ty == LLT::fixed_vector(4, S32))
6268 Opc = AArch64::LD3i32;
6269 else if (Ty == LLT::fixed_vector(2, S64) ||
6270 Ty == LLT::fixed_vector(2, P0) || Ty == S64 || Ty == P0)
6271 Opc = AArch64::LD3i64;
6272 else
6273 llvm_unreachable("Unexpected type for ld3lane!");
6274 if (!selectVectorLoadLaneIntrinsic(Opc, 3, I))
6275 return false;
6276 break;
6277 }
6278 case Intrinsic::aarch64_neon_ld3r: {
6279 LLT Ty = MRI.getType(I.getOperand(0).getReg());
6280 unsigned Opc = 0;
6281 if (Ty == LLT::fixed_vector(8, S8))
6282 Opc = AArch64::LD3Rv8b;
6283 else if (Ty == LLT::fixed_vector(16, S8))
6284 Opc = AArch64::LD3Rv16b;
6285 else if (Ty == LLT::fixed_vector(4, S16))
6286 Opc = AArch64::LD3Rv4h;
6287 else if (Ty == LLT::fixed_vector(8, S16))
6288 Opc = AArch64::LD3Rv8h;
6289 else if (Ty == LLT::fixed_vector(2, S32))
6290 Opc = AArch64::LD3Rv2s;
6291 else if (Ty == LLT::fixed_vector(4, S32))
6292 Opc = AArch64::LD3Rv4s;
6293 else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
6294 Opc = AArch64::LD3Rv2d;
6295 else if (Ty == S64 || Ty == P0)
6296 Opc = AArch64::LD3Rv1d;
6297 else
6298 llvm_unreachable("Unexpected type for ld3r!");
6299 selectVectorLoadIntrinsic(Opc, 3, I);
6300 break;
6301 }
6302 case Intrinsic::aarch64_neon_ld4: {
6303 LLT Ty = MRI.getType(I.getOperand(0).getReg());
6304 unsigned Opc = 0;
6305 if (Ty == LLT::fixed_vector(8, S8))
6306 Opc = AArch64::LD4Fourv8b;
6307 else if (Ty == LLT::fixed_vector(16, S8))
6308 Opc = AArch64::LD4Fourv16b;
6309 else if (Ty == LLT::fixed_vector(4, S16))
6310 Opc = AArch64::LD4Fourv4h;
6311 else if (Ty == LLT::fixed_vector(8, S16))
6312 Opc = AArch64::LD4Fourv8h;
6313 else if (Ty == LLT::fixed_vector(2, S32))
6314 Opc = AArch64::LD4Fourv2s;
6315 else if (Ty == LLT::fixed_vector(4, S32))
6316 Opc = AArch64::LD4Fourv4s;
6317 else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
6318 Opc = AArch64::LD4Fourv2d;
6319 else if (Ty == S64 || Ty == P0)
6320 Opc = AArch64::LD1Fourv1d;
6321 else
6322 llvm_unreachable("Unexpected type for ld4!");
6323 selectVectorLoadIntrinsic(Opc, 4, I);
6324 break;
6325 }
6326 case Intrinsic::aarch64_neon_ld4lane: {
6327 LLT Ty = MRI.getType(I.getOperand(0).getReg());
6328 unsigned Opc;
6329 if (Ty == LLT::fixed_vector(8, S8) || Ty == LLT::fixed_vector(16, S8))
6330 Opc = AArch64::LD4i8;
6331 else if (Ty == LLT::fixed_vector(4, S16) || Ty == LLT::fixed_vector(8, S16))
6332 Opc = AArch64::LD4i16;
6333 else if (Ty == LLT::fixed_vector(2, S32) || Ty == LLT::fixed_vector(4, S32))
6334 Opc = AArch64::LD4i32;
6335 else if (Ty == LLT::fixed_vector(2, S64) ||
6336 Ty == LLT::fixed_vector(2, P0) || Ty == S64 || Ty == P0)
6337 Opc = AArch64::LD4i64;
6338 else
6339     llvm_unreachable("Unexpected type for ld4lane!");
6340 if (!selectVectorLoadLaneIntrinsic(Opc, 4, I))
6341 return false;
6342 break;
6343 }
6344 case Intrinsic::aarch64_neon_ld4r: {
6345 LLT Ty = MRI.getType(I.getOperand(0).getReg());
6346 unsigned Opc = 0;
6347 if (Ty == LLT::fixed_vector(8, S8))
6348 Opc = AArch64::LD4Rv8b;
6349 else if (Ty == LLT::fixed_vector(16, S8))
6350 Opc = AArch64::LD4Rv16b;
6351 else if (Ty == LLT::fixed_vector(4, S16))
6352 Opc = AArch64::LD4Rv4h;
6353 else if (Ty == LLT::fixed_vector(8, S16))
6354 Opc = AArch64::LD4Rv8h;
6355 else if (Ty == LLT::fixed_vector(2, S32))
6356 Opc = AArch64::LD4Rv2s;
6357 else if (Ty == LLT::fixed_vector(4, S32))
6358 Opc = AArch64::LD4Rv4s;
6359 else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
6360 Opc = AArch64::LD4Rv2d;
6361 else if (Ty == S64 || Ty == P0)
6362 Opc = AArch64::LD4Rv1d;
6363 else
6364 llvm_unreachable("Unexpected type for ld4r!");
6365 selectVectorLoadIntrinsic(Opc, 4, I);
6366 break;
6367 }
6368 case Intrinsic::aarch64_neon_st1x2: {
6369 LLT Ty = MRI.getType(I.getOperand(1).getReg());
6370 unsigned Opc;
6371 if (Ty == LLT::fixed_vector(8, S8))
6372 Opc = AArch64::ST1Twov8b;
6373 else if (Ty == LLT::fixed_vector(16, S8))
6374 Opc = AArch64::ST1Twov16b;
6375 else if (Ty == LLT::fixed_vector(4, S16))
6376 Opc = AArch64::ST1Twov4h;
6377 else if (Ty == LLT::fixed_vector(8, S16))
6378 Opc = AArch64::ST1Twov8h;
6379 else if (Ty == LLT::fixed_vector(2, S32))
6380 Opc = AArch64::ST1Twov2s;
6381 else if (Ty == LLT::fixed_vector(4, S32))
6382 Opc = AArch64::ST1Twov4s;
6383 else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
6384 Opc = AArch64::ST1Twov2d;
6385 else if (Ty == S64 || Ty == P0)
6386 Opc = AArch64::ST1Twov1d;
6387 else
6388 llvm_unreachable("Unexpected type for st1x2!");
6389 selectVectorStoreIntrinsic(I, 2, Opc);
6390 break;
6391 }
6392 case Intrinsic::aarch64_neon_st1x3: {
6393 LLT Ty = MRI.getType(I.getOperand(1).getReg());
6394 unsigned Opc;
6395 if (Ty == LLT::fixed_vector(8, S8))
6396 Opc = AArch64::ST1Threev8b;
6397 else if (Ty == LLT::fixed_vector(16, S8))
6398 Opc = AArch64::ST1Threev16b;
6399 else if (Ty == LLT::fixed_vector(4, S16))
6400 Opc = AArch64::ST1Threev4h;
6401 else if (Ty == LLT::fixed_vector(8, S16))
6402 Opc = AArch64::ST1Threev8h;
6403 else if (Ty == LLT::fixed_vector(2, S32))
6404 Opc = AArch64::ST1Threev2s;
6405 else if (Ty == LLT::fixed_vector(4, S32))
6406 Opc = AArch64::ST1Threev4s;
6407 else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
6408 Opc = AArch64::ST1Threev2d;
6409 else if (Ty == S64 || Ty == P0)
6410 Opc = AArch64::ST1Threev1d;
6411 else
6412 llvm_unreachable("Unexpected type for st1x3!");
6413 selectVectorStoreIntrinsic(I, 3, Opc);
6414 break;
6415 }
6416 case Intrinsic::aarch64_neon_st1x4: {
6417 LLT Ty = MRI.getType(I.getOperand(1).getReg());
6418 unsigned Opc;
6419 if (Ty == LLT::fixed_vector(8, S8))
6420 Opc = AArch64::ST1Fourv8b;
6421 else if (Ty == LLT::fixed_vector(16, S8))
6422 Opc = AArch64::ST1Fourv16b;
6423 else if (Ty == LLT::fixed_vector(4, S16))
6424 Opc = AArch64::ST1Fourv4h;
6425 else if (Ty == LLT::fixed_vector(8, S16))
6426 Opc = AArch64::ST1Fourv8h;
6427 else if (Ty == LLT::fixed_vector(2, S32))
6428 Opc = AArch64::ST1Fourv2s;
6429 else if (Ty == LLT::fixed_vector(4, S32))
6430 Opc = AArch64::ST1Fourv4s;
6431 else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
6432 Opc = AArch64::ST1Fourv2d;
6433 else if (Ty == S64 || Ty == P0)
6434 Opc = AArch64::ST1Fourv1d;
6435 else
6436 llvm_unreachable("Unexpected type for st1x4!");
6437 selectVectorStoreIntrinsic(I, 4, Opc);
6438 break;
6439 }
6440 case Intrinsic::aarch64_neon_st2: {
6441 LLT Ty = MRI.getType(I.getOperand(1).getReg());
6442 unsigned Opc;
6443 if (Ty == LLT::fixed_vector(8, S8))
6444 Opc = AArch64::ST2Twov8b;
6445 else if (Ty == LLT::fixed_vector(16, S8))
6446 Opc = AArch64::ST2Twov16b;
6447 else if (Ty == LLT::fixed_vector(4, S16))
6448 Opc = AArch64::ST2Twov4h;
6449 else if (Ty == LLT::fixed_vector(8, S16))
6450 Opc = AArch64::ST2Twov8h;
6451 else if (Ty == LLT::fixed_vector(2, S32))
6452 Opc = AArch64::ST2Twov2s;
6453 else if (Ty == LLT::fixed_vector(4, S32))
6454 Opc = AArch64::ST2Twov4s;
6455 else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
6456 Opc = AArch64::ST2Twov2d;
6457 else if (Ty == S64 || Ty == P0)
6458 Opc = AArch64::ST1Twov1d;
6459 else
6460 llvm_unreachable("Unexpected type for st2!");
6461 selectVectorStoreIntrinsic(I, 2, Opc);
6462 break;
6463 }
6464 case Intrinsic::aarch64_neon_st3: {
6465 LLT Ty = MRI.getType(I.getOperand(1).getReg());
6466 unsigned Opc;
6467 if (Ty == LLT::fixed_vector(8, S8))
6468 Opc = AArch64::ST3Threev8b;
6469 else if (Ty == LLT::fixed_vector(16, S8))
6470 Opc = AArch64::ST3Threev16b;
6471 else if (Ty == LLT::fixed_vector(4, S16))
6472 Opc = AArch64::ST3Threev4h;
6473 else if (Ty == LLT::fixed_vector(8, S16))
6474 Opc = AArch64::ST3Threev8h;
6475 else if (Ty == LLT::fixed_vector(2, S32))
6476 Opc = AArch64::ST3Threev2s;
6477 else if (Ty == LLT::fixed_vector(4, S32))
6478 Opc = AArch64::ST3Threev4s;
6479 else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
6480 Opc = AArch64::ST3Threev2d;
6481 else if (Ty == S64 || Ty == P0)
6482 Opc = AArch64::ST1Threev1d;
6483 else
6484 llvm_unreachable("Unexpected type for st3!");
6485 selectVectorStoreIntrinsic(I, 3, Opc);
6486 break;
6487 }
6488 case Intrinsic::aarch64_neon_st4: {
6489 LLT Ty = MRI.getType(I.getOperand(1).getReg());
6490 unsigned Opc;
6491 if (Ty == LLT::fixed_vector(8, S8))
6492 Opc = AArch64::ST4Fourv8b;
6493 else if (Ty == LLT::fixed_vector(16, S8))
6494 Opc = AArch64::ST4Fourv16b;
6495 else if (Ty == LLT::fixed_vector(4, S16))
6496 Opc = AArch64::ST4Fourv4h;
6497 else if (Ty == LLT::fixed_vector(8, S16))
6498 Opc = AArch64::ST4Fourv8h;
6499 else if (Ty == LLT::fixed_vector(2, S32))
6500 Opc = AArch64::ST4Fourv2s;
6501 else if (Ty == LLT::fixed_vector(4, S32))
6502 Opc = AArch64::ST4Fourv4s;
6503 else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
6504 Opc = AArch64::ST4Fourv2d;
6505 else if (Ty == S64 || Ty == P0)
6506 Opc = AArch64::ST1Fourv1d;
6507 else
6508 llvm_unreachable("Unexpected type for st4!");
6509 selectVectorStoreIntrinsic(I, 4, Opc);
6510 break;
6511 }
6512 case Intrinsic::aarch64_neon_st2lane: {
6513 LLT Ty = MRI.getType(I.getOperand(1).getReg());
6514 unsigned Opc;
6515 if (Ty == LLT::fixed_vector(8, S8) || Ty == LLT::fixed_vector(16, S8))
6516 Opc = AArch64::ST2i8;
6517 else if (Ty == LLT::fixed_vector(4, S16) || Ty == LLT::fixed_vector(8, S16))
6518 Opc = AArch64::ST2i16;
6519 else if (Ty == LLT::fixed_vector(2, S32) || Ty == LLT::fixed_vector(4, S32))
6520 Opc = AArch64::ST2i32;
6521 else if (Ty == LLT::fixed_vector(2, S64) ||
6522 Ty == LLT::fixed_vector(2, P0) || Ty == S64 || Ty == P0)
6523 Opc = AArch64::ST2i64;
6524 else
6525 llvm_unreachable("Unexpected type for st2lane!");
6526 if (!selectVectorStoreLaneIntrinsic(I, 2, Opc))
6527 return false;
6528 break;
6529 }
6530 case Intrinsic::aarch64_neon_st3lane: {
6531 LLT Ty = MRI.getType(I.getOperand(1).getReg());
6532 unsigned Opc;
6533 if (Ty == LLT::fixed_vector(8, S8) || Ty == LLT::fixed_vector(16, S8))
6534 Opc = AArch64::ST3i8;
6535 else if (Ty == LLT::fixed_vector(4, S16) || Ty == LLT::fixed_vector(8, S16))
6536 Opc = AArch64::ST3i16;
6537 else if (Ty == LLT::fixed_vector(2, S32) || Ty == LLT::fixed_vector(4, S32))
6538 Opc = AArch64::ST3i32;
6539 else if (Ty == LLT::fixed_vector(2, S64) ||
6540 Ty == LLT::fixed_vector(2, P0) || Ty == S64 || Ty == P0)
6541 Opc = AArch64::ST3i64;
6542 else
6543 llvm_unreachable("Unexpected type for st3lane!");
6544 if (!selectVectorStoreLaneIntrinsic(I, 3, Opc))
6545 return false;
6546 break;
6547 }
6548 case Intrinsic::aarch64_neon_st4lane: {
6549 LLT Ty = MRI.getType(I.getOperand(1).getReg());
6550 unsigned Opc;
6551 if (Ty == LLT::fixed_vector(8, S8) || Ty == LLT::fixed_vector(16, S8))
6552 Opc = AArch64::ST4i8;
6553 else if (Ty == LLT::fixed_vector(4, S16) || Ty == LLT::fixed_vector(8, S16))
6554 Opc = AArch64::ST4i16;
6555 else if (Ty == LLT::fixed_vector(2, S32) || Ty == LLT::fixed_vector(4, S32))
6556 Opc = AArch64::ST4i32;
6557 else if (Ty == LLT::fixed_vector(2, S64) ||
6558 Ty == LLT::fixed_vector(2, P0) || Ty == S64 || Ty == P0)
6559 Opc = AArch64::ST4i64;
6560 else
6561 llvm_unreachable("Unexpected type for st4lane!");
6562 if (!selectVectorStoreLaneIntrinsic(I, 4, Opc))
6563 return false;
6564 break;
6565 }
6566 case Intrinsic::aarch64_mops_memset_tag: {
6567 // Transform
6568     //   %dst:gpr(p0) = \
6569     //      G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aarch64.mops.memset.tag), \
6570     //      %dst:gpr(p0), %val:gpr(s64), %n:gpr(s64)
6571     // where %dst is updated, into
6572     //   (%Rd:GPR64common, %Rn:GPR64) = \
6573     //     MOPSMemorySetTaggingPseudo \
6574     //       %Rd:GPR64common, %Rn:GPR64, %Rm:GPR64
6575     // where Rd and Rn are tied.
6576     // It is expected that %val has been extended to s64 in legalization.
6577     // Note that the order of the size/value operands is swapped.
6578
6579 Register DstDef = I.getOperand(0).getReg();
6580 // I.getOperand(1) is the intrinsic function
6581 Register DstUse = I.getOperand(2).getReg();
6582 Register ValUse = I.getOperand(3).getReg();
6583 Register SizeUse = I.getOperand(4).getReg();
6584
6585 // MOPSMemorySetTaggingPseudo has two defs; the intrinsic call has only one.
6586 // Therefore an additional virtual register is required for the updated size
6587 // operand. This value is not accessible via the semantics of the intrinsic.
6588 Register SizeDef = MRI.createGenericVirtualRegister(LLT::scalar(64));
6589
6590 auto Memset = MIB.buildInstr(AArch64::MOPSMemorySetTaggingPseudo,
6591 {DstDef, SizeDef}, {DstUse, SizeUse, ValUse});
6592     Memset.cloneMemRefs(I);
6593     constrainSelectedInstRegOperands(*Memset, TII, TRI, RBI);
6594     break;
6595 }
6596 }
6597
6598 I.eraseFromParent();
6599 return true;
6600}
6601
6602bool AArch64InstructionSelector::selectIntrinsic(MachineInstr &I,
6603 MachineRegisterInfo &MRI) {
6604 unsigned IntrinID = cast<GIntrinsic>(I).getIntrinsicID();
6605
6606 switch (IntrinID) {
6607 default:
6608 break;
6609 case Intrinsic::ptrauth_resign: {
6610 Register DstReg = I.getOperand(0).getReg();
6611 Register ValReg = I.getOperand(2).getReg();
6612 uint64_t AUTKey = I.getOperand(3).getImm();
6613 Register AUTDisc = I.getOperand(4).getReg();
6614 uint64_t PACKey = I.getOperand(5).getImm();
6615 Register PACDisc = I.getOperand(6).getReg();
6616
6617 Register AUTAddrDisc = AUTDisc;
6618 uint16_t AUTConstDiscC = 0;
6619     std::tie(AUTConstDiscC, AUTAddrDisc) =
6620         extractPtrauthBlendDiscriminators(AUTDisc, MRI);
6621
6622 Register PACAddrDisc = PACDisc;
6623 uint16_t PACConstDiscC = 0;
6624     std::tie(PACConstDiscC, PACAddrDisc) =
6625         extractPtrauthBlendDiscriminators(PACDisc, MRI);
6626
6627 MIB.buildCopy({AArch64::X16}, {ValReg});
6628 MIB.buildInstr(TargetOpcode::IMPLICIT_DEF, {AArch64::X17}, {});
6629 MIB.buildInstr(AArch64::AUTPAC)
6630 .addImm(AUTKey)
6631 .addImm(AUTConstDiscC)
6632 .addUse(AUTAddrDisc)
6633 .addImm(PACKey)
6634 .addImm(PACConstDiscC)
6635 .addUse(PACAddrDisc)
6636 .constrainAllUses(TII, TRI, RBI);
6637 MIB.buildCopy({DstReg}, Register(AArch64::X16));
6638
6639 RBI.constrainGenericRegister(DstReg, AArch64::GPR64RegClass, MRI);
6640 I.eraseFromParent();
6641 return true;
6642 }
6643 case Intrinsic::ptrauth_auth: {
6644 Register DstReg = I.getOperand(0).getReg();
6645 Register ValReg = I.getOperand(2).getReg();
6646 uint64_t AUTKey = I.getOperand(3).getImm();
6647 Register AUTDisc = I.getOperand(4).getReg();
6648
6649 Register AUTAddrDisc = AUTDisc;
6650 uint16_t AUTConstDiscC = 0;
6651     std::tie(AUTConstDiscC, AUTAddrDisc) =
6652         extractPtrauthBlendDiscriminators(AUTDisc, MRI);
6653
6654 if (STI.isX16X17Safer()) {
6655 MIB.buildCopy({AArch64::X16}, {ValReg});
6656 MIB.buildInstr(TargetOpcode::IMPLICIT_DEF, {AArch64::X17}, {});
6657 MIB.buildInstr(AArch64::AUTx16x17)
6658 .addImm(AUTKey)
6659 .addImm(AUTConstDiscC)
6660 .addUse(AUTAddrDisc)
6661 .constrainAllUses(TII, TRI, RBI);
6662 MIB.buildCopy({DstReg}, Register(AArch64::X16));
6663 } else {
6664 Register ScratchReg =
6665 MRI.createVirtualRegister(&AArch64::GPR64commonRegClass);
6666 MIB.buildInstr(AArch64::AUTxMxN)
6667 .addDef(DstReg)
6668 .addDef(ScratchReg)
6669 .addUse(ValReg)
6670 .addImm(AUTKey)
6671 .addImm(AUTConstDiscC)
6672 .addUse(AUTAddrDisc)
6673 .constrainAllUses(TII, TRI, RBI);
6674 }
6675
6676 RBI.constrainGenericRegister(DstReg, AArch64::GPR64RegClass, MRI);
6677 I.eraseFromParent();
6678 return true;
6679 }
6680 case Intrinsic::frameaddress:
6681 case Intrinsic::returnaddress: {
6682 MachineFunction &MF = *I.getParent()->getParent();
6683 MachineFrameInfo &MFI = MF.getFrameInfo();
6684
6685 unsigned Depth = I.getOperand(2).getImm();
6686 Register DstReg = I.getOperand(0).getReg();
6687 RBI.constrainGenericRegister(DstReg, AArch64::GPR64RegClass, MRI);
6688
6689 if (Depth == 0 && IntrinID == Intrinsic::returnaddress) {
6690 if (!MFReturnAddr) {
6691 // Insert the copy from LR/X30 into the entry block, before it can be
6692 // clobbered by anything.
6693 MFI.setReturnAddressIsTaken(true);
6694 MFReturnAddr = getFunctionLiveInPhysReg(
6695 MF, TII, AArch64::LR, AArch64::GPR64RegClass, I.getDebugLoc());
6696 }
6697
6698 if (STI.hasPAuth()) {
6699 MIB.buildInstr(AArch64::XPACI, {DstReg}, {MFReturnAddr});
6700 } else {
6701 MIB.buildCopy({Register(AArch64::LR)}, {MFReturnAddr});
6702 MIB.buildInstr(AArch64::XPACLRI);
6703 MIB.buildCopy({DstReg}, {Register(AArch64::LR)});
6704 }
6705
6706 I.eraseFromParent();
6707 return true;
6708 }
6709
6710 MFI.setFrameAddressIsTaken(true);
6711 Register FrameAddr(AArch64::FP);
6712 while (Depth--) {
6713 Register NextFrame = MRI.createVirtualRegister(&AArch64::GPR64spRegClass);
6714 auto Ldr =
6715         MIB.buildInstr(AArch64::LDRXui, {NextFrame}, {FrameAddr}).addImm(0);
6716     constrainSelectedInstRegOperands(*Ldr, TII, TRI, RBI);
6717     FrameAddr = NextFrame;
6718 }
6719
6720 if (IntrinID == Intrinsic::frameaddress)
6721 MIB.buildCopy({DstReg}, {FrameAddr});
6722 else {
6723 MFI.setReturnAddressIsTaken(true);
6724
6725 if (STI.hasPAuth()) {
6726 Register TmpReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
6727 MIB.buildInstr(AArch64::LDRXui, {TmpReg}, {FrameAddr}).addImm(1);
6728 MIB.buildInstr(AArch64::XPACI, {DstReg}, {TmpReg});
6729 } else {
6730 MIB.buildInstr(AArch64::LDRXui, {Register(AArch64::LR)}, {FrameAddr})
6731 .addImm(1);
6732 MIB.buildInstr(AArch64::XPACLRI);
6733 MIB.buildCopy({DstReg}, {Register(AArch64::LR)});
6734 }
6735 }
6736
6737 I.eraseFromParent();
6738 return true;
6739 }
6740 case Intrinsic::aarch64_neon_tbl2:
6741 SelectTable(I, MRI, 2, AArch64::TBLv8i8Two, AArch64::TBLv16i8Two, false);
6742 return true;
6743 case Intrinsic::aarch64_neon_tbl3:
6744 SelectTable(I, MRI, 3, AArch64::TBLv8i8Three, AArch64::TBLv16i8Three,
6745 false);
6746 return true;
6747 case Intrinsic::aarch64_neon_tbl4:
6748 SelectTable(I, MRI, 4, AArch64::TBLv8i8Four, AArch64::TBLv16i8Four, false);
6749 return true;
6750 case Intrinsic::aarch64_neon_tbx2:
6751 SelectTable(I, MRI, 2, AArch64::TBXv8i8Two, AArch64::TBXv16i8Two, true);
6752 return true;
6753 case Intrinsic::aarch64_neon_tbx3:
6754 SelectTable(I, MRI, 3, AArch64::TBXv8i8Three, AArch64::TBXv16i8Three, true);
6755 return true;
6756 case Intrinsic::aarch64_neon_tbx4:
6757 SelectTable(I, MRI, 4, AArch64::TBXv8i8Four, AArch64::TBXv16i8Four, true);
6758 return true;
6759 case Intrinsic::swift_async_context_addr:
6760 auto Sub = MIB.buildInstr(AArch64::SUBXri, {I.getOperand(0).getReg()},
6761 {Register(AArch64::FP)})
6762 .addImm(8)
6763                    .addImm(0);
6764     constrainSelectedInstRegOperands(*Sub, TII, TRI, RBI);
6765
6767 MF->getInfo<AArch64FunctionInfo>()->setHasSwiftAsyncContext(true);
6768 I.eraseFromParent();
6769 return true;
6770 }
6771 return false;
6772}
6773
6774// G_PTRAUTH_GLOBAL_VALUE lowering
6775//
6776// We have 3 lowering alternatives to choose from:
6777// - MOVaddrPAC: similar to MOVaddr, with added PAC.
6778 //   If the GV doesn't need a GOT load (i.e., is locally defined),
6779 //   materialize the pointer using adrp+add+pac. See LowerMOVaddrPAC.
6780//
6781// - LOADgotPAC: similar to LOADgot, with added PAC.
6782// If the GV needs a GOT load, materialize the pointer using the usual
6783 //   GOT adrp+ldr, +pac. Pointers in the GOT are assumed to be unsigned, and
6784 //   the GOT section is assumed to be read-only (for example, via the relro
6785 //   mechanism). See LowerMOVaddrPAC.
6786//
6787// - LOADauthptrstatic: similar to LOADgot, but use a
6788// special stub slot instead of a GOT slot.
6789// Load a signed pointer for symbol 'sym' from a stub slot named
6790// 'sym$auth_ptr$key$disc' filled by dynamic linker during relocation
6791// resolving. This usually lowers to adrp+ldr, but also emits an entry into
6792// .data with an
6793// @AUTH relocation. See LowerLOADauthptrstatic.
6794//
6795 // All 3 are pseudos that are expanded late into longer sequences: this lets us
6796// provide integrity guarantees on the to-be-signed intermediate values.
6797//
6798// LOADauthptrstatic is undesirable because it requires a large section filled
6799// with often similarly-signed pointers, making it a good harvesting target.
6800 // Thus, it's only used for ptrauth references to extern_weak symbols, where it
6801 // lets us avoid the null checks the other lowerings would require.
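//
// For orientation, a rough sketch of the expanded sequences (illustrative
// only; the exact code comes from the pseudo expansions referenced above, and
// the register choices shown here are assumptions):
//   MOVaddrPAC:        adrp/add the address into x16, materialize the blended
//                      discriminator, then sign x16 with the requested key.
//   LOADgotPAC:        adrp + ldr of the GOT slot into x16, then sign as above.
//   LOADauthptrstatic: adrp + ldr of the 'sym$auth_ptr$key$disc' stub slot,
//                      which the dynamic linker fills with a pre-signed pointer.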
6802
6803bool AArch64InstructionSelector::selectPtrAuthGlobalValue(
6804 MachineInstr &I, MachineRegisterInfo &MRI) const {
6805 Register DefReg = I.getOperand(0).getReg();
6806 Register Addr = I.getOperand(1).getReg();
6807 uint64_t Key = I.getOperand(2).getImm();
6808 Register AddrDisc = I.getOperand(3).getReg();
6809 uint64_t Disc = I.getOperand(4).getImm();
6810 int64_t Offset = 0;
6811
6812   if (Key > AArch64PACKey::LAST)
6813     report_fatal_error("key in ptrauth global out of range [0, " +
6814 Twine((int)AArch64PACKey::LAST) + "]");
6815
6816 // Blend only works if the integer discriminator is 16-bit wide.
6817   if (!isUInt<16>(Disc))
6818     report_fatal_error(
6819         "constant discriminator in ptrauth global out of range [0, 0xffff]");
6820
6821 // Choosing between 3 lowering alternatives is target-specific.
6822 if (!STI.isTargetELF() && !STI.isTargetMachO())
6823 report_fatal_error("ptrauth global lowering only supported on MachO/ELF");
6824
6825 if (!MRI.hasOneDef(Addr))
6826 return false;
6827
6828 // First match any offset we take from the real global.
6829 const MachineInstr *DefMI = &*MRI.def_instr_begin(Addr);
6830 if (DefMI->getOpcode() == TargetOpcode::G_PTR_ADD) {
6831 Register OffsetReg = DefMI->getOperand(2).getReg();
6832 if (!MRI.hasOneDef(OffsetReg))
6833 return false;
6834 const MachineInstr &OffsetMI = *MRI.def_instr_begin(OffsetReg);
6835 if (OffsetMI.getOpcode() != TargetOpcode::G_CONSTANT)
6836 return false;
6837
6838 Addr = DefMI->getOperand(1).getReg();
6839 if (!MRI.hasOneDef(Addr))
6840 return false;
6841
6842 DefMI = &*MRI.def_instr_begin(Addr);
6843 Offset = OffsetMI.getOperand(1).getCImm()->getSExtValue();
6844 }
6845
6846 // We should be left with a genuine unauthenticated GlobalValue.
6847 const GlobalValue *GV;
6848 if (DefMI->getOpcode() == TargetOpcode::G_GLOBAL_VALUE) {
6849     GV = DefMI->getOperand(1).getGlobal();
6850     Offset += DefMI->getOperand(1).getOffset();
6851   } else if (DefMI->getOpcode() == AArch64::G_ADD_LOW) {
6852     GV = DefMI->getOperand(2).getGlobal();
6853     Offset += DefMI->getOperand(2).getOffset();
6854 } else {
6855 return false;
6856 }
6857
6858 MachineIRBuilder MIB(I);
6859
6860 // Classify the reference to determine whether it needs a GOT load.
6861 unsigned OpFlags = STI.ClassifyGlobalReference(GV, TM);
6862 const bool NeedsGOTLoad = ((OpFlags & AArch64II::MO_GOT) != 0);
6863 assert(((OpFlags & (~AArch64II::MO_GOT)) == 0) &&
6864 "unsupported non-GOT op flags on ptrauth global reference");
6865 assert((!GV->hasExternalWeakLinkage() || NeedsGOTLoad) &&
6866 "unsupported non-GOT reference to weak ptrauth global");
6867
6868 std::optional<APInt> AddrDiscVal = getIConstantVRegVal(AddrDisc, MRI);
6869 bool HasAddrDisc = !AddrDiscVal || *AddrDiscVal != 0;
6870
6871 // Non-extern_weak:
6872 // - No GOT load needed -> MOVaddrPAC
6873 // - GOT load for non-extern_weak -> LOADgotPAC
6874 // Note that we disallow extern_weak refs to avoid null checks later.
6875 if (!GV->hasExternalWeakLinkage()) {
6876 MIB.buildInstr(TargetOpcode::IMPLICIT_DEF, {AArch64::X16}, {});
6877 MIB.buildInstr(TargetOpcode::IMPLICIT_DEF, {AArch64::X17}, {});
6878     MIB.buildInstr(NeedsGOTLoad ? AArch64::LOADgotPAC : AArch64::MOVaddrPAC)
6879         .addGlobalAddress(GV, Offset)
6880         .addImm(Key)
6881 .addReg(HasAddrDisc ? AddrDisc : AArch64::XZR)
6882 .addImm(Disc)
6883 .constrainAllUses(TII, TRI, RBI);
6884 MIB.buildCopy(DefReg, Register(AArch64::X16));
6885 RBI.constrainGenericRegister(DefReg, AArch64::GPR64RegClass, MRI);
6886 I.eraseFromParent();
6887 return true;
6888 }
6889
6890 // extern_weak -> LOADauthptrstatic
6891
6892 // Offsets and extern_weak don't mix well: ptrauth aside, you'd get the
6893 // offset alone as a pointer if the symbol wasn't available, which would
6894 // probably break null checks in users. Ptrauth complicates things further:
6895 // error out.
6896   if (Offset != 0)
6897     report_fatal_error(
6898         "unsupported non-zero offset in weak ptrauth global reference");
6899
6900 if (HasAddrDisc)
6901 report_fatal_error("unsupported weak addr-div ptrauth global");
6902
6903 MIB.buildInstr(AArch64::LOADauthptrstatic, {DefReg}, {})
6904 .addGlobalAddress(GV, Offset)
6905 .addImm(Key)
6906 .addImm(Disc);
6907 RBI.constrainGenericRegister(DefReg, AArch64::GPR64RegClass, MRI);
6908
6909 I.eraseFromParent();
6910 return true;
6911}
6912
6913void AArch64InstructionSelector::SelectTable(MachineInstr &I,
6914 MachineRegisterInfo &MRI,
6915 unsigned NumVec, unsigned Opc1,
6916 unsigned Opc2, bool isExt) {
6917 Register DstReg = I.getOperand(0).getReg();
6918 unsigned Opc = MRI.getType(DstReg) == LLT::fixed_vector(8, 8) ? Opc1 : Opc2;
6919
6920   // Create the REG_SEQUENCE
6921   SmallVector<Register, 4> Regs;
6922   for (unsigned i = 0; i < NumVec; i++)
6923 Regs.push_back(I.getOperand(i + 2 + isExt).getReg());
6924 Register RegSeq = createQTuple(Regs, MIB);
6925
6926 Register IdxReg = I.getOperand(2 + NumVec + isExt).getReg();
6927 MachineInstrBuilder Instr;
6928 if (isExt) {
6929 Register Reg = I.getOperand(2).getReg();
6930 Instr = MIB.buildInstr(Opc, {DstReg}, {Reg, RegSeq, IdxReg});
6931 } else
6932     Instr = MIB.buildInstr(Opc, {DstReg}, {RegSeq, IdxReg});
6933   constrainSelectedInstRegOperands(*Instr, TII, TRI, RBI);
6934   I.eraseFromParent();
6935}
6936
6937InstructionSelector::ComplexRendererFns
6938AArch64InstructionSelector::selectShiftA_32(const MachineOperand &Root) const {
6939 auto MaybeImmed = getImmedFromMO(Root);
6940 if (MaybeImmed == std::nullopt || *MaybeImmed > 31)
6941 return std::nullopt;
6942 uint64_t Enc = (32 - *MaybeImmed) & 0x1f;
6943 return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Enc); }}};
6944}
6945
6946InstructionSelector::ComplexRendererFns
6947AArch64InstructionSelector::selectShiftB_32(const MachineOperand &Root) const {
6948 auto MaybeImmed = getImmedFromMO(Root);
6949 if (MaybeImmed == std::nullopt || *MaybeImmed > 31)
6950 return std::nullopt;
6951 uint64_t Enc = 31 - *MaybeImmed;
6952 return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Enc); }}};
6953}
6954
6955InstructionSelector::ComplexRendererFns
6956AArch64InstructionSelector::selectShiftA_64(const MachineOperand &Root) const {
6957 auto MaybeImmed = getImmedFromMO(Root);
6958 if (MaybeImmed == std::nullopt || *MaybeImmed > 63)
6959 return std::nullopt;
6960 uint64_t Enc = (64 - *MaybeImmed) & 0x3f;
6961 return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Enc); }}};
6962}
6963
6964InstructionSelector::ComplexRendererFns
6965AArch64InstructionSelector::selectShiftB_64(const MachineOperand &Root) const {
6966 auto MaybeImmed = getImmedFromMO(Root);
6967 if (MaybeImmed == std::nullopt || *MaybeImmed > 63)
6968 return std::nullopt;
6969 uint64_t Enc = 63 - *MaybeImmed;
6970 return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Enc); }}};
6971}
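// Worked example (illustrative, not from the source): these four renderers
// supply the two immediates used when a constant shift is selected as a
// UBFM/SBFM-style bitfield move. For a 32-bit left shift by 5, selectShiftA_32
// yields (32 - 5) & 0x1f = 27 and selectShiftB_32 yields 31 - 5 = 26, matching
// the alias  lsl w0, w1, #5  ==  ubfm w0, w1, #27, #26.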
6972
6973/// Helper to select an immediate value that can be represented as a 12-bit
6974/// value shifted left by either 0 or 12. If it is possible to do so, return
6975/// the immediate and shift value. If not, return std::nullopt.
6976///
6977/// Used by selectArithImmed and selectNegArithImmed.
6978InstructionSelector::ComplexRendererFns
6979AArch64InstructionSelector::select12BitValueWithLeftShift(
6980 uint64_t Immed) const {
6981 unsigned ShiftAmt;
6982 if (Immed >> 12 == 0) {
6983 ShiftAmt = 0;
6984 } else if ((Immed & 0xfff) == 0 && Immed >> 24 == 0) {
6985 ShiftAmt = 12;
6986 Immed = Immed >> 12;
6987 } else
6988 return std::nullopt;
6989
6990 unsigned ShVal = AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftAmt);
6991 return {{
6992 [=](MachineInstrBuilder &MIB) { MIB.addImm(Immed); },
6993 [=](MachineInstrBuilder &MIB) { MIB.addImm(ShVal); },
6994 }};
6995}
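// Worked example (illustrative): an immediate of 0x123000 has its low 12 bits
// clear and fits in 24 bits, so it is rendered as Immed = 0x123 with an LSL #12
// shifter operand, i.e. roughly  add x0, x1, #0x123, lsl #12.  A value such as
// 0x1234 satisfies neither case and returns std::nullopt, so it must be
// materialized into a register instead.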
6996
6997/// SelectArithImmed - Select an immediate value that can be represented as
6998/// a 12-bit value shifted left by either 0 or 12. If so, return true with
6999/// Val set to the 12-bit value and Shift set to the shifter operand.
7000InstructionSelector::ComplexRendererFns
7001AArch64InstructionSelector::selectArithImmed(MachineOperand &Root) const {
7002 // This function is called from the addsub_shifted_imm ComplexPattern,
7003   // which lists [imm] as the list of opcodes it's interested in; however,
7004 // we still need to check whether the operand is actually an immediate
7005 // here because the ComplexPattern opcode list is only used in
7006 // root-level opcode matching.
7007 auto MaybeImmed = getImmedFromMO(Root);
7008 if (MaybeImmed == std::nullopt)
7009 return std::nullopt;
7010 return select12BitValueWithLeftShift(*MaybeImmed);
7011}
7012
7013/// SelectNegArithImmed - As above, but negates the value before trying to
7014/// select it.
7015InstructionSelector::ComplexRendererFns
7016AArch64InstructionSelector::selectNegArithImmed(MachineOperand &Root) const {
7017 // We need a register here, because we need to know if we have a 64 or 32
7018 // bit immediate.
7019 if (!Root.isReg())
7020 return std::nullopt;
7021 auto MaybeImmed = getImmedFromMO(Root);
7022 if (MaybeImmed == std::nullopt)
7023 return std::nullopt;
7024 uint64_t Immed = *MaybeImmed;
7025
7026 // This negation is almost always valid, but "cmp wN, #0" and "cmn wN, #0"
7027 // have the opposite effect on the C flag, so this pattern mustn't match under
7028 // those circumstances.
7029 if (Immed == 0)
7030 return std::nullopt;
7031
7032 // Check if we're dealing with a 32-bit type on the root or a 64-bit type on
7033 // the root.
7034 MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();
7035 if (MRI.getType(Root.getReg()).getSizeInBits() == 32)
7036 Immed = ~((uint32_t)Immed) + 1;
7037 else
7038 Immed = ~Immed + 1ULL;
7039
7040 if (Immed & 0xFFFFFFFFFF000000ULL)
7041 return std::nullopt;
7042
7043 Immed &= 0xFFFFFFULL;
7044 return select12BitValueWithLeftShift(Immed);
7045}
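// Worked example (illustrative): with a 32-bit root, an immediate of -100
// negates to ~(uint32_t)-100 + 1 = 100, which fits in 12 bits, so e.g. an add
// of -100 can be selected as the corresponding subtract of #100 (and a compare
// as cmn). Zero is rejected above because cmp/cmn #0 disagree on the C flag.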
7046
7047/// Checks if we are sure that folding MI into load/store addressing mode is
7048/// beneficial or not.
7049///
7050/// Returns:
7051/// - true if folding MI would be beneficial.
7052/// - false if folding MI would be bad.
7053/// - std::nullopt if it is not sure whether folding MI is beneficial.
7054///
7055/// \p MI can be the offset operand of G_PTR_ADD, e.g. G_SHL in the example:
7056///
7057/// %13:gpr(s64) = G_CONSTANT i64 1
7058/// %8:gpr(s64) = G_SHL %6, %13(s64)
7059/// %9:gpr(p0) = G_PTR_ADD %0, %8(s64)
7060/// %12:gpr(s32) = G_LOAD %9(p0) :: (load (s16))
7061std::optional<bool> AArch64InstructionSelector::isWorthFoldingIntoAddrMode(
7062 MachineInstr &MI, const MachineRegisterInfo &MRI) const {
7063 if (MI.getOpcode() == AArch64::G_SHL) {
7064     // Address operands with shifts are free, except when running on subtargets
7065 // with AddrLSLSlow14.
7066 if (const auto ValAndVeg = getIConstantVRegValWithLookThrough(
7067 MI.getOperand(2).getReg(), MRI)) {
7068 const APInt ShiftVal = ValAndVeg->Value;
7069
7070 // Don't fold if we know this will be slow.
7071 return !(STI.hasAddrLSLSlow14() && (ShiftVal == 1 || ShiftVal == 4));
7072 }
7073 }
7074 return std::nullopt;
7075}
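// Illustrative example: given
//   %c:gpr(s64) = G_CONSTANT i64 3
//   %off:gpr(s64) = G_SHL %x, %c
// feeding an 8-byte load, the shift is reported as worth folding (it becomes
// the free lsl #3 in the addressing mode). A shift amount of 1 or 4 on a
// subtarget with AddrLSLSlow14 is reported as not worth folding, and
// std::nullopt is returned when the shift amount is not a known constant.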
7076
7077/// Return true if it is worth folding MI into an extended register. That is,
7078/// if it's safe to pull it into the addressing mode of a load or store as a
7079/// shift.
7080/// \p IsAddrOperand whether the def of MI is used as an address operand
7081/// (e.g. feeding into an LDR/STR).
7082bool AArch64InstructionSelector::isWorthFoldingIntoExtendedReg(
7083 MachineInstr &MI, const MachineRegisterInfo &MRI,
7084 bool IsAddrOperand) const {
7085
7086 // Always fold if there is one use, or if we're optimizing for size.
7087 Register DefReg = MI.getOperand(0).getReg();
7088 if (MRI.hasOneNonDBGUse(DefReg) ||
7089 MI.getParent()->getParent()->getFunction().hasOptSize())
7090 return true;
7091
7092 if (IsAddrOperand) {
7093 // If we are already sure that folding MI is good or bad, return the result.
7094 if (const auto Worth = isWorthFoldingIntoAddrMode(MI, MRI))
7095 return *Worth;
7096
7097 // Fold G_PTR_ADD if its offset operand can be folded
7098 if (MI.getOpcode() == AArch64::G_PTR_ADD) {
7099 MachineInstr *OffsetInst =
7100 getDefIgnoringCopies(MI.getOperand(2).getReg(), MRI);
7101
7102 // Note, we already know G_PTR_ADD is used by at least two instructions.
7103 // If we are also sure about whether folding is beneficial or not,
7104 // return the result.
7105 if (const auto Worth = isWorthFoldingIntoAddrMode(*OffsetInst, MRI))
7106 return *Worth;
7107 }
7108 }
7109
7110 // FIXME: Consider checking HasALULSLFast as appropriate.
7111
7112 // We have a fastpath, so folding a shift in and potentially computing it
7113 // many times may be beneficial. Check if this is only used in memory ops.
7114 // If it is, then we should fold.
7115 return all_of(MRI.use_nodbg_instructions(DefReg),
7116 [](MachineInstr &Use) { return Use.mayLoadOrStore(); });
7117}
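// Illustrative example: a G_SHL whose only non-debug user is a G_LOAD is
// folded into that load's addressing mode. With several users it is still
// folded if every user is a load or store (each memory op can recompute the
// shift for free in its address), but not if some user is, say, a G_ADD, since
// the shifted value would have to be materialized anyway.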
7118
7119 static bool isSignExtendShiftType(AArch64_AM::ShiftExtendType Type) {
7120   switch (Type) {
7121 case AArch64_AM::SXTB:
7122 case AArch64_AM::SXTH:
7123 case AArch64_AM::SXTW:
7124 return true;
7125 default:
7126 return false;
7127 }
7128}
7129
7130InstructionSelector::ComplexRendererFns
7131AArch64InstructionSelector::selectExtendedSHL(
7132 MachineOperand &Root, MachineOperand &Base, MachineOperand &Offset,
7133 unsigned SizeInBytes, bool WantsExt) const {
7134 assert(Base.isReg() && "Expected base to be a register operand");
7135 assert(Offset.isReg() && "Expected offset to be a register operand");
7136
7137 MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();
7138 MachineInstr *OffsetInst = MRI.getVRegDef(Offset.getReg());
7139
7140 unsigned OffsetOpc = OffsetInst->getOpcode();
7141 bool LookedThroughZExt = false;
7142 if (OffsetOpc != TargetOpcode::G_SHL && OffsetOpc != TargetOpcode::G_MUL) {
7143 // Try to look through a ZEXT.
7144 if (OffsetOpc != TargetOpcode::G_ZEXT || !WantsExt)
7145 return std::nullopt;
7146
7147 OffsetInst = MRI.getVRegDef(OffsetInst->getOperand(1).getReg());
7148 OffsetOpc = OffsetInst->getOpcode();
7149 LookedThroughZExt = true;
7150
7151 if (OffsetOpc != TargetOpcode::G_SHL && OffsetOpc != TargetOpcode::G_MUL)
7152 return std::nullopt;
7153 }
7154 // Make sure that the memory op is a valid size.
7155 int64_t LegalShiftVal = Log2_32(SizeInBytes);
7156 if (LegalShiftVal == 0)
7157 return std::nullopt;
7158 if (!isWorthFoldingIntoExtendedReg(*OffsetInst, MRI, true))
7159 return std::nullopt;
7160
7161 // Now, try to find the specific G_CONSTANT. Start by assuming that the
7162 // register we will offset is the LHS, and the register containing the
7163 // constant is the RHS.
7164 Register OffsetReg = OffsetInst->getOperand(1).getReg();
7165 Register ConstantReg = OffsetInst->getOperand(2).getReg();
7166 auto ValAndVReg = getIConstantVRegValWithLookThrough(ConstantReg, MRI);
7167 if (!ValAndVReg) {
7168 // We didn't get a constant on the RHS. If the opcode is a shift, then
7169 // we're done.
7170 if (OffsetOpc == TargetOpcode::G_SHL)
7171 return std::nullopt;
7172
7173 // If we have a G_MUL, we can use either register. Try looking at the RHS.
7174 std::swap(OffsetReg, ConstantReg);
7175 ValAndVReg = getIConstantVRegValWithLookThrough(ConstantReg, MRI);
7176 if (!ValAndVReg)
7177 return std::nullopt;
7178 }
7179
7180 // The value must fit into 3 bits, and must be positive. Make sure that is
7181 // true.
7182 int64_t ImmVal = ValAndVReg->Value.getSExtValue();
7183
7184 // Since we're going to pull this into a shift, the constant value must be
7185 // a power of 2. If we got a multiply, then we need to check this.
7186 if (OffsetOpc == TargetOpcode::G_MUL) {
7187 if (!llvm::has_single_bit<uint32_t>(ImmVal))
7188 return std::nullopt;
7189
7190 // Got a power of 2. So, the amount we'll shift is the log base-2 of that.
7191 ImmVal = Log2_32(ImmVal);
7192 }
7193
7194 if ((ImmVal & 0x7) != ImmVal)
7195 return std::nullopt;
7196
7197 // We are only allowed to shift by LegalShiftVal. This shift value is built
7198 // into the instruction, so we can't just use whatever we want.
7199 if (ImmVal != LegalShiftVal)
7200 return std::nullopt;
7201
7202 unsigned SignExtend = 0;
7203 if (WantsExt) {
7204 // Check if the offset is defined by an extend, unless we looked through a
7205 // G_ZEXT earlier.
7206 if (!LookedThroughZExt) {
7207 MachineInstr *ExtInst = getDefIgnoringCopies(OffsetReg, MRI);
7208       auto Ext = getExtendTypeForInst(*ExtInst, MRI, true);
7209       if (Ext == AArch64_AM::InvalidShiftExtend)
7210         return std::nullopt;
7211
7212 SignExtend = isSignExtendShiftType(Ext) ? 1 : 0;
7213 // We only support SXTW for signed extension here.
7214 if (SignExtend && Ext != AArch64_AM::SXTW)
7215 return std::nullopt;
7216 OffsetReg = ExtInst->getOperand(1).getReg();
7217 }
7218
7219 // Need a 32-bit wide register here.
7220 MachineIRBuilder MIB(*MRI.getVRegDef(Root.getReg()));
7221 OffsetReg = moveScalarRegClass(OffsetReg, AArch64::GPR32RegClass, MIB);
7222 }
7223
7224 // We can use the LHS of the GEP as the base, and the LHS of the shift as an
7225 // offset. Signify that we are shifting by setting the shift flag to 1.
7226 return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(Base.getReg()); },
7227 [=](MachineInstrBuilder &MIB) { MIB.addUse(OffsetReg); },
7228 [=](MachineInstrBuilder &MIB) {
7229 // Need to add both immediates here to make sure that they are both
7230 // added to the instruction.
7231 MIB.addImm(SignExtend);
7232 MIB.addImm(1);
7233 }}};
7234}
7235
7236/// This is used for computing addresses like this:
7237///
7238/// ldr x1, [x2, x3, lsl #3]
7239///
7240/// Where x2 is the base register, and x3 is an offset register. The shift-left
7241/// is a constant value specific to this load instruction. That is, we'll never
7242 /// see anything other than a 3 here (the log2 of the size of the element
7243 /// being loaded).
7244InstructionSelector::ComplexRendererFns
7245AArch64InstructionSelector::selectAddrModeShiftedExtendXReg(
7246 MachineOperand &Root, unsigned SizeInBytes) const {
7247 if (!Root.isReg())
7248 return std::nullopt;
7249 MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();
7250
7251 // We want to find something like this:
7252 //
7253 // val = G_CONSTANT LegalShiftVal
7254 // shift = G_SHL off_reg val
7255 // ptr = G_PTR_ADD base_reg shift
7256 // x = G_LOAD ptr
7257 //
7258 // And fold it into this addressing mode:
7259 //
7260 // ldr x, [base_reg, off_reg, lsl #LegalShiftVal]
7261
7262 // Check if we can find the G_PTR_ADD.
7263 MachineInstr *PtrAdd =
7264 getOpcodeDef(TargetOpcode::G_PTR_ADD, Root.getReg(), MRI);
7265 if (!PtrAdd || !isWorthFoldingIntoExtendedReg(*PtrAdd, MRI, true))
7266 return std::nullopt;
7267
7268 // Now, try to match an opcode which will match our specific offset.
7269 // We want a G_SHL or a G_MUL.
7270   MachineInstr *OffsetInst =
7271       getDefIgnoringCopies(PtrAdd->getOperand(2).getReg(), MRI);
7272   return selectExtendedSHL(Root, PtrAdd->getOperand(1),
7273 OffsetInst->getOperand(0), SizeInBytes,
7274 /*WantsExt=*/false);
7275}
7276
7277/// This is used for computing addresses like this:
7278///
7279/// ldr x1, [x2, x3]
7280///
7281/// Where x2 is the base register, and x3 is an offset register.
7282///
7283/// When possible (or profitable) to fold a G_PTR_ADD into the address
7284/// calculation, this will do so. Otherwise, it will return std::nullopt.
7285InstructionSelector::ComplexRendererFns
7286AArch64InstructionSelector::selectAddrModeRegisterOffset(
7287 MachineOperand &Root) const {
7288 MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();
7289
7290 // We need a GEP.
7291 MachineInstr *Gep = MRI.getVRegDef(Root.getReg());
7292 if (Gep->getOpcode() != TargetOpcode::G_PTR_ADD)
7293 return std::nullopt;
7294
7295 // If this is used more than once, let's not bother folding.
7296 // TODO: Check if they are memory ops. If they are, then we can still fold
7297 // without having to recompute anything.
7298 if (!MRI.hasOneNonDBGUse(Gep->getOperand(0).getReg()))
7299 return std::nullopt;
7300
7301 // Base is the GEP's LHS, offset is its RHS.
7302 return {{[=](MachineInstrBuilder &MIB) {
7303 MIB.addUse(Gep->getOperand(1).getReg());
7304 },
7305 [=](MachineInstrBuilder &MIB) {
7306 MIB.addUse(Gep->getOperand(2).getReg());
7307 },
7308 [=](MachineInstrBuilder &MIB) {
7309 // Need to add both immediates here to make sure that they are both
7310 // added to the instruction.
7311 MIB.addImm(0);
7312 MIB.addImm(0);
7313 }}};
7314}
7315
7316/// This is intended to be equivalent to selectAddrModeXRO in
7317/// AArch64ISelDAGtoDAG. It's used for selecting X register offset loads.
7318InstructionSelector::ComplexRendererFns
7319AArch64InstructionSelector::selectAddrModeXRO(MachineOperand &Root,
7320 unsigned SizeInBytes) const {
7321 MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();
7322 if (!Root.isReg())
7323 return std::nullopt;
7324 MachineInstr *PtrAdd =
7325 getOpcodeDef(TargetOpcode::G_PTR_ADD, Root.getReg(), MRI);
7326 if (!PtrAdd)
7327 return std::nullopt;
7328
7329   // Check for an immediate which cannot be encoded in the [base + imm]
7330 // addressing mode, and can't be encoded in an add/sub. If this happens, we'll
7331 // end up with code like:
7332 //
7333 // mov x0, wide
7334 // add x1 base, x0
7335 // ldr x2, [x1, x0]
7336 //
7337 // In this situation, we can use the [base, xreg] addressing mode to save an
7338 // add/sub:
7339 //
7340 // mov x0, wide
7341 // ldr x2, [base, x0]
7342   auto ValAndVReg =
7343       getIConstantVRegValWithLookThrough(PtrAdd->getOperand(2).getReg(), MRI);
7344   if (ValAndVReg) {
7345 unsigned Scale = Log2_32(SizeInBytes);
7346 int64_t ImmOff = ValAndVReg->Value.getSExtValue();
7347
7348 // Skip immediates that can be selected in the load/store addressing
7349 // mode.
7350 if (ImmOff % SizeInBytes == 0 && ImmOff >= 0 &&
7351 ImmOff < (0x1000 << Scale))
7352 return std::nullopt;
7353
7354 // Helper lambda to decide whether or not it is preferable to emit an add.
7355 auto isPreferredADD = [](int64_t ImmOff) {
7356 // Constants in [0x0, 0xfff] can be encoded in an add.
7357 if ((ImmOff & 0xfffffffffffff000LL) == 0x0LL)
7358 return true;
7359
7360 // Can it be encoded in an add lsl #12?
7361 if ((ImmOff & 0xffffffffff000fffLL) != 0x0LL)
7362 return false;
7363
7364 // It can be encoded in an add lsl #12, but we may not want to. If it is
7365 // possible to select this as a single movz, then prefer that. A single
7366 // movz is faster than an add with a shift.
7367 return (ImmOff & 0xffffffffff00ffffLL) != 0x0LL &&
7368 (ImmOff & 0xffffffffffff0fffLL) != 0x0LL;
7369 };
7370
7371 // If the immediate can be encoded in a single add/sub, then bail out.
7372 if (isPreferredADD(ImmOff) || isPreferredADD(-ImmOff))
7373 return std::nullopt;
7374 }
7375
7376 // Try to fold shifts into the addressing mode.
7377 auto AddrModeFns = selectAddrModeShiftedExtendXReg(Root, SizeInBytes);
7378 if (AddrModeFns)
7379 return AddrModeFns;
7380
7381 // If that doesn't work, see if it's possible to fold in registers from
7382 // a GEP.
7383 return selectAddrModeRegisterOffset(Root);
7384}
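// Worked example (illustrative; register numbers are made up): for an 8-byte
// access at constant offset 0x123458, the offset is too large for the scaled
// [base, #imm12] form and cannot be encoded by a single add/sub (with or
// without lsl #12), so it pays to materialize the constant and use the
// register-offset form:
//   mov x8, #0x123458        ; materialized with movz/movk as needed
//   ldr x0, [x1, x8]
// A small offset such as 0xff8 returns std::nullopt here, since the scaled
// immediate addressing mode already handles it.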
7385
7386/// This is used for computing addresses like this:
7387///
7388/// ldr x0, [xBase, wOffset, sxtw #LegalShiftVal]
7389///
7390/// Where we have a 64-bit base register, a 32-bit offset register, and an
7391/// extend (which may or may not be signed).
7392InstructionSelector::ComplexRendererFns
7393AArch64InstructionSelector::selectAddrModeWRO(MachineOperand &Root,
7394 unsigned SizeInBytes) const {
7395 MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();
7396
7397 MachineInstr *PtrAdd =
7398 getOpcodeDef(TargetOpcode::G_PTR_ADD, Root.getReg(), MRI);
7399 if (!PtrAdd || !isWorthFoldingIntoExtendedReg(*PtrAdd, MRI, true))
7400 return std::nullopt;
7401
7402 MachineOperand &LHS = PtrAdd->getOperand(1);
7403 MachineOperand &RHS = PtrAdd->getOperand(2);
7404 MachineInstr *OffsetInst = getDefIgnoringCopies(RHS.getReg(), MRI);
7405
7406 // The first case is the same as selectAddrModeXRO, except we need an extend.
7407 // In this case, we try to find a shift and extend, and fold them into the
7408 // addressing mode.
7409 //
7410 // E.g.
7411 //
7412 // off_reg = G_Z/S/ANYEXT ext_reg
7413 // val = G_CONSTANT LegalShiftVal
7414 // shift = G_SHL off_reg val
7415 // ptr = G_PTR_ADD base_reg shift
7416 // x = G_LOAD ptr
7417 //
7418 // In this case we can get a load like this:
7419 //
7420 // ldr x0, [base_reg, ext_reg, sxtw #LegalShiftVal]
7421 auto ExtendedShl = selectExtendedSHL(Root, LHS, OffsetInst->getOperand(0),
7422 SizeInBytes, /*WantsExt=*/true);
7423 if (ExtendedShl)
7424 return ExtendedShl;
7425
7426 // There was no shift. We can try and fold a G_Z/S/ANYEXT in alone though.
7427 //
7428 // e.g.
7429 // ldr something, [base_reg, ext_reg, sxtw]
7430 if (!isWorthFoldingIntoExtendedReg(*OffsetInst, MRI, true))
7431 return std::nullopt;
7432
7433   // Check if this is an extend. We'll get an extend type if it is.
7434   AArch64_AM::ShiftExtendType Ext =
7435       getExtendTypeForInst(*OffsetInst, MRI, /*IsLoadStore=*/true);
7436   if (Ext == AArch64_AM::InvalidShiftExtend)
7437     return std::nullopt;
7438
7439 // Need a 32-bit wide register.
7440 MachineIRBuilder MIB(*PtrAdd);
7441 Register ExtReg = moveScalarRegClass(OffsetInst->getOperand(1).getReg(),
7442 AArch64::GPR32RegClass, MIB);
7443 unsigned SignExtend = Ext == AArch64_AM::SXTW;
7444
7445 // Base is LHS, offset is ExtReg.
7446 return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(LHS.getReg()); },
7447 [=](MachineInstrBuilder &MIB) { MIB.addUse(ExtReg); },
7448 [=](MachineInstrBuilder &MIB) {
7449 MIB.addImm(SignExtend);
7450 MIB.addImm(0);
7451 }}};
7452}
7453
7454/// Select a "register plus unscaled signed 9-bit immediate" address. This
7455/// should only match when there is an offset that is not valid for a scaled
7456/// immediate addressing mode. The "Size" argument is the size in bytes of the
7457/// memory reference, which is needed here to know what is valid for a scaled
7458/// immediate.
7459InstructionSelector::ComplexRendererFns
7460AArch64InstructionSelector::selectAddrModeUnscaled(MachineOperand &Root,
7461 unsigned Size) const {
7462 MachineRegisterInfo &MRI =
7463 Root.getParent()->getParent()->getParent()->getRegInfo();
7464
7465 if (!Root.isReg())
7466 return std::nullopt;
7467
7468 if (!isBaseWithConstantOffset(Root, MRI))
7469 return std::nullopt;
7470
7471 MachineInstr *RootDef = MRI.getVRegDef(Root.getReg());
7472
7473 MachineOperand &OffImm = RootDef->getOperand(2);
7474 if (!OffImm.isReg())
7475 return std::nullopt;
7476 MachineInstr *RHS = MRI.getVRegDef(OffImm.getReg());
7477 if (RHS->getOpcode() != TargetOpcode::G_CONSTANT)
7478 return std::nullopt;
7479 int64_t RHSC;
7480 MachineOperand &RHSOp1 = RHS->getOperand(1);
7481 if (!RHSOp1.isCImm() || RHSOp1.getCImm()->getBitWidth() > 64)
7482 return std::nullopt;
7483 RHSC = RHSOp1.getCImm()->getSExtValue();
7484
7485 if (RHSC >= -256 && RHSC < 256) {
7486 MachineOperand &Base = RootDef->getOperand(1);
7487 return {{
7488 [=](MachineInstrBuilder &MIB) { MIB.add(Base); },
7489 [=](MachineInstrBuilder &MIB) { MIB.addImm(RHSC); },
7490 }};
7491 }
7492 return std::nullopt;
7493}
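// Worked example (illustrative): a 4-byte load at byte offset -4 cannot use the
// scaled [base, #imm12] form, whose immediate must be a non-negative multiple
// of the access size, but -4 lies in [-256, 256), so this unscaled form
// matches and the access can be selected as something like  ldur w0, [x1, #-4].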
7494
7495InstructionSelector::ComplexRendererFns
7496AArch64InstructionSelector::tryFoldAddLowIntoImm(MachineInstr &RootDef,
7497 unsigned Size,
7498 MachineRegisterInfo &MRI) const {
7499 if (RootDef.getOpcode() != AArch64::G_ADD_LOW)
7500 return std::nullopt;
7501 MachineInstr &Adrp = *MRI.getVRegDef(RootDef.getOperand(1).getReg());
7502 if (Adrp.getOpcode() != AArch64::ADRP)
7503 return std::nullopt;
7504
7505 // TODO: add heuristics like isWorthFoldingADDlow() from SelectionDAG.
7506 auto Offset = Adrp.getOperand(1).getOffset();
7507 if (Offset % Size != 0)
7508 return std::nullopt;
7509
7510 auto GV = Adrp.getOperand(1).getGlobal();
7511 if (GV->isThreadLocal())
7512 return std::nullopt;
7513
7514 auto &MF = *RootDef.getParent()->getParent();
7515 if (GV->getPointerAlignment(MF.getDataLayout()) < Size)
7516 return std::nullopt;
7517
7518 unsigned OpFlags = STI.ClassifyGlobalReference(GV, MF.getTarget());
7519 MachineIRBuilder MIRBuilder(RootDef);
7520 Register AdrpReg = Adrp.getOperand(0).getReg();
7521 return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(AdrpReg); },
7522 [=](MachineInstrBuilder &MIB) {
7523 MIB.addGlobalAddress(GV, Offset,
7524                                   OpFlags | AArch64II::MO_PAGEOFF |
7525                                       AArch64II::MO_NC);
7526            }}};
7527}
7528
7529/// Select a "register plus scaled unsigned 12-bit immediate" address. The
7530/// "Size" argument is the size in bytes of the memory reference, which
7531/// determines the scale.
7532InstructionSelector::ComplexRendererFns
7533AArch64InstructionSelector::selectAddrModeIndexed(MachineOperand &Root,
7534 unsigned Size) const {
7535 MachineFunction &MF = *Root.getParent()->getParent()->getParent();
7536 MachineRegisterInfo &MRI = MF.getRegInfo();
7537
7538 if (!Root.isReg())
7539 return std::nullopt;
7540
7541 MachineInstr *RootDef = MRI.getVRegDef(Root.getReg());
7542 if (RootDef->getOpcode() == TargetOpcode::G_FRAME_INDEX) {
7543 return {{
7544 [=](MachineInstrBuilder &MIB) { MIB.add(RootDef->getOperand(1)); },
7545 [=](MachineInstrBuilder &MIB) { MIB.addImm(0); },
7546 }};
7547 }
7548
7549   CodeModel::Model CM = MF.getTarget().getCodeModel();
7550   // Check if we can fold in the ADD of small code model ADRP + ADD address.
7551 // HACK: ld64 on Darwin doesn't support relocations on PRFM, so we can't fold
7552 // globals into the offset.
7553 MachineInstr *RootParent = Root.getParent();
7554 if (CM == CodeModel::Small &&
7555 !(RootParent->getOpcode() == AArch64::G_AARCH64_PREFETCH &&
7556 STI.isTargetDarwin())) {
7557 auto OpFns = tryFoldAddLowIntoImm(*RootDef, Size, MRI);
7558 if (OpFns)
7559 return OpFns;
7560 }
7561
7562 if (isBaseWithConstantOffset(Root, MRI)) {
7563 MachineOperand &LHS = RootDef->getOperand(1);
7564 MachineOperand &RHS = RootDef->getOperand(2);
7565 MachineInstr *LHSDef = MRI.getVRegDef(LHS.getReg());
7566 MachineInstr *RHSDef = MRI.getVRegDef(RHS.getReg());
7567
7568 int64_t RHSC = (int64_t)RHSDef->getOperand(1).getCImm()->getZExtValue();
7569 unsigned Scale = Log2_32(Size);
7570 if ((RHSC & (Size - 1)) == 0 && RHSC >= 0 && RHSC < (0x1000 << Scale)) {
7571 if (LHSDef->getOpcode() == TargetOpcode::G_FRAME_INDEX)
7572 return {{
7573 [=](MachineInstrBuilder &MIB) { MIB.add(LHSDef->getOperand(1)); },
7574 [=](MachineInstrBuilder &MIB) { MIB.addImm(RHSC >> Scale); },
7575 }};
7576
7577 return {{
7578 [=](MachineInstrBuilder &MIB) { MIB.add(LHS); },
7579 [=](MachineInstrBuilder &MIB) { MIB.addImm(RHSC >> Scale); },
7580 }};
7581 }
7582 }
7583
7584 // Before falling back to our general case, check if the unscaled
7585 // instructions can handle this. If so, that's preferable.
7586 if (selectAddrModeUnscaled(Root, Size))
7587 return std::nullopt;
7588
7589 return {{
7590 [=](MachineInstrBuilder &MIB) { MIB.add(Root); },
7591 [=](MachineInstrBuilder &MIB) { MIB.addImm(0); },
7592 }};
7593}
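// Worked example (illustrative): for an 8-byte load at constant offset 48, the
// scale is 3, 48 is a non-negative multiple of 8 below (0x1000 << 3), and the
// rendered immediate is 48 >> 3 = 6, i.e. roughly  ldr x0, [x1, #48]  (the
// instruction encodes the scaled value 6 in its imm12 field).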
7594
7595/// Given a shift instruction, return the correct shift type for that
7596 /// instruction.
7597 static AArch64_AM::ShiftExtendType getShiftTypeForInst(MachineInstr &MI) {
7598   switch (MI.getOpcode()) {
7599   default:
7600     return AArch64_AM::InvalidShiftExtend;
7601   case TargetOpcode::G_SHL:
7602 return AArch64_AM::LSL;
7603 case TargetOpcode::G_LSHR:
7604 return AArch64_AM::LSR;
7605 case TargetOpcode::G_ASHR:
7606 return AArch64_AM::ASR;
7607 case TargetOpcode::G_ROTR:
7608 return AArch64_AM::ROR;
7609 }
7610}
7611
7612/// Select a "shifted register" operand. If the value is not shifted, set the
7613/// shift operand to a default value of "lsl 0".
7614InstructionSelector::ComplexRendererFns
7615AArch64InstructionSelector::selectShiftedRegister(MachineOperand &Root,
7616 bool AllowROR) const {
7617 if (!Root.isReg())
7618 return std::nullopt;
7619 MachineRegisterInfo &MRI =
7620 Root.getParent()->getParent()->getParent()->getRegInfo();
7621
7622 // Check if the operand is defined by an instruction which corresponds to
7623 // a ShiftExtendType. E.g. a G_SHL, G_LSHR, etc.
7624   MachineInstr *ShiftInst = MRI.getVRegDef(Root.getReg());
7625   AArch64_AM::ShiftExtendType ShType = getShiftTypeForInst(*ShiftInst);
7626   if (ShType == AArch64_AM::InvalidShiftExtend)
7627 return std::nullopt;
7628 if (ShType == AArch64_AM::ROR && !AllowROR)
7629 return std::nullopt;
7630 if (!isWorthFoldingIntoExtendedReg(*ShiftInst, MRI, false))
7631 return std::nullopt;
7632
7633 // Need an immediate on the RHS.
7634 MachineOperand &ShiftRHS = ShiftInst->getOperand(2);
7635 auto Immed = getImmedFromMO(ShiftRHS);
7636 if (!Immed)
7637 return std::nullopt;
7638
7639 // We have something that we can fold. Fold in the shift's LHS and RHS into
7640 // the instruction.
7641 MachineOperand &ShiftLHS = ShiftInst->getOperand(1);
7642 Register ShiftReg = ShiftLHS.getReg();
7643
7644 unsigned NumBits = MRI.getType(ShiftReg).getSizeInBits();
7645 unsigned Val = *Immed & (NumBits - 1);
7646 unsigned ShiftVal = AArch64_AM::getShifterImm(ShType, Val);
7647
7648 return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(ShiftReg); },
7649 [=](MachineInstrBuilder &MIB) { MIB.addImm(ShiftVal); }}};
7650}
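// Worked example (illustrative): if the operand is defined by
//   %c:gpr(s64) = G_CONSTANT i64 2
//   %s:gpr(s64) = G_SHL %x, %c
// then ShiftReg is %x and the rendered shifter immediate encodes LSL #2, so a
// consuming add can be selected as something like  add x0, x1, x2, lsl #2.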
7651
7652AArch64_AM::ShiftExtendType AArch64InstructionSelector::getExtendTypeForInst(
7653 MachineInstr &MI, MachineRegisterInfo &MRI, bool IsLoadStore) const {
7654 unsigned Opc = MI.getOpcode();
7655
7656 // Handle explicit extend instructions first.
7657 if (Opc == TargetOpcode::G_SEXT || Opc == TargetOpcode::G_SEXT_INREG) {
7658 unsigned Size;
7659 if (Opc == TargetOpcode::G_SEXT)
7660 Size = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
7661 else
7662 Size = MI.getOperand(2).getImm();
7663 assert(Size != 64 && "Extend from 64 bits?");
7664 switch (Size) {
7665 case 8:
7666 return IsLoadStore ? AArch64_AM::InvalidShiftExtend : AArch64_AM::SXTB;
7667 case 16:
7668 return IsLoadStore ? AArch64_AM::InvalidShiftExtend : AArch64_AM::SXTH;
7669 case 32:
7670 return AArch64_AM::SXTW;
7671     default:
7672       return AArch64_AM::InvalidShiftExtend;
7673     }
7674 }
7675
7676 if (Opc == TargetOpcode::G_ZEXT || Opc == TargetOpcode::G_ANYEXT) {
7677 unsigned Size = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
7678 assert(Size != 64 && "Extend from 64 bits?");
7679 switch (Size) {
7680 case 8:
7681 return IsLoadStore ? AArch64_AM::InvalidShiftExtend : AArch64_AM::UXTB;
7682 case 16:
7683 return IsLoadStore ? AArch64_AM::InvalidShiftExtend : AArch64_AM::UXTH;
7684 case 32:
7685 return AArch64_AM::UXTW;
7686     default:
7687       return AArch64_AM::InvalidShiftExtend;
7688     }
7689 }
7690
7691 // Don't have an explicit extend. Try to handle a G_AND with a constant mask
7692 // on the RHS.
7693   if (Opc != TargetOpcode::G_AND)
7694     return AArch64_AM::InvalidShiftExtend;
7695
7696 std::optional<uint64_t> MaybeAndMask = getImmedFromMO(MI.getOperand(2));
7697   if (!MaybeAndMask)
7698     return AArch64_AM::InvalidShiftExtend;
7699   uint64_t AndMask = *MaybeAndMask;
7700 switch (AndMask) {
7701   default:
7702     return AArch64_AM::InvalidShiftExtend;
7703   case 0xFF:
7704 return !IsLoadStore ? AArch64_AM::UXTB : AArch64_AM::InvalidShiftExtend;
7705 case 0xFFFF:
7706 return !IsLoadStore ? AArch64_AM::UXTH : AArch64_AM::InvalidShiftExtend;
7707 case 0xFFFFFFFF:
7708 return AArch64_AM::UXTW;
7709 }
7710}
7711
7712Register AArch64InstructionSelector::moveScalarRegClass(
7713 Register Reg, const TargetRegisterClass &RC, MachineIRBuilder &MIB) const {
7714 MachineRegisterInfo &MRI = *MIB.getMRI();
7715 auto Ty = MRI.getType(Reg);
7716 assert(!Ty.isVector() && "Expected scalars only!");
7717 if (Ty.getSizeInBits() == TRI.getRegSizeInBits(RC))
7718 return Reg;
7719
7720 // Create a copy and immediately select it.
7721 // FIXME: We should have an emitCopy function?
7722 auto Copy = MIB.buildCopy({&RC}, {Reg});
7723 selectCopy(*Copy, TII, MRI, TRI, RBI);
7724 return Copy.getReg(0);
7725}
7726
7727/// Select an "extended register" operand. This operand folds in an extend
7728/// followed by an optional left shift.
7729InstructionSelector::ComplexRendererFns
7730AArch64InstructionSelector::selectArithExtendedRegister(
7731 MachineOperand &Root) const {
7732 if (!Root.isReg())
7733 return std::nullopt;
7734 MachineRegisterInfo &MRI =
7735 Root.getParent()->getParent()->getParent()->getRegInfo();
7736
7737 uint64_t ShiftVal = 0;
7738   Register ExtReg;
7739   AArch64_AM::ShiftExtendType Ext;
7740   MachineInstr *RootDef = getDefIgnoringCopies(Root.getReg(), MRI);
7741 if (!RootDef)
7742 return std::nullopt;
7743
7744 if (!isWorthFoldingIntoExtendedReg(*RootDef, MRI, false))
7745 return std::nullopt;
7746
7747 // Check if we can fold a shift and an extend.
7748 if (RootDef->getOpcode() == TargetOpcode::G_SHL) {
7749 // Look for a constant on the RHS of the shift.
7750 MachineOperand &RHS = RootDef->getOperand(2);
7751 std::optional<uint64_t> MaybeShiftVal = getImmedFromMO(RHS);
7752 if (!MaybeShiftVal)
7753 return std::nullopt;
7754 ShiftVal = *MaybeShiftVal;
7755 if (ShiftVal > 4)
7756 return std::nullopt;
7757 // Look for a valid extend instruction on the LHS of the shift.
7758 MachineOperand &LHS = RootDef->getOperand(1);
7759 MachineInstr *ExtDef = getDefIgnoringCopies(LHS.getReg(), MRI);
7760 if (!ExtDef)
7761 return std::nullopt;
7762     Ext = getExtendTypeForInst(*ExtDef, MRI);
7763     if (Ext == AArch64_AM::InvalidShiftExtend)
7764       return std::nullopt;
7765 ExtReg = ExtDef->getOperand(1).getReg();
7766 } else {
7767 // Didn't get a shift. Try just folding an extend.
7768     Ext = getExtendTypeForInst(*RootDef, MRI);
7769     if (Ext == AArch64_AM::InvalidShiftExtend)
7770       return std::nullopt;
7771 ExtReg = RootDef->getOperand(1).getReg();
7772
7773 // If we have a 32 bit instruction which zeroes out the high half of a
7774 // register, we get an implicit zero extend for free. Check if we have one.
7775 // FIXME: We actually emit the extend right now even though we don't have
7776 // to.
7777 if (Ext == AArch64_AM::UXTW && MRI.getType(ExtReg).getSizeInBits() == 32) {
7778 MachineInstr *ExtInst = MRI.getVRegDef(ExtReg);
7779 if (isDef32(*ExtInst))
7780 return std::nullopt;
7781 }
7782 }
7783
7784 // We require a GPR32 here. Narrow the ExtReg if needed using a subregister
7785 // copy.
7786 MachineIRBuilder MIB(*RootDef);
7787 ExtReg = moveScalarRegClass(ExtReg, AArch64::GPR32RegClass, MIB);
7788
7789 return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(ExtReg); },
7790 [=](MachineInstrBuilder &MIB) {
7791 MIB.addImm(getArithExtendImm(Ext, ShiftVal));
7792 }}};
7793}
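// Worked example (illustrative): for an operand defined by a G_SHL by 2 whose
// LHS is a G_SEXT from s32, Ext is SXTW and ShiftVal is 2, so the consuming
// add/sub can be selected as something like  add x0, x1, w2, sxtw #2.  Shift
// amounts above 4 are rejected because the extended-register form only encodes
// a left shift of 0-4.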
7794
7795InstructionSelector::ComplexRendererFns
7796AArch64InstructionSelector::selectExtractHigh(MachineOperand &Root) const {
7797 if (!Root.isReg())
7798 return std::nullopt;
7799 MachineRegisterInfo &MRI =
7800 Root.getParent()->getParent()->getParent()->getRegInfo();
7801
7802 auto Extract = getDefSrcRegIgnoringCopies(Root.getReg(), MRI);
7803 while (Extract && Extract->MI->getOpcode() == TargetOpcode::G_BITCAST &&
7804 STI.isLittleEndian())
7805 Extract =
7806 getDefSrcRegIgnoringCopies(Extract->MI->getOperand(1).getReg(), MRI);
7807 if (!Extract)
7808 return std::nullopt;
7809
7810 if (Extract->MI->getOpcode() == TargetOpcode::G_UNMERGE_VALUES) {
7811 if (Extract->Reg == Extract->MI->getOperand(1).getReg()) {
7812 Register ExtReg = Extract->MI->getOperand(2).getReg();
7813 return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(ExtReg); }}};
7814 }
7815 }
7816 if (Extract->MI->getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT) {
7817     LLT SrcTy = MRI.getType(Extract->MI->getOperand(1).getReg());
7818     auto LaneIdx = getIConstantVRegValWithLookThrough(
7819         Extract->MI->getOperand(2).getReg(), MRI);
7820 if (LaneIdx && SrcTy == LLT::fixed_vector(2, 64) &&
7821 LaneIdx->Value.getSExtValue() == 1) {
7822 Register ExtReg = Extract->MI->getOperand(1).getReg();
7823 return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(ExtReg); }}};
7824 }
7825 }
7826
7827 return std::nullopt;
7828}
7829
7830void AArch64InstructionSelector::renderTruncImm(MachineInstrBuilder &MIB,
7831 const MachineInstr &MI,
7832 int OpIdx) const {
7833 const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
7834 assert(MI.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 &&
7835 "Expected G_CONSTANT");
7836 std::optional<int64_t> CstVal =
7837 getIConstantVRegSExtVal(MI.getOperand(0).getReg(), MRI);
7838 assert(CstVal && "Expected constant value");
7839 MIB.addImm(*CstVal);
7840}
7841
7842void AArch64InstructionSelector::renderLogicalImm32(
7843 MachineInstrBuilder &MIB, const MachineInstr &I, int OpIdx) const {
7844 assert(I.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 &&
7845 "Expected G_CONSTANT");
7846 uint64_t CstVal = I.getOperand(1).getCImm()->getZExtValue();
7847 uint64_t Enc = AArch64_AM::encodeLogicalImmediate(CstVal, 32);
7848 MIB.addImm(Enc);
7849}
7850
7851void AArch64InstructionSelector::renderLogicalImm64(
7852 MachineInstrBuilder &MIB, const MachineInstr &I, int OpIdx) const {
7853 assert(I.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 &&
7854 "Expected G_CONSTANT");
7855 uint64_t CstVal = I.getOperand(1).getCImm()->getZExtValue();
7856 uint64_t Enc = AArch64_AM::encodeLogicalImmediate(CstVal, 64);
7857 MIB.addImm(Enc);
7858}
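// Background note: AArch64 logical-immediate operands (AND/ORR/EOR) can only
// encode bit patterns that are a rotated, contiguous run of ones replicated
// across equally sized sub-fields of the register; for example 0x000000ff is
// a valid 32-bit logical immediate while 0x00000005 is not.
// encodeLogicalImmediate packs the constant into the N:immr:imms form the
// instruction encoding expects; the renderers above assume the selection
// pattern has already verified that the constant is encodable.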
7859
7860void AArch64InstructionSelector::renderUbsanTrap(MachineInstrBuilder &MIB,
7861 const MachineInstr &MI,
7862 int OpIdx) const {
7863 assert(MI.getOpcode() == TargetOpcode::G_UBSANTRAP && OpIdx == 0 &&
7864 "Expected G_UBSANTRAP");
7865 MIB.addImm(MI.getOperand(0).getImm() | ('U' << 8));
7866}
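// Note: 'U' << 8 is 0x5500, so a G_UBSANTRAP carrying check kind K ends up as
// a BRK with immediate 0x5500 | K, letting runtimes and debuggers tell UBSan
// traps (and the specific failed check) apart from other BRK uses.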
7867
7868void AArch64InstructionSelector::renderFPImm16(MachineInstrBuilder &MIB,
7869 const MachineInstr &MI,
7870 int OpIdx) const {
7871 assert(MI.getOpcode() == TargetOpcode::G_FCONSTANT && OpIdx == -1 &&
7872 "Expected G_FCONSTANT");
7873 MIB.addImm(
7874 AArch64_AM::getFP16Imm(MI.getOperand(1).getFPImm()->getValueAPF()));
7875}
7876
7877void AArch64InstructionSelector::renderFPImm32(MachineInstrBuilder &MIB,
7878 const MachineInstr &MI,
7879 int OpIdx) const {
7880 assert(MI.getOpcode() == TargetOpcode::G_FCONSTANT && OpIdx == -1 &&
7881 "Expected G_FCONSTANT");
7882 MIB.addImm(
7883 AArch64_AM::getFP32Imm(MI.getOperand(1).getFPImm()->getValueAPF()));
7884}
7885
7886void AArch64InstructionSelector::renderFPImm64(MachineInstrBuilder &MIB,
7887 const MachineInstr &MI,
7888 int OpIdx) const {
7889 assert(MI.getOpcode() == TargetOpcode::G_FCONSTANT && OpIdx == -1 &&
7890 "Expected G_FCONSTANT");
7891 MIB.addImm(
7892 AArch64_AM::getFP64Imm(MI.getOperand(1).getFPImm()->getValueAPF()));
7893}
7894
7895void AArch64InstructionSelector::renderFPImm32SIMDModImmType4(
7896 MachineInstrBuilder &MIB, const MachineInstr &MI, int OpIdx) const {
7897 assert(MI.getOpcode() == TargetOpcode::G_FCONSTANT && OpIdx == -1 &&
7898 "Expected G_FCONSTANT");
7899 MIB.addImm(AArch64_AM::encodeAdvSIMDModImmType4(MI.getOperand(1)
7900 .getFPImm()
7901 ->getValueAPF()
7902 .bitcastToAPInt()
7903 .getZExtValue()));
7904}
7905
7906bool AArch64InstructionSelector::isLoadStoreOfNumBytes(
7907 const MachineInstr &MI, unsigned NumBytes) const {
7908 if (!MI.mayLoadOrStore())
7909 return false;
7910 assert(MI.hasOneMemOperand() &&
7911 "Expected load/store to have only one mem op!");
7912 return (*MI.memoperands_begin())->getSize() == NumBytes;
7913}
7914
7915bool AArch64InstructionSelector::isDef32(const MachineInstr &MI) const {
7916 const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
7917 if (MRI.getType(MI.getOperand(0).getReg()).getSizeInBits() != 32)
7918 return false;
7919
7920 // Only return true if we know the operation will zero-out the high half of
7921 // the 64-bit register. Truncates can be subregister copies, which don't
7922 // zero out the high bits. Copies and other copy-like instructions can be
7923 // fed by truncates, or could be lowered as subregister copies.
7924 switch (MI.getOpcode()) {
7925 default:
7926 return true;
7927 case TargetOpcode::COPY:
7928 case TargetOpcode::G_BITCAST:
7929 case TargetOpcode::G_TRUNC:
7930 case TargetOpcode::G_PHI:
7931 return false;
7932 }
7933}
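// Illustrative note (hypothetical registers): any 32-bit W-register write on
// AArch64 zeroes bits [63:32] of the corresponding X register, so for
//
//   %w:gpr(s32) = G_ADD %a:gpr(s32), %b:gpr(s32)
//
// isDef32 reports true and a later zero-extend of %w is implicitly free.
// A G_TRUNC, COPY, G_BITCAST or G_PHI may instead be selected as a plain
// subregister copy, which leaves the high half of the X register unchanged,
// so those opcodes conservatively report false.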
7934
7935
7936// Perform fixups on the given PHI instruction's operands to force them all
7937// to be the same as the destination regbank.
7938 static void fixupPHIOpBanks(MachineInstr &MI, MachineRegisterInfo &MRI,
7939 const AArch64RegisterBankInfo &RBI) {
7940 assert(MI.getOpcode() == TargetOpcode::G_PHI && "Expected a G_PHI");
7941 Register DstReg = MI.getOperand(0).getReg();
7942 const RegisterBank *DstRB = MRI.getRegBankOrNull(DstReg);
7943 assert(DstRB && "Expected PHI dst to have regbank assigned");
7944 MachineIRBuilder MIB(MI);
7945
7946 // Go through each operand and ensure it has the same regbank.
7947 for (MachineOperand &MO : llvm::drop_begin(MI.operands())) {
7948 if (!MO.isReg())
7949 continue;
7950 Register OpReg = MO.getReg();
7951 const RegisterBank *RB = MRI.getRegBankOrNull(OpReg);
7952 if (RB != DstRB) {
7953 // Insert a cross-bank copy.
7954 auto *OpDef = MRI.getVRegDef(OpReg);
7955 const LLT &Ty = MRI.getType(OpReg);
7956 MachineBasicBlock &OpDefBB = *OpDef->getParent();
7957
7958 // Any instruction we insert must appear after all PHIs in the block
7959 // for the block to be valid MIR.
7960 MachineBasicBlock::iterator InsertPt = std::next(OpDef->getIterator());
7961 if (InsertPt != OpDefBB.end() && InsertPt->isPHI())
7962 InsertPt = OpDefBB.getFirstNonPHI();
7963 MIB.setInsertPt(*OpDef->getParent(), InsertPt);
7964 auto Copy = MIB.buildCopy(Ty, OpReg);
7965 MRI.setRegBank(Copy.getReg(0), *DstRB);
7966 MO.setReg(Copy.getReg(0));
7967 }
7968 }
7969}
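// Illustrative note (hypothetical block and registers): when the operand
// being fixed up is itself produced by a G_PHI, the cross-bank copy cannot go
// immediately after that PHI if more PHIs follow; it is sunk to the first
// non-PHI position so the block stays valid MIR:
//
//   %bb:
//     %v:fpr(s16) = G_PHI ...
//     %other:gpr(s32) = G_PHI ...
//     %v_copy:gpr(s16) = COPY %v:fpr(s16)   ; inserted here, after all PHIs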
7970
7971void AArch64InstructionSelector::processPHIs(MachineFunction &MF) {
7972 // We're looking for PHIs, build a list so we don't invalidate iterators.
7973 MachineRegisterInfo &MRI = MF.getRegInfo();
7974 SmallVector<MachineInstr *, 32> Phis;
7975 for (auto &BB : MF) {
7976 for (auto &MI : BB) {
7977 if (MI.getOpcode() == TargetOpcode::G_PHI)
7978 Phis.emplace_back(&MI);
7979 }
7980 }
7981
7982 for (auto *MI : Phis) {
7983 // We need to do some work here if the operand types are < 16 bit and they
7984 // are split across fpr/gpr banks. Since all types <32b on gpr
7985 // end up being assigned gpr32 regclasses, we can end up with PHIs here
7986 // which try to select between a gpr32 and an fpr16. Ideally RBS shouldn't
7987 // be selecting heterogeneous regbanks for operands if possible, but we
7988 // still need to be able to deal with it here.
7989 //
7990 // To fix this, if we have a gpr-bank operand < 32b in size and at least
7991 // one other operand is on the fpr bank, then we add cross-bank copies
7992 // to homogenize the operand banks. For simplicity the bank that we choose
7993 // to settle on is whatever bank the def operand has. For example:
7994 //
7995 // %endbb:
7996 // %dst:gpr(s16) = G_PHI %in1:gpr(s16), %bb1, %in2:fpr(s16), %bb2
7997 // =>
7998 // %bb2:
7999 // ...
8000 // %in2_copy:gpr(s16) = COPY %in2:fpr(s16)
8001 // ...
8002 // %endbb:
8003 // %dst:gpr(s16) = G_PHI %in1:gpr(s16), %bb1, %in2_copy:gpr(s16), %bb2
8004 bool HasGPROp = false, HasFPROp = false;
8005 for (const MachineOperand &MO : llvm::drop_begin(MI->operands())) {
8006 if (!MO.isReg())
8007 continue;
8008 const LLT &Ty = MRI.getType(MO.getReg());
8009 if (!Ty.isValid() || !Ty.isScalar())
8010 break;
8011 if (Ty.getSizeInBits() >= 32)
8012 break;
8013 const RegisterBank *RB = MRI.getRegBankOrNull(MO.getReg());
8014 // If for some reason we don't have a regbank yet, don't try anything.
8015 if (!RB)
8016 break;
8017
8018 if (RB->getID() == AArch64::GPRRegBankID)
8019 HasGPROp = true;
8020 else
8021 HasFPROp = true;
8022 }
8023 // We have heterogeneous regbanks, so fix them up.
8024 if (HasGPROp && HasFPROp)
8025 fixupPHIOpBanks(*MI, MRI, RBI);
8026 }
8027}
8028
8029namespace llvm {
8030InstructionSelector *
8031 createAArch64InstructionSelector(const AArch64TargetMachine &TM,
8032 const AArch64Subtarget &Subtarget,
8033 const AArch64RegisterBankInfo &RBI) {
8034 return new AArch64InstructionSelector(TM, Subtarget, RBI);
8035}
8036}