LLVM 23.0.0git
AArch64InstructionSelector.cpp
Go to the documentation of this file.
1//===- AArch64InstructionSelector.cpp ----------------------------*- C++ -*-==//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8/// \file
9/// This file implements the targeting of the InstructionSelector class for
10/// AArch64.
11/// \todo This should be generated by TableGen.
12//===----------------------------------------------------------------------===//
13
15#include "AArch64InstrInfo.h"
18#include "AArch64RegisterInfo.h"
19#include "AArch64Subtarget.h"
42#include "llvm/IR/Constants.h"
45#include "llvm/IR/IntrinsicsAArch64.h"
46#include "llvm/IR/Type.h"
47#include "llvm/Pass.h"
48#include "llvm/Support/Debug.h"
50#include <optional>
51
52#define DEBUG_TYPE "aarch64-isel"
53
54using namespace llvm;
55using namespace MIPatternMatch;
56using namespace AArch64GISelUtils;
57
58namespace llvm {
61}
62
63namespace {
64
65#define GET_GLOBALISEL_PREDICATE_BITSET
66#include "AArch64GenGlobalISel.inc"
67#undef GET_GLOBALISEL_PREDICATE_BITSET
68
69
70class AArch64InstructionSelector : public InstructionSelector {
71public:
72 AArch64InstructionSelector(const AArch64TargetMachine &TM,
73 const AArch64Subtarget &STI,
74 const AArch64RegisterBankInfo &RBI);
75
76 bool select(MachineInstr &I) override;
77 static const char *getName() { return DEBUG_TYPE; }
78
79 void setupMF(MachineFunction &MF, GISelValueTracking *VT,
80 CodeGenCoverage *CoverageInfo, ProfileSummaryInfo *PSI,
81 BlockFrequencyInfo *BFI) override {
82 InstructionSelector::setupMF(MF, VT, CoverageInfo, PSI, BFI);
83 MIB.setMF(MF);
84
85 // hasFnAttribute() is expensive to call on every BRCOND selection, so
86 // cache it here for each run of the selector.
87 ProduceNonFlagSettingCondBr =
88 !MF.getFunction().hasFnAttribute(Attribute::SpeculativeLoadHardening);
89 MFReturnAddr = Register();
90
91 processPHIs(MF);
92 }
93
94private:
95 /// tblgen-erated 'select' implementation, used as the initial selector for
96 /// the patterns that don't require complex C++.
97 bool selectImpl(MachineInstr &I, CodeGenCoverage &CoverageInfo) const;
98
99 // A lowering phase that runs before any selection attempts.
100 // Returns true if the instruction was modified.
101 bool preISelLower(MachineInstr &I);
102
103 // An early selection function that runs before the selectImpl() call.
104 bool earlySelect(MachineInstr &I);
105
106 /// Save state that is shared between select calls, call select on \p I and
107 /// then restore the saved state. This can be used to recursively call select
108 /// within a select call.
109 bool selectAndRestoreState(MachineInstr &I);
110
111 // Do some preprocessing of G_PHIs before we begin selection.
112 void processPHIs(MachineFunction &MF);
113
114 bool earlySelectSHL(MachineInstr &I, MachineRegisterInfo &MRI);
115
116 /// Eliminate same-sized cross-bank copies into stores before selectImpl().
117 bool contractCrossBankCopyIntoStore(MachineInstr &I,
119
120 bool convertPtrAddToAdd(MachineInstr &I, MachineRegisterInfo &MRI);
121
122 bool selectVaStartAAPCS(MachineInstr &I, MachineFunction &MF,
123 MachineRegisterInfo &MRI) const;
124 bool selectVaStartDarwin(MachineInstr &I, MachineFunction &MF,
125 MachineRegisterInfo &MRI) const;
126
127 ///@{
128 /// Helper functions for selectCompareBranch.
129 bool selectCompareBranchFedByFCmp(MachineInstr &I, MachineInstr &FCmp,
130 MachineIRBuilder &MIB) const;
131 bool selectCompareBranchFedByICmp(MachineInstr &I, MachineInstr &ICmp,
132 MachineIRBuilder &MIB) const;
133 bool tryOptCompareBranchFedByICmp(MachineInstr &I, MachineInstr &ICmp,
134 MachineIRBuilder &MIB) const;
135 bool tryOptAndIntoCompareBranch(MachineInstr &AndInst, bool Invert,
136 MachineBasicBlock *DstMBB,
137 MachineIRBuilder &MIB) const;
138 ///@}
139
140 bool selectCompareBranch(MachineInstr &I, MachineFunction &MF,
142
143 bool selectVectorAshrLshr(MachineInstr &I, MachineRegisterInfo &MRI);
144 bool selectVectorSHL(MachineInstr &I, MachineRegisterInfo &MRI);
145
146 // Helper to generate an equivalent of scalar_to_vector into a new register,
147 // returned via 'Dst'.
148 MachineInstr *emitScalarToVector(unsigned EltSize,
149 const TargetRegisterClass *DstRC,
150 Register Scalar,
151 MachineIRBuilder &MIRBuilder) const;
152 /// Helper to narrow vector that was widened by emitScalarToVector.
153 /// Copy lowest part of 128-bit or 64-bit vector to 64-bit or 32-bit
154 /// vector, correspondingly.
155 MachineInstr *emitNarrowVector(Register DstReg, Register SrcReg,
156 MachineIRBuilder &MIRBuilder,
157 MachineRegisterInfo &MRI) const;
158
159 /// Emit a lane insert into \p DstReg, or a new vector register if
160 /// std::nullopt is provided.
161 ///
162 /// The lane inserted into is defined by \p LaneIdx. The vector source
163 /// register is given by \p SrcReg. The register containing the element is
164 /// given by \p EltReg.
165 MachineInstr *emitLaneInsert(std::optional<Register> DstReg, Register SrcReg,
166 Register EltReg, unsigned LaneIdx,
167 const RegisterBank &RB,
168 MachineIRBuilder &MIRBuilder) const;
169
170 /// Emit a sequence of instructions representing a constant \p CV for a
171 /// vector register \p Dst. (E.g. a MOV, or a load from a constant pool.)
172 ///
173 /// \returns the last instruction in the sequence on success, and nullptr
174 /// otherwise.
175 MachineInstr *emitConstantVector(Register Dst, Constant *CV,
176 MachineIRBuilder &MIRBuilder,
178
179 MachineInstr *tryAdvSIMDModImm8(Register Dst, unsigned DstSize, APInt Bits,
180 MachineIRBuilder &MIRBuilder);
181
182 MachineInstr *tryAdvSIMDModImm16(Register Dst, unsigned DstSize, APInt Bits,
183 MachineIRBuilder &MIRBuilder, bool Inv);
184
185 MachineInstr *tryAdvSIMDModImm32(Register Dst, unsigned DstSize, APInt Bits,
186 MachineIRBuilder &MIRBuilder, bool Inv);
187 MachineInstr *tryAdvSIMDModImm64(Register Dst, unsigned DstSize, APInt Bits,
188 MachineIRBuilder &MIRBuilder);
189 MachineInstr *tryAdvSIMDModImm321s(Register Dst, unsigned DstSize, APInt Bits,
190 MachineIRBuilder &MIRBuilder, bool Inv);
191 MachineInstr *tryAdvSIMDModImmFP(Register Dst, unsigned DstSize, APInt Bits,
192 MachineIRBuilder &MIRBuilder);
193
194 bool tryOptConstantBuildVec(MachineInstr &MI, LLT DstTy,
196 /// \returns true if a G_BUILD_VECTOR instruction \p MI can be selected as a
197 /// SUBREG_TO_REG.
198 bool tryOptBuildVecToSubregToReg(MachineInstr &MI, MachineRegisterInfo &MRI);
199 bool selectBuildVector(MachineInstr &I, MachineRegisterInfo &MRI);
202
203 bool selectShuffleVector(MachineInstr &I, MachineRegisterInfo &MRI);
204 bool selectExtractElt(MachineInstr &I, MachineRegisterInfo &MRI);
205 bool selectConcatVectors(MachineInstr &I, MachineRegisterInfo &MRI);
206 bool selectSplitVectorUnmerge(MachineInstr &I, MachineRegisterInfo &MRI);
207
208 /// Helper function to select vector load intrinsics like
209 /// @llvm.aarch64.neon.ld2.*, @llvm.aarch64.neon.ld4.*, etc.
210 /// \p Opc is the opcode that the selected instruction should use.
211 /// \p NumVecs is the number of vector destinations for the instruction.
212 /// \p I is the original G_INTRINSIC_W_SIDE_EFFECTS instruction.
213 bool selectVectorLoadIntrinsic(unsigned Opc, unsigned NumVecs,
214 MachineInstr &I);
215 bool selectVectorLoadLaneIntrinsic(unsigned Opc, unsigned NumVecs,
216 MachineInstr &I);
217 void selectVectorStoreIntrinsic(MachineInstr &I, unsigned NumVecs,
218 unsigned Opc);
219 bool selectVectorStoreLaneIntrinsic(MachineInstr &I, unsigned NumVecs,
220 unsigned Opc);
221 bool selectIntrinsicWithSideEffects(MachineInstr &I,
223 bool selectIntrinsic(MachineInstr &I, MachineRegisterInfo &MRI);
224 bool selectJumpTable(MachineInstr &I, MachineRegisterInfo &MRI);
225 bool selectBrJT(MachineInstr &I, MachineRegisterInfo &MRI);
226 bool selectTLSGlobalValue(MachineInstr &I, MachineRegisterInfo &MRI);
227 bool selectPtrAuthGlobalValue(MachineInstr &I,
228 MachineRegisterInfo &MRI) const;
229 bool selectReduction(MachineInstr &I, MachineRegisterInfo &MRI);
230 bool selectMOPS(MachineInstr &I, MachineRegisterInfo &MRI);
231 bool selectUSMovFromExtend(MachineInstr &I, MachineRegisterInfo &MRI);
232 void SelectTable(MachineInstr &I, MachineRegisterInfo &MRI, unsigned NumVecs,
233 unsigned Opc1, unsigned Opc2, bool isExt);
234
235 bool selectIndexedExtLoad(MachineInstr &I, MachineRegisterInfo &MRI);
236 bool selectIndexedLoad(MachineInstr &I, MachineRegisterInfo &MRI);
237 bool selectIndexedStore(GIndexedStore &I, MachineRegisterInfo &MRI);
238
239 unsigned emitConstantPoolEntry(const Constant *CPVal,
240 MachineFunction &MF) const;
242 MachineIRBuilder &MIRBuilder) const;
243
244 // Emit a vector concat operation.
245 MachineInstr *emitVectorConcat(std::optional<Register> Dst, Register Op1,
246 Register Op2,
247 MachineIRBuilder &MIRBuilder) const;
248
249 // Emit an integer compare between LHS and RHS, which checks for Predicate.
250 MachineInstr *emitIntegerCompare(MachineOperand &LHS, MachineOperand &RHS,
252 MachineIRBuilder &MIRBuilder) const;
253
254 /// Emit a floating point comparison between \p LHS and \p RHS.
255 /// \p Pred if given is the intended predicate to use.
257 emitFPCompare(Register LHS, Register RHS, MachineIRBuilder &MIRBuilder,
258 std::optional<CmpInst::Predicate> = std::nullopt) const;
259
261 emitInstr(unsigned Opcode, std::initializer_list<llvm::DstOp> DstOps,
262 std::initializer_list<llvm::SrcOp> SrcOps,
263 MachineIRBuilder &MIRBuilder,
264 const ComplexRendererFns &RenderFns = std::nullopt) const;
265 /// Helper function to emit an add or sub instruction.
266 ///
267 /// \p AddrModeAndSizeToOpcode must contain each of the opcode variants above
268 /// in a specific order.
269 ///
270 /// Below is an example of the expected input to \p AddrModeAndSizeToOpcode.
271 ///
272 /// \code
273 /// const std::array<std::array<unsigned, 2>, 5> Table {
274 /// {{AArch64::ADDXri, AArch64::ADDWri},
275 /// {AArch64::ADDXrs, AArch64::ADDWrs},
276 /// {AArch64::ADDXrr, AArch64::ADDWrr},
277 /// {AArch64::SUBXri, AArch64::SUBWri},
278 /// {AArch64::ADDXrx, AArch64::ADDWrx}}};
279 /// \endcode
280 ///
281 /// Each row in the table corresponds to a different addressing mode. Each
282 /// column corresponds to a different register size.
283 ///
284 /// \attention Rows must be structured as follows:
285 /// - Row 0: The ri opcode variants
286 /// - Row 1: The rs opcode variants
287 /// - Row 2: The rr opcode variants
288 /// - Row 3: The ri opcode variants for negative immediates
289 /// - Row 4: The rx opcode variants
290 ///
291 /// \attention Columns must be structured as follows:
292 /// - Column 0: The 64-bit opcode variants
293 /// - Column 1: The 32-bit opcode variants
294 ///
295 /// \p Dst is the destination register of the binop to emit.
296 /// \p LHS is the left-hand operand of the binop to emit.
297 /// \p RHS is the right-hand operand of the binop to emit.
298 MachineInstr *emitAddSub(
299 const std::array<std::array<unsigned, 2>, 5> &AddrModeAndSizeToOpcode,
301 MachineIRBuilder &MIRBuilder) const;
302 MachineInstr *emitADD(Register DefReg, MachineOperand &LHS,
304 MachineIRBuilder &MIRBuilder) const;
306 MachineIRBuilder &MIRBuilder) const;
308 MachineIRBuilder &MIRBuilder) const;
310 MachineIRBuilder &MIRBuilder) const;
312 MachineIRBuilder &MIRBuilder) const;
314 MachineIRBuilder &MIRBuilder) const;
316 MachineIRBuilder &MIRBuilder) const;
318 MachineIRBuilder &MIRBuilder) const;
321 MachineIRBuilder &MIRBuilder) const;
322 MachineInstr *emitExtractVectorElt(std::optional<Register> DstReg,
323 const RegisterBank &DstRB, LLT ScalarTy,
324 Register VecReg, unsigned LaneIdx,
325 MachineIRBuilder &MIRBuilder) const;
326 MachineInstr *emitCSINC(Register Dst, Register Src1, Register Src2,
328 MachineIRBuilder &MIRBuilder) const;
329 /// Emit a CSet for a FP compare.
330 ///
331 /// \p Dst is expected to be a 32-bit scalar register.
332 MachineInstr *emitCSetForFCmp(Register Dst, CmpInst::Predicate Pred,
333 MachineIRBuilder &MIRBuilder) const;
334
335 /// Emit an instruction that sets NZCV to the carry-in expected by \p I.
336 /// Might elide the instruction if the previous instruction already sets NZCV
337 /// correctly.
338 MachineInstr *emitCarryIn(MachineInstr &I, Register CarryReg);
339
340 /// Emit the overflow op for \p Opcode.
341 ///
342 /// \p Opcode is expected to be an overflow op's opcode, e.g. G_UADDO,
343 /// G_USUBO, etc.
344 std::pair<MachineInstr *, AArch64CC::CondCode>
345 emitOverflowOp(unsigned Opcode, Register Dst, MachineOperand &LHS,
346 MachineOperand &RHS, MachineIRBuilder &MIRBuilder) const;
347
348 bool selectOverflowOp(MachineInstr &I, MachineRegisterInfo &MRI);
349
350 /// Emit expression as a conjunction (a series of CCMP/CFCMP ops).
351 /// In some cases this is even possible with OR operations in the expression.
353 MachineIRBuilder &MIB) const;
358 MachineIRBuilder &MIB) const;
360 bool Negate, Register CCOp,
362 MachineIRBuilder &MIB) const;
363
364 /// Emit a TB(N)Z instruction which tests \p Bit in \p TestReg.
365 /// \p IsNegative is true if the test should be "not zero".
366 /// This will also optimize the test bit instruction when possible.
367 MachineInstr *emitTestBit(Register TestReg, uint64_t Bit, bool IsNegative,
368 MachineBasicBlock *DstMBB,
369 MachineIRBuilder &MIB) const;
370
371 /// Emit a CB(N)Z instruction which branches to \p DestMBB.
372 MachineInstr *emitCBZ(Register CompareReg, bool IsNegative,
373 MachineBasicBlock *DestMBB,
374 MachineIRBuilder &MIB) const;
375
376 // Equivalent to the i32shift_a and friends from AArch64InstrInfo.td.
377 // We use these manually instead of using the importer since it doesn't
378 // support SDNodeXForm.
379 ComplexRendererFns selectShiftA_32(const MachineOperand &Root) const;
380 ComplexRendererFns selectShiftB_32(const MachineOperand &Root) const;
381 ComplexRendererFns selectShiftA_64(const MachineOperand &Root) const;
382 ComplexRendererFns selectShiftB_64(const MachineOperand &Root) const;
383
384 ComplexRendererFns select12BitValueWithLeftShift(uint64_t Immed) const;
385 ComplexRendererFns selectArithImmed(MachineOperand &Root) const;
386 ComplexRendererFns selectNegArithImmed(MachineOperand &Root) const;
387
388 ComplexRendererFns selectAddrModeUnscaled(MachineOperand &Root,
389 unsigned Size) const;
390
391 ComplexRendererFns selectAddrModeUnscaled8(MachineOperand &Root) const {
392 return selectAddrModeUnscaled(Root, 1);
393 }
394 ComplexRendererFns selectAddrModeUnscaled16(MachineOperand &Root) const {
395 return selectAddrModeUnscaled(Root, 2);
396 }
397 ComplexRendererFns selectAddrModeUnscaled32(MachineOperand &Root) const {
398 return selectAddrModeUnscaled(Root, 4);
399 }
400 ComplexRendererFns selectAddrModeUnscaled64(MachineOperand &Root) const {
401 return selectAddrModeUnscaled(Root, 8);
402 }
403 ComplexRendererFns selectAddrModeUnscaled128(MachineOperand &Root) const {
404 return selectAddrModeUnscaled(Root, 16);
405 }
406
407 /// Helper to try to fold in a GISEL_ADD_LOW into an immediate, to be used
408 /// from complex pattern matchers like selectAddrModeIndexed().
409 ComplexRendererFns tryFoldAddLowIntoImm(MachineInstr &RootDef, unsigned Size,
410 MachineRegisterInfo &MRI) const;
411
412 ComplexRendererFns selectAddrModeIndexed(MachineOperand &Root,
413 unsigned Size) const;
414 template <int Width>
415 ComplexRendererFns selectAddrModeIndexed(MachineOperand &Root) const {
416 return selectAddrModeIndexed(Root, Width / 8);
417 }
418
419 std::optional<bool>
420 isWorthFoldingIntoAddrMode(const MachineInstr &MI,
421 const MachineRegisterInfo &MRI) const;
422
423 bool isWorthFoldingIntoExtendedReg(const MachineInstr &MI,
424 const MachineRegisterInfo &MRI,
425 bool IsAddrOperand) const;
426 ComplexRendererFns
427 selectAddrModeShiftedExtendXReg(MachineOperand &Root,
428 unsigned SizeInBytes) const;
429
430 /// Returns a \p ComplexRendererFns which contains a base, offset, and whether
431 /// or not a shift + extend should be folded into an addressing mode. Returns
432 /// std::nullopt when this is not profitable or possible.
433 ComplexRendererFns
434 selectExtendedSHL(MachineOperand &Root, MachineOperand &Base,
435 MachineOperand &Offset, unsigned SizeInBytes,
436 bool WantsExt) const;
437 ComplexRendererFns selectAddrModeRegisterOffset(MachineOperand &Root) const;
438 ComplexRendererFns selectAddrModeXRO(MachineOperand &Root,
439 unsigned SizeInBytes) const;
440 template <int Width>
441 ComplexRendererFns selectAddrModeXRO(MachineOperand &Root) const {
442 return selectAddrModeXRO(Root, Width / 8);
443 }
444
445 ComplexRendererFns selectAddrModeWRO(MachineOperand &Root,
446 unsigned SizeInBytes) const;
447 template <int Width>
448 ComplexRendererFns selectAddrModeWRO(MachineOperand &Root) const {
449 return selectAddrModeWRO(Root, Width / 8);
450 }
451
452 ComplexRendererFns selectShiftedRegister(MachineOperand &Root,
453 bool AllowROR = false) const;
454
455 ComplexRendererFns selectArithShiftedRegister(MachineOperand &Root) const {
456 return selectShiftedRegister(Root);
457 }
458
459 ComplexRendererFns selectLogicalShiftedRegister(MachineOperand &Root) const {
460 return selectShiftedRegister(Root, true);
461 }
462
463 /// Given an extend instruction, determine the correct shift-extend type for
464 /// that instruction.
465 ///
466 /// If the instruction is going to be used in a load or store, pass
467 /// \p IsLoadStore = true.
469 getExtendTypeForInst(MachineInstr &MI, MachineRegisterInfo &MRI,
470 bool IsLoadStore = false) const;
471
472 /// Move \p Reg to \p RC if \p Reg is not already on \p RC.
473 ///
474 /// \returns Either \p Reg if no change was necessary, or the new register
475 /// created by moving \p Reg.
476 ///
477 /// Note: This uses emitCopy right now.
478 Register moveScalarRegClass(Register Reg, const TargetRegisterClass &RC,
479 MachineIRBuilder &MIB) const;
480
481 ComplexRendererFns selectArithExtendedRegister(MachineOperand &Root) const;
482
483 ComplexRendererFns selectExtractHigh(MachineOperand &Root) const;
484
485 ComplexRendererFns selectCVTFixedPointVec(MachineOperand &Root) const;
486 ComplexRendererFns
487 selectCVTFixedPosRecipOperandVec(MachineOperand &Root) const;
488 ComplexRendererFns
489 selectCVTFixedPointVecBase(const MachineOperand &Root,
490 bool isReciprocal = false) const;
491 void renderFixedPointXForm(MachineInstrBuilder &MIB, const MachineInstr &MI,
492 int OpIdx = -1) const;
493 void renderFixedPointRecipXForm(MachineInstrBuilder &MIB,
494 const MachineInstr &MI, int OpIdx = -1) const;
495
496 void renderTruncImm(MachineInstrBuilder &MIB, const MachineInstr &MI,
497 int OpIdx = -1) const;
498 void renderLogicalImm32(MachineInstrBuilder &MIB, const MachineInstr &I,
499 int OpIdx = -1) const;
500 void renderLogicalImm64(MachineInstrBuilder &MIB, const MachineInstr &I,
501 int OpIdx = -1) const;
502 void renderUbsanTrap(MachineInstrBuilder &MIB, const MachineInstr &MI,
503 int OpIdx) const;
504 void renderFPImm16(MachineInstrBuilder &MIB, const MachineInstr &MI,
505 int OpIdx = -1) const;
506 void renderFPImm32(MachineInstrBuilder &MIB, const MachineInstr &MI,
507 int OpIdx = -1) const;
508 void renderFPImm64(MachineInstrBuilder &MIB, const MachineInstr &MI,
509 int OpIdx = -1) const;
510 void renderFPImm32SIMDModImmType4(MachineInstrBuilder &MIB,
511 const MachineInstr &MI,
512 int OpIdx = -1) const;
513
514 // Materialize a GlobalValue or BlockAddress using a movz+movk sequence.
515 void materializeLargeCMVal(MachineInstr &I, const Value *V, unsigned OpFlags);
516
517 // Optimization methods.
518 bool tryOptSelect(GSelect &Sel);
519 bool tryOptSelectConjunction(GSelect &Sel, MachineInstr &CondMI);
520 MachineInstr *tryFoldIntegerCompare(MachineOperand &LHS, MachineOperand &RHS,
522 MachineIRBuilder &MIRBuilder) const;
523
524 /// Return true if \p MI is a load or store of \p NumBytes bytes.
525 bool isLoadStoreOfNumBytes(const MachineInstr &MI, unsigned NumBytes) const;
526
527 /// Returns true if \p MI is guaranteed to have the high-half of a 64-bit
528 /// register zeroed out. In other words, the result of MI has been explicitly
529 /// zero extended.
530 bool isDef32(const MachineInstr &MI) const;
531
532 const AArch64TargetMachine &TM;
533 const AArch64Subtarget &STI;
534 const AArch64InstrInfo &TII;
536 const AArch64RegisterBankInfo &RBI;
537
538 bool ProduceNonFlagSettingCondBr = false;
539
540 // Some cached values used during selection.
541 // We use LR as a live-in register, and we keep track of it here as it can be
542 // clobbered by calls.
543 Register MFReturnAddr;
544
546
547#define GET_GLOBALISEL_PREDICATES_DECL
548#include "AArch64GenGlobalISel.inc"
549#undef GET_GLOBALISEL_PREDICATES_DECL
550
551// We declare the temporaries used by selectImpl() in the class to minimize the
552// cost of constructing placeholder values.
553#define GET_GLOBALISEL_TEMPORARIES_DECL
554#include "AArch64GenGlobalISel.inc"
555#undef GET_GLOBALISEL_TEMPORARIES_DECL
556};
557
558} // end anonymous namespace
559
560#define GET_GLOBALISEL_IMPL
561#include "AArch64GenGlobalISel.inc"
562#undef GET_GLOBALISEL_IMPL
563
// Constructor: stores the target machine, subtarget and register-bank info
// references it is given, and derives the instruction-info and register-info
// references from the subtarget.
// NOTE(review): the two includes of AArch64GenGlobalISel.inc below sit inside
// the member-initializer list; the lines around them are missing from this
// listing (presumably the macro guards selecting which part of the generated
// file is expanded) -- confirm against the real file.
564AArch64InstructionSelector::AArch64InstructionSelector(
565 const AArch64TargetMachine &TM, const AArch64Subtarget &STI,
566 const AArch64RegisterBankInfo &RBI)
567 : TM(TM), STI(STI), TII(*STI.getInstrInfo()), TRI(*STI.getRegisterInfo()),
568 RBI(RBI),
570#include "AArch64GenGlobalISel.inc"
573#include "AArch64GenGlobalISel.inc"
575{
576}
577
578// FIXME: This should be target-independent, inferred from the types declared
579// for each class in the bank.
580//
581/// Given a register bank, and a type, return the smallest register class that
582/// can represent that combination.
583static const TargetRegisterClass *
584getRegClassForTypeOnBank(LLT Ty, const RegisterBank &RB,
585 bool GetAllRegSet = false) {
586 if (RB.getID() == AArch64::GPRRegBankID) {
587 if (Ty.getSizeInBits() <= 32)
588 return GetAllRegSet ? &AArch64::GPR32allRegClass
589 : &AArch64::GPR32RegClass;
590 if (Ty.getSizeInBits() == 64)
591 return GetAllRegSet ? &AArch64::GPR64allRegClass
592 : &AArch64::GPR64RegClass;
593 if (Ty.getSizeInBits() == 128)
594 return &AArch64::XSeqPairsClassRegClass;
595 return nullptr;
596 }
597
598 if (RB.getID() == AArch64::FPRRegBankID) {
599 switch (Ty.getSizeInBits()) {
600 case 8:
601 return &AArch64::FPR8RegClass;
602 case 16:
603 return &AArch64::FPR16RegClass;
604 case 32:
605 return &AArch64::FPR32RegClass;
606 case 64:
607 return &AArch64::FPR64RegClass;
608 case 128:
609 return &AArch64::FPR128RegClass;
610 }
611 return nullptr;
612 }
613
614 return nullptr;
615}
616
617/// Given a register bank, and size in bits, return the smallest register class
618/// that can represent that combination.
// Summary of the mapping implemented below:
//  - scalable size: asserted to be on the FPR bank, always ZPR;
//  - GPR bank: <= 32 bits -> GPR32, 64 -> GPR64, 128 -> X-register pair class;
//    GetAllRegSet selects the "all" variants of the 32/64-bit classes;
//  - FPR bank: exactly 8/16/32/64/128 bits -> FPR8 .. FPR128;
//  - anything else (other bank, other size): nullptr.
// NOTE(review): the line carrying the function name and its leading
// parameters is missing from this listing; the body uses RB, SizeInBits and
// GetAllRegSet.
619static const TargetRegisterClass *
621 bool GetAllRegSet = false) {
// Every scalable size is answered here, before the per-bank handling.
622 if (SizeInBits.isScalable()) {
623 assert(RB.getID() == AArch64::FPRRegBankID &&
624 "Expected FPR regbank for scalable type size");
625 return &AArch64::ZPRRegClass;
626 }
627
628 unsigned RegBankID = RB.getID();
629
630 if (RegBankID == AArch64::GPRRegBankID) {
// Cannot fire as written: scalable sizes already returned above.
631 assert(!SizeInBits.isScalable() && "Unexpected scalable register size");
632 if (SizeInBits <= 32)
633 return GetAllRegSet ? &AArch64::GPR32allRegClass
634 : &AArch64::GPR32RegClass;
635 if (SizeInBits == 64)
636 return GetAllRegSet ? &AArch64::GPR64allRegClass
637 : &AArch64::GPR64RegClass;
638 if (SizeInBits == 128)
639 return &AArch64::XSeqPairsClassRegClass;
// Other GPR sizes fall through to the final "return nullptr".
640 }
641
642 if (RegBankID == AArch64::FPRRegBankID) {
// Unreachable as written: the early return at the top of the function has
// already handled every scalable size.
643 if (SizeInBits.isScalable()) {
644 assert(SizeInBits == TypeSize::getScalable(128) &&
645 "Unexpected scalable register size");
646 return &AArch64::ZPRRegClass;
647 }
648
649 switch (SizeInBits) {
650 default:
651 return nullptr;
652 case 8:
653 return &AArch64::FPR8RegClass;
654 case 16:
655 return &AArch64::FPR16RegClass;
656 case 32:
657 return &AArch64::FPR32RegClass;
658 case 64:
659 return &AArch64::FPR64RegClass;
660 case 128:
661 return &AArch64::FPR128RegClass;
662 }
663 }
664
665 return nullptr;
666}
667
668/// Returns the correct subregister to use for a given register class.
// The subregister index is chosen from the register size of *RC (queried via
// TRI) and written to SubReg:
//   8 -> bsub, 16 -> hsub, 32 -> ssub for FPR32 and sub_32 for any other
//   32-bit class, 64 -> dsub.
// Returns true when SubReg was set. For any other size SubReg is left
// untouched and false is returned.
// NOTE(review): the line with the function name and the RC parameter, and the
// line opening the debug statement in the default case, are missing from this
// listing.
670 const TargetRegisterInfo &TRI, unsigned &SubReg) {
671 switch (TRI.getRegSizeInBits(*RC)) {
672 case 8:
673 SubReg = AArch64::bsub;
674 break;
675 case 16:
676 SubReg = AArch64::hsub;
677 break;
678 case 32:
// Only the FPR32 class uses ssub; every other 32-bit class uses sub_32.
679 if (RC != &AArch64::FPR32RegClass)
680 SubReg = AArch64::sub_32;
681 else
682 SubReg = AArch64::ssub;
683 break;
684 case 64:
685 SubReg = AArch64::dsub;
686 break;
687 default:
689 dbgs() << "Couldn't find appropriate subregister for register class.");
690 return false;
691 }
692
693 return true;
694}
695
696/// Returns the minimum size the given register bank can hold.
697static unsigned getMinSizeForRegBank(const RegisterBank &RB) {
698 switch (RB.getID()) {
699 case AArch64::GPRRegBankID:
700 return 32;
701 case AArch64::FPRRegBankID:
702 return 8;
703 default:
704 llvm_unreachable("Tried to get minimum size for unknown register bank.");
705 }
706}
707
708/// Create a REG_SEQUENCE instruction using the registers in \p Regs.
709/// Helper function for functions like createDTuple and createQTuple.
710///
711/// \p RegClassIDs - The list of register class IDs available for some tuple of
712/// a scalar class. E.g. QQRegClassID, QQQRegClassID, QQQQRegClassID. This is
713/// expected to contain between 2 and 4 tuple classes.
714///
715/// \p SubRegs - The list of subregister classes associated with each register
716/// class ID in \p RegClassIDs. E.g., QQRegClassID should use the qsub0
717/// subregister class. The index of each subregister class is expected to
718/// correspond with the index of each register class.
719///
720/// \returns Either the destination register of REG_SEQUENCE instruction that
721/// was created, or the 0th element of \p Regs if \p Regs contains a single
722/// element.
// NOTE(review): the line with the function name and the Regs parameter, and
// the line that defines TRI, are missing from this listing.
724 const unsigned RegClassIDs[],
725 const unsigned SubRegs[], MachineIRBuilder &MIB) {
726 unsigned NumRegs = Regs.size();
// A single register needs no tuple; hand it back unchanged.
727 if (NumRegs == 1)
728 return Regs[0];
729 assert(NumRegs >= 2 && NumRegs <= 4 &&
730 "Only support between two and 4 registers in a tuple!");
// RegClassIDs is indexed by (register count - 2): entry 0 is the class for a
// two-register tuple.
732 auto *DesiredClass = TRI->getRegClass(RegClassIDs[NumRegs - 2]);
733 auto RegSequence =
734 MIB.buildInstr(TargetOpcode::REG_SEQUENCE, {DesiredClass}, {});
// Append one (register, subregister index) operand pair per input register.
735 for (unsigned I = 0, E = Regs.size(); I < E; ++I) {
736 RegSequence.addUse(Regs[I]);
737 RegSequence.addImm(SubRegs[I]);
738 }
739 return RegSequence.getReg(0);
740}
741
742/// Create a tuple of D-registers using the registers in \p Regs.
// Forwards to createTuple() with the DD/DDD/DDDD tuple class IDs (for 2, 3
// and 4 registers) and the dsub0..dsub3 subregister indices.
// NOTE(review): the signature line is missing from this listing; the body
// uses Regs and MIB.
744 static const unsigned RegClassIDs[] = {
745 AArch64::DDRegClassID, AArch64::DDDRegClassID, AArch64::DDDDRegClassID};
746 static const unsigned SubRegs[] = {AArch64::dsub0, AArch64::dsub1,
747 AArch64::dsub2, AArch64::dsub3};
748 return createTuple(Regs, RegClassIDs, SubRegs, MIB);
749}
750
751/// Create a tuple of Q-registers using the registers in \p Regs.
// Forwards to createTuple() with the QQ/QQQ/QQQQ tuple class IDs (for 2, 3
// and 4 registers) and the qsub0..qsub3 subregister indices.
// NOTE(review): the signature line is missing from this listing; the body
// uses Regs and MIB.
753 static const unsigned RegClassIDs[] = {
754 AArch64::QQRegClassID, AArch64::QQQRegClassID, AArch64::QQQQRegClassID};
755 static const unsigned SubRegs[] = {AArch64::qsub0, AArch64::qsub1,
756 AArch64::qsub2, AArch64::qsub3};
757 return createTuple(Regs, RegClassIDs, SubRegs, MIB);
758}
759
// Try to read a constant out of the operand Root.
//  - immediate operand: its value;
//  - constant-integer operand: its zero-extended value;
//  - register operand: the value found by the lookup below, sign-extended, or
//    std::nullopt when that lookup yields nothing;
//  - any other operand kind: std::nullopt.
// The result is returned as uint64_t in every successful case.
760static std::optional<uint64_t> getImmedFromMO(const MachineOperand &Root) {
// Walk from the operand up to the enclosing function to reach its register
// info.
761 auto &MI = *Root.getParent();
762 auto &MBB = *MI.getParent();
763 auto &MF = *MBB.getParent();
764 auto &MRI = MF.getRegInfo();
765 uint64_t Immed;
766 if (Root.isImm())
767 Immed = Root.getImm();
768 else if (Root.isCImm())
769 Immed = Root.getCImm()->getZExtValue();
770 else if (Root.isReg()) {
// NOTE(review): the line with the call that produces ValAndVReg is missing
// from this listing; presumably it looks up a constant definition of the
// register through MRI -- confirm against the real file.
771 auto ValAndVReg =
773 if (!ValAndVReg)
774 return std::nullopt;
775 Immed = ValAndVReg->Value.getSExtValue();
776 } else
777 return std::nullopt;
778 return Immed;
779}
780
781/// Check whether \p I is a currently unsupported binary operation:
782/// - it has an unsized type
783/// - an operand is not a vreg
784/// - all operands are not in the same bank
785/// These are checks that should someday live in the verifier, but right now,
786/// these are mostly limitations of the aarch64 selector.
787static bool unsupportedBinOp(const MachineInstr &I,
788 const AArch64RegisterBankInfo &RBI,
789 const MachineRegisterInfo &MRI,
790 const AArch64RegisterInfo &TRI) {
791 LLT Ty = MRI.getType(I.getOperand(0).getReg());
792 if (!Ty.isValid()) {
793 LLVM_DEBUG(dbgs() << "Generic binop register should be typed\n");
794 return true;
795 }
796
797 const RegisterBank *PrevOpBank = nullptr;
798 for (auto &MO : I.operands()) {
799 // FIXME: Support non-register operands.
800 if (!MO.isReg()) {
801 LLVM_DEBUG(dbgs() << "Generic inst non-reg operands are unsupported\n");
802 return true;
803 }
804
805 // FIXME: Can generic operations have physical registers operands? If
806 // so, this will need to be taught about that, and we'll need to get the
807 // bank out of the minimal class for the register.
808 // Either way, this needs to be documented (and possibly verified).
809 if (!MO.getReg().isVirtual()) {
810 LLVM_DEBUG(dbgs() << "Generic inst has physical register operand\n");
811 return true;
812 }
813
814 const RegisterBank *OpBank = RBI.getRegBank(MO.getReg(), MRI, TRI);
815 if (!OpBank) {
816 LLVM_DEBUG(dbgs() << "Generic register has no bank or class\n");
817 return true;
818 }
819
820 if (PrevOpBank && OpBank != PrevOpBank) {
821 LLVM_DEBUG(dbgs() << "Generic inst operands have different banks\n");
822 return true;
823 }
824 PrevOpBank = OpBank;
825 }
826 return false;
827}
828
829/// Select the AArch64 opcode for the basic binary operation \p GenericOpc
830/// (such as G_OR or G_SDIV), appropriate for the register bank \p RegBankID
831/// and of size \p OpSize.
832/// \returns \p GenericOpc if the combination is unsupported.
833static unsigned selectBinaryOp(unsigned GenericOpc, unsigned RegBankID,
834 unsigned OpSize) {
835 switch (RegBankID) {
836 case AArch64::GPRRegBankID:
837 if (OpSize == 32) {
838 switch (GenericOpc) {
839 case TargetOpcode::G_SHL:
840 return AArch64::LSLVWr;
841 case TargetOpcode::G_LSHR:
842 return AArch64::LSRVWr;
843 case TargetOpcode::G_ASHR:
844 return AArch64::ASRVWr;
845 default:
846 return GenericOpc;
847 }
848 } else if (OpSize == 64) {
849 switch (GenericOpc) {
850 case TargetOpcode::G_PTR_ADD:
851 return AArch64::ADDXrr;
852 case TargetOpcode::G_SHL:
853 return AArch64::LSLVXr;
854 case TargetOpcode::G_LSHR:
855 return AArch64::LSRVXr;
856 case TargetOpcode::G_ASHR:
857 return AArch64::ASRVXr;
858 default:
859 return GenericOpc;
860 }
861 }
862 break;
863 case AArch64::FPRRegBankID:
864 switch (OpSize) {
865 case 32:
866 switch (GenericOpc) {
867 case TargetOpcode::G_FADD:
868 return AArch64::FADDSrr;
869 case TargetOpcode::G_FSUB:
870 return AArch64::FSUBSrr;
871 case TargetOpcode::G_FMUL:
872 return AArch64::FMULSrr;
873 case TargetOpcode::G_FDIV:
874 return AArch64::FDIVSrr;
875 default:
876 return GenericOpc;
877 }
878 case 64:
879 switch (GenericOpc) {
880 case TargetOpcode::G_FADD:
881 return AArch64::FADDDrr;
882 case TargetOpcode::G_FSUB:
883 return AArch64::FSUBDrr;
884 case TargetOpcode::G_FMUL:
885 return AArch64::FMULDrr;
886 case TargetOpcode::G_FDIV:
887 return AArch64::FDIVDrr;
888 case TargetOpcode::G_OR:
889 return AArch64::ORRv8i8;
890 default:
891 return GenericOpc;
892 }
893 }
894 break;
895 }
896 return GenericOpc;
897}
898
899/// Select the AArch64 opcode for the G_LOAD or G_STORE operation \p GenericOpc,
900/// appropriate for the (value) register bank \p RegBankID and of memory access
901/// size \p OpSize. This returns the variant with the base+unsigned-immediate
902/// addressing mode (e.g., LDRXui).
903/// \returns \p GenericOpc if the combination is unsupported.
904static unsigned selectLoadStoreUIOp(unsigned GenericOpc, unsigned RegBankID,
905 unsigned OpSize) {
906 const bool isStore = GenericOpc == TargetOpcode::G_STORE;
907 switch (RegBankID) {
908 case AArch64::GPRRegBankID:
909 switch (OpSize) {
910 case 8:
911 return isStore ? AArch64::STRBBui : AArch64::LDRBBui;
912 case 16:
913 return isStore ? AArch64::STRHHui : AArch64::LDRHHui;
914 case 32:
915 return isStore ? AArch64::STRWui : AArch64::LDRWui;
916 case 64:
917 return isStore ? AArch64::STRXui : AArch64::LDRXui;
918 }
919 break;
920 case AArch64::FPRRegBankID:
921 switch (OpSize) {
922 case 8:
923 return isStore ? AArch64::STRBui : AArch64::LDRBui;
924 case 16:
925 return isStore ? AArch64::STRHui : AArch64::LDRHui;
926 case 32:
927 return isStore ? AArch64::STRSui : AArch64::LDRSui;
928 case 64:
929 return isStore ? AArch64::STRDui : AArch64::LDRDui;
930 case 128:
931 return isStore ? AArch64::STRQui : AArch64::LDRQui;
932 }
933 break;
934 }
935 return GenericOpc;
936}
937
938/// Helper function for selectCopy. Inserts a subregister copy from \p SrcReg
939/// to \p *To.
940///
941/// E.g "To = COPY SrcReg:SubReg"
///
/// The COPY is built with a MachineIRBuilder positioned at \p I and defines a
/// new register of class \p To. Operand 1 of \p I is then rewritten to read
/// that register. When operand 0 of \p I is not a physical register it is
/// additionally constrained to \p To.
/// \returns true on every path.
// NOTE(review): the line that opens this definition (listing line 942) is not
// present in this extract; only the tail of the parameter list is visible.
943 const RegisterBankInfo &RBI, Register SrcReg,
944 const TargetRegisterClass *To, unsigned SubReg) {
945 assert(SrcReg.isValid() && "Expected a valid source register?");
946 assert(To && "Destination register class cannot be null");
947 assert(SubReg && "Expected a valid subregister");
948
949 MachineIRBuilder MIB(I);
 // Build "NewReg:To = COPY SrcReg:SubReg".
950 auto SubRegCopy =
951 MIB.buildInstr(TargetOpcode::COPY, {To}, {}).addReg(SrcReg, {}, SubReg);
 // Redirect the source operand of I to the freshly copied register.
952 MachineOperand &RegOp = I.getOperand(1);
953 RegOp.setReg(SubRegCopy.getReg(0));
954
955 // It's possible that the destination register won't be constrained. Make
956 // sure that happens.
957 if (!I.getOperand(0).getReg().isPhysical())
958 RBI.constrainGenericRegister(I.getOperand(0).getReg(), *To, MRI);
959
960 return true;
961}
962
963/// Helper function to get the source and destination register classes for a
964/// copy. Returns a std::pair containing the source register class for the
965/// copy, and the destination register class for the copy. If a register class
966/// cannot be determined, then it will be nullptr.
///
/// Operand 0 of \p I is taken as the destination and operand 1 as the source.
/// The classes are looked up with getMinClassForRegBank from each register's
/// bank and size.
// NOTE(review): listing lines 968-969 (function name and leading parameters)
// are not present in this extract.
967static std::pair<const TargetRegisterClass *, const TargetRegisterClass *>
970 const RegisterBankInfo &RBI) {
971 Register DstReg = I.getOperand(0).getReg();
972 Register SrcReg = I.getOperand(1).getReg();
973 const RegisterBank &DstRegBank = *RBI.getRegBank(DstReg, MRI, TRI);
974 const RegisterBank &SrcRegBank = *RBI.getRegBank(SrcReg, MRI, TRI);
975
976 TypeSize DstSize = RBI.getSizeInBits(DstReg, MRI, TRI);
977 TypeSize SrcSize = RBI.getSizeInBits(SrcReg, MRI, TRI);
978
979 // Special casing for cross-bank copies of s1s. We can technically represent
980 // a 1-bit value with any size of register. The minimum size for a GPR is 32
981 // bits. So, we need to put the FPR on 32 bits as well.
982 //
983 // FIXME: I'm not sure if this case holds true outside of copies. If it does,
984 // then we can pull it into the helpers that get the appropriate class for a
985 // register bank. Or make a new helper that carries along some constraint
986 // information.
987 if (SrcRegBank != DstRegBank &&
988 (DstSize == TypeSize::getFixed(1) && SrcSize == TypeSize::getFixed(1)))
989 SrcSize = DstSize = TypeSize::getFixed(32);
990
 // Pair layout: {source class, destination class}.
991 return {getMinClassForRegBank(SrcRegBank, SrcSize, true),
992 getMinClassForRegBank(DstRegBank, DstSize, true)};
993}
994
995// FIXME: We need some sort of API in RBI/TRI to allow generic code to
996// constrain operands of simple instructions given a TargetRegisterClass
997// and LLT
//
// Walks the operands of I and constrains each virtual register operand to a
// register class. Non-register operands, null registers and physical
// registers are skipped. Always returns true.
// NOTE(review): listing lines 998 (start of the signature) and 1011 (the
// initializer of RC) are not present in this extract.
999 const RegisterBankInfo &RBI) {
1000 for (MachineOperand &MO : I.operands()) {
1001 if (!MO.isReg())
1002 continue;
1003 Register Reg = MO.getReg();
1004 if (!Reg)
1005 continue;
1006 if (Reg.isPhysical())
1007 continue;
1008 LLT Ty = MRI.getType(Reg);
1009 const RegClassOrRegBank &RegClassOrBank = MRI.getRegClassOrRegBank(Reg);
1010 const TargetRegisterClass *RC =
1012 if (!RC) {
 // No class yet: derive one from the register's bank and type.
1013 const RegisterBank &RB = *cast<const RegisterBank *>(RegClassOrBank);
1014 RC = getRegClassForTypeOnBank(Ty, RB);
1015 if (!RC) {
1016 LLVM_DEBUG(
1017 dbgs() << "Warning: DBG_VALUE operand has unexpected size/bank\n");
 // Note: this leaves the loop, so later operands are not constrained.
1018 break;
1019 }
1020 }
1021 RBI.constrainGenericRegister(Reg, *RC, MRI);
1022 }
1023
1024 return true;
1025}
1026
1029 const RegisterBankInfo &RBI) {
 // Selects I as a target COPY from operand 1 to operand 0.
 //
 // When I already is a COPY, size mismatches between the source and
 // destination register classes are fixed up first (extra cross-bank copy,
 // subregister copy, or SUBREG_TO_REG promotion). The destination register is
 // then constrained to its class and I's descriptor is set to COPY.
 // Returns false when a register class cannot be determined or the
 // destination cannot be constrained.
 // NOTE(review): listing lines 1027-1028 (start of the signature) are not
 // present in this extract.
1030 Register DstReg = I.getOperand(0).getReg();
1031 Register SrcReg = I.getOperand(1).getReg();
1032 const RegisterBank &DstRegBank = *RBI.getRegBank(DstReg, MRI, TRI);
1033 const RegisterBank &SrcRegBank = *RBI.getRegBank(SrcReg, MRI, TRI);
1034
1035 // Find the correct register classes for the source and destination registers.
1036 const TargetRegisterClass *SrcRC;
1037 const TargetRegisterClass *DstRC;
1038 std::tie(SrcRC, DstRC) = getRegClassesForCopy(I, TII, MRI, TRI, RBI);
1039
1040 if (!DstRC) {
1041 LLVM_DEBUG(dbgs() << "Unexpected dest size "
1042 << RBI.getSizeInBits(DstReg, MRI, TRI) << '\n');
1043 return false;
1044 }
1045
1046 // Is this a copy? If so, then we may need to insert a subregister copy.
1047 if (I.isCopy()) {
1048 // Yes. Check if there's anything to fix up.
1049 if (!SrcRC) {
1050 LLVM_DEBUG(dbgs() << "Couldn't determine source register class\n");
1051 return false;
1052 }
1053
1054 const TypeSize SrcSize = TRI.getRegSizeInBits(*SrcRC);
1055 const TypeSize DstSize = TRI.getRegSizeInBits(*DstRC);
1056 unsigned SrcSubReg = I.getOperand(1).getSubReg();
1057 unsigned SubReg;
1058
 // The source operand already carries a subregister index: only the
 // destination is constrained and no size fix-up is attempted.
1059 if (SrcSubReg)
1060 return RBI.constrainGenericRegister(DstReg, *DstRC, MRI);
1061
1062 // If the source bank doesn't support a subregister copy small enough,
1063 // then we first need to copy to the destination bank.
1064 if (getMinSizeForRegBank(SrcRegBank) > DstSize) {
1065 const TargetRegisterClass *DstTempRC =
1066 getMinClassForRegBank(DstRegBank, SrcSize, /* GetAllRegSet */ true);
1067 getSubRegForClass(DstRC, TRI, SubReg);
1068
1069 MachineIRBuilder MIB(I);
1070 auto Copy = MIB.buildCopy({DstTempRC}, {SrcReg});
1071 copySubReg(I, MRI, RBI, Copy.getReg(0), DstRC, SubReg);
1072 } else if (SrcSize > DstSize) {
1073 // If the source register is bigger than the destination we need to
1074 // perform a subregister copy.
1075 const TargetRegisterClass *SubRegRC =
1076 getMinClassForRegBank(SrcRegBank, DstSize, /* GetAllRegSet */ true);
1077 getSubRegForClass(SubRegRC, TRI, SubReg);
1078 copySubReg(I, MRI, RBI, SrcReg, DstRC, SubReg);
1079 } else if (DstSize > SrcSize) {
1080 // If the destination register is bigger than the source we need to do
1081 // a promotion using SUBREG_TO_REG.
1082 const TargetRegisterClass *PromotionRC =
1083 getMinClassForRegBank(SrcRegBank, DstSize, /* GetAllRegSet */ true);
1084 getSubRegForClass(SrcRC, TRI, SubReg);
1085
1086 Register PromoteReg = MRI.createVirtualRegister(PromotionRC);
1087 BuildMI(*I.getParent(), I, I.getDebugLoc(),
1088 TII.get(AArch64::SUBREG_TO_REG), PromoteReg)
1089 .addUse(SrcReg)
1090 .addImm(SubReg);
 // I now reads the promoted register instead of the original source.
1091 MachineOperand &RegOp = I.getOperand(1);
1092 RegOp.setReg(PromoteReg);
1093 }
1094
1095 // If the destination is a physical register, then there's nothing to
1096 // change, so we're done.
1097 if (DstReg.isPhysical())
1098 return true;
1099 }
1100
1101 // No need to constrain SrcReg. It will get constrained when we hit another
1102 // of its use or its defs. Copies do not have constraints.
1103 if (!RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) {
1104 LLVM_DEBUG(dbgs() << "Failed to constrain " << TII.getName(I.getOpcode())
1105 << " operand\n");
1106 return false;
1107 }
1108
1109 // If this a GPR ZEXT that we want to just reduce down into a copy.
1110 // The sizes will be mismatched with the source < 32b but that's ok.
1111 if (I.getOpcode() == TargetOpcode::G_ZEXT) {
 // Rewrite the opcode first, then re-run selection on the instruction so it
 // is handled as a COPY.
1112 I.setDesc(TII.get(AArch64::COPY));
1113 assert(SrcRegBank.getID() == AArch64::GPRRegBankID);
1114 return selectCopy(I, TII, MRI, TRI, RBI);
1115 }
1116
1117 I.setDesc(TII.get(AArch64::COPY));
1118 return true;
1119}
1120
1122AArch64InstructionSelector::emitSelect(Register Dst, Register True,
1123 Register False, AArch64CC::CondCode CC,
1124 MachineIRBuilder &MIB) const {
 // Emits a conditional select into Dst through MIB and returns the new
 // instruction. Vector types are rejected with nullptr. Operands that are not
 // on the GPR bank get an FCSEL; GPR operands get a CSEL, or a CSNEG / CSINV /
 // CSINC when one of the folds below applies.
 // NOTE(review): listing lines 1121, 1139, 1164, 1181, 1200, 1251 and 1260
 // are not present in this extract, so the FCSEL path and the bodies of the
 // "Invert" / "inv_cc" branches are shown incompletely.
1125 MachineRegisterInfo &MRI = *MIB.getMRI();
1126 assert(RBI.getRegBank(False, MRI, TRI)->getID() ==
1127 RBI.getRegBank(True, MRI, TRI)->getID() &&
1128 "Expected both select operands to have the same regbank?");
1129 LLT Ty = MRI.getType(True);
1130 if (Ty.isVector())
1131 return nullptr;
1132 const unsigned Size = Ty.getSizeInBits();
1133 assert((Size == 32 || Size == 64) &&
1134 "Expected 32 bit or 64 bit select only?");
1135 const bool Is32Bit = Size == 32;
1136 if (RBI.getRegBank(True, MRI, TRI)->getID() != AArch64::GPRRegBankID) {
1137 unsigned Opc = Is32Bit ? AArch64::FCSELSrrr : AArch64::FCSELDrrr;
1138 auto FCSel = MIB.buildInstr(Opc, {Dst}, {True, False}).addImm(CC);
1140 return &*FCSel;
1141 }
1142
1143 // By default, we'll try and emit a CSEL.
1144 unsigned Opc = Is32Bit ? AArch64::CSELWr : AArch64::CSELXr;
1145 bool Optimized = false;
 // Tries to fold the instruction defining Reg (a negate, a bitwise not, or an
 // add of 1) into the select by switching Opc and replacing Reg with the
 // folded instruction's input. Reg and OtherReg are references, so the
 // caller's True/False registers are updated in place.
1146 auto TryFoldBinOpIntoSelect = [&Opc, Is32Bit, &CC, &MRI,
1147 &Optimized](Register &Reg, Register &OtherReg,
1148 bool Invert) {
1149 if (Optimized)
1150 return false;
1151
1152 // Attempt to fold:
1153 //
1154 // %sub = G_SUB 0, %x
1155 // %select = G_SELECT cc, %reg, %sub
1156 //
1157 // Into:
1158 // %select = CSNEG %reg, %x, cc
1159 Register MatchReg;
1160 if (mi_match(Reg, MRI, m_Neg(m_Reg(MatchReg)))) {
1161 Opc = Is32Bit ? AArch64::CSNEGWr : AArch64::CSNEGXr;
1162 Reg = MatchReg;
1163 if (Invert) {
1165 std::swap(Reg, OtherReg);
1166 }
1167 return true;
1168 }
1169
1170 // Attempt to fold:
1171 //
1172 // %xor = G_XOR %x, -1
1173 // %select = G_SELECT cc, %reg, %xor
1174 //
1175 // Into:
1176 // %select = CSINV %reg, %x, cc
1177 if (mi_match(Reg, MRI, m_Not(m_Reg(MatchReg)))) {
1178 Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;
1179 Reg = MatchReg;
1180 if (Invert) {
1182 std::swap(Reg, OtherReg);
1183 }
1184 return true;
1185 }
1186
1187 // Attempt to fold:
1188 //
1189 // %add = G_ADD %x, 1
1190 // %select = G_SELECT cc, %reg, %add
1191 //
1192 // Into:
1193 // %select = CSINC %reg, %x, cc
 // A G_PTR_ADD of 1 is accepted here as well as a G_ADD of 1.
1194 if (mi_match(Reg, MRI,
1195 m_any_of(m_GAdd(m_Reg(MatchReg), m_SpecificICst(1)),
1196 m_GPtrAdd(m_Reg(MatchReg), m_SpecificICst(1))))) {
1197 Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;
1198 Reg = MatchReg;
1199 if (Invert) {
1201 std::swap(Reg, OtherReg);
1202 }
1203 return true;
1204 }
1205
1206 return false;
1207 };
1208
1209 // Helper lambda which tries to use CSINC/CSINV for the instruction when its
1210 // true/false values are constants.
1211 // FIXME: All of these patterns already exist in tablegen. We should be
1212 // able to import these.
1213 auto TryOptSelectCst = [&Opc, &True, &False, &CC, Is32Bit, &MRI,
1214 &Optimized]() {
1215 if (Optimized)
1216 return false;
1217 auto TrueCst = getIConstantVRegValWithLookThrough(True, MRI);
1218 auto FalseCst = getIConstantVRegValWithLookThrough(False, MRI);
1219 if (!TrueCst && !FalseCst)
1220 return false;
1221
1222 Register ZReg = Is32Bit ? AArch64::WZR : AArch64::XZR;
1223 if (TrueCst && FalseCst) {
1224 int64_t T = TrueCst->Value.getSExtValue();
1225 int64_t F = FalseCst->Value.getSExtValue();
1226
1227 if (T == 0 && F == 1) {
1228 // G_SELECT cc, 0, 1 -> CSINC zreg, zreg, cc
1229 Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;
1230 True = ZReg;
1231 False = ZReg;
1232 return true;
1233 }
1234
1235 if (T == 0 && F == -1) {
1236 // G_SELECT cc 0, -1 -> CSINV zreg, zreg cc
1237 Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;
1238 True = ZReg;
1239 False = ZReg;
1240 return true;
1241 }
1242 }
1243
1244 if (TrueCst) {
1245 int64_t T = TrueCst->Value.getSExtValue();
1246 if (T == 1) {
1247 // G_SELECT cc, 1, f -> CSINC f, zreg, inv_cc
1248 Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;
1249 True = False;
1250 False = ZReg;
1252 return true;
1253 }
1254
1255 if (T == -1) {
1256 // G_SELECT cc, -1, f -> CSINV f, zreg, inv_cc
1257 Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;
1258 True = False;
1259 False = ZReg;
1261 return true;
1262 }
1263 }
1264
1265 if (FalseCst) {
1266 int64_t F = FalseCst->Value.getSExtValue();
1267 if (F == 1) {
1268 // G_SELECT cc, t, 1 -> CSINC t, zreg, cc
1269 Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;
1270 False = ZReg;
1271 return true;
1272 }
1273
1274 if (F == -1) {
1275 // G_SELECT cc, t, -1 -> CSINV t, zreg, cc
1276 Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;
1277 False = ZReg;
1278 return true;
1279 }
1280 }
1281 return false;
1282 };
1283
 // Every helper returns false once Optimized is set, so at most one of the
 // folds below takes effect; the call order is the priority order.
1284 Optimized |= TryFoldBinOpIntoSelect(False, True, /*Invert = */ false);
1285 Optimized |= TryFoldBinOpIntoSelect(True, False, /*Invert = */ true);
1286 Optimized |= TryOptSelectCst();
1287 auto SelectInst = MIB.buildInstr(Opc, {Dst}, {True, False}).addImm(CC);
1288 constrainSelectedInstRegOperands(*SelectInst, TII, TRI, RBI);
1289 return &*SelectInst;
1290}
1291
1294 MachineRegisterInfo *MRI = nullptr) {
 // Translates the integer compare predicate P into an AArch64 condition code.
 //
 // For ICMP_SGE and ICMP_SLT, when both RHS and MRI are provided and RHS is
 // found to be the constant 0, PL and MI are returned instead of GE and LT.
 // Predicates without a case below reach llvm_unreachable.
 // NOTE(review): listing lines 1292-1293 (start of the signature) are not
 // present in this extract.
1295 switch (P) {
1296 default:
1297 llvm_unreachable("Unknown condition code!");
1298 case CmpInst::ICMP_NE:
1299 return AArch64CC::NE;
1300 case CmpInst::ICMP_EQ:
1301 return AArch64CC::EQ;
1302 case CmpInst::ICMP_SGT:
1303 return AArch64CC::GT;
1304 case CmpInst::ICMP_SGE:
 // x >= 0 (signed): use PL when the right-hand side is a known zero.
1305 if (RHS && MRI) {
1306 auto ValAndVReg = getIConstantVRegValWithLookThrough(RHS, *MRI);
1307 if (ValAndVReg && ValAndVReg->Value == 0)
1308 return AArch64CC::PL;
1309 }
1310 return AArch64CC::GE;
1311 case CmpInst::ICMP_SLT:
 // x < 0 (signed): use MI when the right-hand side is a known zero.
1312 if (RHS && MRI) {
1313 auto ValAndVReg = getIConstantVRegValWithLookThrough(RHS, *MRI);
1314 if (ValAndVReg && ValAndVReg->Value == 0)
1315 return AArch64CC::MI;
1316 }
1317 return AArch64CC::LT;
1318 case CmpInst::ICMP_SLE:
1319 return AArch64CC::LE;
1320 case CmpInst::ICMP_UGT:
1321 return AArch64CC::HI;
1322 case CmpInst::ICMP_UGE:
1323 return AArch64CC::HS;
1324 case CmpInst::ICMP_ULT:
1325 return AArch64CC::LO;
1326 case CmpInst::ICMP_ULE:
1327 return AArch64CC::LS;
1328 }
1329}
1330
1331/// changeFPCCToORAArch64CC - Convert an IR fp condition code to an AArch64 CC.
///
/// \p CondCode always receives a condition. \p CondCode2 is set to
/// AArch64CC::AL unless the predicate needs a second condition (FCMP_ONE and
/// FCMP_UEQ), in which case it receives that second condition. As the name
/// indicates, the two conditions are meant to be OR'ed together.
/// Predicates without a case below reach llvm_unreachable.
// NOTE(review): the line that opens this definition (listing line 1332) is not
// present in this extract.
1333 AArch64CC::CondCode &CondCode,
1334 AArch64CC::CondCode &CondCode2) {
1335 CondCode2 = AArch64CC::AL;
1336 switch (CC) {
1337 default:
1338 llvm_unreachable("Unknown FP condition!");
1339 case CmpInst::FCMP_OEQ:
1340 CondCode = AArch64CC::EQ;
1341 break;
1342 case CmpInst::FCMP_OGT:
1343 CondCode = AArch64CC::GT;
1344 break;
1345 case CmpInst::FCMP_OGE:
1346 CondCode = AArch64CC::GE;
1347 break;
1348 case CmpInst::FCMP_OLT:
1349 CondCode = AArch64CC::MI;
1350 break;
1351 case CmpInst::FCMP_OLE:
1352 CondCode = AArch64CC::LS;
1353 break;
1354 case CmpInst::FCMP_ONE:
 // Needs two conditions: MI or GT.
1355 CondCode = AArch64CC::MI;
1356 CondCode2 = AArch64CC::GT;
1357 break;
1358 case CmpInst::FCMP_ORD:
1359 CondCode = AArch64CC::VC;
1360 break;
1361 case CmpInst::FCMP_UNO:
1362 CondCode = AArch64CC::VS;
1363 break;
1364 case CmpInst::FCMP_UEQ:
 // Needs two conditions: EQ or VS.
1365 CondCode = AArch64CC::EQ;
1366 CondCode2 = AArch64CC::VS;
1367 break;
1368 case CmpInst::FCMP_UGT:
1369 CondCode = AArch64CC::HI;
1370 break;
1371 case CmpInst::FCMP_UGE:
1372 CondCode = AArch64CC::PL;
1373 break;
1374 case CmpInst::FCMP_ULT:
1375 CondCode = AArch64CC::LT;
1376 break;
1377 case CmpInst::FCMP_ULE:
1378 CondCode = AArch64CC::LE;
1379 break;
1380 case CmpInst::FCMP_UNE:
1381 CondCode = AArch64CC::NE;
1382 break;
1383 }
1384}
1385
1386/// Convert an IR fp condition code to an AArch64 CC.
1387/// This differs from changeFPCCToORAArch64CC in that it returns cond codes that
1388/// should be AND'ed instead of OR'ed.
///
/// Only FCMP_ONE and FCMP_UEQ are translated here. Every other predicate is
/// forwarded to changeFPCCToORAArch64CC, which is asserted to leave
/// \p CondCode2 as AArch64CC::AL (i.e. a single condition).
// NOTE(review): the line that opens this definition (listing line 1389) is not
// present in this extract.
1390 AArch64CC::CondCode &CondCode,
1391 AArch64CC::CondCode &CondCode2) {
1392 CondCode2 = AArch64CC::AL;
1393 switch (CC) {
1394 default:
1395 changeFPCCToORAArch64CC(CC, CondCode, CondCode2);
1396 assert(CondCode2 == AArch64CC::AL);
1397 break;
1398 case CmpInst::FCMP_ONE:
1399 // (a one b)
1400 // == ((a olt b) || (a ogt b))
1401 // == ((a ord b) && (a une b))
1402 CondCode = AArch64CC::VC;
1403 CondCode2 = AArch64CC::NE;
1404 break;
1405 case CmpInst::FCMP_UEQ:
1406 // (a ueq b)
1407 // == ((a uno b) || (a oeq b))
1408 // == ((a ule b) && (a uge b))
1409 CondCode = AArch64CC::PL;
1410 CondCode2 = AArch64CC::LE;
1411 break;
1412 }
1413}
1414
1415/// Return a register which can be used as a bit to test in a TB(N)Z.
///
/// Starting at \p Reg, walks backwards through the defining instructions
/// (extends, truncates, and AND/XOR/shifts with a constant operand) for as
/// long as testing bit \p Bit of the result can be rewritten as testing a bit
/// of the instruction's input.
///
/// \p Bit is an in/out parameter: it is updated to the bit index that has to
/// be tested on the returned register. \p Invert is an in/out parameter as
/// well: it is flipped each time the walk passes a G_XOR whose constant has
/// the tested bit set.
/// \returns the last register the walk could reach; \p Reg itself if nothing
/// could be folded.
1416static Register getTestBitReg(Register Reg, uint64_t &Bit, bool &Invert,
1417 MachineRegisterInfo &MRI) {
1418 assert(Reg.isValid() && "Expected valid register!");
 // Set once a G_ZEXT has been walked through; it then selects how AND/XOR
 // constants are read further down.
1419 bool HasZext = false;
1420 while (MachineInstr *MI = getDefIgnoringCopies(Reg, MRI)) {
1421 unsigned Opc = MI->getOpcode();
1422
 // Stop unless the definition is a register with exactly one non-debug use.
1423 if (!MI->getOperand(0).isReg() ||
1424 !MRI.hasOneNonDBGUse(MI->getOperand(0).getReg()))
1425 break;
1426
1427 // (tbz (any_ext x), b) -> (tbz x, b) and
1428 // (tbz (zext x), b) -> (tbz x, b) if we don't use the extended bits.
1429 //
1430 // (tbz (trunc x), b) -> (tbz x, b) is always safe, because the bit number
1431 // on the truncated x is the same as the bit number on x.
1432 if (Opc == TargetOpcode::G_ANYEXT || Opc == TargetOpcode::G_ZEXT ||
1433 Opc == TargetOpcode::G_TRUNC) {
1434 if (Opc == TargetOpcode::G_ZEXT)
1435 HasZext = true;
1436
1437 Register NextReg = MI->getOperand(1).getReg();
1438 // Did we find something worth folding?
1439 if (!NextReg.isValid() || !MRI.hasOneNonDBGUse(NextReg))
1440 break;
 // The tested bit must exist in the (possibly narrower) input.
1441 TypeSize InSize = MRI.getType(NextReg).getSizeInBits();
1442 if (Bit >= InSize)
1443 break;
1444
1445 // NextReg is worth folding. Keep looking.
1446 Reg = NextReg;
1447 continue;
1448 }
1449
1450 // Attempt to find a suitable operation with a constant on one side.
1451 std::optional<uint64_t> C;
1452 Register TestReg;
1453 switch (Opc) {
1454 default:
1455 break;
1456 case TargetOpcode::G_AND:
1457 case TargetOpcode::G_XOR: {
1458 TestReg = MI->getOperand(1).getReg();
1459 Register ConstantReg = MI->getOperand(2).getReg();
1460 auto VRegAndVal = getIConstantVRegValWithLookThrough(ConstantReg, MRI);
1461 if (!VRegAndVal) {
1462 // AND commutes, check the other side for a constant.
 // (This path is shared with G_XOR, which is handled the same way.)
1463 // FIXME: Can we canonicalize the constant so that it's always on the
1464 // same side at some point earlier?
1465 std::swap(ConstantReg, TestReg);
1466 VRegAndVal = getIConstantVRegValWithLookThrough(ConstantReg, MRI);
1467 }
1468 if (VRegAndVal) {
 // After a zext the constant is read zero-extended, otherwise
 // sign-extended.
1469 if (HasZext)
1470 C = VRegAndVal->Value.getZExtValue();
1471 else
1472 C = VRegAndVal->Value.getSExtValue();
1473 }
1474 break;
1475 }
1476 case TargetOpcode::G_ASHR:
1477 case TargetOpcode::G_LSHR:
1478 case TargetOpcode::G_SHL: {
 // Shifts do not commute: only operand 2 is checked for a constant.
1479 TestReg = MI->getOperand(1).getReg();
1480 auto VRegAndVal =
1481 getIConstantVRegValWithLookThrough(MI->getOperand(2).getReg(), MRI);
1482 if (VRegAndVal)
1483 C = VRegAndVal->Value.getSExtValue();
1484 break;
1485 }
1486 }
1487
1488 // Didn't find a constant or viable register. Bail out of the loop.
1489 if (!C || !TestReg.isValid())
1490 break;
1491
1492 // We found a suitable instruction with a constant. Check to see if we can
1493 // walk through the instruction.
1494 Register NextReg;
1495 unsigned TestRegSize = MRI.getType(TestReg).getSizeInBits();
1496 switch (Opc) {
1497 default:
1498 break;
1499 case TargetOpcode::G_AND:
1500 // (tbz (and x, m), b) -> (tbz x, b) when the b-th bit of m is set.
1501 if ((*C >> Bit) & 1)
1502 NextReg = TestReg;
1503 break;
1504 case TargetOpcode::G_SHL:
1505 // (tbz (shl x, c), b) -> (tbz x, b-c) when b-c is positive and fits in
1506 // the type of the register.
1507 if (*C <= Bit && (Bit - *C) < TestRegSize) {
1508 NextReg = TestReg;
1509 Bit = Bit - *C;
1510 }
1511 break;
1512 case TargetOpcode::G_ASHR:
1513 // (tbz (ashr x, c), b) -> (tbz x, b+c) or (tbz x, msb) if b+c is > # bits
1514 // in x
1515 NextReg = TestReg;
1516 Bit = Bit + *C;
1517 if (Bit >= TestRegSize)
1518 Bit = TestRegSize - 1;
1519 break;
1520 case TargetOpcode::G_LSHR:
1521 // (tbz (lshr x, c), b) -> (tbz x, b+c) when b + c is < # bits in x
1522 if ((Bit + *C) < TestRegSize) {
1523 NextReg = TestReg;
1524 Bit = Bit + *C;
1525 }
1526 break;
1527 case TargetOpcode::G_XOR:
1528 // We can walk through a G_XOR by inverting whether we use tbz/tbnz when
1529 // appropriate.
1530 //
1531 // e.g. If x' = xor x, c, and the b-th bit is set in c then
1532 //
1533 // tbz x', b -> tbnz x, b
1534 //
1535 // Because x' only has the b-th bit set if x does not.
1536 if ((*C >> Bit) & 1)
1537 Invert = !Invert;
1538 NextReg = TestReg;
1539 break;
1540 }
1541
1542 // Check if we found anything worth folding.
1543 if (!NextReg.isValid())
1544 return Reg;
1545 Reg = NextReg;
1546 }
1547
1548 return Reg;
1549}
1550
1551MachineInstr *AArch64InstructionSelector::emitTestBit(
1552 Register TestReg, uint64_t Bit, bool IsNegative, MachineBasicBlock *DstMBB,
1553 MachineIRBuilder &MIB) const {
1554 assert(TestReg.isValid());
1555 assert(ProduceNonFlagSettingCondBr &&
1556 "Cannot emit TB(N)Z with speculation tracking!");
1557 MachineRegisterInfo &MRI = *MIB.getMRI();
1558
1559 // Attempt to optimize the test bit by walking over instructions.
1560 TestReg = getTestBitReg(TestReg, Bit, IsNegative, MRI);
1561 LLT Ty = MRI.getType(TestReg);
1562 unsigned Size = Ty.getSizeInBits();
1563 assert(!Ty.isVector() && "Expected a scalar!");
1564 assert(Bit < 64 && "Bit is too large!");
1565
1566 // When the test register is a 64-bit register, we have to narrow to make
1567 // TBNZW work.
1568 bool UseWReg = Bit < 32;
1569 unsigned NecessarySize = UseWReg ? 32 : 64;
1570 if (Size != NecessarySize)
1571 TestReg = moveScalarRegClass(
1572 TestReg, UseWReg ? AArch64::GPR32RegClass : AArch64::GPR64RegClass,
1573 MIB);
1574
1575 static const unsigned OpcTable[2][2] = {{AArch64::TBZX, AArch64::TBNZX},
1576 {AArch64::TBZW, AArch64::TBNZW}};
1577 unsigned Opc = OpcTable[UseWReg][IsNegative];
1578 auto TestBitMI =
1579 MIB.buildInstr(Opc).addReg(TestReg).addImm(Bit).addMBB(DstMBB);
1580 constrainSelectedInstRegOperands(*TestBitMI, TII, TRI, RBI);
1581 return &*TestBitMI;
1582}
1583
1584bool AArch64InstructionSelector::tryOptAndIntoCompareBranch(
1585 MachineInstr &AndInst, bool Invert, MachineBasicBlock *DstMBB,
1586 MachineIRBuilder &MIB) const {
1587 assert(AndInst.getOpcode() == TargetOpcode::G_AND && "Expected G_AND only?");
1588 // Given something like this:
1589 //
1590 // %x = ...Something...
1591 // %one = G_CONSTANT i64 1
1592 // %zero = G_CONSTANT i64 0
1593 // %and = G_AND %x, %one
1594 // %cmp = G_ICMP intpred(ne), %and, %zero
1595 // %cmp_trunc = G_TRUNC %cmp
1596 // G_BRCOND %cmp_trunc, %bb.3
1597 //
1598 // We want to try and fold the AND into the G_BRCOND and produce either a
1599 // TBNZ (when we have intpred(ne)) or a TBZ (when we have intpred(eq)).
1600 //
1601 // In this case, we'd get
1602 //
1603 // TBNZ %x %bb.3
1604 //
1605
1606 // Check if the AND has a constant on its RHS which we can use as a mask.
1607 // If it's a power of 2, then it's the same as checking a specific bit.
1608 // (e.g, ANDing with 8 == ANDing with 000...100 == testing if bit 3 is set)
1609 auto MaybeBit = getIConstantVRegValWithLookThrough(
1610 AndInst.getOperand(2).getReg(), *MIB.getMRI());
1611 if (!MaybeBit)
1612 return false;
1613
1614 int32_t Bit = MaybeBit->Value.exactLogBase2();
1615 if (Bit < 0)
1616 return false;
1617
1618 Register TestReg = AndInst.getOperand(1).getReg();
1619
1620 // Emit a TB(N)Z.
1621 emitTestBit(TestReg, Bit, Invert, DstMBB, MIB);
1622 return true;
1623}
1624
1625MachineInstr *AArch64InstructionSelector::emitCBZ(Register CompareReg,
1626 bool IsNegative,
1627 MachineBasicBlock *DestMBB,
1628 MachineIRBuilder &MIB) const {
1629 assert(ProduceNonFlagSettingCondBr && "CBZ does not set flags!");
1630 MachineRegisterInfo &MRI = *MIB.getMRI();
1631 assert(RBI.getRegBank(CompareReg, MRI, TRI)->getID() ==
1632 AArch64::GPRRegBankID &&
1633 "Expected GPRs only?");
1634 auto Ty = MRI.getType(CompareReg);
1635 unsigned Width = Ty.getSizeInBits();
1636 assert(!Ty.isVector() && "Expected scalar only?");
1637 assert(Width <= 64 && "Expected width to be at most 64?");
1638 static const unsigned OpcTable[2][2] = {{AArch64::CBZW, AArch64::CBZX},
1639 {AArch64::CBNZW, AArch64::CBNZX}};
1640 unsigned Opc = OpcTable[IsNegative][Width == 64];
1641 auto BranchMI = MIB.buildInstr(Opc, {}, {CompareReg}).addMBB(DestMBB);
1642 constrainSelectedInstRegOperands(*BranchMI, TII, TRI, RBI);
1643 return &*BranchMI;
1644}
1645
1646bool AArch64InstructionSelector::selectCompareBranchFedByFCmp(
1647 MachineInstr &I, MachineInstr &FCmp, MachineIRBuilder &MIB) const {
1648 assert(FCmp.getOpcode() == TargetOpcode::G_FCMP);
1649 assert(I.getOpcode() == TargetOpcode::G_BRCOND);
1650 // Unfortunately, the mapping of LLVM FP CC's onto AArch64 CC's isn't
1651 // totally clean. Some of them require two branches to implement.
1652 auto Pred = (CmpInst::Predicate)FCmp.getOperand(1).getPredicate();
1653 emitFPCompare(FCmp.getOperand(2).getReg(), FCmp.getOperand(3).getReg(), MIB,
1654 Pred);
1655 AArch64CC::CondCode CC1, CC2;
1656 changeFCMPPredToAArch64CC(Pred, CC1, CC2);
1657 MachineBasicBlock *DestMBB = I.getOperand(1).getMBB();
1658 MIB.buildInstr(AArch64::Bcc, {}, {}).addImm(CC1).addMBB(DestMBB);
1659 if (CC2 != AArch64CC::AL)
1660 MIB.buildInstr(AArch64::Bcc, {}, {}).addImm(CC2).addMBB(DestMBB);
1661 I.eraseFromParent();
1662 return true;
1663}
1664
1665bool AArch64InstructionSelector::tryOptCompareBranchFedByICmp(
1666 MachineInstr &I, MachineInstr &ICmp, MachineIRBuilder &MIB) const {
1667 assert(ICmp.getOpcode() == TargetOpcode::G_ICMP);
1668 assert(I.getOpcode() == TargetOpcode::G_BRCOND);
1669 // Attempt to optimize the G_BRCOND + G_ICMP into a TB(N)Z/CB(N)Z.
1670 //
1671 // Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z
1672 // instructions will not be produced, as they are conditional branch
1673 // instructions that do not set flags.
1674 if (!ProduceNonFlagSettingCondBr)
1675 return false;
1676
1677 MachineRegisterInfo &MRI = *MIB.getMRI();
1678 MachineBasicBlock *DestMBB = I.getOperand(1).getMBB();
1679 auto Pred =
1680 static_cast<CmpInst::Predicate>(ICmp.getOperand(1).getPredicate());
1681 Register LHS = ICmp.getOperand(2).getReg();
1682 Register RHS = ICmp.getOperand(3).getReg();
1683
1684 // We're allowed to emit a TB(N)Z/CB(N)Z. Try to do that.
1685 auto VRegAndVal = getIConstantVRegValWithLookThrough(RHS, MRI);
1686 MachineInstr *AndInst = getOpcodeDef(TargetOpcode::G_AND, LHS, MRI);
1687
1688 // When we can emit a TB(N)Z, prefer that.
1689 //
1690 // Handle non-commutative condition codes first.
1691 // Note that we don't want to do this when we have a G_AND because it can
1692 // become a tst. The tst will make the test bit in the TB(N)Z redundant.
1693 if (VRegAndVal && !AndInst) {
1694 int64_t C = VRegAndVal->Value.getSExtValue();
1695
1696 // When we have a greater-than comparison, we can just test if the msb is
1697 // zero.
1698 if (C == -1 && Pred == CmpInst::ICMP_SGT) {
1699 uint64_t Bit = MRI.getType(LHS).getSizeInBits() - 1;
1700 emitTestBit(LHS, Bit, /*IsNegative = */ false, DestMBB, MIB);
1701 I.eraseFromParent();
1702 return true;
1703 }
1704
1705 // When we have a less than comparison, we can just test if the msb is not
1706 // zero.
1707 if (C == 0 && Pred == CmpInst::ICMP_SLT) {
1708 uint64_t Bit = MRI.getType(LHS).getSizeInBits() - 1;
1709 emitTestBit(LHS, Bit, /*IsNegative = */ true, DestMBB, MIB);
1710 I.eraseFromParent();
1711 return true;
1712 }
1713
1714 // Inversely, if we have a signed greater-than-or-equal comparison to zero,
1715 // we can test if the msb is zero.
1716 if (C == 0 && Pred == CmpInst::ICMP_SGE) {
1717 uint64_t Bit = MRI.getType(LHS).getSizeInBits() - 1;
1718 emitTestBit(LHS, Bit, /*IsNegative = */ false, DestMBB, MIB);
1719 I.eraseFromParent();
1720 return true;
1721 }
1722 }
1723
1724 // Attempt to handle commutative condition codes. Right now, that's only
1725 // eq/ne.
1726 if (ICmpInst::isEquality(Pred)) {
1727 if (!VRegAndVal) {
1728 std::swap(RHS, LHS);
1729 VRegAndVal = getIConstantVRegValWithLookThrough(RHS, MRI);
1730 AndInst = getOpcodeDef(TargetOpcode::G_AND, LHS, MRI);
1731 }
1732
1733 if (VRegAndVal && VRegAndVal->Value == 0) {
1734 // If there's a G_AND feeding into this branch, try to fold it away by
1735 // emitting a TB(N)Z instead.
1736 //
1737 // Note: If we have LT, then it *is* possible to fold, but it wouldn't be
1738 // beneficial. When we have an AND and LT, we need a TST/ANDS, so folding
1739 // would be redundant.
1740 if (AndInst &&
1741 tryOptAndIntoCompareBranch(
1742 *AndInst, /*Invert = */ Pred == CmpInst::ICMP_NE, DestMBB, MIB)) {
1743 I.eraseFromParent();
1744 return true;
1745 }
1746
1747 // Otherwise, try to emit a CB(N)Z instead.
1748 auto LHSTy = MRI.getType(LHS);
1749 if (!LHSTy.isVector() && LHSTy.getSizeInBits() <= 64) {
1750 emitCBZ(LHS, /*IsNegative = */ Pred == CmpInst::ICMP_NE, DestMBB, MIB);
1751 I.eraseFromParent();
1752 return true;
1753 }
1754 }
1755 }
1756
1757 return false;
1758}
1759
// Selects the G_BRCOND I whose condition is produced by the G_ICMP ICmp.
// First tries the TB(N)Z/CB(N)Z folds in tryOptCompareBranchFedByICmp; if
// those do not apply, emits an integer compare followed by a Bcc to the
// branch target and erases I. Always returns true.
// NOTE(review): listing line 1771 (the start of the statement that computes
// CC) is not present in this extract.
1760bool AArch64InstructionSelector::selectCompareBranchFedByICmp(
1761 MachineInstr &I, MachineInstr &ICmp, MachineIRBuilder &MIB) const {
1762 assert(ICmp.getOpcode() == TargetOpcode::G_ICMP);
1763 assert(I.getOpcode() == TargetOpcode::G_BRCOND);
1764 if (tryOptCompareBranchFedByICmp(I, ICmp, MIB))
1765 return true;
1766
1767 // Couldn't optimize. Emit a compare + a Bcc.
1768 MachineBasicBlock *DestMBB = I.getOperand(1).getMBB();
1769 auto &PredOp = ICmp.getOperand(1);
 // PredOp is passed by reference to emitIntegerCompare and read again below
 // when the condition code is computed.
1770 emitIntegerCompare(ICmp.getOperand(2), ICmp.getOperand(3), PredOp, MIB);
1772 static_cast<CmpInst::Predicate>(PredOp.getPredicate()),
1773 ICmp.getOperand(3).getReg(), MIB.getMRI());
1774 MIB.buildInstr(AArch64::Bcc, {}, {}).addImm(CC).addMBB(DestMBB);
1775 I.eraseFromParent();
1776 return true;
1777}
1778
// Selects the G_BRCOND I (operand 0: condition register, operand 1: target
// block).
//
// If the condition is defined by a G_FCMP or G_ICMP, selection is delegated
// to the matching selectCompareBranchFedBy* helper. Otherwise the branch is
// emitted either as a test of bit 0 of the condition register (when
// ProduceNonFlagSettingCondBr is set) or as an ANDSWri followed by a Bcc.
// NOTE(review): listing lines 1804, 1806 and 1809 are not present in this
// extract, so the tst + bcc fallback below is shown incompletely.
1779bool AArch64InstructionSelector::selectCompareBranch(
1780 MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) {
1781 Register CondReg = I.getOperand(0).getReg();
1782 MachineInstr *CCMI = MRI.getVRegDef(CondReg);
1783 // Try to select the G_BRCOND using whatever is feeding the condition if
1784 // possible.
1785 unsigned CCMIOpc = CCMI->getOpcode();
1786 if (CCMIOpc == TargetOpcode::G_FCMP)
1787 return selectCompareBranchFedByFCmp(I, *CCMI, MIB);
1788 if (CCMIOpc == TargetOpcode::G_ICMP)
1789 return selectCompareBranchFedByICmp(I, *CCMI, MIB);
1790
1791 // Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z
1792 // instructions will not be produced, as they are conditional branch
1793 // instructions that do not set flags.
1794 if (ProduceNonFlagSettingCondBr) {
 // Branch to the target when bit 0 of the condition is set.
1795 emitTestBit(CondReg, /*Bit = */ 0, /*IsNegative = */ true,
1796 I.getOperand(1).getMBB(), MIB);
1797 I.eraseFromParent();
1798 return true;
1799 }
1800
1801 // Can't emit TB(N)Z/CB(N)Z. Emit a tst + bcc instead.
1802 auto TstMI =
1803 MIB.buildInstr(AArch64::ANDSWri, {LLT::scalar(32)}, {CondReg}).addImm(1);
1805 auto Bcc = MIB.buildInstr(AArch64::Bcc)
1807 .addMBB(I.getOperand(1).getMBB());
1808 I.eraseFromParent();
1810 return true;
1811}
1812
1813/// Returns the element immediate value of a vector shift operand if found.
1814/// This needs to detect a splat-like operation, e.g. a G_BUILD_VECTOR.
1815static std::optional<int64_t> getVectorShiftImm(Register Reg,
1816 MachineRegisterInfo &MRI) {
1817 assert(MRI.getType(Reg).isVector() && "Expected a *vector* shift operand");
1818 MachineInstr *OpMI = MRI.getVRegDef(Reg);
1819 return getAArch64VectorSplatScalar(*OpMI, MRI);
1820}
1821
1822/// Matches and returns the shift immediate value for a SHL instruction given
1823/// a shift operand.
1824static std::optional<int64_t> getVectorSHLImm(LLT SrcTy, Register Reg,
1825 MachineRegisterInfo &MRI) {
1826 std::optional<int64_t> ShiftImm = getVectorShiftImm(Reg, MRI);
1827 if (!ShiftImm)
1828 return std::nullopt;
1829 // Check the immediate is in range for a SHL.
1830 int64_t Imm = *ShiftImm;
1831 if (Imm < 0)
1832 return std::nullopt;
1833 switch (SrcTy.getElementType().getSizeInBits()) {
1834 default:
1835 LLVM_DEBUG(dbgs() << "Unhandled element type for vector shift");
1836 return std::nullopt;
1837 case 8:
1838 if (Imm > 7)
1839 return std::nullopt;
1840 break;
1841 case 16:
1842 if (Imm > 15)
1843 return std::nullopt;
1844 break;
1845 case 32:
1846 if (Imm > 31)
1847 return std::nullopt;
1848 break;
1849 case 64:
1850 if (Imm > 63)
1851 return std::nullopt;
1852 break;
1853 }
1854 return Imm;
1855}
1856
1857bool AArch64InstructionSelector::selectVectorSHL(MachineInstr &I,
1858 MachineRegisterInfo &MRI) {
1859 assert(I.getOpcode() == TargetOpcode::G_SHL);
1860 Register DstReg = I.getOperand(0).getReg();
1861 const LLT Ty = MRI.getType(DstReg);
1862 Register Src1Reg = I.getOperand(1).getReg();
1863 Register Src2Reg = I.getOperand(2).getReg();
1864
1865 if (!Ty.isVector())
1866 return false;
1867
1868 // Check if we have a vector of constants on RHS that we can select as the
1869 // immediate form.
1870 std::optional<int64_t> ImmVal = getVectorSHLImm(Ty, Src2Reg, MRI);
1871
1872 unsigned Opc = 0;
1873 if (Ty == LLT::fixed_vector(2, 64)) {
1874 Opc = ImmVal ? AArch64::SHLv2i64_shift : AArch64::USHLv2i64;
1875 } else if (Ty == LLT::fixed_vector(4, 32)) {
1876 Opc = ImmVal ? AArch64::SHLv4i32_shift : AArch64::USHLv4i32;
1877 } else if (Ty == LLT::fixed_vector(2, 32)) {
1878 Opc = ImmVal ? AArch64::SHLv2i32_shift : AArch64::USHLv2i32;
1879 } else if (Ty == LLT::fixed_vector(4, 16)) {
1880 Opc = ImmVal ? AArch64::SHLv4i16_shift : AArch64::USHLv4i16;
1881 } else if (Ty == LLT::fixed_vector(8, 16)) {
1882 Opc = ImmVal ? AArch64::SHLv8i16_shift : AArch64::USHLv8i16;
1883 } else if (Ty == LLT::fixed_vector(16, 8)) {
1884 Opc = ImmVal ? AArch64::SHLv16i8_shift : AArch64::USHLv16i8;
1885 } else if (Ty == LLT::fixed_vector(8, 8)) {
1886 Opc = ImmVal ? AArch64::SHLv8i8_shift : AArch64::USHLv8i8;
1887 } else {
1888 LLVM_DEBUG(dbgs() << "Unhandled G_SHL type");
1889 return false;
1890 }
1891
1892 auto Shl = MIB.buildInstr(Opc, {DstReg}, {Src1Reg});
1893 if (ImmVal)
1894 Shl.addImm(*ImmVal);
1895 else
1896 Shl.addUse(Src2Reg);
1898 I.eraseFromParent();
1899 return true;
1900}
1901
1902bool AArch64InstructionSelector::selectVectorAshrLshr(
1903 MachineInstr &I, MachineRegisterInfo &MRI) {
1904 assert(I.getOpcode() == TargetOpcode::G_ASHR ||
1905 I.getOpcode() == TargetOpcode::G_LSHR);
1906 Register DstReg = I.getOperand(0).getReg();
1907 const LLT Ty = MRI.getType(DstReg);
1908 Register Src1Reg = I.getOperand(1).getReg();
1909 Register Src2Reg = I.getOperand(2).getReg();
1910
1911 if (!Ty.isVector())
1912 return false;
1913
1914 bool IsASHR = I.getOpcode() == TargetOpcode::G_ASHR;
1915
1916 // We expect the immediate case to be lowered in the PostLegalCombiner to
1917 // AArch64ISD::VASHR or AArch64ISD::VLSHR equivalents.
1918
1919 // There is not a shift right register instruction, but the shift left
1920 // register instruction takes a signed value, where negative numbers specify a
1921 // right shift.
1922
1923 unsigned Opc = 0;
1924 unsigned NegOpc = 0;
1925 const TargetRegisterClass *RC =
1926 getRegClassForTypeOnBank(Ty, RBI.getRegBank(AArch64::FPRRegBankID));
1927 if (Ty == LLT::fixed_vector(2, 64)) {
1928 Opc = IsASHR ? AArch64::SSHLv2i64 : AArch64::USHLv2i64;
1929 NegOpc = AArch64::NEGv2i64;
1930 } else if (Ty == LLT::fixed_vector(4, 32)) {
1931 Opc = IsASHR ? AArch64::SSHLv4i32 : AArch64::USHLv4i32;
1932 NegOpc = AArch64::NEGv4i32;
1933 } else if (Ty == LLT::fixed_vector(2, 32)) {
1934 Opc = IsASHR ? AArch64::SSHLv2i32 : AArch64::USHLv2i32;
1935 NegOpc = AArch64::NEGv2i32;
1936 } else if (Ty == LLT::fixed_vector(4, 16)) {
1937 Opc = IsASHR ? AArch64::SSHLv4i16 : AArch64::USHLv4i16;
1938 NegOpc = AArch64::NEGv4i16;
1939 } else if (Ty == LLT::fixed_vector(8, 16)) {
1940 Opc = IsASHR ? AArch64::SSHLv8i16 : AArch64::USHLv8i16;
1941 NegOpc = AArch64::NEGv8i16;
1942 } else if (Ty == LLT::fixed_vector(16, 8)) {
1943 Opc = IsASHR ? AArch64::SSHLv16i8 : AArch64::USHLv16i8;
1944 NegOpc = AArch64::NEGv16i8;
1945 } else if (Ty == LLT::fixed_vector(8, 8)) {
1946 Opc = IsASHR ? AArch64::SSHLv8i8 : AArch64::USHLv8i8;
1947 NegOpc = AArch64::NEGv8i8;
1948 } else {
1949 LLVM_DEBUG(dbgs() << "Unhandled G_ASHR type");
1950 return false;
1951 }
1952
1953 auto Neg = MIB.buildInstr(NegOpc, {RC}, {Src2Reg});
1955 auto SShl = MIB.buildInstr(Opc, {DstReg}, {Src1Reg, Neg});
1957 I.eraseFromParent();
1958 return true;
1959}
1960
1961bool AArch64InstructionSelector::selectVaStartAAPCS(
1962 MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const {
1963
1965 MF.getFunction().isVarArg()))
1966 return false;
1967
1968 // The layout of the va_list struct is specified in the AArch64 Procedure Call
1969 // Standard, section 10.1.5.
1970
1971 const AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
1972 const unsigned PtrSize = STI.isTargetILP32() ? 4 : 8;
1973 const auto *PtrRegClass =
1974 STI.isTargetILP32() ? &AArch64::GPR32RegClass : &AArch64::GPR64RegClass;
1975
1976 const MCInstrDesc &MCIDAddAddr =
1977 TII.get(STI.isTargetILP32() ? AArch64::ADDWri : AArch64::ADDXri);
1978 const MCInstrDesc &MCIDStoreAddr =
1979 TII.get(STI.isTargetILP32() ? AArch64::STRWui : AArch64::STRXui);
1980
1981 /*
1982 * typedef struct va_list {
1983 * void * stack; // next stack param
1984 * void * gr_top; // end of GP arg reg save area
1985 * void * vr_top; // end of FP/SIMD arg reg save area
1986 * int gr_offs; // offset from gr_top to next GP register arg
1987 * int vr_offs; // offset from vr_top to next FP/SIMD register arg
1988 * } va_list;
1989 */
1990 const auto VAList = I.getOperand(0).getReg();
1991
1992 // Our current offset in bytes from the va_list struct (VAList).
1993 unsigned OffsetBytes = 0;
1994
1995 // Helper function to store (FrameIndex + Imm) to VAList at offset OffsetBytes
1996 // and increment OffsetBytes by PtrSize.
1997 const auto PushAddress = [&](const int FrameIndex, const int64_t Imm) {
1998 const Register Top = MRI.createVirtualRegister(PtrRegClass);
1999 auto MIB = BuildMI(*I.getParent(), I, I.getDebugLoc(), MCIDAddAddr)
2000 .addDef(Top)
2001 .addFrameIndex(FrameIndex)
2002 .addImm(Imm)
2003 .addImm(0);
2005
2006 const auto *MMO = *I.memoperands_begin();
2007 MIB = BuildMI(*I.getParent(), I, I.getDebugLoc(), MCIDStoreAddr)
2008 .addUse(Top)
2009 .addUse(VAList)
2010 .addImm(OffsetBytes / PtrSize)
2012 MMO->getPointerInfo().getWithOffset(OffsetBytes),
2013 MachineMemOperand::MOStore, PtrSize, MMO->getBaseAlign()));
2015
2016 OffsetBytes += PtrSize;
2017 };
2018
2019 // void* stack at offset 0
2020 PushAddress(FuncInfo->getVarArgsStackIndex(), 0);
2021
2022 // void* gr_top at offset 8 (4 on ILP32)
2023 const unsigned GPRSize = FuncInfo->getVarArgsGPRSize();
2024 PushAddress(FuncInfo->getVarArgsGPRIndex(), GPRSize);
2025
2026 // void* vr_top at offset 16 (8 on ILP32)
2027 const unsigned FPRSize = FuncInfo->getVarArgsFPRSize();
2028 PushAddress(FuncInfo->getVarArgsFPRIndex(), FPRSize);
2029
2030 // Helper function to store a 4-byte integer constant to VAList at offset
2031 // OffsetBytes, and increment OffsetBytes by 4.
2032 const auto PushIntConstant = [&](const int32_t Value) {
2033 constexpr int IntSize = 4;
2034 const Register Temp = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
2035 auto MIB =
2036 BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::MOVi32imm))
2037 .addDef(Temp)
2038 .addImm(Value);
2040
2041 const auto *MMO = *I.memoperands_begin();
2042 MIB = BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::STRWui))
2043 .addUse(Temp)
2044 .addUse(VAList)
2045 .addImm(OffsetBytes / IntSize)
2047 MMO->getPointerInfo().getWithOffset(OffsetBytes),
2048 MachineMemOperand::MOStore, IntSize, MMO->getBaseAlign()));
2050 OffsetBytes += IntSize;
2051 };
2052
2053 // int gr_offs at offset 24 (12 on ILP32)
2054 PushIntConstant(-static_cast<int32_t>(GPRSize));
2055
2056 // int vr_offs at offset 28 (16 on ILP32)
2057 PushIntConstant(-static_cast<int32_t>(FPRSize));
2058
2059 assert(OffsetBytes == (STI.isTargetILP32() ? 20 : 32) && "Unexpected offset");
2060
2061 I.eraseFromParent();
2062 return true;
2063}
2064
2065bool AArch64InstructionSelector::selectVaStartDarwin(
2066 MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const {
2067 AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
2068 Register ListReg = I.getOperand(0).getReg();
2069
2070 Register ArgsAddrReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
2071
2072 int FrameIdx = FuncInfo->getVarArgsStackIndex();
2073 if (MF.getSubtarget<AArch64Subtarget>().isCallingConvWin64(
2075 FrameIdx = FuncInfo->getVarArgsGPRSize() > 0
2076 ? FuncInfo->getVarArgsGPRIndex()
2077 : FuncInfo->getVarArgsStackIndex();
2078 }
2079
2080 auto MIB =
2081 BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::ADDXri))
2082 .addDef(ArgsAddrReg)
2083 .addFrameIndex(FrameIdx)
2084 .addImm(0)
2085 .addImm(0);
2086
2088
2089 MIB = BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::STRXui))
2090 .addUse(ArgsAddrReg)
2091 .addUse(ListReg)
2092 .addImm(0)
2093 .addMemOperand(*I.memoperands_begin());
2094
2096 I.eraseFromParent();
2097 return true;
2098}
2099
2100void AArch64InstructionSelector::materializeLargeCMVal(
2101 MachineInstr &I, const Value *V, unsigned OpFlags) {
2102 MachineBasicBlock &MBB = *I.getParent();
2103 MachineFunction &MF = *MBB.getParent();
2104 MachineRegisterInfo &MRI = MF.getRegInfo();
2105
2106 auto MovZ = MIB.buildInstr(AArch64::MOVZXi, {&AArch64::GPR64RegClass}, {});
2107 MovZ->addOperand(MF, I.getOperand(1));
2108 MovZ->getOperand(1).setTargetFlags(OpFlags | AArch64II::MO_G0 |
2110 MovZ->addOperand(MF, MachineOperand::CreateImm(0));
2112
2113 auto BuildMovK = [&](Register SrcReg, unsigned char Flags, unsigned Offset,
2114 Register ForceDstReg) {
2115 Register DstReg = ForceDstReg
2116 ? ForceDstReg
2117 : MRI.createVirtualRegister(&AArch64::GPR64RegClass);
2118 auto MovI = MIB.buildInstr(AArch64::MOVKXi).addDef(DstReg).addUse(SrcReg);
2119 if (auto *GV = dyn_cast<GlobalValue>(V)) {
2120 MovI->addOperand(MF, MachineOperand::CreateGA(
2121 GV, MovZ->getOperand(1).getOffset(), Flags));
2122 } else {
2123 MovI->addOperand(
2125 MovZ->getOperand(1).getOffset(), Flags));
2126 }
2129 return DstReg;
2130 };
2131 Register DstReg = BuildMovK(MovZ.getReg(0),
2133 DstReg = BuildMovK(DstReg, AArch64II::MO_G2 | AArch64II::MO_NC, 32, 0);
2134 BuildMovK(DstReg, AArch64II::MO_G3, 48, I.getOperand(0).getReg());
2135}
2136
2137bool AArch64InstructionSelector::preISelLower(MachineInstr &I) {
2138 MachineBasicBlock &MBB = *I.getParent();
2139 MachineFunction &MF = *MBB.getParent();
2140 MachineRegisterInfo &MRI = MF.getRegInfo();
2141
2142 switch (I.getOpcode()) {
2143 case TargetOpcode::G_CONSTANT: {
2144 Register DefReg = I.getOperand(0).getReg();
2145 const LLT DefTy = MRI.getType(DefReg);
2146 if (!DefTy.isPointer())
2147 return false;
2148 const unsigned PtrSize = DefTy.getSizeInBits();
2149 if (PtrSize != 32 && PtrSize != 64)
2150 return false;
2151 // Convert pointer typed constants to integers so TableGen can select.
2152 MRI.setType(DefReg, LLT::integer(PtrSize));
2153 return true;
2154 }
2155 case TargetOpcode::G_STORE: {
2156 bool Changed = contractCrossBankCopyIntoStore(I, MRI);
2157 MachineOperand &SrcOp = I.getOperand(0);
2158 if (MRI.getType(SrcOp.getReg()).isPointer()) {
2159 // Allow matching with imported patterns for stores of pointers. Unlike
2160 // G_LOAD/G_PTR_ADD, we may not have selected all users. So, emit a copy
2161 // and constrain.
2162 auto Copy = MIB.buildCopy(LLT::scalar(64), SrcOp);
2163 Register NewSrc = Copy.getReg(0);
2164 SrcOp.setReg(NewSrc);
2165 RBI.constrainGenericRegister(NewSrc, AArch64::GPR64RegClass, MRI);
2166 Changed = true;
2167 }
2168 return Changed;
2169 }
2170 case TargetOpcode::G_PTR_ADD: {
2171 // If Checked Pointer Arithmetic (FEAT_CPA) is present, preserve the pointer
2172 // arithmetic semantics instead of falling back to regular arithmetic.
2173 const auto &TL = STI.getTargetLowering();
2174 if (TL->shouldPreservePtrArith(MF.getFunction(), EVT()))
2175 return false;
2176 return convertPtrAddToAdd(I, MRI);
2177 }
2178 case TargetOpcode::G_LOAD: {
2179 // For scalar loads of pointers, we try to convert the dest type from p0
2180 // to s64 so that our imported patterns can match. Like with the G_PTR_ADD
2181 // conversion, this should be ok because all users should have been
2182 // selected already, so the type doesn't matter for them.
2183 Register DstReg = I.getOperand(0).getReg();
2184 const LLT DstTy = MRI.getType(DstReg);
2185 if (!DstTy.isPointer())
2186 return false;
2187 MRI.setType(DstReg, LLT::scalar(64));
2188 return true;
2189 }
2190 case AArch64::G_DUP: {
2191 // Convert the type from p0 to s64 to help selection.
2192 LLT DstTy = MRI.getType(I.getOperand(0).getReg());
2193 if (!DstTy.isPointerVector())
2194 return false;
2195 auto NewSrc = MIB.buildCopy(LLT::scalar(64), I.getOperand(1).getReg());
2196 MRI.setType(I.getOperand(0).getReg(),
2197 DstTy.changeElementType(LLT::scalar(64)));
2198 MRI.setRegClass(NewSrc.getReg(0), &AArch64::GPR64RegClass);
2199 I.getOperand(1).setReg(NewSrc.getReg(0));
2200 return true;
2201 }
2202 case AArch64::G_INSERT_VECTOR_ELT: {
2203 LLT DstTy = MRI.getType(I.getOperand(0).getReg());
2204 LLT SrcVecTy = MRI.getType(I.getOperand(1).getReg());
2205 if (SrcVecTy.isPointerVector()) {
2206 // Convert the type from p0 to s64 to help selection.
2207 auto NewSrc = MIB.buildCopy(LLT::scalar(64), I.getOperand(2).getReg());
2208 MRI.setType(I.getOperand(1).getReg(),
2209 DstTy.changeElementType(LLT::scalar(64)));
2210 MRI.setType(I.getOperand(0).getReg(),
2211 DstTy.changeElementType(LLT::scalar(64)));
2212 MRI.setRegClass(NewSrc.getReg(0), &AArch64::GPR64RegClass);
2213 I.getOperand(2).setReg(NewSrc.getReg(0));
2214 return true;
2215 }
2216
2217 Register EltReg = I.getOperand(2).getReg();
2218 LLT EltTy = MRI.getType(EltReg);
2219 if (EltTy.isScalar() &&
2220 (EltTy.getSizeInBits() == 8 || EltTy.getSizeInBits() == 16) &&
2221 RBI.getRegBank(EltReg, MRI, TRI)->getID() == AArch64::GPRRegBankID) {
2222 // Convert the type from s8/s16 to s32 to help selection.
2223 auto NewElt = MIB.buildCopy(LLT::scalar(32), EltReg);
2224 MRI.setRegClass(NewElt.getReg(0), &AArch64::GPR32RegClass);
2225 I.getOperand(2).setReg(NewElt.getReg(0));
2226 return true;
2227 }
2228 return false;
2229 }
2230 case TargetOpcode::G_UITOFP:
2231 case TargetOpcode::G_SITOFP: {
2232 // If both source and destination regbanks are FPR, then convert the opcode
2233 // to G_SITOF so that the importer can select it to an fpr variant.
2234 // Otherwise, it ends up matching an fpr/gpr variant and adding a cross-bank
2235 // copy.
2236 Register SrcReg = I.getOperand(1).getReg();
2237 LLT SrcTy = MRI.getType(SrcReg);
2238 LLT DstTy = MRI.getType(I.getOperand(0).getReg());
2239 if (SrcTy.isVector() || SrcTy.getSizeInBits() != DstTy.getSizeInBits())
2240 return false;
2241
2242 if (RBI.getRegBank(SrcReg, MRI, TRI)->getID() == AArch64::FPRRegBankID) {
2243 // Need to add a copy to change the type so that the existing patterns can
2244 // match when there is an integer on an FPR bank.
2245 if (SrcTy.getScalarType().isInteger()) {
2246 auto Copy = MIB.buildCopy(DstTy, SrcReg);
2247 I.getOperand(1).setReg(Copy.getReg(0));
2248 MRI.setRegClass(Copy.getReg(0),
2249 getRegClassForTypeOnBank(
2250 SrcTy, RBI.getRegBank(AArch64::FPRRegBankID)));
2251 }
2252 if (I.getOpcode() == TargetOpcode::G_SITOFP)
2253 I.setDesc(TII.get(AArch64::G_SITOF));
2254 else
2255 I.setDesc(TII.get(AArch64::G_UITOF));
2256 return true;
2257 }
2258 return false;
2259 }
2260 default:
2261 return false;
2262 }
2263}
2264
2265/// This lowering tries to look for G_PTR_ADD instructions and then converts
2266/// them to a standard G_ADD with a COPY on the source.
2267///
2268/// The motivation behind this is to expose the add semantics to the imported
2269/// tablegen patterns. We shouldn't need to check for uses being loads/stores,
2270/// because the selector works bottom up, uses before defs. By the time we
2271/// end up trying to select a G_PTR_ADD, we should have already attempted to
2272/// fold this into addressing modes and were therefore unsuccessful.
2273bool AArch64InstructionSelector::convertPtrAddToAdd(
2274 MachineInstr &I, MachineRegisterInfo &MRI) {
2275 assert(I.getOpcode() == TargetOpcode::G_PTR_ADD && "Expected G_PTR_ADD");
2276 Register DstReg = I.getOperand(0).getReg();
2277 Register AddOp1Reg = I.getOperand(1).getReg();
2278 const LLT PtrTy = MRI.getType(DstReg);
2279 if (PtrTy.getAddressSpace() != 0)
2280 return false;
2281
2282 const LLT CastPtrTy = PtrTy.isVector()
2284 : LLT::integer(64);
2285 auto PtrToInt = MIB.buildPtrToInt(CastPtrTy, AddOp1Reg);
2286 // Set regbanks on the registers.
2287 if (PtrTy.isVector())
2288 MRI.setRegBank(PtrToInt.getReg(0), RBI.getRegBank(AArch64::FPRRegBankID));
2289 else
2290 MRI.setRegBank(PtrToInt.getReg(0), RBI.getRegBank(AArch64::GPRRegBankID));
2291
2292 // Now turn the %dst(p0) = G_PTR_ADD %base, off into:
2293 // %dst(intty) = G_ADD %intbase, off
2294 I.setDesc(TII.get(TargetOpcode::G_ADD));
2295 MRI.setType(DstReg, CastPtrTy);
2296 I.getOperand(1).setReg(PtrToInt.getReg(0));
2297 if (!select(*PtrToInt)) {
2298 LLVM_DEBUG(dbgs() << "Failed to select G_PTRTOINT in convertPtrAddToAdd");
2299 return false;
2300 }
2301
2302 // Also take the opportunity here to try to do some optimization.
2303 // Try to convert this into a G_SUB if the offset is a 0-x negate idiom.
2304 Register NegatedReg;
2305 if (!mi_match(I.getOperand(2).getReg(), MRI, m_Neg(m_Reg(NegatedReg))))
2306 return true;
2307 I.getOperand(2).setReg(NegatedReg);
2308 I.setDesc(TII.get(TargetOpcode::G_SUB));
2309 return true;
2310}
2311
2312bool AArch64InstructionSelector::earlySelectSHL(MachineInstr &I,
2313 MachineRegisterInfo &MRI) {
2314 // We try to match the immediate variant of LSL, which is actually an alias
2315 // for a special case of UBFM. Otherwise, we fall back to the imported
2316 // selector which will match the register variant.
2317 assert(I.getOpcode() == TargetOpcode::G_SHL && "unexpected op");
2318 const auto &MO = I.getOperand(2);
2319 auto VRegAndVal = getIConstantVRegVal(MO.getReg(), MRI);
2320 if (!VRegAndVal)
2321 return false;
2322
2323 const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
2324 if (DstTy.isVector())
2325 return false;
2326 bool Is64Bit = DstTy.getSizeInBits() == 64;
2327 auto Imm1Fn = Is64Bit ? selectShiftA_64(MO) : selectShiftA_32(MO);
2328 auto Imm2Fn = Is64Bit ? selectShiftB_64(MO) : selectShiftB_32(MO);
2329
2330 if (!Imm1Fn || !Imm2Fn)
2331 return false;
2332
2333 auto NewI =
2334 MIB.buildInstr(Is64Bit ? AArch64::UBFMXri : AArch64::UBFMWri,
2335 {I.getOperand(0).getReg()}, {I.getOperand(1).getReg()});
2336
2337 for (auto &RenderFn : *Imm1Fn)
2338 RenderFn(NewI);
2339 for (auto &RenderFn : *Imm2Fn)
2340 RenderFn(NewI);
2341
2342 I.eraseFromParent();
2344 return true;
2345}
2346
2347bool AArch64InstructionSelector::contractCrossBankCopyIntoStore(
2348 MachineInstr &I, MachineRegisterInfo &MRI) {
2349 assert(I.getOpcode() == TargetOpcode::G_STORE && "Expected G_STORE");
2350 // If we're storing a scalar, it doesn't matter what register bank that
2351 // scalar is on. All that matters is the size.
2352 //
2353 // So, if we see something like this (with a 32-bit scalar as an example):
2354 //
2355 // %x:gpr(s32) = ... something ...
2356 // %y:fpr(s32) = COPY %x:gpr(s32)
2357 // G_STORE %y:fpr(s32)
2358 //
2359 // We can fix this up into something like this:
2360 //
2361 // G_STORE %x:gpr(s32)
2362 //
2363 // And then continue the selection process normally.
2364 Register DefDstReg = getSrcRegIgnoringCopies(I.getOperand(0).getReg(), MRI);
2365 if (!DefDstReg.isValid())
2366 return false;
2367 LLT DefDstTy = MRI.getType(DefDstReg);
2368 Register StoreSrcReg = I.getOperand(0).getReg();
2369 LLT StoreSrcTy = MRI.getType(StoreSrcReg);
2370
2371 // If we get something strange like a physical register, then we shouldn't
2372 // go any further.
2373 if (!DefDstTy.isValid())
2374 return false;
2375
2376 // Are the source and dst types the same size?
2377 if (DefDstTy.getSizeInBits() != StoreSrcTy.getSizeInBits())
2378 return false;
2379
2380 if (RBI.getRegBank(StoreSrcReg, MRI, TRI) ==
2381 RBI.getRegBank(DefDstReg, MRI, TRI))
2382 return false;
2383
2384 // We have a cross-bank copy, which is entering a store. Let's fold it.
2385 I.getOperand(0).setReg(DefDstReg);
2386 return true;
2387}
2388
2389bool AArch64InstructionSelector::earlySelect(MachineInstr &I) {
2390 assert(I.getParent() && "Instruction should be in a basic block!");
2391 assert(I.getParent()->getParent() && "Instruction should be in a function!");
2392
2393 MachineBasicBlock &MBB = *I.getParent();
2394 MachineFunction &MF = *MBB.getParent();
2395 MachineRegisterInfo &MRI = MF.getRegInfo();
2396
2397 switch (I.getOpcode()) {
2398 case AArch64::G_DUP: {
2399 // Before selecting a DUP instruction, check if it is better selected as a
2400 // MOV or load from a constant pool.
2401 Register Src = I.getOperand(1).getReg();
2402 auto ValAndVReg = getAnyConstantVRegValWithLookThrough(
2403 Src, MRI, /*LookThroughInstrs=*/true, /*LookThroughAnyExt=*/true);
2404 if (!ValAndVReg)
2405 return false;
2406 LLVMContext &Ctx = MF.getFunction().getContext();
2407 Register Dst = I.getOperand(0).getReg();
2409 MRI.getType(Dst).getNumElements(),
2410 ConstantInt::get(
2411 Type::getIntNTy(Ctx, MRI.getType(Dst).getScalarSizeInBits()),
2412 ValAndVReg->Value.trunc(MRI.getType(Dst).getScalarSizeInBits())));
2413 if (!emitConstantVector(Dst, CV, MIB, MRI))
2414 return false;
2415 I.eraseFromParent();
2416 return true;
2417 }
2418 case TargetOpcode::G_SEXT:
2419 // Check for i64 sext(i32 vector_extract) prior to tablegen to select SMOV
2420 // over a normal extend.
2421 if (selectUSMovFromExtend(I, MRI))
2422 return true;
2423 return false;
2424 case TargetOpcode::G_BR:
2425 return false;
2426 case TargetOpcode::G_SHL:
2427 return earlySelectSHL(I, MRI);
2428 case TargetOpcode::G_CONSTANT: {
2429 bool IsZero = false;
2430 if (I.getOperand(1).isCImm())
2431 IsZero = I.getOperand(1).getCImm()->isZero();
2432 else if (I.getOperand(1).isImm())
2433 IsZero = I.getOperand(1).getImm() == 0;
2434
2435 if (!IsZero)
2436 return false;
2437
2438 Register DefReg = I.getOperand(0).getReg();
2439 LLT Ty = MRI.getType(DefReg);
2440 if (Ty.getSizeInBits() == 64) {
2441 I.getOperand(1).ChangeToRegister(AArch64::XZR, false);
2442 RBI.constrainGenericRegister(DefReg, AArch64::GPR64RegClass, MRI);
2443 } else if (Ty.getSizeInBits() <= 32) {
2444 I.getOperand(1).ChangeToRegister(AArch64::WZR, false);
2445 RBI.constrainGenericRegister(DefReg, AArch64::GPR32RegClass, MRI);
2446 } else
2447 return false;
2448
2449 I.setDesc(TII.get(TargetOpcode::COPY));
2450 return true;
2451 }
2452
2453 case TargetOpcode::G_ADD: {
2454 // Check if this is being fed by a G_ICMP on either side.
2455 //
2456 // (cmp pred, x, y) + z
2457 //
2458 // In the above case, when the cmp is true, we increment z by 1. So, we can
2459 // fold the add into the cset for the cmp by using cinc.
2460 //
2461 // FIXME: This would probably be a lot nicer in PostLegalizerLowering.
2462 Register AddDst = I.getOperand(0).getReg();
2463 Register AddLHS = I.getOperand(1).getReg();
2464 Register AddRHS = I.getOperand(2).getReg();
2465 // Only handle scalars.
2466 LLT Ty = MRI.getType(AddLHS);
2467 if (Ty.isVector())
2468 return false;
2469 // Since G_ICMP is modeled as ADDS/SUBS/ANDS, we can handle 32 bits or 64
2470 // bits.
2471 unsigned Size = Ty.getSizeInBits();
2472 if (Size != 32 && Size != 64)
2473 return false;
2474 auto MatchCmp = [&](Register Reg) -> MachineInstr * {
2475 if (!MRI.hasOneNonDBGUse(Reg))
2476 return nullptr;
2477 // If the LHS of the add is 32 bits, then we want to fold a 32-bit
2478 // compare.
2479 if (Size == 32)
2480 return getOpcodeDef(TargetOpcode::G_ICMP, Reg, MRI);
2481 // We model scalar compares using 32-bit destinations right now.
2482 // If it's a 64-bit compare, it'll have 64-bit sources.
2483 Register ZExt;
2484 if (!mi_match(Reg, MRI,
2486 return nullptr;
2487 auto *Cmp = getOpcodeDef(TargetOpcode::G_ICMP, ZExt, MRI);
2488 if (!Cmp ||
2489 MRI.getType(Cmp->getOperand(2).getReg()).getSizeInBits() != 64)
2490 return nullptr;
2491 return Cmp;
2492 };
2493 // Try to match
2494 // z + (cmp pred, x, y)
2495 MachineInstr *Cmp = MatchCmp(AddRHS);
2496 if (!Cmp) {
2497 // (cmp pred, x, y) + z
2498 std::swap(AddLHS, AddRHS);
2499 Cmp = MatchCmp(AddRHS);
2500 if (!Cmp)
2501 return false;
2502 }
2503 auto &PredOp = Cmp->getOperand(1);
2505 emitIntegerCompare(/*LHS=*/Cmp->getOperand(2),
2506 /*RHS=*/Cmp->getOperand(3), PredOp, MIB);
2507 auto Pred = static_cast<CmpInst::Predicate>(PredOp.getPredicate());
2509 CmpInst::getInversePredicate(Pred), Cmp->getOperand(3).getReg(), &MRI);
2510 emitCSINC(/*Dst=*/AddDst, /*Src =*/AddLHS, /*Src2=*/AddLHS, InvCC, MIB);
2511 I.eraseFromParent();
2512 return true;
2513 }
2514 case TargetOpcode::G_OR: {
2515 // Look for operations that take the lower `Width=Size-ShiftImm` bits of
2516 // `ShiftSrc` and insert them into the upper `Width` bits of `MaskSrc` via
2517 // shifting and masking that we can replace with a BFI (encoded as a BFM).
2518 Register Dst = I.getOperand(0).getReg();
2519 LLT Ty = MRI.getType(Dst);
2520
2521 if (!Ty.isScalar())
2522 return false;
2523
2524 unsigned Size = Ty.getSizeInBits();
2525 if (Size != 32 && Size != 64)
2526 return false;
2527
2528 Register ShiftSrc;
2529 int64_t ShiftImm;
2530 Register MaskSrc;
2531 int64_t MaskImm;
2532 if (!mi_match(
2533 Dst, MRI,
2534 m_GOr(m_OneNonDBGUse(m_GShl(m_Reg(ShiftSrc), m_ICst(ShiftImm))),
2535 m_OneNonDBGUse(m_GAnd(m_Reg(MaskSrc), m_ICst(MaskImm))))))
2536 return false;
2537
2538 if (ShiftImm > Size || ((1ULL << ShiftImm) - 1ULL) != uint64_t(MaskImm))
2539 return false;
2540
2541 int64_t Immr = Size - ShiftImm;
2542 int64_t Imms = Size - ShiftImm - 1;
2543 unsigned Opc = Size == 32 ? AArch64::BFMWri : AArch64::BFMXri;
2544 emitInstr(Opc, {Dst}, {MaskSrc, ShiftSrc, Immr, Imms}, MIB);
2545 I.eraseFromParent();
2546 return true;
2547 }
2548 case TargetOpcode::G_FENCE: {
2549 if (I.getOperand(1).getImm() == 0)
2550 BuildMI(MBB, I, MIMetadata(I), TII.get(TargetOpcode::MEMBARRIER));
2551 else
2552 BuildMI(MBB, I, MIMetadata(I), TII.get(AArch64::DMB))
2553 .addImm(I.getOperand(0).getImm() == 4 ? 0x9 : 0xb);
2554 I.eraseFromParent();
2555 return true;
2556 }
2557 default:
2558 return false;
2559 }
2560}
2561
2562bool AArch64InstructionSelector::select(MachineInstr &I) {
2563 assert(I.getParent() && "Instruction should be in a basic block!");
2564 assert(I.getParent()->getParent() && "Instruction should be in a function!");
2565
2566 MachineBasicBlock &MBB = *I.getParent();
2567 MachineFunction &MF = *MBB.getParent();
2568 MachineRegisterInfo &MRI = MF.getRegInfo();
2569
2570 const AArch64Subtarget *Subtarget = &MF.getSubtarget<AArch64Subtarget>();
2571 if (Subtarget->requiresStrictAlign()) {
2572 // We don't support this feature yet.
2573 LLVM_DEBUG(dbgs() << "AArch64 GISel does not support strict-align yet\n");
2574 return false;
2575 }
2576
2578
2579 unsigned Opcode = I.getOpcode();
2580 // G_PHI requires same handling as PHI
2581 if (!I.isPreISelOpcode() || Opcode == TargetOpcode::G_PHI) {
2582 // Certain non-generic instructions also need some special handling.
2583
2584 if (Opcode == TargetOpcode::LOAD_STACK_GUARD) {
2586 return true;
2587 }
2588
2589 if (Opcode == TargetOpcode::PHI || Opcode == TargetOpcode::G_PHI) {
2590 const Register DefReg = I.getOperand(0).getReg();
2591 const LLT DefTy = MRI.getType(DefReg);
2592
2593 const RegClassOrRegBank &RegClassOrBank =
2594 MRI.getRegClassOrRegBank(DefReg);
2595
2596 const TargetRegisterClass *DefRC =
2598 if (!DefRC) {
2599 if (!DefTy.isValid()) {
2600 LLVM_DEBUG(dbgs() << "PHI operand has no type, not a gvreg?\n");
2601 return false;
2602 }
2603 const RegisterBank &RB = *cast<const RegisterBank *>(RegClassOrBank);
2604 DefRC = getRegClassForTypeOnBank(DefTy, RB);
2605 if (!DefRC) {
2606 LLVM_DEBUG(dbgs() << "PHI operand has unexpected size/bank\n");
2607 return false;
2608 }
2609 }
2610
2611 I.setDesc(TII.get(TargetOpcode::PHI));
2612
2613 return RBI.constrainGenericRegister(DefReg, *DefRC, MRI);
2614 }
2615
2616 if (I.isCopy())
2617 return selectCopy(I, TII, MRI, TRI, RBI);
2618
2619 if (I.isDebugInstr())
2620 return selectDebugInstr(I, MRI, RBI);
2621
2622 return true;
2623 }
2624
2625
2626 if (I.getNumOperands() != I.getNumExplicitOperands()) {
2627 LLVM_DEBUG(
2628 dbgs() << "Generic instruction has unexpected implicit operands\n");
2629 return false;
2630 }
2631
2632 // Try to do some lowering before we start instruction selecting. These
2633 // lowerings are purely transformations on the input G_MIR and so selection
2634 // must continue after any modification of the instruction.
2635 if (preISelLower(I)) {
2636 Opcode = I.getOpcode(); // The opcode may have been modified, refresh it.
2637 }
2638
2639 // There may be patterns where the importer can't deal with them optimally,
2640 // but does select it to a suboptimal sequence so our custom C++ selection
2641 // code later never has a chance to work on it. Therefore, we have an early
2642 // selection attempt here to give priority to certain selection routines
2643 // over the imported ones.
2644 if (earlySelect(I))
2645 return true;
2646
2647 if (selectImpl(I, *CoverageInfo))
2648 return true;
2649
2650 LLT Ty =
2651 I.getOperand(0).isReg() ? MRI.getType(I.getOperand(0).getReg()) : LLT{};
2652
2653 switch (Opcode) {
2654 case TargetOpcode::G_SBFX:
2655 case TargetOpcode::G_UBFX: {
2656 static const unsigned OpcTable[2][2] = {
2657 {AArch64::UBFMWri, AArch64::UBFMXri},
2658 {AArch64::SBFMWri, AArch64::SBFMXri}};
2659 bool IsSigned = Opcode == TargetOpcode::G_SBFX;
2660 unsigned Size = Ty.getSizeInBits();
2661 unsigned Opc = OpcTable[IsSigned][Size == 64];
2662 auto Cst1 =
2663 getIConstantVRegValWithLookThrough(I.getOperand(2).getReg(), MRI);
2664 assert(Cst1 && "Should have gotten a constant for src 1?");
2665 auto Cst2 =
2666 getIConstantVRegValWithLookThrough(I.getOperand(3).getReg(), MRI);
2667 assert(Cst2 && "Should have gotten a constant for src 2?");
2668 auto LSB = Cst1->Value.getZExtValue();
2669 auto Width = Cst2->Value.getZExtValue();
2670 auto BitfieldInst =
2671 MIB.buildInstr(Opc, {I.getOperand(0)}, {I.getOperand(1)})
2672 .addImm(LSB)
2673 .addImm(LSB + Width - 1);
2674 I.eraseFromParent();
2675 constrainSelectedInstRegOperands(*BitfieldInst, TII, TRI, RBI);
2676 return true;
2677 }
2678 case TargetOpcode::G_BRCOND:
2679 return selectCompareBranch(I, MF, MRI);
2680
2681 case TargetOpcode::G_BRINDIRECT: {
2682 const Function &Fn = MF.getFunction();
2683 if (std::optional<uint16_t> BADisc =
2685 auto MI = MIB.buildInstr(AArch64::BRA, {}, {I.getOperand(0).getReg()});
2686 MI.addImm(AArch64PACKey::IA);
2687 MI.addImm(*BADisc);
2688 MI.addReg(/*AddrDisc=*/AArch64::XZR);
2689 I.eraseFromParent();
2691 return true;
2692 }
2693 I.setDesc(TII.get(AArch64::BR));
2695 return true;
2696 }
2697
2698 case TargetOpcode::G_BRJT:
2699 return selectBrJT(I, MRI);
2700
2701 case AArch64::G_ADD_LOW: {
2702 // This op may have been separated from it's ADRP companion by the localizer
2703 // or some other code motion pass. Given that many CPUs will try to
2704 // macro fuse these operations anyway, select this into a MOVaddr pseudo
2705 // which will later be expanded into an ADRP+ADD pair after scheduling.
2706 MachineInstr *BaseMI = MRI.getVRegDef(I.getOperand(1).getReg());
2707 if (BaseMI->getOpcode() != AArch64::ADRP) {
2708 I.setDesc(TII.get(AArch64::ADDXri));
2709 I.addOperand(MachineOperand::CreateImm(0));
2711 return true;
2712 }
2714 "Expected small code model");
2715 auto Op1 = BaseMI->getOperand(1);
2716 auto Op2 = I.getOperand(2);
2717 auto MovAddr = MIB.buildInstr(AArch64::MOVaddr, {I.getOperand(0)}, {})
2718 .addGlobalAddress(Op1.getGlobal(), Op1.getOffset(),
2719 Op1.getTargetFlags())
2720 .addGlobalAddress(Op2.getGlobal(), Op2.getOffset(),
2721 Op2.getTargetFlags());
2722 I.eraseFromParent();
2723 constrainSelectedInstRegOperands(*MovAddr, TII, TRI, RBI);
2724 return true;
2725 }
2726
2727 case TargetOpcode::G_FCONSTANT: {
2728 const Register DefReg = I.getOperand(0).getReg();
2729 const LLT DefTy = MRI.getType(DefReg);
2730 const unsigned DefSize = DefTy.getSizeInBits();
2731 const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI);
2732
2733 const TargetRegisterClass &FPRRC = *getRegClassForTypeOnBank(DefTy, RB);
2734 // For 16, 64, and 128b values, emit a constant pool load.
2735 switch (DefSize) {
2736 default:
2737 llvm_unreachable("Unexpected destination size for G_FCONSTANT?");
2738 case 32:
2739 case 64: {
2740 bool OptForSize = shouldOptForSize(&MF);
2741 const auto &TLI = MF.getSubtarget().getTargetLowering();
2742 // If TLI says that this fpimm is illegal, then we'll expand to a
2743 // constant pool load.
2744 if (TLI->isFPImmLegal(I.getOperand(1).getFPImm()->getValueAPF(),
2745 EVT::getFloatingPointVT(DefSize), OptForSize))
2746 break;
2747 [[fallthrough]];
2748 }
2749 case 16:
2750 case 128: {
2751 auto *FPImm = I.getOperand(1).getFPImm();
2752 auto *LoadMI = emitLoadFromConstantPool(FPImm, MIB);
2753 if (!LoadMI) {
2754 LLVM_DEBUG(dbgs() << "Failed to load double constant pool entry\n");
2755 return false;
2756 }
2757 MIB.buildCopy({DefReg}, {LoadMI->getOperand(0).getReg()});
2758 I.eraseFromParent();
2759 return RBI.constrainGenericRegister(DefReg, FPRRC, MRI);
2760 }
2761 }
2762
2763 assert((DefSize == 32 || DefSize == 64) && "Unexpected const def size");
2764 // Either emit a FMOV, or emit a copy to emit a normal mov.
2765 const Register DefGPRReg = MRI.createVirtualRegister(
2766 DefSize == 32 ? &AArch64::GPR32RegClass : &AArch64::GPR64RegClass);
2767 MachineOperand &RegOp = I.getOperand(0);
2768 RegOp.setReg(DefGPRReg);
2769 MIB.setInsertPt(MIB.getMBB(), std::next(I.getIterator()));
2770 MIB.buildCopy({DefReg}, {DefGPRReg});
2771
2772 if (!RBI.constrainGenericRegister(DefReg, FPRRC, MRI)) {
2773 LLVM_DEBUG(dbgs() << "Failed to constrain G_FCONSTANT def operand\n");
2774 return false;
2775 }
2776
2777 MachineOperand &ImmOp = I.getOperand(1);
2778 ImmOp.ChangeToImmediate(
2780
2781 const unsigned MovOpc =
2782 DefSize == 64 ? AArch64::MOVi64imm : AArch64::MOVi32imm;
2783 I.setDesc(TII.get(MovOpc));
2785 return true;
2786 }
2787 case TargetOpcode::G_EXTRACT: {
2788 Register DstReg = I.getOperand(0).getReg();
2789 Register SrcReg = I.getOperand(1).getReg();
2790 LLT SrcTy = MRI.getType(SrcReg);
2791 LLT DstTy = MRI.getType(DstReg);
2792 (void)DstTy;
2793 unsigned SrcSize = SrcTy.getSizeInBits();
2794
2795 if (SrcTy.getSizeInBits() > 64) {
2796 // This should be an extract of an s128, which is like a vector extract.
2797 if (SrcTy.getSizeInBits() != 128)
2798 return false;
2799 // Only support extracting 64 bits from an s128 at the moment.
2800 if (DstTy.getSizeInBits() != 64)
2801 return false;
2802
2803 unsigned Offset = I.getOperand(2).getImm();
2804 if (Offset % 64 != 0)
2805 return false;
2806
2807 // Check we have the right regbank always.
2808 const RegisterBank &SrcRB = *RBI.getRegBank(SrcReg, MRI, TRI);
2809 const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
2810 assert(SrcRB.getID() == DstRB.getID() && "Wrong extract regbank!");
2811
2812 if (SrcRB.getID() == AArch64::GPRRegBankID) {
2813 auto NewI =
2814 MIB.buildInstr(TargetOpcode::COPY, {DstReg}, {})
2815 .addUse(SrcReg, {},
2816 Offset == 0 ? AArch64::sube64 : AArch64::subo64);
2817 constrainOperandRegClass(MF, TRI, MRI, TII, RBI, *NewI,
2818 AArch64::GPR64RegClass, NewI->getOperand(0));
2819 I.eraseFromParent();
2820 return true;
2821 }
2822
2823 // Emit the same code as a vector extract.
2824 // Offset must be a multiple of 64.
2825 unsigned LaneIdx = Offset / 64;
2826 MachineInstr *Extract = emitExtractVectorElt(
2827 DstReg, DstRB, LLT::scalar(64), SrcReg, LaneIdx, MIB);
2828 if (!Extract)
2829 return false;
2830 I.eraseFromParent();
2831 return true;
2832 }
2833
2834 I.setDesc(TII.get(SrcSize == 64 ? AArch64::UBFMXri : AArch64::UBFMWri));
2835 MachineInstrBuilder(MF, I).addImm(I.getOperand(2).getImm() +
2836 Ty.getSizeInBits() - 1);
2837
2838 if (SrcSize < 64) {
2839 assert(SrcSize == 32 && DstTy.getSizeInBits() == 16 &&
2840 "unexpected G_EXTRACT types");
2842 return true;
2843 }
2844
2845 DstReg = MRI.createGenericVirtualRegister(LLT::scalar(64));
2846 MIB.setInsertPt(MIB.getMBB(), std::next(I.getIterator()));
2847 MIB.buildInstr(TargetOpcode::COPY, {I.getOperand(0).getReg()}, {})
2848 .addReg(DstReg, {}, AArch64::sub_32);
2849 RBI.constrainGenericRegister(I.getOperand(0).getReg(),
2850 AArch64::GPR32RegClass, MRI);
2851 I.getOperand(0).setReg(DstReg);
2852
2854 return true;
2855 }
2856
2857 case TargetOpcode::G_INSERT: {
2858 LLT SrcTy = MRI.getType(I.getOperand(2).getReg());
2859 LLT DstTy = MRI.getType(I.getOperand(0).getReg());
2860 unsigned DstSize = DstTy.getSizeInBits();
2861 // Larger inserts are vectors, same-size ones should be something else by
2862 // now (split up or turned into COPYs).
2863 if (Ty.getSizeInBits() > 64 || SrcTy.getSizeInBits() > 32)
2864 return false;
2865
2866 I.setDesc(TII.get(DstSize == 64 ? AArch64::BFMXri : AArch64::BFMWri));
2867 unsigned LSB = I.getOperand(3).getImm();
2868 unsigned Width = MRI.getType(I.getOperand(2).getReg()).getSizeInBits();
2869 I.getOperand(3).setImm((DstSize - LSB) % DstSize);
2870 MachineInstrBuilder(MF, I).addImm(Width - 1);
2871
2872 if (DstSize < 64) {
2873 assert(DstSize == 32 && SrcTy.getSizeInBits() == 16 &&
2874 "unexpected G_INSERT types");
2876 return true;
2877 }
2878
2880 BuildMI(MBB, I.getIterator(), I.getDebugLoc(),
2881 TII.get(AArch64::SUBREG_TO_REG))
2882 .addDef(SrcReg)
2883 .addUse(I.getOperand(2).getReg())
2884 .addImm(AArch64::sub_32);
2885 RBI.constrainGenericRegister(I.getOperand(2).getReg(),
2886 AArch64::GPR32RegClass, MRI);
2887 I.getOperand(2).setReg(SrcReg);
2888
2890 return true;
2891 }
2892 case TargetOpcode::G_FRAME_INDEX: {
2893 // allocas and G_FRAME_INDEX are only supported in addrspace(0).
2894 if (Ty != LLT::pointer(0, 64)) {
2895 LLVM_DEBUG(dbgs() << "G_FRAME_INDEX pointer has type: " << Ty
2896 << ", expected: " << LLT::pointer(0, 64) << '\n');
2897 return false;
2898 }
2899 I.setDesc(TII.get(AArch64::ADDXri));
2900
2901 // MOs for a #0 shifted immediate.
2902 I.addOperand(MachineOperand::CreateImm(0));
2903 I.addOperand(MachineOperand::CreateImm(0));
2904
2906 return true;
2907 }
2908
2909 case TargetOpcode::G_GLOBAL_VALUE: {
2910 const GlobalValue *GV = nullptr;
2911 unsigned OpFlags;
2912 if (I.getOperand(1).isSymbol()) {
2913 OpFlags = I.getOperand(1).getTargetFlags();
2914 // Currently only used by "RtLibUseGOT".
2915 assert(OpFlags == AArch64II::MO_GOT);
2916 } else {
2917 GV = I.getOperand(1).getGlobal();
2918 if (GV->isThreadLocal()) {
2919 // We don't support instructions with emulated TLS variables yet
2920 if (TM.useEmulatedTLS())
2921 return false;
2922 return selectTLSGlobalValue(I, MRI);
2923 }
2924 OpFlags = STI.ClassifyGlobalReference(GV, TM);
2925 }
2926
2927 if (OpFlags & AArch64II::MO_GOT) {
2928 bool IsGOTSigned = MF.getInfo<AArch64FunctionInfo>()->hasELFSignedGOT();
2929 I.setDesc(TII.get(IsGOTSigned ? AArch64::LOADgotAUTH : AArch64::LOADgot));
2930 I.getOperand(1).setTargetFlags(OpFlags);
2931 I.addImplicitDefUseOperands(MF);
2932 } else if (TM.getCodeModel() == CodeModel::Large &&
2933 !TM.isPositionIndependent()) {
2934 // Materialize the global using movz/movk instructions.
2935 materializeLargeCMVal(I, GV, OpFlags);
2936 I.eraseFromParent();
2937 return true;
2938 } else if (TM.getCodeModel() == CodeModel::Tiny) {
2939 I.setDesc(TII.get(AArch64::ADR));
2940 I.getOperand(1).setTargetFlags(OpFlags);
2941 } else {
2942 I.setDesc(TII.get(AArch64::MOVaddr));
2943 I.getOperand(1).setTargetFlags(OpFlags | AArch64II::MO_PAGE);
2944 MachineInstrBuilder MIB(MF, I);
2945 MIB.addGlobalAddress(GV, I.getOperand(1).getOffset(),
2947 }
2949 return true;
2950 }
2951
2952 case TargetOpcode::G_PTRAUTH_GLOBAL_VALUE:
2953 return selectPtrAuthGlobalValue(I, MRI);
2954
2955 case TargetOpcode::G_ZEXTLOAD:
2956 case TargetOpcode::G_LOAD:
2957 case TargetOpcode::G_STORE: {
2958 GLoadStore &LdSt = cast<GLoadStore>(I);
2959 bool IsZExtLoad = I.getOpcode() == TargetOpcode::G_ZEXTLOAD;
2960 LLT PtrTy = MRI.getType(LdSt.getPointerReg());
2961
2962 // Can only handle AddressSpace 0, 64-bit pointers.
2963 if (PtrTy != LLT::pointer(0, 64)) {
2964 return false;
2965 }
2966
2967 uint64_t MemSizeInBytes = LdSt.getMemSize().getValue();
2968 unsigned MemSizeInBits = LdSt.getMemSizeInBits().getValue();
2969 AtomicOrdering Order = LdSt.getMMO().getSuccessOrdering();
2970
2971 // Need special instructions for atomics that affect ordering.
2972 if (isStrongerThanMonotonic(Order)) {
2973 assert(!isa<GZExtLoad>(LdSt));
2974 assert(MemSizeInBytes <= 8 &&
2975 "128-bit atomics should already be custom-legalized");
2976
2977 if (isa<GLoad>(LdSt)) {
2978 static constexpr unsigned LDAPROpcodes[] = {
2979 AArch64::LDAPRB, AArch64::LDAPRH, AArch64::LDAPRW, AArch64::LDAPRX};
2980 static constexpr unsigned LDAROpcodes[] = {
2981 AArch64::LDARB, AArch64::LDARH, AArch64::LDARW, AArch64::LDARX};
2982 ArrayRef<unsigned> Opcodes =
2983 STI.hasRCPC() && Order != AtomicOrdering::SequentiallyConsistent
2984 ? LDAPROpcodes
2985 : LDAROpcodes;
2986 I.setDesc(TII.get(Opcodes[Log2_32(MemSizeInBytes)]));
2987 } else {
2988 static constexpr unsigned Opcodes[] = {AArch64::STLRB, AArch64::STLRH,
2989 AArch64::STLRW, AArch64::STLRX};
2990 Register ValReg = LdSt.getReg(0);
2991 if (MRI.getType(ValReg).getSizeInBits() == 64 && MemSizeInBits != 64) {
2992 // Emit a subreg copy of 32 bits.
2993 Register NewVal = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
2994 MIB.buildInstr(TargetOpcode::COPY, {NewVal}, {})
2995 .addReg(I.getOperand(0).getReg(), {}, AArch64::sub_32);
2996 I.getOperand(0).setReg(NewVal);
2997 }
2998 I.setDesc(TII.get(Opcodes[Log2_32(MemSizeInBytes)]));
2999 }
3001 return true;
3002 }
3003
3004#ifndef NDEBUG
3005 const Register PtrReg = LdSt.getPointerReg();
3006 const RegisterBank &PtrRB = *RBI.getRegBank(PtrReg, MRI, TRI);
3007 // Check that the pointer register is valid.
3008 assert(PtrRB.getID() == AArch64::GPRRegBankID &&
3009 "Load/Store pointer operand isn't a GPR");
3010 assert(MRI.getType(PtrReg).isPointer() &&
3011 "Load/Store pointer operand isn't a pointer");
3012#endif
3013
3014 const Register ValReg = LdSt.getReg(0);
3015 const RegisterBank &RB = *RBI.getRegBank(ValReg, MRI, TRI);
3016 LLT ValTy = MRI.getType(ValReg);
3017
3018 // The code below doesn't support truncating stores, so we need to split it
3019 // again.
3020 if (isa<GStore>(LdSt) && ValTy.getSizeInBits() > MemSizeInBits) {
3021 unsigned SubReg;
3022 LLT MemTy = LdSt.getMMO().getMemoryType();
3023 auto *RC = getRegClassForTypeOnBank(MemTy, RB);
3024 if (!getSubRegForClass(RC, TRI, SubReg))
3025 return false;
3026
3027 // Generate a subreg copy.
3028 auto Copy = MIB.buildInstr(TargetOpcode::COPY, {MemTy}, {})
3029 .addReg(ValReg, {}, SubReg)
3030 .getReg(0);
3031 RBI.constrainGenericRegister(Copy, *RC, MRI);
3032 LdSt.getOperand(0).setReg(Copy);
3033 } else if (isa<GLoad>(LdSt) && ValTy.getSizeInBits() > MemSizeInBits) {
3034 // If this is an any-extending load from the FPR bank, split it into a regular
3035 // load + extend.
3036 if (RB.getID() == AArch64::FPRRegBankID) {
3037 unsigned SubReg;
3038 LLT MemTy = LdSt.getMMO().getMemoryType();
3039 auto *RC = getRegClassForTypeOnBank(MemTy, RB);
3040 if (!getSubRegForClass(RC, TRI, SubReg))
3041 return false;
3042 Register OldDst = LdSt.getReg(0);
3043 Register NewDst =
3045 LdSt.getOperand(0).setReg(NewDst);
3046 MRI.setRegBank(NewDst, RB);
3047 // Generate a SUBREG_TO_REG to extend it.
3048 MIB.setInsertPt(MIB.getMBB(), std::next(LdSt.getIterator()));
3049 MIB.buildInstr(AArch64::SUBREG_TO_REG, {OldDst}, {})
3050 .addUse(NewDst)
3051 .addImm(SubReg);
3052 auto SubRegRC = getRegClassForTypeOnBank(MRI.getType(OldDst), RB);
3053 RBI.constrainGenericRegister(OldDst, *SubRegRC, MRI);
3054 MIB.setInstr(LdSt);
3055 ValTy = MemTy; // This is no longer an extending load.
3056 }
3057 }
3058
3059 // Helper lambda for partially selecting I. Either returns the original
3060 // instruction with an updated opcode, or a new instruction.
3061 auto SelectLoadStoreAddressingMode = [&]() -> MachineInstr * {
3062 bool IsStore = isa<GStore>(I);
3063 const unsigned NewOpc =
3064 selectLoadStoreUIOp(I.getOpcode(), RB.getID(), MemSizeInBits);
3065 if (NewOpc == I.getOpcode())
3066 return nullptr;
3067 // Check if we can fold anything into the addressing mode.
3068 auto AddrModeFns =
3069 selectAddrModeIndexed(I.getOperand(1), MemSizeInBytes);
3070 if (!AddrModeFns) {
3071 // Can't fold anything. Use the original instruction.
3072 I.setDesc(TII.get(NewOpc));
3073 I.addOperand(MachineOperand::CreateImm(0));
3074 return &I;
3075 }
3076
3077 // Folded something. Create a new instruction and return it.
3078 auto NewInst = MIB.buildInstr(NewOpc, {}, {}, I.getFlags());
3079 Register CurValReg = I.getOperand(0).getReg();
3080 IsStore ? NewInst.addUse(CurValReg) : NewInst.addDef(CurValReg);
3081 NewInst.cloneMemRefs(I);
3082 for (auto &Fn : *AddrModeFns)
3083 Fn(NewInst);
3084 I.eraseFromParent();
3085 return &*NewInst;
3086 };
3087
3088 MachineInstr *LoadStore = SelectLoadStoreAddressingMode();
3089 if (!LoadStore)
3090 return false;
3091
3092 // If we're storing a 0, use WZR/XZR.
3093 if (Opcode == TargetOpcode::G_STORE) {
3095 LoadStore->getOperand(0).getReg(), MRI);
3096 if (CVal && CVal->Value == 0) {
3097 switch (LoadStore->getOpcode()) {
3098 case AArch64::STRWui:
3099 case AArch64::STRHHui:
3100 case AArch64::STRBBui:
3101 LoadStore->getOperand(0).setReg(AArch64::WZR);
3102 break;
3103 case AArch64::STRXui:
3104 LoadStore->getOperand(0).setReg(AArch64::XZR);
3105 break;
3106 }
3107 }
3108 }
3109
3110 if (IsZExtLoad || (Opcode == TargetOpcode::G_LOAD &&
3111 ValTy == LLT::scalar(64) && MemSizeInBits == 32)) {
3112 // The any/zextload from a smaller type to i32 should be handled by the
3113 // importer.
3114 if (MRI.getType(LoadStore->getOperand(0).getReg()).getSizeInBits() != 64)
3115 return false;
3116 // If we have an extending load then change the load's type to be a
3117 // narrower reg and zero_extend with SUBREG_TO_REG.
3118 Register LdReg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
3119 Register DstReg = LoadStore->getOperand(0).getReg();
3120 LoadStore->getOperand(0).setReg(LdReg);
3121
3122 MIB.setInsertPt(MIB.getMBB(), std::next(LoadStore->getIterator()));
3123 MIB.buildInstr(AArch64::SUBREG_TO_REG, {DstReg}, {})
3124 .addUse(LdReg)
3125 .addImm(AArch64::sub_32);
3126 constrainSelectedInstRegOperands(*LoadStore, TII, TRI, RBI);
3127 return RBI.constrainGenericRegister(DstReg, AArch64::GPR64allRegClass,
3128 MRI);
3129 }
3130 constrainSelectedInstRegOperands(*LoadStore, TII, TRI, RBI);
3131 return true;
3132 }
3133
3134 case TargetOpcode::G_INDEXED_ZEXTLOAD:
3135 case TargetOpcode::G_INDEXED_SEXTLOAD:
3136 return selectIndexedExtLoad(I, MRI);
3137 case TargetOpcode::G_INDEXED_LOAD:
3138 return selectIndexedLoad(I, MRI);
3139 case TargetOpcode::G_INDEXED_STORE:
3140 return selectIndexedStore(cast<GIndexedStore>(I), MRI);
3141
3142 case TargetOpcode::G_LSHR:
3143 case TargetOpcode::G_ASHR:
3144 if (MRI.getType(I.getOperand(0).getReg()).isVector())
3145 return selectVectorAshrLshr(I, MRI);
3146 [[fallthrough]];
3147 case TargetOpcode::G_SHL:
3148 if (Opcode == TargetOpcode::G_SHL &&
3149 MRI.getType(I.getOperand(0).getReg()).isVector())
3150 return selectVectorSHL(I, MRI);
3151
3152 // These shifts were legalized to have 64 bit shift amounts because we
3153 // want to take advantage of the selection patterns that assume the
3154 // immediates are s64s, however, selectBinaryOp will assume both operands
3155 // will have the same bit size.
3156 {
3157 Register SrcReg = I.getOperand(1).getReg();
3158 Register ShiftReg = I.getOperand(2).getReg();
3159 const LLT ShiftTy = MRI.getType(ShiftReg);
3160 const LLT SrcTy = MRI.getType(SrcReg);
3161 if (!SrcTy.isVector() && SrcTy.getSizeInBits() == 32 &&
3162 ShiftTy.getSizeInBits() == 64) {
3163 assert(!ShiftTy.isVector() && "unexpected vector shift ty");
3164 // Insert a subregister copy to implement a 64->32 trunc
3165 auto Trunc = MIB.buildInstr(TargetOpcode::COPY, {SrcTy}, {})
3166 .addReg(ShiftReg, {}, AArch64::sub_32);
3167 MRI.setRegBank(Trunc.getReg(0), RBI.getRegBank(AArch64::GPRRegBankID));
3168 I.getOperand(2).setReg(Trunc.getReg(0));
3169 }
3170 }
3171 [[fallthrough]];
3172 case TargetOpcode::G_OR: {
3173 // Reject the various things we don't support yet.
3174 if (unsupportedBinOp(I, RBI, MRI, TRI))
3175 return false;
3176
3177 const unsigned OpSize = Ty.getSizeInBits();
3178
3179 const Register DefReg = I.getOperand(0).getReg();
3180 const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI);
3181
3182 const unsigned NewOpc = selectBinaryOp(I.getOpcode(), RB.getID(), OpSize);
3183 if (NewOpc == I.getOpcode())
3184 return false;
3185
3186 I.setDesc(TII.get(NewOpc));
3187 // FIXME: Should the type be always reset in setDesc?
3188
3189 // Now that we selected an opcode, we need to constrain the register
3190 // operands to use appropriate classes.
3192 return true;
3193 }
3194
3195 case TargetOpcode::G_PTR_ADD: {
3196 emitADD(I.getOperand(0).getReg(), I.getOperand(1), I.getOperand(2), MIB);
3197 I.eraseFromParent();
3198 return true;
3199 }
3200
3201 case TargetOpcode::G_SADDE:
3202 case TargetOpcode::G_UADDE:
3203 case TargetOpcode::G_SSUBE:
3204 case TargetOpcode::G_USUBE:
3205 case TargetOpcode::G_SADDO:
3206 case TargetOpcode::G_UADDO:
3207 case TargetOpcode::G_SSUBO:
3208 case TargetOpcode::G_USUBO:
3209 return selectOverflowOp(I, MRI);
3210
3211 case TargetOpcode::G_PTRMASK: {
3212 Register MaskReg = I.getOperand(2).getReg();
3213 std::optional<int64_t> MaskVal = getIConstantVRegSExtVal(MaskReg, MRI);
3214 // TODO: Implement arbitrary cases
3215 if (!MaskVal || !isShiftedMask_64(*MaskVal))
3216 return false;
3217
3218 uint64_t Mask = *MaskVal;
3219 I.setDesc(TII.get(AArch64::ANDXri));
3220 I.getOperand(2).ChangeToImmediate(
3222
3224 return true;
3225 }
3226 case TargetOpcode::G_PTRTOINT:
3227 case TargetOpcode::G_TRUNC: {
3228 const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
3229 const LLT SrcTy = MRI.getType(I.getOperand(1).getReg());
3230
3231 const Register DstReg = I.getOperand(0).getReg();
3232 const Register SrcReg = I.getOperand(1).getReg();
3233
3234 const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
3235 const RegisterBank &SrcRB = *RBI.getRegBank(SrcReg, MRI, TRI);
3236
3237 if (DstRB.getID() != SrcRB.getID()) {
3238 LLVM_DEBUG(
3239 dbgs() << "G_TRUNC/G_PTRTOINT input/output on different banks\n");
3240 return false;
3241 }
3242
3243 if (DstRB.getID() == AArch64::GPRRegBankID) {
3244 const TargetRegisterClass *DstRC = getRegClassForTypeOnBank(DstTy, DstRB);
3245 if (!DstRC)
3246 return false;
3247
3248 const TargetRegisterClass *SrcRC = getRegClassForTypeOnBank(SrcTy, SrcRB);
3249 if (!SrcRC)
3250 return false;
3251
3252 if (!RBI.constrainGenericRegister(SrcReg, *SrcRC, MRI) ||
3253 !RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) {
3254 LLVM_DEBUG(dbgs() << "Failed to constrain G_TRUNC/G_PTRTOINT\n");
3255 return false;
3256 }
3257
3258 if (DstRC == SrcRC) {
3259 // Nothing to be done
3260 } else if (Opcode == TargetOpcode::G_TRUNC && DstTy == LLT::scalar(32) &&
3261 SrcTy == LLT::scalar(64)) {
3262 llvm_unreachable("TableGen can import this case");
3263 return false;
3264 } else if (DstRC == &AArch64::GPR32RegClass &&
3265 SrcRC == &AArch64::GPR64RegClass) {
3266 I.getOperand(1).setSubReg(AArch64::sub_32);
3267 } else {
3268 LLVM_DEBUG(
3269 dbgs() << "Unhandled mismatched classes in G_TRUNC/G_PTRTOINT\n");
3270 return false;
3271 }
3272
3273 I.setDesc(TII.get(TargetOpcode::COPY));
3274 return true;
3275 } else if (DstRB.getID() == AArch64::FPRRegBankID) {
3276 if (DstTy == LLT::fixed_vector(4, 16) &&
3277 SrcTy == LLT::fixed_vector(4, 32)) {
3278 I.setDesc(TII.get(AArch64::XTNv4i16));
3280 return true;
3281 }
3282
3283 if (!SrcTy.isVector() && SrcTy.getSizeInBits() == 128) {
3284 MachineInstr *Extract = emitExtractVectorElt(
3285 DstReg, DstRB, LLT::scalar(DstTy.getSizeInBits()), SrcReg, 0, MIB);
3286 if (!Extract)
3287 return false;
3288 I.eraseFromParent();
3289 return true;
3290 }
3291
3292 // We might have a vector G_PTRTOINT, in which case just emit a COPY.
3293 if (Opcode == TargetOpcode::G_PTRTOINT) {
3294 assert(DstTy.isVector() && "Expected an FPR ptrtoint to be a vector");
3295 I.setDesc(TII.get(TargetOpcode::COPY));
3296 return selectCopy(I, TII, MRI, TRI, RBI);
3297 }
3298 }
3299
3300 return false;
3301 }
3302
3303 case TargetOpcode::G_ANYEXT: {
3304 if (selectUSMovFromExtend(I, MRI))
3305 return true;
3306
3307 const Register DstReg = I.getOperand(0).getReg();
3308 const Register SrcReg = I.getOperand(1).getReg();
3309
3310 const RegisterBank &RBDst = *RBI.getRegBank(DstReg, MRI, TRI);
3311 if (RBDst.getID() != AArch64::GPRRegBankID) {
3312 LLVM_DEBUG(dbgs() << "G_ANYEXT on bank: " << RBDst
3313 << ", expected: GPR\n");
3314 return false;
3315 }
3316
3317 const RegisterBank &RBSrc = *RBI.getRegBank(SrcReg, MRI, TRI);
3318 if (RBSrc.getID() != AArch64::GPRRegBankID) {
3319 LLVM_DEBUG(dbgs() << "G_ANYEXT on bank: " << RBSrc
3320 << ", expected: GPR\n");
3321 return false;
3322 }
3323
3324 const unsigned DstSize = MRI.getType(DstReg).getSizeInBits();
3325
3326 if (DstSize == 0) {
3327 LLVM_DEBUG(dbgs() << "G_ANYEXT operand has no size, not a gvreg?\n");
3328 return false;
3329 }
3330
3331 if (DstSize != 64 && DstSize > 32) {
3332 LLVM_DEBUG(dbgs() << "G_ANYEXT to size: " << DstSize
3333 << ", expected: 32 or 64\n");
3334 return false;
3335 }
3336 // At this point G_ANYEXT is just like a plain COPY, but we need
3337 // to explicitly form the 64-bit value if any.
3338 if (DstSize > 32) {
3339 Register ExtSrc = MRI.createVirtualRegister(&AArch64::GPR64allRegClass);
3340 BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::SUBREG_TO_REG))
3341 .addDef(ExtSrc)
3342 .addUse(SrcReg)
3343 .addImm(AArch64::sub_32);
3344 I.getOperand(1).setReg(ExtSrc);
3345 }
3346 return selectCopy(I, TII, MRI, TRI, RBI);
3347 }
3348
3349 case TargetOpcode::G_ZEXT:
3350 case TargetOpcode::G_SEXT_INREG:
3351 case TargetOpcode::G_SEXT: {
3352 if (selectUSMovFromExtend(I, MRI))
3353 return true;
3354
3355 unsigned Opcode = I.getOpcode();
3356 const bool IsSigned = Opcode != TargetOpcode::G_ZEXT;
3357 const Register DefReg = I.getOperand(0).getReg();
3358 Register SrcReg = I.getOperand(1).getReg();
3359 const LLT DstTy = MRI.getType(DefReg);
3360 const LLT SrcTy = MRI.getType(SrcReg);
3361 unsigned DstSize = DstTy.getSizeInBits();
3362 unsigned SrcSize = SrcTy.getSizeInBits();
3363
3364 // SEXT_INREG has the same src reg size as dst, the size of the value to be
3365 // extended is encoded in the imm.
3366 if (Opcode == TargetOpcode::G_SEXT_INREG)
3367 SrcSize = I.getOperand(2).getImm();
3368
3369 if (DstTy.isVector())
3370 return false; // Should be handled by imported patterns.
3371
3372 assert((*RBI.getRegBank(DefReg, MRI, TRI)).getID() ==
3373 AArch64::GPRRegBankID &&
3374 "Unexpected ext regbank");
3375
3376 MachineInstr *ExtI;
3377
3378 // First check if we're extending the result of a load which has a dest type
3379 // smaller than 32 bits, then this zext is redundant. GPR32 is the smallest
3380 // GPR register on AArch64 and all loads which are smaller automatically
3381 // zero-extend the upper bits. E.g.
3382 // %v(s8) = G_LOAD %p, :: (load 1)
3383 // %v2(s32) = G_ZEXT %v(s8)
3384 if (!IsSigned) {
3385 auto *LoadMI = getOpcodeDef(TargetOpcode::G_LOAD, SrcReg, MRI);
3386 bool IsGPR =
3387 RBI.getRegBank(SrcReg, MRI, TRI)->getID() == AArch64::GPRRegBankID;
3388 if (LoadMI && IsGPR) {
3389 const MachineMemOperand *MemOp = *LoadMI->memoperands_begin();
3390 unsigned BytesLoaded = MemOp->getSize().getValue();
3391 if (BytesLoaded < 4 && SrcTy.getSizeInBytes() == BytesLoaded)
3392 return selectCopy(I, TII, MRI, TRI, RBI);
3393 }
3394
3395 // For the 32-bit -> 64-bit case, we can emit a mov (ORRWrs)
3396 // + SUBREG_TO_REG.
3397 if (IsGPR && SrcSize == 32 && DstSize == 64) {
3398 Register SubregToRegSrc =
3399 MRI.createVirtualRegister(&AArch64::GPR32RegClass);
3400 const Register ZReg = AArch64::WZR;
3401 MIB.buildInstr(AArch64::ORRWrs, {SubregToRegSrc}, {ZReg, SrcReg})
3402 .addImm(0);
3403
3404 MIB.buildInstr(AArch64::SUBREG_TO_REG, {DefReg}, {})
3405 .addUse(SubregToRegSrc)
3406 .addImm(AArch64::sub_32);
3407
3408 if (!RBI.constrainGenericRegister(DefReg, AArch64::GPR64RegClass,
3409 MRI)) {
3410 LLVM_DEBUG(dbgs() << "Failed to constrain G_ZEXT destination\n");
3411 return false;
3412 }
3413
3414 if (!RBI.constrainGenericRegister(SrcReg, AArch64::GPR32RegClass,
3415 MRI)) {
3416 LLVM_DEBUG(dbgs() << "Failed to constrain G_ZEXT source\n");
3417 return false;
3418 }
3419
3420 I.eraseFromParent();
3421 return true;
3422 }
3423 }
3424
3425 if (DstSize == 64) {
3426 if (Opcode != TargetOpcode::G_SEXT_INREG) {
3427 // FIXME: Can we avoid manually doing this?
3428 if (!RBI.constrainGenericRegister(SrcReg, AArch64::GPR32RegClass,
3429 MRI)) {
3430 LLVM_DEBUG(dbgs() << "Failed to constrain " << TII.getName(Opcode)
3431 << " operand\n");
3432 return false;
3433 }
3434 SrcReg = MIB.buildInstr(AArch64::SUBREG_TO_REG,
3435 {&AArch64::GPR64RegClass}, {})
3436 .addUse(SrcReg)
3437 .addImm(AArch64::sub_32)
3438 .getReg(0);
3439 }
3440
3441 ExtI = MIB.buildInstr(IsSigned ? AArch64::SBFMXri : AArch64::UBFMXri,
3442 {DefReg}, {SrcReg})
3443 .addImm(0)
3444 .addImm(SrcSize - 1);
3445 } else if (DstSize <= 32) {
3446 ExtI = MIB.buildInstr(IsSigned ? AArch64::SBFMWri : AArch64::UBFMWri,
3447 {DefReg}, {SrcReg})
3448 .addImm(0)
3449 .addImm(SrcSize - 1);
3450 } else {
3451 return false;
3452 }
3453
3455 I.eraseFromParent();
3456 return true;
3457 }
3458
3459 case TargetOpcode::G_FREEZE:
3460 return selectCopy(I, TII, MRI, TRI, RBI);
3461
3462 case TargetOpcode::G_INTTOPTR:
3463 // The importer is currently unable to import pointer types since they
3464 // didn't exist in SelectionDAG.
3465 return selectCopy(I, TII, MRI, TRI, RBI);
3466
3467 case TargetOpcode::G_BITCAST:
3468 // Imported SelectionDAG rules can handle every bitcast except those that
3469 // bitcast from a type to the same type. Ideally, these shouldn't occur
3470 // but we might not run an optimizer that deletes them. The other exception
3471 // is bitcasts involving pointer types, as SelectionDAG has no knowledge
3472 // of them.
3473 return selectCopy(I, TII, MRI, TRI, RBI);
3474
3475 case TargetOpcode::G_SELECT: {
3476 auto &Sel = cast<GSelect>(I);
3477 const Register CondReg = Sel.getCondReg();
3478 const Register TReg = Sel.getTrueReg();
3479 const Register FReg = Sel.getFalseReg();
3480
3481 if (tryOptSelect(Sel))
3482 return true;
3483
3484 // Make sure to use an unused vreg instead of wzr, so that the peephole
3485 // optimizations will be able to optimize these.
3486 Register DeadVReg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
3487 auto TstMI = MIB.buildInstr(AArch64::ANDSWri, {DeadVReg}, {CondReg})
3488 .addImm(AArch64_AM::encodeLogicalImmediate(1, 32));
3490 if (!emitSelect(Sel.getReg(0), TReg, FReg, AArch64CC::NE, MIB))
3491 return false;
3492 Sel.eraseFromParent();
3493 return true;
3494 }
3495 case TargetOpcode::G_ICMP: {
3496 if (Ty.isVector())
3497 return false;
3498
3499 if (Ty != LLT::scalar(32)) {
3500 LLVM_DEBUG(dbgs() << "G_ICMP result has type: " << Ty
3501 << ", expected: " << LLT::scalar(32) << '\n');
3502 return false;
3503 }
3504
3505 auto &PredOp = I.getOperand(1);
3506 emitIntegerCompare(I.getOperand(2), I.getOperand(3), PredOp, MIB);
3507 auto Pred = static_cast<CmpInst::Predicate>(PredOp.getPredicate());
3509 CmpInst::getInversePredicate(Pred), I.getOperand(3).getReg(), &MRI);
3510 emitCSINC(/*Dst=*/I.getOperand(0).getReg(), /*Src1=*/AArch64::WZR,
3511 /*Src2=*/AArch64::WZR, InvCC, MIB);
3512 I.eraseFromParent();
3513 return true;
3514 }
3515
3516 case TargetOpcode::G_FCMP: {
3517 CmpInst::Predicate Pred =
3518 static_cast<CmpInst::Predicate>(I.getOperand(1).getPredicate());
3519 if (!emitFPCompare(I.getOperand(2).getReg(), I.getOperand(3).getReg(), MIB,
3520 Pred) ||
3521 !emitCSetForFCmp(I.getOperand(0).getReg(), Pred, MIB))
3522 return false;
3523 I.eraseFromParent();
3524 return true;
3525 }
3526 case TargetOpcode::G_VASTART:
3527 return STI.isTargetDarwin() ? selectVaStartDarwin(I, MF, MRI)
3528 : selectVaStartAAPCS(I, MF, MRI);
3529 case TargetOpcode::G_INTRINSIC:
3530 return selectIntrinsic(I, MRI);
3531 case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS:
3532 return selectIntrinsicWithSideEffects(I, MRI);
3533 case TargetOpcode::G_IMPLICIT_DEF: {
3534 I.setDesc(TII.get(TargetOpcode::IMPLICIT_DEF));
3535 const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
3536 const Register DstReg = I.getOperand(0).getReg();
3537 const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
3538 const TargetRegisterClass *DstRC = getRegClassForTypeOnBank(DstTy, DstRB);
3539 RBI.constrainGenericRegister(DstReg, *DstRC, MRI);
3540 return true;
3541 }
3542 case TargetOpcode::G_BLOCK_ADDR: {
3543 Function *BAFn = I.getOperand(1).getBlockAddress()->getFunction();
3544 if (std::optional<uint16_t> BADisc =
3546 MIB.buildInstr(TargetOpcode::IMPLICIT_DEF, {AArch64::X16}, {});
3547 MIB.buildInstr(TargetOpcode::IMPLICIT_DEF, {AArch64::X17}, {});
3548 MIB.buildInstr(AArch64::MOVaddrPAC)
3549 .addBlockAddress(I.getOperand(1).getBlockAddress())
3551 .addReg(/*AddrDisc=*/AArch64::XZR)
3552 .addImm(*BADisc)
3553 .constrainAllUses(TII, TRI, RBI);
3554 MIB.buildCopy(I.getOperand(0).getReg(), Register(AArch64::X16));
3555 RBI.constrainGenericRegister(I.getOperand(0).getReg(),
3556 AArch64::GPR64RegClass, MRI);
3557 I.eraseFromParent();
3558 return true;
3559 }
3561 materializeLargeCMVal(I, I.getOperand(1).getBlockAddress(), 0);
3562 I.eraseFromParent();
3563 return true;
3564 } else {
3565 I.setDesc(TII.get(AArch64::MOVaddrBA));
3566 auto MovMI = BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::MOVaddrBA),
3567 I.getOperand(0).getReg())
3568 .addBlockAddress(I.getOperand(1).getBlockAddress(),
3569 /* Offset */ 0, AArch64II::MO_PAGE)
3571 I.getOperand(1).getBlockAddress(), /* Offset */ 0,
3573 I.eraseFromParent();
3575 return true;
3576 }
3577 }
3578 case AArch64::G_DUP: {
3579 // When the scalar of G_DUP is an s8/s16 gpr, they can't be selected by
3580 // imported patterns. Do it manually here. Avoiding generating s16 gpr is
3581 // difficult because at RBS we may end up pessimizing the fpr case if we
3582 // decided to add an anyextend to fix this. Manual selection is the most
3583 // robust solution for now.
3584 if (RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI)->getID() !=
3585 AArch64::GPRRegBankID)
3586 return false; // We expect the fpr regbank case to be imported.
3587 LLT VecTy = MRI.getType(I.getOperand(0).getReg());
3588 if (VecTy == LLT::fixed_vector(8, 8))
3589 I.setDesc(TII.get(AArch64::DUPv8i8gpr));
3590 else if (VecTy == LLT::fixed_vector(16, 8))
3591 I.setDesc(TII.get(AArch64::DUPv16i8gpr));
3592 else if (VecTy == LLT::fixed_vector(4, 16))
3593 I.setDesc(TII.get(AArch64::DUPv4i16gpr));
3594 else if (VecTy == LLT::fixed_vector(8, 16))
3595 I.setDesc(TII.get(AArch64::DUPv8i16gpr));
3596 else
3597 return false;
3599 return true;
3600 }
3601 case TargetOpcode::G_BUILD_VECTOR:
3602 return selectBuildVector(I, MRI);
3603 case TargetOpcode::G_MERGE_VALUES:
3604 return selectMergeValues(I, MRI);
3605 case TargetOpcode::G_UNMERGE_VALUES:
3606 return selectUnmergeValues(I, MRI);
3607 case TargetOpcode::G_SHUFFLE_VECTOR:
3608 return selectShuffleVector(I, MRI);
3609 case TargetOpcode::G_EXTRACT_VECTOR_ELT:
3610 return selectExtractElt(I, MRI);
3611 case TargetOpcode::G_CONCAT_VECTORS:
3612 return selectConcatVectors(I, MRI);
3613 case TargetOpcode::G_JUMP_TABLE:
3614 return selectJumpTable(I, MRI);
3615 case TargetOpcode::G_MEMCPY:
3616 case TargetOpcode::G_MEMCPY_INLINE:
3617 case TargetOpcode::G_MEMMOVE:
3618 case TargetOpcode::G_MEMSET:
3619 case TargetOpcode::G_MEMSET_INLINE:
3620 assert(STI.hasMOPS() && "Shouldn't get here without +mops feature");
3621 return selectMOPS(I, MRI);
3622 }
3623
3624 return false;
3625}
3626
3627bool AArch64InstructionSelector::selectAndRestoreState(MachineInstr &I) {
3628 MachineIRBuilderState OldMIBState = MIB.getState();
3629 bool Success = select(I);
3630 MIB.setState(OldMIBState);
3631 return Success;
3632}
3633
3634bool AArch64InstructionSelector::selectMOPS(MachineInstr &GI,
3635 MachineRegisterInfo &MRI) {
3636 unsigned Mopcode;
3637 switch (GI.getOpcode()) {
3638 case TargetOpcode::G_MEMCPY:
3639 case TargetOpcode::G_MEMCPY_INLINE:
3640 Mopcode = AArch64::MOPSMemoryCopyPseudo;
3641 break;
3642 case TargetOpcode::G_MEMMOVE:
3643 Mopcode = AArch64::MOPSMemoryMovePseudo;
3644 break;
3645 case TargetOpcode::G_MEMSET:
3646 case TargetOpcode::G_MEMSET_INLINE:
3647 // For tagged memset see llvm.aarch64.mops.memset.tag
3648 Mopcode = AArch64::MOPSMemorySetPseudo;
3649 break;
3650 }
3651
3652 auto &DstPtr = GI.getOperand(0);
3653 auto &SrcOrVal = GI.getOperand(1);
3654 auto &Size = GI.getOperand(2);
3655
3656 // Create copies of the registers that can be clobbered.
3657 const Register DstPtrCopy = MRI.cloneVirtualRegister(DstPtr.getReg());
3658 const Register SrcValCopy = MRI.cloneVirtualRegister(SrcOrVal.getReg());
3659 const Register SizeCopy = MRI.cloneVirtualRegister(Size.getReg());
3660
3661 const bool IsSet = Mopcode == AArch64::MOPSMemorySetPseudo;
3662 const auto &SrcValRegClass =
3663 IsSet ? AArch64::GPR64RegClass : AArch64::GPR64commonRegClass;
3664
3665 // Constrain to specific registers
3666 RBI.constrainGenericRegister(DstPtrCopy, AArch64::GPR64commonRegClass, MRI);
3667 RBI.constrainGenericRegister(SrcValCopy, SrcValRegClass, MRI);
3668 RBI.constrainGenericRegister(SizeCopy, AArch64::GPR64RegClass, MRI);
3669
3670 MIB.buildCopy(DstPtrCopy, DstPtr);
3671 MIB.buildCopy(SrcValCopy, SrcOrVal);
3672 MIB.buildCopy(SizeCopy, Size);
3673
3674 // New instruction uses the copied registers because it must update them.
3675 // The defs are not used since they don't exist in G_MEM*. They are still
3676 // tied.
3677 // Note: order of operands is different from G_MEMSET, G_MEMCPY, G_MEMMOVE
3678 Register DefDstPtr = MRI.createVirtualRegister(&AArch64::GPR64commonRegClass);
3679 Register DefSize = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
3680 if (IsSet) {
3681 MIB.buildInstr(Mopcode, {DefDstPtr, DefSize},
3682 {DstPtrCopy, SizeCopy, SrcValCopy});
3683 } else {
3684 Register DefSrcPtr = MRI.createVirtualRegister(&SrcValRegClass);
3685 MIB.buildInstr(Mopcode, {DefDstPtr, DefSrcPtr, DefSize},
3686 {DstPtrCopy, SrcValCopy, SizeCopy});
3687 }
3688
3689 GI.eraseFromParent();
3690 return true;
3691}
3692
3693bool AArch64InstructionSelector::selectBrJT(MachineInstr &I,
3694 MachineRegisterInfo &MRI) {
3695 assert(I.getOpcode() == TargetOpcode::G_BRJT && "Expected G_BRJT");
3696 Register JTAddr = I.getOperand(0).getReg();
3697 unsigned JTI = I.getOperand(1).getIndex();
3698 Register Index = I.getOperand(2).getReg();
3699
3700 MF->getInfo<AArch64FunctionInfo>()->setJumpTableEntryInfo(JTI, 4, nullptr);
3701
3702 // With aarch64-jump-table-hardening, we only expand the jump table dispatch
3703 // sequence later, to guarantee the integrity of the intermediate values.
3704 if (MF->getFunction().hasFnAttribute("aarch64-jump-table-hardening")) {
3706 if (STI.isTargetMachO()) {
3707 if (CM != CodeModel::Small && CM != CodeModel::Large)
3708 report_fatal_error("Unsupported code-model for hardened jump-table");
3709 } else {
3710 // Note that COFF support would likely also need JUMP_TABLE_DEBUG_INFO.
3711 assert(STI.isTargetELF() &&
3712 "jump table hardening only supported on MachO/ELF");
3713 if (CM != CodeModel::Small)
3714 report_fatal_error("Unsupported code-model for hardened jump-table");
3715 }
3716
3717 MIB.buildCopy({AArch64::X16}, I.getOperand(2).getReg());
3718 MIB.buildInstr(AArch64::BR_JumpTable)
3719 .addJumpTableIndex(I.getOperand(1).getIndex());
3720 I.eraseFromParent();
3721 return true;
3722 }
3723
3724 Register TargetReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
3725 Register ScratchReg = MRI.createVirtualRegister(&AArch64::GPR64spRegClass);
3726
3727 auto JumpTableInst = MIB.buildInstr(AArch64::JumpTableDest32,
3728 {TargetReg, ScratchReg}, {JTAddr, Index})
3729 .addJumpTableIndex(JTI);
3730 // Save the jump table info.
3731 MIB.buildInstr(TargetOpcode::JUMP_TABLE_DEBUG_INFO, {},
3732 {static_cast<int64_t>(JTI)});
3733 // Build the indirect branch.
3734 MIB.buildInstr(AArch64::BR, {}, {TargetReg});
3735 I.eraseFromParent();
3736 constrainSelectedInstRegOperands(*JumpTableInst, TII, TRI, RBI);
3737 return true;
3738}
3739
3740bool AArch64InstructionSelector::selectJumpTable(MachineInstr &I,
3741 MachineRegisterInfo &MRI) {
3742 assert(I.getOpcode() == TargetOpcode::G_JUMP_TABLE && "Expected jump table");
3743 assert(I.getOperand(1).isJTI() && "Jump table op should have a JTI!");
3744
3745 Register DstReg = I.getOperand(0).getReg();
3746 unsigned JTI = I.getOperand(1).getIndex();
3747 // We generate a MOVaddrJT which will get expanded to an ADRP + ADD later.
3748 auto MovMI =
3749 MIB.buildInstr(AArch64::MOVaddrJT, {DstReg}, {})
3750 .addJumpTableIndex(JTI, AArch64II::MO_PAGE)
3752 I.eraseFromParent();
3754 return true;
3755}
3756
3757bool AArch64InstructionSelector::selectTLSGlobalValue(
3758 MachineInstr &I, MachineRegisterInfo &MRI) {
3759 if (!STI.isTargetMachO())
3760 return false;
3761 MachineFunction &MF = *I.getParent()->getParent();
3762 MF.getFrameInfo().setAdjustsStack(true);
3763
3764 const auto &GlobalOp = I.getOperand(1);
3765 assert(GlobalOp.getOffset() == 0 &&
3766 "Shouldn't have an offset on TLS globals!");
3767 const GlobalValue &GV = *GlobalOp.getGlobal();
3768
3769 auto LoadGOT =
3770 MIB.buildInstr(AArch64::LOADgot, {&AArch64::GPR64commonRegClass}, {})
3771 .addGlobalAddress(&GV, 0, AArch64II::MO_TLS);
3772
3773 auto Load = MIB.buildInstr(AArch64::LDRXui, {&AArch64::GPR64commonRegClass},
3774 {LoadGOT.getReg(0)})
3775 .addImm(0);
3776
3777 MIB.buildCopy(Register(AArch64::X0), LoadGOT.getReg(0));
3778 // TLS calls preserve all registers except those that absolutely must be
3779 // trashed: X0 (it takes an argument), LR (it's a call) and NZCV (let's not be
3780 // silly).
3781 unsigned Opcode = getBLRCallOpcode(MF);
3782
3783 // With ptrauth-calls, the tlv access thunk pointer is authenticated (IA, 0).
3784 if (MF.getFunction().hasFnAttribute("ptrauth-calls")) {
3785 assert(Opcode == AArch64::BLR);
3786 Opcode = AArch64::BLRAAZ;
3787 }
3788
3789 MIB.buildInstr(Opcode, {}, {Load})
3790 .addUse(AArch64::X0, RegState::Implicit)
3791 .addDef(AArch64::X0, RegState::Implicit)
3792 .addRegMask(TRI.getTLSCallPreservedMask());
3793
3794 MIB.buildCopy(I.getOperand(0).getReg(), Register(AArch64::X0));
3795 RBI.constrainGenericRegister(I.getOperand(0).getReg(), AArch64::GPR64RegClass,
3796 MRI);
3797 I.eraseFromParent();
3798 return true;
3799}
3800
3801MachineInstr *AArch64InstructionSelector::emitScalarToVector(
3802 unsigned EltSize, const TargetRegisterClass *DstRC, Register Scalar,
3803 MachineIRBuilder &MIRBuilder) const {
3804 auto Undef = MIRBuilder.buildInstr(TargetOpcode::IMPLICIT_DEF, {DstRC}, {});
3805
3806 auto BuildFn = [&](unsigned SubregIndex) {
3807 auto Ins =
3808 MIRBuilder
3809 .buildInstr(TargetOpcode::INSERT_SUBREG, {DstRC}, {Undef, Scalar})
3810 .addImm(SubregIndex);
3813 return &*Ins;
3814 };
3815
3816 switch (EltSize) {
3817 case 8:
3818 return BuildFn(AArch64::bsub);
3819 case 16:
3820 return BuildFn(AArch64::hsub);
3821 case 32:
3822 return BuildFn(AArch64::ssub);
3823 case 64:
3824 return BuildFn(AArch64::dsub);
3825 default:
3826 return nullptr;
3827 }
3828}
3829
3830MachineInstr *
3831AArch64InstructionSelector::emitNarrowVector(Register DstReg, Register SrcReg,
3832 MachineIRBuilder &MIB,
3833 MachineRegisterInfo &MRI) const {
3834 LLT DstTy = MRI.getType(DstReg);
3835 const TargetRegisterClass *RC =
3836 getRegClassForTypeOnBank(DstTy, *RBI.getRegBank(SrcReg, MRI, TRI));
3837 if (RC != &AArch64::FPR32RegClass && RC != &AArch64::FPR64RegClass) {
3838 LLVM_DEBUG(dbgs() << "Unsupported register class!\n");
3839 return nullptr;
3840 }
3841 unsigned SubReg = 0;
3842 if (!getSubRegForClass(RC, TRI, SubReg))
3843 return nullptr;
3844 if (SubReg != AArch64::ssub && SubReg != AArch64::dsub) {
3845 LLVM_DEBUG(dbgs() << "Unsupported destination size! ("
3846 << DstTy.getSizeInBits() << "\n");
3847 return nullptr;
3848 }
3849 auto Copy = MIB.buildInstr(TargetOpcode::COPY, {DstReg}, {})
3850 .addReg(SrcReg, {}, SubReg);
3851 RBI.constrainGenericRegister(DstReg, *RC, MRI);
3852 return Copy;
3853}
3854
3855bool AArch64InstructionSelector::selectMergeValues(
3856 MachineInstr &I, MachineRegisterInfo &MRI) {
3857 assert(I.getOpcode() == TargetOpcode::G_MERGE_VALUES && "unexpected opcode");
3858 const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
3859 const LLT SrcTy = MRI.getType(I.getOperand(1).getReg());
3860 assert(!DstTy.isVector() && !SrcTy.isVector() && "invalid merge operation");
3861 const RegisterBank &RB = *RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI);
3862
3863 if (I.getNumOperands() != 3)
3864 return false;
3865
3866 // Merging 2 s64s into an s128.
3867 if (DstTy == LLT::scalar(128)) {
3868 if (SrcTy.getSizeInBits() != 64)
3869 return false;
3870 Register DstReg = I.getOperand(0).getReg();
3871 Register Src1Reg = I.getOperand(1).getReg();
3872 Register Src2Reg = I.getOperand(2).getReg();
3873 auto Tmp = MIB.buildInstr(TargetOpcode::IMPLICIT_DEF, {DstTy}, {});
3874 MachineInstr *InsMI = emitLaneInsert(std::nullopt, Tmp.getReg(0), Src1Reg,
3875 /* LaneIdx */ 0, RB, MIB);
3876 if (!InsMI)
3877 return false;
3878 MachineInstr *Ins2MI = emitLaneInsert(DstReg, InsMI->getOperand(0).getReg(),
3879 Src2Reg, /* LaneIdx */ 1, RB, MIB);
3880 if (!Ins2MI)
3881 return false;
3884 I.eraseFromParent();
3885 return true;
3886 }
3887
3888 if (RB.getID() != AArch64::GPRRegBankID)
3889 return false;
3890
3891 if (DstTy.getSizeInBits() != 64 || SrcTy.getSizeInBits() != 32)
3892 return false;
3893
3894 auto *DstRC = &AArch64::GPR64RegClass;
3895 Register SubToRegDef = MRI.createVirtualRegister(DstRC);
3896 MachineInstr &SubRegMI = *BuildMI(*I.getParent(), I, I.getDebugLoc(),
3897 TII.get(TargetOpcode::SUBREG_TO_REG))
3898 .addDef(SubToRegDef)
3899 .addUse(I.getOperand(1).getReg())
3900 .addImm(AArch64::sub_32);
3901 Register SubToRegDef2 = MRI.createVirtualRegister(DstRC);
3902 // Need to anyext the second scalar before we can use bfm
3903 MachineInstr &SubRegMI2 = *BuildMI(*I.getParent(), I, I.getDebugLoc(),
3904 TII.get(TargetOpcode::SUBREG_TO_REG))
3905 .addDef(SubToRegDef2)
3906 .addUse(I.getOperand(2).getReg())
3907 .addImm(AArch64::sub_32);
3908 MachineInstr &BFM =
3909 *BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::BFMXri))
3910 .addDef(I.getOperand(0).getReg())
3911 .addUse(SubToRegDef)
3912 .addUse(SubToRegDef2)
3913 .addImm(32)
3914 .addImm(31);
3915 constrainSelectedInstRegOperands(SubRegMI, TII, TRI, RBI);
3916 constrainSelectedInstRegOperands(SubRegMI2, TII, TRI, RBI);
3918 I.eraseFromParent();
3919 return true;
3920}
3921
3922static bool getLaneCopyOpcode(unsigned &CopyOpc, unsigned &ExtractSubReg,
3923 const unsigned EltSize) {
3924 // Choose a lane copy opcode and subregister based off of the size of the
3925 // vector's elements.
3926 switch (EltSize) {
3927 case 8:
3928 CopyOpc = AArch64::DUPi8;
3929 ExtractSubReg = AArch64::bsub;
3930 break;
3931 case 16:
3932 CopyOpc = AArch64::DUPi16;
3933 ExtractSubReg = AArch64::hsub;
3934 break;
3935 case 32:
3936 CopyOpc = AArch64::DUPi32;
3937 ExtractSubReg = AArch64::ssub;
3938 break;
3939 case 64:
3940 CopyOpc = AArch64::DUPi64;
3941 ExtractSubReg = AArch64::dsub;
3942 break;
3943 default:
3944 // Unknown size, bail out.
3945 LLVM_DEBUG(dbgs() << "Elt size '" << EltSize << "' unsupported.\n");
3946 return false;
3947 }
3948 return true;
3949}
3950
3951MachineInstr *AArch64InstructionSelector::emitExtractVectorElt(
3952 std::optional<Register> DstReg, const RegisterBank &DstRB, LLT ScalarTy,
3953 Register VecReg, unsigned LaneIdx, MachineIRBuilder &MIRBuilder) const {
3954 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
3955 unsigned CopyOpc = 0;
3956 unsigned ExtractSubReg = 0;
3957 if (!getLaneCopyOpcode(CopyOpc, ExtractSubReg, ScalarTy.getSizeInBits())) {
3958 LLVM_DEBUG(
3959 dbgs() << "Couldn't determine lane copy opcode for instruction.\n");
3960 return nullptr;
3961 }
3962
3963 const TargetRegisterClass *DstRC =
3964 getRegClassForTypeOnBank(ScalarTy, DstRB, true);
3965 if (!DstRC) {
3966 LLVM_DEBUG(dbgs() << "Could not determine destination register class.\n");
3967 return nullptr;
3968 }
3969
3970 const RegisterBank &VecRB = *RBI.getRegBank(VecReg, MRI, TRI);
3971 const LLT &VecTy = MRI.getType(VecReg);
3972 const TargetRegisterClass *VecRC =
3973 getRegClassForTypeOnBank(VecTy, VecRB, true);
3974 if (!VecRC) {
3975 LLVM_DEBUG(dbgs() << "Could not determine source register class.\n");
3976 return nullptr;
3977 }
3978
3979 // The register that we're going to copy into.
3980 Register InsertReg = VecReg;
3981 if (!DstReg)
3982 DstReg = MRI.createVirtualRegister(DstRC);
3983 // If the lane index is 0, we just use a subregister COPY.
3984 if (LaneIdx == 0) {
3985 auto Copy = MIRBuilder.buildInstr(TargetOpcode::COPY, {*DstReg}, {})
3986 .addReg(VecReg, {}, ExtractSubReg);
3987 RBI.constrainGenericRegister(*DstReg, *DstRC, MRI);
3988 return &*Copy;
3989 }
3990
3991 // Lane copies require 128-bit wide registers. If we're dealing with an
3992 // unpacked vector, then we need to move up to that width. Insert an implicit
3993 // def and a subregister insert to get us there.
3994 if (VecTy.getSizeInBits() != 128) {
3995 MachineInstr *ScalarToVector = emitScalarToVector(
3996 VecTy.getSizeInBits(), &AArch64::FPR128RegClass, VecReg, MIRBuilder);
3997 if (!ScalarToVector)
3998 return nullptr;
3999 InsertReg = ScalarToVector->getOperand(0).getReg();
4000 }
4001
4002 MachineInstr *LaneCopyMI =
4003 MIRBuilder.buildInstr(CopyOpc, {*DstReg}, {InsertReg}).addImm(LaneIdx);
4004 constrainSelectedInstRegOperands(*LaneCopyMI, TII, TRI, RBI);
4005
4006 // Make sure that we actually constrain the initial copy.
4007 RBI.constrainGenericRegister(*DstReg, *DstRC, MRI);
4008 return LaneCopyMI;
4009}
4010
4011bool AArch64InstructionSelector::selectExtractElt(
4012 MachineInstr &I, MachineRegisterInfo &MRI) {
4013 assert(I.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT &&
4014 "unexpected opcode!");
4015 Register DstReg = I.getOperand(0).getReg();
4016 const LLT NarrowTy = MRI.getType(DstReg);
4017 const Register SrcReg = I.getOperand(1).getReg();
4018 const LLT WideTy = MRI.getType(SrcReg);
4019 (void)WideTy;
4020 assert(WideTy.getSizeInBits() >= NarrowTy.getSizeInBits() &&
4021 "source register size too small!");
4022 assert(!NarrowTy.isVector() && "cannot extract vector into vector!");
4023
4024 // Need the lane index to determine the correct copy opcode.
4025 MachineOperand &LaneIdxOp = I.getOperand(2);
4026 assert(LaneIdxOp.isReg() && "Lane index operand was not a register?");
4027
4028 if (RBI.getRegBank(DstReg, MRI, TRI)->getID() != AArch64::FPRRegBankID) {
4029 LLVM_DEBUG(dbgs() << "Cannot extract into GPR.\n");
4030 return false;
4031 }
4032
4033 // Find the index to extract from.
4034 auto VRegAndVal = getIConstantVRegValWithLookThrough(LaneIdxOp.getReg(), MRI);
4035 if (!VRegAndVal)
4036 return false;
4037 unsigned LaneIdx = VRegAndVal->Value.getSExtValue();
4038
4039
4040 const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
4041 MachineInstr *Extract = emitExtractVectorElt(DstReg, DstRB, NarrowTy, SrcReg,
4042 LaneIdx, MIB);
4043 if (!Extract)
4044 return false;
4045
4046 I.eraseFromParent();
4047 return true;
4048}
4049
4050bool AArch64InstructionSelector::selectSplitVectorUnmerge(
4051 MachineInstr &I, MachineRegisterInfo &MRI) {
4052 unsigned NumElts = I.getNumOperands() - 1;
4053 Register SrcReg = I.getOperand(NumElts).getReg();
4054 const LLT NarrowTy = MRI.getType(I.getOperand(0).getReg());
4055 const LLT SrcTy = MRI.getType(SrcReg);
4056
4057 assert(NarrowTy.isVector() && "Expected an unmerge into vectors");
4058 if (SrcTy.getSizeInBits() > 128) {
4059 LLVM_DEBUG(dbgs() << "Unexpected vector type for vec split unmerge");
4060 return false;
4061 }
4062
4063 // We implement a split vector operation by treating the sub-vectors as
4064 // scalars and extracting them.
4065 const RegisterBank &DstRB =
4066 *RBI.getRegBank(I.getOperand(0).getReg(), MRI, TRI);
4067 for (unsigned OpIdx = 0; OpIdx < NumElts; ++OpIdx) {
4068 Register Dst = I.getOperand(OpIdx).getReg();
4069 MachineInstr *Extract =
4070 emitExtractVectorElt(Dst, DstRB, NarrowTy, SrcReg, OpIdx, MIB);
4071 if (!Extract)
4072 return false;
4073 }
4074 I.eraseFromParent();
4075 return true;
4076}
4077
4078bool AArch64InstructionSelector::selectUnmergeValues(MachineInstr &I,
4079 MachineRegisterInfo &MRI) {
4080 assert(I.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
4081 "unexpected opcode");
4082
4083 // TODO: Handle unmerging into GPRs and from scalars to scalars.
4084 if (RBI.getRegBank(I.getOperand(0).getReg(), MRI, TRI)->getID() !=
4085 AArch64::FPRRegBankID ||
4086 RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI)->getID() !=
4087 AArch64::FPRRegBankID) {
4088 LLVM_DEBUG(dbgs() << "Unmerging vector-to-gpr and scalar-to-scalar "
4089 "currently unsupported.\n");
4090 return false;
4091 }
4092
4093 // The last operand is the vector source register, and every other operand is
4094 // a register to unpack into.
4095 unsigned NumElts = I.getNumOperands() - 1;
4096 Register SrcReg = I.getOperand(NumElts).getReg();
4097 const LLT NarrowTy = MRI.getType(I.getOperand(0).getReg());
4098 const LLT WideTy = MRI.getType(SrcReg);
4099
4100 assert(WideTy.getSizeInBits() > NarrowTy.getSizeInBits() &&
4101 "source register size too small!");
4102
4103 if (!NarrowTy.isScalar())
4104 return selectSplitVectorUnmerge(I, MRI);
4105
4106 // Choose a lane copy opcode and subregister based off of the size of the
4107 // vector's elements.
4108 unsigned CopyOpc = 0;
4109 unsigned ExtractSubReg = 0;
4110 if (!getLaneCopyOpcode(CopyOpc, ExtractSubReg, NarrowTy.getSizeInBits()))
4111 return false;
4112
4113 // Set up for the lane copies.
4114 MachineBasicBlock &MBB = *I.getParent();
4115
4116 // Stores the registers we'll be copying from.
4117 SmallVector<Register, 4> InsertRegs;
4118
4119 // We'll use the first register twice, so we only need NumElts-1 registers.
4120 unsigned NumInsertRegs = NumElts - 1;
4121
4122 // If our elements fit into exactly 128 bits, then we can copy from the source
4123 // directly. Otherwise, we need to do a bit of setup with some subregister
4124 // inserts.
4125 if (NarrowTy.getSizeInBits() * NumElts == 128) {
4126 InsertRegs = SmallVector<Register, 4>(NumInsertRegs, SrcReg);
4127 } else {
4128 // No. We have to perform subregister inserts. For each insert, create an
4129 // implicit def and a subregister insert, and save the register we create.
4130 // For scalar sources, treat as a pseudo-vector of NarrowTy elements.
4131 unsigned EltSize = WideTy.isVector() ? WideTy.getScalarSizeInBits()
4132 : NarrowTy.getSizeInBits();
4133 const TargetRegisterClass *RC = getRegClassForTypeOnBank(
4134 LLT::fixed_vector(NumElts, EltSize), *RBI.getRegBank(SrcReg, MRI, TRI));
4135 unsigned SubReg = 0;
4136 bool Found = getSubRegForClass(RC, TRI, SubReg);
4137 (void)Found;
4138 assert(Found && "expected to find last operand's subeg idx");
4139 for (unsigned Idx = 0; Idx < NumInsertRegs; ++Idx) {
4140 Register ImpDefReg = MRI.createVirtualRegister(&AArch64::FPR128RegClass);
4141 MachineInstr &ImpDefMI =
4142 *BuildMI(MBB, I, I.getDebugLoc(), TII.get(TargetOpcode::IMPLICIT_DEF),
4143 ImpDefReg);
4144
4145 // Now, create the subregister insert from SrcReg.
4146 Register InsertReg = MRI.createVirtualRegister(&AArch64::FPR128RegClass);
4147 MachineInstr &InsMI =
4148 *BuildMI(MBB, I, I.getDebugLoc(),
4149 TII.get(TargetOpcode::INSERT_SUBREG), InsertReg)
4150 .addUse(ImpDefReg)
4151 .addUse(SrcReg)
4152 .addImm(SubReg);
4153
4154 constrainSelectedInstRegOperands(ImpDefMI, TII, TRI, RBI);
4156
4157 // Save the register so that we can copy from it after.
4158 InsertRegs.push_back(InsertReg);
4159 }
4160 }
4161
4162 // Now that we've created any necessary subregister inserts, we can
4163 // create the copies.
4164 //
4165 // Perform the first copy separately as a subregister copy.
4166 Register CopyTo = I.getOperand(0).getReg();
4167 auto FirstCopy = MIB.buildInstr(TargetOpcode::COPY, {CopyTo}, {})
4168 .addReg(InsertRegs[0], {}, ExtractSubReg);
4169 constrainSelectedInstRegOperands(*FirstCopy, TII, TRI, RBI);
4170
4171 // Now, perform the remaining copies as vector lane copies.
4172 unsigned LaneIdx = 1;
4173 for (Register InsReg : InsertRegs) {
4174 Register CopyTo = I.getOperand(LaneIdx).getReg();
4175 MachineInstr &CopyInst =
4176 *BuildMI(MBB, I, I.getDebugLoc(), TII.get(CopyOpc), CopyTo)
4177 .addUse(InsReg)
4178 .addImm(LaneIdx);
4179 constrainSelectedInstRegOperands(CopyInst, TII, TRI, RBI);
4180 ++LaneIdx;
4181 }
4182
4183 // Separately constrain the first copy's destination. Because of the
4184 // limitation in constrainOperandRegClass, we can't guarantee that this will
4185 // actually be constrained. So, do it ourselves using the second operand.
4186 const TargetRegisterClass *RC =
4187 MRI.getRegClassOrNull(I.getOperand(1).getReg());
4188 if (!RC) {
4189 LLVM_DEBUG(dbgs() << "Couldn't constrain copy destination.\n");
4190 return false;
4191 }
4192
4193 RBI.constrainGenericRegister(CopyTo, *RC, MRI);
4194 I.eraseFromParent();
4195 return true;
4196}
4197
4198bool AArch64InstructionSelector::selectConcatVectors(
4199 MachineInstr &I, MachineRegisterInfo &MRI) {
4200 assert(I.getOpcode() == TargetOpcode::G_CONCAT_VECTORS &&
4201 "Unexpected opcode");
4202 Register Dst = I.getOperand(0).getReg();
4203 Register Op1 = I.getOperand(1).getReg();
4204 Register Op2 = I.getOperand(2).getReg();
4205 MachineInstr *ConcatMI = emitVectorConcat(Dst, Op1, Op2, MIB);
4206 if (!ConcatMI)
4207 return false;
4208 I.eraseFromParent();
4209 return true;
4210}
4211
4212unsigned
4213AArch64InstructionSelector::emitConstantPoolEntry(const Constant *CPVal,
4214 MachineFunction &MF) const {
4215 Type *CPTy = CPVal->getType();
4216 Align Alignment = MF.getDataLayout().getPrefTypeAlign(CPTy);
4217
4218 MachineConstantPool *MCP = MF.getConstantPool();
4219 return MCP->getConstantPoolIndex(CPVal, Alignment);
4220}
4221
4222MachineInstr *AArch64InstructionSelector::emitLoadFromConstantPool(
4223 const Constant *CPVal, MachineIRBuilder &MIRBuilder) const {
4224 const TargetRegisterClass *RC;
4225 unsigned Opc;
4226 bool IsTiny = TM.getCodeModel() == CodeModel::Tiny;
4227 unsigned Size = MIRBuilder.getDataLayout().getTypeStoreSize(CPVal->getType());
4228 switch (Size) {
4229 case 16:
4230 RC = &AArch64::FPR128RegClass;
4231 Opc = IsTiny ? AArch64::LDRQl : AArch64::LDRQui;
4232 break;
4233 case 8:
4234 RC = &AArch64::FPR64RegClass;
4235 Opc = IsTiny ? AArch64::LDRDl : AArch64::LDRDui;
4236 break;
4237 case 4:
4238 RC = &AArch64::FPR32RegClass;
4239 Opc = IsTiny ? AArch64::LDRSl : AArch64::LDRSui;
4240 break;
4241 case 2:
4242 RC = &AArch64::FPR16RegClass;
4243 Opc = AArch64::LDRHui;
4244 break;
4245 default:
4246 LLVM_DEBUG(dbgs() << "Could not load from constant pool of type "
4247 << *CPVal->getType());
4248 return nullptr;
4249 }
4250
4251 MachineInstr *LoadMI = nullptr;
4252 auto &MF = MIRBuilder.getMF();
4253 unsigned CPIdx = emitConstantPoolEntry(CPVal, MF);
4254 if (IsTiny && (Size == 16 || Size == 8 || Size == 4)) {
4255 // Use load(literal) for tiny code model.
4256 LoadMI = &*MIRBuilder.buildInstr(Opc, {RC}, {}).addConstantPoolIndex(CPIdx);
4257 } else {
4258 auto Adrp =
4259 MIRBuilder.buildInstr(AArch64::ADRP, {&AArch64::GPR64RegClass}, {})
4260 .addConstantPoolIndex(CPIdx, 0, AArch64II::MO_PAGE);
4261
4262 LoadMI = &*MIRBuilder.buildInstr(Opc, {RC}, {Adrp})
4263 .addConstantPoolIndex(
4265
4267 }
4268
4269 MachinePointerInfo PtrInfo = MachinePointerInfo::getConstantPool(MF);
4270 LoadMI->addMemOperand(MF, MF.getMachineMemOperand(PtrInfo,
4272 Size, Align(Size)));
4274 return LoadMI;
4275}
4276
4277/// Return an <Opcode, SubregIndex> pair to do an vector elt insert of a given
4278/// size and RB.
4279static std::pair<unsigned, unsigned>
4280getInsertVecEltOpInfo(const RegisterBank &RB, unsigned EltSize) {
4281 unsigned Opc, SubregIdx;
4282 if (RB.getID() == AArch64::GPRRegBankID) {
4283 if (EltSize == 8) {
4284 Opc = AArch64::INSvi8gpr;
4285 SubregIdx = AArch64::bsub;
4286 } else if (EltSize == 16) {
4287 Opc = AArch64::INSvi16gpr;
4288 SubregIdx = AArch64::ssub;
4289 } else if (EltSize == 32) {
4290 Opc = AArch64::INSvi32gpr;
4291 SubregIdx = AArch64::ssub;
4292 } else if (EltSize == 64) {
4293 Opc = AArch64::INSvi64gpr;
4294 SubregIdx = AArch64::dsub;
4295 } else {
4296 llvm_unreachable("invalid elt size!");
4297 }
4298 } else {
4299 if (EltSize == 8) {
4300 Opc = AArch64::INSvi8lane;
4301 SubregIdx = AArch64::bsub;
4302 } else if (EltSize == 16) {
4303 Opc = AArch64::INSvi16lane;
4304 SubregIdx = AArch64::hsub;
4305 } else if (EltSize == 32) {
4306 Opc = AArch64::INSvi32lane;
4307 SubregIdx = AArch64::ssub;
4308 } else if (EltSize == 64) {
4309 Opc = AArch64::INSvi64lane;
4310 SubregIdx = AArch64::dsub;
4311 } else {
4312 llvm_unreachable("invalid elt size!");
4313 }
4314 }
4315 return std::make_pair(Opc, SubregIdx);
4316}
4317
4318MachineInstr *AArch64InstructionSelector::emitInstr(
4319 unsigned Opcode, std::initializer_list<llvm::DstOp> DstOps,
4320 std::initializer_list<llvm::SrcOp> SrcOps, MachineIRBuilder &MIRBuilder,
4321 const ComplexRendererFns &RenderFns) const {
4322 assert(Opcode && "Expected an opcode?");
4323 assert(!isPreISelGenericOpcode(Opcode) &&
4324 "Function should only be used to produce selected instructions!");
4325 auto MI = MIRBuilder.buildInstr(Opcode, DstOps, SrcOps);
4326 if (RenderFns)
4327 for (auto &Fn : *RenderFns)
4328 Fn(MI);
4330 return &*MI;
4331}
4332
4333MachineInstr *AArch64InstructionSelector::emitAddSub(
4334 const std::array<std::array<unsigned, 2>, 5> &AddrModeAndSizeToOpcode,
4335 Register Dst, MachineOperand &LHS, MachineOperand &RHS,
4336 MachineIRBuilder &MIRBuilder) const {
4337 MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
4338 assert(LHS.isReg() && RHS.isReg() && "Expected register operands?");
4339 auto Ty = MRI.getType(LHS.getReg());
4340 assert(!Ty.isVector() && "Expected a scalar or pointer?");
4341 unsigned Size = Ty.getSizeInBits();
4342 assert((Size == 32 || Size == 64) && "Expected a 32-bit or 64-bit type only");
4343 bool Is32Bit = Size == 32;
4344
4345 // INSTRri form with positive arithmetic immediate.
4346 if (auto Fns = selectArithImmed(RHS))
4347 return emitInstr(AddrModeAndSizeToOpcode[0][Is32Bit], {Dst}, {LHS},
4348 MIRBuilder, Fns);
4349
4350 // INSTRri form with negative arithmetic immediate.
4351 if (auto Fns = selectNegArithImmed(RHS))
4352 return emitInstr(AddrModeAndSizeToOpcode[3][Is32Bit], {Dst}, {LHS},
4353 MIRBuilder, Fns);
4354
4355 // INSTRrx form.
4356 if (auto Fns = selectArithExtendedRegister(RHS))
4357 return emitInstr(AddrModeAndSizeToOpcode[4][Is32Bit], {Dst}, {LHS},
4358 MIRBuilder, Fns);
4359
4360 // INSTRrs form.
4361 if (auto Fns = selectShiftedRegister(RHS))
4362 return emitInstr(AddrModeAndSizeToOpcode[1][Is32Bit], {Dst}, {LHS},
4363 MIRBuilder, Fns);
4364 return emitInstr(AddrModeAndSizeToOpcode[2][Is32Bit], {Dst}, {LHS, RHS},
4365 MIRBuilder);
4366}
4367
4368MachineInstr *
4369AArch64InstructionSelector::emitADD(Register DefReg, MachineOperand &LHS,
4370 MachineOperand &RHS,
4371 MachineIRBuilder &MIRBuilder) const {
4372 const std::array<std::array<unsigned, 2>, 5> OpcTable{
4373 {{AArch64::ADDXri, AArch64::ADDWri},
4374 {AArch64::ADDXrs, AArch64::ADDWrs},
4375 {AArch64::ADDXrr, AArch64::ADDWrr},
4376 {AArch64::SUBXri, AArch64::SUBWri},
4377 {AArch64::ADDXrx, AArch64::ADDWrx}}};
4378 return emitAddSub(OpcTable, DefReg, LHS, RHS, MIRBuilder);
4379}
4380
4381MachineInstr *
4382AArch64InstructionSelector::emitADDS(Register Dst, MachineOperand &LHS,
4383 MachineOperand &RHS,
4384 MachineIRBuilder &MIRBuilder) const {
4385 const std::array<std::array<unsigned, 2>, 5> OpcTable{
4386 {{AArch64::ADDSXri, AArch64::ADDSWri},
4387 {AArch64::ADDSXrs, AArch64::ADDSWrs},
4388 {AArch64::ADDSXrr, AArch64::ADDSWrr},
4389 {AArch64::SUBSXri, AArch64::SUBSWri},
4390 {AArch64::ADDSXrx, AArch64::ADDSWrx}}};
4391 return emitAddSub(OpcTable, Dst, LHS, RHS, MIRBuilder);
4392}
4393
4394MachineInstr *
4395AArch64InstructionSelector::emitSUBS(Register Dst, MachineOperand &LHS,
4396 MachineOperand &RHS,
4397 MachineIRBuilder &MIRBuilder) const {
4398 const std::array<std::array<unsigned, 2>, 5> OpcTable{
4399 {{AArch64::SUBSXri, AArch64::SUBSWri},
4400 {AArch64::SUBSXrs, AArch64::SUBSWrs},
4401 {AArch64::SUBSXrr, AArch64::SUBSWrr},
4402 {AArch64::ADDSXri, AArch64::ADDSWri},
4403 {AArch64::SUBSXrx, AArch64::SUBSWrx}}};
4404 return emitAddSub(OpcTable, Dst, LHS, RHS, MIRBuilder);
4405}
4406
4407MachineInstr *
4408AArch64InstructionSelector::emitADCS(Register Dst, MachineOperand &LHS,
4409 MachineOperand &RHS,
4410 MachineIRBuilder &MIRBuilder) const {
4411 assert(LHS.isReg() && RHS.isReg() && "Expected register operands?");
4412 MachineRegisterInfo *MRI = MIRBuilder.getMRI();
4413 bool Is32Bit = (MRI->getType(LHS.getReg()).getSizeInBits() == 32);
4414 static const unsigned OpcTable[2] = {AArch64::ADCSXr, AArch64::ADCSWr};
4415 return emitInstr(OpcTable[Is32Bit], {Dst}, {LHS, RHS}, MIRBuilder);
4416}
4417
4418MachineInstr *
4419AArch64InstructionSelector::emitSBCS(Register Dst, MachineOperand &LHS,
4420 MachineOperand &RHS,
4421 MachineIRBuilder &MIRBuilder) const {
4422 assert(LHS.isReg() && RHS.isReg() && "Expected register operands?");
4423 MachineRegisterInfo *MRI = MIRBuilder.getMRI();
4424 bool Is32Bit = (MRI->getType(LHS.getReg()).getSizeInBits() == 32);
4425 static const unsigned OpcTable[2] = {AArch64::SBCSXr, AArch64::SBCSWr};
4426 return emitInstr(OpcTable[Is32Bit], {Dst}, {LHS, RHS}, MIRBuilder);
4427}
4428
4429MachineInstr *
4430AArch64InstructionSelector::emitCMP(MachineOperand &LHS, MachineOperand &RHS,
4431 MachineIRBuilder &MIRBuilder) const {
4432 MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
4433 bool Is32Bit = MRI.getType(LHS.getReg()).getSizeInBits() == 32;
4434 auto RC = Is32Bit ? &AArch64::GPR32RegClass : &AArch64::GPR64RegClass;
4435 return emitSUBS(MRI.createVirtualRegister(RC), LHS, RHS, MIRBuilder);
4436}
4437
4438MachineInstr *
4439AArch64InstructionSelector::emitCMN(MachineOperand &LHS, MachineOperand &RHS,
4440 MachineIRBuilder &MIRBuilder) const {
4441 MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
4442 bool Is32Bit = (MRI.getType(LHS.getReg()).getSizeInBits() == 32);
4443 auto RC = Is32Bit ? &AArch64::GPR32RegClass : &AArch64::GPR64RegClass;
4444 return emitADDS(MRI.createVirtualRegister(RC), LHS, RHS, MIRBuilder);
4445}
4446
4447MachineInstr *
4448AArch64InstructionSelector::emitTST(MachineOperand &LHS, MachineOperand &RHS,
4449 MachineIRBuilder &MIRBuilder) const {
4450 assert(LHS.isReg() && RHS.isReg() && "Expected register operands?");
4451 MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
4452 LLT Ty = MRI.getType(LHS.getReg());
4453 unsigned RegSize = Ty.getSizeInBits();
4454 bool Is32Bit = (RegSize == 32);
4455 const unsigned OpcTable[3][2] = {{AArch64::ANDSXri, AArch64::ANDSWri},
4456 {AArch64::ANDSXrs, AArch64::ANDSWrs},
4457 {AArch64::ANDSXrr, AArch64::ANDSWrr}};
4458 // ANDS needs a logical immediate for its immediate form. Check if we can
4459 // fold one in.
4460 if (auto ValAndVReg = getIConstantVRegValWithLookThrough(RHS.getReg(), MRI)) {
4461 int64_t Imm = ValAndVReg->Value.getSExtValue();
4462
4464 auto TstMI = MIRBuilder.buildInstr(OpcTable[0][Is32Bit], {Ty}, {LHS});
4467 return &*TstMI;
4468 }
4469 }
4470
4471 if (auto Fns = selectLogicalShiftedRegister(RHS))
4472 return emitInstr(OpcTable[1][Is32Bit], {Ty}, {LHS}, MIRBuilder, Fns);
4473 return emitInstr(OpcTable[2][Is32Bit], {Ty}, {LHS, RHS}, MIRBuilder);
4474}
4475
4476MachineInstr *AArch64InstructionSelector::emitIntegerCompare(
4477 MachineOperand &LHS, MachineOperand &RHS, MachineOperand &Predicate,
4478 MachineIRBuilder &MIRBuilder) const {
4479 assert(LHS.isReg() && RHS.isReg() && "Expected LHS and RHS to be registers!");
4480 assert(Predicate.isPredicate() && "Expected predicate?");
4481 MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
4482 LLT CmpTy = MRI.getType(LHS.getReg());
4483 assert(!CmpTy.isVector() && "Expected scalar or pointer");
4484 unsigned Size = CmpTy.getSizeInBits();
4485 (void)Size;
4486 assert((Size == 32 || Size == 64) && "Expected a 32-bit or 64-bit LHS/RHS?");
4487 // Fold the compare into a cmn or tst if possible.
4488 if (auto FoldCmp = tryFoldIntegerCompare(LHS, RHS, Predicate, MIRBuilder))
4489 return FoldCmp;
4490 return emitCMP(LHS, RHS, MIRBuilder);
4491}
4492
4493MachineInstr *AArch64InstructionSelector::emitCSetForFCmp(
4494 Register Dst, CmpInst::Predicate Pred, MachineIRBuilder &MIRBuilder) const {
4495 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
4496#ifndef NDEBUG
4497 LLT Ty = MRI.getType(Dst);
4498 assert(!Ty.isVector() && Ty.getSizeInBits() == 32 &&
4499 "Expected a 32-bit scalar register?");
4500#endif
4501 const Register ZReg = AArch64::WZR;
4502 AArch64CC::CondCode CC1, CC2;
4503 changeFCMPPredToAArch64CC(Pred, CC1, CC2);
4504 auto InvCC1 = AArch64CC::getInvertedCondCode(CC1);
4505 if (CC2 == AArch64CC::AL)
4506 return emitCSINC(/*Dst=*/Dst, /*Src1=*/ZReg, /*Src2=*/ZReg, InvCC1,
4507 MIRBuilder);
4508 const TargetRegisterClass *RC = &AArch64::GPR32RegClass;
4509 Register Def1Reg = MRI.createVirtualRegister(RC);
4510 Register Def2Reg = MRI.createVirtualRegister(RC);
4511 auto InvCC2 = AArch64CC::getInvertedCondCode(CC2);
4512 emitCSINC(/*Dst=*/Def1Reg, /*Src1=*/ZReg, /*Src2=*/ZReg, InvCC1, MIRBuilder);
4513 emitCSINC(/*Dst=*/Def2Reg, /*Src1=*/ZReg, /*Src2=*/ZReg, InvCC2, MIRBuilder);
4514 auto OrMI = MIRBuilder.buildInstr(AArch64::ORRWrr, {Dst}, {Def1Reg, Def2Reg});
4516 return &*OrMI;
4517}
4518
4519MachineInstr *AArch64InstructionSelector::emitFPCompare(
4520 Register LHS, Register RHS, MachineIRBuilder &MIRBuilder,
4521 std::optional<CmpInst::Predicate> Pred) const {
4522 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
4523 LLT Ty = MRI.getType(LHS);
4524 if (Ty.isVector())
4525 return nullptr;
4526 unsigned OpSize = Ty.getSizeInBits();
4527 assert(OpSize == 16 || OpSize == 32 || OpSize == 64);
4528
4529 // If this is a compare against +0.0, then we don't have
4530 // to explicitly materialize a constant.
4531 const ConstantFP *FPImm = getConstantFPVRegVal(RHS, MRI);
4532 bool ShouldUseImm = FPImm && (FPImm->isZero() && !FPImm->isNegative());
4533
4534 auto IsEqualityPred = [](CmpInst::Predicate P) {
4535 return P == CmpInst::FCMP_OEQ || P == CmpInst::FCMP_ONE ||
4537 };
4538 if (!ShouldUseImm && Pred && IsEqualityPred(*Pred)) {
4539 // Try commuting the operands.
4540 const ConstantFP *LHSImm = getConstantFPVRegVal(LHS, MRI);
4541 if (LHSImm && (LHSImm->isZero() && !LHSImm->isNegative())) {
4542 ShouldUseImm = true;
4543 std::swap(LHS, RHS);
4544 }
4545 }
4546 unsigned CmpOpcTbl[2][3] = {
4547 {AArch64::FCMPHrr, AArch64::FCMPSrr, AArch64::FCMPDrr},
4548 {AArch64::FCMPHri, AArch64::FCMPSri, AArch64::FCMPDri}};
4549 unsigned CmpOpc =
4550 CmpOpcTbl[ShouldUseImm][OpSize == 16 ? 0 : (OpSize == 32 ? 1 : 2)];
4551
4552 // Partially build the compare. Decide if we need to add a use for the
4553 // third operand based off whether or not we're comparing against 0.0.
4554 auto CmpMI = MIRBuilder.buildInstr(CmpOpc).addUse(LHS);
4556 if (!ShouldUseImm)
4557 CmpMI.addUse(RHS);
4559 return &*CmpMI;
4560}
4561
4562MachineInstr *AArch64InstructionSelector::emitVectorConcat(
4563 std::optional<Register> Dst, Register Op1, Register Op2,
4564 MachineIRBuilder &MIRBuilder) const {
4565 // We implement a vector concat by:
4566 // 1. Use scalar_to_vector to insert the lower vector into the larger dest
4567 // 2. Insert the upper vector into the destination's upper element
4568 // TODO: some of this code is common with G_BUILD_VECTOR handling.
4569 MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
4570
4571 const LLT Op1Ty = MRI.getType(Op1);
4572 const LLT Op2Ty = MRI.getType(Op2);
4573
4574 if (Op1Ty != Op2Ty) {
4575 LLVM_DEBUG(dbgs() << "Could not do vector concat of differing vector tys");
4576 return nullptr;
4577 }
4578 assert(Op1Ty.isVector() && "Expected a vector for vector concat");
4579
4580 if (Op1Ty.getSizeInBits() >= 128) {
4581 LLVM_DEBUG(dbgs() << "Vector concat not supported for full size vectors");
4582 return nullptr;
4583 }
4584
4585 // At the moment we just support 64 bit vector concats.
4586 if (Op1Ty.getSizeInBits() != 64) {
4587 LLVM_DEBUG(dbgs() << "Vector concat supported for 64b vectors");
4588 return nullptr;
4589 }
4590
4591 const LLT ScalarTy = LLT::scalar(Op1Ty.getSizeInBits());
4592 const RegisterBank &FPRBank = *RBI.getRegBank(Op1, MRI, TRI);
4593 const TargetRegisterClass *DstRC =
4594 getRegClassForTypeOnBank(Op1Ty.multiplyElements(2), FPRBank);
4595
4596 MachineInstr *WidenedOp1 =
4597 emitScalarToVector(ScalarTy.getSizeInBits(), DstRC, Op1, MIRBuilder);
4598 MachineInstr *WidenedOp2 =
4599 emitScalarToVector(ScalarTy.getSizeInBits(), DstRC, Op2, MIRBuilder);
4600 if (!WidenedOp1 || !WidenedOp2) {
4601 LLVM_DEBUG(dbgs() << "Could not emit a vector from scalar value");
4602 return nullptr;
4603 }
4604
4605 // Now do the insert of the upper element.
4606 unsigned InsertOpc, InsSubRegIdx;
4607 std::tie(InsertOpc, InsSubRegIdx) =
4608 getInsertVecEltOpInfo(FPRBank, ScalarTy.getSizeInBits());
4609
4610 if (!Dst)
4611 Dst = MRI.createVirtualRegister(DstRC);
4612 auto InsElt =
4613 MIRBuilder
4614 .buildInstr(InsertOpc, {*Dst}, {WidenedOp1->getOperand(0).getReg()})
4615 .addImm(1) /* Lane index */
4616 .addUse(WidenedOp2->getOperand(0).getReg())
4617 .addImm(0);
4619 return &*InsElt;
4620}
4621
4622MachineInstr *
4623AArch64InstructionSelector::emitCSINC(Register Dst, Register Src1,
4624 Register Src2, AArch64CC::CondCode Pred,
4625 MachineIRBuilder &MIRBuilder) const {
4626 auto &MRI = *MIRBuilder.getMRI();
4627 const RegClassOrRegBank &RegClassOrBank = MRI.getRegClassOrRegBank(Dst);
4628 // If we used a register class, then this won't necessarily have an LLT.
4629 // Compute the size based off whether or not we have a class or bank.
4630 unsigned Size;
4631 if (const auto *RC = dyn_cast<const TargetRegisterClass *>(RegClassOrBank))
4632 Size = TRI.getRegSizeInBits(*RC);
4633 else
4634 Size = MRI.getType(Dst).getSizeInBits();
4635 // Some opcodes use s1.
4636 assert(Size <= 64 && "Expected 64 bits or less only!");
4637 static const unsigned OpcTable[2] = {AArch64::CSINCWr, AArch64::CSINCXr};
4638 unsigned Opc = OpcTable[Size == 64];
4639 auto CSINC = MIRBuilder.buildInstr(Opc, {Dst}, {Src1, Src2}).addImm(Pred);
4641 return &*CSINC;
4642}
4643
4644MachineInstr *AArch64InstructionSelector::emitCarryIn(MachineInstr &I,
4645 Register CarryReg) {
4646 MachineRegisterInfo *MRI = MIB.getMRI();
4647 unsigned Opcode = I.getOpcode();
4648
4649 // If the instruction is a SUB, we need to negate the carry,
4650 // because borrowing is indicated by carry-flag == 0.
4651 bool NeedsNegatedCarry =
4652 (Opcode == TargetOpcode::G_USUBE || Opcode == TargetOpcode::G_SSUBE);
4653
4654 // If the previous instruction will already produce the correct carry, do not
4655 // emit a carry generating instruction. E.g. for G_UADDE/G_USUBE sequences
4656 // generated during legalization of wide add/sub. This optimization depends on
4657 // these sequences not being interrupted by other instructions.
4658 // We have to select the previous instruction before the carry-using
4659 // instruction is deleted by the calling function, otherwise the previous
4660 // instruction might become dead and would get deleted.
4661 MachineInstr *SrcMI = MRI->getVRegDef(CarryReg);
4662 if (SrcMI == I.getPrevNode()) {
4663 if (auto *CarrySrcMI = dyn_cast<GAddSubCarryOut>(SrcMI)) {
4664 bool ProducesNegatedCarry = CarrySrcMI->isSub();
4665 if (NeedsNegatedCarry == ProducesNegatedCarry &&
4666 CarrySrcMI->isUnsigned() &&
4667 CarrySrcMI->getCarryOutReg() == CarryReg &&
4668 selectAndRestoreState(*SrcMI))
4669 return nullptr;
4670 }
4671 }
4672
4673 Register DeadReg = MRI->createVirtualRegister(&AArch64::GPR32RegClass);
4674
4675 if (NeedsNegatedCarry) {
4676 // (0 - Carry) sets !C in NZCV when Carry == 1
4677 Register ZReg = AArch64::WZR;
4678 return emitInstr(AArch64::SUBSWrr, {DeadReg}, {ZReg, CarryReg}, MIB);
4679 }
4680
4681 // (Carry - 1) sets !C in NZCV when Carry == 0
4682 auto Fns = select12BitValueWithLeftShift(1);
4683 return emitInstr(AArch64::SUBSWri, {DeadReg}, {CarryReg}, MIB, Fns);
4684}
4685
4686bool AArch64InstructionSelector::selectOverflowOp(MachineInstr &I,
4687 MachineRegisterInfo &MRI) {
4688 auto &CarryMI = cast<GAddSubCarryOut>(I);
4689
4690 if (auto *CarryInMI = dyn_cast<GAddSubCarryInOut>(&I)) {
4691 // Set NZCV carry according to carry-in VReg
4692 emitCarryIn(I, CarryInMI->getCarryInReg());
4693 }
4694
4695 // Emit the operation and get the correct condition code.
4696 auto OpAndCC = emitOverflowOp(I.getOpcode(), CarryMI.getDstReg(),
4697 CarryMI.getLHS(), CarryMI.getRHS(), MIB);
4698
4699 Register CarryOutReg = CarryMI.getCarryOutReg();
4700
4701 // Don't convert carry-out to VReg if it is never used
4702 if (!MRI.use_nodbg_empty(CarryOutReg)) {
4703 // Now, put the overflow result in the register given by the first operand
4704 // to the overflow op. CSINC increments the result when the predicate is
4705 // false, so to get the increment when it's true, we need to use the
4706 // inverse. In this case, we want to increment when carry is set.
4707 Register ZReg = AArch64::WZR;
4708 emitCSINC(/*Dst=*/CarryOutReg, /*Src1=*/ZReg, /*Src2=*/ZReg,
4709 getInvertedCondCode(OpAndCC.second), MIB);
4710 }
4711
4712 I.eraseFromParent();
4713 return true;
4714}
4715
4716std::pair<MachineInstr *, AArch64CC::CondCode>
4717AArch64InstructionSelector::emitOverflowOp(unsigned Opcode, Register Dst,
4718 MachineOperand &LHS,
4719 MachineOperand &RHS,
4720 MachineIRBuilder &MIRBuilder) const {
4721 switch (Opcode) {
4722 default:
4723 llvm_unreachable("Unexpected opcode!");
4724 case TargetOpcode::G_SADDO:
4725 return std::make_pair(emitADDS(Dst, LHS, RHS, MIRBuilder), AArch64CC::VS);
4726 case TargetOpcode::G_UADDO:
4727 return std::make_pair(emitADDS(Dst, LHS, RHS, MIRBuilder), AArch64CC::HS);
4728 case TargetOpcode::G_SSUBO:
4729 return std::make_pair(emitSUBS(Dst, LHS, RHS, MIRBuilder), AArch64CC::VS);
4730 case TargetOpcode::G_USUBO:
4731 return std::make_pair(emitSUBS(Dst, LHS, RHS, MIRBuilder), AArch64CC::LO);
4732 case TargetOpcode::G_SADDE:
4733 return std::make_pair(emitADCS(Dst, LHS, RHS, MIRBuilder), AArch64CC::VS);
4734 case TargetOpcode::G_UADDE:
4735 return std::make_pair(emitADCS(Dst, LHS, RHS, MIRBuilder), AArch64CC::HS);
4736 case TargetOpcode::G_SSUBE:
4737 return std::make_pair(emitSBCS(Dst, LHS, RHS, MIRBuilder), AArch64CC::VS);
4738 case TargetOpcode::G_USUBE:
4739 return std::make_pair(emitSBCS(Dst, LHS, RHS, MIRBuilder), AArch64CC::LO);
4740 }
4741}
4742
4743/// Returns true if @p Val is a tree of AND/OR/CMP operations that can be
4744/// expressed as a conjunction.
4745/// \param CanNegate Set to true if we can negate the whole sub-tree just by
4746/// changing the conditions on the CMP tests.
4747/// (this means we can call emitConjunctionRec() with
4748/// Negate==true on this sub-tree)
4749/// \param MustBeFirst Set to true if this subtree needs to be negated and we
4750/// cannot do the negation naturally. We are required to
4751/// emit the subtree first in this case.
4752/// \param WillNegate Is true if are called when the result of this
4753/// subexpression must be negated. This happens when the
4754/// outer expression is an OR. We can use this fact to know
4755/// that we have a double negation (or (or ...) ...) that
4756/// can be implemented for free.
4757static bool canEmitConjunction(Register Val, bool &CanNegate, bool &MustBeFirst,
4758 bool WillNegate, MachineRegisterInfo &MRI,
4759 unsigned Depth = 0) {
4760 if (!MRI.hasOneNonDBGUse(Val))
4761 return false;
4762 MachineInstr *ValDef = MRI.getVRegDef(Val);
4763 unsigned Opcode = ValDef->getOpcode();
4764 if (isa<GAnyCmp>(ValDef)) {
4765 CanNegate = true;
4766 MustBeFirst = false;
4767 return true;
4768 }
4769 // Protect against exponential runtime and stack overflow.
4770 if (Depth > 6)
4771 return false;
4772 if (Opcode == TargetOpcode::G_AND || Opcode == TargetOpcode::G_OR) {
4773 bool IsOR = Opcode == TargetOpcode::G_OR;
4774 Register O0 = ValDef->getOperand(1).getReg();
4775 Register O1 = ValDef->getOperand(2).getReg();
4776 bool CanNegateL;
4777 bool MustBeFirstL;
4778 if (!canEmitConjunction(O0, CanNegateL, MustBeFirstL, IsOR, MRI, Depth + 1))
4779 return false;
4780 bool CanNegateR;
4781 bool MustBeFirstR;
4782 if (!canEmitConjunction(O1, CanNegateR, MustBeFirstR, IsOR, MRI, Depth + 1))
4783 return false;
4784
4785 if (MustBeFirstL && MustBeFirstR)
4786 return false;
4787
4788 if (IsOR) {
4789 // For an OR expression we need to be able to naturally negate at least
4790 // one side or we cannot do the transformation at all.
4791 if (!CanNegateL && !CanNegateR)
4792 return false;
4793 // If we the result of the OR will be negated and we can naturally negate
4794 // the leaves, then this sub-tree as a whole negates naturally.
4795 CanNegate = WillNegate && CanNegateL && CanNegateR;
4796 // If we cannot naturally negate the whole sub-tree, then this must be
4797 // emitted first.
4798 MustBeFirst = !CanNegate;
4799 } else {
4800 assert(Opcode == TargetOpcode::G_AND && "Must be G_AND");
4801 // We cannot naturally negate an AND operation.
4802 CanNegate = false;
4803 MustBeFirst = MustBeFirstL || MustBeFirstR;
4804 }
4805 return true;
4806 }
4807 return false;
4808}
4809
4810MachineInstr *AArch64InstructionSelector::emitConditionalComparison(
4813 MachineIRBuilder &MIB) const {
4814 auto &MRI = *MIB.getMRI();
4815 LLT OpTy = MRI.getType(LHS);
4816 unsigned CCmpOpc;
4817 std::optional<ValueAndVReg> C;
4818 if (CmpInst::isIntPredicate(CC)) {
4819 assert(OpTy.getSizeInBits() == 32 || OpTy.getSizeInBits() == 64);
4821 if (!C || C->Value.sgt(31) || C->Value.slt(-31))
4822 CCmpOpc = OpTy.getSizeInBits() == 32 ? AArch64::CCMPWr : AArch64::CCMPXr;
4823 else if (C->Value.ule(31))
4824 CCmpOpc = OpTy.getSizeInBits() == 32 ? AArch64::CCMPWi : AArch64::CCMPXi;
4825 else
4826 CCmpOpc = OpTy.getSizeInBits() == 32 ? AArch64::CCMNWi : AArch64::CCMNXi;
4827 } else {
4828 assert(OpTy.getSizeInBits() == 16 || OpTy.getSizeInBits() == 32 ||
4829 OpTy.getSizeInBits() == 64);
4830 switch (OpTy.getSizeInBits()) {
4831 case 16:
4832 assert(STI.hasFullFP16() && "Expected Full FP16 for fp16 comparisons");
4833 CCmpOpc = AArch64::FCCMPHrr;
4834 break;
4835 case 32:
4836 CCmpOpc = AArch64::FCCMPSrr;
4837 break;
4838 case 64:
4839 CCmpOpc = AArch64::FCCMPDrr;
4840 break;
4841 default:
4842 return nullptr;
4843 }
4844 }
4846 unsigned NZCV = AArch64CC::getNZCVToSatisfyCondCode(InvOutCC);
4847 auto CCmp =
4848 MIB.buildInstr(CCmpOpc, {}, {LHS});
4849 if (CCmpOpc == AArch64::CCMPWi || CCmpOpc == AArch64::CCMPXi)
4850 CCmp.addImm(C->Value.getZExtValue());
4851 else if (CCmpOpc == AArch64::CCMNWi || CCmpOpc == AArch64::CCMNXi)
4852 CCmp.addImm(C->Value.abs().getZExtValue());
4853 else
4854 CCmp.addReg(RHS);
4855 CCmp.addImm(NZCV).addImm(Predicate);
4857 return &*CCmp;
4858}
4859
4860MachineInstr *AArch64InstructionSelector::emitConjunctionRec(
4861 Register Val, AArch64CC::CondCode &OutCC, bool Negate, Register CCOp,
4862 AArch64CC::CondCode Predicate, MachineIRBuilder &MIB) const {
4863 // We're at a tree leaf, produce a conditional comparison operation.
4864 auto &MRI = *MIB.getMRI();
4865 MachineInstr *ValDef = MRI.getVRegDef(Val);
4866 unsigned Opcode = ValDef->getOpcode();
4867 if (auto *Cmp = dyn_cast<GAnyCmp>(ValDef)) {
4868 Register LHS = Cmp->getLHSReg();
4869 Register RHS = Cmp->getRHSReg();
4870 CmpInst::Predicate CC = Cmp->getCond();
4871 if (Negate)
4873 if (isa<GICmp>(Cmp)) {
4874 OutCC = changeICMPPredToAArch64CC(CC, RHS, MIB.getMRI());
4875 } else {
4876 // Handle special FP cases.
4877 AArch64CC::CondCode ExtraCC;
4878 changeFPCCToANDAArch64CC(CC, OutCC, ExtraCC);
4879 // Some floating point conditions can't be tested with a single condition
4880 // code. Construct an additional comparison in this case.
4881 if (ExtraCC != AArch64CC::AL) {
4882 MachineInstr *ExtraCmp;
4883 if (!CCOp)
4884 ExtraCmp = emitFPCompare(LHS, RHS, MIB, CC);
4885 else
4886 ExtraCmp =
4887 emitConditionalComparison(LHS, RHS, CC, Predicate, ExtraCC, MIB);
4888 CCOp = ExtraCmp->getOperand(0).getReg();
4889 Predicate = ExtraCC;
4890 }
4891 }
4892
4893 // Produce a normal comparison if we are first in the chain
4894 if (!CCOp) {
4895 if (isa<GICmp>(Cmp))
4896 return emitCMP(Cmp->getOperand(2), Cmp->getOperand(3), MIB);
4897 return emitFPCompare(Cmp->getOperand(2).getReg(),
4898 Cmp->getOperand(3).getReg(), MIB);
4899 }
4900 // Otherwise produce a ccmp.
4901 return emitConditionalComparison(LHS, RHS, CC, Predicate, OutCC, MIB);
4902 }
4903 assert(MRI.hasOneNonDBGUse(Val) && "Valid conjunction/disjunction tree");
4904
4905 bool IsOR = Opcode == TargetOpcode::G_OR;
4906
4907 Register LHS = ValDef->getOperand(1).getReg();
4908 bool CanNegateL;
4909 bool MustBeFirstL;
4910 bool ValidL = canEmitConjunction(LHS, CanNegateL, MustBeFirstL, IsOR, MRI);
4911 assert(ValidL && "Valid conjunction/disjunction tree");
4912 (void)ValidL;
4913
4914 Register RHS = ValDef->getOperand(2).getReg();
4915 bool CanNegateR;
4916 bool MustBeFirstR;
4917 bool ValidR = canEmitConjunction(RHS, CanNegateR, MustBeFirstR, IsOR, MRI);
4918 assert(ValidR && "Valid conjunction/disjunction tree");
4919 (void)ValidR;
4920
4921 // Swap sub-tree that must come first to the right side.
4922 if (MustBeFirstL) {
4923 assert(!MustBeFirstR && "Valid conjunction/disjunction tree");
4924 std::swap(LHS, RHS);
4925 std::swap(CanNegateL, CanNegateR);
4926 std::swap(MustBeFirstL, MustBeFirstR);
4927 }
4928
4929 bool NegateR;
4930 bool NegateAfterR;
4931 bool NegateL;
4932 bool NegateAfterAll;
4933 if (Opcode == TargetOpcode::G_OR) {
4934 // Swap the sub-tree that we can negate naturally to the left.
4935 if (!CanNegateL) {
4936 assert(CanNegateR && "at least one side must be negatable");
4937 assert(!MustBeFirstR && "invalid conjunction/disjunction tree");
4938 assert(!Negate);
4939 std::swap(LHS, RHS);
4940 NegateR = false;
4941 NegateAfterR = true;
4942 } else {
4943 // Negate the left sub-tree if possible, otherwise negate the result.
4944 NegateR = CanNegateR;
4945 NegateAfterR = !CanNegateR;
4946 }
4947 NegateL = true;
4948 NegateAfterAll = !Negate;
4949 } else {
4950 assert(Opcode == TargetOpcode::G_AND &&
4951 "Valid conjunction/disjunction tree");
4952 assert(!Negate && "Valid conjunction/disjunction tree");
4953
4954 NegateL = false;
4955 NegateR = false;
4956 NegateAfterR = false;
4957 NegateAfterAll = false;
4958 }
4959
4960 // Emit sub-trees.
4961 AArch64CC::CondCode RHSCC;
4962 MachineInstr *CmpR =
4963 emitConjunctionRec(RHS, RHSCC, NegateR, CCOp, Predicate, MIB);
4964 if (NegateAfterR)
4965 RHSCC = AArch64CC::getInvertedCondCode(RHSCC);
4966 MachineInstr *CmpL = emitConjunctionRec(
4967 LHS, OutCC, NegateL, CmpR->getOperand(0).getReg(), RHSCC, MIB);
4968 if (NegateAfterAll)
4969 OutCC = AArch64CC::getInvertedCondCode(OutCC);
4970 return CmpL;
4971}
4972
4973MachineInstr *AArch64InstructionSelector::emitConjunction(
4974 Register Val, AArch64CC::CondCode &OutCC, MachineIRBuilder &MIB) const {
4975 bool DummyCanNegate;
4976 bool DummyMustBeFirst;
4977 if (!canEmitConjunction(Val, DummyCanNegate, DummyMustBeFirst, false,
4978 *MIB.getMRI()))
4979 return nullptr;
4980 return emitConjunctionRec(Val, OutCC, false, Register(), AArch64CC::AL, MIB);
4981}
4982
4983bool AArch64InstructionSelector::tryOptSelectConjunction(GSelect &SelI,
4984 MachineInstr &CondMI) {
4985 AArch64CC::CondCode AArch64CC;
4986 MachineInstr *ConjMI = emitConjunction(SelI.getCondReg(), AArch64CC, MIB);
4987 if (!ConjMI)
4988 return false;
4989
4990 emitSelect(SelI.getReg(0), SelI.getTrueReg(), SelI.getFalseReg(), AArch64CC, MIB);
4991 SelI.eraseFromParent();
4992 return true;
4993}
4994
4995bool AArch64InstructionSelector::tryOptSelect(GSelect &I) {
4996 MachineRegisterInfo &MRI = *MIB.getMRI();
4997 // We want to recognize this pattern:
4998 //
4999 // $z = G_FCMP pred, $x, $y
5000 // ...
5001 // $w = G_SELECT $z, $a, $b
5002 //
5003 // Where the value of $z is *only* ever used by the G_SELECT (possibly with
5004 // some copies/truncs in between.)
5005 //
5006 // If we see this, then we can emit something like this:
5007 //
5008 // fcmp $x, $y
5009 // fcsel $w, $a, $b, pred
5010 //
5011 // Rather than emitting both of the rather long sequences in the standard
5012 // G_FCMP/G_SELECT select methods.
5013
5014 // First, check if the condition is defined by a compare.
5015 MachineInstr *CondDef = MRI.getVRegDef(I.getOperand(1).getReg());
5016
5017 // We can only fold if all of the defs have one use.
5018 Register CondDefReg = CondDef->getOperand(0).getReg();
5019 if (!MRI.hasOneNonDBGUse(CondDefReg)) {
5020 // Unless it's another select.
5021 for (const MachineInstr &UI : MRI.use_nodbg_instructions(CondDefReg)) {
5022 if (CondDef == &UI)
5023 continue;
5024 if (UI.getOpcode() != TargetOpcode::G_SELECT)
5025 return false;
5026 }
5027 }
5028
5029 // Is the condition defined by a compare?
5030 unsigned CondOpc = CondDef->getOpcode();
5031 if (CondOpc != TargetOpcode::G_ICMP && CondOpc != TargetOpcode::G_FCMP) {
5032 if (tryOptSelectConjunction(I, *CondDef))
5033 return true;
5034 return false;
5035 }
5036
5038 if (CondOpc == TargetOpcode::G_ICMP) {
5039 auto &PredOp = CondDef->getOperand(1);
5040 emitIntegerCompare(CondDef->getOperand(2), CondDef->getOperand(3), PredOp,
5041 MIB);
5042 auto Pred = static_cast<CmpInst::Predicate>(PredOp.getPredicate());
5043 CondCode =
5044 changeICMPPredToAArch64CC(Pred, CondDef->getOperand(3).getReg(), &MRI);
5045 } else {
5046 // Get the condition code for the select.
5047 auto Pred =
5048 static_cast<CmpInst::Predicate>(CondDef->getOperand(1).getPredicate());
5049 AArch64CC::CondCode CondCode2;
5050 changeFCMPPredToAArch64CC(Pred, CondCode, CondCode2);
5051
5052 // changeFCMPPredToAArch64CC sets CondCode2 to AL when we require two
5053 // instructions to emit the comparison.
5054 // TODO: Handle FCMP_UEQ and FCMP_ONE. After that, this check will be
5055 // unnecessary.
5056 if (CondCode2 != AArch64CC::AL)
5057 return false;
5058
5059 if (!emitFPCompare(CondDef->getOperand(2).getReg(),
5060 CondDef->getOperand(3).getReg(), MIB)) {
5061 LLVM_DEBUG(dbgs() << "Couldn't emit compare for select!\n");
5062 return false;
5063 }
5064 }
5065
5066 // Emit the select.
5067 emitSelect(I.getOperand(0).getReg(), I.getOperand(2).getReg(),
5068 I.getOperand(3).getReg(), CondCode, MIB);
5069 I.eraseFromParent();
5070 return true;
5071}
5072
5073MachineInstr *AArch64InstructionSelector::tryFoldIntegerCompare(
5074 MachineOperand &LHS, MachineOperand &RHS, MachineOperand &Predicate,
5075 MachineIRBuilder &MIRBuilder) const {
5076 assert(LHS.isReg() && RHS.isReg() && Predicate.isPredicate() &&
5077 "Unexpected MachineOperand");
5078 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
5079 // We want to find this sort of thing:
5080 // x = G_SUB 0, y
5081 // G_ICMP z, x
5082 //
5083 // In this case, we can fold the G_SUB into the G_ICMP using a CMN instead.
5084 // e.g:
5085 //
5086 // cmn z, y
5087
5088 // Check if the RHS or LHS of the G_ICMP is defined by a SUB
5089 MachineInstr *LHSDef = getDefIgnoringCopies(LHS.getReg(), MRI);
5090 MachineInstr *RHSDef = getDefIgnoringCopies(RHS.getReg(), MRI);
5091 auto P = static_cast<CmpInst::Predicate>(Predicate.getPredicate());
5092
5093 // Given this:
5094 //
5095 // x = G_SUB 0, y
5096 // G_ICMP z, x
5097 //
5098 // Produce this:
5099 //
5100 // cmn z, y
5101 if (isCMN(RHSDef, P, MRI))
5102 return emitCMN(LHS, RHSDef->getOperand(2), MIRBuilder);
5103
5104 // Same idea here, but with the LHS of the compare instead:
5105 //
5106 // Given this:
5107 //
5108 // x = G_SUB 0, y
5109 // G_ICMP x, z
5110 //
5111 // Produce this:
5112 //
5113 // cmn y, z
5114 //
5115 // But be careful! We need to swap the predicate!
5116 if (isCMN(LHSDef, P, MRI)) {
5117 if (!CmpInst::isEquality(P)) {
5120 }
5121 return emitCMN(LHSDef->getOperand(2), RHS, MIRBuilder);
5122 }
5123
5124 // Given this:
5125 //
5126 // z = G_AND x, y
5127 // G_ICMP z, 0
5128 //
5129 // Produce this if the compare is signed:
5130 //
5131 // tst x, y
5132 if (!CmpInst::isUnsigned(P) && LHSDef &&
5133 LHSDef->getOpcode() == TargetOpcode::G_AND) {
5134 // Make sure that the RHS is 0.
5135 auto ValAndVReg = getIConstantVRegValWithLookThrough(RHS.getReg(), MRI);
5136 if (!ValAndVReg || ValAndVReg->Value != 0)
5137 return nullptr;
5138
5139 return emitTST(LHSDef->getOperand(1),
5140 LHSDef->getOperand(2), MIRBuilder);
5141 }
5142
5143 return nullptr;
5144}
5145
5146bool AArch64InstructionSelector::selectShuffleVector(
5147 MachineInstr &I, MachineRegisterInfo &MRI) {
5148 const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
5149 Register Src1Reg = I.getOperand(1).getReg();
5150 Register Src2Reg = I.getOperand(2).getReg();
5151 ArrayRef<int> Mask = I.getOperand(3).getShuffleMask();
5152
5153 MachineBasicBlock &MBB = *I.getParent();
5154 MachineFunction &MF = *MBB.getParent();
5155 LLVMContext &Ctx = MF.getFunction().getContext();
5156
5157 unsigned BytesPerElt = DstTy.getElementType().getSizeInBits() / 8;
5158
5160 for (int Val : Mask) {
5161 // For now, any undef indexes we'll just assume to be 0. This should be
5162 // optimized in future, e.g. to select DUP etc.
5163 Val = Val < 0 ? 0 : Val;
5164 for (unsigned Byte = 0; Byte < BytesPerElt; ++Byte) {
5165 unsigned Offset = Byte + Val * BytesPerElt;
5166 CstIdxs.emplace_back(ConstantInt::get(Type::getInt8Ty(Ctx), Offset));
5167 }
5168 }
5169
5170 // Use a constant pool to load the index vector for TBL.
5171 Constant *CPVal = ConstantVector::get(CstIdxs);
5172 MachineInstr *IndexLoad = emitLoadFromConstantPool(CPVal, MIB);
5173 if (!IndexLoad) {
5174 LLVM_DEBUG(dbgs() << "Could not load from a constant pool");
5175 return false;
5176 }
5177
5178 if (DstTy.getSizeInBits() != 128) {
5179 assert(DstTy.getSizeInBits() == 64 && "Unexpected shuffle result ty");
5180 // This case can be done with TBL1.
5181 MachineInstr *Concat =
5182 emitVectorConcat(std::nullopt, Src1Reg, Src2Reg, MIB);
5183 if (!Concat) {
5184 LLVM_DEBUG(dbgs() << "Could not do vector concat for tbl1");
5185 return false;
5186 }
5187
5188 // The constant pool load will be 64 bits, so need to convert to FPR128 reg.
5189 IndexLoad = emitScalarToVector(64, &AArch64::FPR128RegClass,
5190 IndexLoad->getOperand(0).getReg(), MIB);
5191
5192 auto TBL1 = MIB.buildInstr(
5193 AArch64::TBLv16i8One, {&AArch64::FPR128RegClass},
5194 {Concat->getOperand(0).getReg(), IndexLoad->getOperand(0).getReg()});
5196
5197 auto Copy =
5198 MIB.buildInstr(TargetOpcode::COPY, {I.getOperand(0).getReg()}, {})
5199 .addReg(TBL1.getReg(0), {}, AArch64::dsub);
5200 RBI.constrainGenericRegister(Copy.getReg(0), AArch64::FPR64RegClass, MRI);
5201 I.eraseFromParent();
5202 return true;
5203 }
5204
5205 // For TBL2 we need to emit a REG_SEQUENCE to tie together two consecutive
5206 // Q registers for regalloc.
5207 SmallVector<Register, 2> Regs = {Src1Reg, Src2Reg};
5208 auto RegSeq = createQTuple(Regs, MIB);
5209 auto TBL2 = MIB.buildInstr(AArch64::TBLv16i8Two, {I.getOperand(0)},
5210 {RegSeq, IndexLoad->getOperand(0)});
5212 I.eraseFromParent();
5213 return true;
5214}
5215
5216MachineInstr *AArch64InstructionSelector::emitLaneInsert(
5217 std::optional<Register> DstReg, Register SrcReg, Register EltReg,
5218 unsigned LaneIdx, const RegisterBank &RB,
5219 MachineIRBuilder &MIRBuilder) const {
5220 MachineInstr *InsElt = nullptr;
5221 const TargetRegisterClass *DstRC = &AArch64::FPR128RegClass;
5222 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
5223
5224 // Create a register to define with the insert if one wasn't passed in.
5225 if (!DstReg)
5226 DstReg = MRI.createVirtualRegister(DstRC);
5227
5228 unsigned EltSize = MRI.getType(EltReg).getSizeInBits();
5229 unsigned Opc = getInsertVecEltOpInfo(RB, EltSize).first;
5230
5231 if (RB.getID() == AArch64::FPRRegBankID) {
5232 auto InsSub = emitScalarToVector(EltSize, DstRC, EltReg, MIRBuilder);
5233 InsElt = MIRBuilder.buildInstr(Opc, {*DstReg}, {SrcReg})
5234 .addImm(LaneIdx)
5235 .addUse(InsSub->getOperand(0).getReg())
5236 .addImm(0);
5237 } else {
5238 InsElt = MIRBuilder.buildInstr(Opc, {*DstReg}, {SrcReg})
5239 .addImm(LaneIdx)
5240 .addUse(EltReg);
5241 }
5242
5244 return InsElt;
5245}
5246
5247bool AArch64InstructionSelector::selectUSMovFromExtend(
5248 MachineInstr &MI, MachineRegisterInfo &MRI) {
5249 if (MI.getOpcode() != TargetOpcode::G_SEXT &&
5250 MI.getOpcode() != TargetOpcode::G_ZEXT &&
5251 MI.getOpcode() != TargetOpcode::G_ANYEXT)
5252 return false;
5253 bool IsSigned = MI.getOpcode() == TargetOpcode::G_SEXT;
5254 const Register DefReg = MI.getOperand(0).getReg();
5255 const LLT DstTy = MRI.getType(DefReg);
5256 unsigned DstSize = DstTy.getSizeInBits();
5257
5258 if (DstSize != 32 && DstSize != 64)
5259 return false;
5260
5261 MachineInstr *Extract = getOpcodeDef(TargetOpcode::G_EXTRACT_VECTOR_ELT,
5262 MI.getOperand(1).getReg(), MRI);
5263 int64_t Lane;
5264 if (!Extract || !mi_match(Extract->getOperand(2).getReg(), MRI, m_ICst(Lane)))
5265 return false;
5266 Register Src0 = Extract->getOperand(1).getReg();
5267
5268 const LLT VecTy = MRI.getType(Src0);
5269 if (VecTy.isScalableVector())
5270 return false;
5271
5272 if (VecTy.getSizeInBits() != 128) {
5273 const MachineInstr *ScalarToVector = emitScalarToVector(
5274 VecTy.getSizeInBits(), &AArch64::FPR128RegClass, Src0, MIB);
5275 assert(ScalarToVector && "Didn't expect emitScalarToVector to fail!");
5276 Src0 = ScalarToVector->getOperand(0).getReg();
5277 }
5278
5279 unsigned Opcode;
5280 if (DstSize == 64 && VecTy.getScalarSizeInBits() == 32)
5281 Opcode = IsSigned ? AArch64::SMOVvi32to64 : AArch64::UMOVvi32;
5282 else if (DstSize == 64 && VecTy.getScalarSizeInBits() == 16)
5283 Opcode = IsSigned ? AArch64::SMOVvi16to64 : AArch64::UMOVvi16;
5284 else if (DstSize == 64 && VecTy.getScalarSizeInBits() == 8)
5285 Opcode = IsSigned ? AArch64::SMOVvi8to64 : AArch64::UMOVvi8;
5286 else if (DstSize == 32 && VecTy.getScalarSizeInBits() == 16)
5287 Opcode = IsSigned ? AArch64::SMOVvi16to32 : AArch64::UMOVvi16;
5288 else if (DstSize == 32 && VecTy.getScalarSizeInBits() == 8)
5289 Opcode = IsSigned ? AArch64::SMOVvi8to32 : AArch64::UMOVvi8;
5290 else
5291 llvm_unreachable("Unexpected type combo for S/UMov!");
5292
5293 // We may need to generate one of these, depending on the type and sign of the
5294 // input:
5295 // DstReg = SMOV Src0, Lane;
5296 // NewReg = UMOV Src0, Lane; DstReg = SUBREG_TO_REG NewReg, sub_32;
5297 MachineInstr *ExtI = nullptr;
5298 if (DstSize == 64 && !IsSigned) {
5299 Register NewReg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
5300 MIB.buildInstr(Opcode, {NewReg}, {Src0}).addImm(Lane);
5301 ExtI = MIB.buildInstr(AArch64::SUBREG_TO_REG, {DefReg}, {})
5302 .addUse(NewReg)
5303 .addImm(AArch64::sub_32);
5304 RBI.constrainGenericRegister(DefReg, AArch64::GPR64RegClass, MRI);
5305 } else
5306 ExtI = MIB.buildInstr(Opcode, {DefReg}, {Src0}).addImm(Lane);
5307
5309 MI.eraseFromParent();
5310 return true;
5311}
5312
/// Try to materialize the constant \p Bits into \p Dst with a byte-element
/// MOVI: MOVIv16b_ns when \p DstSize is 128, MOVIv8b_ns otherwise.
/// For a 128-bit destination both 64-bit halves of \p Bits must match, because
/// only the low 64 bits are turned into the immediate operand.
/// \returns the built instruction, or nullptr if this form cannot be used.
/// NOTE(review): this listing jumps over embedded source lines 5326-5327 and
/// 5329, so the condition opening the block that is closed at 5331 is not
/// visible here; presumably it tests (and re-encodes) Val as an AdvSIMD
/// modified immediate -- confirm against the upstream file.
5313MachineInstr *AArch64InstructionSelector::tryAdvSIMDModImm8(
5314 Register Dst, unsigned DstSize, APInt Bits, MachineIRBuilder &Builder) {
5315 unsigned int Op;
5316 if (DstSize == 128) {
  // A 128-bit value is only representable if it is the same 64-bit pattern
  // repeated twice.
5317 if (Bits.getHiBits(64) != Bits.getLoBits(64))
5318 return nullptr;
5319 Op = AArch64::MOVIv16b_ns;
5320 } else {
5321 Op = AArch64::MOVIv8b_ns;
5322 }
5323
5324 uint64_t Val = Bits.zextOrTrunc(64).getZExtValue();
5325
5328 auto Mov = Builder.buildInstr(Op, {Dst}, {}).addImm(Val);
5330 return &*Mov;
5331 }
5332 return nullptr;
5333}
5334
/// Try to materialize the constant \p Bits into \p Dst with a 16-bit-element
/// MOVI, or MVNI when \p Inv is set (v8i16 forms for a 128-bit destination,
/// v4i16 forms otherwise). The instruction takes the immediate followed by a
/// shift amount of 0 or 8.
/// For a 128-bit destination both 64-bit halves of \p Bits must match.
/// \returns the built instruction, or nullptr if no shift variant applies.
/// NOTE(review): this listing jumps over embedded source lines 5351-5352,
/// 5355 and 5361. The first branch of the if/else chain (the one that selects
/// Shift = 0) is therefore not visible; presumably each branch also re-encodes
/// Val -- confirm against the upstream file.
5335MachineInstr *AArch64InstructionSelector::tryAdvSIMDModImm16(
5336 Register Dst, unsigned DstSize, APInt Bits, MachineIRBuilder &Builder,
5337 bool Inv) {
5338
5339 unsigned int Op;
5340 if (DstSize == 128) {
  // Only a 64-bit pattern repeated twice can be encoded.
5341 if (Bits.getHiBits(64) != Bits.getLoBits(64))
5342 return nullptr;
5343 Op = Inv ? AArch64::MVNIv8i16 : AArch64::MOVIv8i16;
5344 } else {
5345 Op = Inv ? AArch64::MVNIv4i16 : AArch64::MOVIv4i16;
5346 }
5347
5348 uint64_t Val = Bits.zextOrTrunc(64).getZExtValue();
5349 uint64_t Shift;
5350
5353 Shift = 0;
5354 } else if (AArch64_AM::isAdvSIMDModImmType6(Val)) {
5356 Shift = 8;
5357 } else
5358 return nullptr;
5359
5360 auto Mov = Builder.buildInstr(Op, {Dst}, {}).addImm(Val).addImm(Shift);
5362 return &*Mov;
5363}
5364
/// Try to materialize the constant \p Bits into \p Dst with a 32-bit-element
/// MOVI, or MVNI when \p Inv is set (v4i32 forms for a 128-bit destination,
/// v2i32 forms otherwise). The instruction takes the immediate followed by a
/// shift amount of 0, 8, 16 or 24.
/// For a 128-bit destination both 64-bit halves of \p Bits must match.
/// \returns the built instruction, or nullptr if no shift variant applies.
/// NOTE(review): this listing jumps over embedded source lines 5381-5382,
/// 5385, 5388, 5391 and 5397. The first branch of the if/else chain (the one
/// that selects Shift = 0) is therefore not visible; presumably each branch
/// also re-encodes Val -- confirm against the upstream file.
5365MachineInstr *AArch64InstructionSelector::tryAdvSIMDModImm32(
5366 Register Dst, unsigned DstSize, APInt Bits, MachineIRBuilder &Builder,
5367 bool Inv) {
5368
5369 unsigned int Op;
5370 if (DstSize == 128) {
  // Only a 64-bit pattern repeated twice can be encoded.
5371 if (Bits.getHiBits(64) != Bits.getLoBits(64))
5372 return nullptr;
5373 Op = Inv ? AArch64::MVNIv4i32 : AArch64::MOVIv4i32;
5374 } else {
5375 Op = Inv ? AArch64::MVNIv2i32 : AArch64::MOVIv2i32;
5376 }
5377
5378 uint64_t Val = Bits.zextOrTrunc(64).getZExtValue();
5379 uint64_t Shift;
5380
5383 Shift = 0;
5384 } else if ((AArch64_AM::isAdvSIMDModImmType2(Val))) {
5386 Shift = 8;
5387 } else if ((AArch64_AM::isAdvSIMDModImmType3(Val))) {
5389 Shift = 16;
5390 } else if ((AArch64_AM::isAdvSIMDModImmType4(Val))) {
5392 Shift = 24;
5393 } else
5394 return nullptr;
5395
5396 auto Mov = Builder.buildInstr(Op, {Dst}, {}).addImm(Val).addImm(Shift);
5398 return &*Mov;
5399}
5400
/// Try to materialize the constant \p Bits into \p Dst with a 64-bit MOVI:
/// MOVIv2d_ns when \p DstSize is 128, MOVID otherwise.
/// For a 128-bit destination both 64-bit halves of \p Bits must match.
/// \returns the built instruction, or nullptr if this form cannot be used.
/// NOTE(review): this listing jumps over embedded source lines 5414-5415 and
/// 5417, so the condition opening the block that is closed at 5419 is not
/// visible here; presumably it tests (and re-encodes) Val as an AdvSIMD
/// modified immediate -- confirm against the upstream file.
5401MachineInstr *AArch64InstructionSelector::tryAdvSIMDModImm64(
5402 Register Dst, unsigned DstSize, APInt Bits, MachineIRBuilder &Builder) {
5403
5404 unsigned int Op;
5405 if (DstSize == 128) {
  // Only a 64-bit pattern repeated twice can be encoded.
5406 if (Bits.getHiBits(64) != Bits.getLoBits(64))
5407 return nullptr;
5408 Op = AArch64::MOVIv2d_ns;
5409 } else {
5410 Op = AArch64::MOVID;
5411 }
5412
5413 uint64_t Val = Bits.zextOrTrunc(64).getZExtValue();
5416 auto Mov = Builder.buildInstr(Op, {Dst}, {}).addImm(Val);
5418 return &*Mov;
5419 }
5420 return nullptr;
5421}
5422
/// Try to materialize the constant \p Bits into \p Dst with the "_msl" flavour
/// of the 32-bit-element MOVI, or MVNI when \p Inv is set (v4s forms for a
/// 128-bit destination, v2s forms otherwise).
/// For a 128-bit destination both 64-bit halves of \p Bits must match.
/// \returns the built instruction, or nullptr if neither variant applies.
/// NOTE(review): this listing jumps over embedded source lines 5439-5440,
/// 5443 and 5449. The first branch of the if/else chain (the one that selects
/// Shift = 264) is therefore not visible; presumably each branch also
/// re-encodes Val -- confirm against the upstream file.
5423MachineInstr *AArch64InstructionSelector::tryAdvSIMDModImm321s(
5424 Register Dst, unsigned DstSize, APInt Bits, MachineIRBuilder &Builder,
5425 bool Inv) {
5426
5427 unsigned int Op;
5428 if (DstSize == 128) {
  // Only a 64-bit pattern repeated twice can be encoded.
5429 if (Bits.getHiBits(64) != Bits.getLoBits(64))
5430 return nullptr;
5431 Op = Inv ? AArch64::MVNIv4s_msl : AArch64::MOVIv4s_msl;
5432 } else {
5433 Op = Inv ? AArch64::MVNIv2s_msl : AArch64::MOVIv2s_msl;
5434 }
5435
5436 uint64_t Val = Bits.zextOrTrunc(64).getZExtValue();
5437 uint64_t Shift;
5438
  // NOTE(review): 264 and 272 are passed as the second immediate operand;
  // presumably they are the pre-encoded "MSL #8" / "MSL #16" shifter values --
  // confirm against the AArch64_AM shifter encoding.
5441 Shift = 264;
5442 } else if (AArch64_AM::isAdvSIMDModImmType8(Val)) {
5444 Shift = 272;
5445 } else
5446 return nullptr;
5447
5448 auto Mov = Builder.buildInstr(Op, {Dst}, {}).addImm(Val).addImm(Shift);
5450 return &*Mov;
5451}
5452
/// Try to materialize the constant \p Bits into \p Dst with a vector FMOV
/// immediate. The starting opcode is FMOVv4f32_ns for a 128-bit destination
/// and FMOVv2f32_ns otherwise; a 128-bit destination may additionally switch
/// to FMOVv2f64_ns when the value matches isAdvSIMDModImmType12.
/// For a 128-bit destination both 64-bit halves of \p Bits must match.
/// \returns the built instruction, or nullptr if no FMOV form applies.
/// NOTE(review): this listing jumps over embedded source lines 5469-5470,
/// 5472 and 5478. The first branch of the if/else chain is therefore not
/// visible; presumably each branch also re-encodes Val -- confirm against the
/// upstream file.
5453MachineInstr *AArch64InstructionSelector::tryAdvSIMDModImmFP(
5454 Register Dst, unsigned DstSize, APInt Bits, MachineIRBuilder &Builder) {
5455
5456 unsigned int Op;
  // IsWide records that the destination is 128 bits; only then is the
  // two-element f64 form considered below.
5457 bool IsWide = false;
5458 if (DstSize == 128) {
5459 if (Bits.getHiBits(64) != Bits.getLoBits(64))
5460 return nullptr;
5461 Op = AArch64::FMOVv4f32_ns;
5462 IsWide = true;
5463 } else {
5464 Op = AArch64::FMOVv2f32_ns;
5465 }
5466
5467 uint64_t Val = Bits.zextOrTrunc(64).getZExtValue();
5468
5471 } else if (IsWide && AArch64_AM::isAdvSIMDModImmType12(Val)) {
5473 Op = AArch64::FMOVv2f64_ns;
5474 } else
5475 return nullptr;
5476
5477 auto Mov = Builder.buildInstr(Op, {Dst}, {}).addImm(Val);
5479 return &*Mov;
5480}
5481
/// Select an extending pre/post-indexed load (\p MI is cast to
/// GIndexedAnyExtLoad).
///
/// The opcode is chosen from the memory size (8, 16 or 32 bits), whether the
/// load sign-extends, whether the destination lives on the FPR bank, whether
/// the destination is 64 bits wide, and whether the access is pre- or
/// post-indexed. Non-sign-extending loads are emitted with a narrower result
/// register which is then placed into \p Dst through SUBREG_TO_REG (bsub, hsub
/// or ssub on FPR; sub_32 for a 64-bit GPR destination); otherwise the loaded
/// value is simply copied into \p Dst.
///
/// \returns false, leaving \p MI in place, for a sign-extending load onto FPR,
/// for a vector destination, or when the offset is not a constant; true after
/// replacing \p MI otherwise.
/// NOTE(review): this listing jumps over embedded source lines 5564 and 5572.
/// Line 5573 below is the argument tail of a call whose head is not visible
/// here -- confirm against the upstream file.
5482bool AArch64InstructionSelector::selectIndexedExtLoad(
5483 MachineInstr &MI, MachineRegisterInfo &MRI) {
5484 auto &ExtLd = cast<GIndexedAnyExtLoad>(MI);
5485 Register Dst = ExtLd.getDstReg();
5486 Register WriteBack = ExtLd.getWritebackReg();
5487 Register Base = ExtLd.getBaseReg();
5488 Register Offset = ExtLd.getOffsetReg();
5489 LLT Ty = MRI.getType(Dst);
5490 assert(Ty.getSizeInBits() <= 64); // Only for scalar GPRs.
5491 unsigned MemSizeBits = ExtLd.getMMO().getMemoryType().getSizeInBits();
5492 bool IsPre = ExtLd.isPre();
5493 bool IsSExt = isa<GIndexedSExtLoad>(ExtLd);
  // Sub-register index used to place the loaded value into Dst; 0 means the
  // load result is copied to Dst directly.
5494 unsigned InsertIntoSubReg = 0;
5495 bool IsDst64 = Ty.getSizeInBits() == 64;
5496
5497 // ZExt/SExt should be on gpr but can handle extload and zextload of fpr, so
5498 // long as they are scalar.
5499 bool IsFPR = RBI.getRegBank(Dst, MRI, TRI)->getID() == AArch64::FPRRegBankID;
5500 if ((IsSExt && IsFPR) || Ty.isVector())
5501 return false;
5502
5503 unsigned Opc = 0;
  // Type given to the result register of the load instruction itself.
5504 LLT NewLdDstTy;
5505 LLT s32 = LLT::scalar(32);
5506 LLT s64 = LLT::scalar(64);
5507
5508 if (MemSizeBits == 8) {
5509 if (IsSExt) {
5510 if (IsDst64)
5511 Opc = IsPre ? AArch64::LDRSBXpre : AArch64::LDRSBXpost;
5512 else
5513 Opc = IsPre ? AArch64::LDRSBWpre : AArch64::LDRSBWpost;
5514 NewLdDstTy = IsDst64 ? s64 : s32;
5515 } else if (IsFPR) {
5516 Opc = IsPre ? AArch64::LDRBpre : AArch64::LDRBpost;
5517 InsertIntoSubReg = AArch64::bsub;
5518 NewLdDstTy = LLT::scalar(MemSizeBits);
5519 } else {
5520 Opc = IsPre ? AArch64::LDRBBpre : AArch64::LDRBBpost;
5521 InsertIntoSubReg = IsDst64 ? AArch64::sub_32 : 0;
5522 NewLdDstTy = s32;
5523 }
5524 } else if (MemSizeBits == 16) {
5525 if (IsSExt) {
5526 if (IsDst64)
5527 Opc = IsPre ? AArch64::LDRSHXpre : AArch64::LDRSHXpost;
5528 else
5529 Opc = IsPre ? AArch64::LDRSHWpre : AArch64::LDRSHWpost;
5530 NewLdDstTy = IsDst64 ? s64 : s32;
5531 } else if (IsFPR) {
5532 Opc = IsPre ? AArch64::LDRHpre : AArch64::LDRHpost;
5533 InsertIntoSubReg = AArch64::hsub;
5534 NewLdDstTy = LLT::scalar(MemSizeBits);
5535 } else {
5536 Opc = IsPre ? AArch64::LDRHHpre : AArch64::LDRHHpost;
5537 InsertIntoSubReg = IsDst64 ? AArch64::sub_32 : 0;
5538 NewLdDstTy = s32;
5539 }
5540 } else if (MemSizeBits == 32) {
5541 if (IsSExt) {
  // A 32-bit sign-extending load only has the 64-bit result form here.
5542 Opc = IsPre ? AArch64::LDRSWpre : AArch64::LDRSWpost;
5543 NewLdDstTy = s64;
5544 } else if (IsFPR) {
5545 Opc = IsPre ? AArch64::LDRSpre : AArch64::LDRSpost;
5546 InsertIntoSubReg = AArch64::ssub;
5547 NewLdDstTy = LLT::scalar(MemSizeBits);
5548 } else {
5549 Opc = IsPre ? AArch64::LDRWpre : AArch64::LDRWpost;
5550 InsertIntoSubReg = IsDst64 ? AArch64::sub_32 : 0;
5551 NewLdDstTy = s32;
5552 }
5553 } else {
5554 llvm_unreachable("Unexpected size for indexed load");
5555 }
5556
  // The selected instructions take the offset as an immediate, so it has to
  // be a known constant.
5557 auto Cst = getIConstantVRegVal(Offset, MRI);
5558 if (!Cst)
5559 return false; // Shouldn't happen, but just in case.
5560
  // Def 0 of the new load is the written-back address, def 1 the loaded value.
5561 auto LdMI = MIB.buildInstr(Opc, {WriteBack, NewLdDstTy}, {Base})
5562 .addImm(Cst->getSExtValue());
5563 LdMI.cloneMemRefs(ExtLd);
5565 // Make sure to select the load with the MemTy as the dest type, and then
5566 // insert into a larger reg if needed.
5567 if (InsertIntoSubReg) {
5568 // Generate a SUBREG_TO_REG.
5569 auto SubToReg = MIB.buildInstr(TargetOpcode::SUBREG_TO_REG, {Dst}, {})
5570 .addUse(LdMI.getReg(1))
5571 .addImm(InsertIntoSubReg);
5573 SubToReg.getReg(0),
5574 *getRegClassForTypeOnBank(MRI.getType(Dst),
5575 *RBI.getRegBank(Dst, MRI, TRI)),
5576 MRI);
5577 } else {
5578 auto Copy = MIB.buildCopy(Dst, LdMI.getReg(1));
5579 selectCopy(*Copy, TII, MRI, TRI, RBI);
5580 }
5581 MI.eraseFromParent();
5582
5583 return true;
5584}
5585
/// Select a pre/post-indexed load (\p MI is cast to GIndexedLoad).
///
/// When the memory access is narrower than the destination register the work
/// is forwarded to selectIndexedExtLoad. Otherwise the opcode comes from a
/// small table indexed by log2 of the access size in bytes, with separate
/// tables for FPR and non-FPR destinations and for pre/post indexing.
///
/// \returns false, leaving \p MI in place, when the offset is not a constant;
/// true after replacing \p MI otherwise (or whatever selectIndexedExtLoad
/// returns on the extending path).
/// NOTE(review): this listing jumps over embedded source line 5630 (between
/// cloneMemRefs and eraseFromParent) -- confirm against the upstream file.
5586bool AArch64InstructionSelector::selectIndexedLoad(MachineInstr &MI,
5587 MachineRegisterInfo &MRI) {
5588 auto &Ld = cast<GIndexedLoad>(MI);
5589 Register Dst = Ld.getDstReg();
5590 Register WriteBack = Ld.getWritebackReg();
5591 Register Base = Ld.getBaseReg();
5592 Register Offset = Ld.getOffsetReg();
5593 assert(MRI.getType(Dst).getSizeInBits() <= 128 &&
5594 "Unexpected type for indexed load");
5595 unsigned MemSize = Ld.getMMO().getMemoryType().getSizeInBytes();
5596
5597 if (MemSize < MRI.getType(Dst).getSizeInBytes())
5598 return selectIndexedExtLoad(MI, MRI);
5599
  // NOTE(review): the GPR tables have four entries (1, 2, 4, 8 bytes) while
  // the FPR tables have five (up to 16 bytes); a 16-byte access on a non-FPR
  // bank would index past GPROpcodes. Presumably earlier passes rule that
  // combination out -- verify.
5600 unsigned Opc = 0;
5601 if (Ld.isPre()) {
5602 static constexpr unsigned GPROpcodes[] = {
5603 AArch64::LDRBBpre, AArch64::LDRHHpre, AArch64::LDRWpre,
5604 AArch64::LDRXpre};
5605 static constexpr unsigned FPROpcodes[] = {
5606 AArch64::LDRBpre, AArch64::LDRHpre, AArch64::LDRSpre, AArch64::LDRDpre,
5607 AArch64::LDRQpre};
5608 Opc = (RBI.getRegBank(Dst, MRI, TRI)->getID() == AArch64::FPRRegBankID)
5609 ? FPROpcodes[Log2_32(MemSize)]
5610 : GPROpcodes[Log2_32(MemSize)];
  // The lone ';' below is an empty statement with no effect.
5611 ;
5612 } else {
5613 static constexpr unsigned GPROpcodes[] = {
5614 AArch64::LDRBBpost, AArch64::LDRHHpost, AArch64::LDRWpost,
5615 AArch64::LDRXpost};
5616 static constexpr unsigned FPROpcodes[] = {
5617 AArch64::LDRBpost, AArch64::LDRHpost, AArch64::LDRSpost,
5618 AArch64::LDRDpost, AArch64::LDRQpost};
5619 Opc = (RBI.getRegBank(Dst, MRI, TRI)->getID() == AArch64::FPRRegBankID)
5620 ? FPROpcodes[Log2_32(MemSize)]
5621 : GPROpcodes[Log2_32(MemSize)];
  // The lone ';' below is an empty statement with no effect.
5622 ;
5623 }
  // The selected instructions take the offset as an immediate, so it has to
  // be a known constant.
5624 auto Cst = getIConstantVRegVal(Offset, MRI);
5625 if (!Cst)
5626 return false; // Shouldn't happen, but just in case.
  // Def 0 of the new load is the written-back address, def 1 the loaded value.
5627 auto LdMI =
5628 MIB.buildInstr(Opc, {WriteBack, Dst}, {Base}).addImm(Cst->getSExtValue());
5629 LdMI.cloneMemRefs(Ld);
5631 MI.eraseFromParent();
5632 return true;
5633}
5634
/// Select a pre/post-indexed store \p I.
///
/// The opcode comes from a small table indexed by log2 of the access size in
/// bytes, with separate tables for FPR and non-FPR stored values and for
/// pre/post indexing. The new instruction defines the write-back register and
/// uses the stored value, the base and the constant offset as an immediate.
///
/// \returns false, leaving \p I in place, when the offset is not a constant;
/// true after replacing \p I otherwise.
/// NOTE(review): this listing jumps over embedded source line 5684 (between
/// cloneMemRefs and eraseFromParent) -- confirm against the upstream file.
5635bool AArch64InstructionSelector::selectIndexedStore(GIndexedStore &I,
5636 MachineRegisterInfo &MRI) {
5637 Register Dst = I.getWritebackReg();
5638 Register Val = I.getValueReg();
5639 Register Base = I.getBaseReg();
5640 Register Offset = I.getOffsetReg();
5641 assert(MRI.getType(Val).getSizeInBits() <= 128 &&
5642 "Unexpected type for indexed store");
5643
5644 LocationSize MemSize = I.getMMO().getSize();
5645 unsigned MemSizeInBytes = MemSize.getValue();
5646
5647 assert(MemSizeInBytes && MemSizeInBytes <= 16 &&
5648 "Unexpected indexed store size");
5649 unsigned MemSizeLog2 = Log2_32(MemSizeInBytes);
5650
  // NOTE(review): the GPR tables have four entries (1, 2, 4, 8 bytes) while
  // the FPR tables have five (up to 16 bytes); the assert above admits 16
  // bytes for either bank. Presumably earlier passes never produce a 16-byte
  // store on a non-FPR bank -- verify.
5651 unsigned Opc = 0;
5652 if (I.isPre()) {
5653 static constexpr unsigned GPROpcodes[] = {
5654 AArch64::STRBBpre, AArch64::STRHHpre, AArch64::STRWpre,
5655 AArch64::STRXpre};
5656 static constexpr unsigned FPROpcodes[] = {
5657 AArch64::STRBpre, AArch64::STRHpre, AArch64::STRSpre, AArch64::STRDpre,
5658 AArch64::STRQpre};
5659
5660 if (RBI.getRegBank(Val, MRI, TRI)->getID() == AArch64::FPRRegBankID)
5661 Opc = FPROpcodes[MemSizeLog2];
5662 else
5663 Opc = GPROpcodes[MemSizeLog2];
5664 } else {
5665 static constexpr unsigned GPROpcodes[] = {
5666 AArch64::STRBBpost, AArch64::STRHHpost, AArch64::STRWpost,
5667 AArch64::STRXpost};
5668 static constexpr unsigned FPROpcodes[] = {
5669 AArch64::STRBpost, AArch64::STRHpost, AArch64::STRSpost,
5670 AArch64::STRDpost, AArch64::STRQpost};
5671
5672 if (RBI.getRegBank(Val, MRI, TRI)->getID() == AArch64::FPRRegBankID)
5673 Opc = FPROpcodes[MemSizeLog2];
5674 else
5675 Opc = GPROpcodes[MemSizeLog2];
5676 }
5677
  // The selected instructions take the offset as an immediate, so it has to
  // be a known constant.
5678 auto Cst = getIConstantVRegVal(Offset, MRI);
5679 if (!Cst)
5680 return false; // Shouldn't happen, but just in case.
5681 auto Str =
5682 MIB.buildInstr(Opc, {Dst}, {Val, Base}).addImm(Cst->getSExtValue());
5683 Str.cloneMemRefs(I);
5685 I.eraseFromParent();
5686 return true;
5687}
5688
/// Materialize the constant vector \p CV into \p Dst, which must be 64 or 128
/// bits wide. Strategies are tried in this order:
///  1. All-zero value: MOVIv2d_ns with immediate 0 (for a 64-bit destination
///     this goes through an FPR128 temporary and a dsub sub-register copy).
///  2. Splat value: one of the tryAdvSIMDModImm* helpers on the splatted bit
///     pattern, then the MVNI-capable helpers on the inverted pattern.
///  3. Splat value: materialize the pattern with the per-element sign bits
///     flipped and follow it with a vector FNEG (32-bit, then 64-bit elements,
///     and 16-bit elements only when the subtarget has full FP16).
///  4. Fallback: load from the constant pool and copy into \p Dst.
/// \returns the last instruction built for the chosen strategy, or nullptr if
/// the constant-pool load could not be generated.
/// NOTE(review): this listing jumps over embedded source lines 5702 and 5795.
/// Line 5796 below is the argument tail of a call whose head is not visible
/// here -- confirm against the upstream file.
5689MachineInstr *
5690AArch64InstructionSelector::emitConstantVector(Register Dst, Constant *CV,
5691 MachineIRBuilder &MIRBuilder,
5692 MachineRegisterInfo &MRI) {
5693 LLT DstTy = MRI.getType(Dst);
5694 unsigned DstSize = DstTy.getSizeInBits();
5695 assert((DstSize == 64 || DstSize == 128) &&
5696 "Unexpected vector constant size");
5697
5698 if (CV->isNullValue()) {
5699 if (DstSize == 128) {
5700 auto Mov =
5701 MIRBuilder.buildInstr(AArch64::MOVIv2d_ns, {Dst}, {}).addImm(0);
5703 return &*Mov;
5704 }
5705
5706 if (DstSize == 64) {
  // Zero a full FPR128 register, then take its dsub sub-register.
5707 auto Mov =
5708 MIRBuilder
5709 .buildInstr(AArch64::MOVIv2d_ns, {&AArch64::FPR128RegClass}, {})
5710 .addImm(0);
5711 auto Copy = MIRBuilder.buildInstr(TargetOpcode::COPY, {Dst}, {})
5712 .addReg(Mov.getReg(0), {}, AArch64::dsub);
5713 RBI.constrainGenericRegister(Dst, AArch64::FPR64RegClass, MRI);
5714 return &*Copy;
5715 }
5716 }
5717
5718 if (Constant *SplatValue = CV->getSplatValue()) {
  // Take the raw bits of the splatted element (FP constants are bitcast) and
  // replicate them across the whole destination width.
5719 APInt SplatValueAsInt =
5720 isa<ConstantFP>(SplatValue)
5721 ? cast<ConstantFP>(SplatValue)->getValueAPF().bitcastToAPInt()
5722 : SplatValue->getUniqueInteger();
5723 APInt DefBits = APInt::getSplat(
5724 DstSize, SplatValueAsInt.trunc(DstTy.getScalarSizeInBits()));
  // Try every immediate-move helper on the given bits, then only the helpers
  // that accept an Inv flag on the bitwise complement.
5725 auto TryMOVIWithBits = [&](APInt DefBits) -> MachineInstr * {
5726 MachineInstr *NewOp;
5727 bool Inv = false;
5728 if ((NewOp = tryAdvSIMDModImm64(Dst, DstSize, DefBits, MIRBuilder)) ||
5729 (NewOp =
5730 tryAdvSIMDModImm32(Dst, DstSize, DefBits, MIRBuilder, Inv)) ||
5731 (NewOp =
5732 tryAdvSIMDModImm321s(Dst, DstSize, DefBits, MIRBuilder, Inv)) ||
5733 (NewOp =
5734 tryAdvSIMDModImm16(Dst, DstSize, DefBits, MIRBuilder, Inv)) ||
5735 (NewOp = tryAdvSIMDModImm8(Dst, DstSize, DefBits, MIRBuilder)) ||
5736 (NewOp = tryAdvSIMDModImmFP(Dst, DstSize, DefBits, MIRBuilder)))
5737 return NewOp;
5738
5739 DefBits = ~DefBits;
5740 Inv = true;
5741 if ((NewOp =
5742 tryAdvSIMDModImm32(Dst, DstSize, DefBits, MIRBuilder, Inv)) ||
5743 (NewOp =
5744 tryAdvSIMDModImm321s(Dst, DstSize, DefBits, MIRBuilder, Inv)) ||
5745 (NewOp = tryAdvSIMDModImm16(Dst, DstSize, DefBits, MIRBuilder, Inv)))
5746 return NewOp;
5747 return nullptr;
5748 };
5749
5750 if (auto *NewOp = TryMOVIWithBits(DefBits))
5751 return NewOp;
5752
5753 // See if a fneg of the constant can be materialized with a MOVI, etc
5754 auto TryWithFNeg = [&](APInt DefBits, int NumBits,
5755 unsigned NegOpc) -> MachineInstr * {
5756 // FNegate each sub-element of the constant
  // NegBits ends up with the top bit of every NumBits-wide element set, so
  // the XOR below flips each element's top bit.
5757 APInt Neg = APInt::getHighBitsSet(NumBits, 1).zext(DstSize);
5758 APInt NegBits(DstSize, 0);
5759 unsigned NumElts = DstSize / NumBits;
5760 for (unsigned i = 0; i < NumElts; i++)
5761 NegBits |= Neg << (NumBits * i);
5762 NegBits = DefBits ^ NegBits;
5763
5764 // Try to create the new constants with MOVI, and if so generate a fneg
5765 // for it.
5766 if (auto *NewOp = TryMOVIWithBits(NegBits)) {
  // Redirect the immediate move to a fresh register and let the FNEG define
  // Dst instead.
5767 Register NewDst = MRI.createVirtualRegister(
5768 DstSize == 64 ? &AArch64::FPR64RegClass : &AArch64::FPR128RegClass);
5769 NewOp->getOperand(0).setReg(NewDst);
5770 return MIRBuilder.buildInstr(NegOpc, {Dst}, {NewDst});
5771 }
5772 return nullptr;
5773 };
5774 MachineInstr *R;
5775 if ((R = TryWithFNeg(DefBits, 32,
5776 DstSize == 64 ? AArch64::FNEGv2f32
5777 : AArch64::FNEGv4f32)) ||
5778 (R = TryWithFNeg(DefBits, 64,
5779 DstSize == 64 ? AArch64::FNEGDr
5780 : AArch64::FNEGv2f64)) ||
5781 (STI.hasFullFP16() &&
5782 (R = TryWithFNeg(DefBits, 16,
5783 DstSize == 64 ? AArch64::FNEGv4f16
5784 : AArch64::FNEGv8f16))))
5785 return R;
5786 }
5787
  // Nothing cheaper worked: fall back to a constant-pool load.
5788 auto *CPLoad = emitLoadFromConstantPool(CV, MIRBuilder);
5789 if (!CPLoad) {
5790 LLVM_DEBUG(dbgs() << "Could not generate cp load for constant vector!");
5791 return nullptr;
5792 }
5793
5794 auto Copy = MIRBuilder.buildCopy(Dst, CPLoad->getOperand(0));
5796 Dst, *MRI.getRegClass(CPLoad->getOperand(0).getReg()), MRI);
5797 return &*Copy;
5798}
5799
/// If every source operand of the G_BUILD_VECTOR \p I is a constant, replace
/// \p I with a materialized constant vector via emitConstantVector.
///
/// An operand counts as constant when getAnyConstantVRegValWithLookThrough
/// finds a value whose defining instruction is G_CONSTANT or G_FCONSTANT.
///
/// \returns false, leaving \p I in place, when the destination is narrower
/// than 32 bits, when any operand is not such a constant, or when
/// emitConstantVector fails; true after erasing \p I otherwise.
/// NOTE(review): this listing jumps over embedded source line 5809, which is
/// presumably where the `Csts` container used below is declared -- confirm
/// against the upstream file.
5800bool AArch64InstructionSelector::tryOptConstantBuildVec(
5801 MachineInstr &I, LLT DstTy, MachineRegisterInfo &MRI) {
5802 assert(I.getOpcode() == TargetOpcode::G_BUILD_VECTOR);
5803 unsigned DstSize = DstTy.getSizeInBits();
5804 assert(DstSize <= 128 && "Unexpected build_vec type!");
5805 if (DstSize < 32)
5806 return false;
5807 // Check if we're building a constant vector, in which case we want to
5808 // generate a constant pool load instead of a vector insert sequence.
  // Operand 0 is the destination, so the sources start at index 1.
5810 for (unsigned Idx = 1; Idx < I.getNumOperands(); ++Idx) {
5811 Register OpReg = I.getOperand(Idx).getReg();
5812 if (auto AnyConst = getAnyConstantVRegValWithLookThrough(
5813 OpReg, MRI, /*LookThroughInstrs=*/true,
5814 /*LookThroughAnyExt=*/true)) {
5815 MachineInstr *DefMI = MRI.getVRegDef(AnyConst->VReg);
5816
5817 if (DefMI->getOpcode() == TargetOpcode::G_CONSTANT) {
5818 Csts.emplace_back(
5819 ConstantInt::get(MIB.getMF().getFunction().getContext(),
5820 std::move(AnyConst->Value)));
5821 continue;
5822 }
5823
5824 if (DefMI->getOpcode() == TargetOpcode::G_FCONSTANT) {
5825 Csts.emplace_back(
5826 const_cast<ConstantFP *>(DefMI->getOperand(1).getFPImm()));
5827 continue;
5828 }
5829 }
  // Reached for any operand that did not resolve to one of the two constant
  // opcodes above.
5830 return false;
5831 }
5832 Constant *CV = ConstantVector::get(Csts);
5833 if (!emitConstantVector(I.getOperand(0).getReg(), CV, MIB, MRI))
5834 return false;
5835 I.eraseFromParent();
5836 return true;
5837}
5838
5839bool AArch64InstructionSelector::tryOptBuildVecToSubregToReg(
5840 MachineInstr &I, MachineRegisterInfo &MRI) {
5841 // Given:
5842 // %vec = G_BUILD_VECTOR %elt, %undef, %undef, ... %undef
5843 //
5844 // Select the G_BUILD_VECTOR as a SUBREG_TO_REG from %elt.
5845 Register Dst = I.getOperand(0).getReg();
5846 Register EltReg = I.getOperand(1).getReg();
5847 LLT EltTy = MRI.getType(EltReg);
5848 // If the index isn't on the same bank as its elements, then this can't be a
5849 // SUBREG_TO_REG.
5850 const RegisterBank &EltRB = *RBI.getRegBank(EltReg, MRI, TRI);
5851 const RegisterBank &DstRB = *RBI.getRegBank(Dst, MRI, TRI);
5852 if (EltRB != DstRB)
5853 return false;
5854 if (any_of(drop_begin(I.operands(), 2), [&MRI](const MachineOperand &Op) {
5855 return !getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, Op.getReg(), MRI);
5856 }))
5857 return false;
5858 unsigned SubReg;
5859 const TargetRegisterClass *EltRC = getRegClassForTypeOnBank(EltTy, EltRB);
5860 if (!EltRC)
5861 return false;
5862 const TargetRegisterClass *DstRC =
5863 getRegClassForTypeOnBank(MRI.getType(Dst), DstRB);
5864 if (!DstRC)
5865 return false;
5866 if (!getSubRegForClass(EltRC, TRI, SubReg))
5867 return false;
5868 auto SubregToReg = MIB.buildInstr(AArch64::SUBREG_TO_REG, {Dst}, {})
5869 .addUse(EltReg)
5870 .addImm(SubReg);
5871 I.eraseFromParent();
5872 constrainSelectedInstRegOperands(*SubregToReg, TII, TRI, RBI);
5873 return RBI.constrainGenericRegister(Dst, *DstRC, MRI);
5874}
5875
/// Select a G_BUILD_VECTOR \p I.
///
/// The two optimized paths (tryOptConstantBuildVec, then
/// tryOptBuildVecToSubregToReg) are tried first. Failing those, the first
/// element is moved into an FPR128 register with emitScalarToVector and every
/// further non-undef element is inserted into its lane with emitLaneInsert.
/// For destinations narrower than 128 bits the result is then copied out of
/// the ssub/dsub sub-register; for 128-bit destinations the last emitted
/// instruction is rewritten to define the destination register directly.
///
/// \returns false for element sizes other than 8/16/32/64 bits, when the
/// scalar-to-vector step fails, or when no supported register class or
/// sub-register is found for a narrow destination; true otherwise.
/// NOTE(review): this listing jumps over embedded source lines 5940, 5961 and
/// 5971. `Reg` (used at 5945) has no visible declaration and no erase of \p I
/// is visible on this path; presumably both live on the skipped lines --
/// confirm against the upstream file.
5876bool AArch64InstructionSelector::selectBuildVector(MachineInstr &I,
5877 MachineRegisterInfo &MRI) {
5878 assert(I.getOpcode() == TargetOpcode::G_BUILD_VECTOR);
5879 // Until we port more of the optimized selections, for now just use a vector
5880 // insert sequence.
5881 const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
5882 const LLT EltTy = MRI.getType(I.getOperand(1).getReg());
5883 unsigned EltSize = EltTy.getSizeInBits();
5884
5885 if (tryOptConstantBuildVec(I, DstTy, MRI))
5886 return true;
5887 if (tryOptBuildVecToSubregToReg(I, MRI))
5888 return true;
5889
5890 if (EltSize != 8 && EltSize != 16 && EltSize != 32 && EltSize != 64)
5891 return false; // Don't support all element types yet.
5892 const RegisterBank &RB = *RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI);
5893
  // The vector is always assembled in a full 128-bit register first.
5894 const TargetRegisterClass *DstRC = &AArch64::FPR128RegClass;
5895 MachineInstr *ScalarToVec =
5896 emitScalarToVector(DstTy.getElementType().getSizeInBits(), DstRC,
5897 I.getOperand(1).getReg(), MIB);
5898 if (!ScalarToVec)
5899 return false;
5900
5901 Register DstVec = ScalarToVec->getOperand(0).getReg();
5902 unsigned DstSize = DstTy.getSizeInBits();
5903
5904 // Keep track of the last MI we inserted. Later on, we might be able to save
5905 // a copy using it.
5906 MachineInstr *PrevMI = ScalarToVec;
  // Operand i of the G_BUILD_VECTOR supplies lane i - 1; lane 0 was handled by
  // the scalar-to-vector step above.
5907 for (unsigned i = 2, e = DstSize / EltSize + 1; i < e; ++i) {
5908 // Note that if we don't do a subregister copy, we can end up making an
5909 // extra register.
5910 Register OpReg = I.getOperand(i).getReg();
5911 // Do not emit inserts for undefs
5912 if (!getOpcodeDef<GImplicitDef>(OpReg, MRI)) {
5913 PrevMI = &*emitLaneInsert(std::nullopt, DstVec, OpReg, i - 1, RB, MIB);
5914 DstVec = PrevMI->getOperand(0).getReg();
5915 }
5916 }
5917
5918 // If DstTy's size in bits is less than 128, then emit a subregister copy
5919 // from DstVec to the last register we've defined.
5920 if (DstSize < 128) {
5921 // Force this to be FPR using the destination vector.
5922 const TargetRegisterClass *RC =
5923 getRegClassForTypeOnBank(DstTy, *RBI.getRegBank(DstVec, MRI, TRI));
5924 if (!RC)
5925 return false;
5926 if (RC != &AArch64::FPR32RegClass && RC != &AArch64::FPR64RegClass) {
5927 LLVM_DEBUG(dbgs() << "Unsupported register class!\n");
5928 return false;
5929 }
5930
5931 unsigned SubReg = 0;
5932 if (!getSubRegForClass(RC, TRI, SubReg))
5933 return false;
5934 if (SubReg != AArch64::ssub && SubReg != AArch64::dsub) {
5935 LLVM_DEBUG(dbgs() << "Unsupported destination size! (" << DstSize
5936 << "\n");
5937 return false;
5938 }
5939
5941 Register DstReg = I.getOperand(0).getReg();
5942
5943 MIB.buildInstr(TargetOpcode::COPY, {DstReg}, {}).addReg(DstVec, {}, SubReg);
5944 MachineOperand &RegOp = I.getOperand(1);
5945 RegOp.setReg(Reg);
5946 RBI.constrainGenericRegister(DstReg, *RC, MRI);
5947 } else {
5948 // We either have a vector with all elements (except the first one) undef or
5949 // at least one non-undef non-first element. In the first case, we need to
5950 // constrain the output register ourselves as we may have generated an
5951 // INSERT_SUBREG operation which is a generic operation for which the
5952 // output regclass cannot be automatically chosen.
5953 //
5954 // In the second case, there is no need to do this as it may generate an
5955 // instruction like INSvi32gpr where the regclass can be automatically
5956 // chosen.
5957 //
5958 // Also, we save a copy by re-using the destination register on the final
5959 // insert.
5960 PrevMI->getOperand(0).setReg(I.getOperand(0).getReg());
5962
5963 Register DstReg = PrevMI->getOperand(0).getReg();
5964 if (PrevMI == ScalarToVec && DstReg.isVirtual()) {
5965 const TargetRegisterClass *RC =
5966 getRegClassForTypeOnBank(DstTy, *RBI.getRegBank(DstVec, MRI, TRI));
5967 RBI.constrainGenericRegister(DstReg, *RC, MRI);
5968 }
5969 }
5970
5972 return true;
5973}
5974
/// Emit the multi-vector load \p Opc for the intrinsic \p I and split its
/// result into the \p NumVecs (2, 3 or 4) destination registers of \p I.
///
/// The pointer is taken from the last operand of \p I. Each destination
/// operand Idx receives a COPY from consecutive sub-registers of the load
/// result, starting at dsub0 for 64-bit vectors and qsub0 for 128-bit ones.
/// \p I itself is not erased here.
/// \returns true unconditionally.
/// NOTE(review): this listing jumps over embedded source line 5991 (right
/// after cloneMemRefs) -- confirm against the upstream file.
5975bool AArch64InstructionSelector::selectVectorLoadIntrinsic(unsigned Opc,
5976 unsigned NumVecs,
5977 MachineInstr &I) {
5978 assert(I.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS);
5979 assert(Opc && "Expected an opcode?");
5980 assert(NumVecs > 1 && NumVecs < 5 && "Only support 2, 3, or 4 vectors");
5981 auto &MRI = *MIB.getMRI();
5982 LLT Ty = MRI.getType(I.getOperand(0).getReg());
5983 unsigned Size = Ty.getSizeInBits();
5984 assert((Size == 64 || Size == 128) &&
5985 "Destination must be 64 bits or 128 bits?");
  // First sub-register index of the tuple; Idx is added to it in the loop.
5986 unsigned SubReg = Size == 64 ? AArch64::dsub0 : AArch64::qsub0;
5987 auto Ptr = I.getOperand(I.getNumOperands() - 1).getReg();
5988 assert(MRI.getType(Ptr).isPointer() && "Expected a pointer type?");
5989 auto Load = MIB.buildInstr(Opc, {Ty}, {Ptr});
5990 Load.cloneMemRefs(I);
5992 Register SelectedLoadDst = Load->getOperand(0).getReg();
5993 for (unsigned Idx = 0; Idx < NumVecs; ++Idx) {
5994 auto Vec = MIB.buildInstr(TargetOpcode::COPY, {I.getOperand(Idx)}, {})
5995 .addReg(SelectedLoadDst, {}, SubReg + Idx);
5996 // Emit the subreg copies and immediately select them.
5997 // FIXME: We should refactor our copy code into an emitCopy helper and
5998 // clean up uses of this pattern elsewhere in the selector.
5999 selectCopy(*Vec, TII, MRI, TRI, RBI);
6000 }
6001 return true;
6002}
6003
/// Emit the load-to-lane instruction \p Opc for the intrinsic \p I, which
/// loads into one lane of \p NumVecs (2, 3 or 4) vectors.
///
/// Operand layout read from \p I: NumVecs results, then (after one more
/// operand) NumVecs source vectors, the lane number, and the pointer.
/// The source vectors are packed into a Q-register tuple; 64-bit vectors are
/// first widened into FPR128 with emitScalarToVector and the results narrowed
/// back with emitNarrowVector. \p I itself is not erased here.
///
/// \returns false if the lane number is not a constant or narrowing fails,
/// true otherwise.
/// NOTE(review): the lane-number check happens after the widening and tuple
/// instructions have already been built, so a false return may leave those
/// behind -- confirm this is acceptable to the caller.
/// NOTE(review): this listing jumps over embedded source line 6038 (right
/// after cloneMemRefs) -- confirm against the upstream file.
6004bool AArch64InstructionSelector::selectVectorLoadLaneIntrinsic(
6005 unsigned Opc, unsigned NumVecs, MachineInstr &I) {
6006 assert(I.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS);
6007 assert(Opc && "Expected an opcode?");
6008 assert(NumVecs > 1 && NumVecs < 5 && "Only support 2, 3, or 4 vectors");
6009 auto &MRI = *MIB.getMRI();
6010 LLT Ty = MRI.getType(I.getOperand(0).getReg());
6011 bool Narrow = Ty.getSizeInBits() == 64;
6012
  // Skip the NumVecs result operands plus one further operand to reach the
  // first source vector.
6013 auto FirstSrcRegIt = I.operands_begin() + NumVecs + 1;
6014 SmallVector<Register, 4> Regs(NumVecs);
6015 std::transform(FirstSrcRegIt, FirstSrcRegIt + NumVecs, Regs.begin(),
6016 [](auto MO) { return MO.getReg(); });
6017
6018 if (Narrow) {
  // Widen each 64-bit source into an FPR128 register so a Q tuple can be
  // formed, and double the element count of the result type to match.
6019 transform(Regs, Regs.begin(), [this](Register Reg) {
6020 return emitScalarToVector(64, &AArch64::FPR128RegClass, Reg, MIB)
6021 ->getOperand(0)
6022 .getReg();
6023 });
6024 Ty = Ty.multiplyElements(2);
6025 }
6026
6027 Register Tuple = createQTuple(Regs, MIB);
6028 auto LaneNo = getIConstantVRegVal((FirstSrcRegIt + NumVecs)->getReg(), MRI);
6029 if (!LaneNo)
6030 return false;
6031
6032 Register Ptr = (FirstSrcRegIt + NumVecs + 1)->getReg();
6033 auto Load = MIB.buildInstr(Opc, {Ty}, {})
6034 .addReg(Tuple)
6035 .addImm(LaneNo->getZExtValue())
6036 .addReg(Ptr);
6037 Load.cloneMemRefs(I);
6039 Register SelectedLoadDst = Load->getOperand(0).getReg();
6040 unsigned SubReg = AArch64::qsub0;
6041 for (unsigned Idx = 0; Idx < NumVecs; ++Idx) {
  // In the narrow case copy into a fresh FPR128 register first; the real
  // destination is produced by emitNarrowVector below.
6042 auto Vec = MIB.buildInstr(TargetOpcode::COPY,
6043 {Narrow ? DstOp(&AArch64::FPR128RegClass)
6044 : DstOp(I.getOperand(Idx).getReg())},
6045 {})
6046 .addReg(SelectedLoadDst, {}, SubReg + Idx);
6047 Register WideReg = Vec.getReg(0);
6048 // Emit the subreg copies and immediately select them.
6049 selectCopy(*Vec, TII, MRI, TRI, RBI);
6050 if (Narrow &&
6051 !emitNarrowVector(I.getOperand(Idx).getReg(), WideReg, MIB, MRI))
6052 return false;
6053 }
6054 return true;
6055}
6056
/// Emit the multi-vector store \p Opc for the intrinsic \p I.
///
/// Operands 1 .. NumVecs of \p I are the vectors to store and operand
/// 1 + NumVecs is the pointer. The vectors are packed into a Q-register tuple
/// when they are 128 bits wide and into a D-register tuple otherwise.
/// \p I itself is not erased here.
/// NOTE(review): this listing jumps over embedded source line 6072 (right
/// after cloneMemRefs) -- confirm against the upstream file.
6057void AArch64InstructionSelector::selectVectorStoreIntrinsic(MachineInstr &I,
6058 unsigned NumVecs,
6059 unsigned Opc) {
6060 MachineRegisterInfo &MRI = I.getParent()->getParent()->getRegInfo();
6061 LLT Ty = MRI.getType(I.getOperand(1).getReg());
6062 Register Ptr = I.getOperand(1 + NumVecs).getReg();
6063
6064 SmallVector<Register, 2> Regs(NumVecs);
6065 std::transform(I.operands_begin() + 1, I.operands_begin() + 1 + NumVecs,
6066 Regs.begin(), [](auto MO) { return MO.getReg(); });
6067
6068 Register Tuple = Ty.getSizeInBits() == 128 ? createQTuple(Regs, MIB)
6069 : createDTuple(Regs, MIB);
6070 auto Store = MIB.buildInstr(Opc, {}, {Tuple, Ptr});
6071 Store.cloneMemRefs(I);
6073}
6074
/// Emit the store-from-lane instruction \p Opc for the intrinsic \p I.
///
/// Operands 1 .. NumVecs of \p I are the source vectors, operand 1 + NumVecs
/// is the lane number and the operand after it is the pointer. The vectors
/// are always packed into a Q-register tuple; 64-bit vectors are first
/// widened into FPR128 with emitScalarToVector. \p I itself is not erased
/// here.
///
/// \returns false if the lane number is not a constant, true otherwise.
/// NOTE(review): the lane-number check happens after the widening and tuple
/// instructions have already been built, so a false return may leave those
/// behind -- confirm this is acceptable to the caller.
/// NOTE(review): this listing jumps over embedded source line 6103 (right
/// after cloneMemRefs) -- confirm against the upstream file.
6075bool AArch64InstructionSelector::selectVectorStoreLaneIntrinsic(
6076 MachineInstr &I, unsigned NumVecs, unsigned Opc) {
6077 MachineRegisterInfo &MRI = I.getParent()->getParent()->getRegInfo();
6078 LLT Ty = MRI.getType(I.getOperand(1).getReg());
6079 bool Narrow = Ty.getSizeInBits() == 64;
6080
6081 SmallVector<Register, 2> Regs(NumVecs);
6082 std::transform(I.operands_begin() + 1, I.operands_begin() + 1 + NumVecs,
6083 Regs.begin(), [](auto MO) { return MO.getReg(); });
6084
6085 if (Narrow)
  // Widen each 64-bit source into an FPR128 register so a Q tuple can be
  // formed.
6086 transform(Regs, Regs.begin(), [this](Register Reg) {
6087 return emitScalarToVector(64, &AArch64::FPR128RegClass, Reg, MIB)
6088 ->getOperand(0)
6089 .getReg();
6090 });
6091
6092 Register Tuple = createQTuple(Regs, MIB);
6093
6094 auto LaneNo = getIConstantVRegVal(I.getOperand(1 + NumVecs).getReg(), MRI);
6095 if (!LaneNo)
6096 return false;
6097 Register Ptr = I.getOperand(1 + NumVecs + 1).getReg();
6098 auto Store = MIB.buildInstr(Opc, {}, {})
6099 .addReg(Tuple)
6100 .addImm(LaneNo->getZExtValue())
6101 .addReg(Ptr);
6102 Store.cloneMemRefs(I);
6104 return true;
6105}
6106
6107bool AArch64InstructionSelector::selectIntrinsicWithSideEffects(
6108 MachineInstr &I, MachineRegisterInfo &MRI) {
6109 // Find the intrinsic ID.
6110 unsigned IntrinID = cast<GIntrinsic>(I).getIntrinsicID();
6111
6112 const LLT S8 = LLT::scalar(8);
6113 const LLT S16 = LLT::scalar(16);
6114 const LLT S32 = LLT::scalar(32);
6115 const LLT S64 = LLT::scalar(64);
6116 const LLT P0 = LLT::pointer(0, 64);
6117 // Select the instruction.
6118 switch (IntrinID) {
6119 default:
6120 return false;
6121 case Intrinsic::aarch64_ldxp:
6122 case Intrinsic::aarch64_ldaxp: {
6123 auto NewI = MIB.buildInstr(
6124 IntrinID == Intrinsic::aarch64_ldxp ? AArch64::LDXPX : AArch64::LDAXPX,
6125 {I.getOperand(0).getReg(), I.getOperand(1).getReg()},
6126 {I.getOperand(3)});
6127 NewI.cloneMemRefs(I);
6129 break;
6130 }
6131 case Intrinsic::aarch64_neon_ld1x2: {
6132 LLT Ty = MRI.getType(I.getOperand(0).getReg());
6133 unsigned Opc = 0;
6134 if (Ty == LLT::fixed_vector(8, S8))
6135 Opc = AArch64::LD1Twov8b;
6136 else if (Ty == LLT::fixed_vector(16, S8))
6137 Opc = AArch64::LD1Twov16b;
6138 else if (Ty == LLT::fixed_vector(4, S16))
6139 Opc = AArch64::LD1Twov4h;
6140 else if (Ty == LLT::fixed_vector(8, S16))
6141 Opc = AArch64::LD1Twov8h;
6142 else if (Ty == LLT::fixed_vector(2, S32))
6143 Opc = AArch64::LD1Twov2s;
6144 else if (Ty == LLT::fixed_vector(4, S32))
6145 Opc = AArch64::LD1Twov4s;
6146 else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
6147 Opc = AArch64::LD1Twov2d;
6148 else if (Ty == S64 || Ty == P0)
6149 Opc = AArch64::LD1Twov1d;
6150 else
6151 llvm_unreachable("Unexpected type for ld1x2!");
6152 selectVectorLoadIntrinsic(Opc, 2, I);
6153 break;
6154 }
6155 case Intrinsic::aarch64_neon_ld1x3: {
6156 LLT Ty = MRI.getType(I.getOperand(0).getReg());
6157 unsigned Opc = 0;
6158 if (Ty == LLT::fixed_vector(8, S8))
6159 Opc = AArch64::LD1Threev8b;
6160 else if (Ty == LLT::fixed_vector(16, S8))
6161 Opc = AArch64::LD1Threev16b;
6162 else if (Ty == LLT::fixed_vector(4, S16))
6163 Opc = AArch64::LD1Threev4h;
6164 else if (Ty == LLT::fixed_vector(8, S16))
6165 Opc = AArch64::LD1Threev8h;
6166 else if (Ty == LLT::fixed_vector(2, S32))
6167 Opc = AArch64::LD1Threev2s;
6168 else if (Ty == LLT::fixed_vector(4, S32))
6169 Opc = AArch64::LD1Threev4s;
6170 else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
6171 Opc = AArch64::LD1Threev2d;
6172 else if (Ty == S64 || Ty == P0)
6173 Opc = AArch64::LD1Threev1d;
6174 else
6175 llvm_unreachable("Unexpected type for ld1x3!");
6176 selectVectorLoadIntrinsic(Opc, 3, I);
6177 break;
6178 }
6179 case Intrinsic::aarch64_neon_ld1x4: {
6180 LLT Ty = MRI.getType(I.getOperand(0).getReg());
6181 unsigned Opc = 0;
6182 if (Ty == LLT::fixed_vector(8, S8))
6183 Opc = AArch64::LD1Fourv8b;
6184 else if (Ty == LLT::fixed_vector(16, S8))
6185 Opc = AArch64::LD1Fourv16b;
6186 else if (Ty == LLT::fixed_vector(4, S16))
6187 Opc = AArch64::LD1Fourv4h;
6188 else if (Ty == LLT::fixed_vector(8, S16))
6189 Opc = AArch64::LD1Fourv8h;
6190 else if (Ty == LLT::fixed_vector(2, S32))
6191 Opc = AArch64::LD1Fourv2s;
6192 else if (Ty == LLT::fixed_vector(4, S32))
6193 Opc = AArch64::LD1Fourv4s;
6194 else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
6195 Opc = AArch64::LD1Fourv2d;
6196 else if (Ty == S64 || Ty == P0)
6197 Opc = AArch64::LD1Fourv1d;
6198 else
6199 llvm_unreachable("Unexpected type for ld1x4!");
6200 selectVectorLoadIntrinsic(Opc, 4, I);
6201 break;
6202 }
6203 case Intrinsic::aarch64_neon_ld2: {
6204 LLT Ty = MRI.getType(I.getOperand(0).getReg());
6205 unsigned Opc = 0;
6206 if (Ty == LLT::fixed_vector(8, S8))
6207 Opc = AArch64::LD2Twov8b;
6208 else if (Ty == LLT::fixed_vector(16, S8))
6209 Opc = AArch64::LD2Twov16b;
6210 else if (Ty == LLT::fixed_vector(4, S16))
6211 Opc = AArch64::LD2Twov4h;
6212 else if (Ty == LLT::fixed_vector(8, S16))
6213 Opc = AArch64::LD2Twov8h;
6214 else if (Ty == LLT::fixed_vector(2, S32))
6215 Opc = AArch64::LD2Twov2s;
6216 else if (Ty == LLT::fixed_vector(4, S32))
6217 Opc = AArch64::LD2Twov4s;
6218 else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
6219 Opc = AArch64::LD2Twov2d;
6220 else if (Ty == S64 || Ty == P0)
6221 Opc = AArch64::LD1Twov1d;
6222 else
6223 llvm_unreachable("Unexpected type for ld2!");
6224 selectVectorLoadIntrinsic(Opc, 2, I);
6225 break;
6226 }
6227 case Intrinsic::aarch64_neon_ld2lane: {
6228 LLT Ty = MRI.getType(I.getOperand(0).getReg());
6229 unsigned Opc;
6230 if (Ty == LLT::fixed_vector(8, S8) || Ty == LLT::fixed_vector(16, S8))
6231 Opc = AArch64::LD2i8;
6232 else if (Ty == LLT::fixed_vector(4, S16) || Ty == LLT::fixed_vector(8, S16))
6233 Opc = AArch64::LD2i16;
6234 else if (Ty == LLT::fixed_vector(2, S32) || Ty == LLT::fixed_vector(4, S32))
6235 Opc = AArch64::LD2i32;
6236 else if (Ty == LLT::fixed_vector(2, S64) ||
6237 Ty == LLT::fixed_vector(2, P0) || Ty == S64 || Ty == P0)
6238 Opc = AArch64::LD2i64;
6239 else
6240 llvm_unreachable("Unexpected type for st2lane!");
6241 if (!selectVectorLoadLaneIntrinsic(Opc, 2, I))
6242 return false;
6243 break;
6244 }
6245 case Intrinsic::aarch64_neon_ld2r: {
6246 LLT Ty = MRI.getType(I.getOperand(0).getReg());
6247 unsigned Opc = 0;
6248 if (Ty == LLT::fixed_vector(8, S8))
6249 Opc = AArch64::LD2Rv8b;
6250 else if (Ty == LLT::fixed_vector(16, S8))
6251 Opc = AArch64::LD2Rv16b;
6252 else if (Ty == LLT::fixed_vector(4, S16))
6253 Opc = AArch64::LD2Rv4h;
6254 else if (Ty == LLT::fixed_vector(8, S16))
6255 Opc = AArch64::LD2Rv8h;
6256 else if (Ty == LLT::fixed_vector(2, S32))
6257 Opc = AArch64::LD2Rv2s;
6258 else if (Ty == LLT::fixed_vector(4, S32))
6259 Opc = AArch64::LD2Rv4s;
6260 else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
6261 Opc = AArch64::LD2Rv2d;
6262 else if (Ty == S64 || Ty == P0)
6263 Opc = AArch64::LD2Rv1d;
6264 else
6265 llvm_unreachable("Unexpected type for ld2r!");
6266 selectVectorLoadIntrinsic(Opc, 2, I);
6267 break;
6268 }
6269 case Intrinsic::aarch64_neon_ld3: {
6270 LLT Ty = MRI.getType(I.getOperand(0).getReg());
6271 unsigned Opc = 0;
6272 if (Ty == LLT::fixed_vector(8, S8))
6273 Opc = AArch64::LD3Threev8b;
6274 else if (Ty == LLT::fixed_vector(16, S8))
6275 Opc = AArch64::LD3Threev16b;
6276 else if (Ty == LLT::fixed_vector(4, S16))
6277 Opc = AArch64::LD3Threev4h;
6278 else if (Ty == LLT::fixed_vector(8, S16))
6279 Opc = AArch64::LD3Threev8h;
6280 else if (Ty == LLT::fixed_vector(2, S32))
6281 Opc = AArch64::LD3Threev2s;
6282 else if (Ty == LLT::fixed_vector(4, S32))
6283 Opc = AArch64::LD3Threev4s;
6284 else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
6285 Opc = AArch64::LD3Threev2d;
6286 else if (Ty == S64 || Ty == P0)
6287 Opc = AArch64::LD1Threev1d;
6288 else
6289 llvm_unreachable("Unexpected type for ld3!");
6290 selectVectorLoadIntrinsic(Opc, 3, I);
6291 break;
6292 }
6293 case Intrinsic::aarch64_neon_ld3lane: {
6294 LLT Ty = MRI.getType(I.getOperand(0).getReg());
6295 unsigned Opc;
6296 if (Ty == LLT::fixed_vector(8, S8) || Ty == LLT::fixed_vector(16, S8))
6297 Opc = AArch64::LD3i8;
6298 else if (Ty == LLT::fixed_vector(4, S16) || Ty == LLT::fixed_vector(8, S16))
6299 Opc = AArch64::LD3i16;
6300 else if (Ty == LLT::fixed_vector(2, S32) || Ty == LLT::fixed_vector(4, S32))
6301 Opc = AArch64::LD3i32;
6302 else if (Ty == LLT::fixed_vector(2, S64) ||
6303 Ty == LLT::fixed_vector(2, P0) || Ty == S64 || Ty == P0)
6304 Opc = AArch64::LD3i64;
6305 else
6306 llvm_unreachable("Unexpected type for st3lane!");
6307 if (!selectVectorLoadLaneIntrinsic(Opc, 3, I))
6308 return false;
6309 break;
6310 }
6311 case Intrinsic::aarch64_neon_ld3r: {
6312 LLT Ty = MRI.getType(I.getOperand(0).getReg());
6313 unsigned Opc = 0;
6314 if (Ty == LLT::fixed_vector(8, S8))
6315 Opc = AArch64::LD3Rv8b;
6316 else if (Ty == LLT::fixed_vector(16, S8))
6317 Opc = AArch64::LD3Rv16b;
6318 else if (Ty == LLT::fixed_vector(4, S16))
6319 Opc = AArch64::LD3Rv4h;
6320 else if (Ty == LLT::fixed_vector(8, S16))
6321 Opc = AArch64::LD3Rv8h;
6322 else if (Ty == LLT::fixed_vector(2, S32))
6323 Opc = AArch64::LD3Rv2s;
6324 else if (Ty == LLT::fixed_vector(4, S32))
6325 Opc = AArch64::LD3Rv4s;
6326 else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
6327 Opc = AArch64::LD3Rv2d;
6328 else if (Ty == S64 || Ty == P0)
6329 Opc = AArch64::LD3Rv1d;
6330 else
6331 llvm_unreachable("Unexpected type for ld3r!");
6332 selectVectorLoadIntrinsic(Opc, 3, I);
6333 break;
6334 }
6335 case Intrinsic::aarch64_neon_ld4: {
6336 LLT Ty = MRI.getType(I.getOperand(0).getReg());
6337 unsigned Opc = 0;
6338 if (Ty == LLT::fixed_vector(8, S8))
6339 Opc = AArch64::LD4Fourv8b;
6340 else if (Ty == LLT::fixed_vector(16, S8))
6341 Opc = AArch64::LD4Fourv16b;
6342 else if (Ty == LLT::fixed_vector(4, S16))
6343 Opc = AArch64::LD4Fourv4h;
6344 else if (Ty == LLT::fixed_vector(8, S16))
6345 Opc = AArch64::LD4Fourv8h;
6346 else if (Ty == LLT::fixed_vector(2, S32))
6347 Opc = AArch64::LD4Fourv2s;
6348 else if (Ty == LLT::fixed_vector(4, S32))
6349 Opc = AArch64::LD4Fourv4s;
6350 else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
6351 Opc = AArch64::LD4Fourv2d;
6352 else if (Ty == S64 || Ty == P0)
6353 Opc = AArch64::LD1Fourv1d;
6354 else
6355 llvm_unreachable("Unexpected type for ld4!");
6356 selectVectorLoadIntrinsic(Opc, 4, I);
6357 break;
6358 }
6359 case Intrinsic::aarch64_neon_ld4lane: {
6360 LLT Ty = MRI.getType(I.getOperand(0).getReg());
6361 unsigned Opc;
6362 if (Ty == LLT::fixed_vector(8, S8) || Ty == LLT::fixed_vector(16, S8))
6363 Opc = AArch64::LD4i8;
6364 else if (Ty == LLT::fixed_vector(4, S16) || Ty == LLT::fixed_vector(8, S16))
6365 Opc = AArch64::LD4i16;
6366 else if (Ty == LLT::fixed_vector(2, S32) || Ty == LLT::fixed_vector(4, S32))
6367 Opc = AArch64::LD4i32;
6368 else if (Ty == LLT::fixed_vector(2, S64) ||
6369 Ty == LLT::fixed_vector(2, P0) || Ty == S64 || Ty == P0)
6370 Opc = AArch64::LD4i64;
6371 else
6372 llvm_unreachable("Unexpected type for st4lane!");
6373 if (!selectVectorLoadLaneIntrinsic(Opc, 4, I))
6374 return false;
6375 break;
6376 }
6377 case Intrinsic::aarch64_neon_ld4r: {
6378 LLT Ty = MRI.getType(I.getOperand(0).getReg());
6379 unsigned Opc = 0;
6380 if (Ty == LLT::fixed_vector(8, S8))
6381 Opc = AArch64::LD4Rv8b;
6382 else if (Ty == LLT::fixed_vector(16, S8))
6383 Opc = AArch64::LD4Rv16b;
6384 else if (Ty == LLT::fixed_vector(4, S16))
6385 Opc = AArch64::LD4Rv4h;
6386 else if (Ty == LLT::fixed_vector(8, S16))
6387 Opc = AArch64::LD4Rv8h;
6388 else if (Ty == LLT::fixed_vector(2, S32))
6389 Opc = AArch64::LD4Rv2s;
6390 else if (Ty == LLT::fixed_vector(4, S32))
6391 Opc = AArch64::LD4Rv4s;
6392 else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
6393 Opc = AArch64::LD4Rv2d;
6394 else if (Ty == S64 || Ty == P0)
6395 Opc = AArch64::LD4Rv1d;
6396 else
6397 llvm_unreachable("Unexpected type for ld4r!");
6398 selectVectorLoadIntrinsic(Opc, 4, I);
6399 break;
6400 }
6401 case Intrinsic::aarch64_neon_st1x2: {
6402 LLT Ty = MRI.getType(I.getOperand(1).getReg());
6403 unsigned Opc;
6404 if (Ty == LLT::fixed_vector(8, S8))
6405 Opc = AArch64::ST1Twov8b;
6406 else if (Ty == LLT::fixed_vector(16, S8))
6407 Opc = AArch64::ST1Twov16b;
6408 else if (Ty == LLT::fixed_vector(4, S16))
6409 Opc = AArch64::ST1Twov4h;
6410 else if (Ty == LLT::fixed_vector(8, S16))
6411 Opc = AArch64::ST1Twov8h;
6412 else if (Ty == LLT::fixed_vector(2, S32))
6413 Opc = AArch64::ST1Twov2s;
6414 else if (Ty == LLT::fixed_vector(4, S32))
6415 Opc = AArch64::ST1Twov4s;
6416 else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
6417 Opc = AArch64::ST1Twov2d;
6418 else if (Ty == S64 || Ty == P0)
6419 Opc = AArch64::ST1Twov1d;
6420 else
6421 llvm_unreachable("Unexpected type for st1x2!");
6422 selectVectorStoreIntrinsic(I, 2, Opc);
6423 break;
6424 }
6425 case Intrinsic::aarch64_neon_st1x3: {
6426 LLT Ty = MRI.getType(I.getOperand(1).getReg());
6427 unsigned Opc;
6428 if (Ty == LLT::fixed_vector(8, S8))
6429 Opc = AArch64::ST1Threev8b;
6430 else if (Ty == LLT::fixed_vector(16, S8))
6431 Opc = AArch64::ST1Threev16b;
6432 else if (Ty == LLT::fixed_vector(4, S16))
6433 Opc = AArch64::ST1Threev4h;
6434 else if (Ty == LLT::fixed_vector(8, S16))
6435 Opc = AArch64::ST1Threev8h;
6436 else if (Ty == LLT::fixed_vector(2, S32))
6437 Opc = AArch64::ST1Threev2s;
6438 else if (Ty == LLT::fixed_vector(4, S32))
6439 Opc = AArch64::ST1Threev4s;
6440 else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
6441 Opc = AArch64::ST1Threev2d;
6442 else if (Ty == S64 || Ty == P0)
6443 Opc = AArch64::ST1Threev1d;
6444 else
6445 llvm_unreachable("Unexpected type for st1x3!");
6446 selectVectorStoreIntrinsic(I, 3, Opc);
6447 break;
6448 }
6449 case Intrinsic::aarch64_neon_st1x4: {
6450 LLT Ty = MRI.getType(I.getOperand(1).getReg());
6451 unsigned Opc;
6452 if (Ty == LLT::fixed_vector(8, S8))
6453 Opc = AArch64::ST1Fourv8b;
6454 else if (Ty == LLT::fixed_vector(16, S8))
6455 Opc = AArch64::ST1Fourv16b;
6456 else if (Ty == LLT::fixed_vector(4, S16))
6457 Opc = AArch64::ST1Fourv4h;
6458 else if (Ty == LLT::fixed_vector(8, S16))
6459 Opc = AArch64::ST1Fourv8h;
6460 else if (Ty == LLT::fixed_vector(2, S32))
6461 Opc = AArch64::ST1Fourv2s;
6462 else if (Ty == LLT::fixed_vector(4, S32))
6463 Opc = AArch64::ST1Fourv4s;
6464 else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
6465 Opc = AArch64::ST1Fourv2d;
6466 else if (Ty == S64 || Ty == P0)
6467 Opc = AArch64::ST1Fourv1d;
6468 else
6469 llvm_unreachable("Unexpected type for st1x4!");
6470 selectVectorStoreIntrinsic(I, 4, Opc);
6471 break;
6472 }
6473 case Intrinsic::aarch64_neon_st2: {
6474 LLT Ty = MRI.getType(I.getOperand(1).getReg());
6475 unsigned Opc;
6476 if (Ty == LLT::fixed_vector(8, S8))
6477 Opc = AArch64::ST2Twov8b;
6478 else if (Ty == LLT::fixed_vector(16, S8))
6479 Opc = AArch64::ST2Twov16b;
6480 else if (Ty == LLT::fixed_vector(4, S16))
6481 Opc = AArch64::ST2Twov4h;
6482 else if (Ty == LLT::fixed_vector(8, S16))
6483 Opc = AArch64::ST2Twov8h;
6484 else if (Ty == LLT::fixed_vector(2, S32))
6485 Opc = AArch64::ST2Twov2s;
6486 else if (Ty == LLT::fixed_vector(4, S32))
6487 Opc = AArch64::ST2Twov4s;
6488 else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
6489 Opc = AArch64::ST2Twov2d;
6490 else if (Ty == S64 || Ty == P0)
6491 Opc = AArch64::ST1Twov1d;
6492 else
6493 llvm_unreachable("Unexpected type for st2!");
6494 selectVectorStoreIntrinsic(I, 2, Opc);
6495 break;
6496 }
6497 case Intrinsic::aarch64_neon_st3: {
6498 LLT Ty = MRI.getType(I.getOperand(1).getReg());
6499 unsigned Opc;
6500 if (Ty == LLT::fixed_vector(8, S8))
6501 Opc = AArch64::ST3Threev8b;
6502 else if (Ty == LLT::fixed_vector(16, S8))
6503 Opc = AArch64::ST3Threev16b;
6504 else if (Ty == LLT::fixed_vector(4, S16))
6505 Opc = AArch64::ST3Threev4h;
6506 else if (Ty == LLT::fixed_vector(8, S16))
6507 Opc = AArch64::ST3Threev8h;
6508 else if (Ty == LLT::fixed_vector(2, S32))
6509 Opc = AArch64::ST3Threev2s;
6510 else if (Ty == LLT::fixed_vector(4, S32))
6511 Opc = AArch64::ST3Threev4s;
6512 else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
6513 Opc = AArch64::ST3Threev2d;
6514 else if (Ty == S64 || Ty == P0)
6515 Opc = AArch64::ST1Threev1d;
6516 else
6517 llvm_unreachable("Unexpected type for st3!");
6518 selectVectorStoreIntrinsic(I, 3, Opc);
6519 break;
6520 }
6521 case Intrinsic::aarch64_neon_st4: {
6522 LLT Ty = MRI.getType(I.getOperand(1).getReg());
6523 unsigned Opc;
6524 if (Ty == LLT::fixed_vector(8, S8))
6525 Opc = AArch64::ST4Fourv8b;
6526 else if (Ty == LLT::fixed_vector(16, S8))
6527 Opc = AArch64::ST4Fourv16b;
6528 else if (Ty == LLT::fixed_vector(4, S16))
6529 Opc = AArch64::ST4Fourv4h;
6530 else if (Ty == LLT::fixed_vector(8, S16))
6531 Opc = AArch64::ST4Fourv8h;
6532 else if (Ty == LLT::fixed_vector(2, S32))
6533 Opc = AArch64::ST4Fourv2s;
6534 else if (Ty == LLT::fixed_vector(4, S32))
6535 Opc = AArch64::ST4Fourv4s;
6536 else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
6537 Opc = AArch64::ST4Fourv2d;
6538 else if (Ty == S64 || Ty == P0)
6539 Opc = AArch64::ST1Fourv1d;
6540 else
6541 llvm_unreachable("Unexpected type for st4!");
6542 selectVectorStoreIntrinsic(I, 4, Opc);
6543 break;
6544 }
6545 case Intrinsic::aarch64_neon_st2lane: {
6546 LLT Ty = MRI.getType(I.getOperand(1).getReg());
6547 unsigned Opc;
6548 if (Ty == LLT::fixed_vector(8, S8) || Ty == LLT::fixed_vector(16, S8))
6549 Opc = AArch64::ST2i8;
6550 else if (Ty == LLT::fixed_vector(4, S16) || Ty == LLT::fixed_vector(8, S16))
6551 Opc = AArch64::ST2i16;
6552 else if (Ty == LLT::fixed_vector(2, S32) || Ty == LLT::fixed_vector(4, S32))
6553 Opc = AArch64::ST2i32;
6554 else if (Ty == LLT::fixed_vector(2, S64) ||
6555 Ty == LLT::fixed_vector(2, P0) || Ty == S64 || Ty == P0)
6556 Opc = AArch64::ST2i64;
6557 else
6558 llvm_unreachable("Unexpected type for st2lane!");
6559 if (!selectVectorStoreLaneIntrinsic(I, 2, Opc))
6560 return false;
6561 break;
6562 }
6563 case Intrinsic::aarch64_neon_st3lane: {
6564 LLT Ty = MRI.getType(I.getOperand(1).getReg());
6565 unsigned Opc;
6566 if (Ty == LLT::fixed_vector(8, S8) || Ty == LLT::fixed_vector(16, S8))
6567 Opc = AArch64::ST3i8;
6568 else if (Ty == LLT::fixed_vector(4, S16) || Ty == LLT::fixed_vector(8, S16))
6569 Opc = AArch64::ST3i16;
6570 else if (Ty == LLT::fixed_vector(2, S32) || Ty == LLT::fixed_vector(4, S32))
6571 Opc = AArch64::ST3i32;
6572 else if (Ty == LLT::fixed_vector(2, S64) ||
6573 Ty == LLT::fixed_vector(2, P0) || Ty == S64 || Ty == P0)
6574 Opc = AArch64::ST3i64;
6575 else
6576 llvm_unreachable("Unexpected type for st3lane!");
6577 if (!selectVectorStoreLaneIntrinsic(I, 3, Opc))
6578 return false;
6579 break;
6580 }
6581 case Intrinsic::aarch64_neon_st4lane: {
6582 LLT Ty = MRI.getType(I.getOperand(1).getReg());
6583 unsigned Opc;
6584 if (Ty == LLT::fixed_vector(8, S8) || Ty == LLT::fixed_vector(16, S8))
6585 Opc = AArch64::ST4i8;
6586 else if (Ty == LLT::fixed_vector(4, S16) || Ty == LLT::fixed_vector(8, S16))
6587 Opc = AArch64::ST4i16;
6588 else if (Ty == LLT::fixed_vector(2, S32) || Ty == LLT::fixed_vector(4, S32))
6589 Opc = AArch64::ST4i32;
6590 else if (Ty == LLT::fixed_vector(2, S64) ||
6591 Ty == LLT::fixed_vector(2, P0) || Ty == S64 || Ty == P0)
6592 Opc = AArch64::ST4i64;
6593 else
6594 llvm_unreachable("Unexpected type for st4lane!");
6595 if (!selectVectorStoreLaneIntrinsic(I, 4, Opc))
6596 return false;
6597 break;
6598 }
6599 case Intrinsic::aarch64_mops_memset_tag: {
6600 // Transform
6601 // %dst:gpr(p0) = \
6602 // G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aarch64.mops.memset.tag),
6603 // \ %dst:gpr(p0), %val:gpr(s64), %n:gpr(s64)
6604 // where %dst is updated, into
6605 // %Rd:GPR64common, %Rn:GPR64) = \
6606 // MOPSMemorySetTaggingPseudo \
6607 // %Rd:GPR64common, %Rn:GPR64, %Rm:GPR64
6608 // where Rd and Rn are tied.
6609 // It is expected that %val has been extended to s64 in legalization.
6610 // Note that the order of the size/value operands are swapped.
6611
6612 Register DstDef = I.getOperand(0).getReg();
6613 // I.getOperand(1) is the intrinsic function
6614 Register DstUse = I.getOperand(2).getReg();
6615 Register ValUse = I.getOperand(3).getReg();
6616 Register SizeUse = I.getOperand(4).getReg();
6617
6618 // MOPSMemorySetTaggingPseudo has two defs; the intrinsic call has only one.
6619 // Therefore an additional virtual register is required for the updated size
6620 // operand. This value is not accessible via the semantics of the intrinsic.
6622
6623 auto Memset = MIB.buildInstr(AArch64::MOPSMemorySetTaggingPseudo,
6624 {DstDef, SizeDef}, {DstUse, SizeUse, ValUse});
6625 Memset.cloneMemRefs(I);
6627 break;
6628 }
6629 case Intrinsic::ptrauth_resign_load_relative: {
6630 Register DstReg = I.getOperand(0).getReg();
6631 Register ValReg = I.getOperand(2).getReg();
6632 uint64_t AUTKey = I.getOperand(3).getImm();
6633 Register AUTDisc = I.getOperand(4).getReg();
6634 uint64_t PACKey = I.getOperand(5).getImm();
6635 Register PACDisc = I.getOperand(6).getReg();
6636 int64_t Addend = I.getOperand(7).getImm();
6637
6638 Register AUTAddrDisc = AUTDisc;
6639 uint16_t AUTConstDiscC = 0;
6640 std::tie(AUTConstDiscC, AUTAddrDisc) =
6642
6643 Register PACAddrDisc = PACDisc;
6644 uint16_t PACConstDiscC = 0;
6645 std::tie(PACConstDiscC, PACAddrDisc) =
6647
6648 MIB.buildCopy({AArch64::X16}, {ValReg});
6649
6650 MIB.buildInstr(AArch64::AUTRELLOADPAC)
6651 .addImm(AUTKey)
6652 .addImm(AUTConstDiscC)
6653 .addUse(AUTAddrDisc)
6654 .addImm(PACKey)
6655 .addImm(PACConstDiscC)
6656 .addUse(PACAddrDisc)
6657 .addImm(Addend)
6658 .constrainAllUses(TII, TRI, RBI);
6659 MIB.buildCopy({DstReg}, Register(AArch64::X16));
6660
6661 RBI.constrainGenericRegister(DstReg, AArch64::GPR64RegClass, MRI);
6662 I.eraseFromParent();
6663 return true;
6664 }
6665 }
6666
6667 I.eraseFromParent();
6668 return true;
6669}
6670
/// Manually selects the intrinsic instruction \p I, dispatching on its
/// intrinsic ID.
///
/// Cases handled here: ptrauth_resign, ptrauth_auth, frameaddress,
/// returnaddress, the NEON tbl2/tbl3/tbl4 and tbx2/tbx3/tbx4 table lookups, and
/// swift_async_context_addr. Every handled case emits its replacement through
/// MIB, erases \p I (the table cases do so inside SelectTable) and returns
/// true.
///
/// \param I   The intrinsic instruction; operand 0 is read as the destination
///            register and the intrinsic's arguments are read from operand 2
///            onwards.
/// \param MRI Register info used to create and constrain virtual registers.
/// \returns false when the intrinsic ID is not one of the cases above.
6671bool AArch64InstructionSelector::selectIntrinsic(MachineInstr &I,
6672 MachineRegisterInfo &MRI) {
6673 unsigned IntrinID = cast<GIntrinsic>(I).getIntrinsicID();
6674
6675 switch (IntrinID) {
6676 default:
6677 break;
6678 case Intrinsic::ptrauth_resign: {
 // Operands: 2 = value, 3 = AUT key, 4 = AUT discriminator, 5 = PAC key,
 // 6 = PAC discriminator.
6679 Register DstReg = I.getOperand(0).getReg();
6680 Register ValReg = I.getOperand(2).getReg();
6681 uint64_t AUTKey = I.getOperand(3).getImm();
6682 Register AUTDisc = I.getOperand(4).getReg();
6683 uint64_t PACKey = I.getOperand(5).getImm();
6684 Register PACDisc = I.getOperand(6).getReg();
6685
 // Each discriminator is split into a 16-bit constant part and an address
 // part; both default to "constant 0 + the original register".
6686 Register AUTAddrDisc = AUTDisc;
6687 uint16_t AUTConstDiscC = 0;
6688 std::tie(AUTConstDiscC, AUTAddrDisc) =
6690
6691 Register PACAddrDisc = PACDisc;
6692 uint16_t PACConstDiscC = 0;
6693 std::tie(PACConstDiscC, PACAddrDisc) =
6695
 // The value is passed in through X16 and the result is read back from X16;
 // X17 is marked as defined (IMPLICIT_DEF) before the AUTPAC pseudo.
6696 MIB.buildCopy({AArch64::X16}, {ValReg});
6697 MIB.buildInstr(TargetOpcode::IMPLICIT_DEF, {AArch64::X17}, {});
6698 MIB.buildInstr(AArch64::AUTPAC)
6699 .addImm(AUTKey)
6700 .addImm(AUTConstDiscC)
6701 .addUse(AUTAddrDisc)
6702 .addImm(PACKey)
6703 .addImm(PACConstDiscC)
6704 .addUse(PACAddrDisc)
6705 .constrainAllUses(TII, TRI, RBI);
6706 MIB.buildCopy({DstReg}, Register(AArch64::X16));
6707
6708 RBI.constrainGenericRegister(DstReg, AArch64::GPR64RegClass, MRI);
6709 I.eraseFromParent();
6710 return true;
6711 }
6712 case Intrinsic::ptrauth_auth: {
 // Operands: 2 = value, 3 = AUT key, 4 = AUT discriminator.
6713 Register DstReg = I.getOperand(0).getReg();
6714 Register ValReg = I.getOperand(2).getReg();
6715 uint64_t AUTKey = I.getOperand(3).getImm();
6716 Register AUTDisc = I.getOperand(4).getReg();
6717
6718 Register AUTAddrDisc = AUTDisc;
6719 uint16_t AUTConstDiscC = 0;
6720 std::tie(AUTConstDiscC, AUTAddrDisc) =
6722
 // Two lowerings: when the subtarget reports isX16X17Safer(), go through the
 // fixed X16/X17 registers; otherwise use AUTxMxN with virtual registers and
 // an extra scratch def.
6723 if (STI.isX16X17Safer()) {
6724 MIB.buildCopy({AArch64::X16}, {ValReg});
6725 MIB.buildInstr(TargetOpcode::IMPLICIT_DEF, {AArch64::X17}, {});
6726 MIB.buildInstr(AArch64::AUTx16x17)
6727 .addImm(AUTKey)
6728 .addImm(AUTConstDiscC)
6729 .addUse(AUTAddrDisc)
6730 .constrainAllUses(TII, TRI, RBI);
6731 MIB.buildCopy({DstReg}, Register(AArch64::X16));
6732 } else {
6733 Register ScratchReg =
6734 MRI.createVirtualRegister(&AArch64::GPR64commonRegClass);
6735 MIB.buildInstr(AArch64::AUTxMxN)
6736 .addDef(DstReg)
6737 .addDef(ScratchReg)
6738 .addUse(ValReg)
6739 .addImm(AUTKey)
6740 .addImm(AUTConstDiscC)
6741 .addUse(AUTAddrDisc)
6742 .constrainAllUses(TII, TRI, RBI);
6743 }
6744
6745 RBI.constrainGenericRegister(DstReg, AArch64::GPR64RegClass, MRI);
6746 I.eraseFromParent();
6747 return true;
6748 }
6749 case Intrinsic::frameaddress:
6750 case Intrinsic::returnaddress: {
6751 MachineFunction &MF = *I.getParent()->getParent();
6752 MachineFrameInfo &MFI = MF.getFrameInfo();
6753
 // Operand 2 is the requested frame depth.
6754 unsigned Depth = I.getOperand(2).getImm();
6755 Register DstReg = I.getOperand(0).getReg();
6756 RBI.constrainGenericRegister(DstReg, AArch64::GPR64RegClass, MRI);
6757
 // Fast path: the return address of the current function (depth 0) comes
 // from the LR live-in, which is created once and cached in MFReturnAddr.
6758 if (Depth == 0 && IntrinID == Intrinsic::returnaddress) {
6759 if (!MFReturnAddr) {
6760 // Insert the copy from LR/X30 into the entry block, before it can be
6761 // clobbered by anything.
6762 MFI.setReturnAddressIsTaken(true);
6763 MFReturnAddr = getFunctionLiveInPhysReg(
6764 MF, TII, AArch64::LR, AArch64::GPR64RegClass, I.getDebugLoc());
6765 }
6766
 // With PAuth, XPACI is applied to the value directly. Without it, the value
 // is routed through LR so that XPACLRI (which takes no explicit operands
 // here) can be used.
6767 if (STI.hasPAuth()) {
6768 MIB.buildInstr(AArch64::XPACI, {DstReg}, {MFReturnAddr});
6769 } else {
6770 MIB.buildCopy({Register(AArch64::LR)}, {MFReturnAddr});
6771 MIB.buildInstr(AArch64::XPACLRI);
6772 MIB.buildCopy({DstReg}, {Register(AArch64::LR)});
6773 }
6774
6775 I.eraseFromParent();
6776 return true;
6777 }
6778
 // General path: start at FP and follow the chain Depth times, each step
 // loading the next frame address with LDRXui at immediate 0.
6779 MFI.setFrameAddressIsTaken(true);
6780 Register FrameAddr(AArch64::FP);
6781 while (Depth--) {
6782 Register NextFrame = MRI.createVirtualRegister(&AArch64::GPR64spRegClass);
6783 auto Ldr =
6784 MIB.buildInstr(AArch64::LDRXui, {NextFrame}, {FrameAddr}).addImm(0);
6786 FrameAddr = NextFrame;
6787 }
6788
6789 if (IntrinID == Intrinsic::frameaddress)
6790 MIB.buildCopy({DstReg}, {FrameAddr});
6791 else {
 // returnaddress at non-zero depth: load from the frame with LDRXui at
 // immediate 1, then strip it the same way as in the depth-0 path.
6792 MFI.setReturnAddressIsTaken(true);
6793
6794 if (STI.hasPAuth()) {
6795 Register TmpReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
6796 MIB.buildInstr(AArch64::LDRXui, {TmpReg}, {FrameAddr}).addImm(1);
6797 MIB.buildInstr(AArch64::XPACI, {DstReg}, {TmpReg});
6798 } else {
6799 MIB.buildInstr(AArch64::LDRXui, {Register(AArch64::LR)}, {FrameAddr})
6800 .addImm(1);
6801 MIB.buildInstr(AArch64::XPACLRI);
6802 MIB.buildCopy({DstReg}, {Register(AArch64::LR)});
6803 }
6804 }
6805
6806 I.eraseFromParent();
6807 return true;
6808 }
 // Table lookups: the third argument is the number of table vectors, the two
 // opcodes are passed as SelectTable's Opc1/Opc2, and the final flag is true
 // for the tbx forms and false for the tbl forms.
6809 case Intrinsic::aarch64_neon_tbl2:
6810 SelectTable(I, MRI, 2, AArch64::TBLv8i8Two, AArch64::TBLv16i8Two, false);
6811 return true;
6812 case Intrinsic::aarch64_neon_tbl3:
6813 SelectTable(I, MRI, 3, AArch64::TBLv8i8Three, AArch64::TBLv16i8Three,
6814 false);
6815 return true;
6816 case Intrinsic::aarch64_neon_tbl4:
6817 SelectTable(I, MRI, 4, AArch64::TBLv8i8Four, AArch64::TBLv16i8Four, false);
6818 return true;
6819 case Intrinsic::aarch64_neon_tbx2:
6820 SelectTable(I, MRI, 2, AArch64::TBXv8i8Two, AArch64::TBXv16i8Two, true);
6821 return true;
6822 case Intrinsic::aarch64_neon_tbx3:
6823 SelectTable(I, MRI, 3, AArch64::TBXv8i8Three, AArch64::TBXv16i8Three, true);
6824 return true;
6825 case Intrinsic::aarch64_neon_tbx4:
6826 SelectTable(I, MRI, 4, AArch64::TBXv8i8Four, AArch64::TBXv16i8Four, true);
6827 return true;
6828 case Intrinsic::swift_async_context_addr:
 // Built as SUBXri on FP with immediates 8 and 0; the function info is then
 // flagged as having a Swift async context.
6829 auto Sub = MIB.buildInstr(AArch64::SUBXri, {I.getOperand(0).getReg()},
6830 {Register(AArch64::FP)})
6831 .addImm(8)
6832 .addImm(0);
6834
6836 MF->getInfo<AArch64FunctionInfo>()->setHasSwiftAsyncContext(true);
6837 I.eraseFromParent();
6838 return true;
6839 }
6840 return false;
6841}
6842
6843// G_PTRAUTH_GLOBAL_VALUE lowering
6844//
6845// We have 3 lowering alternatives to choose from:
6846// - MOVaddrPAC: similar to MOVaddr, with added PAC.
6847// If the GV doesn't need a GOT load (i.e., is locally defined)
6848// materialize the pointer using adrp+add+pac. See LowerMOVaddrPAC.
6849//
6850// - LOADgotPAC: similar to LOADgot, with added PAC.
6851// If the GV needs a GOT load, materialize the pointer using the usual
6852// GOT adrp+ldr, +pac. Pointers in GOT are assumed to be not signed, the GOT
6853// section is assumed to be read-only (for example, via relro mechanism). See
6854// LowerMOVaddrPAC.
6855//
6856// - LOADauthptrstatic: similar to LOADgot, but use a
6857// special stub slot instead of a GOT slot.
6858// Load a signed pointer for symbol 'sym' from a stub slot named
6859// 'sym$auth_ptr$key$disc' filled by dynamic linker during relocation
6860// resolving. This usually lowers to adrp+ldr, but also emits an entry into
6861// .data with an
6862// @AUTH relocation. See LowerLOADauthptrstatic.
6863//
6864// All 3 are pseudos that are expanded late to longer sequences: this lets us
6865// provide integrity guarantees on the to-be-signed intermediate values.
6866//
6867// LOADauthptrstatic is undesirable because it requires a large section filled
6868// with often similarly-signed pointers, making it a good harvesting target.
6869// Thus, it's only used for ptrauth references to extern_weak to avoid null
6870// checks.
6871
/// Selects the instruction \p I as one of MOVaddrPAC, LOADgotPAC or
/// LOADauthptrstatic.
///
/// Operands read from \p I: 0 = result register, 1 = address register,
/// 2 = key (immediate), 3 = address-discriminator register, 4 = integer
/// discriminator (immediate).
///
/// \returns true after emitting the replacement and erasing \p I; false when
/// the address operand cannot be traced back (optionally through one
/// G_PTR_ADD with a G_CONSTANT offset) to a single G_GLOBAL_VALUE or
/// G_ADD_LOW definition. Unsupported configurations are reported through
/// report_fatal_error rather than by returning false.
6872bool AArch64InstructionSelector::selectPtrAuthGlobalValue(
6873 MachineInstr &I, MachineRegisterInfo &MRI) const {
6874 Register DefReg = I.getOperand(0).getReg();
6875 Register Addr = I.getOperand(1).getReg();
6876 uint64_t Key = I.getOperand(2).getImm();
6877 Register AddrDisc = I.getOperand(3).getReg();
6878 uint64_t Disc = I.getOperand(4).getImm();
6879 int64_t Offset = 0;
6880
6882 report_fatal_error("key in ptrauth global out of range [0, " +
6883 Twine((int)AArch64PACKey::LAST) + "]");
6884
6885 // Blend only works if the integer discriminator is 16-bit wide.
6886 if (!isUInt<16>(Disc))
6888 "constant discriminator in ptrauth global out of range [0, 0xffff]");
6889
6890 // Choosing between 3 lowering alternatives is target-specific.
6891 if (!STI.isTargetELF() && !STI.isTargetMachO())
6892 report_fatal_error("ptrauth global lowering only supported on MachO/ELF");
6893
 // The pattern match below relies on each register having exactly one
 // definition; otherwise give up.
6894 if (!MRI.hasOneDef(Addr))
6895 return false;
6896
6897 // First match any offset we take from the real global.
6898 const MachineInstr *DefMI = &*MRI.def_instr_begin(Addr);
6899 if (DefMI->getOpcode() == TargetOpcode::G_PTR_ADD) {
 // Only a G_PTR_ADD whose offset operand is a single-def G_CONSTANT is
 // accepted.
6900 Register OffsetReg = DefMI->getOperand(2).getReg();
6901 if (!MRI.hasOneDef(OffsetReg))
6902 return false;
6903 const MachineInstr &OffsetMI = *MRI.def_instr_begin(OffsetReg);
6904 if (OffsetMI.getOpcode() != TargetOpcode::G_CONSTANT)
6905 return false;
6906
 // Continue matching from the base pointer of the G_PTR_ADD.
6907 Addr = DefMI->getOperand(1).getReg();
6908 if (!MRI.hasOneDef(Addr))
6909 return false;
6910
6911 DefMI = &*MRI.def_instr_begin(Addr);
6912 Offset = OffsetMI.getOperand(1).getCImm()->getSExtValue();
6913 }
6914
6915 // We should be left with a genuine unauthenticated GlobalValue.
6916 const GlobalValue *GV;
6917 if (DefMI->getOpcode() == TargetOpcode::G_GLOBAL_VALUE) {
6918 GV = DefMI->getOperand(1).getGlobal();
6920 } else if (DefMI->getOpcode() == AArch64::G_ADD_LOW) {
6921 GV = DefMI->getOperand(2).getGlobal();
6923 } else {
6924 return false;
6925 }
6926
 // Local builder positioned at I (this member function is const).
6927 MachineIRBuilder MIB(I);
6928
6929 // Classify the reference to determine whether it needs a GOT load.
6930 unsigned OpFlags = STI.ClassifyGlobalReference(GV, TM);
6931 const bool NeedsGOTLoad = ((OpFlags & AArch64II::MO_GOT) != 0);
6932 assert(((OpFlags & (~AArch64II::MO_GOT)) == 0) &&
6933 "unsupported non-GOT op flags on ptrauth global reference");
6934 assert((!GV->hasExternalWeakLinkage() || NeedsGOTLoad) &&
6935 "unsupported non-GOT reference to weak ptrauth global");
6936
 // An address discriminator is considered present unless the register is
 // known to hold the constant 0.
6937 std::optional<APInt> AddrDiscVal = getIConstantVRegVal(AddrDisc, MRI);
6938 bool HasAddrDisc = !AddrDiscVal || *AddrDiscVal != 0;
6939
6940 // Non-extern_weak:
6941 // - No GOT load needed -> MOVaddrPAC
6942 // - GOT load for non-extern_weak -> LOADgotPAC
6943 // Note that we disallow extern_weak refs to avoid null checks later.
6944 if (!GV->hasExternalWeakLinkage()) {
 // X16 and X17 are marked as defined before the pseudo; the result is then
 // copied out of X16. XZR stands in when there is no address discriminator.
6945 MIB.buildInstr(TargetOpcode::IMPLICIT_DEF, {AArch64::X16}, {});
6946 MIB.buildInstr(TargetOpcode::IMPLICIT_DEF, {AArch64::X17}, {});
6947 MIB.buildInstr(NeedsGOTLoad ? AArch64::LOADgotPAC : AArch64::MOVaddrPAC)
6949 .addImm(Key)
6950 .addReg(HasAddrDisc ? AddrDisc : AArch64::XZR)
6951 .addImm(Disc)
6952 .constrainAllUses(TII, TRI, RBI);
6953 MIB.buildCopy(DefReg, Register(AArch64::X16));
6954 RBI.constrainGenericRegister(DefReg, AArch64::GPR64RegClass, MRI);
6955 I.eraseFromParent();
6956 return true;
6957 }
6958
6959 // extern_weak -> LOADauthptrstatic
6960
6961 // Offsets and extern_weak don't mix well: ptrauth aside, you'd get the
6962 // offset alone as a pointer if the symbol wasn't available, which would
6963 // probably break null checks in users. Ptrauth complicates things further:
6964 // error out.
6965 if (Offset != 0)
6967 "unsupported non-zero offset in weak ptrauth global reference");
6968
6969 if (HasAddrDisc)
6970 report_fatal_error("unsupported weak addr-div ptrauth global");
6971
 // Unlike the PAC pseudos above, this one defines DefReg directly.
6972 MIB.buildInstr(AArch64::LOADauthptrstatic, {DefReg}, {})
6973 .addGlobalAddress(GV, Offset)
6974 .addImm(Key)
6975 .addImm(Disc);
6976 RBI.constrainGenericRegister(DefReg, AArch64::GPR64RegClass, MRI);
6977
6978 I.eraseFromParent();
6979 return true;
6980}
6981
/// Replaces the table-lookup intrinsic \p I with a single instruction whose
/// table operand is a register tuple.
///
/// \param I      Intrinsic instruction; operand 0 is the destination. The
///               table registers start at operand 2 (operand 3 when \p isExt
///               is set) and are followed by the index register.
/// \param MRI    Used to query the type of the destination register.
/// \param NumVec Number of table registers to gather into the tuple.
/// \param Opc1   Opcode used when the destination type is
///               LLT::fixed_vector(8, 8).
/// \param Opc2   Opcode used for any other destination type.
/// \param isExt  When true, operand 2 of \p I is passed to the new instruction
///               as an extra leading source operand.
///
/// \p I is erased before returning.
6982void AArch64InstructionSelector::SelectTable(MachineInstr &I,
6983 MachineRegisterInfo &MRI,
6984 unsigned NumVec, unsigned Opc1,
6985 unsigned Opc2, bool isExt) {
6986 Register DstReg = I.getOperand(0).getReg();
6987 unsigned Opc = MRI.getType(DstReg) == LLT::fixed_vector(8, 8) ? Opc1 : Opc2;
6988
6989 // Create the REG_SEQUENCE
 // isExt is used as 0/1 here to skip the extra leading operand.
6991 for (unsigned i = 0; i < NumVec; i++)
6992 Regs.push_back(I.getOperand(i + 2 + isExt).getReg());
6993 Register RegSeq = createQTuple(Regs, MIB);
6994
 // The index register is the operand right after the table registers.
6995 Register IdxReg = I.getOperand(2 + NumVec + isExt).getReg();
6996 MachineInstrBuilder Instr;
6997 if (isExt) {
6998 Register Reg = I.getOperand(2).getReg();
6999 Instr = MIB.buildInstr(Opc, {DstReg}, {Reg, RegSeq, IdxReg});
7000 } else
7001 Instr = MIB.buildInstr(Opc, {DstReg}, {RegSeq, IdxReg});
7003 I.eraseFromParent();
7004}
7005
7006InstructionSelector::ComplexRendererFns
7007AArch64InstructionSelector::selectShiftA_32(const MachineOperand &Root) const {
7008 auto MaybeImmed = getImmedFromMO(Root);
7009 if (MaybeImmed == std::nullopt || *MaybeImmed > 31)
7010 return std::nullopt;
7011 uint64_t Enc = (32 - *MaybeImmed) & 0x1f;
7012 return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Enc); }}};
7013}
7014
7015InstructionSelector::ComplexRendererFns
7016AArch64InstructionSelector::selectShiftB_32(const MachineOperand &Root) const {
7017 auto MaybeImmed = getImmedFromMO(Root);
7018 if (MaybeImmed == std::nullopt || *MaybeImmed > 31)
7019 return std::nullopt;
7020 uint64_t Enc = 31 - *MaybeImmed;
7021 return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Enc); }}};
7022}
7023
7024InstructionSelector::ComplexRendererFns
7025AArch64InstructionSelector::selectShiftA_64(const MachineOperand &Root) const {
7026 auto MaybeImmed = getImmedFromMO(Root);
7027 if (MaybeImmed == std::nullopt || *MaybeImmed > 63)
7028 return std::nullopt;
7029 uint64_t Enc = (64 - *MaybeImmed) & 0x3f;
7030 return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Enc); }}};
7031}
7032
7033InstructionSelector::ComplexRendererFns
7034AArch64InstructionSelector::selectShiftB_64(const MachineOperand &Root) const {
7035 auto MaybeImmed = getImmedFromMO(Root);
7036 if (MaybeImmed == std::nullopt || *MaybeImmed > 63)
7037 return std::nullopt;
7038 uint64_t Enc = 63 - *MaybeImmed;
7039 return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Enc); }}};
7040}
7041
7042/// Helper to select an immediate value that can be represented as a 12-bit
7043/// value shifted left by either 0 or 12. If it is possible to do so, return
7044/// the immediate and shift value. If not, return std::nullopt.
7045///
7046/// Used by selectArithImmed and selectNegArithImmed.
7047InstructionSelector::ComplexRendererFns
7048AArch64InstructionSelector::select12BitValueWithLeftShift(
7049 uint64_t Immed) const {
7050 unsigned ShiftAmt;
7051 if (Immed >> 12 == 0) {
7052 ShiftAmt = 0;
7053 } else if ((Immed & 0xfff) == 0 && Immed >> 24 == 0) {
7054 ShiftAmt = 12;
7055 Immed = Immed >> 12;
7056 } else
7057 return std::nullopt;
7058
7059 unsigned ShVal = AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftAmt);
7060 return {{
7061 [=](MachineInstrBuilder &MIB) { MIB.addImm(Immed); },
7062 [=](MachineInstrBuilder &MIB) { MIB.addImm(ShVal); },
7063 }};
7064}
7065
7066/// SelectArithImmed - Select an immediate value that can be represented as
7067/// a 12-bit value shifted left by either 0 or 12. If so, return true with
7068/// Val set to the 12-bit value and Shift set to the shifter operand.
7069InstructionSelector::ComplexRendererFns
7070AArch64InstructionSelector::selectArithImmed(MachineOperand &Root) const {
7071 // This function is called from the addsub_shifted_imm ComplexPattern,
7072 // which lists [imm] as the list of opcode it's interested in, however
7073 // we still need to check whether the operand is actually an immediate
7074 // here because the ComplexPattern opcode list is only used in
7075 // root-level opcode matching.
7076 auto MaybeImmed = getImmedFromMO(Root);
7077 if (MaybeImmed == std::nullopt)
7078 return std::nullopt;
7079 return select12BitValueWithLeftShift(*MaybeImmed);
7080}
7081
7082/// SelectNegArithImmed - As above, but negates the value before trying to
7083/// select it.
7084InstructionSelector::ComplexRendererFns
7085AArch64InstructionSelector::selectNegArithImmed(MachineOperand &Root) const {
7086 // We need a register here, because we need to know if we have a 64 or 32
7087 // bit immediate.
7088 if (!Root.isReg())
7089 return std::nullopt;
7090 auto MaybeImmed = getImmedFromMO(Root);
7091 if (MaybeImmed == std::nullopt)
7092 return std::nullopt;
7093 uint64_t Immed = *MaybeImmed;
7094
7095 // This negation is almost always valid, but "cmp wN, #0" and "cmn wN, #0"
7096 // have the opposite effect on the C flag, so this pattern mustn't match under
7097 // those circumstances.
7098 if (Immed == 0)
7099 return std::nullopt;
7100
7101 // Check if we're dealing with a 32-bit type on the root or a 64-bit type on
7102 // the root.
7103 MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();
7104 if (MRI.getType(Root.getReg()).getSizeInBits() == 32)
7105 Immed = ~((uint32_t)Immed) + 1;
7106 else
7107 Immed = ~Immed + 1ULL;
7108
7109 if (Immed & 0xFFFFFFFFFF000000ULL)
7110 return std::nullopt;
7111
7112 Immed &= 0xFFFFFFULL;
7113 return select12BitValueWithLeftShift(Immed);
7114}
7115
7116/// Checks if we are sure that folding MI into load/store addressing mode is
7117/// beneficial or not.
7118///
7119/// Returns:
7120/// - true if folding MI would be beneficial.
7121/// - false if folding MI would be bad.
7122/// - std::nullopt if it is not sure whether folding MI is beneficial.
7123///
7124/// \p MI can be the offset operand of G_PTR_ADD, e.g. G_SHL in the example:
7125///
7126/// %13:gpr(s64) = G_CONSTANT i64 1
7127/// %8:gpr(s64) = G_SHL %6, %13(s64)
7128/// %9:gpr(p0) = G_PTR_ADD %0, %8(s64)
7129/// %12:gpr(s32) = G_LOAD %9(p0) :: (load (s16))
7130std::optional<bool> AArch64InstructionSelector::isWorthFoldingIntoAddrMode(
7131 const MachineInstr &MI, const MachineRegisterInfo &MRI) const {
7132 if (MI.getOpcode() == AArch64::G_SHL) {
7133 // Address operands with shifts are free, except for running on subtargets
7134 // with AddrLSLSlow14.
7135 if (const auto ValAndVeg = getIConstantVRegValWithLookThrough(
7136 MI.getOperand(2).getReg(), MRI)) {
7137 const APInt ShiftVal = ValAndVeg->Value;
7138
7139 // Don't fold if we know this will be slow.
7140 return !(STI.hasAddrLSLSlow14() && (ShiftVal == 1 || ShiftVal == 4));
7141 }
7142 }
7143 return std::nullopt;
7144}
7145
7146/// Return true if it is worth folding MI into an extended register. That is,
7147/// if it's safe to pull it into the addressing mode of a load or store as a
7148/// shift.
7149/// \p IsAddrOperand whether the def of MI is used as an address operand
7150/// (e.g. feeding into an LDR/STR).
7151bool AArch64InstructionSelector::isWorthFoldingIntoExtendedReg(
7152 const MachineInstr &MI, const MachineRegisterInfo &MRI,
7153 bool IsAddrOperand) const {
7154
7155 // Always fold if there is one use, or if we're optimizing for size.
7156 Register DefReg = MI.getOperand(0).getReg();
7157 if (MRI.hasOneNonDBGUse(DefReg) ||
7158 MI.getParent()->getParent()->getFunction().hasOptSize())
7159 return true;
7160
7161 if (IsAddrOperand) {
7162 // If we are already sure that folding MI is good or bad, return the result.
7163 if (const auto Worth = isWorthFoldingIntoAddrMode(MI, MRI))
7164 return *Worth;
7165
7166 // Fold G_PTR_ADD if its offset operand can be folded
7167 if (MI.getOpcode() == AArch64::G_PTR_ADD) {
7168 MachineInstr *OffsetInst =
7169 getDefIgnoringCopies(MI.getOperand(2).getReg(), MRI);
7170
7171 // Note, we already know G_PTR_ADD is used by at least two instructions.
7172 // If we are also sure about whether folding is beneficial or not,
7173 // return the result.
7174 if (const auto Worth = isWorthFoldingIntoAddrMode(*OffsetInst, MRI))
7175 return *Worth;
7176 }
7177 }
7178
7179 // FIXME: Consider checking HasALULSLFast as appropriate.
7180
7181 // We have a fastpath, so folding a shift in and potentially computing it
7182 // many times may be beneficial. Check if this is only used in memory ops.
7183 // If it is, then we should fold.
7184 return all_of(MRI.use_nodbg_instructions(DefReg),
7185 [](MachineInstr &Use) { return Use.mayLoadOrStore(); });
7186}
7187
7188InstructionSelector::ComplexRendererFns
7189AArch64InstructionSelector::selectExtendedSHL(
7190 MachineOperand &Root, MachineOperand &Base, MachineOperand &Offset,
7191 unsigned SizeInBytes, bool WantsExt) const {
7192 assert(Base.isReg() && "Expected base to be a register operand");
7193 assert(Offset.isReg() && "Expected offset to be a register operand");
7194
7195 MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();
7196 MachineInstr *OffsetInst = MRI.getVRegDef(Offset.getReg());
7197
7198 unsigned OffsetOpc = OffsetInst->getOpcode();
7199 bool LookedThroughZExt = false;
7200 if (OffsetOpc != TargetOpcode::G_SHL && OffsetOpc != TargetOpcode::G_MUL) {
7201 // Try to look through a ZEXT.
7202 if (OffsetOpc != TargetOpcode::G_ZEXT || !WantsExt)
7203 return std::nullopt;
7204
7205 OffsetInst = MRI.getVRegDef(OffsetInst->getOperand(1).getReg());
7206 OffsetOpc = OffsetInst->getOpcode();
7207 LookedThroughZExt = true;
7208
7209 if (OffsetOpc != TargetOpcode::G_SHL && OffsetOpc != TargetOpcode::G_MUL)
7210 return std::nullopt;
7211 }
7212 // Make sure that the memory op is a valid size.
7213 int64_t LegalShiftVal = Log2_32(SizeInBytes);
7214 if (LegalShiftVal == 0)
7215 return std::nullopt;
7216 if (!isWorthFoldingIntoExtendedReg(*OffsetInst, MRI, true))
7217 return std::nullopt;
7218
7219 // Now, try to find the specific G_CONSTANT. Start by assuming that the
7220 // register we will offset is the LHS, and the register containing the
7221 // constant is the RHS.
7222 Register OffsetReg = OffsetInst->getOperand(1).getReg();
7223 Register ConstantReg = OffsetInst->getOperand(2).getReg();
7224 auto ValAndVReg = getIConstantVRegValWithLookThrough(ConstantReg, MRI);
7225 if (!ValAndVReg) {
7226 // We didn't get a constant on the RHS. If the opcode is a shift, then
7227 // we're done.
7228 if (OffsetOpc == TargetOpcode::G_SHL)
7229 return std::nullopt;
7230
7231 // If we have a G_MUL, we can use either register. Try looking at the RHS.
7232 std::swap(OffsetReg, ConstantReg);
7233 ValAndVReg = getIConstantVRegValWithLookThrough(ConstantReg, MRI);
7234 if (!ValAndVReg)
7235 return std::nullopt;
7236 }
7237
7238 // The value must fit into 3 bits, and must be positive. Make sure that is
7239 // true.
7240 int64_t ImmVal = ValAndVReg->Value.getSExtValue();
7241
7242 // Since we're going to pull this into a shift, the constant value must be
7243 // a power of 2. If we got a multiply, then we need to check this.
7244 if (OffsetOpc == TargetOpcode::G_MUL) {
7245 if (!llvm::has_single_bit<uint32_t>(ImmVal))
7246 return std::nullopt;
7247
7248 // Got a power of 2. So, the amount we'll shift is the log base-2 of that.
7249 ImmVal = Log2_32(ImmVal);
7250 }
7251
7252 if ((ImmVal & 0x7) != ImmVal)
7253 return std::nullopt;
7254
7255 // We are only allowed to shift by LegalShiftVal. This shift value is built
7256 // into the instruction, so we can't just use whatever we want.
7257 if (ImmVal != LegalShiftVal)
7258 return std::nullopt;
7259
7260 unsigned SignExtend = 0;
7261 if (WantsExt) {
7262 // Check if the offset is defined by an extend, unless we looked through a
7263 // G_ZEXT earlier.
7264 if (!LookedThroughZExt) {
7265 MachineInstr *ExtInst = getDefIgnoringCopies(OffsetReg, MRI);
7266 auto Ext = getExtendTypeForInst(*ExtInst, MRI, true);
7268 return std::nullopt;
7269
7270 SignExtend = AArch64_AM::isSignExtendShiftType(Ext) ? 1 : 0;
7271 // We only support SXTW for signed extension here.
7272 if (SignExtend && Ext != AArch64_AM::SXTW)
7273 return std::nullopt;
7274 OffsetReg = ExtInst->getOperand(1).getReg();
7275 }
7276
7277 // Need a 32-bit wide register here.
7278 MachineIRBuilder MIB(*MRI.getVRegDef(Root.getReg()));
7279 OffsetReg = moveScalarRegClass(OffsetReg, AArch64::GPR32RegClass, MIB);
7280 }
7281
7282 // We can use the LHS of the GEP as the base, and the LHS of the shift as an
7283 // offset. Signify that we are shifting by setting the shift flag to 1.
7284 return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(Base.getReg()); },
7285 [=](MachineInstrBuilder &MIB) { MIB.addUse(OffsetReg); },
7286 [=](MachineInstrBuilder &MIB) {
7287 // Need to add both immediates here to make sure that they are both
7288 // added to the instruction.
7289 MIB.addImm(SignExtend);
7290 MIB.addImm(1);
7291 }}};
7292}
7293
7294/// This is used for computing addresses like this:
7295///
7296/// ldr x1, [x2, x3, lsl #3]
7297///
7298/// Where x2 is the base register, and x3 is an offset register. The shift-left
7299/// is a constant value specific to this load instruction. That is, we'll never
7300/// see anything other than a 3 here (which corresponds to the size of the
7301/// element being loaded.)
7302InstructionSelector::ComplexRendererFns
7303AArch64InstructionSelector::selectAddrModeShiftedExtendXReg(
7304 MachineOperand &Root, unsigned SizeInBytes) const {
7305 if (!Root.isReg())
7306 return std::nullopt;
7307 MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();
7308
7309 // We want to find something like this:
7310 //
7311 // val = G_CONSTANT LegalShiftVal
7312 // shift = G_SHL off_reg val
7313 // ptr = G_PTR_ADD base_reg shift
7314 // x = G_LOAD ptr
7315 //
7316 // And fold it into this addressing mode:
7317 //
7318 // ldr x, [base_reg, off_reg, lsl #LegalShiftVal]
7319
7320 // Check if we can find the G_PTR_ADD.
7321 MachineInstr *PtrAdd =
7322 getOpcodeDef(TargetOpcode::G_PTR_ADD, Root.getReg(), MRI);
7323 if (!PtrAdd || !isWorthFoldingIntoExtendedReg(*PtrAdd, MRI, true))
7324 return std::nullopt;
7325
7326 // Now, try to match an opcode which will match our specific offset.
7327 // We want a G_SHL or a G_MUL.
7328 MachineInstr *OffsetInst =
7329 getDefIgnoringCopies(PtrAdd->getOperand(2).getReg(), MRI);
7330 return selectExtendedSHL(Root, PtrAdd->getOperand(1),
7331 OffsetInst->getOperand(0), SizeInBytes,
7332 /*WantsExt=*/false);
7333}
7334
7335/// This is used for computing addresses like this:
7336///
7337/// ldr x1, [x2, x3]
7338///
7339/// Where x2 is the base register, and x3 is an offset register.
7340///
7341/// When possible (or profitable) to fold a G_PTR_ADD into the address
7342/// calculation, this will do so. Otherwise, it will return std::nullopt.
7343InstructionSelector::ComplexRendererFns
7344AArch64InstructionSelector::selectAddrModeRegisterOffset(
7345 MachineOperand &Root) const {
7346 MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();
7347
7348 // We need a GEP.
7349 MachineInstr *Gep = MRI.getVRegDef(Root.getReg());
7350 if (Gep->getOpcode() != TargetOpcode::G_PTR_ADD)
7351 return std::nullopt;
7352
7353 // If this is used more than once, let's not bother folding.
7354 // TODO: Check if they are memory ops. If they are, then we can still fold
7355 // without having to recompute anything.
7356 if (!MRI.hasOneNonDBGUse(Gep->getOperand(0).getReg()))
7357 return std::nullopt;
7358
7359 // Base is the GEP's LHS, offset is its RHS.
7360 return {{[=](MachineInstrBuilder &MIB) {
7361 MIB.addUse(Gep->getOperand(1).getReg());
7362 },
7363 [=](MachineInstrBuilder &MIB) {
7364 MIB.addUse(Gep->getOperand(2).getReg());
7365 },
7366 [=](MachineInstrBuilder &MIB) {
7367 // Need to add both immediates here to make sure that they are both
7368 // added to the instruction.
7369 MIB.addImm(0);
7370 MIB.addImm(0);
7371 }}};
7372}
7373
7374/// This is intended to be equivalent to selectAddrModeXRO in
7375/// AArch64ISelDAGtoDAG. It's used for selecting X register offset loads.
7376InstructionSelector::ComplexRendererFns
7377AArch64InstructionSelector::selectAddrModeXRO(MachineOperand &Root,
7378 unsigned SizeInBytes) const {
7379 MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();
7380 if (!Root.isReg())
7381 return std::nullopt;
7382 MachineInstr *PtrAdd =
7383 getOpcodeDef(TargetOpcode::G_PTR_ADD, Root.getReg(), MRI);
7384 if (!PtrAdd)
7385 return std::nullopt;
7386
7387 // Check for an immediates which cannot be encoded in the [base + imm]
7388 // addressing mode, and can't be encoded in an add/sub. If this happens, we'll
7389 // end up with code like:
7390 //
7391 // mov x0, wide
7392 // add x1 base, x0
7393 // ldr x2, [x1, x0]
7394 //
7395 // In this situation, we can use the [base, xreg] addressing mode to save an
7396 // add/sub:
7397 //
7398 // mov x0, wide
7399 // ldr x2, [base, x0]
7400 auto ValAndVReg =
7402 if (ValAndVReg) {
7403 unsigned Scale = Log2_32(SizeInBytes);
7404 int64_t ImmOff = ValAndVReg->Value.getSExtValue();
7405
7406 // Skip immediates that can be selected in the load/store addressing
7407 // mode.
7408 if (ImmOff % SizeInBytes == 0 && ImmOff >= 0 &&
7409 ImmOff < (0x1000 << Scale))
7410 return std::nullopt;
7411
7412 // Helper lambda to decide whether or not it is preferable to emit an add.
7413 auto isPreferredADD = [](int64_t ImmOff) {
7414 // Constants in [0x0, 0xfff] can be encoded in an add.
7415 if ((ImmOff & 0xfffffffffffff000LL) == 0x0LL)
7416 return true;
7417
7418 // Can it be encoded in an add lsl #12?
7419 if ((ImmOff & 0xffffffffff000fffLL) != 0x0LL)
7420 return false;
7421
7422 // It can be encoded in an add lsl #12, but we may not want to. If it is
7423 // possible to select this as a single movz, then prefer that. A single
7424 // movz is faster than an add with a shift.
7425 return (ImmOff & 0xffffffffff00ffffLL) != 0x0LL &&
7426 (ImmOff & 0xffffffffffff0fffLL) != 0x0LL;
7427 };
7428
7429 // If the immediate can be encoded in a single add/sub, then bail out.
7430 if (isPreferredADD(ImmOff) || isPreferredADD(-ImmOff))
7431 return std::nullopt;
7432 }
7433
7434 // Try to fold shifts into the addressing mode.
7435 auto AddrModeFns = selectAddrModeShiftedExtendXReg(Root, SizeInBytes);
7436 if (AddrModeFns)
7437 return AddrModeFns;
7438
7439 // If that doesn't work, see if it's possible to fold in registers from
7440 // a GEP.
7441 return selectAddrModeRegisterOffset(Root);
7442}
7443
7444/// This is used for computing addresses like this:
7445///
7446/// ldr x0, [xBase, wOffset, sxtw #LegalShiftVal]
7447///
7448/// Where we have a 64-bit base register, a 32-bit offset register, and an
7449/// extend (which may or may not be signed).
7450InstructionSelector::ComplexRendererFns
7451AArch64InstructionSelector::selectAddrModeWRO(MachineOperand &Root,
7452 unsigned SizeInBytes) const {
7453 MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();
7454
7455 MachineInstr *PtrAdd =
7456 getOpcodeDef(TargetOpcode::G_PTR_ADD, Root.getReg(), MRI);
7457 if (!PtrAdd || !isWorthFoldingIntoExtendedReg(*PtrAdd, MRI, true))
7458 return std::nullopt;
7459
7460 MachineOperand &LHS = PtrAdd->getOperand(1);
7461 MachineOperand &RHS = PtrAdd->getOperand(2);
7462 MachineInstr *OffsetInst = getDefIgnoringCopies(RHS.getReg(), MRI);
7463
7464 // The first case is the same as selectAddrModeXRO, except we need an extend.
7465 // In this case, we try to find a shift and extend, and fold them into the
7466 // addressing mode.
7467 //
7468 // E.g.
7469 //
7470 // off_reg = G_Z/S/ANYEXT ext_reg
7471 // val = G_CONSTANT LegalShiftVal
7472 // shift = G_SHL off_reg val
7473 // ptr = G_PTR_ADD base_reg shift
7474 // x = G_LOAD ptr
7475 //
7476 // In this case we can get a load like this:
7477 //
7478 // ldr x0, [base_reg, ext_reg, sxtw #LegalShiftVal]
7479 auto ExtendedShl = selectExtendedSHL(Root, LHS, OffsetInst->getOperand(0),
7480 SizeInBytes, /*WantsExt=*/true);
7481 if (ExtendedShl)
7482 return ExtendedShl;
7483
7484 // There was no shift. We can try and fold a G_Z/S/ANYEXT in alone though.
7485 //
7486 // e.g.
7487 // ldr something, [base_reg, ext_reg, sxtw]
7488 if (!isWorthFoldingIntoExtendedReg(*OffsetInst, MRI, true))
7489 return std::nullopt;
7490
7491 // Check if this is an extend. We'll get an extend type if it is.
7493 getExtendTypeForInst(*OffsetInst, MRI, /*IsLoadStore=*/true);
7495 return std::nullopt;
7496
7497 // Need a 32-bit wide register.
7498 MachineIRBuilder MIB(*PtrAdd);
7499 Register ExtReg = moveScalarRegClass(OffsetInst->getOperand(1).getReg(),
7500 AArch64::GPR32RegClass, MIB);
7501 unsigned SignExtend = Ext == AArch64_AM::SXTW;
7502
7503 // Base is LHS, offset is ExtReg.
7504 return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(LHS.getReg()); },
7505 [=](MachineInstrBuilder &MIB) { MIB.addUse(ExtReg); },
7506 [=](MachineInstrBuilder &MIB) {
7507 MIB.addImm(SignExtend);
7508 MIB.addImm(0);
7509 }}};
7510}
7511
7512/// Select a "register plus unscaled signed 9-bit immediate" address. This
7513/// should only match when there is an offset that is not valid for a scaled
7514/// immediate addressing mode. The "Size" argument is the size in bytes of the
7515/// memory reference, which is needed here to know what is valid for a scaled
7516/// immediate.
7517InstructionSelector::ComplexRendererFns
7518AArch64InstructionSelector::selectAddrModeUnscaled(MachineOperand &Root,
7519 unsigned Size) const {
7520 MachineRegisterInfo &MRI =
7521 Root.getParent()->getParent()->getParent()->getRegInfo();
7522
7523 if (!Root.isReg())
7524 return std::nullopt;
7525
7526 if (!isBaseWithConstantOffset(Root, MRI))
7527 return std::nullopt;
7528
7529 MachineInstr *RootDef = MRI.getVRegDef(Root.getReg());
7530
7531 MachineOperand &OffImm = RootDef->getOperand(2);
7532 if (!OffImm.isReg())
7533 return std::nullopt;
7534 MachineInstr *RHS = MRI.getVRegDef(OffImm.getReg());
7535 if (RHS->getOpcode() != TargetOpcode::G_CONSTANT)
7536 return std::nullopt;
7537 int64_t RHSC;
7538 MachineOperand &RHSOp1 = RHS->getOperand(1);
7539 if (!RHSOp1.isCImm() || RHSOp1.getCImm()->getBitWidth() > 64)
7540 return std::nullopt;
7541 RHSC = RHSOp1.getCImm()->getSExtValue();
7542
7543 if (RHSC >= -256 && RHSC < 256) {
7544 MachineOperand &Base = RootDef->getOperand(1);
7545 return {{
7546 [=](MachineInstrBuilder &MIB) { MIB.add(Base); },
7547 [=](MachineInstrBuilder &MIB) { MIB.addImm(RHSC); },
7548 }};
7549 }
7550 return std::nullopt;
7551}
7552
7553InstructionSelector::ComplexRendererFns
7554AArch64InstructionSelector::tryFoldAddLowIntoImm(MachineInstr &RootDef,
7555 unsigned Size,
7556 MachineRegisterInfo &MRI) const {
7557 if (RootDef.getOpcode() != AArch64::G_ADD_LOW)
7558 return std::nullopt;
7559 MachineInstr &Adrp = *MRI.getVRegDef(RootDef.getOperand(1).getReg());
7560 if (Adrp.getOpcode() != AArch64::ADRP)
7561 return std::nullopt;
7562
7563 // TODO: add heuristics like isWorthFoldingADDlow() from SelectionDAG.
7564 auto Offset = Adrp.getOperand(1).getOffset();
7565 if (Offset % Size != 0)
7566 return std::nullopt;
7567
7568 auto GV = Adrp.getOperand(1).getGlobal();
7569 if (GV->isThreadLocal())
7570 return std::nullopt;
7571
7572 auto &MF = *RootDef.getParent()->getParent();
7573 if (GV->getPointerAlignment(MF.getDataLayout()) < Size)
7574 return std::nullopt;
7575
7576 unsigned OpFlags = STI.ClassifyGlobalReference(GV, MF.getTarget());
7577 MachineIRBuilder MIRBuilder(RootDef);
7578 Register AdrpReg = Adrp.getOperand(0).getReg();
7579 return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(AdrpReg); },
7580 [=](MachineInstrBuilder &MIB) {
7581 MIB.addGlobalAddress(GV, Offset,
7582 OpFlags | AArch64II::MO_PAGEOFF |
7584 }}};
7585}
7586
7587/// Select a "register plus scaled unsigned 12-bit immediate" address. The
7588/// "Size" argument is the size in bytes of the memory reference, which
7589/// determines the scale.
7590InstructionSelector::ComplexRendererFns
7591AArch64InstructionSelector::selectAddrModeIndexed(MachineOperand &Root,
7592 unsigned Size) const {
7593 MachineFunction &MF = *Root.getParent()->getParent()->getParent();
7594 MachineRegisterInfo &MRI = MF.getRegInfo();
7595
7596 if (!Root.isReg())
7597 return std::nullopt;
7598
7599 MachineInstr *RootDef = MRI.getVRegDef(Root.getReg());
7600 if (RootDef->getOpcode() == TargetOpcode::G_FRAME_INDEX) {
7601 return {{
7602 [=](MachineInstrBuilder &MIB) { MIB.add(RootDef->getOperand(1)); },
7603 [=](MachineInstrBuilder &MIB) { MIB.addImm(0); },
7604 }};
7605 }
7606
7608 // Check if we can fold in the ADD of small code model ADRP + ADD address.
7609 // HACK: ld64 on Darwin doesn't support relocations on PRFM, so we can't fold
7610 // globals into the offset.
7611 MachineInstr *RootParent = Root.getParent();
7612 if (CM == CodeModel::Small &&
7613 !(RootParent->getOpcode() == AArch64::G_AARCH64_PREFETCH &&
7614 STI.isTargetDarwin())) {
7615 auto OpFns = tryFoldAddLowIntoImm(*RootDef, Size, MRI);
7616 if (OpFns)
7617 return OpFns;
7618 }
7619
7620 if (isBaseWithConstantOffset(Root, MRI)) {
7621 MachineOperand &LHS = RootDef->getOperand(1);
7622 MachineOperand &RHS = RootDef->getOperand(2);
7623 MachineInstr *LHSDef = MRI.getVRegDef(LHS.getReg());
7624 MachineInstr *RHSDef = MRI.getVRegDef(RHS.getReg());
7625
7626 int64_t RHSC = (int64_t)RHSDef->getOperand(1).getCImm()->getZExtValue();
7627 unsigned Scale = Log2_32(Size);
7628 if ((RHSC & (Size - 1)) == 0 && RHSC >= 0 && RHSC < (0x1000 << Scale)) {
7629 if (LHSDef->getOpcode() == TargetOpcode::G_FRAME_INDEX)
7630 return {{
7631 [=](MachineInstrBuilder &MIB) { MIB.add(LHSDef->getOperand(1)); },
7632 [=](MachineInstrBuilder &MIB) { MIB.addImm(RHSC >> Scale); },
7633 }};
7634
7635 return {{
7636 [=](MachineInstrBuilder &MIB) { MIB.add(LHS); },
7637 [=](MachineInstrBuilder &MIB) { MIB.addImm(RHSC >> Scale); },
7638 }};
7639 }
7640 }
7641
7642 // Before falling back to our general case, check if the unscaled
7643 // instructions can handle this. If so, that's preferable.
7644 if (selectAddrModeUnscaled(Root, Size))
7645 return std::nullopt;
7646
7647 return {{
7648 [=](MachineInstrBuilder &MIB) { MIB.add(Root); },
7649 [=](MachineInstrBuilder &MIB) { MIB.addImm(0); },
7650 }};
7651}
7652
7653/// Given a shift instruction, return the correct shift type for that
7654/// instruction.
7656 switch (MI.getOpcode()) {
7657 default:
7659 case TargetOpcode::G_SHL:
7660 return AArch64_AM::LSL;
7661 case TargetOpcode::G_LSHR:
7662 return AArch64_AM::LSR;
7663 case TargetOpcode::G_ASHR:
7664 return AArch64_AM::ASR;
7665 case TargetOpcode::G_ROTR:
7666 return AArch64_AM::ROR;
7667 }
7668}
7669
7670/// Select a "shifted register" operand. If the value is not shifted, set the
7671/// shift operand to a default value of "lsl 0".
7672InstructionSelector::ComplexRendererFns
7673AArch64InstructionSelector::selectShiftedRegister(MachineOperand &Root,
7674 bool AllowROR) const {
7675 if (!Root.isReg())
7676 return std::nullopt;
7677 MachineRegisterInfo &MRI =
7678 Root.getParent()->getParent()->getParent()->getRegInfo();
7679
7680 // Check if the operand is defined by an instruction which corresponds to
7681 // a ShiftExtendType. E.g. a G_SHL, G_LSHR, etc.
7682 MachineInstr *ShiftInst = MRI.getVRegDef(Root.getReg());
7684 if (ShType == AArch64_AM::InvalidShiftExtend)
7685 return std::nullopt;
7686 if (ShType == AArch64_AM::ROR && !AllowROR)
7687 return std::nullopt;
7688 if (!isWorthFoldingIntoExtendedReg(*ShiftInst, MRI, false))
7689 return std::nullopt;
7690
7691 // Need an immediate on the RHS.
7692 MachineOperand &ShiftRHS = ShiftInst->getOperand(2);
7693 auto Immed = getImmedFromMO(ShiftRHS);
7694 if (!Immed)
7695 return std::nullopt;
7696
7697 // We have something that we can fold. Fold in the shift's LHS and RHS into
7698 // the instruction.
7699 MachineOperand &ShiftLHS = ShiftInst->getOperand(1);
7700 Register ShiftReg = ShiftLHS.getReg();
7701
7702 unsigned NumBits = MRI.getType(ShiftReg).getSizeInBits();
7703 unsigned Val = *Immed & (NumBits - 1);
7704 unsigned ShiftVal = AArch64_AM::getShifterImm(ShType, Val);
7705
7706 return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(ShiftReg); },
7707 [=](MachineInstrBuilder &MIB) { MIB.addImm(ShiftVal); }}};
7708}
7709
7710AArch64_AM::ShiftExtendType AArch64InstructionSelector::getExtendTypeForInst(
7711 MachineInstr &MI, MachineRegisterInfo &MRI, bool IsLoadStore) const {
7712 unsigned Opc = MI.getOpcode();
7713
7714 // Handle explicit extend instructions first.
7715 if (Opc == TargetOpcode::G_SEXT || Opc == TargetOpcode::G_SEXT_INREG) {
7716 unsigned Size;
7717 if (Opc == TargetOpcode::G_SEXT)
7718 Size = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
7719 else
7720 Size = MI.getOperand(2).getImm();
7721 assert(Size != 64 && "Extend from 64 bits?");
7722 switch (Size) {
7723 case 8:
7724 return IsLoadStore ? AArch64_AM::InvalidShiftExtend : AArch64_AM::SXTB;
7725 case 16:
7726 return IsLoadStore ? AArch64_AM::InvalidShiftExtend : AArch64_AM::SXTH;
7727 case 32:
7728 return AArch64_AM::SXTW;
7729 default:
7731 }
7732 }
7733
7734 if (Opc == TargetOpcode::G_ZEXT || Opc == TargetOpcode::G_ANYEXT) {
7735 unsigned Size = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
7736 assert(Size != 64 && "Extend from 64 bits?");
7737 switch (Size) {
7738 case 8:
7739 return IsLoadStore ? AArch64_AM::InvalidShiftExtend : AArch64_AM::UXTB;
7740 case 16:
7741 return IsLoadStore ? AArch64_AM::InvalidShiftExtend : AArch64_AM::UXTH;
7742 case 32:
7743 return AArch64_AM::UXTW;
7744 default:
7746 }
7747 }
7748
7749 // Don't have an explicit extend. Try to handle a G_AND with a constant mask
7750 // on the RHS.
7751 if (Opc != TargetOpcode::G_AND)
7753
7754 std::optional<uint64_t> MaybeAndMask = getImmedFromMO(MI.getOperand(2));
7755 if (!MaybeAndMask)
7757 uint64_t AndMask = *MaybeAndMask;
7758 switch (AndMask) {
7759 default:
7761 case 0xFF:
7762 return !IsLoadStore ? AArch64_AM::UXTB : AArch64_AM::InvalidShiftExtend;
7763 case 0xFFFF:
7764 return !IsLoadStore ? AArch64_AM::UXTH : AArch64_AM::InvalidShiftExtend;
7765 case 0xFFFFFFFF:
7766 return AArch64_AM::UXTW;
7767 }
7768}
7769
7770Register AArch64InstructionSelector::moveScalarRegClass(
7771 Register Reg, const TargetRegisterClass &RC, MachineIRBuilder &MIB) const {
7772 MachineRegisterInfo &MRI = *MIB.getMRI();
7773 auto Ty = MRI.getType(Reg);
7774 assert(!Ty.isVector() && "Expected scalars only!");
7775 if (Ty.getSizeInBits() == TRI.getRegSizeInBits(RC))
7776 return Reg;
7777
7778 // Create a copy and immediately select it.
7779 // FIXME: We should have an emitCopy function?
7780 auto Copy = MIB.buildCopy({&RC}, {Reg});
7781 selectCopy(*Copy, TII, MRI, TRI, RBI);
7782 return Copy.getReg(0);
7783}
7784
7785/// Select an "extended register" operand. This operand folds in an extend
7786/// followed by an optional left shift.
7787InstructionSelector::ComplexRendererFns
7788AArch64InstructionSelector::selectArithExtendedRegister(
7789 MachineOperand &Root) const {
7790 if (!Root.isReg())
7791 return std::nullopt;
7792 MachineRegisterInfo &MRI =
7793 Root.getParent()->getParent()->getParent()->getRegInfo();
7794
7795 uint64_t ShiftVal = 0;
7796 Register ExtReg;
7798 MachineInstr *RootDef = getDefIgnoringCopies(Root.getReg(), MRI);
7799 if (!RootDef)
7800 return std::nullopt;
7801
7802 if (!isWorthFoldingIntoExtendedReg(*RootDef, MRI, false))
7803 return std::nullopt;
7804
7805 // Check if we can fold a shift and an extend.
7806 if (RootDef->getOpcode() == TargetOpcode::G_SHL) {
7807 // Look for a constant on the RHS of the shift.
7808 MachineOperand &RHS = RootDef->getOperand(2);
7809 std::optional<uint64_t> MaybeShiftVal = getImmedFromMO(RHS);
7810 if (!MaybeShiftVal)
7811 return std::nullopt;
7812 ShiftVal = *MaybeShiftVal;
7813 if (ShiftVal > 4)
7814 return std::nullopt;
7815 // Look for a valid extend instruction on the LHS of the shift.
7816 MachineOperand &LHS = RootDef->getOperand(1);
7817 MachineInstr *ExtDef = getDefIgnoringCopies(LHS.getReg(), MRI);
7818 if (!ExtDef)
7819 return std::nullopt;
7820 Ext = getExtendTypeForInst(*ExtDef, MRI);
7822 return std::nullopt;
7823 ExtReg = ExtDef->getOperand(1).getReg();
7824 } else {
7825 // Didn't get a shift. Try just folding an extend.
7826 Ext = getExtendTypeForInst(*RootDef, MRI);
7828 return std::nullopt;
7829 ExtReg = RootDef->getOperand(1).getReg();
7830
7831 // If we have a 32 bit instruction which zeroes out the high half of a
7832 // register, we get an implicit zero extend for free. Check if we have one.
7833 // FIXME: We actually emit the extend right now even though we don't have
7834 // to.
7835 if (Ext == AArch64_AM::UXTW && MRI.getType(ExtReg).getSizeInBits() == 32) {
7836 MachineInstr *ExtInst = MRI.getVRegDef(ExtReg);
7837 if (isDef32(*ExtInst))
7838 return std::nullopt;
7839 }
7840 }
7841
7842 // We require a GPR32 here. Narrow the ExtReg if needed using a subregister
7843 // copy.
7844 MachineIRBuilder MIB(*RootDef);
7845 ExtReg = moveScalarRegClass(ExtReg, AArch64::GPR32RegClass, MIB);
7846
7847 return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(ExtReg); },
7848 [=](MachineInstrBuilder &MIB) {
7849 MIB.addImm(getArithExtendImm(Ext, ShiftVal));
7850 }}};
7851}
7852
7853InstructionSelector::ComplexRendererFns
7854AArch64InstructionSelector::selectExtractHigh(MachineOperand &Root) const {
7855 if (!Root.isReg())
7856 return std::nullopt;
7857 MachineRegisterInfo &MRI =
7858 Root.getParent()->getParent()->getParent()->getRegInfo();
7859
7860 auto Extract = getDefSrcRegIgnoringCopies(Root.getReg(), MRI);
7861 while (Extract && Extract->MI->getOpcode() == TargetOpcode::G_BITCAST &&
7862 STI.isLittleEndian())
7863 Extract =
7864 getDefSrcRegIgnoringCopies(Extract->MI->getOperand(1).getReg(), MRI);
7865 if (!Extract)
7866 return std::nullopt;
7867
7868 if (auto *Unmerge = dyn_cast<GUnmerge>(Extract->MI)) {
7869 if (Unmerge->getNumDefs() == 2 &&
7870 Extract->Reg == Unmerge->getOperand(1).getReg()) {
7871 Register ExtReg = Unmerge->getSourceReg();
7872 return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(ExtReg); }}};
7873 }
7874 }
7875 if (auto *ExtElt = dyn_cast<GExtractVectorElement>(Extract->MI)) {
7876 LLT SrcTy = MRI.getType(ExtElt->getVectorReg());
7877 auto LaneIdx =
7878 getIConstantVRegValWithLookThrough(ExtElt->getIndexReg(), MRI);
7879 if (LaneIdx && SrcTy == LLT::fixed_vector(2, 64) &&
7880 LaneIdx->Value.getSExtValue() == 1) {
7881 Register ExtReg = ExtElt->getVectorReg();
7882 return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(ExtReg); }}};
7883 }
7884 }
7885 if (auto *Subvec = dyn_cast<GExtractSubvector>(Extract->MI)) {
7886 LLT SrcTy = MRI.getType(Subvec->getSrcVec());
7887 auto LaneIdx = Subvec->getIndexImm();
7888 if (LaneIdx == SrcTy.getNumElements() / 2) {
7889 Register ExtReg = Subvec->getSrcVec();
7890 return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(ExtReg); }}};
7891 }
7892 }
7893
7894 return std::nullopt;
7895}
7896
7897InstructionSelector::ComplexRendererFns
7898AArch64InstructionSelector::selectCVTFixedPointVecBase(
7899 const MachineOperand &Root, bool isReciprocal) const {
7900 if (!Root.isReg())
7901 return std::nullopt;
7902 const MachineRegisterInfo &MRI =
7903 Root.getParent()->getParent()->getParent()->getRegInfo();
7904
7905 MachineInstr *Dup = getDefIgnoringCopies(Root.getReg(), MRI);
7906 if (Dup->getOpcode() != AArch64::G_DUP)
7907 return std::nullopt;
7908 std::optional<ValueAndVReg> CstVal =
7910 if (!CstVal)
7911 return std::nullopt;
7912
7913 unsigned RegWidth = MRI.getType(Root.getReg()).getScalarSizeInBits();
7914 APFloat FVal(0.0);
7915 switch (RegWidth) {
7916 case 16:
7917 FVal = APFloat(APFloat::IEEEhalf(), CstVal->Value);
7918 break;
7919 case 32:
7920 FVal = APFloat(APFloat::IEEEsingle(), CstVal->Value);
7921 break;
7922 case 64:
7923 FVal = APFloat(APFloat::IEEEdouble(), CstVal->Value);
7924 break;
7925 default:
7926 return std::nullopt;
7927 };
7928 if (unsigned FBits =
7929 CheckFixedPointOperandConstant(FVal, RegWidth, isReciprocal))
7930 return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(FBits); }}};
7931
7932 return std::nullopt;
7933}
7934
7935InstructionSelector::ComplexRendererFns
7936AArch64InstructionSelector::selectCVTFixedPointVec(MachineOperand &Root) const {
7937 return selectCVTFixedPointVecBase(Root, /*isReciprocal*/ false);
7938}
7939
7940InstructionSelector::ComplexRendererFns
7941AArch64InstructionSelector::selectCVTFixedPosRecipOperandVec(
7942 MachineOperand &Root) const {
7943 return selectCVTFixedPointVecBase(Root, /*isReciprocal*/ true);
7944}
7945
7946void AArch64InstructionSelector::renderFixedPointXForm(MachineInstrBuilder &MIB,
7947 const MachineInstr &MI,
7948 int OpIdx) const {
7949 // FIXME: This is only needed to satisfy the type checking in tablegen, and
7950 // should be able to reuse the Renderers already calculated by
7951 // selectCVTFixedPointVecBase.
7952 InstructionSelector::ComplexRendererFns Renderer =
7953 selectCVTFixedPointVecBase(MI.getOperand(OpIdx), /*isReciprocal*/ false);
7954 assert((Renderer && Renderer->size() == 1) &&
7955 "Expected selectCVTFixedPointVec to provide a function\n");
7956 (Renderer->front())(MIB);
7957}
7958
7959void AArch64InstructionSelector::renderFixedPointRecipXForm(
7960 MachineInstrBuilder &MIB, const MachineInstr &MI, int OpIdx) const {
7961 InstructionSelector::ComplexRendererFns Renderer =
7962 selectCVTFixedPointVecBase(MI.getOperand(OpIdx), /*isReciprocal*/ true);
7963 assert((Renderer && Renderer->size() == 1) &&
7964 "Expected selectCVTFixedPosRecipOperandVec to provide a function\n");
7965 (Renderer->front())(MIB);
7966}
7967
7968void AArch64InstructionSelector::renderTruncImm(MachineInstrBuilder &MIB,
7969 const MachineInstr &MI,
7970 int OpIdx) const {
7971 const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
7972 assert(MI.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 &&
7973 "Expected G_CONSTANT");
7974 std::optional<int64_t> CstVal =
7975 getIConstantVRegSExtVal(MI.getOperand(0).getReg(), MRI);
7976 assert(CstVal && "Expected constant value");
7977 MIB.addImm(*CstVal);
7978}
7979
7980void AArch64InstructionSelector::renderLogicalImm32(
7981 MachineInstrBuilder &MIB, const MachineInstr &I, int OpIdx) const {
7982 assert(I.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 &&
7983 "Expected G_CONSTANT");
7984 uint64_t CstVal = I.getOperand(1).getCImm()->getZExtValue();
7985 uint64_t Enc = AArch64_AM::encodeLogicalImmediate(CstVal, 32);
7986 MIB.addImm(Enc);
7987}
7988
7989void AArch64InstructionSelector::renderLogicalImm64(
7990 MachineInstrBuilder &MIB, const MachineInstr &I, int OpIdx) const {
7991 assert(I.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 &&
7992 "Expected G_CONSTANT");
7993 uint64_t CstVal = I.getOperand(1).getCImm()->getZExtValue();
7994 uint64_t Enc = AArch64_AM::encodeLogicalImmediate(CstVal, 64);
7995 MIB.addImm(Enc);
7996}
7997
7998void AArch64InstructionSelector::renderUbsanTrap(MachineInstrBuilder &MIB,
7999 const MachineInstr &MI,
8000 int OpIdx) const {
8001 assert(MI.getOpcode() == TargetOpcode::G_UBSANTRAP && OpIdx == 0 &&
8002 "Expected G_UBSANTRAP");
8003 MIB.addImm(MI.getOperand(0).getImm() | ('U' << 8));
8004}
8005
8006void AArch64InstructionSelector::renderFPImm16(MachineInstrBuilder &MIB,
8007 const MachineInstr &MI,
8008 int OpIdx) const {
8009 assert(MI.getOpcode() == TargetOpcode::G_FCONSTANT && OpIdx == -1 &&
8010 "Expected G_FCONSTANT");
8011 MIB.addImm(
8012 AArch64_AM::getFP16Imm(MI.getOperand(1).getFPImm()->getValueAPF()));
8013}
8014
8015void AArch64InstructionSelector::renderFPImm32(MachineInstrBuilder &MIB,
8016 const MachineInstr &MI,
8017 int OpIdx) const {
8018 assert(MI.getOpcode() == TargetOpcode::G_FCONSTANT && OpIdx == -1 &&
8019 "Expected G_FCONSTANT");
8020 MIB.addImm(
8021 AArch64_AM::getFP32Imm(MI.getOperand(1).getFPImm()->getValueAPF()));
8022}
8023
8024void AArch64InstructionSelector::renderFPImm64(MachineInstrBuilder &MIB,
8025 const MachineInstr &MI,
8026 int OpIdx) const {
8027 assert(MI.getOpcode() == TargetOpcode::G_FCONSTANT && OpIdx == -1 &&
8028 "Expected G_FCONSTANT");
8029 MIB.addImm(
8030 AArch64_AM::getFP64Imm(MI.getOperand(1).getFPImm()->getValueAPF()));
8031}
8032
8033void AArch64InstructionSelector::renderFPImm32SIMDModImmType4(
8034 MachineInstrBuilder &MIB, const MachineInstr &MI, int OpIdx) const {
8035 assert(MI.getOpcode() == TargetOpcode::G_FCONSTANT && OpIdx == -1 &&
8036 "Expected G_FCONSTANT");
8038 .getFPImm()
8039 ->getValueAPF()
8040 .bitcastToAPInt()
8041 .getZExtValue()));
8042}
8043
8044bool AArch64InstructionSelector::isLoadStoreOfNumBytes(
8045 const MachineInstr &MI, unsigned NumBytes) const {
8046 if (!MI.mayLoadOrStore())
8047 return false;
8048 assert(MI.hasOneMemOperand() &&
8049 "Expected load/store to have only one mem op!");
8050 return (*MI.memoperands_begin())->getSize() == NumBytes;
8051}
8052
8053bool AArch64InstructionSelector::isDef32(const MachineInstr &MI) const {
8054 const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
8055 if (MRI.getType(MI.getOperand(0).getReg()).getSizeInBits() != 32)
8056 return false;
8057
8058 // Only return true if we know the operation will zero-out the high half of
8059 // the 64-bit register. Truncates can be subregister copies, which don't
8060 // zero out the high bits. Copies and other copy-like instructions can be
8061 // fed by truncates, or could be lowered as subregister copies.
8062 switch (MI.getOpcode()) {
8063 default:
8064 return true;
8065 case TargetOpcode::COPY:
8066 case TargetOpcode::G_BITCAST:
8067 case TargetOpcode::G_TRUNC:
8068 case TargetOpcode::G_PHI:
8069 return false;
8070 }
8071}
8072
8073
8074// Perform fixups on the given PHI instruction's operands to force them all
8075// to be the same as the destination regbank.
8077 const AArch64RegisterBankInfo &RBI) {
8078 assert(MI.getOpcode() == TargetOpcode::G_PHI && "Expected a G_PHI");
8079 Register DstReg = MI.getOperand(0).getReg();
8080 const RegisterBank *DstRB = MRI.getRegBankOrNull(DstReg);
8081 assert(DstRB && "Expected PHI dst to have regbank assigned");
8082 MachineIRBuilder MIB(MI);
8083
8084 // Go through each operand and ensure it has the same regbank.
8085 for (MachineOperand &MO : llvm::drop_begin(MI.operands())) {
8086 if (!MO.isReg())
8087 continue;
8088 Register OpReg = MO.getReg();
8089 const RegisterBank *RB = MRI.getRegBankOrNull(OpReg);
8090 if (RB != DstRB) {
8091 // Insert a cross-bank copy.
8092 auto *OpDef = MRI.getVRegDef(OpReg);
8093 const LLT &Ty = MRI.getType(OpReg);
8094 MachineBasicBlock &OpDefBB = *OpDef->getParent();
8095
8096 // Any instruction we insert must appear after all PHIs in the block
8097 // for the block to be valid MIR.
8098 MachineBasicBlock::iterator InsertPt = std::next(OpDef->getIterator());
8099 if (InsertPt != OpDefBB.end() && InsertPt->isPHI())
8100 InsertPt = OpDefBB.getFirstNonPHI();
8101 MIB.setInsertPt(*OpDef->getParent(), InsertPt);
8102 auto Copy = MIB.buildCopy(Ty, OpReg);
8103 MRI.setRegBank(Copy.getReg(0), *DstRB);
8104 MO.setReg(Copy.getReg(0));
8105 }
8106 }
8107}
8108
8109void AArch64InstructionSelector::processPHIs(MachineFunction &MF) {
8110 // We're looking for PHIs, build a list so we don't invalidate iterators.
8111 MachineRegisterInfo &MRI = MF.getRegInfo();
8113 for (auto &BB : MF) {
8114 for (auto &MI : BB) {
8115 if (MI.getOpcode() == TargetOpcode::G_PHI)
8116 Phis.emplace_back(&MI);
8117 }
8118 }
8119
8120 for (auto *MI : Phis) {
8121 // We need to do some work here if the operand types are < 16 bit and they
8122 // are split across fpr/gpr banks. Since all types <32b on gpr
8123 // end up being assigned gpr32 regclasses, we can end up with PHIs here
8124 // which try to select between a gpr32 and an fpr16. Ideally RBS shouldn't
8125 // be selecting heterogenous regbanks for operands if possible, but we
8126 // still need to be able to deal with it here.
8127 //
8128 // To fix this, if we have a gpr-bank operand < 32b in size and at least
8129 // one other operand is on the fpr bank, then we add cross-bank copies
8130 // to homogenize the operand banks. For simplicity the bank that we choose
8131 // to settle on is whatever bank the def operand has. For example:
8132 //
8133 // %endbb:
8134 // %dst:gpr(s16) = G_PHI %in1:gpr(s16), %bb1, %in2:fpr(s16), %bb2
8135 // =>
8136 // %bb2:
8137 // ...
8138 // %in2_copy:gpr(s16) = COPY %in2:fpr(s16)
8139 // ...
8140 // %endbb:
8141 // %dst:gpr(s16) = G_PHI %in1:gpr(s16), %bb1, %in2_copy:gpr(s16), %bb2
8142 bool HasGPROp = false, HasFPROp = false;
8143 for (const MachineOperand &MO : llvm::drop_begin(MI->operands())) {
8144 if (!MO.isReg())
8145 continue;
8146 const LLT &Ty = MRI.getType(MO.getReg());
8147 if (!Ty.isValid() || !Ty.isScalar())
8148 break;
8149 if (Ty.getSizeInBits() >= 32)
8150 break;
8151 const RegisterBank *RB = MRI.getRegBankOrNull(MO.getReg());
8152 // If for some reason we don't have a regbank yet. Don't try anything.
8153 if (!RB)
8154 break;
8155
8156 if (RB->getID() == AArch64::GPRRegBankID)
8157 HasGPROp = true;
8158 else
8159 HasFPROp = true;
8160 }
8161 // We have heterogenous regbanks, need to fixup.
8162 if (HasGPROp && HasFPROp)
8163 fixupPHIOpBanks(*MI, MRI, RBI);
8164 }
8165}
8166
8167namespace llvm {
8168InstructionSelector *
8170 const AArch64Subtarget &Subtarget,
8171 const AArch64RegisterBankInfo &RBI) {
8172 return new AArch64InstructionSelector(TM, Subtarget, RBI);
8173}
8174}
#define Success
MachineInstrBuilder MachineInstrBuilder & DefMI
static std::tuple< SDValue, SDValue > extractPtrauthBlendDiscriminators(SDValue Disc, SelectionDAG *DAG)
static bool isPreferredADD(int64_t ImmOff)
static SDValue emitConditionalComparison(SDValue LHS, SDValue RHS, ISD::CondCode CC, SDValue CCOp, AArch64CC::CondCode Predicate, AArch64CC::CondCode OutCC, const SDLoc &DL, SelectionDAG &DAG)
can be transformed to: not (and (not (and (setCC (cmp C)) (setCD (cmp D)))) (and (not (setCA (cmp A))...
static SDValue tryAdvSIMDModImm16(unsigned NewOp, SDValue Op, SelectionDAG &DAG, const APInt &Bits, const SDValue *LHS=nullptr)
static SDValue tryAdvSIMDModImmFP(unsigned NewOp, SDValue Op, SelectionDAG &DAG, const APInt &Bits)
static SDValue tryAdvSIMDModImm64(unsigned NewOp, SDValue Op, SelectionDAG &DAG, const APInt &Bits)
static bool isCMN(SDValue Op, ISD::CondCode CC, SelectionDAG &DAG)
static SDValue tryAdvSIMDModImm8(unsigned NewOp, SDValue Op, SelectionDAG &DAG, const APInt &Bits)
static SDValue emitConjunctionRec(SelectionDAG &DAG, SDValue Val, AArch64CC::CondCode &OutCC, bool Negate, SDValue CCOp, AArch64CC::CondCode Predicate)
Emit conjunction or disjunction tree with the CMP/FCMP followed by a chain of CCMP/CFCMP ops.
static SDValue tryAdvSIMDModImm321s(unsigned NewOp, SDValue Op, SelectionDAG &DAG, const APInt &Bits)
static void changeFPCCToANDAArch64CC(ISD::CondCode CC, AArch64CC::CondCode &CondCode, AArch64CC::CondCode &CondCode2)
Convert a DAG fp condition code to an AArch64 CC.
static bool canEmitConjunction(SelectionDAG &DAG, const SDValue Val, bool &CanNegate, bool &MustBeFirst, bool &PreferFirst, bool WillNegate, unsigned Depth=0)
Returns true if Val is a tree of AND/OR/SETCC operations that can be expressed as a conjunction.
static SDValue tryAdvSIMDModImm32(unsigned NewOp, SDValue Op, SelectionDAG &DAG, const APInt &Bits, const SDValue *LHS=nullptr)
static SDValue emitConjunction(SelectionDAG &DAG, SDValue Val, AArch64CC::CondCode &OutCC)
Emit expression as a conjunction (a series of CCMP/CFCMP ops).
#define GET_GLOBALISEL_PREDICATES_INIT
static std::pair< const TargetRegisterClass *, const TargetRegisterClass * > getRegClassesForCopy(MachineInstr &I, const TargetInstrInfo &TII, MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI, const RegisterBankInfo &RBI)
Helper function to get the source and destination register classes for a copy.
#define GET_GLOBALISEL_TEMPORARIES_INIT
static Register getTestBitReg(Register Reg, uint64_t &Bit, bool &Invert, MachineRegisterInfo &MRI)
Return a register which can be used as a bit to test in a TB(N)Z.
static unsigned getMinSizeForRegBank(const RegisterBank &RB)
Returns the minimum size the given register bank can hold.
static std::optional< int64_t > getVectorShiftImm(Register Reg, MachineRegisterInfo &MRI)
Returns the element immediate value of a vector shift operand if found.
static unsigned selectLoadStoreUIOp(unsigned GenericOpc, unsigned RegBankID, unsigned OpSize)
Select the AArch64 opcode for the G_LOAD or G_STORE operation GenericOpc, appropriate for the (value)...
static const TargetRegisterClass * getMinClassForRegBank(const RegisterBank &RB, TypeSize SizeInBits, bool GetAllRegSet=false)
Given a register bank, and size in bits, return the smallest register class that can represent that c...
static unsigned selectBinaryOp(unsigned GenericOpc, unsigned RegBankID, unsigned OpSize)
Select the AArch64 opcode for the basic binary operation GenericOpc (such as G_OR or G_SDIV),...
static bool getSubRegForClass(const TargetRegisterClass *RC, const TargetRegisterInfo &TRI, unsigned &SubReg)
Returns the correct subregister to use for a given register class.
static bool selectCopy(MachineInstr &I, const TargetInstrInfo &TII, MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI, const RegisterBankInfo &RBI)
static bool copySubReg(MachineInstr &I, MachineRegisterInfo &MRI, const RegisterBankInfo &RBI, Register SrcReg, const TargetRegisterClass *To, unsigned SubReg)
Helper function for selectCopy.
static AArch64CC::CondCode changeICMPPredToAArch64CC(CmpInst::Predicate P, Register RHS={}, MachineRegisterInfo *MRI=nullptr)
static Register createDTuple(ArrayRef< Register > Regs, MachineIRBuilder &MIB)
Create a tuple of D-registers using the registers in Regs.
static void fixupPHIOpBanks(MachineInstr &MI, MachineRegisterInfo &MRI, const AArch64RegisterBankInfo &RBI)
static bool selectDebugInstr(MachineInstr &I, MachineRegisterInfo &MRI, const RegisterBankInfo &RBI)
static AArch64_AM::ShiftExtendType getShiftTypeForInst(MachineInstr &MI)
Given a shift instruction, return the correct shift type for that instruction.
static bool unsupportedBinOp(const MachineInstr &I, const AArch64RegisterBankInfo &RBI, const MachineRegisterInfo &MRI, const AArch64RegisterInfo &TRI)
Check whether I is a currently unsupported binary operation:
static bool getLaneCopyOpcode(unsigned &CopyOpc, unsigned &ExtractSubReg, const unsigned EltSize)
static Register createQTuple(ArrayRef< Register > Regs, MachineIRBuilder &MIB)
Create a tuple of Q-registers using the registers in Regs.
static std::optional< uint64_t > getImmedFromMO(const MachineOperand &Root)
static std::pair< unsigned, unsigned > getInsertVecEltOpInfo(const RegisterBank &RB, unsigned EltSize)
Return an <Opcode, SubregIndex> pair to do an vector elt insert of a given size and RB.
static Register createTuple(ArrayRef< Register > Regs, const unsigned RegClassIDs[], const unsigned SubRegs[], MachineIRBuilder &MIB)
Create a REG_SEQUENCE instruction using the registers in Regs.
static std::optional< int64_t > getVectorSHLImm(LLT SrcTy, Register Reg, MachineRegisterInfo &MRI)
Matches and returns the shift immediate value for a SHL instruction given a shift operand.
static void changeFPCCToORAArch64CC(CmpInst::Predicate CC, AArch64CC::CondCode &CondCode, AArch64CC::CondCode &CondCode2)
changeFPCCToORAArch64CC - Convert an IR fp condition code to an AArch64 CC.
unsigned RegSize
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
This file declares the targeting of the RegisterBankInfo class for AArch64.
constexpr LLT S16
constexpr LLT S32
constexpr LLT S64
constexpr LLT S8
static bool isStore(int Opcode)
static bool selectMergeValues(MachineInstrBuilder &MIB, const ARMBaseInstrInfo &TII, MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI, const RegisterBankInfo &RBI)
static bool selectUnmergeValues(MachineInstrBuilder &MIB, const ARMBaseInstrInfo &TII, MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI, const RegisterBankInfo &RBI)
MachineBasicBlock & MBB
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
This file contains the declarations for the subclasses of Constant, which represent the different fla...
This file contains constants used for implementing Dwarf debug support.
Provides analysis for querying information about KnownBits during GISel passes.
#define DEBUG_TYPE
Declares convenience wrapper classes for interpreting MachineInstr instances as specific generic oper...
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
static void emitLoadFromConstantPool(Register DstReg, const Constant *ConstVal, MachineIRBuilder &MIRBuilder)
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
Contains matchers for matching SSA Machine Instructions.
This file declares the MachineConstantPool class which is an abstract constant pool to keep track of ...
This file declares the MachineIRBuilder class.
Register Reg
Register const TargetRegisterInfo * TRI
Promote Memory to Register
Definition Mem2Reg.cpp:110
#define T
static MCRegister getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
MachineInstr unsigned OpIdx
#define P(N)
static MachineBasicBlock * emitSelect(MachineInstr &MI, MachineBasicBlock *BB, const TargetInstrInfo *TII, const PPCSubtarget &Subtarget)
Emit SELECT instruction, using ISEL if available, otherwise use branch-based control flow.
if(PassOpts->AAPipeline)
static StringRef getName(Value *V)
#define LLVM_DEBUG(...)
Definition Debug.h:119
static constexpr int Concat[]
Value * RHS
Value * LHS
This class provides the information for the target register banks.
std::optional< uint16_t > getPtrAuthBlockAddressDiscriminatorIfEnabled(const Function &ParentFn) const
Compute the integer discriminator for a given BlockAddress constant, if blockaddress signing is enabl...
const AArch64TargetLowering * getTargetLowering() const override
unsigned ClassifyGlobalReference(const GlobalValue *GV, const TargetMachine &TM) const
ClassifyGlobalReference - Find the target operand flags that describe how a global value should be re...
bool isX16X17Safer() const
Returns whether the operating system makes it safer to store sensitive values in x16 and x17 as oppos...
bool isCallingConvWin64(CallingConv::ID CC, bool IsVarArg) const
APInt bitcastToAPInt() const
Definition APFloat.h:1436
Class for arbitrary precision integers.
Definition APInt.h:78
LLVM_ABI APInt zext(unsigned width) const
Zero extend to a new width.
Definition APInt.cpp:1055
uint64_t getZExtValue() const
Get zero extended value.
Definition APInt.h:1563
LLVM_ABI APInt trunc(unsigned width) const
Truncate to new width.
Definition APInt.cpp:968
static LLVM_ABI APInt getSplat(unsigned NewLen, const APInt &V)
Return a value containing V broadcasted over NewLen bits.
Definition APInt.cpp:652
static APInt getHighBitsSet(unsigned numBits, unsigned hiBitsSet)
Constructs an APInt value that has the top hiBitsSet bits set.
Definition APInt.h:297
Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
size_t size() const
Get the array size.
Definition ArrayRef.h:141
BlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate IR basic block frequen...
bool isEquality() const
Determine if this is an equals/not equals predicate.
Definition InstrTypes.h:978
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition InstrTypes.h:740
@ FCMP_OEQ
0 0 0 1 True if ordered and equal
Definition InstrTypes.h:743
@ ICMP_SLT
signed less than
Definition InstrTypes.h:769
@ ICMP_SLE
signed less or equal
Definition InstrTypes.h:770
@ FCMP_OLT
0 1 0 0 True if ordered and less than
Definition InstrTypes.h:746
@ FCMP_ULE
1 1 0 1 True if unordered, less than, or equal
Definition InstrTypes.h:755
@ FCMP_OGT
0 0 1 0 True if ordered and greater than
Definition InstrTypes.h:744
@ FCMP_OGE
0 0 1 1 True if ordered and greater than or equal
Definition InstrTypes.h:745
@ ICMP_UGE
unsigned greater or equal
Definition InstrTypes.h:764
@ ICMP_UGT
unsigned greater than
Definition InstrTypes.h:763
@ ICMP_SGT
signed greater than
Definition InstrTypes.h:767
@ FCMP_ULT
1 1 0 0 True if unordered or less than
Definition InstrTypes.h:754
@ FCMP_ONE
0 1 1 0 True if ordered and operands are unequal
Definition InstrTypes.h:748
@ FCMP_UEQ
1 0 0 1 True if unordered or equal
Definition InstrTypes.h:751
@ ICMP_ULT
unsigned less than
Definition InstrTypes.h:765
@ FCMP_UGT
1 0 1 0 True if unordered or greater than
Definition InstrTypes.h:752
@ FCMP_OLE
0 1 0 1 True if ordered and less than or equal
Definition InstrTypes.h:747
@ FCMP_ORD
0 1 1 1 True if ordered (no nans)
Definition InstrTypes.h:749
@ ICMP_NE
not equal
Definition InstrTypes.h:762
@ ICMP_SGE
signed greater or equal
Definition InstrTypes.h:768
@ FCMP_UNE
1 1 1 0 True if unordered or not equal
Definition InstrTypes.h:756
@ ICMP_ULE
unsigned less or equal
Definition InstrTypes.h:766
@ FCMP_UGE
1 0 1 1 True if unordered, greater than, or equal
Definition InstrTypes.h:753
@ FCMP_UNO
1 0 0 0 True if unordered: isnan(X) | isnan(Y)
Definition InstrTypes.h:750
Predicate getSwappedPredicate() const
For example, EQ->EQ, SLE->SGE, ULT->UGT, OEQ->OEQ, ULE->UGE, OLT->OGT, etc.
Definition InstrTypes.h:890
Predicate getInversePredicate() const
For example, EQ -> NE, UGT -> ULE, SLT -> SGE, OEQ -> UNE, UGT -> OLE, OLT -> UGE,...
Definition InstrTypes.h:852
bool isIntPredicate() const
Definition InstrTypes.h:846
bool isUnsigned() const
Definition InstrTypes.h:999
static LLVM_ABI Constant * getSplat(unsigned NumElts, Constant *Elt)
Return a ConstantVector with the specified constant in each element.
const APFloat & getValueAPF() const
Definition Constants.h:463
bool isNegative() const
Return true if the sign bit is set.
Definition Constants.h:476
bool isZero() const
Return true if the value is positive or negative zero.
Definition Constants.h:467
int64_t getSExtValue() const
Return the constant as a 64-bit integer value after it has been sign extended as appropriate for the ...
Definition Constants.h:174
unsigned getBitWidth() const
getBitWidth - Return the scalar bitwidth of this constant.
Definition Constants.h:162
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
Definition Constants.h:168
static LLVM_ABI Constant * get(ArrayRef< Constant * > V)
This is an important base class in LLVM.
Definition Constant.h:43
LLVM_ABI Constant * getSplatValue(bool AllowPoison=false) const
If all elements of the vector constant have the same value, return that value.
bool isNullValue() const
Return true if this is the value that would be returned by getNullValue.
Definition Constant.h:64
TypeSize getTypeStoreSize(Type *Ty) const
Returns the maximum number of bytes that may be overwritten by storing the specified type.
Definition DataLayout.h:579
LLVM_ABI Align getPrefTypeAlign(Type *Ty) const
Returns the preferred stack/global alignment for the specified type.
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this functio...
Definition Function.h:272
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
Definition Function.cpp:354
bool isVarArg() const
isVarArg - Return true if this function takes a variable number of arguments.
Definition Function.h:229
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition Function.cpp:724
virtual void setupMF(MachineFunction &mf, GISelValueTracking *vt, CodeGenCoverage *covinfo=nullptr, ProfileSummaryInfo *psi=nullptr, BlockFrequencyInfo *bfi=nullptr)
Setup per-MF executor state.
Represents indexed stores.
Register getPointerReg() const
Get the source register of the pointer value.
MachineMemOperand & getMMO() const
Get the MachineMemOperand on this instruction.
LocationSize getMemSize() const
Returns the size in bytes of the memory access.
LocationSize getMemSizeInBits() const
Returns the size in bits of the memory access.
Represents a G_SELECT.
Register getCondReg() const
Register getFalseReg() const
Register getTrueReg() const
Register getReg(unsigned Idx) const
Access the Idx'th operand as a register and return it.
bool isThreadLocal() const
If the value is "Thread Local", its value isn't shared by the threads.
bool hasExternalWeakLinkage() const
bool isEquality() const
Return true if this predicate is either EQ or NE.
constexpr bool isScalableVector() const
Returns true if the LLT is a scalable vector.
constexpr unsigned getScalarSizeInBits() const
constexpr bool isScalar() const
LLT multiplyElements(int Factor) const
Produce a vector type that is Factor times bigger, preserving the element type.
constexpr LLT changeElementType(LLT NewEltTy) const
If this type is a vector, return a vector with the same number of elements but the new element type.
LLT getScalarType() const
constexpr bool isPointerVector() const
constexpr bool isInteger() const
static constexpr LLT scalar(unsigned SizeInBits)
Get a low-level scalar or aggregate "bag of bits".
constexpr bool isValid() const
constexpr uint16_t getNumElements() const
Returns the number of elements in a vector LLT.
constexpr bool isVector() const
static constexpr LLT pointer(unsigned AddressSpace, unsigned SizeInBits)
Get a low-level pointer in the given address space.
constexpr TypeSize getSizeInBits() const
Returns the total size of the type. Must only be called on sized types.
constexpr bool isPointer() const
constexpr unsigned getAddressSpace() const
static constexpr LLT fixed_vector(unsigned NumElements, unsigned ScalarSizeInBits)
Get a low-level fixed-width vector of some number of elements and element width.
static LLT integer(unsigned SizeInBits)
constexpr TypeSize getSizeInBytes() const
Returns the total size of the type in bytes, i.e.
LLT getElementType() const
Returns the vector's element type. Only valid for vector types.
TypeSize getValue() const
LLVM_ABI iterator getFirstNonPHI()
Returns a pointer to the first instruction in this block that is not a PHINode instruction.
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
MachineInstrBundleIterator< MachineInstr > iterator
LLVM_ABI unsigned getConstantPoolIndex(const Constant *C, Align Alignment)
getConstantPoolIndex - Create a new entry in the constant pool or return an existing one.
void setFrameAddressIsTaken(bool T)
void setReturnAddressIsTaken(bool s)
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
const DataLayout & getDataLayout() const
Return the DataLayout attached to the Module associated to this MF.
Function & getFunction()
Return the LLVM function that this machine code represents.
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
MachineConstantPool * getConstantPool()
getConstantPool - Return the constant pool object for the current function.
const TargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
Helper class to build MachineInstr.
void setInsertPt(MachineBasicBlock &MBB, MachineBasicBlock::iterator II)
Set the insertion point before the specified position.
void setInstr(MachineInstr &MI)
Set the insertion point to before MI.
MachineInstrBuilder buildInstr(unsigned Opcode)
Build and insert <empty> = Opcode <empty>.
MachineFunction & getMF()
Getter for the function we currently build.
void setInstrAndDebugLoc(MachineInstr &MI)
Set the insertion point to before MI, and set the debug loc to MI's loc.
const MachineBasicBlock & getMBB() const
Getter for the basic block we currently build.
MachineRegisterInfo * getMRI()
Getter for MRI.
MachineIRBuilderState & getState()
Getter for the State.
MachineInstrBuilder buildCopy(const DstOp &Res, const SrcOp &Op)
Build and insert Res = COPY Op.
const DataLayout & getDataLayout() const
void setState(const MachineIRBuilderState &NewState)
Setter for the State.
MachineInstrBuilder buildPtrToInt(const DstOp &Dst, const SrcOp &Src)
Build and insert a G_PTRTOINT instruction.
Register getReg(unsigned Idx) const
Get the register for the operand index.
void constrainAllUses(const TargetInstrInfo &TII, const TargetRegisterInfo &TRI, const RegisterBankInfo &RBI) const
const MachineInstrBuilder & addUse(Register RegNo, RegState Flags={}, unsigned SubReg=0) const
Add a virtual register use operand.
const MachineInstrBuilder & addReg(Register RegNo, RegState Flags={}, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & addBlockAddress(const BlockAddress *BA, int64_t Offset=0, unsigned TargetFlags=0) const
const MachineInstrBuilder & addFrameIndex(int Idx) const
const MachineInstrBuilder & addRegMask(const uint32_t *Mask) const
const MachineInstrBuilder & addGlobalAddress(const GlobalValue *GV, int64_t Offset=0, unsigned TargetFlags=0) const
const MachineInstrBuilder & addJumpTableIndex(unsigned Idx, unsigned TargetFlags=0) const
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
const MachineInstrBuilder & addDef(Register RegNo, RegState Flags={}, unsigned SubReg=0) const
Add a virtual register definition operand.
const MachineInstrBuilder & cloneMemRefs(const MachineInstr &OtherMI) const
const MachineInstrBuilder & setMIFlags(unsigned Flags) const
const MachineInstrBuilder & addMemOperand(MachineMemOperand *MMO) const
Representation of each machine instruction.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
const MachineBasicBlock * getParent() const
LLVM_ABI void addOperand(MachineFunction &MF, const MachineOperand &Op)
Add the specified operand to the instruction.
LLVM_ABI const MachineFunction * getMF() const
Return the function that contains the basic block that this instruction belongs to.
const MachineOperand & getOperand(unsigned i) const
LLVM_ABI MachineInstrBundleIterator< MachineInstr > eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
LLVM_ABI void addMemOperand(MachineFunction &MF, MachineMemOperand *MO)
Add a MachineMemOperand to the machine instruction.
LLT getMemoryType() const
Return the memory type of the memory reference.
@ MOLoad
The memory access reads data.
@ MOStore
The memory access writes data.
AtomicOrdering getSuccessOrdering() const
Return the atomic ordering requirements for this memory operation.
MachineOperand class - Representation of each machine instruction operand.
const GlobalValue * getGlobal() const
const ConstantInt * getCImm() const
bool isCImm() const
isCImm - Test if this is a MO_CImmediate operand.
int64_t getImm() const
bool isReg() const
isReg - Tests if this is a MO_Register operand.
LLVM_ABI void setReg(Register Reg)
Change the register this operand corresponds to.
bool isImm() const
isImm - Tests if this is a MO_Immediate operand.
LLVM_ABI void ChangeToImmediate(int64_t ImmVal, unsigned TargetFlags=0)
ChangeToImmediate - Replace this operand with a new immediate operand of the specified value.
MachineInstr * getParent()
getParent - Return the instruction that this operand belongs to.
static MachineOperand CreatePredicate(unsigned Pred)
static MachineOperand CreateImm(int64_t Val)
Register getReg() const
getReg - Returns the register number.
static MachineOperand CreateGA(const GlobalValue *GV, int64_t Offset, unsigned TargetFlags=0)
static MachineOperand CreateBA(const BlockAddress *BA, int64_t Offset, unsigned TargetFlags=0)
const ConstantFP * getFPImm() const
unsigned getPredicate() const
int64_t getOffset() const
Return the offset from the symbol in this operand.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
LLVM_ABI bool hasOneNonDBGUse(Register RegNo) const
hasOneNonDBGUse - Return true if there is exactly one non-Debug use of the specified register.
const TargetRegisterClass * getRegClass(Register Reg) const
Return the register class of the specified virtual register.
LLVM_ABI MachineInstr * getVRegDef(Register Reg) const
getVRegDef - Return the machine instr that defines the specified virtual register or null if none is ...
bool use_nodbg_empty(Register RegNo) const
use_nodbg_empty - Return true if there are no non-Debug instructions using the specified register.
const RegClassOrRegBank & getRegClassOrRegBank(Register Reg) const
Return the register bank or register class of Reg.
LLVM_ABI Register createVirtualRegister(const TargetRegisterClass *RegClass, StringRef Name="")
createVirtualRegister - Create and return a new virtual register in the function with the specified r...
def_instr_iterator def_instr_begin(Register RegNo) const
LLT getType(Register Reg) const
Get the low-level type of Reg or LLT{} if Reg is not a generic (target independent) virtual register.
const RegisterBank * getRegBankOrNull(Register Reg) const
Return the register bank of Reg, or null if Reg has not been assigned a register bank or has been ass...
LLVM_ABI void setRegBank(Register Reg, const RegisterBank &RegBank)
Set the register bank to RegBank for Reg.
iterator_range< use_instr_nodbg_iterator > use_nodbg_instructions(Register Reg) const
LLVM_ABI void setType(Register VReg, LLT Ty)
Set the low-level type of VReg to Ty.
bool hasOneDef(Register RegNo) const
Return true if there is exactly one operand defining the specified register.
LLVM_ABI void setRegClass(Register Reg, const TargetRegisterClass *RC)
setRegClass - Set the register class of the specified virtual register.
LLVM_ABI Register createGenericVirtualRegister(LLT Ty, StringRef Name="")
Create and return a new generic virtual register with low-level type Ty.
const TargetRegisterClass * getRegClassOrNull(Register Reg) const
Return the register class of Reg, or null if Reg has not been assigned a register class yet.
LLVM_ABI Register cloneVirtualRegister(Register VReg, StringRef Name="")
Create and return a new virtual register in the function with the same attributes as the given regist...
Analysis providing profile information.
Holds all the information related to register banks.
static const TargetRegisterClass * constrainGenericRegister(Register Reg, const TargetRegisterClass &RC, MachineRegisterInfo &MRI)
Constrain the (possibly generic) virtual register Reg to RC.
const RegisterBank & getRegBank(unsigned ID)
Get the register bank identified by ID.
TypeSize getSizeInBits(Register Reg, const MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI) const
Get the size in bits of Reg.
This class implements the register bank concept.
unsigned getID() const
Get the identifier of this register bank.
Wrapper class representing virtual and physical registers.
Definition Register.h:20
constexpr bool isValid() const
Definition Register.h:112
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
Definition Register.h:79
constexpr bool isPhysical() const
Return true if the specified register number is in the physical register namespace.
Definition Register.h:83
reference emplace_back(ArgTypes &&... Args)
void push_back(const T &Elt)
TargetInstrInfo - Interface to description of machine instruction set.
bool isPositionIndependent() const
bool useEmulatedTLS() const
Returns true if this target uses emulated TLS.
CodeModel::Model getCodeModel() const
Returns the code model.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
virtual const TargetRegisterInfo * getRegisterInfo() const =0
Return the target's register information.
virtual const TargetLowering * getTargetLowering() const
static constexpr TypeSize getFixed(ScalarTy ExactSize)
Definition TypeSize.h:343
static constexpr TypeSize getScalable(ScalarTy MinimumSize)
Definition TypeSize.h:346
Value * getOperand(unsigned i) const
Definition User.h:207
LLVM Value Representation.
Definition Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:255
LLVM_ABI Align getPointerAlignment(const DataLayout &DL) const
Returns an alignment of the pointer value.
Definition Value.cpp:993
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
Definition TypeSize.h:168
self_iterator getIterator()
Definition ilist_node.h:123
Changed
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
static CondCode getInvertedCondCode(CondCode Code)
static unsigned getNZCVToSatisfyCondCode(CondCode Code)
Given a condition code, return NZCV flags that would satisfy that condition.
void changeFCMPPredToAArch64CC(const CmpInst::Predicate P, AArch64CC::CondCode &CondCode, AArch64CC::CondCode &CondCode2)
Find the AArch64 condition codes necessary to represent P for a scalar floating point comparison.
std::optional< int64_t > getAArch64VectorSplatScalar(const MachineInstr &MI, const MachineRegisterInfo &MRI)
@ MO_NC
MO_NC - Indicates whether the linker is expected to check the symbol reference for overflow.
@ MO_G1
MO_G1 - A symbol operand with this flag (granule 1) represents the bits 16-31 of a 64-bit address,...
@ MO_PAGEOFF
MO_PAGEOFF - A symbol operand with this flag represents the offset of that symbol within a 4K page.
@ MO_GOT
MO_GOT - This flag indicates that a symbol operand represents the address of the GOT entry for the sy...
@ MO_G0
MO_G0 - A symbol operand with this flag (granule 0) represents the bits 0-15 of a 64-bit address,...
@ MO_PAGE
MO_PAGE - A symbol operand with this flag represents the pc-relative offset of the 4K page containing...
@ MO_TLS
MO_TLS - Indicates that the operand being accessed is some kind of thread-local symbol.
@ MO_G2
MO_G2 - A symbol operand with this flag (granule 2) represents the bits 32-47 of a 64-bit address,...
@ MO_G3
MO_G3 - A symbol operand with this flag (granule 3) represents the high 16-bits of a 64-bit address,...
static bool isLogicalImmediate(uint64_t imm, unsigned regSize)
isLogicalImmediate - Return true if the immediate is valid for a logical immediate instruction of the...
static uint8_t encodeAdvSIMDModImmType2(uint64_t Imm)
static bool isAdvSIMDModImmType9(uint64_t Imm)
static bool isAdvSIMDModImmType4(uint64_t Imm)
static bool isAdvSIMDModImmType5(uint64_t Imm)
static int getFP32Imm(const APInt &Imm)
getFP32Imm - Return an 8-bit floating-point version of the 32-bit floating-point value.
static uint8_t encodeAdvSIMDModImmType7(uint64_t Imm)
static uint8_t encodeAdvSIMDModImmType12(uint64_t Imm)
static uint8_t encodeAdvSIMDModImmType10(uint64_t Imm)
static uint8_t encodeAdvSIMDModImmType9(uint64_t Imm)
static uint64_t encodeLogicalImmediate(uint64_t imm, unsigned regSize)
encodeLogicalImmediate - Return the encoded immediate value for a logical immediate instruction of th...
static bool isAdvSIMDModImmType7(uint64_t Imm)
static uint8_t encodeAdvSIMDModImmType5(uint64_t Imm)
static int getFP64Imm(const APInt &Imm)
getFP64Imm - Return an 8-bit floating-point version of the 64-bit floating-point value.
static bool isAdvSIMDModImmType10(uint64_t Imm)
static int getFP16Imm(const APInt &Imm)
getFP16Imm - Return an 8-bit floating-point version of the 16-bit floating-point value.
static uint8_t encodeAdvSIMDModImmType8(uint64_t Imm)
static bool isAdvSIMDModImmType12(uint64_t Imm)
static uint8_t encodeAdvSIMDModImmType11(uint64_t Imm)
static bool isAdvSIMDModImmType11(uint64_t Imm)
static uint8_t encodeAdvSIMDModImmType6(uint64_t Imm)
static bool isAdvSIMDModImmType8(uint64_t Imm)
static uint8_t encodeAdvSIMDModImmType4(uint64_t Imm)
static unsigned getShifterImm(AArch64_AM::ShiftExtendType ST, unsigned Imm)
getShifterImm - Encode the shift type and amount: imm: 6-bit shift amount shifter: 000 ==> lsl 001 ==...
static bool isAdvSIMDModImmType6(uint64_t Imm)
static uint8_t encodeAdvSIMDModImmType1(uint64_t Imm)
static uint8_t encodeAdvSIMDModImmType3(uint64_t Imm)
static bool isAdvSIMDModImmType2(uint64_t Imm)
static bool isAdvSIMDModImmType3(uint64_t Imm)
static bool isSignExtendShiftType(AArch64_AM::ShiftExtendType Type)
isSignExtendShiftType - Returns true if Type is sign extending.
static bool isAdvSIMDModImmType1(uint64_t Imm)
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out,...
operand_type_match m_Reg()
SpecificConstantMatch m_SpecificICst(const APInt &RequestedValue)
Matches a constant equal to RequestedValue.
UnaryOp_match< SrcTy, TargetOpcode::G_ZEXT > m_GZExt(const SrcTy &Src)
ConstantMatch< APInt > m_ICst(APInt &Cst)
BinaryOp_match< LHS, RHS, TargetOpcode::G_ADD, true > m_GAdd(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, TargetOpcode::G_OR, true > m_GOr(const LHS &L, const RHS &R)
BinaryOp_match< SpecificConstantMatch, SrcTy, TargetOpcode::G_SUB > m_Neg(const SrcTy &&Src)
Matches a register negated by a G_SUB.
OneNonDBGUse_match< SubPat > m_OneNonDBGUse(const SubPat &SP)
BinaryOp_match< SrcTy, SpecificConstantMatch, TargetOpcode::G_XOR, true > m_Not(const SrcTy &&Src)
Matches a register not-ed by a G_XOR.
bool mi_match(Reg R, const MachineRegisterInfo &MRI, Pattern &&P)
BinaryOp_match< LHS, RHS, TargetOpcode::G_PTR_ADD, false > m_GPtrAdd(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, TargetOpcode::G_SHL, false > m_GShl(const LHS &L, const RHS &R)
Or< Preds... > m_any_of(Preds &&... preds)
BinaryOp_match< LHS, RHS, TargetOpcode::G_AND, true > m_GAnd(const LHS &L, const RHS &R)
Predicate
Predicate - These are "(BI << 5) | BO" for various predicates.
Predicate getPredicate(unsigned Condition, unsigned Hint)
Return predicate consisting of specified condition and hint bits.
constexpr double e
NodeAddr< InstrNode * > Instr
Definition RDFGraph.h:389
This is an optimization pass for GlobalISel generic memory operations.
LLVM_ABI Register getFunctionLiveInPhysReg(MachineFunction &MF, const TargetInstrInfo &TII, MCRegister PhysReg, const TargetRegisterClass &RC, const DebugLoc &DL, LLT RegTy=LLT())
Return a virtual register corresponding to the incoming argument register PhysReg.
Definition Utils.cpp:861
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition STLExtras.h:315
@ Offset
Definition DWP.cpp:558
FunctionAddr VTableAddr Value
Definition InstrProf.h:137
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1738
LLVM_ABI Register constrainOperandRegClass(const MachineFunction &MF, const TargetRegisterInfo &TRI, MachineRegisterInfo &MRI, const TargetInstrInfo &TII, const RegisterBankInfo &RBI, MachineInstr &InsertPt, const TargetRegisterClass &RegClass, MachineOperand &RegMO)
Constrain the Register operand OpIdx, so that it is now constrained to the TargetRegisterClass passed...
Definition Utils.cpp:60
LLVM_ABI MachineInstr * getOpcodeDef(unsigned Opcode, Register Reg, const MachineRegisterInfo &MRI)
See if Reg is defined by a single def instruction that is Opcode.
Definition Utils.cpp:656
PointerUnion< const TargetRegisterClass *, const RegisterBank * > RegClassOrRegBank
Convenient type to represent either a register class or a register bank.
LLVM_ABI const ConstantFP * getConstantFPVRegVal(Register VReg, const MachineRegisterInfo &MRI)
Definition Utils.cpp:464
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
LLVM_ABI std::optional< APInt > getIConstantVRegVal(Register VReg, const MachineRegisterInfo &MRI)
If VReg is defined by a G_CONSTANT, return the corresponding value.
Definition Utils.cpp:297
unsigned CheckFixedPointOperandConstant(APFloat &FVal, unsigned RegWidth, bool isReciprocal)
@ Undef
Value of the register doesn't matter.
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
bool isStrongerThanMonotonic(AtomicOrdering AO)
LLVM_ABI void constrainSelectedInstRegOperands(MachineInstr &I, const TargetInstrInfo &TII, const TargetRegisterInfo &TRI, const RegisterBankInfo &RBI)
Mutate the newly-selected instruction I to constrain its (possibly generic) virtual register operands...
Definition Utils.cpp:159
bool isPreISelGenericOpcode(unsigned Opcode)
Check whether the given Opcode is a generic opcode that is not supposed to appear after ISel.
unsigned getBLRCallOpcode(const MachineFunction &MF)
Return opcode to be used for indirect calls.
LLVM_ABI MachineInstr * getDefIgnoringCopies(Register Reg, const MachineRegisterInfo &MRI)
Find the def instruction for Reg, folding away any trivial copies.
Definition Utils.cpp:497
LLVM_ABI std::optional< int64_t > getIConstantVRegSExtVal(Register VReg, const MachineRegisterInfo &MRI)
If VReg is defined by a G_CONSTANT whose value fits in int64_t, returns it.
Definition Utils.cpp:317
constexpr bool isShiftedMask_64(uint64_t Value)
Return true if the argument contains a non-empty sequence of ones with the remainder zero (64 bit ver...
Definition MathExtras.h:273
InstructionSelector * createAArch64InstructionSelector(const AArch64TargetMachine &, const AArch64Subtarget &, const AArch64RegisterBankInfo &)
OutputIt transform(R &&Range, OutputIt d_first, UnaryFunction F)
Wrapper function around std::transform to apply a function to a range and store the result elsewhere.
Definition STLExtras.h:2025
constexpr bool has_single_bit(T Value) noexcept
Definition bit.h:149
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1745
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition MathExtras.h:331
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:209
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
Definition Error.cpp:163
LLVM_ABI std::optional< ValueAndVReg > getAnyConstantVRegValWithLookThrough(Register VReg, const MachineRegisterInfo &MRI, bool LookThroughInstrs=true, bool LookThroughAnyExt=false)
If VReg is defined by a statically evaluable chain of instructions rooted on a G_CONSTANT or G_FCONST...
Definition Utils.cpp:442
constexpr bool isUInt(uint64_t x)
Checks if an unsigned integer fits into the given bit width.
Definition MathExtras.h:189
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
LLVM_ATTRIBUTE_VISIBILITY_DEFAULT AnalysisKey InnerAnalysisManagerProxy< AnalysisManagerT, IRUnitT, ExtraArgTs... >::Key
AtomicOrdering
Atomic ordering for LLVM's memory model.
@ Sub
Subtraction of integers.
DWARFExpression::Operation Op
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
LLVM_ABI std::optional< ValueAndVReg > getIConstantVRegValWithLookThrough(Register VReg, const MachineRegisterInfo &MRI, bool LookThroughInstrs=true)
If VReg is defined by a statically evaluable chain of instructions rooted on a G_CONSTANT returns its...
Definition Utils.cpp:436
LLVM_ABI std::optional< DefinitionAndSourceRegister > getDefSrcRegIgnoringCopies(Register Reg, const MachineRegisterInfo &MRI)
Find the def instruction for Reg, and underlying value Register folding away any copies.
Definition Utils.cpp:472
LLVM_ABI Register getSrcRegIgnoringCopies(Register Reg, const MachineRegisterInfo &MRI)
Find the source register for Reg, folding away any trivial copies.
Definition Utils.cpp:504
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:862
static EVT getFloatingPointVT(unsigned BitWidth)
Returns the EVT that represents a floating-point type with the given number of bits.
Definition ValueTypes.h:55
static LLVM_ABI MachinePointerInfo getConstantPool(MachineFunction &MF)
Return a MachinePointerInfo record that refers to the constant pool.