AArch64InstructionSelector.cpp
1//===- AArch64InstructionSelector.cpp ----------------------------*- C++ -*-==//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8/// \file
9/// This file implements the targeting of the InstructionSelector class for
10/// AArch64.
11/// \todo This should be generated by TableGen.
12//===----------------------------------------------------------------------===//
13
15#include "AArch64InstrInfo.h"
18#include "AArch64RegisterInfo.h"
19#include "AArch64Subtarget.h"
42#include "llvm/IR/Constants.h"
45#include "llvm/IR/IntrinsicsAArch64.h"
46#include "llvm/IR/Type.h"
47#include "llvm/Pass.h"
48#include "llvm/Support/Debug.h"
50#include <optional>
51
52#define DEBUG_TYPE "aarch64-isel"
53
54using namespace llvm;
55using namespace MIPatternMatch;
56using namespace AArch64GISelUtils;
57
58namespace llvm {
59class BlockFrequencyInfo;
60class ProfileSummaryInfo;
61}
62
63namespace {
64
65#define GET_GLOBALISEL_PREDICATE_BITSET
66#include "AArch64GenGlobalISel.inc"
67#undef GET_GLOBALISEL_PREDICATE_BITSET
68
69
70class AArch64InstructionSelector : public InstructionSelector {
71public:
72 AArch64InstructionSelector(const AArch64TargetMachine &TM,
73 const AArch64Subtarget &STI,
74 const AArch64RegisterBankInfo &RBI);
75
76 bool select(MachineInstr &I) override;
77 static const char *getName() { return DEBUG_TYPE; }
78
79 void setupMF(MachineFunction &MF, GISelValueTracking *VT,
80 CodeGenCoverage *CoverageInfo, ProfileSummaryInfo *PSI,
81 BlockFrequencyInfo *BFI) override {
82 InstructionSelector::setupMF(MF, VT, CoverageInfo, PSI, BFI);
83 MIB.setMF(MF);
84
85 // hasFnAttribute() is expensive to call on every BRCOND selection, so
86 // cache it here for each run of the selector.
87 ProduceNonFlagSettingCondBr =
88 !MF.getFunction().hasFnAttribute(Attribute::SpeculativeLoadHardening);
89 MFReturnAddr = Register();
90
91 processPHIs(MF);
92 }
93
94private:
95 /// tblgen-erated 'select' implementation, used as the initial selector for
96 /// the patterns that don't require complex C++.
97 bool selectImpl(MachineInstr &I, CodeGenCoverage &CoverageInfo) const;
98
99 // A lowering phase that runs before any selection attempts.
100 // Returns true if the instruction was modified.
101 bool preISelLower(MachineInstr &I);
102
103 // An early selection function that runs before the selectImpl() call.
104 bool earlySelect(MachineInstr &I);
105
106 /// Save state that is shared between select calls, call select on \p I and
107 /// then restore the saved state. This can be used to recursively call select
108 /// within a select call.
109 bool selectAndRestoreState(MachineInstr &I);
110
111 // Do some preprocessing of G_PHIs before we begin selection.
112 void processPHIs(MachineFunction &MF);
113
114 bool earlySelectSHL(MachineInstr &I, MachineRegisterInfo &MRI);
115
116 /// Eliminate same-sized cross-bank copies into stores before selectImpl().
117 bool contractCrossBankCopyIntoStore(MachineInstr &I,
117 MachineRegisterInfo &MRI);
119
120 bool convertPtrAddToAdd(MachineInstr &I, MachineRegisterInfo &MRI);
121
122 bool selectVaStartAAPCS(MachineInstr &I, MachineFunction &MF,
123 MachineRegisterInfo &MRI) const;
124 bool selectVaStartDarwin(MachineInstr &I, MachineFunction &MF,
125 MachineRegisterInfo &MRI) const;
126
127 ///@{
128 /// Helper functions for selectCompareBranch.
129 bool selectCompareBranchFedByFCmp(MachineInstr &I, MachineInstr &FCmp,
130 MachineIRBuilder &MIB) const;
131 bool selectCompareBranchFedByICmp(MachineInstr &I, MachineInstr &ICmp,
132 MachineIRBuilder &MIB) const;
133 bool tryOptCompareBranchFedByICmp(MachineInstr &I, MachineInstr &ICmp,
134 MachineIRBuilder &MIB) const;
135 bool tryOptAndIntoCompareBranch(MachineInstr &AndInst, bool Invert,
136 MachineBasicBlock *DstMBB,
137 MachineIRBuilder &MIB) const;
138 ///@}
139
140 bool selectCompareBranch(MachineInstr &I, MachineFunction &MF,
141 MachineRegisterInfo &MRI);
142
143 bool selectVectorAshrLshr(MachineInstr &I, MachineRegisterInfo &MRI);
144 bool selectVectorSHL(MachineInstr &I, MachineRegisterInfo &MRI);
145
146 // Helper to generate an equivalent of scalar_to_vector into a new register,
147 // returned via 'Dst'.
148 MachineInstr *emitScalarToVector(unsigned EltSize,
149 const TargetRegisterClass *DstRC,
150 Register Scalar,
151 MachineIRBuilder &MIRBuilder) const;
152 /// Helper to narrow a vector that was widened by emitScalarToVector.
153 /// Copies the lowest part of a 128-bit or 64-bit vector into a 64-bit or
154 /// 32-bit vector, respectively.
155 MachineInstr *emitNarrowVector(Register DstReg, Register SrcReg,
156 MachineIRBuilder &MIRBuilder,
157 MachineRegisterInfo &MRI) const;
158
159 /// Emit a lane insert into \p DstReg, or a new vector register if
160 /// std::nullopt is provided.
161 ///
162 /// The lane inserted into is defined by \p LaneIdx. The vector source
163 /// register is given by \p SrcReg. The register containing the element is
164 /// given by \p EltReg.
165 MachineInstr *emitLaneInsert(std::optional<Register> DstReg, Register SrcReg,
166 Register EltReg, unsigned LaneIdx,
167 const RegisterBank &RB,
168 MachineIRBuilder &MIRBuilder) const;
169
170 /// Emit a sequence of instructions representing a constant \p CV for a
171 /// vector register \p Dst. (E.g. a MOV, or a load from a constant pool.)
172 ///
173 /// \returns the last instruction in the sequence on success, and nullptr
174 /// otherwise.
175 MachineInstr *emitConstantVector(Register Dst, Constant *CV,
176 MachineIRBuilder &MIRBuilder,
177 MachineRegisterInfo &MRI);
178
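// Each tryAdvSIMDModImm* helper below attempts to materialize \p Bits into
// \p Dst as a single AdvSIMD modified-immediate instruction of the
// corresponding element width, returning nullptr when the value cannot be
// encoded that way.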
179 MachineInstr *tryAdvSIMDModImm8(Register Dst, unsigned DstSize, APInt Bits,
180 MachineIRBuilder &MIRBuilder);
181
182 MachineInstr *tryAdvSIMDModImm16(Register Dst, unsigned DstSize, APInt Bits,
183 MachineIRBuilder &MIRBuilder, bool Inv);
184
185 MachineInstr *tryAdvSIMDModImm32(Register Dst, unsigned DstSize, APInt Bits,
186 MachineIRBuilder &MIRBuilder, bool Inv);
187 MachineInstr *tryAdvSIMDModImm64(Register Dst, unsigned DstSize, APInt Bits,
188 MachineIRBuilder &MIRBuilder);
189 MachineInstr *tryAdvSIMDModImm321s(Register Dst, unsigned DstSize, APInt Bits,
190 MachineIRBuilder &MIRBuilder, bool Inv);
191 MachineInstr *tryAdvSIMDModImmFP(Register Dst, unsigned DstSize, APInt Bits,
192 MachineIRBuilder &MIRBuilder);
193
194 bool tryOptConstantBuildVec(MachineInstr &MI, LLT DstTy,
195 MachineRegisterInfo &MRI);
196 /// \returns true if a G_BUILD_VECTOR instruction \p MI can be selected as a
197 /// SUBREG_TO_REG.
198 bool tryOptBuildVecToSubregToReg(MachineInstr &MI, MachineRegisterInfo &MRI);
199 bool selectBuildVector(MachineInstr &I, MachineRegisterInfo &MRI);
202
203 bool selectShuffleVector(MachineInstr &I, MachineRegisterInfo &MRI);
204 bool selectExtractElt(MachineInstr &I, MachineRegisterInfo &MRI);
205 bool selectConcatVectors(MachineInstr &I, MachineRegisterInfo &MRI);
206 bool selectSplitVectorUnmerge(MachineInstr &I, MachineRegisterInfo &MRI);
207
208 /// Helper function to select vector load intrinsics like
209 /// @llvm.aarch64.neon.ld2.*, @llvm.aarch64.neon.ld4.*, etc.
210 /// \p Opc is the opcode that the selected instruction should use.
211 /// \p NumVecs is the number of vector destinations for the instruction.
212 /// \p I is the original G_INTRINSIC_W_SIDE_EFFECTS instruction.
213 bool selectVectorLoadIntrinsic(unsigned Opc, unsigned NumVecs,
214 MachineInstr &I);
215 bool selectVectorLoadLaneIntrinsic(unsigned Opc, unsigned NumVecs,
216 MachineInstr &I);
217 void selectVectorStoreIntrinsic(MachineInstr &I, unsigned NumVecs,
218 unsigned Opc);
219 bool selectVectorStoreLaneIntrinsic(MachineInstr &I, unsigned NumVecs,
220 unsigned Opc);
221 bool selectIntrinsicWithSideEffects(MachineInstr &I,
222 MachineRegisterInfo &MRI);
223 bool selectIntrinsic(MachineInstr &I, MachineRegisterInfo &MRI);
224 bool selectJumpTable(MachineInstr &I, MachineRegisterInfo &MRI);
225 bool selectBrJT(MachineInstr &I, MachineRegisterInfo &MRI);
226 bool selectTLSGlobalValue(MachineInstr &I, MachineRegisterInfo &MRI);
227 bool selectPtrAuthGlobalValue(MachineInstr &I,
228 MachineRegisterInfo &MRI) const;
229 bool selectReduction(MachineInstr &I, MachineRegisterInfo &MRI);
230 bool selectMOPS(MachineInstr &I, MachineRegisterInfo &MRI);
231 bool selectUSMovFromExtend(MachineInstr &I, MachineRegisterInfo &MRI);
232 void SelectTable(MachineInstr &I, MachineRegisterInfo &MRI, unsigned NumVecs,
233 unsigned Opc1, unsigned Opc2, bool isExt);
234
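// Selection of pre/post-indexed (writeback) loads and stores.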
235 bool selectIndexedExtLoad(MachineInstr &I, MachineRegisterInfo &MRI);
236 bool selectIndexedLoad(MachineInstr &I, MachineRegisterInfo &MRI);
237 bool selectIndexedStore(GIndexedStore &I, MachineRegisterInfo &MRI);
238
239 unsigned emitConstantPoolEntry(const Constant *CPVal,
240 MachineFunction &MF) const;
241 MachineInstr *emitLoadFromConstantPool(const Constant *CPVal,
242 MachineIRBuilder &MIRBuilder) const;
243
244 // Emit a vector concat operation.
245 MachineInstr *emitVectorConcat(std::optional<Register> Dst, Register Op1,
246 Register Op2,
247 MachineIRBuilder &MIRBuilder) const;
248
249 // Emit an integer compare between LHS and RHS, which checks for Predicate.
250 MachineInstr *emitIntegerCompare(MachineOperand &LHS, MachineOperand &RHS,
251 MachineOperand &Predicate,
252 MachineIRBuilder &MIRBuilder) const;
253
254 /// Emit a floating point comparison between \p LHS and \p RHS.
255 /// \p Pred if given is the intended predicate to use.
256 MachineInstr *
257 emitFPCompare(Register LHS, Register RHS, MachineIRBuilder &MIRBuilder,
258 std::optional<CmpInst::Predicate> = std::nullopt) const;
259
260 MachineInstr *
261 emitInstr(unsigned Opcode, std::initializer_list<llvm::DstOp> DstOps,
262 std::initializer_list<llvm::SrcOp> SrcOps,
263 MachineIRBuilder &MIRBuilder,
264 const ComplexRendererFns &RenderFns = std::nullopt) const;
265 /// Helper function to emit an add or sub instruction.
266 ///
267 /// \p AddrModeAndSizeToOpcode must contain each of the opcode variants above
268 /// in a specific order.
269 ///
270 /// Below is an example of the expected input to \p AddrModeAndSizeToOpcode.
271 ///
272 /// \code
273 /// const std::array<std::array<unsigned, 2>, 4> Table {
274 /// {{AArch64::ADDXri, AArch64::ADDWri},
275 /// {AArch64::ADDXrs, AArch64::ADDWrs},
276 /// {AArch64::ADDXrr, AArch64::ADDWrr},
277 /// {AArch64::SUBXri, AArch64::SUBWri},
278 /// {AArch64::ADDXrx, AArch64::ADDWrx}}};
279 /// \endcode
280 ///
281 /// Each row in the table corresponds to a different addressing mode. Each
282 /// column corresponds to a different register size.
283 ///
284 /// \attention Rows must be structured as follows:
285 /// - Row 0: The ri opcode variants
286 /// - Row 1: The rs opcode variants
287 /// - Row 2: The rr opcode variants
288 /// - Row 3: The ri opcode variants for negative immediates
289 /// - Row 4: The rx opcode variants
290 ///
291 /// \attention Columns must be structured as follows:
292 /// - Column 0: The 64-bit opcode variants
293 /// - Column 1: The 32-bit opcode variants
294 ///
295 /// \p Dst is the destination register of the binop to emit.
296 /// \p LHS is the left-hand operand of the binop to emit.
297 /// \p RHS is the right-hand operand of the binop to emit.
298 MachineInstr *emitAddSub(
299 const std::array<std::array<unsigned, 2>, 5> &AddrModeAndSizeToOpcode,
300 Register Dst, MachineOperand &LHS, MachineOperand &RHS,
301 MachineIRBuilder &MIRBuilder) const;
302 MachineInstr *emitADD(Register DefReg, MachineOperand &LHS,
303 MachineOperand &RHS,
304 MachineIRBuilder &MIRBuilder) const;
305 MachineInstr *emitADDS(Register Dst, MachineOperand &LHS, MachineOperand &RHS,
306 MachineIRBuilder &MIRBuilder) const;
307 MachineInstr *emitSUBS(Register Dst, MachineOperand &LHS, MachineOperand &RHS,
308 MachineIRBuilder &MIRBuilder) const;
309 MachineInstr *emitADCS(Register Dst, MachineOperand &LHS, MachineOperand &RHS,
310 MachineIRBuilder &MIRBuilder) const;
311 MachineInstr *emitSBCS(Register Dst, MachineOperand &LHS, MachineOperand &RHS,
312 MachineIRBuilder &MIRBuilder) const;
313 MachineInstr *emitCMN(MachineOperand &LHS, MachineOperand &RHS,
314 MachineIRBuilder &MIRBuilder) const;
315 MachineInstr *emitTST(MachineOperand &LHS, MachineOperand &RHS,
316 MachineIRBuilder &MIRBuilder) const;
318 MachineIRBuilder &MIRBuilder) const;
321 MachineIRBuilder &MIRBuilder) const;
322 MachineInstr *emitExtractVectorElt(std::optional<Register> DstReg,
323 const RegisterBank &DstRB, LLT ScalarTy,
324 Register VecReg, unsigned LaneIdx,
325 MachineIRBuilder &MIRBuilder) const;
326 MachineInstr *emitCSINC(Register Dst, Register Src1, Register Src2,
328 MachineIRBuilder &MIRBuilder) const;
329 /// Emit a CSet for a FP compare.
330 ///
331 /// \p Dst is expected to be a 32-bit scalar register.
332 MachineInstr *emitCSetForFCmp(Register Dst, CmpInst::Predicate Pred,
333 MachineIRBuilder &MIRBuilder) const;
334
335 /// Emit an instruction that sets NZCV to the carry-in expected by \p I.
336 /// Might elide the instruction if the previous instruction already sets NZCV
337 /// correctly.
338 MachineInstr *emitCarryIn(MachineInstr &I, Register CarryReg);
339
340 /// Emit the overflow op for \p Opcode.
341 ///
342 /// \p Opcode is expected to be an overflow op's opcode, e.g. G_UADDO,
343 /// G_USUBO, etc.
344 std::pair<MachineInstr *, AArch64CC::CondCode>
345 emitOverflowOp(unsigned Opcode, Register Dst, MachineOperand &LHS,
346 MachineOperand &RHS, MachineIRBuilder &MIRBuilder) const;
347
348 bool selectOverflowOp(MachineInstr &I, MachineRegisterInfo &MRI);
349
350 /// Emit expression as a conjunction (a series of CCMP/CFCMP ops).
351 /// In some cases this is even possible with OR operations in the expression.
352 MachineInstr *emitConjunction(Register Val, AArch64CC::CondCode &OutCC,
353 MachineIRBuilder &MIB) const;
354 MachineInstr *emitConditionalComparison(Register LHS, Register RHS,
355 CmpInst::Predicate CC,
356 AArch64CC::CondCode Predicate,
357 AArch64CC::CondCode OutCC,
358 MachineIRBuilder &MIB) const;
359 MachineInstr *emitConjunctionRec(Register Val, AArch64CC::CondCode &OutCC,
360 bool Negate, Register CCOp,
361 AArch64CC::CondCode Predicate,
362 MachineIRBuilder &MIB) const;
363
364 /// Emit a TB(N)Z instruction which tests \p Bit in \p TestReg.
365 /// \p IsNegative is true if the test should be "not zero".
366 /// This will also optimize the test bit instruction when possible.
367 MachineInstr *emitTestBit(Register TestReg, uint64_t Bit, bool IsNegative,
368 MachineBasicBlock *DstMBB,
369 MachineIRBuilder &MIB) const;
370
371 /// Emit a CB(N)Z instruction which branches to \p DestMBB.
372 MachineInstr *emitCBZ(Register CompareReg, bool IsNegative,
373 MachineBasicBlock *DestMBB,
374 MachineIRBuilder &MIB) const;
375
376 // Equivalent to the i32shift_a and friends from AArch64InstrInfo.td.
377 // We use these manually instead of using the importer since it doesn't
378 // support SDNodeXForm.
379 ComplexRendererFns selectShiftA_32(const MachineOperand &Root) const;
380 ComplexRendererFns selectShiftB_32(const MachineOperand &Root) const;
381 ComplexRendererFns selectShiftA_64(const MachineOperand &Root) const;
382 ComplexRendererFns selectShiftB_64(const MachineOperand &Root) const;
383
384 ComplexRendererFns select12BitValueWithLeftShift(uint64_t Immed) const;
385 ComplexRendererFns selectArithImmed(MachineOperand &Root) const;
386 ComplexRendererFns selectNegArithImmed(MachineOperand &Root) const;
387
388 ComplexRendererFns selectAddrModeUnscaled(MachineOperand &Root,
389 unsigned Size) const;
390
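// Convenience wrappers that fix the access size in bytes for the unscaled
// addressing-mode matcher above.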
391 ComplexRendererFns selectAddrModeUnscaled8(MachineOperand &Root) const {
392 return selectAddrModeUnscaled(Root, 1);
393 }
394 ComplexRendererFns selectAddrModeUnscaled16(MachineOperand &Root) const {
395 return selectAddrModeUnscaled(Root, 2);
396 }
397 ComplexRendererFns selectAddrModeUnscaled32(MachineOperand &Root) const {
398 return selectAddrModeUnscaled(Root, 4);
399 }
400 ComplexRendererFns selectAddrModeUnscaled64(MachineOperand &Root) const {
401 return selectAddrModeUnscaled(Root, 8);
402 }
403 ComplexRendererFns selectAddrModeUnscaled128(MachineOperand &Root) const {
404 return selectAddrModeUnscaled(Root, 16);
405 }
406
407 /// Helper to try to fold a GISEL_ADD_LOW into an immediate, to be used
408 /// from complex pattern matchers like selectAddrModeIndexed().
409 ComplexRendererFns tryFoldAddLowIntoImm(MachineInstr &RootDef, unsigned Size,
410 MachineRegisterInfo &MRI) const;
411
412 ComplexRendererFns selectAddrModeIndexed(MachineOperand &Root,
413 unsigned Size) const;
414 template <int Width>
415 ComplexRendererFns selectAddrModeIndexed(MachineOperand &Root) const {
416 return selectAddrModeIndexed(Root, Width / 8);
417 }
418
419 std::optional<bool>
420 isWorthFoldingIntoAddrMode(const MachineInstr &MI,
421 const MachineRegisterInfo &MRI) const;
422
423 bool isWorthFoldingIntoExtendedReg(const MachineInstr &MI,
424 const MachineRegisterInfo &MRI,
425 bool IsAddrOperand) const;
426 ComplexRendererFns
427 selectAddrModeShiftedExtendXReg(MachineOperand &Root,
428 unsigned SizeInBytes) const;
429
430 /// Returns a \p ComplexRendererFns which contains a base, offset, and whether
431 /// or not a shift + extend should be folded into an addressing mode. Returns
432 /// None when this is not profitable or possible.
433 ComplexRendererFns
434 selectExtendedSHL(MachineOperand &Root, MachineOperand &Base,
435 MachineOperand &Offset, unsigned SizeInBytes,
436 bool WantsExt) const;
437 ComplexRendererFns selectAddrModeRegisterOffset(MachineOperand &Root) const;
438 ComplexRendererFns selectAddrModeXRO(MachineOperand &Root,
439 unsigned SizeInBytes) const;
440 template <int Width>
441 ComplexRendererFns selectAddrModeXRO(MachineOperand &Root) const {
442 return selectAddrModeXRO(Root, Width / 8);
443 }
444
445 ComplexRendererFns selectAddrModeWRO(MachineOperand &Root,
446 unsigned SizeInBytes) const;
447 template <int Width>
448 ComplexRendererFns selectAddrModeWRO(MachineOperand &Root) const {
449 return selectAddrModeWRO(Root, Width / 8);
450 }
451
452 ComplexRendererFns selectShiftedRegister(MachineOperand &Root,
453 bool AllowROR = false) const;
454
455 ComplexRendererFns selectArithShiftedRegister(MachineOperand &Root) const {
456 return selectShiftedRegister(Root);
457 }
458
459 ComplexRendererFns selectLogicalShiftedRegister(MachineOperand &Root) const {
460 return selectShiftedRegister(Root, true);
461 }
462
463 /// Given an extend instruction, determine the correct shift-extend type for
464 /// that instruction.
465 ///
466 /// If the instruction is going to be used in a load or store, pass
467 /// \p IsLoadStore = true.
468 AArch64_AM::ShiftExtendType
469 getExtendTypeForInst(MachineInstr &MI, MachineRegisterInfo &MRI,
470 bool IsLoadStore = false) const;
471
472 /// Move \p Reg to \p RC if \p Reg is not already on \p RC.
473 ///
474 /// \returns Either \p Reg if no change was necessary, or the new register
475 /// created by moving \p Reg.
476 ///
477 /// Note: This uses emitCopy right now.
478 Register moveScalarRegClass(Register Reg, const TargetRegisterClass &RC,
479 MachineIRBuilder &MIB) const;
480
481 ComplexRendererFns selectArithExtendedRegister(MachineOperand &Root) const;
482
483 ComplexRendererFns selectExtractHigh(MachineOperand &Root) const;
484
485 ComplexRendererFns selectCVTFixedPointVec(MachineOperand &Root) const;
486 ComplexRendererFns
487 selectCVTFixedPosRecipOperandVec(MachineOperand &Root) const;
488 ComplexRendererFns
489 selectCVTFixedPointVecBase(const MachineOperand &Root,
490 bool isReciprocal = false) const;
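// Custom operand renderers referenced from imported TableGen patterns; each
// appends an operand computed from \p MI to the instruction being built.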
491 void renderFixedPointXForm(MachineInstrBuilder &MIB, const MachineInstr &MI,
492 int OpIdx = -1) const;
493 void renderFixedPointRecipXForm(MachineInstrBuilder &MIB,
494 const MachineInstr &MI, int OpIdx = -1) const;
495
496 void renderTruncImm(MachineInstrBuilder &MIB, const MachineInstr &MI,
497 int OpIdx = -1) const;
498 void renderLogicalImm32(MachineInstrBuilder &MIB, const MachineInstr &I,
499 int OpIdx = -1) const;
500 void renderLogicalImm64(MachineInstrBuilder &MIB, const MachineInstr &I,
501 int OpIdx = -1) const;
502 void renderUbsanTrap(MachineInstrBuilder &MIB, const MachineInstr &MI,
503 int OpIdx) const;
504 void renderFPImm16(MachineInstrBuilder &MIB, const MachineInstr &MI,
505 int OpIdx = -1) const;
506 void renderFPImm32(MachineInstrBuilder &MIB, const MachineInstr &MI,
507 int OpIdx = -1) const;
508 void renderFPImm64(MachineInstrBuilder &MIB, const MachineInstr &MI,
509 int OpIdx = -1) const;
510 void renderFPImm32SIMDModImmType4(MachineInstrBuilder &MIB,
511 const MachineInstr &MI,
512 int OpIdx = -1) const;
513
514 // Materialize a GlobalValue or BlockAddress using a movz+movk sequence.
515 void materializeLargeCMVal(MachineInstr &I, const Value *V, unsigned OpFlags);
516
517 // Optimization methods.
518 bool tryOptSelect(GSelect &Sel);
519 bool tryOptSelectConjunction(GSelect &Sel, MachineInstr &CondMI);
520 MachineInstr *tryFoldIntegerCompare(MachineOperand &LHS, MachineOperand &RHS,
521 MachineOperand &Predicate,
522 MachineIRBuilder &MIRBuilder) const;
523
524 /// Return true if \p MI is a load or store of \p NumBytes bytes.
525 bool isLoadStoreOfNumBytes(const MachineInstr &MI, unsigned NumBytes) const;
526
527 /// Returns true if \p MI is guaranteed to have the high-half of a 64-bit
528 /// register zeroed out. In other words, the result of MI has been explicitly
529 /// zero extended.
530 bool isDef32(const MachineInstr &MI) const;
531
532 const AArch64TargetMachine &TM;
533 const AArch64Subtarget &STI;
534 const AArch64InstrInfo &TII;
535 const AArch64RegisterInfo &TRI;
536 const AArch64RegisterBankInfo &RBI;
537
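// Whether it is safe to emit non-flag-setting conditional branches such as
// TB(N)Z and CB(N)Z; cleared in setupMF() when the function requests
// speculative load hardening.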
538 bool ProduceNonFlagSettingCondBr = false;
539
540 // Some cached values used during selection.
541 // We use LR as a live-in register, and we keep track of it here as it can be
542 // clobbered by calls.
543 Register MFReturnAddr;
544
545 MachineIRBuilder MIB;
546
547#define GET_GLOBALISEL_PREDICATES_DECL
548#include "AArch64GenGlobalISel.inc"
549#undef GET_GLOBALISEL_PREDICATES_DECL
550
551// We declare the temporaries used by selectImpl() in the class to minimize the
552// cost of constructing placeholder values.
553#define GET_GLOBALISEL_TEMPORARIES_DECL
554#include "AArch64GenGlobalISel.inc"
555#undef GET_GLOBALISEL_TEMPORARIES_DECL
556};
557
558} // end anonymous namespace
559
560#define GET_GLOBALISEL_IMPL
561#include "AArch64GenGlobalISel.inc"
562#undef GET_GLOBALISEL_IMPL
563
564AArch64InstructionSelector::AArch64InstructionSelector(
565 const AArch64TargetMachine &TM, const AArch64Subtarget &STI,
566 const AArch64RegisterBankInfo &RBI)
567 : TM(TM), STI(STI), TII(*STI.getInstrInfo()), TRI(*STI.getRegisterInfo()),
568 RBI(RBI),
569#define GET_GLOBALISEL_PREDICATES_INIT
570#include "AArch64GenGlobalISel.inc"
571#undef GET_GLOBALISEL_PREDICATES_INIT
572#define GET_GLOBALISEL_TEMPORARIES_INIT
573#include "AArch64GenGlobalISel.inc"
574#undef GET_GLOBALISEL_TEMPORARIES_INIT
575{
576}
577
578// FIXME: This should be target-independent, inferred from the types declared
579// for each class in the bank.
580//
581/// Given a register bank, and a type, return the smallest register class that
582/// can represent that combination.
583static const TargetRegisterClass *
584getRegClassForTypeOnBank(LLT Ty, const RegisterBank &RB,
585 bool GetAllRegSet = false) {
586 if (RB.getID() == AArch64::GPRRegBankID) {
587 if (Ty.getSizeInBits() <= 32)
588 return GetAllRegSet ? &AArch64::GPR32allRegClass
589 : &AArch64::GPR32RegClass;
590 if (Ty.getSizeInBits() == 64)
591 return GetAllRegSet ? &AArch64::GPR64allRegClass
592 : &AArch64::GPR64RegClass;
593 if (Ty.getSizeInBits() == 128)
594 return &AArch64::XSeqPairsClassRegClass;
595 return nullptr;
596 }
597
598 if (RB.getID() == AArch64::FPRRegBankID) {
599 switch (Ty.getSizeInBits()) {
600 case 8:
601 return &AArch64::FPR8RegClass;
602 case 16:
603 return &AArch64::FPR16RegClass;
604 case 32:
605 return &AArch64::FPR32RegClass;
606 case 64:
607 return &AArch64::FPR64RegClass;
608 case 128:
609 return &AArch64::FPR128RegClass;
610 }
611 return nullptr;
612 }
613
614 return nullptr;
615}
616
617/// Given a register bank, and size in bits, return the smallest register class
618/// that can represent that combination.
619static const TargetRegisterClass *
620 getMinClassForRegBank(const RegisterBank &RB, TypeSize SizeInBits,
621 bool GetAllRegSet = false) {
622 if (SizeInBits.isScalable()) {
623 assert(RB.getID() == AArch64::FPRRegBankID &&
624 "Expected FPR regbank for scalable type size");
625 return &AArch64::ZPRRegClass;
626 }
627
628 unsigned RegBankID = RB.getID();
629
630 if (RegBankID == AArch64::GPRRegBankID) {
631 assert(!SizeInBits.isScalable() && "Unexpected scalable register size");
632 if (SizeInBits <= 32)
633 return GetAllRegSet ? &AArch64::GPR32allRegClass
634 : &AArch64::GPR32RegClass;
635 if (SizeInBits == 64)
636 return GetAllRegSet ? &AArch64::GPR64allRegClass
637 : &AArch64::GPR64RegClass;
638 if (SizeInBits == 128)
639 return &AArch64::XSeqPairsClassRegClass;
640 }
641
642 if (RegBankID == AArch64::FPRRegBankID) {
643 if (SizeInBits.isScalable()) {
644 assert(SizeInBits == TypeSize::getScalable(128) &&
645 "Unexpected scalable register size");
646 return &AArch64::ZPRRegClass;
647 }
648
649 switch (SizeInBits) {
650 default:
651 return nullptr;
652 case 8:
653 return &AArch64::FPR8RegClass;
654 case 16:
655 return &AArch64::FPR16RegClass;
656 case 32:
657 return &AArch64::FPR32RegClass;
658 case 64:
659 return &AArch64::FPR64RegClass;
660 case 128:
661 return &AArch64::FPR128RegClass;
662 }
663 }
664
665 return nullptr;
666}
667
668/// Returns the correct subregister to use for a given register class.
669 static bool getSubRegForClass(const TargetRegisterClass *RC,
670 const TargetRegisterInfo &TRI, unsigned &SubReg) {
671 switch (TRI.getRegSizeInBits(*RC)) {
672 case 8:
673 SubReg = AArch64::bsub;
674 break;
675 case 16:
676 SubReg = AArch64::hsub;
677 break;
678 case 32:
679 if (RC != &AArch64::FPR32RegClass)
680 SubReg = AArch64::sub_32;
681 else
682 SubReg = AArch64::ssub;
683 break;
684 case 64:
685 SubReg = AArch64::dsub;
686 break;
687 default:
688 LLVM_DEBUG(
689 dbgs() << "Couldn't find appropriate subregister for register class.");
690 return false;
691 }
692
693 return true;
694}
695
696/// Returns the minimum size the given register bank can hold.
697static unsigned getMinSizeForRegBank(const RegisterBank &RB) {
698 switch (RB.getID()) {
699 case AArch64::GPRRegBankID:
700 return 32;
701 case AArch64::FPRRegBankID:
702 return 8;
703 default:
704 llvm_unreachable("Tried to get minimum size for unknown register bank.");
705 }
706}
707
708/// Create a REG_SEQUENCE instruction using the registers in \p Regs.
709/// Helper function for functions like createDTuple and createQTuple.
710///
711/// \p RegClassIDs - The list of register class IDs available for some tuple of
712/// a scalar class. E.g. QQRegClassID, QQQRegClassID, QQQQRegClassID. This is
713/// expected to contain between 2 and 4 tuple classes.
714///
715/// \p SubRegs - The list of subregister classes associated with each register
716/// class ID in \p RegClassIDs. E.g., QQRegClassID should use the qsub0
717/// subregister class. The index of each subregister class is expected to
718/// correspond with the index of each register class.
719///
720/// \returns Either the destination register of REG_SEQUENCE instruction that
721/// was created, or the 0th element of \p Regs if \p Regs contains a single
722/// element.
723 static Register createTuple(ArrayRef<Register> Regs,
724 const unsigned RegClassIDs[],
725 const unsigned SubRegs[], MachineIRBuilder &MIB) {
726 unsigned NumRegs = Regs.size();
727 if (NumRegs == 1)
728 return Regs[0];
729 assert(NumRegs >= 2 && NumRegs <= 4 &&
730 "Only support between two and 4 registers in a tuple!");
731 const TargetRegisterInfo *TRI = MIB.getMF().getSubtarget().getRegisterInfo();
732 auto *DesiredClass = TRI->getRegClass(RegClassIDs[NumRegs - 2]);
733 auto RegSequence =
734 MIB.buildInstr(TargetOpcode::REG_SEQUENCE, {DesiredClass}, {});
735 for (unsigned I = 0, E = Regs.size(); I < E; ++I) {
736 RegSequence.addUse(Regs[I]);
737 RegSequence.addImm(SubRegs[I]);
738 }
739 return RegSequence.getReg(0);
740}
741
742/// Create a tuple of D-registers using the registers in \p Regs.
743 static Register createDTuple(ArrayRef<Register> Regs, MachineIRBuilder &MIB) {
744 static const unsigned RegClassIDs[] = {
745 AArch64::DDRegClassID, AArch64::DDDRegClassID, AArch64::DDDDRegClassID};
746 static const unsigned SubRegs[] = {AArch64::dsub0, AArch64::dsub1,
747 AArch64::dsub2, AArch64::dsub3};
748 return createTuple(Regs, RegClassIDs, SubRegs, MIB);
749}
750
751/// Create a tuple of Q-registers using the registers in \p Regs.
752 static Register createQTuple(ArrayRef<Register> Regs, MachineIRBuilder &MIB) {
753 static const unsigned RegClassIDs[] = {
754 AArch64::QQRegClassID, AArch64::QQQRegClassID, AArch64::QQQQRegClassID};
755 static const unsigned SubRegs[] = {AArch64::qsub0, AArch64::qsub1,
756 AArch64::qsub2, AArch64::qsub3};
757 return createTuple(Regs, RegClassIDs, SubRegs, MIB);
758}
759
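/// Extract a constant immediate from \p Root, looking through virtual-register
/// definitions when \p Root is a register operand. Returns std::nullopt if no
/// constant can be found.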
760static std::optional<uint64_t> getImmedFromMO(const MachineOperand &Root) {
761 auto &MI = *Root.getParent();
762 auto &MBB = *MI.getParent();
763 auto &MF = *MBB.getParent();
764 auto &MRI = MF.getRegInfo();
765 uint64_t Immed;
766 if (Root.isImm())
767 Immed = Root.getImm();
768 else if (Root.isCImm())
769 Immed = Root.getCImm()->getZExtValue();
770 else if (Root.isReg()) {
771 auto ValAndVReg =
771 getIConstantVRegValWithLookThrough(Root.getReg(), MRI, true);
773 if (!ValAndVReg)
774 return std::nullopt;
775 Immed = ValAndVReg->Value.getSExtValue();
776 } else
777 return std::nullopt;
778 return Immed;
779}
780
781/// Check whether \p I is a currently unsupported binary operation:
782/// - it has an unsized type
783/// - an operand is not a vreg
784/// - its operands are not all in the same bank
785/// These are checks that should someday live in the verifier, but right now,
786/// these are mostly limitations of the AArch64 selector.
787static bool unsupportedBinOp(const MachineInstr &I,
788 const AArch64RegisterBankInfo &RBI,
789 const MachineRegisterInfo &MRI,
790 const AArch64RegisterInfo &TRI) {
791 LLT Ty = MRI.getType(I.getOperand(0).getReg());
792 if (!Ty.isValid()) {
793 LLVM_DEBUG(dbgs() << "Generic binop register should be typed\n");
794 return true;
795 }
796
797 const RegisterBank *PrevOpBank = nullptr;
798 for (auto &MO : I.operands()) {
799 // FIXME: Support non-register operands.
800 if (!MO.isReg()) {
801 LLVM_DEBUG(dbgs() << "Generic inst non-reg operands are unsupported\n");
802 return true;
803 }
804
805 // FIXME: Can generic operations have physical registers operands? If
806 // so, this will need to be taught about that, and we'll need to get the
807 // bank out of the minimal class for the register.
808 // Either way, this needs to be documented (and possibly verified).
809 if (!MO.getReg().isVirtual()) {
810 LLVM_DEBUG(dbgs() << "Generic inst has physical register operand\n");
811 return true;
812 }
813
814 const RegisterBank *OpBank = RBI.getRegBank(MO.getReg(), MRI, TRI);
815 if (!OpBank) {
816 LLVM_DEBUG(dbgs() << "Generic register has no bank or class\n");
817 return true;
818 }
819
820 if (PrevOpBank && OpBank != PrevOpBank) {
821 LLVM_DEBUG(dbgs() << "Generic inst operands have different banks\n");
822 return true;
823 }
824 PrevOpBank = OpBank;
825 }
826 return false;
827}
828
829/// Select the AArch64 opcode for the basic binary operation \p GenericOpc
830/// (such as G_OR or G_SDIV), appropriate for the register bank \p RegBankID
831/// and of size \p OpSize.
832/// \returns \p GenericOpc if the combination is unsupported.
833static unsigned selectBinaryOp(unsigned GenericOpc, unsigned RegBankID,
834 unsigned OpSize) {
835 switch (RegBankID) {
836 case AArch64::GPRRegBankID:
837 if (OpSize == 32) {
838 switch (GenericOpc) {
839 case TargetOpcode::G_SHL:
840 return AArch64::LSLVWr;
841 case TargetOpcode::G_LSHR:
842 return AArch64::LSRVWr;
843 case TargetOpcode::G_ASHR:
844 return AArch64::ASRVWr;
845 default:
846 return GenericOpc;
847 }
848 } else if (OpSize == 64) {
849 switch (GenericOpc) {
850 case TargetOpcode::G_PTR_ADD:
851 return AArch64::ADDXrr;
852 case TargetOpcode::G_SHL:
853 return AArch64::LSLVXr;
854 case TargetOpcode::G_LSHR:
855 return AArch64::LSRVXr;
856 case TargetOpcode::G_ASHR:
857 return AArch64::ASRVXr;
858 default:
859 return GenericOpc;
860 }
861 }
862 break;
863 case AArch64::FPRRegBankID:
864 switch (OpSize) {
865 case 32:
866 switch (GenericOpc) {
867 case TargetOpcode::G_FADD:
868 return AArch64::FADDSrr;
869 case TargetOpcode::G_FSUB:
870 return AArch64::FSUBSrr;
871 case TargetOpcode::G_FMUL:
872 return AArch64::FMULSrr;
873 case TargetOpcode::G_FDIV:
874 return AArch64::FDIVSrr;
875 default:
876 return GenericOpc;
877 }
878 case 64:
879 switch (GenericOpc) {
880 case TargetOpcode::G_FADD:
881 return AArch64::FADDDrr;
882 case TargetOpcode::G_FSUB:
883 return AArch64::FSUBDrr;
884 case TargetOpcode::G_FMUL:
885 return AArch64::FMULDrr;
886 case TargetOpcode::G_FDIV:
887 return AArch64::FDIVDrr;
888 case TargetOpcode::G_OR:
889 return AArch64::ORRv8i8;
890 default:
891 return GenericOpc;
892 }
893 }
894 break;
895 }
896 return GenericOpc;
897}
898
899/// Select the AArch64 opcode for the G_LOAD or G_STORE operation \p GenericOpc,
900/// appropriate for the (value) register bank \p RegBankID and of memory access
901/// size \p OpSize. This returns the variant with the base+unsigned-immediate
902/// addressing mode (e.g., LDRXui).
903/// \returns \p GenericOpc if the combination is unsupported.
904static unsigned selectLoadStoreUIOp(unsigned GenericOpc, unsigned RegBankID,
905 unsigned OpSize) {
906 const bool isStore = GenericOpc == TargetOpcode::G_STORE;
907 switch (RegBankID) {
908 case AArch64::GPRRegBankID:
909 switch (OpSize) {
910 case 8:
911 return isStore ? AArch64::STRBBui : AArch64::LDRBBui;
912 case 16:
913 return isStore ? AArch64::STRHHui : AArch64::LDRHHui;
914 case 32:
915 return isStore ? AArch64::STRWui : AArch64::LDRWui;
916 case 64:
917 return isStore ? AArch64::STRXui : AArch64::LDRXui;
918 }
919 break;
920 case AArch64::FPRRegBankID:
921 switch (OpSize) {
922 case 8:
923 return isStore ? AArch64::STRBui : AArch64::LDRBui;
924 case 16:
925 return isStore ? AArch64::STRHui : AArch64::LDRHui;
926 case 32:
927 return isStore ? AArch64::STRSui : AArch64::LDRSui;
928 case 64:
929 return isStore ? AArch64::STRDui : AArch64::LDRDui;
930 case 128:
931 return isStore ? AArch64::STRQui : AArch64::LDRQui;
932 }
933 break;
934 }
935 return GenericOpc;
936}
937
938/// Helper function for selectCopy. Inserts a subregister copy from \p SrcReg
939/// to \p *To.
940///
941/// E.g., "To = COPY SrcReg:SubReg"
942 static bool copySubReg(MachineInstr &I, MachineRegisterInfo &MRI,
943 const RegisterBankInfo &RBI, Register SrcReg,
944 const TargetRegisterClass *To, unsigned SubReg) {
945 assert(SrcReg.isValid() && "Expected a valid source register?");
946 assert(To && "Destination register class cannot be null");
947 assert(SubReg && "Expected a valid subregister");
948
949 MachineIRBuilder MIB(I);
950 auto SubRegCopy =
951 MIB.buildInstr(TargetOpcode::COPY, {To}, {}).addReg(SrcReg, {}, SubReg);
952 MachineOperand &RegOp = I.getOperand(1);
953 RegOp.setReg(SubRegCopy.getReg(0));
954
955 // It's possible that the destination register won't be constrained. Make
956 // sure that happens.
957 if (!I.getOperand(0).getReg().isPhysical())
958 RBI.constrainGenericRegister(I.getOperand(0).getReg(), *To, MRI);
959
960 return true;
961}
962
963/// Helper function to get the source and destination register classes for a
964/// copy. Returns a std::pair containing the source register class for the
965/// copy, and the destination register class for the copy. If a register class
966/// cannot be determined, then it will be nullptr.
967static std::pair<const TargetRegisterClass *, const TargetRegisterClass *>
968 getRegClassesForCopy(MachineInstr &I, const TargetInstrInfo &TII,
969 MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI,
970 const RegisterBankInfo &RBI) {
971 Register DstReg = I.getOperand(0).getReg();
972 Register SrcReg = I.getOperand(1).getReg();
973 const RegisterBank &DstRegBank = *RBI.getRegBank(DstReg, MRI, TRI);
974 const RegisterBank &SrcRegBank = *RBI.getRegBank(SrcReg, MRI, TRI);
975
976 TypeSize DstSize = RBI.getSizeInBits(DstReg, MRI, TRI);
977 TypeSize SrcSize = RBI.getSizeInBits(SrcReg, MRI, TRI);
978
979 // Special casing for cross-bank copies of s1s. We can technically represent
980 // a 1-bit value with any size of register. The minimum size for a GPR is 32
981 // bits. So, we need to put the FPR on 32 bits as well.
982 //
983 // FIXME: I'm not sure if this case holds true outside of copies. If it does,
984 // then we can pull it into the helpers that get the appropriate class for a
985 // register bank. Or make a new helper that carries along some constraint
986 // information.
987 if (SrcRegBank != DstRegBank &&
988 (DstSize == TypeSize::getFixed(1) && SrcSize == TypeSize::getFixed(1)))
989 SrcSize = DstSize = TypeSize::getFixed(32);
990
991 return {getMinClassForRegBank(SrcRegBank, SrcSize, true),
992 getMinClassForRegBank(DstRegBank, DstSize, true)};
993}
994
995// FIXME: We need some sort of API in RBI/TRI to allow generic code to
996// constrain operands of simple instructions given a TargetRegisterClass
997// and LLT
998 static bool selectDebugInstr(MachineInstr &I, MachineRegisterInfo &MRI,
999 const RegisterBankInfo &RBI) {
1000 for (MachineOperand &MO : I.operands()) {
1001 if (!MO.isReg())
1002 continue;
1003 Register Reg = MO.getReg();
1004 if (!Reg)
1005 continue;
1006 if (Reg.isPhysical())
1007 continue;
1008 LLT Ty = MRI.getType(Reg);
1009 const RegClassOrRegBank &RegClassOrBank = MRI.getRegClassOrRegBank(Reg);
1010 const TargetRegisterClass *RC =
1011 dyn_cast_if_present<const TargetRegisterClass *>(RegClassOrBank);
1012 if (!RC) {
1013 const RegisterBank &RB = *cast<const RegisterBank *>(RegClassOrBank);
1014 RC = getRegClassForTypeOnBank(Ty, RB);
1015 if (!RC) {
1016 LLVM_DEBUG(
1017 dbgs() << "Warning: DBG_VALUE operand has unexpected size/bank\n");
1018 break;
1019 }
1020 }
1021 RBI.constrainGenericRegister(Reg, *RC, MRI);
1022 }
1023
1024 return true;
1025}
1026
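/// Select and constrain a COPY-like instruction (including a G_ZEXT that is
/// being reduced to a copy), inserting subregister copies or a SUBREG_TO_REG
/// when the source and destination register classes differ in size.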
1027 static bool selectCopy(MachineInstr &I, const TargetInstrInfo &TII,
1028 MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI,
1029 const RegisterBankInfo &RBI) {
1030 Register DstReg = I.getOperand(0).getReg();
1031 Register SrcReg = I.getOperand(1).getReg();
1032 const RegisterBank &DstRegBank = *RBI.getRegBank(DstReg, MRI, TRI);
1033 const RegisterBank &SrcRegBank = *RBI.getRegBank(SrcReg, MRI, TRI);
1034
1035 // Find the correct register classes for the source and destination registers.
1036 const TargetRegisterClass *SrcRC;
1037 const TargetRegisterClass *DstRC;
1038 std::tie(SrcRC, DstRC) = getRegClassesForCopy(I, TII, MRI, TRI, RBI);
1039
1040 if (!DstRC) {
1041 LLVM_DEBUG(dbgs() << "Unexpected dest size "
1042 << RBI.getSizeInBits(DstReg, MRI, TRI) << '\n');
1043 return false;
1044 }
1045
1046 // Is this a copy? If so, then we may need to insert a subregister copy.
1047 if (I.isCopy()) {
1048 // Yes. Check if there's anything to fix up.
1049 if (!SrcRC) {
1050 LLVM_DEBUG(dbgs() << "Couldn't determine source register class\n");
1051 return false;
1052 }
1053
1054 const TypeSize SrcSize = TRI.getRegSizeInBits(*SrcRC);
1055 const TypeSize DstSize = TRI.getRegSizeInBits(*DstRC);
1056 unsigned SrcSubReg = I.getOperand(1).getSubReg();
1057 unsigned SubReg;
1058
1059 if (SrcSubReg)
1060 return RBI.constrainGenericRegister(DstReg, *DstRC, MRI);
1061
1062 // If the source bank doesn't support a subregister copy small enough,
1063 // then we first need to copy to the destination bank.
1064 if (getMinSizeForRegBank(SrcRegBank) > DstSize) {
1065 const TargetRegisterClass *DstTempRC =
1066 getMinClassForRegBank(DstRegBank, SrcSize, /* GetAllRegSet */ true);
1067 getSubRegForClass(DstRC, TRI, SubReg);
1068
1069 MachineIRBuilder MIB(I);
1070 auto Copy = MIB.buildCopy({DstTempRC}, {SrcReg});
1071 copySubReg(I, MRI, RBI, Copy.getReg(0), DstRC, SubReg);
1072 } else if (SrcSize > DstSize) {
1073 // If the source register is bigger than the destination we need to
1074 // perform a subregister copy.
1075 const TargetRegisterClass *SubRegRC =
1076 getMinClassForRegBank(SrcRegBank, DstSize, /* GetAllRegSet */ true);
1077 getSubRegForClass(SubRegRC, TRI, SubReg);
1078 copySubReg(I, MRI, RBI, SrcReg, DstRC, SubReg);
1079 } else if (DstSize > SrcSize) {
1080 // If the destination register is bigger than the source we need to do
1081 // a promotion using SUBREG_TO_REG.
1082 const TargetRegisterClass *PromotionRC =
1083 getMinClassForRegBank(SrcRegBank, DstSize, /* GetAllRegSet */ true);
1084 getSubRegForClass(SrcRC, TRI, SubReg);
1085
1086 Register PromoteReg = MRI.createVirtualRegister(PromotionRC);
1087 BuildMI(*I.getParent(), I, I.getDebugLoc(),
1088 TII.get(AArch64::SUBREG_TO_REG), PromoteReg)
1089 .addUse(SrcReg)
1090 .addImm(SubReg);
1091 MachineOperand &RegOp = I.getOperand(1);
1092 RegOp.setReg(PromoteReg);
1093 }
1094
1095 // If the destination is a physical register, then there's nothing to
1096 // change, so we're done.
1097 if (DstReg.isPhysical())
1098 return true;
1099 }
1100
1101 // No need to constrain SrcReg. It will get constrained when we hit another
1102 // of its uses or its defs. Copies do not have constraints.
1103 if (!RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) {
1104 LLVM_DEBUG(dbgs() << "Failed to constrain " << TII.getName(I.getOpcode())
1105 << " operand\n");
1106 return false;
1107 }
1108
1109 // If this is a GPR ZEXT, we can just reduce it down into a copy.
1110 // The sizes will be mismatched with the source < 32b, but that's OK.
1111 if (I.getOpcode() == TargetOpcode::G_ZEXT) {
1112 I.setDesc(TII.get(AArch64::COPY));
1113 assert(SrcRegBank.getID() == AArch64::GPRRegBankID);
1114 return selectCopy(I, TII, MRI, TRI, RBI);
1115 }
1116
1117 I.setDesc(TII.get(AArch64::COPY));
1118 return true;
1119}
1120
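/// Emit a CSEL-family select of \p True / \p False into \p Dst for condition
/// \p CC, using FCSEL for FPR values and folding a negate, invert or increment
/// of one operand into CSNEG/CSINV/CSINC when profitable.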
1121MachineInstr *
1122AArch64InstructionSelector::emitSelect(Register Dst, Register True,
1123 Register False, AArch64CC::CondCode CC,
1124 MachineIRBuilder &MIB) const {
1125 MachineRegisterInfo &MRI = *MIB.getMRI();
1126 assert(RBI.getRegBank(False, MRI, TRI)->getID() ==
1127 RBI.getRegBank(True, MRI, TRI)->getID() &&
1128 "Expected both select operands to have the same regbank?");
1129 LLT Ty = MRI.getType(True);
1130 if (Ty.isVector())
1131 return nullptr;
1132 const unsigned Size = Ty.getSizeInBits();
1133 assert((Size == 32 || Size == 64) &&
1134 "Expected 32 bit or 64 bit select only?");
1135 const bool Is32Bit = Size == 32;
1136 if (RBI.getRegBank(True, MRI, TRI)->getID() != AArch64::GPRRegBankID) {
1137 unsigned Opc = Is32Bit ? AArch64::FCSELSrrr : AArch64::FCSELDrrr;
1138 auto FCSel = MIB.buildInstr(Opc, {Dst}, {True, False}).addImm(CC);
1139 constrainSelectedInstRegOperands(*FCSel, TII, TRI, RBI);
1140 return &*FCSel;
1141 }
1142
1143 // By default, we'll try and emit a CSEL.
1144 unsigned Opc = Is32Bit ? AArch64::CSELWr : AArch64::CSELXr;
1145 bool Optimized = false;
1146 auto TryFoldBinOpIntoSelect = [&Opc, Is32Bit, &CC, &MRI,
1147 &Optimized](Register &Reg, Register &OtherReg,
1148 bool Invert) {
1149 if (Optimized)
1150 return false;
1151
1152 // Attempt to fold:
1153 //
1154 // %sub = G_SUB 0, %x
1155 // %select = G_SELECT cc, %reg, %sub
1156 //
1157 // Into:
1158 // %select = CSNEG %reg, %x, cc
1159 Register MatchReg;
1160 if (mi_match(Reg, MRI, m_Neg(m_Reg(MatchReg)))) {
1161 Opc = Is32Bit ? AArch64::CSNEGWr : AArch64::CSNEGXr;
1162 Reg = MatchReg;
1163 if (Invert) {
1164 CC = AArch64CC::getInvertedCondCode(CC);
1165 std::swap(Reg, OtherReg);
1166 }
1167 return true;
1168 }
1169
1170 // Attempt to fold:
1171 //
1172 // %xor = G_XOR %x, -1
1173 // %select = G_SELECT cc, %reg, %xor
1174 //
1175 // Into:
1176 // %select = CSINV %reg, %x, cc
1177 if (mi_match(Reg, MRI, m_Not(m_Reg(MatchReg)))) {
1178 Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;
1179 Reg = MatchReg;
1180 if (Invert) {
1181 CC = AArch64CC::getInvertedCondCode(CC);
1182 std::swap(Reg, OtherReg);
1183 }
1184 return true;
1185 }
1186
1187 // Attempt to fold:
1188 //
1189 // %add = G_ADD %x, 1
1190 // %select = G_SELECT cc, %reg, %add
1191 //
1192 // Into:
1193 // %select = CSINC %reg, %x, cc
1194 if (mi_match(Reg, MRI,
1195 m_any_of(m_GAdd(m_Reg(MatchReg), m_SpecificICst(1)),
1196 m_GPtrAdd(m_Reg(MatchReg), m_SpecificICst(1))))) {
1197 Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;
1198 Reg = MatchReg;
1199 if (Invert) {
1200 CC = AArch64CC::getInvertedCondCode(CC);
1201 std::swap(Reg, OtherReg);
1202 }
1203 return true;
1204 }
1205
1206 return false;
1207 };
1208
1209 // Helper lambda which tries to use CSINC/CSINV for the instruction when its
1210 // true/false values are constants.
1211 // FIXME: All of these patterns already exist in tablegen. We should be
1212 // able to import these.
1213 auto TryOptSelectCst = [&Opc, &True, &False, &CC, Is32Bit, &MRI,
1214 &Optimized]() {
1215 if (Optimized)
1216 return false;
1217 auto TrueCst = getIConstantVRegValWithLookThrough(True, MRI);
1218 auto FalseCst = getIConstantVRegValWithLookThrough(False, MRI);
1219 if (!TrueCst && !FalseCst)
1220 return false;
1221
1222 Register ZReg = Is32Bit ? AArch64::WZR : AArch64::XZR;
1223 if (TrueCst && FalseCst) {
1224 int64_t T = TrueCst->Value.getSExtValue();
1225 int64_t F = FalseCst->Value.getSExtValue();
1226
1227 if (T == 0 && F == 1) {
1228 // G_SELECT cc, 0, 1 -> CSINC zreg, zreg, cc
1229 Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;
1230 True = ZReg;
1231 False = ZReg;
1232 return true;
1233 }
1234
1235 if (T == 0 && F == -1) {
1236 // G_SELECT cc 0, -1 -> CSINV zreg, zreg cc
1237 Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;
1238 True = ZReg;
1239 False = ZReg;
1240 return true;
1241 }
1242 }
1243
1244 if (TrueCst) {
1245 int64_t T = TrueCst->Value.getSExtValue();
1246 if (T == 1) {
1247 // G_SELECT cc, 1, f -> CSINC f, zreg, inv_cc
1248 Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;
1249 True = False;
1250 False = ZReg;
1251 CC = AArch64CC::getInvertedCondCode(CC);
1252 return true;
1253 }
1254
1255 if (T == -1) {
1256 // G_SELECT cc, -1, f -> CSINV f, zreg, inv_cc
1257 Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;
1258 True = False;
1259 False = ZReg;
1260 CC = AArch64CC::getInvertedCondCode(CC);
1261 return true;
1262 }
1263 }
1264
1265 if (FalseCst) {
1266 int64_t F = FalseCst->Value.getSExtValue();
1267 if (F == 1) {
1268 // G_SELECT cc, t, 1 -> CSINC t, zreg, cc
1269 Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;
1270 False = ZReg;
1271 return true;
1272 }
1273
1274 if (F == -1) {
1275 // G_SELECT cc, t, -1 -> CSINV t, zreg, cc
1276 Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;
1277 False = ZReg;
1278 return true;
1279 }
1280 }
1281 return false;
1282 };
1283
1284 Optimized |= TryFoldBinOpIntoSelect(False, True, /*Invert = */ false);
1285 Optimized |= TryFoldBinOpIntoSelect(True, False, /*Invert = */ true);
1286 Optimized |= TryOptSelectCst();
1287 auto SelectInst = MIB.buildInstr(Opc, {Dst}, {True, False}).addImm(CC);
1288 constrainSelectedInstRegOperands(*SelectInst, TII, TRI, RBI);
1289 return &*SelectInst;
1290}
1291
1292static AArch64CC::CondCode
1293changeICMPPredToAArch64CC(CmpInst::Predicate P, Register RHS = Register(),
1294 MachineRegisterInfo *MRI = nullptr) {
1295 switch (P) {
1296 default:
1297 llvm_unreachable("Unknown condition code!");
1298 case CmpInst::ICMP_NE:
1299 return AArch64CC::NE;
1300 case CmpInst::ICMP_EQ:
1301 return AArch64CC::EQ;
1302 case CmpInst::ICMP_SGT:
1303 return AArch64CC::GT;
1304 case CmpInst::ICMP_SGE:
1305 if (RHS && MRI) {
1306 auto ValAndVReg = getIConstantVRegValWithLookThrough(RHS, *MRI);
1307 if (ValAndVReg && ValAndVReg->Value == 0)
1308 return AArch64CC::PL;
1309 }
1310 return AArch64CC::GE;
1311 case CmpInst::ICMP_SLT:
1312 if (RHS && MRI) {
1313 auto ValAndVReg = getIConstantVRegValWithLookThrough(RHS, *MRI);
1314 if (ValAndVReg && ValAndVReg->Value == 0)
1315 return AArch64CC::MI;
1316 }
1317 return AArch64CC::LT;
1318 case CmpInst::ICMP_SLE:
1319 return AArch64CC::LE;
1320 case CmpInst::ICMP_UGT:
1321 return AArch64CC::HI;
1322 case CmpInst::ICMP_UGE:
1323 return AArch64CC::HS;
1324 case CmpInst::ICMP_ULT:
1325 return AArch64CC::LO;
1326 case CmpInst::ICMP_ULE:
1327 return AArch64CC::LS;
1328 }
1329}
1330
1331/// changeFPCCToORAArch64CC - Convert an IR fp condition code to an AArch64 CC.
1332static void changeFPCCToORAArch64CC(CmpInst::Predicate CC,
1333 AArch64CC::CondCode &CondCode,
1334 AArch64CC::CondCode &CondCode2) {
1335 CondCode2 = AArch64CC::AL;
1336 switch (CC) {
1337 default:
1338 llvm_unreachable("Unknown FP condition!");
1339 case CmpInst::FCMP_OEQ:
1340 CondCode = AArch64CC::EQ;
1341 break;
1342 case CmpInst::FCMP_OGT:
1343 CondCode = AArch64CC::GT;
1344 break;
1345 case CmpInst::FCMP_OGE:
1346 CondCode = AArch64CC::GE;
1347 break;
1348 case CmpInst::FCMP_OLT:
1349 CondCode = AArch64CC::MI;
1350 break;
1351 case CmpInst::FCMP_OLE:
1352 CondCode = AArch64CC::LS;
1353 break;
1354 case CmpInst::FCMP_ONE:
1355 CondCode = AArch64CC::MI;
1356 CondCode2 = AArch64CC::GT;
1357 break;
1358 case CmpInst::FCMP_ORD:
1359 CondCode = AArch64CC::VC;
1360 break;
1361 case CmpInst::FCMP_UNO:
1362 CondCode = AArch64CC::VS;
1363 break;
1364 case CmpInst::FCMP_UEQ:
1365 CondCode = AArch64CC::EQ;
1366 CondCode2 = AArch64CC::VS;
1367 break;
1368 case CmpInst::FCMP_UGT:
1369 CondCode = AArch64CC::HI;
1370 break;
1371 case CmpInst::FCMP_UGE:
1372 CondCode = AArch64CC::PL;
1373 break;
1374 case CmpInst::FCMP_ULT:
1375 CondCode = AArch64CC::LT;
1376 break;
1377 case CmpInst::FCMP_ULE:
1378 CondCode = AArch64CC::LE;
1379 break;
1380 case CmpInst::FCMP_UNE:
1381 CondCode = AArch64CC::NE;
1382 break;
1383 }
1384}
1385
1386/// Convert an IR fp condition code to an AArch64 CC.
1387/// This differs from changeFPCCToAArch64CC in that it returns cond codes that
1388/// should be AND'ed instead of OR'ed.
1389static void changeFPCCToANDAArch64CC(CmpInst::Predicate CC,
1390 AArch64CC::CondCode &CondCode,
1391 AArch64CC::CondCode &CondCode2) {
1392 CondCode2 = AArch64CC::AL;
1393 switch (CC) {
1394 default:
1395 changeFPCCToORAArch64CC(CC, CondCode, CondCode2);
1396 assert(CondCode2 == AArch64CC::AL);
1397 break;
1398 case CmpInst::FCMP_ONE:
1399 // (a one b)
1400 // == ((a olt b) || (a ogt b))
1401 // == ((a ord b) && (a une b))
1402 CondCode = AArch64CC::VC;
1403 CondCode2 = AArch64CC::NE;
1404 break;
1405 case CmpInst::FCMP_UEQ:
1406 // (a ueq b)
1407 // == ((a uno b) || (a oeq b))
1408 // == ((a ule b) && (a uge b))
1409 CondCode = AArch64CC::PL;
1410 CondCode2 = AArch64CC::LE;
1411 break;
1412 }
1413}
1414
1415/// Return a register which can be used as a bit to test in a TB(N)Z.
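/// Walks through extensions, truncates, and shifts/ANDs/XORs with constant
/// operands, updating \p Bit and \p Invert along the way. For example, given
/// %shl = G_SHL %x, 2, testing bit 5 of %shl becomes testing bit 3 of %x.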
1416static Register getTestBitReg(Register Reg, uint64_t &Bit, bool &Invert,
1417 MachineRegisterInfo &MRI) {
1418 assert(Reg.isValid() && "Expected valid register!");
1419 bool HasZext = false;
1420 while (MachineInstr *MI = getDefIgnoringCopies(Reg, MRI)) {
1421 unsigned Opc = MI->getOpcode();
1422
1423 if (!MI->getOperand(0).isReg() ||
1424 !MRI.hasOneNonDBGUse(MI->getOperand(0).getReg()))
1425 break;
1426
1427 // (tbz (any_ext x), b) -> (tbz x, b) and
1428 // (tbz (zext x), b) -> (tbz x, b) if we don't use the extended bits.
1429 //
1430 // (tbz (trunc x), b) -> (tbz x, b) is always safe, because the bit number
1431 // on the truncated x is the same as the bit number on x.
1432 if (Opc == TargetOpcode::G_ANYEXT || Opc == TargetOpcode::G_ZEXT ||
1433 Opc == TargetOpcode::G_TRUNC) {
1434 if (Opc == TargetOpcode::G_ZEXT)
1435 HasZext = true;
1436
1437 Register NextReg = MI->getOperand(1).getReg();
1438 // Did we find something worth folding?
1439 if (!NextReg.isValid() || !MRI.hasOneNonDBGUse(NextReg))
1440 break;
1441 TypeSize InSize = MRI.getType(NextReg).getSizeInBits();
1442 if (Bit >= InSize)
1443 break;
1444
1445 // NextReg is worth folding. Keep looking.
1446 Reg = NextReg;
1447 continue;
1448 }
1449
1450 // Attempt to find a suitable operation with a constant on one side.
1451 std::optional<uint64_t> C;
1452 Register TestReg;
1453 switch (Opc) {
1454 default:
1455 break;
1456 case TargetOpcode::G_AND:
1457 case TargetOpcode::G_XOR: {
1458 TestReg = MI->getOperand(1).getReg();
1459 Register ConstantReg = MI->getOperand(2).getReg();
1460 auto VRegAndVal = getIConstantVRegValWithLookThrough(ConstantReg, MRI);
1461 if (!VRegAndVal) {
1462 // AND commutes, check the other side for a constant.
1463 // FIXME: Can we canonicalize the constant so that it's always on the
1464 // same side at some point earlier?
1465 std::swap(ConstantReg, TestReg);
1466 VRegAndVal = getIConstantVRegValWithLookThrough(ConstantReg, MRI);
1467 }
1468 if (VRegAndVal) {
1469 if (HasZext)
1470 C = VRegAndVal->Value.getZExtValue();
1471 else
1472 C = VRegAndVal->Value.getSExtValue();
1473 }
1474 break;
1475 }
1476 case TargetOpcode::G_ASHR:
1477 case TargetOpcode::G_LSHR:
1478 case TargetOpcode::G_SHL: {
1479 TestReg = MI->getOperand(1).getReg();
1480 auto VRegAndVal =
1481 getIConstantVRegValWithLookThrough(MI->getOperand(2).getReg(), MRI);
1482 if (VRegAndVal)
1483 C = VRegAndVal->Value.getSExtValue();
1484 break;
1485 }
1486 }
1487
1488 // Didn't find a constant or viable register. Bail out of the loop.
1489 if (!C || !TestReg.isValid())
1490 break;
1491
1492 // We found a suitable instruction with a constant. Check to see if we can
1493 // walk through the instruction.
1494 Register NextReg;
1495 unsigned TestRegSize = MRI.getType(TestReg).getSizeInBits();
1496 switch (Opc) {
1497 default:
1498 break;
1499 case TargetOpcode::G_AND:
1500 // (tbz (and x, m), b) -> (tbz x, b) when the b-th bit of m is set.
1501 if ((*C >> Bit) & 1)
1502 NextReg = TestReg;
1503 break;
1504 case TargetOpcode::G_SHL:
1505 // (tbz (shl x, c), b) -> (tbz x, b-c) when b-c is positive and fits in
1506 // the type of the register.
1507 if (*C <= Bit && (Bit - *C) < TestRegSize) {
1508 NextReg = TestReg;
1509 Bit = Bit - *C;
1510 }
1511 break;
1512 case TargetOpcode::G_ASHR:
1513 // (tbz (ashr x, c), b) -> (tbz x, b+c) or (tbz x, msb) if b+c is > # bits
1514 // in x
1515 NextReg = TestReg;
1516 Bit = Bit + *C;
1517 if (Bit >= TestRegSize)
1518 Bit = TestRegSize - 1;
1519 break;
1520 case TargetOpcode::G_LSHR:
1521 // (tbz (lshr x, c), b) -> (tbz x, b+c) when b + c is < # bits in x
1522 if ((Bit + *C) < TestRegSize) {
1523 NextReg = TestReg;
1524 Bit = Bit + *C;
1525 }
1526 break;
1527 case TargetOpcode::G_XOR:
1528 // We can walk through a G_XOR by inverting whether we use tbz/tbnz when
1529 // appropriate.
1530 //
1531 // e.g. If x' = xor x, c, and the b-th bit is set in c then
1532 //
1533 // tbz x', b -> tbnz x, b
1534 //
1535 // Because x' only has the b-th bit set if x does not.
1536 if ((*C >> Bit) & 1)
1537 Invert = !Invert;
1538 NextReg = TestReg;
1539 break;
1540 }
1541
1542 // Check if we found anything worth folding.
1543 if (!NextReg.isValid())
1544 return Reg;
1545 Reg = NextReg;
1546 }
1547
1548 return Reg;
1549}
1550
1551MachineInstr *AArch64InstructionSelector::emitTestBit(
1552 Register TestReg, uint64_t Bit, bool IsNegative, MachineBasicBlock *DstMBB,
1553 MachineIRBuilder &MIB) const {
1554 assert(TestReg.isValid());
1555 assert(ProduceNonFlagSettingCondBr &&
1556 "Cannot emit TB(N)Z with speculation tracking!");
1557 MachineRegisterInfo &MRI = *MIB.getMRI();
1558
1559 // Attempt to optimize the test bit by walking over instructions.
1560 TestReg = getTestBitReg(TestReg, Bit, IsNegative, MRI);
1561 LLT Ty = MRI.getType(TestReg);
1562 unsigned Size = Ty.getSizeInBits();
1563 assert(!Ty.isVector() && "Expected a scalar!");
1564 assert(Bit < 64 && "Bit is too large!");
1565
1566 // When the test register is a 64-bit register, we have to narrow to make
1567 // TBNZW work.
1568 bool UseWReg = Bit < 32;
1569 unsigned NecessarySize = UseWReg ? 32 : 64;
1570 if (Size != NecessarySize)
1571 TestReg = moveScalarRegClass(
1572 TestReg, UseWReg ? AArch64::GPR32RegClass : AArch64::GPR64RegClass,
1573 MIB);
1574
1575 static const unsigned OpcTable[2][2] = {{AArch64::TBZX, AArch64::TBNZX},
1576 {AArch64::TBZW, AArch64::TBNZW}};
1577 unsigned Opc = OpcTable[UseWReg][IsNegative];
1578 auto TestBitMI =
1579 MIB.buildInstr(Opc).addReg(TestReg).addImm(Bit).addMBB(DstMBB);
1580 constrainSelectedInstRegOperands(*TestBitMI, TII, TRI, RBI);
1581 return &*TestBitMI;
1582}
1583
1584bool AArch64InstructionSelector::tryOptAndIntoCompareBranch(
1585 MachineInstr &AndInst, bool Invert, MachineBasicBlock *DstMBB,
1586 MachineIRBuilder &MIB) const {
1587 assert(AndInst.getOpcode() == TargetOpcode::G_AND && "Expected G_AND only?");
1588 // Given something like this:
1589 //
1590 // %x = ...Something...
1591 // %one = G_CONSTANT i64 1
1592 // %zero = G_CONSTANT i64 0
1593 // %and = G_AND %x, %one
1594 // %cmp = G_ICMP intpred(ne), %and, %zero
1595 // %cmp_trunc = G_TRUNC %cmp
1596 // G_BRCOND %cmp_trunc, %bb.3
1597 //
1598 // We want to try and fold the AND into the G_BRCOND and produce either a
1599 // TBNZ (when we have intpred(ne)) or a TBZ (when we have intpred(eq)).
1600 //
1601 // In this case, we'd get
1602 //
1603 // TBNZ %x %bb.3
1604 //
1605
1606 // Check if the AND has a constant on its RHS which we can use as a mask.
1607 // If it's a power of 2, then it's the same as checking a specific bit.
1608 // (e.g., ANDing with 8 == ANDing with 000...100 == testing if bit 3 is set)
1609 auto MaybeBit = getIConstantVRegValWithLookThrough(
1610 AndInst.getOperand(2).getReg(), *MIB.getMRI());
1611 if (!MaybeBit)
1612 return false;
1613
1614 int32_t Bit = MaybeBit->Value.exactLogBase2();
1615 if (Bit < 0)
1616 return false;
1617
1618 Register TestReg = AndInst.getOperand(1).getReg();
1619
1620 // Emit a TB(N)Z.
1621 emitTestBit(TestReg, Bit, Invert, DstMBB, MIB);
1622 return true;
1623}
1624
1625MachineInstr *AArch64InstructionSelector::emitCBZ(Register CompareReg,
1626 bool IsNegative,
1627 MachineBasicBlock *DestMBB,
1628 MachineIRBuilder &MIB) const {
1629 assert(ProduceNonFlagSettingCondBr && "CBZ does not set flags!");
1630 MachineRegisterInfo &MRI = *MIB.getMRI();
1631 assert(RBI.getRegBank(CompareReg, MRI, TRI)->getID() ==
1632 AArch64::GPRRegBankID &&
1633 "Expected GPRs only?");
1634 auto Ty = MRI.getType(CompareReg);
1635 unsigned Width = Ty.getSizeInBits();
1636 assert(!Ty.isVector() && "Expected scalar only?");
1637 assert(Width <= 64 && "Expected width to be at most 64?");
1638 static const unsigned OpcTable[2][2] = {{AArch64::CBZW, AArch64::CBZX},
1639 {AArch64::CBNZW, AArch64::CBNZX}};
1640 unsigned Opc = OpcTable[IsNegative][Width == 64];
1641 auto BranchMI = MIB.buildInstr(Opc, {}, {CompareReg}).addMBB(DestMBB);
1642 constrainSelectedInstRegOperands(*BranchMI, TII, TRI, RBI);
1643 return &*BranchMI;
1644}
1645
1646bool AArch64InstructionSelector::selectCompareBranchFedByFCmp(
1647 MachineInstr &I, MachineInstr &FCmp, MachineIRBuilder &MIB) const {
1648 assert(FCmp.getOpcode() == TargetOpcode::G_FCMP);
1649 assert(I.getOpcode() == TargetOpcode::G_BRCOND);
1650 // Unfortunately, the mapping of LLVM FP CC's onto AArch64 CC's isn't
1651 // totally clean. Some of them require two branches to implement.
1652 auto Pred = (CmpInst::Predicate)FCmp.getOperand(1).getPredicate();
1653 emitFPCompare(FCmp.getOperand(2).getReg(), FCmp.getOperand(3).getReg(), MIB,
1654 Pred);
1655 AArch64CC::CondCode CC1, CC2;
1656 changeFCMPPredToAArch64CC(Pred, CC1, CC2);
1657 MachineBasicBlock *DestMBB = I.getOperand(1).getMBB();
1658 MIB.buildInstr(AArch64::Bcc, {}, {}).addImm(CC1).addMBB(DestMBB);
1659 if (CC2 != AArch64CC::AL)
1660 MIB.buildInstr(AArch64::Bcc, {}, {}).addImm(CC2).addMBB(DestMBB);
1661 I.eraseFromParent();
1662 return true;
1663}
1664
1665bool AArch64InstructionSelector::tryOptCompareBranchFedByICmp(
1666 MachineInstr &I, MachineInstr &ICmp, MachineIRBuilder &MIB) const {
1667 assert(ICmp.getOpcode() == TargetOpcode::G_ICMP);
1668 assert(I.getOpcode() == TargetOpcode::G_BRCOND);
1669 // Attempt to optimize the G_BRCOND + G_ICMP into a TB(N)Z/CB(N)Z.
1670 //
1671 // Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z
1672 // instructions will not be produced, as they are conditional branch
1673 // instructions that do not set flags.
1674 if (!ProduceNonFlagSettingCondBr)
1675 return false;
1676
1677 MachineRegisterInfo &MRI = *MIB.getMRI();
1678 MachineBasicBlock *DestMBB = I.getOperand(1).getMBB();
1679 auto Pred =
1680 static_cast<CmpInst::Predicate>(ICmp.getOperand(1).getPredicate());
1681 Register LHS = ICmp.getOperand(2).getReg();
1682 Register RHS = ICmp.getOperand(3).getReg();
1683
1684 // We're allowed to emit a TB(N)Z/CB(N)Z. Try to do that.
1685 auto VRegAndVal = getIConstantVRegValWithLookThrough(RHS, MRI);
1686 MachineInstr *AndInst = getOpcodeDef(TargetOpcode::G_AND, LHS, MRI);
1687
1688 // When we can emit a TB(N)Z, prefer that.
1689 //
1690 // Handle non-commutative condition codes first.
1691 // Note that we don't want to do this when we have a G_AND because it can
1692 // become a tst. The tst will make the test bit in the TB(N)Z redundant.
1693 if (VRegAndVal && !AndInst) {
1694 int64_t C = VRegAndVal->Value.getSExtValue();
1695
1696 // When we have a greater-than comparison, we can just test if the msb is
1697 // zero.
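// (For a signed compare, "x > -1" holds exactly when the sign bit of x is
// clear, so this becomes a TBZ on bit Width - 1.)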
1698 if (C == -1 && Pred == CmpInst::ICMP_SGT) {
1699 uint64_t Bit = MRI.getType(LHS).getSizeInBits() - 1;
1700 emitTestBit(LHS, Bit, /*IsNegative = */ false, DestMBB, MIB);
1701 I.eraseFromParent();
1702 return true;
1703 }
1704
1705 // When we have a less than comparison, we can just test if the msb is not
1706 // zero.
1707 if (C == 0 && Pred == CmpInst::ICMP_SLT) {
1708 uint64_t Bit = MRI.getType(LHS).getSizeInBits() - 1;
1709 emitTestBit(LHS, Bit, /*IsNegative = */ true, DestMBB, MIB);
1710 I.eraseFromParent();
1711 return true;
1712 }
1713
1714 // Inversely, if we have a signed greater-than-or-equal comparison to zero,
1715 // we can test if the msb is zero.
1716 if (C == 0 && Pred == CmpInst::ICMP_SGE) {
1717 uint64_t Bit = MRI.getType(LHS).getSizeInBits() - 1;
1718 emitTestBit(LHS, Bit, /*IsNegative = */ false, DestMBB, MIB);
1719 I.eraseFromParent();
1720 return true;
1721 }
1722 }
1723
1724 // Attempt to handle commutative condition codes. Right now, that's only
1725 // eq/ne.
1726 if (ICmpInst::isEquality(Pred)) {
1727 if (!VRegAndVal) {
1728 std::swap(RHS, LHS);
1729 VRegAndVal = getIConstantVRegValWithLookThrough(RHS, MRI);
1730 AndInst = getOpcodeDef(TargetOpcode::G_AND, LHS, MRI);
1731 }
1732
1733 if (VRegAndVal && VRegAndVal->Value == 0) {
1734 // If there's a G_AND feeding into this branch, try to fold it away by
1735 // emitting a TB(N)Z instead.
1736 //
1737 // Note: If we have LT, then it *is* possible to fold, but it wouldn't be
1738 // beneficial. When we have an AND and LT, we need a TST/ANDS, so folding
1739 // would be redundant.
1740 if (AndInst &&
1741 tryOptAndIntoCompareBranch(
1742 *AndInst, /*Invert = */ Pred == CmpInst::ICMP_NE, DestMBB, MIB)) {
1743 I.eraseFromParent();
1744 return true;
1745 }
1746
1747 // Otherwise, try to emit a CB(N)Z instead.
1748 auto LHSTy = MRI.getType(LHS);
1749 if (!LHSTy.isVector() && LHSTy.getSizeInBits() <= 64) {
1750 emitCBZ(LHS, /*IsNegative = */ Pred == CmpInst::ICMP_NE, DestMBB, MIB);
1751 I.eraseFromParent();
1752 return true;
1753 }
1754 }
1755 }
1756
1757 return false;
1758}
1759
1760bool AArch64InstructionSelector::selectCompareBranchFedByICmp(
1761 MachineInstr &I, MachineInstr &ICmp, MachineIRBuilder &MIB) const {
1762 assert(ICmp.getOpcode() == TargetOpcode::G_ICMP);
1763 assert(I.getOpcode() == TargetOpcode::G_BRCOND);
1764 if (tryOptCompareBranchFedByICmp(I, ICmp, MIB))
1765 return true;
1766
1767 // Couldn't optimize. Emit a compare + a Bcc.
1768 MachineBasicBlock *DestMBB = I.getOperand(1).getMBB();
1769 auto &PredOp = ICmp.getOperand(1);
1770 emitIntegerCompare(ICmp.getOperand(2), ICmp.getOperand(3), PredOp, MIB);
1771 const AArch64CC::CondCode CC = changeICMPPredToAArch64CC(
1772 static_cast<CmpInst::Predicate>(PredOp.getPredicate()),
1773 ICmp.getOperand(3).getReg(), MIB.getMRI());
1774 MIB.buildInstr(AArch64::Bcc, {}, {}).addImm(CC).addMBB(DestMBB);
1775 I.eraseFromParent();
1776 return true;
1777}
1778
1779bool AArch64InstructionSelector::selectCompareBranch(
1780 MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) {
1781 Register CondReg = I.getOperand(0).getReg();
1782 MachineInstr *CCMI = MRI.getVRegDef(CondReg);
1783 // Try to select the G_BRCOND using whatever is feeding the condition if
1784 // possible.
1785 unsigned CCMIOpc = CCMI->getOpcode();
1786 if (CCMIOpc == TargetOpcode::G_FCMP)
1787 return selectCompareBranchFedByFCmp(I, *CCMI, MIB);
1788 if (CCMIOpc == TargetOpcode::G_ICMP)
1789 return selectCompareBranchFedByICmp(I, *CCMI, MIB);
1790
1791 // Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z
1792 // instructions will not be produced, as they are conditional branch
1793 // instructions that do not set flags.
1794 if (ProduceNonFlagSettingCondBr) {
1795 emitTestBit(CondReg, /*Bit = */ 0, /*IsNegative = */ true,
1796 I.getOperand(1).getMBB(), MIB);
1797 I.eraseFromParent();
1798 return true;
1799 }
1800
1801 // Can't emit TB(N)Z/CB(N)Z. Emit a tst + bcc instead.
1802 auto TstMI =
1803 MIB.buildInstr(AArch64::ANDSWri, {LLT::scalar(32)}, {CondReg}).addImm(1);
1804 constrainSelectedInstRegOperands(*TstMI, TII, TRI, RBI);
1805 auto Bcc = MIB.buildInstr(AArch64::Bcc)
1806 .addImm(AArch64CC::NE)
1807 .addMBB(I.getOperand(1).getMBB());
1808 I.eraseFromParent();
1809 constrainSelectedInstRegOperands(*Bcc, TII, TRI, RBI);
1810 return true;
1811}
1812
1813/// Returns the element immediate value of a vector shift operand if found.
1814/// This needs to detect a splat-like operation, e.g. a G_BUILD_VECTOR.
1815static std::optional<int64_t> getVectorShiftImm(Register Reg,
1816 MachineRegisterInfo &MRI) {
1817 assert(MRI.getType(Reg).isVector() && "Expected a *vector* shift operand");
1818 MachineInstr *OpMI = MRI.getVRegDef(Reg);
1819 return getAArch64VectorSplatScalar(*OpMI, MRI);
1820}
1821
1822/// Matches and returns the shift immediate value for a SHL instruction given
1823/// a shift operand.
1824static std::optional<int64_t> getVectorSHLImm(LLT SrcTy, Register Reg,
1825 MachineRegisterInfo &MRI) {
1826 std::optional<int64_t> ShiftImm = getVectorShiftImm(Reg, MRI);
1827 if (!ShiftImm)
1828 return std::nullopt;
1829 // Check the immediate is in range for a SHL.
1830 int64_t Imm = *ShiftImm;
1831 if (Imm < 0)
1832 return std::nullopt;
1833 switch (SrcTy.getElementType().getSizeInBits()) {
1834 default:
1835 LLVM_DEBUG(dbgs() << "Unhandled element type for vector shift");
1836 return std::nullopt;
1837 case 8:
1838 if (Imm > 7)
1839 return std::nullopt;
1840 break;
1841 case 16:
1842 if (Imm > 15)
1843 return std::nullopt;
1844 break;
1845 case 32:
1846 if (Imm > 31)
1847 return std::nullopt;
1848 break;
1849 case 64:
1850 if (Imm > 63)
1851 return std::nullopt;
1852 break;
1853 }
1854 return Imm;
1855}
1856
1857bool AArch64InstructionSelector::selectVectorSHL(MachineInstr &I,
1858 MachineRegisterInfo &MRI) {
1859 assert(I.getOpcode() == TargetOpcode::G_SHL);
1860 Register DstReg = I.getOperand(0).getReg();
1861 const LLT Ty = MRI.getType(DstReg);
1862 Register Src1Reg = I.getOperand(1).getReg();
1863 Register Src2Reg = I.getOperand(2).getReg();
1864
1865 if (!Ty.isVector())
1866 return false;
1867
1868 // Check if we have a vector of constants on RHS that we can select as the
1869 // immediate form.
1870 std::optional<int64_t> ImmVal = getVectorSHLImm(Ty, Src2Reg, MRI);
1871
1872 unsigned Opc = 0;
1873 if (Ty == LLT::fixed_vector(2, 64)) {
1874 Opc = ImmVal ? AArch64::SHLv2i64_shift : AArch64::USHLv2i64;
1875 } else if (Ty == LLT::fixed_vector(4, 32)) {
1876 Opc = ImmVal ? AArch64::SHLv4i32_shift : AArch64::USHLv4i32;
1877 } else if (Ty == LLT::fixed_vector(2, 32)) {
1878 Opc = ImmVal ? AArch64::SHLv2i32_shift : AArch64::USHLv2i32;
1879 } else if (Ty == LLT::fixed_vector(4, 16)) {
1880 Opc = ImmVal ? AArch64::SHLv4i16_shift : AArch64::USHLv4i16;
1881 } else if (Ty == LLT::fixed_vector(8, 16)) {
1882 Opc = ImmVal ? AArch64::SHLv8i16_shift : AArch64::USHLv8i16;
1883 } else if (Ty == LLT::fixed_vector(16, 8)) {
1884 Opc = ImmVal ? AArch64::SHLv16i8_shift : AArch64::USHLv16i8;
1885 } else if (Ty == LLT::fixed_vector(8, 8)) {
1886 Opc = ImmVal ? AArch64::SHLv8i8_shift : AArch64::USHLv8i8;
1887 } else {
1888 LLVM_DEBUG(dbgs() << "Unhandled G_SHL type");
1889 return false;
1890 }
1891
1892 auto Shl = MIB.buildInstr(Opc, {DstReg}, {Src1Reg});
1893 if (ImmVal)
1894 Shl.addImm(*ImmVal);
1895 else
1896 Shl.addUse(Src2Reg);
1897 constrainSelectedInstRegOperands(*Shl, TII, TRI, RBI);
1898 I.eraseFromParent();
1899 return true;
1900}
1901
1902bool AArch64InstructionSelector::selectVectorAshrLshr(
1903 MachineInstr &I, MachineRegisterInfo &MRI) {
1904 assert(I.getOpcode() == TargetOpcode::G_ASHR ||
1905 I.getOpcode() == TargetOpcode::G_LSHR);
1906 Register DstReg = I.getOperand(0).getReg();
1907 const LLT Ty = MRI.getType(DstReg);
1908 Register Src1Reg = I.getOperand(1).getReg();
1909 Register Src2Reg = I.getOperand(2).getReg();
1910
1911 if (!Ty.isVector())
1912 return false;
1913
1914 bool IsASHR = I.getOpcode() == TargetOpcode::G_ASHR;
1915
1916 // We expect the immediate case to be lowered in the PostLegalCombiner to
1917 // AArch64ISD::VASHR or AArch64ISD::VLSHR equivalents.
1918
1919 // There is no shift-right-by-register instruction, but the shift-left-by-
1920 // register instruction takes a signed shift amount, where negative values
1921 // specify a right shift.
1922
1923 unsigned Opc = 0;
1924 unsigned NegOpc = 0;
1925 const TargetRegisterClass *RC =
1926 getRegClassForTypeOnBank(Ty, RBI.getRegBank(AArch64::FPRRegBankID));
1927 if (Ty == LLT::fixed_vector(2, 64)) {
1928 Opc = IsASHR ? AArch64::SSHLv2i64 : AArch64::USHLv2i64;
1929 NegOpc = AArch64::NEGv2i64;
1930 } else if (Ty == LLT::fixed_vector(4, 32)) {
1931 Opc = IsASHR ? AArch64::SSHLv4i32 : AArch64::USHLv4i32;
1932 NegOpc = AArch64::NEGv4i32;
1933 } else if (Ty == LLT::fixed_vector(2, 32)) {
1934 Opc = IsASHR ? AArch64::SSHLv2i32 : AArch64::USHLv2i32;
1935 NegOpc = AArch64::NEGv2i32;
1936 } else if (Ty == LLT::fixed_vector(4, 16)) {
1937 Opc = IsASHR ? AArch64::SSHLv4i16 : AArch64::USHLv4i16;
1938 NegOpc = AArch64::NEGv4i16;
1939 } else if (Ty == LLT::fixed_vector(8, 16)) {
1940 Opc = IsASHR ? AArch64::SSHLv8i16 : AArch64::USHLv8i16;
1941 NegOpc = AArch64::NEGv8i16;
1942 } else if (Ty == LLT::fixed_vector(16, 8)) {
1943 Opc = IsASHR ? AArch64::SSHLv16i8 : AArch64::USHLv16i8;
1944 NegOpc = AArch64::NEGv16i8;
1945 } else if (Ty == LLT::fixed_vector(8, 8)) {
1946 Opc = IsASHR ? AArch64::SSHLv8i8 : AArch64::USHLv8i8;
1947 NegOpc = AArch64::NEGv8i8;
1948 } else {
1949 LLVM_DEBUG(dbgs() << "Unhandled G_ASHR type");
1950 return false;
1951 }
1952
1953 auto Neg = MIB.buildInstr(NegOpc, {RC}, {Src2Reg});
1954 constrainSelectedInstRegOperands(*Neg, TII, TRI, RBI);
1955 auto SShl = MIB.buildInstr(Opc, {DstReg}, {Src1Reg, Neg});
1956 constrainSelectedInstRegOperands(*SShl, TII, TRI, RBI);
1957 I.eraseFromParent();
1958 return true;
1959}
1960
1961bool AArch64InstructionSelector::selectVaStartAAPCS(
1962 MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const {
1963
1964 if (STI.isCallingConvWin64(MF.getFunction().getCallingConv(),
1965 MF.getFunction().isVarArg()))
1966 return false;
1967
1968 // The layout of the va_list struct is specified in the AArch64 Procedure Call
1969 // Standard, section 10.1.5.
1970
1971 const AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
1972 const unsigned PtrSize = STI.isTargetILP32() ? 4 : 8;
1973 const auto *PtrRegClass =
1974 STI.isTargetILP32() ? &AArch64::GPR32RegClass : &AArch64::GPR64RegClass;
1975
1976 const MCInstrDesc &MCIDAddAddr =
1977 TII.get(STI.isTargetILP32() ? AArch64::ADDWri : AArch64::ADDXri);
1978 const MCInstrDesc &MCIDStoreAddr =
1979 TII.get(STI.isTargetILP32() ? AArch64::STRWui : AArch64::STRXui);
1980
1981 /*
1982 * typedef struct va_list {
1983 * void * stack; // next stack param
1984 * void * gr_top; // end of GP arg reg save area
1985 * void * vr_top; // end of FP/SIMD arg reg save area
1986 * int gr_offs; // offset from gr_top to next GP register arg
1987 * int vr_offs; // offset from vr_top to next FP/SIMD register arg
1988 * } va_list;
1989 */
1990 const auto VAList = I.getOperand(0).getReg();
1991
1992 // Our current offset in bytes from the va_list struct (VAList).
1993 unsigned OffsetBytes = 0;
1994
1995 // Helper function to store (FrameIndex + Imm) to VAList at offset OffsetBytes
1996 // and increment OffsetBytes by PtrSize.
1997 const auto PushAddress = [&](const int FrameIndex, const int64_t Imm) {
1998 const Register Top = MRI.createVirtualRegister(PtrRegClass);
1999 auto MIB = BuildMI(*I.getParent(), I, I.getDebugLoc(), MCIDAddAddr)
2000 .addDef(Top)
2001 .addFrameIndex(FrameIndex)
2002 .addImm(Imm)
2003 .addImm(0);
2004 constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
2005
2006 const auto *MMO = *I.memoperands_begin();
2007 MIB = BuildMI(*I.getParent(), I, I.getDebugLoc(), MCIDStoreAddr)
2008 .addUse(Top)
2009 .addUse(VAList)
2010 .addImm(OffsetBytes / PtrSize)
2011 .addMemOperand(MF.getMachineMemOperand(
2012 MMO->getPointerInfo().getWithOffset(OffsetBytes),
2013 MachineMemOperand::MOStore, PtrSize, MMO->getBaseAlign()));
2014 constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
2015
2016 OffsetBytes += PtrSize;
2017 };
2018
2019 // void* stack at offset 0
2020 PushAddress(FuncInfo->getVarArgsStackIndex(), 0);
2021
2022 // void* gr_top at offset 8 (4 on ILP32)
2023 const unsigned GPRSize = FuncInfo->getVarArgsGPRSize();
2024 PushAddress(FuncInfo->getVarArgsGPRIndex(), GPRSize);
2025
2026 // void* vr_top at offset 16 (8 on ILP32)
2027 const unsigned FPRSize = FuncInfo->getVarArgsFPRSize();
2028 PushAddress(FuncInfo->getVarArgsFPRIndex(), FPRSize);
2029
2030 // Helper function to store a 4-byte integer constant to VAList at offset
2031 // OffsetBytes, and increment OffsetBytes by 4.
2032 const auto PushIntConstant = [&](const int32_t Value) {
2033 constexpr int IntSize = 4;
2034 const Register Temp = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
2035 auto MIB =
2036 BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::MOVi32imm))
2037 .addDef(Temp)
2038 .addImm(Value);
2039 constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
2040
2041 const auto *MMO = *I.memoperands_begin();
2042 MIB = BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::STRWui))
2043 .addUse(Temp)
2044 .addUse(VAList)
2045 .addImm(OffsetBytes / IntSize)
2046 .addMemOperand(MF.getMachineMemOperand(
2047 MMO->getPointerInfo().getWithOffset(OffsetBytes),
2048 MachineMemOperand::MOStore, IntSize, MMO->getBaseAlign()));
2049 constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
2050 OffsetBytes += IntSize;
2051 };
2052
2053 // int gr_offs at offset 24 (12 on ILP32)
2054 PushIntConstant(-static_cast<int32_t>(GPRSize));
2055
2056 // int vr_offs at offset 28 (16 on ILP32)
2057 PushIntConstant(-static_cast<int32_t>(FPRSize));
2058
2059 assert(OffsetBytes == (STI.isTargetILP32() ? 20 : 32) && "Unexpected offset");
2060
2061 I.eraseFromParent();
2062 return true;
2063}
2064
2065bool AArch64InstructionSelector::selectVaStartDarwin(
2066 MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const {
2067 AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
2068 Register ListReg = I.getOperand(0).getReg();
2069
2070 Register ArgsAddrReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
2071
2072 int FrameIdx = FuncInfo->getVarArgsStackIndex();
2073 if (MF.getSubtarget<AArch64Subtarget>().isCallingConvWin64(
2074 MF.getFunction().getCallingConv(), MF.getFunction().isVarArg())) {
2075 FrameIdx = FuncInfo->getVarArgsGPRSize() > 0
2076 ? FuncInfo->getVarArgsGPRIndex()
2077 : FuncInfo->getVarArgsStackIndex();
2078 }
2079
2080 auto MIB =
2081 BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::ADDXri))
2082 .addDef(ArgsAddrReg)
2083 .addFrameIndex(FrameIdx)
2084 .addImm(0)
2085 .addImm(0);
2086
2087 constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
2088
2089 MIB = BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::STRXui))
2090 .addUse(ArgsAddrReg)
2091 .addUse(ListReg)
2092 .addImm(0)
2093 .addMemOperand(*I.memoperands_begin());
2094
2095 constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
2096 I.eraseFromParent();
2097 return true;
2098}
2099
2100void AArch64InstructionSelector::materializeLargeCMVal(
2101 MachineInstr &I, const Value *V, unsigned OpFlags) {
2102 MachineBasicBlock &MBB = *I.getParent();
2103 MachineFunction &MF = *MBB.getParent();
2104 MachineRegisterInfo &MRI = MF.getRegInfo();
2105
2106 auto MovZ = MIB.buildInstr(AArch64::MOVZXi, {&AArch64::GPR64RegClass}, {});
2107 MovZ->addOperand(MF, I.getOperand(1));
2108 MovZ->getOperand(1).setTargetFlags(OpFlags | AArch64II::MO_G0 |
2109 AArch64II::MO_NC);
2110 MovZ->addOperand(MF, MachineOperand::CreateImm(0));
2111 constrainSelectedInstRegOperands(*MovZ, TII, TRI, RBI);
2112
2113 auto BuildMovK = [&](Register SrcReg, unsigned char Flags, unsigned Offset,
2114 Register ForceDstReg) {
2115 Register DstReg = ForceDstReg
2116 ? ForceDstReg
2117 : MRI.createVirtualRegister(&AArch64::GPR64RegClass);
2118 auto MovI = MIB.buildInstr(AArch64::MOVKXi).addDef(DstReg).addUse(SrcReg);
2119 if (auto *GV = dyn_cast<GlobalValue>(V)) {
2120 MovI->addOperand(MF, MachineOperand::CreateGA(
2121 GV, MovZ->getOperand(1).getOffset(), Flags));
2122 } else {
2123 MovI->addOperand(
2124 MF, MachineOperand::CreateBA(cast<BlockAddress>(V),
2125 MovZ->getOperand(1).getOffset(), Flags));
2126 }
2127 MovI->addOperand(MF, MachineOperand::CreateImm(Offset));
2128 constrainSelectedInstRegOperands(*MovI, TII, TRI, RBI);
2129 return DstReg;
2130 };
2131 Register DstReg = BuildMovK(MovZ.getReg(0),
2132 AArch64II::MO_G1 | AArch64II::MO_NC, 16, 0);
2133 DstReg = BuildMovK(DstReg, AArch64II::MO_G2 | AArch64II::MO_NC, 32, 0);
2134 BuildMovK(DstReg, AArch64II::MO_G3, 48, I.getOperand(0).getReg());
2135}
2136
2137bool AArch64InstructionSelector::preISelLower(MachineInstr &I) {
2138 MachineBasicBlock &MBB = *I.getParent();
2139 MachineFunction &MF = *MBB.getParent();
2140 MachineRegisterInfo &MRI = MF.getRegInfo();
2141
2142 switch (I.getOpcode()) {
2143 case TargetOpcode::G_CONSTANT: {
2144 Register DefReg = I.getOperand(0).getReg();
2145 const LLT DefTy = MRI.getType(DefReg);
2146 if (!DefTy.isPointer())
2147 return false;
2148 const unsigned PtrSize = DefTy.getSizeInBits();
2149 if (PtrSize != 32 && PtrSize != 64)
2150 return false;
2151 // Convert pointer typed constants to integers so TableGen can select.
2152 MRI.setType(DefReg, LLT::integer(PtrSize));
2153 return true;
2154 }
2155 case TargetOpcode::G_STORE: {
2156 bool Changed = contractCrossBankCopyIntoStore(I, MRI);
2157 MachineOperand &SrcOp = I.getOperand(0);
2158 if (MRI.getType(SrcOp.getReg()).isPointer()) {
2159 // Allow matching with imported patterns for stores of pointers. Unlike
2160 // G_LOAD/G_PTR_ADD, we may not have selected all users. So, emit a copy
2161 // and constrain.
2162 auto Copy = MIB.buildCopy(LLT::scalar(64), SrcOp);
2163 Register NewSrc = Copy.getReg(0);
2164 SrcOp.setReg(NewSrc);
2165 RBI.constrainGenericRegister(NewSrc, AArch64::GPR64RegClass, MRI);
2166 Changed = true;
2167 }
2168 return Changed;
2169 }
2170 case TargetOpcode::G_PTR_ADD: {
2171 // If Checked Pointer Arithmetic (FEAT_CPA) is present, preserve the pointer
2172 // arithmetic semantics instead of falling back to regular arithmetic.
2173 const auto &TL = STI.getTargetLowering();
2174 if (TL->shouldPreservePtrArith(MF.getFunction(), EVT()))
2175 return false;
2176 return convertPtrAddToAdd(I, MRI);
2177 }
2178 case TargetOpcode::G_LOAD: {
2179 // For scalar loads of pointers, we try to convert the dest type from p0
2180 // to s64 so that our imported patterns can match. Like with the G_PTR_ADD
2181 // conversion, this should be ok because all users should have been
2182 // selected already, so the type doesn't matter for them.
2183 Register DstReg = I.getOperand(0).getReg();
2184 const LLT DstTy = MRI.getType(DstReg);
2185 if (!DstTy.isPointer())
2186 return false;
2187 MRI.setType(DstReg, LLT::scalar(64));
2188 return true;
2189 }
2190 case AArch64::G_DUP: {
2191 // Convert the type from p0 to s64 to help selection.
2192 LLT DstTy = MRI.getType(I.getOperand(0).getReg());
2193 if (!DstTy.isPointerVector())
2194 return false;
2195 auto NewSrc = MIB.buildCopy(LLT::scalar(64), I.getOperand(1).getReg());
2196 MRI.setType(I.getOperand(0).getReg(),
2197 DstTy.changeElementType(LLT::scalar(64)));
2198 MRI.setRegClass(NewSrc.getReg(0), &AArch64::GPR64RegClass);
2199 I.getOperand(1).setReg(NewSrc.getReg(0));
2200 return true;
2201 }
2202 case AArch64::G_INSERT_VECTOR_ELT: {
2203 // Convert the type from p0 to s64 to help selection.
2204 LLT DstTy = MRI.getType(I.getOperand(0).getReg());
2205 LLT SrcVecTy = MRI.getType(I.getOperand(1).getReg());
2206 if (!SrcVecTy.isPointerVector())
2207 return false;
2208 auto NewSrc = MIB.buildCopy(LLT::scalar(64), I.getOperand(2).getReg());
2209 MRI.setType(I.getOperand(1).getReg(),
2210 DstTy.changeElementType(LLT::scalar(64)));
2211 MRI.setType(I.getOperand(0).getReg(),
2212 DstTy.changeElementType(LLT::scalar(64)));
2213 MRI.setRegClass(NewSrc.getReg(0), &AArch64::GPR64RegClass);
2214 I.getOperand(2).setReg(NewSrc.getReg(0));
2215 return true;
2216 }
2217 case TargetOpcode::G_UITOFP:
2218 case TargetOpcode::G_SITOFP: {
2219 // If both source and destination regbanks are FPR, then convert the opcode
2220 // to G_SITOF so that the importer can select it to an fpr variant.
2221 // Otherwise, it ends up matching an fpr/gpr variant and adding a cross-bank
2222 // copy.
2223 Register SrcReg = I.getOperand(1).getReg();
2224 LLT SrcTy = MRI.getType(SrcReg);
2225 LLT DstTy = MRI.getType(I.getOperand(0).getReg());
2226 if (SrcTy.isVector() || SrcTy.getSizeInBits() != DstTy.getSizeInBits())
2227 return false;
2228
2229 if (RBI.getRegBank(SrcReg, MRI, TRI)->getID() == AArch64::FPRRegBankID) {
2230 // Need to add a copy to change the type so that the existing patterns can
2231 // match when there is an integer on an FPR bank.
2232 if (SrcTy.getScalarType().isInteger()) {
2233 auto Copy = MIB.buildCopy(DstTy, SrcReg);
2234 I.getOperand(1).setReg(Copy.getReg(0));
2235 MRI.setRegClass(Copy.getReg(0),
2236 getRegClassForTypeOnBank(
2237 SrcTy, RBI.getRegBank(AArch64::FPRRegBankID)));
2238 }
2239 if (I.getOpcode() == TargetOpcode::G_SITOFP)
2240 I.setDesc(TII.get(AArch64::G_SITOF));
2241 else
2242 I.setDesc(TII.get(AArch64::G_UITOF));
2243 return true;
2244 }
2245 return false;
2246 }
2247 default:
2248 return false;
2249 }
2250}
2251
2252/// This lowering tries to look for G_PTR_ADD instructions and then converts
2253/// them to a standard G_ADD with a COPY on the source.
2254///
2255/// The motivation behind this is to expose the add semantics to the imported
2256/// tablegen patterns. We shouldn't need to check for uses being loads/stores,
2257/// because the selector works bottom up, uses before defs. By the time we
2258/// end up trying to select a G_PTR_ADD, we should have already attempted to
2259/// fold this into addressing modes and were therefore unsuccessful.
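///
/// For example (illustrative MIR, 64-bit pointers in address space 0):
///
///   %dst:_(p0) = G_PTR_ADD %base:_(p0), %off:_(s64)
///
/// is rewritten into
///
///   %intbase:_(s64) = G_PTRTOINT %base:_(p0)
///   %dst:_(s64) = G_ADD %intbase:_(s64), %off:_(s64)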
2260bool AArch64InstructionSelector::convertPtrAddToAdd(
2261 MachineInstr &I, MachineRegisterInfo &MRI) {
2262 assert(I.getOpcode() == TargetOpcode::G_PTR_ADD && "Expected G_PTR_ADD");
2263 Register DstReg = I.getOperand(0).getReg();
2264 Register AddOp1Reg = I.getOperand(1).getReg();
2265 const LLT PtrTy = MRI.getType(DstReg);
2266 if (PtrTy.getAddressSpace() != 0)
2267 return false;
2268
2269 const LLT CastPtrTy = PtrTy.isVector()
2270 ? LLT::fixed_vector(2, 64)
2271 : LLT::integer(64);
2272 auto PtrToInt = MIB.buildPtrToInt(CastPtrTy, AddOp1Reg);
2273 // Set regbanks on the registers.
2274 if (PtrTy.isVector())
2275 MRI.setRegBank(PtrToInt.getReg(0), RBI.getRegBank(AArch64::FPRRegBankID));
2276 else
2277 MRI.setRegBank(PtrToInt.getReg(0), RBI.getRegBank(AArch64::GPRRegBankID));
2278
2279 // Now turn the %dst(p0) = G_PTR_ADD %base, off into:
2280 // %dst(intty) = G_ADD %intbase, off
2281 I.setDesc(TII.get(TargetOpcode::G_ADD));
2282 MRI.setType(DstReg, CastPtrTy);
2283 I.getOperand(1).setReg(PtrToInt.getReg(0));
2284 if (!select(*PtrToInt)) {
2285 LLVM_DEBUG(dbgs() << "Failed to select G_PTRTOINT in convertPtrAddToAdd");
2286 return false;
2287 }
2288
2289 // Also take the opportunity here to try to do some optimization.
2290 // Try to convert this into a G_SUB if the offset is a 0-x negate idiom.
2291 Register NegatedReg;
2292 if (!mi_match(I.getOperand(2).getReg(), MRI, m_Neg(m_Reg(NegatedReg))))
2293 return true;
2294 I.getOperand(2).setReg(NegatedReg);
2295 I.setDesc(TII.get(TargetOpcode::G_SUB));
2296 return true;
2297}
2298
2299bool AArch64InstructionSelector::earlySelectSHL(MachineInstr &I,
2300 MachineRegisterInfo &MRI) {
2301 // We try to match the immediate variant of LSL, which is actually an alias
2302 // for a special case of UBFM. Otherwise, we fall back to the imported
2303 // selector which will match the register variant.
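  // For example, "lsl x0, x1, #3" is the alias of "ubfm x0, x1, #61, #60"
  // (immr = (64 - shift) % 64 and imms = 63 - shift for the 64-bit form).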
2304 assert(I.getOpcode() == TargetOpcode::G_SHL && "unexpected op");
2305 const auto &MO = I.getOperand(2);
2306 auto VRegAndVal = getIConstantVRegVal(MO.getReg(), MRI);
2307 if (!VRegAndVal)
2308 return false;
2309
2310 const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
2311 if (DstTy.isVector())
2312 return false;
2313 bool Is64Bit = DstTy.getSizeInBits() == 64;
2314 auto Imm1Fn = Is64Bit ? selectShiftA_64(MO) : selectShiftA_32(MO);
2315 auto Imm2Fn = Is64Bit ? selectShiftB_64(MO) : selectShiftB_32(MO);
2316
2317 if (!Imm1Fn || !Imm2Fn)
2318 return false;
2319
2320 auto NewI =
2321 MIB.buildInstr(Is64Bit ? AArch64::UBFMXri : AArch64::UBFMWri,
2322 {I.getOperand(0).getReg()}, {I.getOperand(1).getReg()});
2323
2324 for (auto &RenderFn : *Imm1Fn)
2325 RenderFn(NewI);
2326 for (auto &RenderFn : *Imm2Fn)
2327 RenderFn(NewI);
2328
2329 I.eraseFromParent();
2330 constrainSelectedInstRegOperands(*NewI, TII, TRI, RBI);
2331 return true;
2332}
2333
2334bool AArch64InstructionSelector::contractCrossBankCopyIntoStore(
2335 MachineInstr &I, MachineRegisterInfo &MRI) {
2336 assert(I.getOpcode() == TargetOpcode::G_STORE && "Expected G_STORE");
2337 // If we're storing a scalar, it doesn't matter what register bank that
2338 // scalar is on. All that matters is the size.
2339 //
2340 // So, if we see something like this (with a 32-bit scalar as an example):
2341 //
2342 // %x:gpr(s32) = ... something ...
2343 // %y:fpr(s32) = COPY %x:gpr(s32)
2344 // G_STORE %y:fpr(s32)
2345 //
2346 // We can fix this up into something like this:
2347 //
2348 // G_STORE %x:gpr(s32)
2349 //
2350 // And then continue the selection process normally.
2351 Register DefDstReg = getSrcRegIgnoringCopies(I.getOperand(0).getReg(), MRI);
2352 if (!DefDstReg.isValid())
2353 return false;
2354 LLT DefDstTy = MRI.getType(DefDstReg);
2355 Register StoreSrcReg = I.getOperand(0).getReg();
2356 LLT StoreSrcTy = MRI.getType(StoreSrcReg);
2357
2358 // If we get something strange like a physical register, then we shouldn't
2359 // go any further.
2360 if (!DefDstTy.isValid())
2361 return false;
2362
2363 // Are the source and dst types the same size?
2364 if (DefDstTy.getSizeInBits() != StoreSrcTy.getSizeInBits())
2365 return false;
2366
2367 if (RBI.getRegBank(StoreSrcReg, MRI, TRI) ==
2368 RBI.getRegBank(DefDstReg, MRI, TRI))
2369 return false;
2370
2371 // We have a cross-bank copy, which is entering a store. Let's fold it.
2372 I.getOperand(0).setReg(DefDstReg);
2373 return true;
2374}
2375
2376bool AArch64InstructionSelector::earlySelect(MachineInstr &I) {
2377 assert(I.getParent() && "Instruction should be in a basic block!");
2378 assert(I.getParent()->getParent() && "Instruction should be in a function!");
2379
2380 MachineBasicBlock &MBB = *I.getParent();
2381 MachineFunction &MF = *MBB.getParent();
2382 MachineRegisterInfo &MRI = MF.getRegInfo();
2383
2384 switch (I.getOpcode()) {
2385 case AArch64::G_DUP: {
2386 // Before selecting a DUP instruction, check if it is better selected as a
2387 // MOV or load from a constant pool.
2388 Register Src = I.getOperand(1).getReg();
2389 auto ValAndVReg = getAnyConstantVRegValWithLookThrough(
2390 Src, MRI, /*LookThroughInstrs=*/true, /*LookThroughAnyExt=*/true);
2391 if (!ValAndVReg)
2392 return false;
2393 LLVMContext &Ctx = MF.getFunction().getContext();
2394 Register Dst = I.getOperand(0).getReg();
2395 auto *CV = ConstantDataVector::getSplat(
2396 MRI.getType(Dst).getNumElements(),
2397 ConstantInt::get(
2398 Type::getIntNTy(Ctx, MRI.getType(Dst).getScalarSizeInBits()),
2399 ValAndVReg->Value.trunc(MRI.getType(Dst).getScalarSizeInBits())));
2400 if (!emitConstantVector(Dst, CV, MIB, MRI))
2401 return false;
2402 I.eraseFromParent();
2403 return true;
2404 }
2405 case TargetOpcode::G_SEXT:
2406 // Check for i64 sext(i32 vector_extract) prior to tablegen to select SMOV
2407 // over a normal extend.
2408 if (selectUSMovFromExtend(I, MRI))
2409 return true;
2410 return false;
2411 case TargetOpcode::G_BR:
2412 return false;
2413 case TargetOpcode::G_SHL:
2414 return earlySelectSHL(I, MRI);
2415 case TargetOpcode::G_CONSTANT: {
2416 bool IsZero = false;
2417 if (I.getOperand(1).isCImm())
2418 IsZero = I.getOperand(1).getCImm()->isZero();
2419 else if (I.getOperand(1).isImm())
2420 IsZero = I.getOperand(1).getImm() == 0;
2421
2422 if (!IsZero)
2423 return false;
2424
2425 Register DefReg = I.getOperand(0).getReg();
2426 LLT Ty = MRI.getType(DefReg);
2427 if (Ty.getSizeInBits() == 64) {
2428 I.getOperand(1).ChangeToRegister(AArch64::XZR, false);
2429 RBI.constrainGenericRegister(DefReg, AArch64::GPR64RegClass, MRI);
2430 } else if (Ty.getSizeInBits() <= 32) {
2431 I.getOperand(1).ChangeToRegister(AArch64::WZR, false);
2432 RBI.constrainGenericRegister(DefReg, AArch64::GPR32RegClass, MRI);
2433 } else
2434 return false;
2435
2436 I.setDesc(TII.get(TargetOpcode::COPY));
2437 return true;
2438 }
2439
2440 case TargetOpcode::G_ADD: {
2441 // Check if this is being fed by a G_ICMP on either side.
2442 //
2443 // (cmp pred, x, y) + z
2444 //
2445 // In the above case, when the cmp is true, we increment z by 1. So, we can
2446 // fold the add into the cset for the cmp by using cinc.
2447 //
2448 // FIXME: This would probably be a lot nicer in PostLegalizerLowering.
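    // For example (illustrative):
    //
    //   %cmp:_(s32) = G_ICMP intpred(eq), %x, %y
    //   %add:_(s32) = G_ADD %z, %cmp
    //
    // can be emitted as a compare of %x and %y followed by
    // "csinc %add, %z, %z, ne" (the inverted condition), which yields %z + 1
    // when the compare is true and %z otherwise.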
2449 Register AddDst = I.getOperand(0).getReg();
2450 Register AddLHS = I.getOperand(1).getReg();
2451 Register AddRHS = I.getOperand(2).getReg();
2452 // Only handle scalars.
2453 LLT Ty = MRI.getType(AddLHS);
2454 if (Ty.isVector())
2455 return false;
2456 // Since G_ICMP is modeled as ADDS/SUBS/ANDS, we can handle 32 bits or 64
2457 // bits.
2458 unsigned Size = Ty.getSizeInBits();
2459 if (Size != 32 && Size != 64)
2460 return false;
2461 auto MatchCmp = [&](Register Reg) -> MachineInstr * {
2462 if (!MRI.hasOneNonDBGUse(Reg))
2463 return nullptr;
2464 // If the LHS of the add is 32 bits, then we want to fold a 32-bit
2465 // compare.
2466 if (Size == 32)
2467 return getOpcodeDef(TargetOpcode::G_ICMP, Reg, MRI);
2468 // We model scalar compares using 32-bit destinations right now.
2469 // If it's a 64-bit compare, it'll have 64-bit sources.
2470 Register ZExt;
2471 if (!mi_match(Reg, MRI,
2472 m_OneNonDBGUse(m_GZExt(m_OneNonDBGUse(m_Reg(ZExt))))))
2473 return nullptr;
2474 auto *Cmp = getOpcodeDef(TargetOpcode::G_ICMP, ZExt, MRI);
2475 if (!Cmp ||
2476 MRI.getType(Cmp->getOperand(2).getReg()).getSizeInBits() != 64)
2477 return nullptr;
2478 return Cmp;
2479 };
2480 // Try to match
2481 // z + (cmp pred, x, y)
2482 MachineInstr *Cmp = MatchCmp(AddRHS);
2483 if (!Cmp) {
2484 // (cmp pred, x, y) + z
2485 std::swap(AddLHS, AddRHS);
2486 Cmp = MatchCmp(AddRHS);
2487 if (!Cmp)
2488 return false;
2489 }
2490 auto &PredOp = Cmp->getOperand(1);
2492 emitIntegerCompare(/*LHS=*/Cmp->getOperand(2),
2493 /*RHS=*/Cmp->getOperand(3), PredOp, MIB);
2494 auto Pred = static_cast<CmpInst::Predicate>(PredOp.getPredicate());
2495 const AArch64CC::CondCode InvCC = changeICMPPredToAArch64CC(
2496 CmpInst::getInversePredicate(Pred), Cmp->getOperand(3).getReg(), &MRI);
2497 emitCSINC(/*Dst=*/AddDst, /*Src =*/AddLHS, /*Src2=*/AddLHS, InvCC, MIB);
2498 I.eraseFromParent();
2499 return true;
2500 }
2501 case TargetOpcode::G_OR: {
2502 // Look for operations that take the lower `Width=Size-ShiftImm` bits of
2503 // `ShiftSrc` and insert them into the upper `Width` bits of `MaskSrc` via
2504 // shifting and masking that we can replace with a BFI (encoded as a BFM).
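    // For example (illustrative, Size == 32 and ShiftImm == 8):
    //
    //   %shl:_(s32) = G_SHL %shiftsrc, 8
    //   %and:_(s32) = G_AND %masksrc, 255
    //   %or:_(s32)  = G_OR %shl, %and
    //
    // can be selected as a BFMWri (a BFI) inserting %shiftsrc into %masksrc,
    // with immr = Size - ShiftImm = 24 and imms = Size - ShiftImm - 1 = 23.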
2505 Register Dst = I.getOperand(0).getReg();
2506 LLT Ty = MRI.getType(Dst);
2507
2508 if (!Ty.isScalar())
2509 return false;
2510
2511 unsigned Size = Ty.getSizeInBits();
2512 if (Size != 32 && Size != 64)
2513 return false;
2514
2515 Register ShiftSrc;
2516 int64_t ShiftImm;
2517 Register MaskSrc;
2518 int64_t MaskImm;
2519 if (!mi_match(
2520 Dst, MRI,
2521 m_GOr(m_OneNonDBGUse(m_GShl(m_Reg(ShiftSrc), m_ICst(ShiftImm))),
2522 m_OneNonDBGUse(m_GAnd(m_Reg(MaskSrc), m_ICst(MaskImm))))))
2523 return false;
2524
2525 if (ShiftImm > Size || ((1ULL << ShiftImm) - 1ULL) != uint64_t(MaskImm))
2526 return false;
2527
2528 int64_t Immr = Size - ShiftImm;
2529 int64_t Imms = Size - ShiftImm - 1;
2530 unsigned Opc = Size == 32 ? AArch64::BFMWri : AArch64::BFMXri;
2531 emitInstr(Opc, {Dst}, {MaskSrc, ShiftSrc, Immr, Imms}, MIB);
2532 I.eraseFromParent();
2533 return true;
2534 }
2535 case TargetOpcode::G_FENCE: {
2536 if (I.getOperand(1).getImm() == 0)
2537 BuildMI(MBB, I, MIMetadata(I), TII.get(TargetOpcode::MEMBARRIER));
2538 else
2539 BuildMI(MBB, I, MIMetadata(I), TII.get(AArch64::DMB))
2540 .addImm(I.getOperand(0).getImm() == 4 ? 0x9 : 0xb);
2541 I.eraseFromParent();
2542 return true;
2543 }
2544 default:
2545 return false;
2546 }
2547}
2548
2549bool AArch64InstructionSelector::select(MachineInstr &I) {
2550 assert(I.getParent() && "Instruction should be in a basic block!");
2551 assert(I.getParent()->getParent() && "Instruction should be in a function!");
2552
2553 MachineBasicBlock &MBB = *I.getParent();
2554 MachineFunction &MF = *MBB.getParent();
2555 MachineRegisterInfo &MRI = MF.getRegInfo();
2556
2557 const AArch64Subtarget *Subtarget = &MF.getSubtarget<AArch64Subtarget>();
2558 if (Subtarget->requiresStrictAlign()) {
2559 // We don't support this feature yet.
2560 LLVM_DEBUG(dbgs() << "AArch64 GISel does not support strict-align yet\n");
2561 return false;
2562 }
2563
2564 MIB.setInstrAndDebugLoc(I);
2565
2566 unsigned Opcode = I.getOpcode();
2567 // G_PHI requires same handling as PHI
2568 if (!I.isPreISelOpcode() || Opcode == TargetOpcode::G_PHI) {
2569 // Certain non-generic instructions also need some special handling.
2570
2571 if (Opcode == TargetOpcode::LOAD_STACK_GUARD) {
2572 constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2573 return true;
2574 }
2575
2576 if (Opcode == TargetOpcode::PHI || Opcode == TargetOpcode::G_PHI) {
2577 const Register DefReg = I.getOperand(0).getReg();
2578 const LLT DefTy = MRI.getType(DefReg);
2579
2580 const RegClassOrRegBank &RegClassOrBank =
2581 MRI.getRegClassOrRegBank(DefReg);
2582
2583 const TargetRegisterClass *DefRC =
2584 dyn_cast_if_present<const TargetRegisterClass *>(RegClassOrBank);
2585 if (!DefRC) {
2586 if (!DefTy.isValid()) {
2587 LLVM_DEBUG(dbgs() << "PHI operand has no type, not a gvreg?\n");
2588 return false;
2589 }
2590 const RegisterBank &RB = *cast<const RegisterBank *>(RegClassOrBank);
2591 DefRC = getRegClassForTypeOnBank(DefTy, RB);
2592 if (!DefRC) {
2593 LLVM_DEBUG(dbgs() << "PHI operand has unexpected size/bank\n");
2594 return false;
2595 }
2596 }
2597
2598 I.setDesc(TII.get(TargetOpcode::PHI));
2599
2600 return RBI.constrainGenericRegister(DefReg, *DefRC, MRI);
2601 }
2602
2603 if (I.isCopy())
2604 return selectCopy(I, TII, MRI, TRI, RBI);
2605
2606 if (I.isDebugInstr())
2607 return selectDebugInstr(I, MRI, RBI);
2608
2609 return true;
2610 }
2611
2612
2613 if (I.getNumOperands() != I.getNumExplicitOperands()) {
2614 LLVM_DEBUG(
2615 dbgs() << "Generic instruction has unexpected implicit operands\n");
2616 return false;
2617 }
2618
2619 // Try to do some lowering before we start instruction selecting. These
2620 // lowerings are purely transformations on the input G_MIR and so selection
2621 // must continue after any modification of the instruction.
2622 if (preISelLower(I)) {
2623 Opcode = I.getOpcode(); // The opcode may have been modified, refresh it.
2624 }
2625
2626 // There may be patterns that the importer can't handle optimally and instead
2627 // selects to a suboptimal sequence, so our custom C++ selection code later
2628 // never gets a chance to work on them. Therefore, we have an early
2629 // selection attempt here to give priority to certain selection routines
2630 // over the imported ones.
2631 if (earlySelect(I))
2632 return true;
2633
2634 if (selectImpl(I, *CoverageInfo))
2635 return true;
2636
2637 LLT Ty =
2638 I.getOperand(0).isReg() ? MRI.getType(I.getOperand(0).getReg()) : LLT{};
2639
2640 switch (Opcode) {
2641 case TargetOpcode::G_SBFX:
2642 case TargetOpcode::G_UBFX: {
2643 static const unsigned OpcTable[2][2] = {
2644 {AArch64::UBFMWri, AArch64::UBFMXri},
2645 {AArch64::SBFMWri, AArch64::SBFMXri}};
2646 bool IsSigned = Opcode == TargetOpcode::G_SBFX;
2647 unsigned Size = Ty.getSizeInBits();
2648 unsigned Opc = OpcTable[IsSigned][Size == 64];
2649 auto Cst1 =
2650 getIConstantVRegValWithLookThrough(I.getOperand(2).getReg(), MRI);
2651 assert(Cst1 && "Should have gotten a constant for src 1?");
2652 auto Cst2 =
2653 getIConstantVRegValWithLookThrough(I.getOperand(3).getReg(), MRI);
2654 assert(Cst2 && "Should have gotten a constant for src 2?");
2655 auto LSB = Cst1->Value.getZExtValue();
2656 auto Width = Cst2->Value.getZExtValue();
2657 auto BitfieldInst =
2658 MIB.buildInstr(Opc, {I.getOperand(0)}, {I.getOperand(1)})
2659 .addImm(LSB)
2660 .addImm(LSB + Width - 1);
2661 I.eraseFromParent();
2662 constrainSelectedInstRegOperands(*BitfieldInst, TII, TRI, RBI);
2663 return true;
2664 }
2665 case TargetOpcode::G_BRCOND:
2666 return selectCompareBranch(I, MF, MRI);
2667
2668 case TargetOpcode::G_BRINDIRECT: {
2669 const Function &Fn = MF.getFunction();
2670 if (std::optional<uint16_t> BADisc =
2671 STI.getPtrAuthBlockAddressDiscriminatorIfEnabled(Fn)) {
2672 auto MI = MIB.buildInstr(AArch64::BRA, {}, {I.getOperand(0).getReg()});
2673 MI.addImm(AArch64PACKey::IA);
2674 MI.addImm(*BADisc);
2675 MI.addReg(/*AddrDisc=*/AArch64::XZR);
2676 I.eraseFromParent();
2677 constrainSelectedInstRegOperands(*MI, TII, TRI, RBI);
2678 return true;
2679 }
2680 I.setDesc(TII.get(AArch64::BR));
2681 constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2682 return true;
2683 }
2684
2685 case TargetOpcode::G_BRJT:
2686 return selectBrJT(I, MRI);
2687
2688 case AArch64::G_ADD_LOW: {
2689 // This op may have been separated from its ADRP companion by the localizer
2690 // or some other code motion pass. Given that many CPUs will try to
2691 // macro fuse these operations anyway, select this into a MOVaddr pseudo
2692 // which will later be expanded into an ADRP+ADD pair after scheduling.
2693 MachineInstr *BaseMI = MRI.getVRegDef(I.getOperand(1).getReg());
2694 if (BaseMI->getOpcode() != AArch64::ADRP) {
2695 I.setDesc(TII.get(AArch64::ADDXri));
2696 I.addOperand(MachineOperand::CreateImm(0));
2697 constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2698 return true;
2699 }
2700 assert(TM.getCodeModel() == CodeModel::Small &&
2701 "Expected small code model");
2702 auto Op1 = BaseMI->getOperand(1);
2703 auto Op2 = I.getOperand(2);
2704 auto MovAddr = MIB.buildInstr(AArch64::MOVaddr, {I.getOperand(0)}, {})
2705 .addGlobalAddress(Op1.getGlobal(), Op1.getOffset(),
2706 Op1.getTargetFlags())
2707 .addGlobalAddress(Op2.getGlobal(), Op2.getOffset(),
2708 Op2.getTargetFlags());
2709 I.eraseFromParent();
2710 constrainSelectedInstRegOperands(*MovAddr, TII, TRI, RBI);
2711 return true;
2712 }
2713
2714 case TargetOpcode::G_FCONSTANT: {
2715 const Register DefReg = I.getOperand(0).getReg();
2716 const LLT DefTy = MRI.getType(DefReg);
2717 const unsigned DefSize = DefTy.getSizeInBits();
2718 const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI);
2719
2720 const TargetRegisterClass &FPRRC = *getRegClassForTypeOnBank(DefTy, RB);
2721 // For 16, 64, and 128b values, emit a constant pool load.
2722 switch (DefSize) {
2723 default:
2724 llvm_unreachable("Unexpected destination size for G_FCONSTANT?");
2725 case 32:
2726 case 64: {
2727 bool OptForSize = shouldOptForSize(&MF);
2728 const auto &TLI = MF.getSubtarget().getTargetLowering();
2729 // If TLI says that this fpimm is illegal, then we'll expand to a
2730 // constant pool load.
2731 if (TLI->isFPImmLegal(I.getOperand(1).getFPImm()->getValueAPF(),
2732 EVT::getFloatingPointVT(DefSize), OptForSize))
2733 break;
2734 [[fallthrough]];
2735 }
2736 case 16:
2737 case 128: {
2738 auto *FPImm = I.getOperand(1).getFPImm();
2739 auto *LoadMI = emitLoadFromConstantPool(FPImm, MIB);
2740 if (!LoadMI) {
2741 LLVM_DEBUG(dbgs() << "Failed to load double constant pool entry\n");
2742 return false;
2743 }
2744 MIB.buildCopy({DefReg}, {LoadMI->getOperand(0).getReg()});
2745 I.eraseFromParent();
2746 return RBI.constrainGenericRegister(DefReg, FPRRC, MRI);
2747 }
2748 }
2749
2750 assert((DefSize == 32 || DefSize == 64) && "Unexpected const def size");
2751 // Either emit a FMOV, or emit a copy to emit a normal mov.
2752 const Register DefGPRReg = MRI.createVirtualRegister(
2753 DefSize == 32 ? &AArch64::GPR32RegClass : &AArch64::GPR64RegClass);
2754 MachineOperand &RegOp = I.getOperand(0);
2755 RegOp.setReg(DefGPRReg);
2756 MIB.setInsertPt(MIB.getMBB(), std::next(I.getIterator()));
2757 MIB.buildCopy({DefReg}, {DefGPRReg});
2758
2759 if (!RBI.constrainGenericRegister(DefReg, FPRRC, MRI)) {
2760 LLVM_DEBUG(dbgs() << "Failed to constrain G_FCONSTANT def operand\n");
2761 return false;
2762 }
2763
2764 MachineOperand &ImmOp = I.getOperand(1);
2765 ImmOp.ChangeToImmediate(
2766 ImmOp.getFPImm()->getValueAPF().bitcastToAPInt().getZExtValue());
2767
2768 const unsigned MovOpc =
2769 DefSize == 64 ? AArch64::MOVi64imm : AArch64::MOVi32imm;
2770 I.setDesc(TII.get(MovOpc));
2771 constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2772 return true;
2773 }
2774 case TargetOpcode::G_EXTRACT: {
2775 Register DstReg = I.getOperand(0).getReg();
2776 Register SrcReg = I.getOperand(1).getReg();
2777 LLT SrcTy = MRI.getType(SrcReg);
2778 LLT DstTy = MRI.getType(DstReg);
2779 (void)DstTy;
2780 unsigned SrcSize = SrcTy.getSizeInBits();
2781
2782 if (SrcTy.getSizeInBits() > 64) {
2783 // This should be an extract of an s128, which is like a vector extract.
2784 if (SrcTy.getSizeInBits() != 128)
2785 return false;
2786 // Only support extracting 64 bits from an s128 at the moment.
2787 if (DstTy.getSizeInBits() != 64)
2788 return false;
2789
2790 unsigned Offset = I.getOperand(2).getImm();
2791 if (Offset % 64 != 0)
2792 return false;
2793
2794 // Check we have the right regbank always.
2795 const RegisterBank &SrcRB = *RBI.getRegBank(SrcReg, MRI, TRI);
2796 const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
2797 assert(SrcRB.getID() == DstRB.getID() && "Wrong extract regbank!");
2798
2799 if (SrcRB.getID() == AArch64::GPRRegBankID) {
2800 auto NewI =
2801 MIB.buildInstr(TargetOpcode::COPY, {DstReg}, {})
2802 .addUse(SrcReg, {},
2803 Offset == 0 ? AArch64::sube64 : AArch64::subo64);
2804 constrainOperandRegClass(MF, TRI, MRI, TII, RBI, *NewI,
2805 AArch64::GPR64RegClass, NewI->getOperand(0));
2806 I.eraseFromParent();
2807 return true;
2808 }
2809
2810 // Emit the same code as a vector extract.
2811 // Offset must be a multiple of 64.
2812 unsigned LaneIdx = Offset / 64;
2813 MachineInstr *Extract = emitExtractVectorElt(
2814 DstReg, DstRB, LLT::scalar(64), SrcReg, LaneIdx, MIB);
2815 if (!Extract)
2816 return false;
2817 I.eraseFromParent();
2818 return true;
2819 }
2820
2821 I.setDesc(TII.get(SrcSize == 64 ? AArch64::UBFMXri : AArch64::UBFMWri));
2822 MachineInstrBuilder(MF, I).addImm(I.getOperand(2).getImm() +
2823 Ty.getSizeInBits() - 1);
2824
2825 if (SrcSize < 64) {
2826 assert(SrcSize == 32 && DstTy.getSizeInBits() == 16 &&
2827 "unexpected G_EXTRACT types");
2828 constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2829 return true;
2830 }
2831
2832 DstReg = MRI.createGenericVirtualRegister(LLT::scalar(64));
2833 MIB.setInsertPt(MIB.getMBB(), std::next(I.getIterator()));
2834 MIB.buildInstr(TargetOpcode::COPY, {I.getOperand(0).getReg()}, {})
2835 .addReg(DstReg, {}, AArch64::sub_32);
2836 RBI.constrainGenericRegister(I.getOperand(0).getReg(),
2837 AArch64::GPR32RegClass, MRI);
2838 I.getOperand(0).setReg(DstReg);
2839
2840 constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2841 return true;
2842 }
2843
2844 case TargetOpcode::G_INSERT: {
2845 LLT SrcTy = MRI.getType(I.getOperand(2).getReg());
2846 LLT DstTy = MRI.getType(I.getOperand(0).getReg());
2847 unsigned DstSize = DstTy.getSizeInBits();
2848 // Larger inserts are vectors, same-size ones should be something else by
2849 // now (split up or turned into COPYs).
2850 if (Ty.getSizeInBits() > 64 || SrcTy.getSizeInBits() > 32)
2851 return false;
2852
2853 I.setDesc(TII.get(DstSize == 64 ? AArch64::BFMXri : AArch64::BFMWri));
2854 unsigned LSB = I.getOperand(3).getImm();
2855 unsigned Width = MRI.getType(I.getOperand(2).getReg()).getSizeInBits();
2856 I.getOperand(3).setImm((DstSize - LSB) % DstSize);
2857 MachineInstrBuilder(MF, I).addImm(Width - 1);
2858
2859 if (DstSize < 64) {
2860 assert(DstSize == 32 && SrcTy.getSizeInBits() == 16 &&
2861 "unexpected G_INSERT types");
2862 constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2863 return true;
2864 }
2865
2866 Register SrcReg = MRI.createGenericVirtualRegister(LLT::scalar(64));
2867 BuildMI(MBB, I.getIterator(), I.getDebugLoc(),
2868 TII.get(AArch64::SUBREG_TO_REG))
2869 .addDef(SrcReg)
2870 .addUse(I.getOperand(2).getReg())
2871 .addImm(AArch64::sub_32);
2872 RBI.constrainGenericRegister(I.getOperand(2).getReg(),
2873 AArch64::GPR32RegClass, MRI);
2874 I.getOperand(2).setReg(SrcReg);
2875
2876 constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2877 return true;
2878 }
2879 case TargetOpcode::G_FRAME_INDEX: {
2880 // allocas and G_FRAME_INDEX are only supported in addrspace(0).
2881 if (Ty != LLT::pointer(0, 64)) {
2882 LLVM_DEBUG(dbgs() << "G_FRAME_INDEX pointer has type: " << Ty
2883 << ", expected: " << LLT::pointer(0, 64) << '\n');
2884 return false;
2885 }
2886 I.setDesc(TII.get(AArch64::ADDXri));
2887
2888 // MOs for a #0 shifted immediate.
2889 I.addOperand(MachineOperand::CreateImm(0));
2890 I.addOperand(MachineOperand::CreateImm(0));
2891
2892 constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2893 return true;
2894 }
2895
2896 case TargetOpcode::G_GLOBAL_VALUE: {
2897 const GlobalValue *GV = nullptr;
2898 unsigned OpFlags;
2899 if (I.getOperand(1).isSymbol()) {
2900 OpFlags = I.getOperand(1).getTargetFlags();
2901 // Currently only used by "RtLibUseGOT".
2902 assert(OpFlags == AArch64II::MO_GOT);
2903 } else {
2904 GV = I.getOperand(1).getGlobal();
2905 if (GV->isThreadLocal()) {
2906 // We don't support instructions with emulated TLS variables yet
2907 if (TM.useEmulatedTLS())
2908 return false;
2909 return selectTLSGlobalValue(I, MRI);
2910 }
2911 OpFlags = STI.ClassifyGlobalReference(GV, TM);
2912 }
2913
2914 if (OpFlags & AArch64II::MO_GOT) {
2915 bool IsGOTSigned = MF.getInfo<AArch64FunctionInfo>()->hasELFSignedGOT();
2916 I.setDesc(TII.get(IsGOTSigned ? AArch64::LOADgotAUTH : AArch64::LOADgot));
2917 I.getOperand(1).setTargetFlags(OpFlags);
2918 I.addImplicitDefUseOperands(MF);
2919 } else if (TM.getCodeModel() == CodeModel::Large &&
2920 !TM.isPositionIndependent()) {
2921 // Materialize the global using movz/movk instructions.
2922 materializeLargeCMVal(I, GV, OpFlags);
2923 I.eraseFromParent();
2924 return true;
2925 } else if (TM.getCodeModel() == CodeModel::Tiny) {
2926 I.setDesc(TII.get(AArch64::ADR));
2927 I.getOperand(1).setTargetFlags(OpFlags);
2928 } else {
2929 I.setDesc(TII.get(AArch64::MOVaddr));
2930 I.getOperand(1).setTargetFlags(OpFlags | AArch64II::MO_PAGE);
2931 MachineInstrBuilder MIB(MF, I);
2932 MIB.addGlobalAddress(GV, I.getOperand(1).getOffset(),
2933 OpFlags | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
2934 }
2935 constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2936 return true;
2937 }
2938
2939 case TargetOpcode::G_PTRAUTH_GLOBAL_VALUE:
2940 return selectPtrAuthGlobalValue(I, MRI);
2941
2942 case TargetOpcode::G_ZEXTLOAD:
2943 case TargetOpcode::G_LOAD:
2944 case TargetOpcode::G_STORE: {
2945 GLoadStore &LdSt = cast<GLoadStore>(I);
2946 bool IsZExtLoad = I.getOpcode() == TargetOpcode::G_ZEXTLOAD;
2947 LLT PtrTy = MRI.getType(LdSt.getPointerReg());
2948
2949 // Can only handle AddressSpace 0, 64-bit pointers.
2950 if (PtrTy != LLT::pointer(0, 64)) {
2951 return false;
2952 }
2953
2954 uint64_t MemSizeInBytes = LdSt.getMemSize().getValue();
2955 unsigned MemSizeInBits = LdSt.getMemSizeInBits().getValue();
2956 AtomicOrdering Order = LdSt.getMMO().getSuccessOrdering();
2957
2958 // Need special instructions for atomics that affect ordering.
2959 if (isStrongerThanMonotonic(Order)) {
2960 assert(!isa<GZExtLoad>(LdSt));
2961 assert(MemSizeInBytes <= 8 &&
2962 "128-bit atomics should already be custom-legalized");
2963
2964 if (isa<GLoad>(LdSt)) {
2965 static constexpr unsigned LDAPROpcodes[] = {
2966 AArch64::LDAPRB, AArch64::LDAPRH, AArch64::LDAPRW, AArch64::LDAPRX};
2967 static constexpr unsigned LDAROpcodes[] = {
2968 AArch64::LDARB, AArch64::LDARH, AArch64::LDARW, AArch64::LDARX};
2969 ArrayRef<unsigned> Opcodes =
2970 STI.hasRCPC() && Order != AtomicOrdering::SequentiallyConsistent
2971 ? LDAPROpcodes
2972 : LDAROpcodes;
2973 I.setDesc(TII.get(Opcodes[Log2_32(MemSizeInBytes)]));
2974 } else {
2975 static constexpr unsigned Opcodes[] = {AArch64::STLRB, AArch64::STLRH,
2976 AArch64::STLRW, AArch64::STLRX};
2977 Register ValReg = LdSt.getReg(0);
2978 if (MRI.getType(ValReg).getSizeInBits() == 64 && MemSizeInBits != 64) {
2979 // Emit a subreg copy of 32 bits.
2980 Register NewVal = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
2981 MIB.buildInstr(TargetOpcode::COPY, {NewVal}, {})
2982 .addReg(I.getOperand(0).getReg(), {}, AArch64::sub_32);
2983 I.getOperand(0).setReg(NewVal);
2984 }
2985 I.setDesc(TII.get(Opcodes[Log2_32(MemSizeInBytes)]));
2986 }
2987 constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2988 return true;
2989 }
2990
2991#ifndef NDEBUG
2992 const Register PtrReg = LdSt.getPointerReg();
2993 const RegisterBank &PtrRB = *RBI.getRegBank(PtrReg, MRI, TRI);
2994 // Check that the pointer register is valid.
2995 assert(PtrRB.getID() == AArch64::GPRRegBankID &&
2996 "Load/Store pointer operand isn't a GPR");
2997 assert(MRI.getType(PtrReg).isPointer() &&
2998 "Load/Store pointer operand isn't a pointer");
2999#endif
3000
3001 const Register ValReg = LdSt.getReg(0);
3002 const RegisterBank &RB = *RBI.getRegBank(ValReg, MRI, TRI);
3003 LLT ValTy = MRI.getType(ValReg);
3004
3005 // The code below doesn't support truncating stores, so we need to split it
3006 // again.
3007 if (isa<GStore>(LdSt) && ValTy.getSizeInBits() > MemSizeInBits) {
3008 unsigned SubReg;
3009 LLT MemTy = LdSt.getMMO().getMemoryType();
3010 auto *RC = getRegClassForTypeOnBank(MemTy, RB);
3011 if (!getSubRegForClass(RC, TRI, SubReg))
3012 return false;
3013
3014 // Generate a subreg copy.
3015 auto Copy = MIB.buildInstr(TargetOpcode::COPY, {MemTy}, {})
3016 .addReg(ValReg, {}, SubReg)
3017 .getReg(0);
3018 RBI.constrainGenericRegister(Copy, *RC, MRI);
3019 LdSt.getOperand(0).setReg(Copy);
3020 } else if (isa<GLoad>(LdSt) && ValTy.getSizeInBits() > MemSizeInBits) {
3021 // If this is an any-extending load from the FPR bank, split it into a regular
3022 // load + extend.
3023 if (RB.getID() == AArch64::FPRRegBankID) {
3024 unsigned SubReg;
3025 LLT MemTy = LdSt.getMMO().getMemoryType();
3026 auto *RC = getRegClassForTypeOnBank(MemTy, RB);
3027 if (!getSubRegForClass(RC, TRI, SubReg))
3028 return false;
3029 Register OldDst = LdSt.getReg(0);
3030 Register NewDst =
3031 MRI.createGenericVirtualRegister(LdSt.getMMO().getMemoryType());
3032 LdSt.getOperand(0).setReg(NewDst);
3033 MRI.setRegBank(NewDst, RB);
3034 // Generate a SUBREG_TO_REG to extend it.
3035 MIB.setInsertPt(MIB.getMBB(), std::next(LdSt.getIterator()));
3036 MIB.buildInstr(AArch64::SUBREG_TO_REG, {OldDst}, {})
3037 .addUse(NewDst)
3038 .addImm(SubReg);
3039 auto SubRegRC = getRegClassForTypeOnBank(MRI.getType(OldDst), RB);
3040 RBI.constrainGenericRegister(OldDst, *SubRegRC, MRI);
3041 MIB.setInstr(LdSt);
3042 ValTy = MemTy; // This is no longer an extending load.
3043 }
3044 }
3045
3046 // Helper lambda for partially selecting I. Either returns the original
3047 // instruction with an updated opcode, or a new instruction.
3048 auto SelectLoadStoreAddressingMode = [&]() -> MachineInstr * {
3049 bool IsStore = isa<GStore>(I);
3050 const unsigned NewOpc =
3051 selectLoadStoreUIOp(I.getOpcode(), RB.getID(), MemSizeInBits);
3052 if (NewOpc == I.getOpcode())
3053 return nullptr;
3054 // Check if we can fold anything into the addressing mode.
3055 auto AddrModeFns =
3056 selectAddrModeIndexed(I.getOperand(1), MemSizeInBytes);
3057 if (!AddrModeFns) {
3058 // Can't fold anything. Use the original instruction.
3059 I.setDesc(TII.get(NewOpc));
3060 I.addOperand(MachineOperand::CreateImm(0));
3061 return &I;
3062 }
3063
3064 // Folded something. Create a new instruction and return it.
3065 auto NewInst = MIB.buildInstr(NewOpc, {}, {}, I.getFlags());
3066 Register CurValReg = I.getOperand(0).getReg();
3067 IsStore ? NewInst.addUse(CurValReg) : NewInst.addDef(CurValReg);
3068 NewInst.cloneMemRefs(I);
3069 for (auto &Fn : *AddrModeFns)
3070 Fn(NewInst);
3071 I.eraseFromParent();
3072 return &*NewInst;
3073 };
3074
3075 MachineInstr *LoadStore = SelectLoadStoreAddressingMode();
3076 if (!LoadStore)
3077 return false;
3078
3079 // If we're storing a 0, use WZR/XZR.
3080 if (Opcode == TargetOpcode::G_STORE) {
3081 auto CVal = getIConstantVRegValWithLookThrough(
3082 LoadStore->getOperand(0).getReg(), MRI);
3083 if (CVal && CVal->Value == 0) {
3084 switch (LoadStore->getOpcode()) {
3085 case AArch64::STRWui:
3086 case AArch64::STRHHui:
3087 case AArch64::STRBBui:
3088 LoadStore->getOperand(0).setReg(AArch64::WZR);
3089 break;
3090 case AArch64::STRXui:
3091 LoadStore->getOperand(0).setReg(AArch64::XZR);
3092 break;
3093 }
3094 }
3095 }
3096
3097 if (IsZExtLoad || (Opcode == TargetOpcode::G_LOAD &&
3098 ValTy == LLT::scalar(64) && MemSizeInBits == 32)) {
3099 // The any/zextload from a smaller type to i32 should be handled by the
3100 // importer.
3101 if (MRI.getType(LoadStore->getOperand(0).getReg()).getSizeInBits() != 64)
3102 return false;
3103 // If we have an extending load then change the load's type to be a
3104 // narrower reg and zero_extend with SUBREG_TO_REG.
3105 Register LdReg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
3106 Register DstReg = LoadStore->getOperand(0).getReg();
3107 LoadStore->getOperand(0).setReg(LdReg);
3108
3109 MIB.setInsertPt(MIB.getMBB(), std::next(LoadStore->getIterator()));
3110 MIB.buildInstr(AArch64::SUBREG_TO_REG, {DstReg}, {})
3111 .addUse(LdReg)
3112 .addImm(AArch64::sub_32);
3113 constrainSelectedInstRegOperands(*LoadStore, TII, TRI, RBI);
3114 return RBI.constrainGenericRegister(DstReg, AArch64::GPR64allRegClass,
3115 MRI);
3116 }
3117 constrainSelectedInstRegOperands(*LoadStore, TII, TRI, RBI);
3118 return true;
3119 }
3120
3121 case TargetOpcode::G_INDEXED_ZEXTLOAD:
3122 case TargetOpcode::G_INDEXED_SEXTLOAD:
3123 return selectIndexedExtLoad(I, MRI);
3124 case TargetOpcode::G_INDEXED_LOAD:
3125 return selectIndexedLoad(I, MRI);
3126 case TargetOpcode::G_INDEXED_STORE:
3127 return selectIndexedStore(cast<GIndexedStore>(I), MRI);
3128
3129 case TargetOpcode::G_LSHR:
3130 case TargetOpcode::G_ASHR:
3131 if (MRI.getType(I.getOperand(0).getReg()).isVector())
3132 return selectVectorAshrLshr(I, MRI);
3133 [[fallthrough]];
3134 case TargetOpcode::G_SHL:
3135 if (Opcode == TargetOpcode::G_SHL &&
3136 MRI.getType(I.getOperand(0).getReg()).isVector())
3137 return selectVectorSHL(I, MRI);
3138
3139 // These shifts were legalized to have 64 bit shift amounts because we
3140 // want to take advantage of the selection patterns that assume the
3141 // immediates are s64s; however, selectBinaryOp will assume both operands
3142 // have the same bit size.
3143 {
3144 Register SrcReg = I.getOperand(1).getReg();
3145 Register ShiftReg = I.getOperand(2).getReg();
3146 const LLT ShiftTy = MRI.getType(ShiftReg);
3147 const LLT SrcTy = MRI.getType(SrcReg);
3148 if (!SrcTy.isVector() && SrcTy.getSizeInBits() == 32 &&
3149 ShiftTy.getSizeInBits() == 64) {
3150 assert(!ShiftTy.isVector() && "unexpected vector shift ty");
3151 // Insert a subregister copy to implement a 64->32 trunc
3152 auto Trunc = MIB.buildInstr(TargetOpcode::COPY, {SrcTy}, {})
3153 .addReg(ShiftReg, {}, AArch64::sub_32);
3154 MRI.setRegBank(Trunc.getReg(0), RBI.getRegBank(AArch64::GPRRegBankID));
3155 I.getOperand(2).setReg(Trunc.getReg(0));
3156 }
3157 }
3158 [[fallthrough]];
3159 case TargetOpcode::G_OR: {
3160 // Reject the various things we don't support yet.
3161 if (unsupportedBinOp(I, RBI, MRI, TRI))
3162 return false;
3163
3164 const unsigned OpSize = Ty.getSizeInBits();
3165
3166 const Register DefReg = I.getOperand(0).getReg();
3167 const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI);
3168
3169 const unsigned NewOpc = selectBinaryOp(I.getOpcode(), RB.getID(), OpSize);
3170 if (NewOpc == I.getOpcode())
3171 return false;
3172
3173 I.setDesc(TII.get(NewOpc));
3174 // FIXME: Should the type be always reset in setDesc?
3175
3176 // Now that we selected an opcode, we need to constrain the register
3177 // operands to use appropriate classes.
3178 constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3179 return true;
3180 }
3181
3182 case TargetOpcode::G_PTR_ADD: {
3183 emitADD(I.getOperand(0).getReg(), I.getOperand(1), I.getOperand(2), MIB);
3184 I.eraseFromParent();
3185 return true;
3186 }
3187
3188 case TargetOpcode::G_SADDE:
3189 case TargetOpcode::G_UADDE:
3190 case TargetOpcode::G_SSUBE:
3191 case TargetOpcode::G_USUBE:
3192 case TargetOpcode::G_SADDO:
3193 case TargetOpcode::G_UADDO:
3194 case TargetOpcode::G_SSUBO:
3195 case TargetOpcode::G_USUBO:
3196 return selectOverflowOp(I, MRI);
3197
3198 case TargetOpcode::G_PTRMASK: {
3199 Register MaskReg = I.getOperand(2).getReg();
3200 std::optional<int64_t> MaskVal = getIConstantVRegSExtVal(MaskReg, MRI);
3201 // TODO: Implement arbitrary cases
3202 if (!MaskVal || !isShiftedMask_64(*MaskVal))
3203 return false;
3204
3205 uint64_t Mask = *MaskVal;
3206 I.setDesc(TII.get(AArch64::ANDXri));
3207 I.getOperand(2).ChangeToImmediate(
3208 AArch64_AM::encodeLogicalImmediate(Mask, 64));
3209
3210 constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3211 return true;
3212 }
3213 case TargetOpcode::G_PTRTOINT:
3214 case TargetOpcode::G_TRUNC: {
3215 const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
3216 const LLT SrcTy = MRI.getType(I.getOperand(1).getReg());
3217
3218 const Register DstReg = I.getOperand(0).getReg();
3219 const Register SrcReg = I.getOperand(1).getReg();
3220
3221 const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
3222 const RegisterBank &SrcRB = *RBI.getRegBank(SrcReg, MRI, TRI);
3223
3224 if (DstRB.getID() != SrcRB.getID()) {
3225 LLVM_DEBUG(
3226 dbgs() << "G_TRUNC/G_PTRTOINT input/output on different banks\n");
3227 return false;
3228 }
3229
3230 if (DstRB.getID() == AArch64::GPRRegBankID) {
3231 const TargetRegisterClass *DstRC = getRegClassForTypeOnBank(DstTy, DstRB);
3232 if (!DstRC)
3233 return false;
3234
3235 const TargetRegisterClass *SrcRC = getRegClassForTypeOnBank(SrcTy, SrcRB);
3236 if (!SrcRC)
3237 return false;
3238
3239 if (!RBI.constrainGenericRegister(SrcReg, *SrcRC, MRI) ||
3240 !RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) {
3241 LLVM_DEBUG(dbgs() << "Failed to constrain G_TRUNC/G_PTRTOINT\n");
3242 return false;
3243 }
3244
3245 if (DstRC == SrcRC) {
3246 // Nothing to be done
3247 } else if (Opcode == TargetOpcode::G_TRUNC && DstTy == LLT::scalar(32) &&
3248 SrcTy == LLT::scalar(64)) {
3249 llvm_unreachable("TableGen can import this case");
3250 return false;
3251 } else if (DstRC == &AArch64::GPR32RegClass &&
3252 SrcRC == &AArch64::GPR64RegClass) {
3253 I.getOperand(1).setSubReg(AArch64::sub_32);
3254 } else {
3255 LLVM_DEBUG(
3256 dbgs() << "Unhandled mismatched classes in G_TRUNC/G_PTRTOINT\n");
3257 return false;
3258 }
3259
3260 I.setDesc(TII.get(TargetOpcode::COPY));
3261 return true;
3262 } else if (DstRB.getID() == AArch64::FPRRegBankID) {
3263 if (DstTy == LLT::fixed_vector(4, 16) &&
3264 SrcTy == LLT::fixed_vector(4, 32)) {
3265 I.setDesc(TII.get(AArch64::XTNv4i16));
3266 constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3267 return true;
3268 }
3269
3270 if (!SrcTy.isVector() && SrcTy.getSizeInBits() == 128) {
3271 MachineInstr *Extract = emitExtractVectorElt(
3272 DstReg, DstRB, LLT::scalar(DstTy.getSizeInBits()), SrcReg, 0, MIB);
3273 if (!Extract)
3274 return false;
3275 I.eraseFromParent();
3276 return true;
3277 }
3278
3279 // We might have a vector G_PTRTOINT, in which case just emit a COPY.
3280 if (Opcode == TargetOpcode::G_PTRTOINT) {
3281 assert(DstTy.isVector() && "Expected an FPR ptrtoint to be a vector");
3282 I.setDesc(TII.get(TargetOpcode::COPY));
3283 return selectCopy(I, TII, MRI, TRI, RBI);
3284 }
3285 }
3286
3287 return false;
3288 }
3289
3290 case TargetOpcode::G_ANYEXT: {
3291 if (selectUSMovFromExtend(I, MRI))
3292 return true;
3293
3294 const Register DstReg = I.getOperand(0).getReg();
3295 const Register SrcReg = I.getOperand(1).getReg();
3296
3297 const RegisterBank &RBDst = *RBI.getRegBank(DstReg, MRI, TRI);
3298 if (RBDst.getID() != AArch64::GPRRegBankID) {
3299 LLVM_DEBUG(dbgs() << "G_ANYEXT on bank: " << RBDst
3300 << ", expected: GPR\n");
3301 return false;
3302 }
3303
3304 const RegisterBank &RBSrc = *RBI.getRegBank(SrcReg, MRI, TRI);
3305 if (RBSrc.getID() != AArch64::GPRRegBankID) {
3306 LLVM_DEBUG(dbgs() << "G_ANYEXT on bank: " << RBSrc
3307 << ", expected: GPR\n");
3308 return false;
3309 }
3310
3311 const unsigned DstSize = MRI.getType(DstReg).getSizeInBits();
3312
3313 if (DstSize == 0) {
3314 LLVM_DEBUG(dbgs() << "G_ANYEXT operand has no size, not a gvreg?\n");
3315 return false;
3316 }
3317
3318 if (DstSize != 64 && DstSize > 32) {
3319 LLVM_DEBUG(dbgs() << "G_ANYEXT to size: " << DstSize
3320 << ", expected: 32 or 64\n");
3321 return false;
3322 }
3323 // At this point G_ANYEXT is just like a plain COPY, but we need
3324 // to explicitly form the 64-bit value when extending to 64 bits.
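// e.g. %dst(s64) = G_ANYEXT %src(s32): wrap the source in a SUBREG_TO_REG so
// the final COPY reads a full 64-bit register; the upper 32 bits are left
// undefined, which is fine for an anyext.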
3325 if (DstSize > 32) {
3326 Register ExtSrc = MRI.createVirtualRegister(&AArch64::GPR64allRegClass);
3327 BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::SUBREG_TO_REG))
3328 .addDef(ExtSrc)
3329 .addUse(SrcReg)
3330 .addImm(AArch64::sub_32);
3331 I.getOperand(1).setReg(ExtSrc);
3332 }
3333 return selectCopy(I, TII, MRI, TRI, RBI);
3334 }
3335
3336 case TargetOpcode::G_ZEXT:
3337 case TargetOpcode::G_SEXT_INREG:
3338 case TargetOpcode::G_SEXT: {
3339 if (selectUSMovFromExtend(I, MRI))
3340 return true;
3341
3342 unsigned Opcode = I.getOpcode();
3343 const bool IsSigned = Opcode != TargetOpcode::G_ZEXT;
3344 const Register DefReg = I.getOperand(0).getReg();
3345 Register SrcReg = I.getOperand(1).getReg();
3346 const LLT DstTy = MRI.getType(DefReg);
3347 const LLT SrcTy = MRI.getType(SrcReg);
3348 unsigned DstSize = DstTy.getSizeInBits();
3349 unsigned SrcSize = SrcTy.getSizeInBits();
3350
3351 // SEXT_INREG has the same src reg size as dst, the size of the value to be
3352 // extended is encoded in the imm.
3353 if (Opcode == TargetOpcode::G_SEXT_INREG)
3354 SrcSize = I.getOperand(2).getImm();
3355
3356 if (DstTy.isVector())
3357 return false; // Should be handled by imported patterns.
3358
3359 assert((*RBI.getRegBank(DefReg, MRI, TRI)).getID() ==
3360 AArch64::GPRRegBankID &&
3361 "Unexpected ext regbank");
3362
3363 MachineInstr *ExtI;
3364
3365 // First, check whether we're extending the result of a load whose dest type
3366 // is smaller than 32 bits; if so, this zext is redundant. GPR32 is the
3367 // smallest GPR register class on AArch64, and any load smaller than that
3368 // automatically zero-extends the upper bits. E.g.
3369 // %v(s8) = G_LOAD %p, :: (load 1)
3370 // %v2(s32) = G_ZEXT %v(s8)
3371 if (!IsSigned) {
3372 auto *LoadMI = getOpcodeDef(TargetOpcode::G_LOAD, SrcReg, MRI);
3373 bool IsGPR =
3374 RBI.getRegBank(SrcReg, MRI, TRI)->getID() == AArch64::GPRRegBankID;
3375 if (LoadMI && IsGPR) {
3376 const MachineMemOperand *MemOp = *LoadMI->memoperands_begin();
3377 unsigned BytesLoaded = MemOp->getSize().getValue();
3378 if (BytesLoaded < 4 && SrcTy.getSizeInBytes() == BytesLoaded)
3379 return selectCopy(I, TII, MRI, TRI, RBI);
3380 }
3381
3382 // For the 32-bit -> 64-bit case, we can emit a mov (ORRWrs)
3383 // + SUBREG_TO_REG.
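// Roughly:
//   %tmp:gpr32 = ORRWrs $wzr, %src, 0   ; 32-bit mov implicitly zeroes bits 63:32
//   %dst:gpr64 = SUBREG_TO_REG %tmp, sub_32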
3384 if (IsGPR && SrcSize == 32 && DstSize == 64) {
3385 Register SubregToRegSrc =
3386 MRI.createVirtualRegister(&AArch64::GPR32RegClass);
3387 const Register ZReg = AArch64::WZR;
3388 MIB.buildInstr(AArch64::ORRWrs, {SubregToRegSrc}, {ZReg, SrcReg})
3389 .addImm(0);
3390
3391 MIB.buildInstr(AArch64::SUBREG_TO_REG, {DefReg}, {})
3392 .addUse(SubregToRegSrc)
3393 .addImm(AArch64::sub_32);
3394
3395 if (!RBI.constrainGenericRegister(DefReg, AArch64::GPR64RegClass,
3396 MRI)) {
3397 LLVM_DEBUG(dbgs() << "Failed to constrain G_ZEXT destination\n");
3398 return false;
3399 }
3400
3401 if (!RBI.constrainGenericRegister(SrcReg, AArch64::GPR32RegClass,
3402 MRI)) {
3403 LLVM_DEBUG(dbgs() << "Failed to constrain G_ZEXT source\n");
3404 return false;
3405 }
3406
3407 I.eraseFromParent();
3408 return true;
3409 }
3410 }
3411
3412 if (DstSize == 64) {
3413 if (Opcode != TargetOpcode::G_SEXT_INREG) {
3414 // FIXME: Can we avoid manually doing this?
3415 if (!RBI.constrainGenericRegister(SrcReg, AArch64::GPR32RegClass,
3416 MRI)) {
3417 LLVM_DEBUG(dbgs() << "Failed to constrain " << TII.getName(Opcode)
3418 << " operand\n");
3419 return false;
3420 }
3421 SrcReg = MIB.buildInstr(AArch64::SUBREG_TO_REG,
3422 {&AArch64::GPR64RegClass}, {})
3423 .addUse(SrcReg)
3424 .addImm(AArch64::sub_32)
3425 .getReg(0);
3426 }
3427
3428 ExtI = MIB.buildInstr(IsSigned ? AArch64::SBFMXri : AArch64::UBFMXri,
3429 {DefReg}, {SrcReg})
3430 .addImm(0)
3431 .addImm(SrcSize - 1);
3432 } else if (DstSize <= 32) {
3433 ExtI = MIB.buildInstr(IsSigned ? AArch64::SBFMWri : AArch64::UBFMWri,
3434 {DefReg}, {SrcReg})
3435 .addImm(0)
3436 .addImm(SrcSize - 1);
3437 } else {
3438 return false;
3439 }
3440
3441 constrainSelectedInstRegOperands(*ExtI, TII, TRI, RBI);
3442 I.eraseFromParent();
3443 return true;
3444 }
3445
3446 case TargetOpcode::G_FREEZE:
3447 return selectCopy(I, TII, MRI, TRI, RBI);
3448
3449 case TargetOpcode::G_INTTOPTR:
3450 // The importer is currently unable to import pointer types since they
3451 // didn't exist in SelectionDAG.
3452 return selectCopy(I, TII, MRI, TRI, RBI);
3453
3454 case TargetOpcode::G_BITCAST:
3455 // Imported SelectionDAG rules can handle every bitcast except those that
3456 // bitcast from a type to the same type. Ideally, these shouldn't occur
3457 // but we might not run an optimizer that deletes them. The other exception
3458 // is bitcasts involving pointer types, as SelectionDAG has no knowledge
3459 // of them.
3460 return selectCopy(I, TII, MRI, TRI, RBI);
3461
3462 case TargetOpcode::G_SELECT: {
3463 auto &Sel = cast<GSelect>(I);
3464 const Register CondReg = Sel.getCondReg();
3465 const Register TReg = Sel.getTrueReg();
3466 const Register FReg = Sel.getFalseReg();
3467
3468 if (tryOptSelect(Sel))
3469 return true;
3470
3471 // Make sure to use an unused vreg instead of wzr, so that the peephole
3472 // optimizations will be able to optimize these.
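// The emitted sequence is roughly:
//   %dead:gpr32 = ANDSWri %cond, 1   ; test bit 0 of the condition
//   %dst        = CSEL %t, %f, ne    ; emitSelect may fold to CSINC/CSINV/CSNEG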
3473 Register DeadVReg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
3474 auto TstMI = MIB.buildInstr(AArch64::ANDSWri, {DeadVReg}, {CondReg})
3475 .addImm(AArch64_AM::encodeLogicalImmediate(1, 32));
3476 constrainSelectedInstRegOperands(*TstMI, TII, TRI, RBI);
3477 if (!emitSelect(Sel.getReg(0), TReg, FReg, AArch64CC::NE, MIB))
3478 return false;
3479 Sel.eraseFromParent();
3480 return true;
3481 }
3482 case TargetOpcode::G_ICMP: {
3483 if (Ty.isVector())
3484 return false;
3485
3486 if (Ty != LLT::scalar(32)) {
3487 LLVM_DEBUG(dbgs() << "G_ICMP result has type: " << Ty
3488 << ", expected: " << LLT::scalar(32) << '\n');
3489 return false;
3490 }
3491
3492 auto &PredOp = I.getOperand(1);
3493 emitIntegerCompare(I.getOperand(2), I.getOperand(3), PredOp, MIB);
3494 auto Pred = static_cast<CmpInst::Predicate>(PredOp.getPredicate());
3495 const AArch64CC::CondCode InvCC = changeICMPPredToAArch64CC(
3496 CmpInst::getInversePredicate(Pred), I.getOperand(3).getReg(), &MRI);
3497 emitCSINC(/*Dst=*/I.getOperand(0).getReg(), /*Src1=*/AArch64::WZR,
3498 /*Src2=*/AArch64::WZR, InvCC, MIB);
3499 I.eraseFromParent();
3500 return true;
3501 }
3502
3503 case TargetOpcode::G_FCMP: {
3504 CmpInst::Predicate Pred =
3505 static_cast<CmpInst::Predicate>(I.getOperand(1).getPredicate());
3506 if (!emitFPCompare(I.getOperand(2).getReg(), I.getOperand(3).getReg(), MIB,
3507 Pred) ||
3508 !emitCSetForFCmp(I.getOperand(0).getReg(), Pred, MIB))
3509 return false;
3510 I.eraseFromParent();
3511 return true;
3512 }
3513 case TargetOpcode::G_VASTART:
3514 return STI.isTargetDarwin() ? selectVaStartDarwin(I, MF, MRI)
3515 : selectVaStartAAPCS(I, MF, MRI);
3516 case TargetOpcode::G_INTRINSIC:
3517 return selectIntrinsic(I, MRI);
3518 case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS:
3519 return selectIntrinsicWithSideEffects(I, MRI);
3520 case TargetOpcode::G_IMPLICIT_DEF: {
3521 I.setDesc(TII.get(TargetOpcode::IMPLICIT_DEF));
3522 const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
3523 const Register DstReg = I.getOperand(0).getReg();
3524 const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
3525 const TargetRegisterClass *DstRC = getRegClassForTypeOnBank(DstTy, DstRB);
3526 RBI.constrainGenericRegister(DstReg, *DstRC, MRI);
3527 return true;
3528 }
3529 case TargetOpcode::G_BLOCK_ADDR: {
3530 Function *BAFn = I.getOperand(1).getBlockAddress()->getFunction();
3531 if (std::optional<uint16_t> BADisc =
3532 STI.getPtrAuthBlockAddressDiscriminatorIfEnabled(*BAFn)) {
3533 MIB.buildInstr(TargetOpcode::IMPLICIT_DEF, {AArch64::X16}, {});
3534 MIB.buildInstr(TargetOpcode::IMPLICIT_DEF, {AArch64::X17}, {});
3535 MIB.buildInstr(AArch64::MOVaddrPAC)
3536 .addBlockAddress(I.getOperand(1).getBlockAddress())
3537 .addImm(AArch64PACKey::IA)
3538 .addReg(/*AddrDisc=*/AArch64::XZR)
3539 .addImm(*BADisc)
3540 .constrainAllUses(TII, TRI, RBI);
3541 MIB.buildCopy(I.getOperand(0).getReg(), Register(AArch64::X16));
3542 RBI.constrainGenericRegister(I.getOperand(0).getReg(),
3543 AArch64::GPR64RegClass, MRI);
3544 I.eraseFromParent();
3545 return true;
3546 }
3547 if (TM.getCodeModel() == CodeModel::Large && !TM.isPositionIndependent()) {
3548 materializeLargeCMVal(I, I.getOperand(1).getBlockAddress(), 0);
3549 I.eraseFromParent();
3550 return true;
3551 } else {
3552 I.setDesc(TII.get(AArch64::MOVaddrBA));
3553 auto MovMI = BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::MOVaddrBA),
3554 I.getOperand(0).getReg())
3555 .addBlockAddress(I.getOperand(1).getBlockAddress(),
3556 /* Offset */ 0, AArch64II::MO_PAGE)
3557 .addBlockAddress(
3558 I.getOperand(1).getBlockAddress(), /* Offset */ 0,
3559 AArch64II::MO_NC | AArch64II::MO_PAGEOFF);
3560 I.eraseFromParent();
3561 constrainSelectedInstRegOperands(*MovMI, TII, TRI, RBI);
3562 return true;
3563 }
3564 }
3565 case AArch64::G_DUP: {
3566 // When the scalar of G_DUP is an s8/s16 gpr, it can't be selected by the
3567 // imported patterns, so do it manually here. Avoiding the s16 gpr in the
3568 // first place is difficult because at register-bank selection we may end up
3569 // pessimizing the fpr case if we decided to add an anyextend to fix this.
3570 // Manual selection is the most robust solution for now.
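// e.g. %v(<8 x s8>) = G_DUP %s(s8) with %s on the GPR bank selects to
// DUPv8i8gpr, which broadcasts the low 8 bits of the W register into each lane.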
3571 if (RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI)->getID() !=
3572 AArch64::GPRRegBankID)
3573 return false; // We expect the fpr regbank case to be imported.
3574 LLT VecTy = MRI.getType(I.getOperand(0).getReg());
3575 if (VecTy == LLT::fixed_vector(8, 8))
3576 I.setDesc(TII.get(AArch64::DUPv8i8gpr));
3577 else if (VecTy == LLT::fixed_vector(16, 8))
3578 I.setDesc(TII.get(AArch64::DUPv16i8gpr));
3579 else if (VecTy == LLT::fixed_vector(4, 16))
3580 I.setDesc(TII.get(AArch64::DUPv4i16gpr));
3581 else if (VecTy == LLT::fixed_vector(8, 16))
3582 I.setDesc(TII.get(AArch64::DUPv8i16gpr));
3583 else
3584 return false;
3585 constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3586 return true;
3587 }
3588 case TargetOpcode::G_BUILD_VECTOR:
3589 return selectBuildVector(I, MRI);
3590 case TargetOpcode::G_MERGE_VALUES:
3591 return selectMergeValues(I, MRI);
3592 case TargetOpcode::G_UNMERGE_VALUES:
3593 return selectUnmergeValues(I, MRI);
3594 case TargetOpcode::G_SHUFFLE_VECTOR:
3595 return selectShuffleVector(I, MRI);
3596 case TargetOpcode::G_EXTRACT_VECTOR_ELT:
3597 return selectExtractElt(I, MRI);
3598 case TargetOpcode::G_CONCAT_VECTORS:
3599 return selectConcatVectors(I, MRI);
3600 case TargetOpcode::G_JUMP_TABLE:
3601 return selectJumpTable(I, MRI);
3602 case TargetOpcode::G_MEMCPY:
3603 case TargetOpcode::G_MEMCPY_INLINE:
3604 case TargetOpcode::G_MEMMOVE:
3605 case TargetOpcode::G_MEMSET:
3606 assert(STI.hasMOPS() && "Shouldn't get here without +mops feature");
3607 return selectMOPS(I, MRI);
3608 }
3609
3610 return false;
3611}
3612
3613bool AArch64InstructionSelector::selectAndRestoreState(MachineInstr &I) {
3614 MachineIRBuilderState OldMIBState = MIB.getState();
3615 bool Success = select(I);
3616 MIB.setState(OldMIBState);
3617 return Success;
3618}
3619
3620bool AArch64InstructionSelector::selectMOPS(MachineInstr &GI,
3621 MachineRegisterInfo &MRI) {
3622 unsigned Mopcode;
3623 switch (GI.getOpcode()) {
3624 case TargetOpcode::G_MEMCPY:
3625 case TargetOpcode::G_MEMCPY_INLINE:
3626 Mopcode = AArch64::MOPSMemoryCopyPseudo;
3627 break;
3628 case TargetOpcode::G_MEMMOVE:
3629 Mopcode = AArch64::MOPSMemoryMovePseudo;
3630 break;
3631 case TargetOpcode::G_MEMSET:
3632 // For tagged memset see llvm.aarch64.mops.memset.tag
3633 Mopcode = AArch64::MOPSMemorySetPseudo;
3634 break;
3635 }
3636
3637 auto &DstPtr = GI.getOperand(0);
3638 auto &SrcOrVal = GI.getOperand(1);
3639 auto &Size = GI.getOperand(2);
3640
3641 // Create copies of the registers that can be clobbered.
3642 const Register DstPtrCopy = MRI.cloneVirtualRegister(DstPtr.getReg());
3643 const Register SrcValCopy = MRI.cloneVirtualRegister(SrcOrVal.getReg());
3644 const Register SizeCopy = MRI.cloneVirtualRegister(Size.getReg());
3645
3646 const bool IsSet = Mopcode == AArch64::MOPSMemorySetPseudo;
3647 const auto &SrcValRegClass =
3648 IsSet ? AArch64::GPR64RegClass : AArch64::GPR64commonRegClass;
3649
3650 // Constrain to specific registers
3651 RBI.constrainGenericRegister(DstPtrCopy, AArch64::GPR64commonRegClass, MRI);
3652 RBI.constrainGenericRegister(SrcValCopy, SrcValRegClass, MRI);
3653 RBI.constrainGenericRegister(SizeCopy, AArch64::GPR64RegClass, MRI);
3654
3655 MIB.buildCopy(DstPtrCopy, DstPtr);
3656 MIB.buildCopy(SrcValCopy, SrcOrVal);
3657 MIB.buildCopy(SizeCopy, Size);
3658
3659 // The new instruction uses the copied registers because it must update them.
3660 // The defs are not used since they don't exist in G_MEM*; they are still
3661 // tied.
3662 // Note: the operand order differs from G_MEMSET, G_MEMCPY and G_MEMMOVE.
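// For a set, the pseudo is (dst_out, size_out) = (dst, size, val), whereas
// G_MEMSET is (dst, val, size); the copy/move pseudos also define an updated
// source pointer: (dst_out, src_out, size_out) = (dst, src, size).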
3663 Register DefDstPtr = MRI.createVirtualRegister(&AArch64::GPR64commonRegClass);
3664 Register DefSize = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
3665 if (IsSet) {
3666 MIB.buildInstr(Mopcode, {DefDstPtr, DefSize},
3667 {DstPtrCopy, SizeCopy, SrcValCopy});
3668 } else {
3669 Register DefSrcPtr = MRI.createVirtualRegister(&SrcValRegClass);
3670 MIB.buildInstr(Mopcode, {DefDstPtr, DefSrcPtr, DefSize},
3671 {DstPtrCopy, SrcValCopy, SizeCopy});
3672 }
3673
3674 GI.eraseFromParent();
3675 return true;
3676}
3677
3678bool AArch64InstructionSelector::selectBrJT(MachineInstr &I,
3679 MachineRegisterInfo &MRI) {
3680 assert(I.getOpcode() == TargetOpcode::G_BRJT && "Expected G_BRJT");
3681 Register JTAddr = I.getOperand(0).getReg();
3682 unsigned JTI = I.getOperand(1).getIndex();
3683 Register Index = I.getOperand(2).getReg();
3684
3685 MF->getInfo<AArch64FunctionInfo>()->setJumpTableEntryInfo(JTI, 4, nullptr);
3686
3687 // With aarch64-jump-table-hardening, we only expand the jump table dispatch
3688 // sequence later, to guarantee the integrity of the intermediate values.
3689 if (MF->getFunction().hasFnAttribute("aarch64-jump-table-hardening")) {
3690 CodeModel::Model CM = TM.getCodeModel();
3691 if (STI.isTargetMachO()) {
3692 if (CM != CodeModel::Small && CM != CodeModel::Large)
3693 report_fatal_error("Unsupported code-model for hardened jump-table");
3694 } else {
3695 // Note that COFF support would likely also need JUMP_TABLE_DEBUG_INFO.
3696 assert(STI.isTargetELF() &&
3697 "jump table hardening only supported on MachO/ELF");
3698 if (CM != CodeModel::Small)
3699 report_fatal_error("Unsupported code-model for hardened jump-table");
3700 }
3701
3702 MIB.buildCopy({AArch64::X16}, I.getOperand(2).getReg());
3703 MIB.buildInstr(AArch64::BR_JumpTable)
3704 .addJumpTableIndex(I.getOperand(1).getIndex());
3705 I.eraseFromParent();
3706 return true;
3707 }
3708
3709 Register TargetReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
3710 Register ScratchReg = MRI.createVirtualRegister(&AArch64::GPR64spRegClass);
3711
3712 auto JumpTableInst = MIB.buildInstr(AArch64::JumpTableDest32,
3713 {TargetReg, ScratchReg}, {JTAddr, Index})
3714 .addJumpTableIndex(JTI);
3715 // Save the jump table info.
3716 MIB.buildInstr(TargetOpcode::JUMP_TABLE_DEBUG_INFO, {},
3717 {static_cast<int64_t>(JTI)});
3718 // Build the indirect branch.
3719 MIB.buildInstr(AArch64::BR, {}, {TargetReg});
3720 I.eraseFromParent();
3721 constrainSelectedInstRegOperands(*JumpTableInst, TII, TRI, RBI);
3722 return true;
3723}
3724
3725bool AArch64InstructionSelector::selectJumpTable(MachineInstr &I,
3726 MachineRegisterInfo &MRI) {
3727 assert(I.getOpcode() == TargetOpcode::G_JUMP_TABLE && "Expected jump table");
3728 assert(I.getOperand(1).isJTI() && "Jump table op should have a JTI!");
3729
3730 Register DstReg = I.getOperand(0).getReg();
3731 unsigned JTI = I.getOperand(1).getIndex();
3732 // We generate a MOVaddrJT which will get expanded to an ADRP + ADD later.
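// i.e. the pseudo later becomes something like:
//   adrp xN, .LJTI0_0
//   add  xN, xN, :lo12:.LJTI0_0
// (the label name is illustrative).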
3733 auto MovMI =
3734 MIB.buildInstr(AArch64::MOVaddrJT, {DstReg}, {})
3735 .addJumpTableIndex(JTI, AArch64II::MO_PAGE)
3736 .addJumpTableIndex(JTI, AArch64II::MO_NC | AArch64II::MO_PAGEOFF);
3737 I.eraseFromParent();
3738 constrainSelectedInstRegOperands(*MovMI, TII, TRI, RBI);
3739 return true;
3740}
3741
3742bool AArch64InstructionSelector::selectTLSGlobalValue(
3743 MachineInstr &I, MachineRegisterInfo &MRI) {
3744 if (!STI.isTargetMachO())
3745 return false;
3746 MachineFunction &MF = *I.getParent()->getParent();
3747 MF.getFrameInfo().setAdjustsStack(true);
3748
3749 const auto &GlobalOp = I.getOperand(1);
3750 assert(GlobalOp.getOffset() == 0 &&
3751 "Shouldn't have an offset on TLS globals!");
3752 const GlobalValue &GV = *GlobalOp.getGlobal();
3753
3754 auto LoadGOT =
3755 MIB.buildInstr(AArch64::LOADgot, {&AArch64::GPR64commonRegClass}, {})
3756 .addGlobalAddress(&GV, 0, AArch64II::MO_TLS);
3757
3758 auto Load = MIB.buildInstr(AArch64::LDRXui, {&AArch64::GPR64commonRegClass},
3759 {LoadGOT.getReg(0)})
3760 .addImm(0);
3761
3762 MIB.buildCopy(Register(AArch64::X0), LoadGOT.getReg(0));
3763 // TLS calls preserve all registers except those that absolutely must be
3764 // trashed: X0 (it takes an argument), LR (it's a call) and NZCV (let's not be
3765 // silly).
3766 unsigned Opcode = getBLRCallOpcode(MF);
3767
3768 // With ptrauth-calls, the tlv access thunk pointer is authenticated (IA, 0).
3769 if (MF.getFunction().hasFnAttribute("ptrauth-calls")) {
3770 assert(Opcode == AArch64::BLR);
3771 Opcode = AArch64::BLRAAZ;
3772 }
3773
3774 MIB.buildInstr(Opcode, {}, {Load})
3775 .addUse(AArch64::X0, RegState::Implicit)
3776 .addDef(AArch64::X0, RegState::Implicit)
3777 .addRegMask(TRI.getTLSCallPreservedMask());
3778
3779 MIB.buildCopy(I.getOperand(0).getReg(), Register(AArch64::X0));
3780 RBI.constrainGenericRegister(I.getOperand(0).getReg(), AArch64::GPR64RegClass,
3781 MRI);
3782 I.eraseFromParent();
3783 return true;
3784}
3785
3786MachineInstr *AArch64InstructionSelector::emitScalarToVector(
3787 unsigned EltSize, const TargetRegisterClass *DstRC, Register Scalar,
3788 MachineIRBuilder &MIRBuilder) const {
3789 auto Undef = MIRBuilder.buildInstr(TargetOpcode::IMPLICIT_DEF, {DstRC}, {});
3790
3791 auto BuildFn = [&](unsigned SubregIndex) {
3792 auto Ins =
3793 MIRBuilder
3794 .buildInstr(TargetOpcode::INSERT_SUBREG, {DstRC}, {Undef, Scalar})
3795 .addImm(SubregIndex);
3796 constrainSelectedInstRegOperands(*Undef, TII, TRI, RBI);
3797 constrainSelectedInstRegOperands(*Ins, TII, TRI, RBI);
3798 return &*Ins;
3799 };
3800
3801 switch (EltSize) {
3802 case 8:
3803 return BuildFn(AArch64::bsub);
3804 case 16:
3805 return BuildFn(AArch64::hsub);
3806 case 32:
3807 return BuildFn(AArch64::ssub);
3808 case 64:
3809 return BuildFn(AArch64::dsub);
3810 default:
3811 return nullptr;
3812 }
3813}
3814
3815MachineInstr *
3816AArch64InstructionSelector::emitNarrowVector(Register DstReg, Register SrcReg,
3817 MachineIRBuilder &MIB,
3818 MachineRegisterInfo &MRI) const {
3819 LLT DstTy = MRI.getType(DstReg);
3820 const TargetRegisterClass *RC =
3821 getRegClassForTypeOnBank(DstTy, *RBI.getRegBank(SrcReg, MRI, TRI));
3822 if (RC != &AArch64::FPR32RegClass && RC != &AArch64::FPR64RegClass) {
3823 LLVM_DEBUG(dbgs() << "Unsupported register class!\n");
3824 return nullptr;
3825 }
3826 unsigned SubReg = 0;
3827 if (!getSubRegForClass(RC, TRI, SubReg))
3828 return nullptr;
3829 if (SubReg != AArch64::ssub && SubReg != AArch64::dsub) {
3830 LLVM_DEBUG(dbgs() << "Unsupported destination size! ("
3831 << DstTy.getSizeInBits() << ")\n");
3832 return nullptr;
3833 }
3834 auto Copy = MIB.buildInstr(TargetOpcode::COPY, {DstReg}, {})
3835 .addReg(SrcReg, {}, SubReg);
3836 RBI.constrainGenericRegister(DstReg, *RC, MRI);
3837 return Copy;
3838}
3839
3840bool AArch64InstructionSelector::selectMergeValues(
3841 MachineInstr &I, MachineRegisterInfo &MRI) {
3842 assert(I.getOpcode() == TargetOpcode::G_MERGE_VALUES && "unexpected opcode");
3843 const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
3844 const LLT SrcTy = MRI.getType(I.getOperand(1).getReg());
3845 assert(!DstTy.isVector() && !SrcTy.isVector() && "invalid merge operation");
3846 const RegisterBank &RB = *RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI);
3847
3848 if (I.getNumOperands() != 3)
3849 return false;
3850
3851 // Merging 2 s64s into an s128.
3852 if (DstTy == LLT::scalar(128)) {
3853 if (SrcTy.getSizeInBits() != 64)
3854 return false;
3855 Register DstReg = I.getOperand(0).getReg();
3856 Register Src1Reg = I.getOperand(1).getReg();
3857 Register Src2Reg = I.getOperand(2).getReg();
3858 auto Tmp = MIB.buildInstr(TargetOpcode::IMPLICIT_DEF, {DstTy}, {});
3859 MachineInstr *InsMI = emitLaneInsert(std::nullopt, Tmp.getReg(0), Src1Reg,
3860 /* LaneIdx */ 0, RB, MIB);
3861 if (!InsMI)
3862 return false;
3863 MachineInstr *Ins2MI = emitLaneInsert(DstReg, InsMI->getOperand(0).getReg(),
3864 Src2Reg, /* LaneIdx */ 1, RB, MIB);
3865 if (!Ins2MI)
3866 return false;
3867 constrainSelectedInstRegOperands(*InsMI, TII, TRI, RBI);
3868 constrainSelectedInstRegOperands(*Ins2MI, TII, TRI, RBI);
3869 I.eraseFromParent();
3870 return true;
3871 }
3872
3873 if (RB.getID() != AArch64::GPRRegBankID)
3874 return false;
3875
3876 if (DstTy.getSizeInBits() != 64 || SrcTy.getSizeInBits() != 32)
3877 return false;
3878
3879 auto *DstRC = &AArch64::GPR64RegClass;
3880 Register SubToRegDef = MRI.createVirtualRegister(DstRC);
3881 MachineInstr &SubRegMI = *BuildMI(*I.getParent(), I, I.getDebugLoc(),
3882 TII.get(TargetOpcode::SUBREG_TO_REG))
3883 .addDef(SubToRegDef)
3884 .addUse(I.getOperand(1).getReg())
3885 .addImm(AArch64::sub_32);
3886 Register SubToRegDef2 = MRI.createVirtualRegister(DstRC);
3887 // Need to anyext the second scalar before we can use bfm
3888 MachineInstr &SubRegMI2 = *BuildMI(*I.getParent(), I, I.getDebugLoc(),
3889 TII.get(TargetOpcode::SUBREG_TO_REG))
3890 .addDef(SubToRegDef2)
3891 .addUse(I.getOperand(2).getReg())
3892 .addImm(AArch64::sub_32);
3893 MachineInstr &BFM =
3894 *BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::BFMXri))
3895 .addDef(I.getOperand(0).getReg())
3896 .addUse(SubToRegDef)
3897 .addUse(SubToRegDef2)
3898 .addImm(32)
3899 .addImm(31);
3900 constrainSelectedInstRegOperands(SubRegMI, TII, TRI, RBI);
3901 constrainSelectedInstRegOperands(SubRegMI2, TII, TRI, RBI);
3902 constrainSelectedInstRegOperands(BFM, TII, TRI, RBI);
3903 I.eraseFromParent();
3904 return true;
3905}
3906
3907static bool getLaneCopyOpcode(unsigned &CopyOpc, unsigned &ExtractSubReg,
3908 const unsigned EltSize) {
3909 // Choose a lane copy opcode and subregister based on the size of the
3910 // vector's elements.
3911 switch (EltSize) {
3912 case 8:
3913 CopyOpc = AArch64::DUPi8;
3914 ExtractSubReg = AArch64::bsub;
3915 break;
3916 case 16:
3917 CopyOpc = AArch64::DUPi16;
3918 ExtractSubReg = AArch64::hsub;
3919 break;
3920 case 32:
3921 CopyOpc = AArch64::DUPi32;
3922 ExtractSubReg = AArch64::ssub;
3923 break;
3924 case 64:
3925 CopyOpc = AArch64::DUPi64;
3926 ExtractSubReg = AArch64::dsub;
3927 break;
3928 default:
3929 // Unknown size, bail out.
3930 LLVM_DEBUG(dbgs() << "Elt size '" << EltSize << "' unsupported.\n");
3931 return false;
3932 }
3933 return true;
3934}
3935
3936MachineInstr *AArch64InstructionSelector::emitExtractVectorElt(
3937 std::optional<Register> DstReg, const RegisterBank &DstRB, LLT ScalarTy,
3938 Register VecReg, unsigned LaneIdx, MachineIRBuilder &MIRBuilder) const {
3939 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
3940 unsigned CopyOpc = 0;
3941 unsigned ExtractSubReg = 0;
3942 if (!getLaneCopyOpcode(CopyOpc, ExtractSubReg, ScalarTy.getSizeInBits())) {
3943 LLVM_DEBUG(
3944 dbgs() << "Couldn't determine lane copy opcode for instruction.\n");
3945 return nullptr;
3946 }
3947
3948 const TargetRegisterClass *DstRC =
3949 getRegClassForTypeOnBank(ScalarTy, DstRB, true);
3950 if (!DstRC) {
3951 LLVM_DEBUG(dbgs() << "Could not determine destination register class.\n");
3952 return nullptr;
3953 }
3954
3955 const RegisterBank &VecRB = *RBI.getRegBank(VecReg, MRI, TRI);
3956 const LLT &VecTy = MRI.getType(VecReg);
3957 const TargetRegisterClass *VecRC =
3958 getRegClassForTypeOnBank(VecTy, VecRB, true);
3959 if (!VecRC) {
3960 LLVM_DEBUG(dbgs() << "Could not determine source register class.\n");
3961 return nullptr;
3962 }
3963
3964 // The register that we're going to copy into.
3965 Register InsertReg = VecReg;
3966 if (!DstReg)
3967 DstReg = MRI.createVirtualRegister(DstRC);
3968 // If the lane index is 0, we just use a subregister COPY.
3969 if (LaneIdx == 0) {
3970 auto Copy = MIRBuilder.buildInstr(TargetOpcode::COPY, {*DstReg}, {})
3971 .addReg(VecReg, {}, ExtractSubReg);
3972 RBI.constrainGenericRegister(*DstReg, *DstRC, MRI);
3973 return &*Copy;
3974 }
3975
3976 // Lane copies require 128-bit wide registers. If we're dealing with an
3977 // unpacked vector, then we need to move up to that width. Insert an implicit
3978 // def and a subregister insert to get us there.
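// e.g. extracting lane 1 of a 64-bit <2 x s32> source first widens it:
//   %wide:fpr128 = INSERT_SUBREG (IMPLICIT_DEF), %vec, dsub
// and then copies the lane out with DUPi32.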
3979 if (VecTy.getSizeInBits() != 128) {
3980 MachineInstr *ScalarToVector = emitScalarToVector(
3981 VecTy.getSizeInBits(), &AArch64::FPR128RegClass, VecReg, MIRBuilder);
3982 if (!ScalarToVector)
3983 return nullptr;
3984 InsertReg = ScalarToVector->getOperand(0).getReg();
3985 }
3986
3987 MachineInstr *LaneCopyMI =
3988 MIRBuilder.buildInstr(CopyOpc, {*DstReg}, {InsertReg}).addImm(LaneIdx);
3989 constrainSelectedInstRegOperands(*LaneCopyMI, TII, TRI, RBI);
3990
3991 // Make sure that we actually constrain the initial copy.
3992 RBI.constrainGenericRegister(*DstReg, *DstRC, MRI);
3993 return LaneCopyMI;
3994}
3995
3996bool AArch64InstructionSelector::selectExtractElt(
3997 MachineInstr &I, MachineRegisterInfo &MRI) {
3998 assert(I.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT &&
3999 "unexpected opcode!");
4000 Register DstReg = I.getOperand(0).getReg();
4001 const LLT NarrowTy = MRI.getType(DstReg);
4002 const Register SrcReg = I.getOperand(1).getReg();
4003 const LLT WideTy = MRI.getType(SrcReg);
4004 (void)WideTy;
4005 assert(WideTy.getSizeInBits() >= NarrowTy.getSizeInBits() &&
4006 "source register size too small!");
4007 assert(!NarrowTy.isVector() && "cannot extract vector into vector!");
4008
4009 // Need the lane index to determine the correct copy opcode.
4010 MachineOperand &LaneIdxOp = I.getOperand(2);
4011 assert(LaneIdxOp.isReg() && "Lane index operand was not a register?");
4012
4013 if (RBI.getRegBank(DstReg, MRI, TRI)->getID() != AArch64::FPRRegBankID) {
4014 LLVM_DEBUG(dbgs() << "Cannot extract into GPR.\n");
4015 return false;
4016 }
4017
4018 // Find the index to extract from.
4019 auto VRegAndVal = getIConstantVRegValWithLookThrough(LaneIdxOp.getReg(), MRI);
4020 if (!VRegAndVal)
4021 return false;
4022 unsigned LaneIdx = VRegAndVal->Value.getSExtValue();
4023
4024
4025 const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
4026 MachineInstr *Extract = emitExtractVectorElt(DstReg, DstRB, NarrowTy, SrcReg,
4027 LaneIdx, MIB);
4028 if (!Extract)
4029 return false;
4030
4031 I.eraseFromParent();
4032 return true;
4033}
4034
4035bool AArch64InstructionSelector::selectSplitVectorUnmerge(
4036 MachineInstr &I, MachineRegisterInfo &MRI) {
4037 unsigned NumElts = I.getNumOperands() - 1;
4038 Register SrcReg = I.getOperand(NumElts).getReg();
4039 const LLT NarrowTy = MRI.getType(I.getOperand(0).getReg());
4040 const LLT SrcTy = MRI.getType(SrcReg);
4041
4042 assert(NarrowTy.isVector() && "Expected an unmerge into vectors");
4043 if (SrcTy.getSizeInBits() > 128) {
4044 LLVM_DEBUG(dbgs() << "Unexpected vector type for vec split unmerge");
4045 return false;
4046 }
4047
4048 // We implement a split vector operation by treating the sub-vectors as
4049 // scalars and extracting them.
4050 const RegisterBank &DstRB =
4051 *RBI.getRegBank(I.getOperand(0).getReg(), MRI, TRI);
4052 for (unsigned OpIdx = 0; OpIdx < NumElts; ++OpIdx) {
4053 Register Dst = I.getOperand(OpIdx).getReg();
4054 MachineInstr *Extract =
4055 emitExtractVectorElt(Dst, DstRB, NarrowTy, SrcReg, OpIdx, MIB);
4056 if (!Extract)
4057 return false;
4058 }
4059 I.eraseFromParent();
4060 return true;
4061}
4062
4063bool AArch64InstructionSelector::selectUnmergeValues(MachineInstr &I,
4064 MachineRegisterInfo &MRI) {
4065 assert(I.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
4066 "unexpected opcode");
4067
4068 // TODO: Handle unmerging into GPRs and from scalars to scalars.
4069 if (RBI.getRegBank(I.getOperand(0).getReg(), MRI, TRI)->getID() !=
4070 AArch64::FPRRegBankID ||
4071 RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI)->getID() !=
4072 AArch64::FPRRegBankID) {
4073 LLVM_DEBUG(dbgs() << "Unmerging vector-to-gpr and scalar-to-scalar "
4074 "currently unsupported.\n");
4075 return false;
4076 }
4077
4078 // The last operand is the vector source register, and every other operand is
4079 // a register to unpack into.
4080 unsigned NumElts = I.getNumOperands() - 1;
4081 Register SrcReg = I.getOperand(NumElts).getReg();
4082 const LLT NarrowTy = MRI.getType(I.getOperand(0).getReg());
4083 const LLT WideTy = MRI.getType(SrcReg);
4084
4085 assert(WideTy.getSizeInBits() > NarrowTy.getSizeInBits() &&
4086 "source register size too small!");
4087
4088 if (!NarrowTy.isScalar())
4089 return selectSplitVectorUnmerge(I, MRI);
4090
4091 // Choose a lane copy opcode and subregister based on the size of the
4092 // vector's elements.
4093 unsigned CopyOpc = 0;
4094 unsigned ExtractSubReg = 0;
4095 if (!getLaneCopyOpcode(CopyOpc, ExtractSubReg, NarrowTy.getSizeInBits()))
4096 return false;
4097
4098 // Set up for the lane copies.
4099 MachineBasicBlock &MBB = *I.getParent();
4100
4101 // Stores the registers we'll be copying from.
4102 SmallVector<Register, 4> InsertRegs;
4103
4104 // We'll use the first register twice, so we only need NumElts-1 registers.
4105 unsigned NumInsertRegs = NumElts - 1;
4106
4107 // If our elements fit into exactly 128 bits, then we can copy from the source
4108 // directly. Otherwise, we need to do a bit of setup with some subregister
4109 // inserts.
4110 if (NarrowTy.getSizeInBits() * NumElts == 128) {
4111 InsertRegs = SmallVector<Register, 4>(NumInsertRegs, SrcReg);
4112 } else {
4113 // No. We have to perform subregister inserts. For each insert, create an
4114 // implicit def and a subregister insert, and save the register we create.
4115 // For scalar sources, treat as a pseudo-vector of NarrowTy elements.
4116 unsigned EltSize = WideTy.isVector() ? WideTy.getScalarSizeInBits()
4117 : NarrowTy.getSizeInBits();
4118 const TargetRegisterClass *RC = getRegClassForTypeOnBank(
4119 LLT::fixed_vector(NumElts, EltSize), *RBI.getRegBank(SrcReg, MRI, TRI));
4120 unsigned SubReg = 0;
4121 bool Found = getSubRegForClass(RC, TRI, SubReg);
4122 (void)Found;
4123 assert(Found && "expected to find last operand's subreg idx");
4124 for (unsigned Idx = 0; Idx < NumInsertRegs; ++Idx) {
4125 Register ImpDefReg = MRI.createVirtualRegister(&AArch64::FPR128RegClass);
4126 MachineInstr &ImpDefMI =
4127 *BuildMI(MBB, I, I.getDebugLoc(), TII.get(TargetOpcode::IMPLICIT_DEF),
4128 ImpDefReg);
4129
4130 // Now, create the subregister insert from SrcReg.
4131 Register InsertReg = MRI.createVirtualRegister(&AArch64::FPR128RegClass);
4132 MachineInstr &InsMI =
4133 *BuildMI(MBB, I, I.getDebugLoc(),
4134 TII.get(TargetOpcode::INSERT_SUBREG), InsertReg)
4135 .addUse(ImpDefReg)
4136 .addUse(SrcReg)
4137 .addImm(SubReg);
4138
4139 constrainSelectedInstRegOperands(ImpDefMI, TII, TRI, RBI);
4140 constrainSelectedInstRegOperands(InsMI, TII, TRI, RBI);
4141
4142 // Save the register so that we can copy from it after.
4143 InsertRegs.push_back(InsertReg);
4144 }
4145 }
4146
4147 // Now that we've created any necessary subregister inserts, we can
4148 // create the copies.
4149 //
4150 // Perform the first copy separately as a subregister copy.
4151 Register CopyTo = I.getOperand(0).getReg();
4152 auto FirstCopy = MIB.buildInstr(TargetOpcode::COPY, {CopyTo}, {})
4153 .addReg(InsertRegs[0], {}, ExtractSubReg);
4154 constrainSelectedInstRegOperands(*FirstCopy, TII, TRI, RBI);
4155
4156 // Now, perform the remaining copies as vector lane copies.
4157 unsigned LaneIdx = 1;
4158 for (Register InsReg : InsertRegs) {
4159 Register CopyTo = I.getOperand(LaneIdx).getReg();
4160 MachineInstr &CopyInst =
4161 *BuildMI(MBB, I, I.getDebugLoc(), TII.get(CopyOpc), CopyTo)
4162 .addUse(InsReg)
4163 .addImm(LaneIdx);
4164 constrainSelectedInstRegOperands(CopyInst, TII, TRI, RBI);
4165 ++LaneIdx;
4166 }
4167
4168 // Separately constrain the first copy's destination. Because of the
4169 // limitation in constrainOperandRegClass, we can't guarantee that this will
4170 // actually be constrained. So, do it ourselves using the second operand.
4171 const TargetRegisterClass *RC =
4172 MRI.getRegClassOrNull(I.getOperand(1).getReg());
4173 if (!RC) {
4174 LLVM_DEBUG(dbgs() << "Couldn't constrain copy destination.\n");
4175 return false;
4176 }
4177
4178 RBI.constrainGenericRegister(CopyTo, *RC, MRI);
4179 I.eraseFromParent();
4180 return true;
4181}
4182
4183bool AArch64InstructionSelector::selectConcatVectors(
4184 MachineInstr &I, MachineRegisterInfo &MRI) {
4185 assert(I.getOpcode() == TargetOpcode::G_CONCAT_VECTORS &&
4186 "Unexpected opcode");
4187 Register Dst = I.getOperand(0).getReg();
4188 Register Op1 = I.getOperand(1).getReg();
4189 Register Op2 = I.getOperand(2).getReg();
4190 MachineInstr *ConcatMI = emitVectorConcat(Dst, Op1, Op2, MIB);
4191 if (!ConcatMI)
4192 return false;
4193 I.eraseFromParent();
4194 return true;
4195}
4196
4197unsigned
4198AArch64InstructionSelector::emitConstantPoolEntry(const Constant *CPVal,
4199 MachineFunction &MF) const {
4200 Type *CPTy = CPVal->getType();
4201 Align Alignment = MF.getDataLayout().getPrefTypeAlign(CPTy);
4202
4203 MachineConstantPool *MCP = MF.getConstantPool();
4204 return MCP->getConstantPoolIndex(CPVal, Alignment);
4205}
4206
4207MachineInstr *AArch64InstructionSelector::emitLoadFromConstantPool(
4208 const Constant *CPVal, MachineIRBuilder &MIRBuilder) const {
4209 const TargetRegisterClass *RC;
4210 unsigned Opc;
4211 bool IsTiny = TM.getCodeModel() == CodeModel::Tiny;
4212 unsigned Size = MIRBuilder.getDataLayout().getTypeStoreSize(CPVal->getType());
4213 switch (Size) {
4214 case 16:
4215 RC = &AArch64::FPR128RegClass;
4216 Opc = IsTiny ? AArch64::LDRQl : AArch64::LDRQui;
4217 break;
4218 case 8:
4219 RC = &AArch64::FPR64RegClass;
4220 Opc = IsTiny ? AArch64::LDRDl : AArch64::LDRDui;
4221 break;
4222 case 4:
4223 RC = &AArch64::FPR32RegClass;
4224 Opc = IsTiny ? AArch64::LDRSl : AArch64::LDRSui;
4225 break;
4226 case 2:
4227 RC = &AArch64::FPR16RegClass;
4228 Opc = AArch64::LDRHui;
4229 break;
4230 default:
4231 LLVM_DEBUG(dbgs() << "Could not load from constant pool of type "
4232 << *CPVal->getType());
4233 return nullptr;
4234 }
4235
4236 MachineInstr *LoadMI = nullptr;
4237 auto &MF = MIRBuilder.getMF();
4238 unsigned CPIdx = emitConstantPoolEntry(CPVal, MF);
4239 if (IsTiny && (Size == 16 || Size == 8 || Size == 4)) {
4240 // Use load(literal) for tiny code model.
4241 LoadMI = &*MIRBuilder.buildInstr(Opc, {RC}, {}).addConstantPoolIndex(CPIdx);
4242 } else {
4243 auto Adrp =
4244 MIRBuilder.buildInstr(AArch64::ADRP, {&AArch64::GPR64RegClass}, {})
4245 .addConstantPoolIndex(CPIdx, 0, AArch64II::MO_PAGE);
4246
4247 LoadMI = &*MIRBuilder.buildInstr(Opc, {RC}, {Adrp})
4248 .addConstantPoolIndex(
4249 CPIdx, 0, AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
4250
4251 constrainSelectedInstRegOperands(*Adrp, TII, TRI, RBI);
4252 }
4253
4254 MachinePointerInfo PtrInfo = MachinePointerInfo::getConstantPool(MF);
4255 LoadMI->addMemOperand(MF, MF.getMachineMemOperand(PtrInfo,
4256 MachineMemOperand::MOLoad,
4257 Size, Align(Size)));
4258 constrainSelectedInstRegOperands(*LoadMI, TII, TRI, RBI);
4259 return LoadMI;
4260}
4261
4262 /// Return an <Opcode, SubregIndex> pair to do a vector elt insert of a given
4263 /// size and RB.
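/// e.g. inserting a 32-bit element from a GPR uses INSvi32gpr with the ssub
/// subregister index, while inserting from another vector lane uses INSvi32lane.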
4264static std::pair<unsigned, unsigned>
4265getInsertVecEltOpInfo(const RegisterBank &RB, unsigned EltSize) {
4266 unsigned Opc, SubregIdx;
4267 if (RB.getID() == AArch64::GPRRegBankID) {
4268 if (EltSize == 8) {
4269 Opc = AArch64::INSvi8gpr;
4270 SubregIdx = AArch64::bsub;
4271 } else if (EltSize == 16) {
4272 Opc = AArch64::INSvi16gpr;
4273 SubregIdx = AArch64::ssub;
4274 } else if (EltSize == 32) {
4275 Opc = AArch64::INSvi32gpr;
4276 SubregIdx = AArch64::ssub;
4277 } else if (EltSize == 64) {
4278 Opc = AArch64::INSvi64gpr;
4279 SubregIdx = AArch64::dsub;
4280 } else {
4281 llvm_unreachable("invalid elt size!");
4282 }
4283 } else {
4284 if (EltSize == 8) {
4285 Opc = AArch64::INSvi8lane;
4286 SubregIdx = AArch64::bsub;
4287 } else if (EltSize == 16) {
4288 Opc = AArch64::INSvi16lane;
4289 SubregIdx = AArch64::hsub;
4290 } else if (EltSize == 32) {
4291 Opc = AArch64::INSvi32lane;
4292 SubregIdx = AArch64::ssub;
4293 } else if (EltSize == 64) {
4294 Opc = AArch64::INSvi64lane;
4295 SubregIdx = AArch64::dsub;
4296 } else {
4297 llvm_unreachable("invalid elt size!");
4298 }
4299 }
4300 return std::make_pair(Opc, SubregIdx);
4301}
4302
4303MachineInstr *AArch64InstructionSelector::emitInstr(
4304 unsigned Opcode, std::initializer_list<llvm::DstOp> DstOps,
4305 std::initializer_list<llvm::SrcOp> SrcOps, MachineIRBuilder &MIRBuilder,
4306 const ComplexRendererFns &RenderFns) const {
4307 assert(Opcode && "Expected an opcode?");
4308 assert(!isPreISelGenericOpcode(Opcode) &&
4309 "Function should only be used to produce selected instructions!");
4310 auto MI = MIRBuilder.buildInstr(Opcode, DstOps, SrcOps);
4311 if (RenderFns)
4312 for (auto &Fn : *RenderFns)
4313 Fn(MI);
4314 constrainSelectedInstRegOperands(*MI, TII, TRI, RBI);
4315 return &*MI;
4316}
4317
4318MachineInstr *AArch64InstructionSelector::emitAddSub(
4319 const std::array<std::array<unsigned, 2>, 5> &AddrModeAndSizeToOpcode,
4320 Register Dst, MachineOperand &LHS, MachineOperand &RHS,
4321 MachineIRBuilder &MIRBuilder) const {
4322 MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
4323 assert(LHS.isReg() && RHS.isReg() && "Expected register operands?");
4324 auto Ty = MRI.getType(LHS.getReg());
4325 assert(!Ty.isVector() && "Expected a scalar or pointer?");
4326 unsigned Size = Ty.getSizeInBits();
4327 assert((Size == 32 || Size == 64) && "Expected a 32-bit or 64-bit type only");
4328 bool Is32Bit = Size == 32;
4329
4330 // INSTRri form with positive arithmetic immediate.
4331 if (auto Fns = selectArithImmed(RHS))
4332 return emitInstr(AddrModeAndSizeToOpcode[0][Is32Bit], {Dst}, {LHS},
4333 MIRBuilder, Fns);
4334
4335 // INSTRri form with negative arithmetic immediate.
4336 if (auto Fns = selectNegArithImmed(RHS))
4337 return emitInstr(AddrModeAndSizeToOpcode[3][Is32Bit], {Dst}, {LHS},
4338 MIRBuilder, Fns);
4339
4340 // INSTRrx form.
4341 if (auto Fns = selectArithExtendedRegister(RHS))
4342 return emitInstr(AddrModeAndSizeToOpcode[4][Is32Bit], {Dst}, {LHS},
4343 MIRBuilder, Fns);
4344
4345 // INSTRrs form.
4346 if (auto Fns = selectShiftedRegister(RHS))
4347 return emitInstr(AddrModeAndSizeToOpcode[1][Is32Bit], {Dst}, {LHS},
4348 MIRBuilder, Fns);
4349 return emitInstr(AddrModeAndSizeToOpcode[2][Is32Bit], {Dst}, {LHS, RHS},
4350 MIRBuilder);
4351}
4352
4353MachineInstr *
4354AArch64InstructionSelector::emitADD(Register DefReg, MachineOperand &LHS,
4355 MachineOperand &RHS,
4356 MachineIRBuilder &MIRBuilder) const {
4357 const std::array<std::array<unsigned, 2>, 5> OpcTable{
4358 {{AArch64::ADDXri, AArch64::ADDWri},
4359 {AArch64::ADDXrs, AArch64::ADDWrs},
4360 {AArch64::ADDXrr, AArch64::ADDWrr},
4361 {AArch64::SUBXri, AArch64::SUBWri},
4362 {AArch64::ADDXrx, AArch64::ADDWrx}}};
4363 return emitAddSub(OpcTable, DefReg, LHS, RHS, MIRBuilder);
4364}
4365
4366MachineInstr *
4367AArch64InstructionSelector::emitADDS(Register Dst, MachineOperand &LHS,
4368 MachineOperand &RHS,
4369 MachineIRBuilder &MIRBuilder) const {
4370 const std::array<std::array<unsigned, 2>, 5> OpcTable{
4371 {{AArch64::ADDSXri, AArch64::ADDSWri},
4372 {AArch64::ADDSXrs, AArch64::ADDSWrs},
4373 {AArch64::ADDSXrr, AArch64::ADDSWrr},
4374 {AArch64::SUBSXri, AArch64::SUBSWri},
4375 {AArch64::ADDSXrx, AArch64::ADDSWrx}}};
4376 return emitAddSub(OpcTable, Dst, LHS, RHS, MIRBuilder);
4377}
4378
4379MachineInstr *
4380AArch64InstructionSelector::emitSUBS(Register Dst, MachineOperand &LHS,
4381 MachineOperand &RHS,
4382 MachineIRBuilder &MIRBuilder) const {
4383 const std::array<std::array<unsigned, 2>, 5> OpcTable{
4384 {{AArch64::SUBSXri, AArch64::SUBSWri},
4385 {AArch64::SUBSXrs, AArch64::SUBSWrs},
4386 {AArch64::SUBSXrr, AArch64::SUBSWrr},
4387 {AArch64::ADDSXri, AArch64::ADDSWri},
4388 {AArch64::SUBSXrx, AArch64::SUBSWrx}}};
4389 return emitAddSub(OpcTable, Dst, LHS, RHS, MIRBuilder);
4390}
4391
4392MachineInstr *
4393AArch64InstructionSelector::emitADCS(Register Dst, MachineOperand &LHS,
4394 MachineOperand &RHS,
4395 MachineIRBuilder &MIRBuilder) const {
4396 assert(LHS.isReg() && RHS.isReg() && "Expected register operands?");
4397 MachineRegisterInfo *MRI = MIRBuilder.getMRI();
4398 bool Is32Bit = (MRI->getType(LHS.getReg()).getSizeInBits() == 32);
4399 static const unsigned OpcTable[2] = {AArch64::ADCSXr, AArch64::ADCSWr};
4400 return emitInstr(OpcTable[Is32Bit], {Dst}, {LHS, RHS}, MIRBuilder);
4401}
4402
4403MachineInstr *
4404AArch64InstructionSelector::emitSBCS(Register Dst, MachineOperand &LHS,
4405 MachineOperand &RHS,
4406 MachineIRBuilder &MIRBuilder) const {
4407 assert(LHS.isReg() && RHS.isReg() && "Expected register operands?");
4408 MachineRegisterInfo *MRI = MIRBuilder.getMRI();
4409 bool Is32Bit = (MRI->getType(LHS.getReg()).getSizeInBits() == 32);
4410 static const unsigned OpcTable[2] = {AArch64::SBCSXr, AArch64::SBCSWr};
4411 return emitInstr(OpcTable[Is32Bit], {Dst}, {LHS, RHS}, MIRBuilder);
4412}
4413
4414MachineInstr *
4415AArch64InstructionSelector::emitCMP(MachineOperand &LHS, MachineOperand &RHS,
4416 MachineIRBuilder &MIRBuilder) const {
4417 MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
4418 bool Is32Bit = MRI.getType(LHS.getReg()).getSizeInBits() == 32;
4419 auto RC = Is32Bit ? &AArch64::GPR32RegClass : &AArch64::GPR64RegClass;
4420 return emitSUBS(MRI.createVirtualRegister(RC), LHS, RHS, MIRBuilder);
4421}
4422
4423MachineInstr *
4424AArch64InstructionSelector::emitCMN(MachineOperand &LHS, MachineOperand &RHS,
4425 MachineIRBuilder &MIRBuilder) const {
4426 MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
4427 bool Is32Bit = (MRI.getType(LHS.getReg()).getSizeInBits() == 32);
4428 auto RC = Is32Bit ? &AArch64::GPR32RegClass : &AArch64::GPR64RegClass;
4429 return emitADDS(MRI.createVirtualRegister(RC), LHS, RHS, MIRBuilder);
4430}
4431
4432MachineInstr *
4433AArch64InstructionSelector::emitTST(MachineOperand &LHS, MachineOperand &RHS,
4434 MachineIRBuilder &MIRBuilder) const {
4435 assert(LHS.isReg() && RHS.isReg() && "Expected register operands?");
4436 MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
4437 LLT Ty = MRI.getType(LHS.getReg());
4438 unsigned RegSize = Ty.getSizeInBits();
4439 bool Is32Bit = (RegSize == 32);
4440 const unsigned OpcTable[3][2] = {{AArch64::ANDSXri, AArch64::ANDSWri},
4441 {AArch64::ANDSXrs, AArch64::ANDSWrs},
4442 {AArch64::ANDSXrr, AArch64::ANDSWrr}};
4443 // ANDS needs a logical immediate for its immediate form. Check if we can
4444 // fold one in.
4445 if (auto ValAndVReg = getIConstantVRegValWithLookThrough(RHS.getReg(), MRI)) {
4446 int64_t Imm = ValAndVReg->Value.getSExtValue();
4447
4448 if (AArch64_AM::isLogicalImmediate(Imm, RegSize)) {
4449 auto TstMI = MIRBuilder.buildInstr(OpcTable[0][Is32Bit], {Ty}, {LHS});
4450 TstMI.addImm(AArch64_AM::encodeLogicalImmediate(Imm, RegSize));
4451 constrainSelectedInstRegOperands(*TstMI, TII, TRI, RBI);
4452 return &*TstMI;
4453 }
4454 }
4455
4456 if (auto Fns = selectLogicalShiftedRegister(RHS))
4457 return emitInstr(OpcTable[1][Is32Bit], {Ty}, {LHS}, MIRBuilder, Fns);
4458 return emitInstr(OpcTable[2][Is32Bit], {Ty}, {LHS, RHS}, MIRBuilder);
4459}
4460
4461MachineInstr *AArch64InstructionSelector::emitIntegerCompare(
4462 MachineOperand &LHS, MachineOperand &RHS, MachineOperand &Predicate,
4463 MachineIRBuilder &MIRBuilder) const {
4464 assert(LHS.isReg() && RHS.isReg() && "Expected LHS and RHS to be registers!");
4465 assert(Predicate.isPredicate() && "Expected predicate?");
4466 MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
4467 LLT CmpTy = MRI.getType(LHS.getReg());
4468 assert(!CmpTy.isVector() && "Expected scalar or pointer");
4469 unsigned Size = CmpTy.getSizeInBits();
4470 (void)Size;
4471 assert((Size == 32 || Size == 64) && "Expected a 32-bit or 64-bit LHS/RHS?");
4472 // Fold the compare into a cmn or tst if possible.
4473 if (auto FoldCmp = tryFoldIntegerCompare(LHS, RHS, Predicate, MIRBuilder))
4474 return FoldCmp;
4475 return emitCMP(LHS, RHS, MIRBuilder);
4476}
4477
4478MachineInstr *AArch64InstructionSelector::emitCSetForFCmp(
4479 Register Dst, CmpInst::Predicate Pred, MachineIRBuilder &MIRBuilder) const {
4480 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
4481#ifndef NDEBUG
4482 LLT Ty = MRI.getType(Dst);
4483 assert(!Ty.isVector() && Ty.getSizeInBits() == 32 &&
4484 "Expected a 32-bit scalar register?");
4485#endif
4486 const Register ZReg = AArch64::WZR;
4487 AArch64CC::CondCode CC1, CC2;
4488 changeFCMPPredToAArch64CC(Pred, CC1, CC2);
4489 auto InvCC1 = AArch64CC::getInvertedCondCode(CC1);
4490 if (CC2 == AArch64CC::AL)
4491 return emitCSINC(/*Dst=*/Dst, /*Src1=*/ZReg, /*Src2=*/ZReg, InvCC1,
4492 MIRBuilder);
4493 const TargetRegisterClass *RC = &AArch64::GPR32RegClass;
4494 Register Def1Reg = MRI.createVirtualRegister(RC);
4495 Register Def2Reg = MRI.createVirtualRegister(RC);
4496 auto InvCC2 = AArch64CC::getInvertedCondCode(CC2);
4497 emitCSINC(/*Dst=*/Def1Reg, /*Src1=*/ZReg, /*Src2=*/ZReg, InvCC1, MIRBuilder);
4498 emitCSINC(/*Dst=*/Def2Reg, /*Src1=*/ZReg, /*Src2=*/ZReg, InvCC2, MIRBuilder);
4499 auto OrMI = MIRBuilder.buildInstr(AArch64::ORRWrr, {Dst}, {Def1Reg, Def2Reg});
4500 constrainSelectedInstRegOperands(*OrMI, TII, TRI, RBI);
4501 return &*OrMI;
4502}
4503
4504MachineInstr *AArch64InstructionSelector::emitFPCompare(
4505 Register LHS, Register RHS, MachineIRBuilder &MIRBuilder,
4506 std::optional<CmpInst::Predicate> Pred) const {
4507 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
4508 LLT Ty = MRI.getType(LHS);
4509 if (Ty.isVector())
4510 return nullptr;
4511 unsigned OpSize = Ty.getSizeInBits();
4512 assert(OpSize == 16 || OpSize == 32 || OpSize == 64);
4513
4514 // If this is a compare against +0.0, then we don't have
4515 // to explicitly materialize a constant.
4516 const ConstantFP *FPImm = getConstantFPVRegVal(RHS, MRI);
4517 bool ShouldUseImm = FPImm && (FPImm->isZero() && !FPImm->isNegative());
4518
4519 auto IsEqualityPred = [](CmpInst::Predicate P) {
4520 return P == CmpInst::FCMP_OEQ || P == CmpInst::FCMP_ONE ||
4521 P == CmpInst::FCMP_UEQ || P == CmpInst::FCMP_UNE;
4522 };
4523 if (!ShouldUseImm && Pred && IsEqualityPred(*Pred)) {
4524 // Try commuting the operands.
4525 const ConstantFP *LHSImm = getConstantFPVRegVal(LHS, MRI);
4526 if (LHSImm && (LHSImm->isZero() && !LHSImm->isNegative())) {
4527 ShouldUseImm = true;
4528 std::swap(LHS, RHS);
4529 }
4530 }
4531 unsigned CmpOpcTbl[2][3] = {
4532 {AArch64::FCMPHrr, AArch64::FCMPSrr, AArch64::FCMPDrr},
4533 {AArch64::FCMPHri, AArch64::FCMPSri, AArch64::FCMPDri}};
4534 unsigned CmpOpc =
4535 CmpOpcTbl[ShouldUseImm][OpSize == 16 ? 0 : (OpSize == 32 ? 1 : 2)];
4536
4537 // Partially build the compare. Decide if we need to add a use for the
4538 // third operand based on whether we're comparing against 0.0.
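// e.g. fcmp oeq %x(s32), 0.0 selects FCMPSri (compare against +0.0) and skips
// materializing the constant; otherwise FCMPSrr/FCMPDrr is used with both
// registers.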
4539 auto CmpMI = MIRBuilder.buildInstr(CmpOpc).addUse(LHS);
4541 if (!ShouldUseImm)
4542 CmpMI.addUse(RHS);
4543 constrainSelectedInstRegOperands(*CmpMI, TII, TRI, RBI);
4544 return &*CmpMI;
4545}
4546
4547MachineInstr *AArch64InstructionSelector::emitVectorConcat(
4548 std::optional<Register> Dst, Register Op1, Register Op2,
4549 MachineIRBuilder &MIRBuilder) const {
4550 // We implement a vector concat by:
4551 // 1. Use scalar_to_vector to insert the lower vector into the larger dest
4552 // 2. Insert the upper vector into the destination's upper element
4553 // TODO: some of this code is common with G_BUILD_VECTOR handling.
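// e.g. concatenating two <2 x s32> (64-bit) operands: each operand is first
// placed in the low half of a 128-bit register via INSERT_SUBREG (dsub), then
// INSvi64lane moves the second operand into the upper 64-bit lane of the result.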
4554 MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
4555
4556 const LLT Op1Ty = MRI.getType(Op1);
4557 const LLT Op2Ty = MRI.getType(Op2);
4558
4559 if (Op1Ty != Op2Ty) {
4560 LLVM_DEBUG(dbgs() << "Could not do vector concat of differing vector tys");
4561 return nullptr;
4562 }
4563 assert(Op1Ty.isVector() && "Expected a vector for vector concat");
4564
4565 if (Op1Ty.getSizeInBits() >= 128) {
4566 LLVM_DEBUG(dbgs() << "Vector concat not supported for full size vectors");
4567 return nullptr;
4568 }
4569
4570 // At the moment we just support 64 bit vector concats.
4571 if (Op1Ty.getSizeInBits() != 64) {
4572 LLVM_DEBUG(dbgs() << "Vector concat only supported for 64b vectors");
4573 return nullptr;
4574 }
4575
4576 const LLT ScalarTy = LLT::scalar(Op1Ty.getSizeInBits());
4577 const RegisterBank &FPRBank = *RBI.getRegBank(Op1, MRI, TRI);
4578 const TargetRegisterClass *DstRC =
4579 getRegClassForTypeOnBank(Op1Ty.multiplyElements(2), FPRBank);
4580
4581 MachineInstr *WidenedOp1 =
4582 emitScalarToVector(ScalarTy.getSizeInBits(), DstRC, Op1, MIRBuilder);
4583 MachineInstr *WidenedOp2 =
4584 emitScalarToVector(ScalarTy.getSizeInBits(), DstRC, Op2, MIRBuilder);
4585 if (!WidenedOp1 || !WidenedOp2) {
4586 LLVM_DEBUG(dbgs() << "Could not emit a vector from scalar value");
4587 return nullptr;
4588 }
4589
4590 // Now do the insert of the upper element.
4591 unsigned InsertOpc, InsSubRegIdx;
4592 std::tie(InsertOpc, InsSubRegIdx) =
4593 getInsertVecEltOpInfo(FPRBank, ScalarTy.getSizeInBits());
4594
4595 if (!Dst)
4596 Dst = MRI.createVirtualRegister(DstRC);
4597 auto InsElt =
4598 MIRBuilder
4599 .buildInstr(InsertOpc, {*Dst}, {WidenedOp1->getOperand(0).getReg()})
4600 .addImm(1) /* Lane index */
4601 .addUse(WidenedOp2->getOperand(0).getReg())
4602 .addImm(0);
4603 constrainSelectedInstRegOperands(*InsElt, TII, TRI, RBI);
4604 return &*InsElt;
4605}
4606
4607MachineInstr *
4608AArch64InstructionSelector::emitCSINC(Register Dst, Register Src1,
4609 Register Src2, AArch64CC::CondCode Pred,
4610 MachineIRBuilder &MIRBuilder) const {
4611 auto &MRI = *MIRBuilder.getMRI();
4612 const RegClassOrRegBank &RegClassOrBank = MRI.getRegClassOrRegBank(Dst);
4613 // If we used a register class, then this won't necessarily have an LLT.
4614 // Compute the size based on whether we have a class or a bank.
4615 unsigned Size;
4616 if (const auto *RC = dyn_cast<const TargetRegisterClass *>(RegClassOrBank))
4617 Size = TRI.getRegSizeInBits(*RC);
4618 else
4619 Size = MRI.getType(Dst).getSizeInBits();
4620 // Some opcodes use s1.
4621 assert(Size <= 64 && "Expected 64 bits or less only!");
4622 static const unsigned OpcTable[2] = {AArch64::CSINCWr, AArch64::CSINCXr};
4623 unsigned Opc = OpcTable[Size == 64];
4624 auto CSINC = MIRBuilder.buildInstr(Opc, {Dst}, {Src1, Src2}).addImm(Pred);
4625 constrainSelectedInstRegOperands(*CSINC, TII, TRI, RBI);
4626 return &*CSINC;
4627}
4628
4629MachineInstr *AArch64InstructionSelector::emitCarryIn(MachineInstr &I,
4630 Register CarryReg) {
4631 MachineRegisterInfo *MRI = MIB.getMRI();
4632 unsigned Opcode = I.getOpcode();
4633
4634 // If the instruction is a SUB, we need to negate the carry,
4635 // because borrowing is indicated by carry-flag == 0.
4636 bool NeedsNegatedCarry =
4637 (Opcode == TargetOpcode::G_USUBE || Opcode == TargetOpcode::G_SSUBE);
4638
4639 // If the previous instruction will already produce the correct carry, do not
4640 // emit a carry generating instruction. E.g. for G_UADDE/G_USUBE sequences
4641 // generated during legalization of wide add/sub. This optimization depends on
4642 // these sequences not being interrupted by other instructions.
4643 // We have to select the previous instruction before the carry-using
4644 // instruction is deleted by the calling function, otherwise the previous
4645 // instruction might become dead and would get deleted.
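// e.g. a wide add legalized as
//   %lo, %c1 = G_UADDO %a_lo, %b_lo
//   %hi, %c2 = G_UADDE %a_hi, %b_hi, %c1
// already leaves the carry in NZCV after the ADDS selected for the G_UADDO,
// so no extra instruction is needed to feed the ADCS.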
4646 MachineInstr *SrcMI = MRI->getVRegDef(CarryReg);
4647 if (SrcMI == I.getPrevNode()) {
4648 if (auto *CarrySrcMI = dyn_cast<GAddSubCarryOut>(SrcMI)) {
4649 bool ProducesNegatedCarry = CarrySrcMI->isSub();
4650 if (NeedsNegatedCarry == ProducesNegatedCarry &&
4651 CarrySrcMI->isUnsigned() &&
4652 CarrySrcMI->getCarryOutReg() == CarryReg &&
4653 selectAndRestoreState(*SrcMI))
4654 return nullptr;
4655 }
4656 }
4657
4658 Register DeadReg = MRI->createVirtualRegister(&AArch64::GPR32RegClass);
4659
4660 if (NeedsNegatedCarry) {
4661 // (0 - Carry) sets !C in NZCV when Carry == 1
4662 Register ZReg = AArch64::WZR;
4663 return emitInstr(AArch64::SUBSWrr, {DeadReg}, {ZReg, CarryReg}, MIB);
4664 }
4665
4666 // (Carry - 1) sets !C in NZCV when Carry == 0
4667 auto Fns = select12BitValueWithLeftShift(1);
4668 return emitInstr(AArch64::SUBSWri, {DeadReg}, {CarryReg}, MIB, Fns);
4669}
4670
4671bool AArch64InstructionSelector::selectOverflowOp(MachineInstr &I,
4672 MachineRegisterInfo &MRI) {
4673 auto &CarryMI = cast<GAddSubCarryOut>(I);
4674
4675 if (auto *CarryInMI = dyn_cast<GAddSubCarryInOut>(&I)) {
4676 // Set NZCV carry according to carry-in VReg
4677 emitCarryIn(I, CarryInMI->getCarryInReg());
4678 }
4679
4680 // Emit the operation and get the correct condition code.
4681 auto OpAndCC = emitOverflowOp(I.getOpcode(), CarryMI.getDstReg(),
4682 CarryMI.getLHS(), CarryMI.getRHS(), MIB);
4683
4684 Register CarryOutReg = CarryMI.getCarryOutReg();
4685
4686 // Don't convert carry-out to VReg if it is never used
4687 if (!MRI.use_nodbg_empty(CarryOutReg)) {
4688 // Now, put the overflow result in the register given by the first operand
4689 // to the overflow op. CSINC increments the result when the predicate is
4690 // false, so to get the increment when it's true, we need to use the
4691 // inverse. In this case, we want to increment when carry is set.
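// e.g. for G_UADDO the condition is HS, so we emit
//   %carry_out = CSINCWr $wzr, $wzr, lo
// which produces 1 exactly when the unsigned add set the carry flag.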
4692 Register ZReg = AArch64::WZR;
4693 emitCSINC(/*Dst=*/CarryOutReg, /*Src1=*/ZReg, /*Src2=*/ZReg,
4694 getInvertedCondCode(OpAndCC.second), MIB);
4695 }
4696
4697 I.eraseFromParent();
4698 return true;
4699}
4700
4701std::pair<MachineInstr *, AArch64CC::CondCode>
4702AArch64InstructionSelector::emitOverflowOp(unsigned Opcode, Register Dst,
4703 MachineOperand &LHS,
4704 MachineOperand &RHS,
4705 MachineIRBuilder &MIRBuilder) const {
4706 switch (Opcode) {
4707 default:
4708 llvm_unreachable("Unexpected opcode!");
4709 case TargetOpcode::G_SADDO:
4710 return std::make_pair(emitADDS(Dst, LHS, RHS, MIRBuilder), AArch64CC::VS);
4711 case TargetOpcode::G_UADDO:
4712 return std::make_pair(emitADDS(Dst, LHS, RHS, MIRBuilder), AArch64CC::HS);
4713 case TargetOpcode::G_SSUBO:
4714 return std::make_pair(emitSUBS(Dst, LHS, RHS, MIRBuilder), AArch64CC::VS);
4715 case TargetOpcode::G_USUBO:
4716 return std::make_pair(emitSUBS(Dst, LHS, RHS, MIRBuilder), AArch64CC::LO);
4717 case TargetOpcode::G_SADDE:
4718 return std::make_pair(emitADCS(Dst, LHS, RHS, MIRBuilder), AArch64CC::VS);
4719 case TargetOpcode::G_UADDE:
4720 return std::make_pair(emitADCS(Dst, LHS, RHS, MIRBuilder), AArch64CC::HS);
4721 case TargetOpcode::G_SSUBE:
4722 return std::make_pair(emitSBCS(Dst, LHS, RHS, MIRBuilder), AArch64CC::VS);
4723 case TargetOpcode::G_USUBE:
4724 return std::make_pair(emitSBCS(Dst, LHS, RHS, MIRBuilder), AArch64CC::LO);
4725 }
4726}
4727
4728/// Returns true if @p Val is a tree of AND/OR/CMP operations that can be
4729/// expressed as a conjunction.
4730/// \param CanNegate Set to true if we can negate the whole sub-tree just by
4731/// changing the conditions on the CMP tests.
4732/// (this means we can call emitConjunctionRec() with
4733/// Negate==true on this sub-tree)
4734/// \param MustBeFirst Set to true if this subtree needs to be negated and we
4735/// cannot do the negation naturally. We are required to
4736/// emit the subtree first in this case.
 4737/// \param WillNegate Is true if we are called when the result of this
4738/// subexpression must be negated. This happens when the
4739/// outer expression is an OR. We can use this fact to know
4740/// that we have a double negation (or (or ...) ...) that
4741/// can be implemented for free.
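/// For example (illustrative, not from the source), for
///   %c = G_AND (G_ICMP slt, %a, %b), (G_ICMP eq, %x, %y)
/// both leaves are compares, so the whole tree can be emitted as one CMP
/// followed by a CCMP chain; an OR of compares is handled by negating the
/// leaf conditions (De Morgan), since only AND chains map directly onto CCMP.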
4742static bool canEmitConjunction(Register Val, bool &CanNegate, bool &MustBeFirst,
4743 bool WillNegate, MachineRegisterInfo &MRI,
4744 unsigned Depth = 0) {
4745 if (!MRI.hasOneNonDBGUse(Val))
4746 return false;
4747 MachineInstr *ValDef = MRI.getVRegDef(Val);
4748 unsigned Opcode = ValDef->getOpcode();
4749 if (isa<GAnyCmp>(ValDef)) {
4750 CanNegate = true;
4751 MustBeFirst = false;
4752 return true;
4753 }
4754 // Protect against exponential runtime and stack overflow.
4755 if (Depth > 6)
4756 return false;
4757 if (Opcode == TargetOpcode::G_AND || Opcode == TargetOpcode::G_OR) {
4758 bool IsOR = Opcode == TargetOpcode::G_OR;
4759 Register O0 = ValDef->getOperand(1).getReg();
4760 Register O1 = ValDef->getOperand(2).getReg();
4761 bool CanNegateL;
4762 bool MustBeFirstL;
4763 if (!canEmitConjunction(O0, CanNegateL, MustBeFirstL, IsOR, MRI, Depth + 1))
4764 return false;
4765 bool CanNegateR;
4766 bool MustBeFirstR;
4767 if (!canEmitConjunction(O1, CanNegateR, MustBeFirstR, IsOR, MRI, Depth + 1))
4768 return false;
4769
4770 if (MustBeFirstL && MustBeFirstR)
4771 return false;
4772
4773 if (IsOR) {
4774 // For an OR expression we need to be able to naturally negate at least
4775 // one side or we cannot do the transformation at all.
4776 if (!CanNegateL && !CanNegateR)
4777 return false;
 4778 // If the result of the OR will be negated and we can naturally negate
4779 // the leaves, then this sub-tree as a whole negates naturally.
4780 CanNegate = WillNegate && CanNegateL && CanNegateR;
4781 // If we cannot naturally negate the whole sub-tree, then this must be
4782 // emitted first.
4783 MustBeFirst = !CanNegate;
4784 } else {
4785 assert(Opcode == TargetOpcode::G_AND && "Must be G_AND");
4786 // We cannot naturally negate an AND operation.
4787 CanNegate = false;
4788 MustBeFirst = MustBeFirstL || MustBeFirstR;
4789 }
4790 return true;
4791 }
4792 return false;
4793}
4794
4795MachineInstr *AArch64InstructionSelector::emitConditionalComparison(
 4796 Register LHS, Register RHS, CmpInst::Predicate CC,
 4797 AArch64CC::CondCode Predicate, AArch64CC::CondCode OutCC,
 4798 MachineIRBuilder &MIB) const {
4799 auto &MRI = *MIB.getMRI();
4800 LLT OpTy = MRI.getType(LHS);
4801 unsigned CCmpOpc;
4802 std::optional<ValueAndVReg> C;
4803 if (CmpInst::isIntPredicate(CC)) {
4804 assert(OpTy.getSizeInBits() == 32 || OpTy.getSizeInBits() == 64);
 4805 C = getIConstantVRegValWithLookThrough(RHS, MRI);
 4806 if (!C || C->Value.sgt(31) || C->Value.slt(-31))
4807 CCmpOpc = OpTy.getSizeInBits() == 32 ? AArch64::CCMPWr : AArch64::CCMPXr;
4808 else if (C->Value.ule(31))
4809 CCmpOpc = OpTy.getSizeInBits() == 32 ? AArch64::CCMPWi : AArch64::CCMPXi;
4810 else
4811 CCmpOpc = OpTy.getSizeInBits() == 32 ? AArch64::CCMNWi : AArch64::CCMNXi;
4812 } else {
4813 assert(OpTy.getSizeInBits() == 16 || OpTy.getSizeInBits() == 32 ||
4814 OpTy.getSizeInBits() == 64);
4815 switch (OpTy.getSizeInBits()) {
4816 case 16:
4817 assert(STI.hasFullFP16() && "Expected Full FP16 for fp16 comparisons");
4818 CCmpOpc = AArch64::FCCMPHrr;
4819 break;
4820 case 32:
4821 CCmpOpc = AArch64::FCCMPSrr;
4822 break;
4823 case 64:
4824 CCmpOpc = AArch64::FCCMPDrr;
4825 break;
4826 default:
4827 return nullptr;
4828 }
4829 }
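 // Summary (illustrative, not from the source): at this point CCmpOpc is either
 // the register form (CCMPWr/CCMPXr or FCCMP*rr), the immediate form
 // CCMPWi/CCMPXi for an integer RHS constant in [0, 31], or CCMNWi/CCMNXi for
 // one in [-31, -1], whose absolute value is encoded below.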
 4830 AArch64CC::CondCode InvOutCC = AArch64CC::getInvertedCondCode(OutCC);
 4831 unsigned NZCV = AArch64CC::getNZCVToSatisfyCondCode(InvOutCC);
4832 auto CCmp =
4833 MIB.buildInstr(CCmpOpc, {}, {LHS});
4834 if (CCmpOpc == AArch64::CCMPWi || CCmpOpc == AArch64::CCMPXi)
4835 CCmp.addImm(C->Value.getZExtValue());
4836 else if (CCmpOpc == AArch64::CCMNWi || CCmpOpc == AArch64::CCMNXi)
4837 CCmp.addImm(C->Value.abs().getZExtValue());
4838 else
4839 CCmp.addReg(RHS);
4840 CCmp.addImm(NZCV).addImm(Predicate);
4842 return &*CCmp;
4843}
4844
4845MachineInstr *AArch64InstructionSelector::emitConjunctionRec(
4846 Register Val, AArch64CC::CondCode &OutCC, bool Negate, Register CCOp,
4847 AArch64CC::CondCode Predicate, MachineIRBuilder &MIB) const {
4848 // We're at a tree leaf, produce a conditional comparison operation.
4849 auto &MRI = *MIB.getMRI();
4850 MachineInstr *ValDef = MRI.getVRegDef(Val);
4851 unsigned Opcode = ValDef->getOpcode();
4852 if (auto *Cmp = dyn_cast<GAnyCmp>(ValDef)) {
4853 Register LHS = Cmp->getLHSReg();
4854 Register RHS = Cmp->getRHSReg();
4855 CmpInst::Predicate CC = Cmp->getCond();
4856 if (Negate)
 4857 CC = CmpInst::getInversePredicate(CC);
 4858 if (isa<GICmp>(Cmp)) {
4859 OutCC = changeICMPPredToAArch64CC(CC, RHS, MIB.getMRI());
4860 } else {
4861 // Handle special FP cases.
4862 AArch64CC::CondCode ExtraCC;
4863 changeFPCCToANDAArch64CC(CC, OutCC, ExtraCC);
4864 // Some floating point conditions can't be tested with a single condition
4865 // code. Construct an additional comparison in this case.
4866 if (ExtraCC != AArch64CC::AL) {
4867 MachineInstr *ExtraCmp;
4868 if (!CCOp)
4869 ExtraCmp = emitFPCompare(LHS, RHS, MIB, CC);
4870 else
4871 ExtraCmp =
4872 emitConditionalComparison(LHS, RHS, CC, Predicate, ExtraCC, MIB);
4873 CCOp = ExtraCmp->getOperand(0).getReg();
4874 Predicate = ExtraCC;
4875 }
4876 }
4877
4878 // Produce a normal comparison if we are first in the chain
4879 if (!CCOp) {
4880 if (isa<GICmp>(Cmp))
4881 return emitCMP(Cmp->getOperand(2), Cmp->getOperand(3), MIB);
4882 return emitFPCompare(Cmp->getOperand(2).getReg(),
4883 Cmp->getOperand(3).getReg(), MIB);
4884 }
4885 // Otherwise produce a ccmp.
4886 return emitConditionalComparison(LHS, RHS, CC, Predicate, OutCC, MIB);
4887 }
4888 assert(MRI.hasOneNonDBGUse(Val) && "Valid conjunction/disjunction tree");
4889
4890 bool IsOR = Opcode == TargetOpcode::G_OR;
4891
4892 Register LHS = ValDef->getOperand(1).getReg();
4893 bool CanNegateL;
4894 bool MustBeFirstL;
4895 bool ValidL = canEmitConjunction(LHS, CanNegateL, MustBeFirstL, IsOR, MRI);
4896 assert(ValidL && "Valid conjunction/disjunction tree");
4897 (void)ValidL;
4898
4899 Register RHS = ValDef->getOperand(2).getReg();
4900 bool CanNegateR;
4901 bool MustBeFirstR;
4902 bool ValidR = canEmitConjunction(RHS, CanNegateR, MustBeFirstR, IsOR, MRI);
4903 assert(ValidR && "Valid conjunction/disjunction tree");
4904 (void)ValidR;
4905
4906 // Swap sub-tree that must come first to the right side.
4907 if (MustBeFirstL) {
4908 assert(!MustBeFirstR && "Valid conjunction/disjunction tree");
4909 std::swap(LHS, RHS);
4910 std::swap(CanNegateL, CanNegateR);
4911 std::swap(MustBeFirstL, MustBeFirstR);
4912 }
4913
4914 bool NegateR;
4915 bool NegateAfterR;
4916 bool NegateL;
4917 bool NegateAfterAll;
4918 if (Opcode == TargetOpcode::G_OR) {
4919 // Swap the sub-tree that we can negate naturally to the left.
4920 if (!CanNegateL) {
4921 assert(CanNegateR && "at least one side must be negatable");
4922 assert(!MustBeFirstR && "invalid conjunction/disjunction tree");
4923 assert(!Negate);
4924 std::swap(LHS, RHS);
4925 NegateR = false;
4926 NegateAfterR = true;
4927 } else {
4928 // Negate the left sub-tree if possible, otherwise negate the result.
4929 NegateR = CanNegateR;
4930 NegateAfterR = !CanNegateR;
4931 }
4932 NegateL = true;
4933 NegateAfterAll = !Negate;
4934 } else {
4935 assert(Opcode == TargetOpcode::G_AND &&
4936 "Valid conjunction/disjunction tree");
4937 assert(!Negate && "Valid conjunction/disjunction tree");
4938
4939 NegateL = false;
4940 NegateR = false;
4941 NegateAfterR = false;
4942 NegateAfterAll = false;
4943 }
4944
4945 // Emit sub-trees.
4946 AArch64CC::CondCode RHSCC;
4947 MachineInstr *CmpR =
4948 emitConjunctionRec(RHS, RHSCC, NegateR, CCOp, Predicate, MIB);
4949 if (NegateAfterR)
4950 RHSCC = AArch64CC::getInvertedCondCode(RHSCC);
4951 MachineInstr *CmpL = emitConjunctionRec(
4952 LHS, OutCC, NegateL, CmpR->getOperand(0).getReg(), RHSCC, MIB);
4953 if (NegateAfterAll)
4954 OutCC = AArch64CC::getInvertedCondCode(OutCC);
4955 return CmpL;
4956}
4957
4958MachineInstr *AArch64InstructionSelector::emitConjunction(
4959 Register Val, AArch64CC::CondCode &OutCC, MachineIRBuilder &MIB) const {
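 // Illustrative example (not from the original source): selecting
 //   %c = G_AND (G_ICMP eq, %a, %b), (G_ICMP slt, %x, %y)
 // through this helper yields roughly
 //   CMP  %x, %y            ; plain compare for the right-hand leaf
 //   CCMP %a, %b, #0, lt    ; re-compares only if "lt" held, else NZCV := 0000
 // with OutCC == eq for the user (CSEL, B.cond, ...) to test.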
4960 bool DummyCanNegate;
4961 bool DummyMustBeFirst;
4962 if (!canEmitConjunction(Val, DummyCanNegate, DummyMustBeFirst, false,
4963 *MIB.getMRI()))
4964 return nullptr;
4965 return emitConjunctionRec(Val, OutCC, false, Register(), AArch64CC::AL, MIB);
4966}
4967
4968bool AArch64InstructionSelector::tryOptSelectConjunction(GSelect &SelI,
4969 MachineInstr &CondMI) {
4970 AArch64CC::CondCode AArch64CC;
4971 MachineInstr *ConjMI = emitConjunction(SelI.getCondReg(), AArch64CC, MIB);
4972 if (!ConjMI)
4973 return false;
4974
4975 emitSelect(SelI.getReg(0), SelI.getTrueReg(), SelI.getFalseReg(), AArch64CC, MIB);
4976 SelI.eraseFromParent();
4977 return true;
4978}
4979
4980bool AArch64InstructionSelector::tryOptSelect(GSelect &I) {
4981 MachineRegisterInfo &MRI = *MIB.getMRI();
4982 // We want to recognize this pattern:
4983 //
4984 // $z = G_FCMP pred, $x, $y
4985 // ...
4986 // $w = G_SELECT $z, $a, $b
4987 //
4988 // Where the value of $z is *only* ever used by the G_SELECT (possibly with
4989 // some copies/truncs in between.)
4990 //
4991 // If we see this, then we can emit something like this:
4992 //
4993 // fcmp $x, $y
4994 // fcsel $w, $a, $b, pred
4995 //
4996 // Rather than emitting both of the rather long sequences in the standard
4997 // G_FCMP/G_SELECT select methods.
4998
4999 // First, check if the condition is defined by a compare.
5000 MachineInstr *CondDef = MRI.getVRegDef(I.getOperand(1).getReg());
5001
5002 // We can only fold if all of the defs have one use.
5003 Register CondDefReg = CondDef->getOperand(0).getReg();
5004 if (!MRI.hasOneNonDBGUse(CondDefReg)) {
5005 // Unless it's another select.
5006 for (const MachineInstr &UI : MRI.use_nodbg_instructions(CondDefReg)) {
5007 if (CondDef == &UI)
5008 continue;
5009 if (UI.getOpcode() != TargetOpcode::G_SELECT)
5010 return false;
5011 }
5012 }
5013
5014 // Is the condition defined by a compare?
5015 unsigned CondOpc = CondDef->getOpcode();
5016 if (CondOpc != TargetOpcode::G_ICMP && CondOpc != TargetOpcode::G_FCMP) {
5017 if (tryOptSelectConjunction(I, *CondDef))
5018 return true;
5019 return false;
5020 }
5021
 5022 AArch64CC::CondCode CondCode;
 5023 if (CondOpc == TargetOpcode::G_ICMP) {
5024 auto &PredOp = CondDef->getOperand(1);
5025 emitIntegerCompare(CondDef->getOperand(2), CondDef->getOperand(3), PredOp,
5026 MIB);
5027 auto Pred = static_cast<CmpInst::Predicate>(PredOp.getPredicate());
5028 CondCode =
5029 changeICMPPredToAArch64CC(Pred, CondDef->getOperand(3).getReg(), &MRI);
5030 } else {
5031 // Get the condition code for the select.
5032 auto Pred =
5033 static_cast<CmpInst::Predicate>(CondDef->getOperand(1).getPredicate());
5034 AArch64CC::CondCode CondCode2;
5035 changeFCMPPredToAArch64CC(Pred, CondCode, CondCode2);
5036
5037 // changeFCMPPredToAArch64CC sets CondCode2 to AL when we require two
5038 // instructions to emit the comparison.
5039 // TODO: Handle FCMP_UEQ and FCMP_ONE. After that, this check will be
5040 // unnecessary.
5041 if (CondCode2 != AArch64CC::AL)
5042 return false;
5043
5044 if (!emitFPCompare(CondDef->getOperand(2).getReg(),
5045 CondDef->getOperand(3).getReg(), MIB)) {
5046 LLVM_DEBUG(dbgs() << "Couldn't emit compare for select!\n");
5047 return false;
5048 }
5049 }
5050
5051 // Emit the select.
5052 emitSelect(I.getOperand(0).getReg(), I.getOperand(2).getReg(),
5053 I.getOperand(3).getReg(), CondCode, MIB);
5054 I.eraseFromParent();
5055 return true;
5056}
5057
5058MachineInstr *AArch64InstructionSelector::tryFoldIntegerCompare(
5059 MachineOperand &LHS, MachineOperand &RHS, MachineOperand &Predicate,
5060 MachineIRBuilder &MIRBuilder) const {
5061 assert(LHS.isReg() && RHS.isReg() && Predicate.isPredicate() &&
5062 "Unexpected MachineOperand");
5063 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
5064 // We want to find this sort of thing:
5065 // x = G_SUB 0, y
5066 // G_ICMP z, x
5067 //
5068 // In this case, we can fold the G_SUB into the G_ICMP using a CMN instead.
5069 // e.g:
5070 //
5071 // cmn z, y
5072
5073 // Check if the RHS or LHS of the G_ICMP is defined by a SUB
5074 MachineInstr *LHSDef = getDefIgnoringCopies(LHS.getReg(), MRI);
5075 MachineInstr *RHSDef = getDefIgnoringCopies(RHS.getReg(), MRI);
5076 auto P = static_cast<CmpInst::Predicate>(Predicate.getPredicate());
5077
5078 // Given this:
5079 //
5080 // x = G_SUB 0, y
5081 // G_ICMP z, x
5082 //
5083 // Produce this:
5084 //
5085 // cmn z, y
5086 if (isCMN(RHSDef, P, MRI))
5087 return emitCMN(LHS, RHSDef->getOperand(2), MIRBuilder);
5088
5089 // Same idea here, but with the LHS of the compare instead:
5090 //
5091 // Given this:
5092 //
5093 // x = G_SUB 0, y
5094 // G_ICMP x, z
5095 //
5096 // Produce this:
5097 //
5098 // cmn y, z
5099 //
5100 // But be careful! We need to swap the predicate!
5101 if (isCMN(LHSDef, P, MRI)) {
5102 if (!CmpInst::isEquality(P)) {
 5103 P = CmpInst::getSwappedPredicate(P);
 5104 Predicate.setPredicate(P);
 5105 }
5106 return emitCMN(LHSDef->getOperand(2), RHS, MIRBuilder);
5107 }
5108
5109 // Given this:
5110 //
5111 // z = G_AND x, y
5112 // G_ICMP z, 0
5113 //
5114 // Produce this if the compare is signed:
5115 //
5116 // tst x, y
5117 if (!CmpInst::isUnsigned(P) && LHSDef &&
5118 LHSDef->getOpcode() == TargetOpcode::G_AND) {
5119 // Make sure that the RHS is 0.
5120 auto ValAndVReg = getIConstantVRegValWithLookThrough(RHS.getReg(), MRI);
5121 if (!ValAndVReg || ValAndVReg->Value != 0)
5122 return nullptr;
5123
5124 return emitTST(LHSDef->getOperand(1),
5125 LHSDef->getOperand(2), MIRBuilder);
5126 }
5127
5128 return nullptr;
5129}
5130
5131bool AArch64InstructionSelector::selectShuffleVector(
5132 MachineInstr &I, MachineRegisterInfo &MRI) {
5133 const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
5134 Register Src1Reg = I.getOperand(1).getReg();
5135 Register Src2Reg = I.getOperand(2).getReg();
5136 ArrayRef<int> Mask = I.getOperand(3).getShuffleMask();
5137
5138 MachineBasicBlock &MBB = *I.getParent();
5139 MachineFunction &MF = *MBB.getParent();
5140 LLVMContext &Ctx = MF.getFunction().getContext();
5141
5142 unsigned BytesPerElt = DstTy.getElementType().getSizeInBits() / 8;
5143
 5144 SmallVector<Constant *, 64> CstIdxs;
 5145 for (int Val : Mask) {
 5146 // For now, we'll just treat any undef indexes as 0. This should be
 5147 // optimized in the future, e.g. to select DUP etc.
5148 Val = Val < 0 ? 0 : Val;
5149 for (unsigned Byte = 0; Byte < BytesPerElt; ++Byte) {
5150 unsigned Offset = Byte + Val * BytesPerElt;
5151 CstIdxs.emplace_back(ConstantInt::get(Type::getInt8Ty(Ctx), Offset));
5152 }
5153 }
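 // Illustrative (not from the source): for a <2 x s32> shuffle with mask
 // <1, 0>, BytesPerElt is 4, so CstIdxs becomes <4, 5, 6, 7, 0, 1, 2, 3>;
 // each mask element expands into BytesPerElt consecutive byte indices for TBL.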
5154
5155 // Use a constant pool to load the index vector for TBL.
5156 Constant *CPVal = ConstantVector::get(CstIdxs);
5157 MachineInstr *IndexLoad = emitLoadFromConstantPool(CPVal, MIB);
5158 if (!IndexLoad) {
5159 LLVM_DEBUG(dbgs() << "Could not load from a constant pool");
5160 return false;
5161 }
5162
5163 if (DstTy.getSizeInBits() != 128) {
5164 assert(DstTy.getSizeInBits() == 64 && "Unexpected shuffle result ty");
5165 // This case can be done with TBL1.
5166 MachineInstr *Concat =
5167 emitVectorConcat(std::nullopt, Src1Reg, Src2Reg, MIB);
5168 if (!Concat) {
5169 LLVM_DEBUG(dbgs() << "Could not do vector concat for tbl1");
5170 return false;
5171 }
5172
5173 // The constant pool load will be 64 bits, so need to convert to FPR128 reg.
5174 IndexLoad = emitScalarToVector(64, &AArch64::FPR128RegClass,
5175 IndexLoad->getOperand(0).getReg(), MIB);
5176
5177 auto TBL1 = MIB.buildInstr(
5178 AArch64::TBLv16i8One, {&AArch64::FPR128RegClass},
5179 {Concat->getOperand(0).getReg(), IndexLoad->getOperand(0).getReg()});
5181
5182 auto Copy =
5183 MIB.buildInstr(TargetOpcode::COPY, {I.getOperand(0).getReg()}, {})
5184 .addReg(TBL1.getReg(0), {}, AArch64::dsub);
5185 RBI.constrainGenericRegister(Copy.getReg(0), AArch64::FPR64RegClass, MRI);
5186 I.eraseFromParent();
5187 return true;
5188 }
5189
5190 // For TBL2 we need to emit a REG_SEQUENCE to tie together two consecutive
5191 // Q registers for regalloc.
5192 SmallVector<Register, 2> Regs = {Src1Reg, Src2Reg};
5193 auto RegSeq = createQTuple(Regs, MIB);
5194 auto TBL2 = MIB.buildInstr(AArch64::TBLv16i8Two, {I.getOperand(0)},
5195 {RegSeq, IndexLoad->getOperand(0)});
5197 I.eraseFromParent();
5198 return true;
5199}
5200
5201MachineInstr *AArch64InstructionSelector::emitLaneInsert(
5202 std::optional<Register> DstReg, Register SrcReg, Register EltReg,
5203 unsigned LaneIdx, const RegisterBank &RB,
5204 MachineIRBuilder &MIRBuilder) const {
5205 MachineInstr *InsElt = nullptr;
5206 const TargetRegisterClass *DstRC = &AArch64::FPR128RegClass;
5207 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
5208
5209 // Create a register to define with the insert if one wasn't passed in.
5210 if (!DstReg)
5211 DstReg = MRI.createVirtualRegister(DstRC);
5212
5213 unsigned EltSize = MRI.getType(EltReg).getSizeInBits();
5214 unsigned Opc = getInsertVecEltOpInfo(RB, EltSize).first;
5215
5216 if (RB.getID() == AArch64::FPRRegBankID) {
5217 auto InsSub = emitScalarToVector(EltSize, DstRC, EltReg, MIRBuilder);
5218 InsElt = MIRBuilder.buildInstr(Opc, {*DstReg}, {SrcReg})
5219 .addImm(LaneIdx)
5220 .addUse(InsSub->getOperand(0).getReg())
5221 .addImm(0);
5222 } else {
5223 InsElt = MIRBuilder.buildInstr(Opc, {*DstReg}, {SrcReg})
5224 .addImm(LaneIdx)
5225 .addUse(EltReg);
5226 }
5227
5229 return InsElt;
5230}
5231
5232bool AArch64InstructionSelector::selectUSMovFromExtend(
5233 MachineInstr &MI, MachineRegisterInfo &MRI) {
5234 if (MI.getOpcode() != TargetOpcode::G_SEXT &&
5235 MI.getOpcode() != TargetOpcode::G_ZEXT &&
5236 MI.getOpcode() != TargetOpcode::G_ANYEXT)
5237 return false;
5238 bool IsSigned = MI.getOpcode() == TargetOpcode::G_SEXT;
5239 const Register DefReg = MI.getOperand(0).getReg();
5240 const LLT DstTy = MRI.getType(DefReg);
5241 unsigned DstSize = DstTy.getSizeInBits();
5242
5243 if (DstSize != 32 && DstSize != 64)
5244 return false;
5245
5246 MachineInstr *Extract = getOpcodeDef(TargetOpcode::G_EXTRACT_VECTOR_ELT,
5247 MI.getOperand(1).getReg(), MRI);
5248 int64_t Lane;
5249 if (!Extract || !mi_match(Extract->getOperand(2).getReg(), MRI, m_ICst(Lane)))
5250 return false;
5251 Register Src0 = Extract->getOperand(1).getReg();
5252
5253 const LLT VecTy = MRI.getType(Src0);
5254 if (VecTy.isScalableVector())
5255 return false;
5256
5257 if (VecTy.getSizeInBits() != 128) {
5258 const MachineInstr *ScalarToVector = emitScalarToVector(
5259 VecTy.getSizeInBits(), &AArch64::FPR128RegClass, Src0, MIB);
5260 assert(ScalarToVector && "Didn't expect emitScalarToVector to fail!");
5261 Src0 = ScalarToVector->getOperand(0).getReg();
5262 }
5263
5264 unsigned Opcode;
5265 if (DstSize == 64 && VecTy.getScalarSizeInBits() == 32)
5266 Opcode = IsSigned ? AArch64::SMOVvi32to64 : AArch64::UMOVvi32;
5267 else if (DstSize == 64 && VecTy.getScalarSizeInBits() == 16)
5268 Opcode = IsSigned ? AArch64::SMOVvi16to64 : AArch64::UMOVvi16;
5269 else if (DstSize == 64 && VecTy.getScalarSizeInBits() == 8)
5270 Opcode = IsSigned ? AArch64::SMOVvi8to64 : AArch64::UMOVvi8;
5271 else if (DstSize == 32 && VecTy.getScalarSizeInBits() == 16)
5272 Opcode = IsSigned ? AArch64::SMOVvi16to32 : AArch64::UMOVvi16;
5273 else if (DstSize == 32 && VecTy.getScalarSizeInBits() == 8)
5274 Opcode = IsSigned ? AArch64::SMOVvi8to32 : AArch64::UMOVvi8;
5275 else
5276 llvm_unreachable("Unexpected type combo for S/UMov!");
5277
5278 // We may need to generate one of these, depending on the type and sign of the
5279 // input:
5280 // DstReg = SMOV Src0, Lane;
5281 // NewReg = UMOV Src0, Lane; DstReg = SUBREG_TO_REG NewReg, sub_32;
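 // For example (illustrative, not from the source), a G_SEXT to s64 of lane 1
 // of a <4 x s32> vector selects "SMOV x0, v0.s[1]", while the unsigned s64
 // case uses "UMOV w0, v0.s[1]" plus a SUBREG_TO_REG into the 64-bit register.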
5282 MachineInstr *ExtI = nullptr;
5283 if (DstSize == 64 && !IsSigned) {
5284 Register NewReg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
5285 MIB.buildInstr(Opcode, {NewReg}, {Src0}).addImm(Lane);
5286 ExtI = MIB.buildInstr(AArch64::SUBREG_TO_REG, {DefReg}, {})
5287 .addUse(NewReg)
5288 .addImm(AArch64::sub_32);
5289 RBI.constrainGenericRegister(DefReg, AArch64::GPR64RegClass, MRI);
5290 } else
5291 ExtI = MIB.buildInstr(Opcode, {DefReg}, {Src0}).addImm(Lane);
5292
5294 MI.eraseFromParent();
5295 return true;
5296}
5297
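// The tryAdvSIMDModImm* helpers below map splatted constants onto the AdvSIMD
// "modified immediate" MOVI/MVNI/FMOV encodings. Illustrative (not from the
// source): a 128-bit splat of the byte 0x2a selects "MOVI v0.16b, #0x2a"
// instead of falling back to a constant-pool load.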
5298MachineInstr *AArch64InstructionSelector::tryAdvSIMDModImm8(
5299 Register Dst, unsigned DstSize, APInt Bits, MachineIRBuilder &Builder) {
5300 unsigned int Op;
5301 if (DstSize == 128) {
5302 if (Bits.getHiBits(64) != Bits.getLoBits(64))
5303 return nullptr;
5304 Op = AArch64::MOVIv16b_ns;
5305 } else {
5306 Op = AArch64::MOVIv8b_ns;
5307 }
5308
5309 uint64_t Val = Bits.zextOrTrunc(64).getZExtValue();
5310
 5311 if (AArch64_AM::isAdvSIMDModImmType9(Val)) {
 5312 Val = AArch64_AM::encodeAdvSIMDModImmType9(Val);
 5313 auto Mov = Builder.buildInstr(Op, {Dst}, {}).addImm(Val);
 5314 constrainSelectedInstRegOperands(*Mov, TII, TRI, RBI);
 5315 return &*Mov;
5316 }
5317 return nullptr;
5318}
5319
5320MachineInstr *AArch64InstructionSelector::tryAdvSIMDModImm16(
5321 Register Dst, unsigned DstSize, APInt Bits, MachineIRBuilder &Builder,
5322 bool Inv) {
5323
5324 unsigned int Op;
5325 if (DstSize == 128) {
5326 if (Bits.getHiBits(64) != Bits.getLoBits(64))
5327 return nullptr;
5328 Op = Inv ? AArch64::MVNIv8i16 : AArch64::MOVIv8i16;
5329 } else {
5330 Op = Inv ? AArch64::MVNIv4i16 : AArch64::MOVIv4i16;
5331 }
5332
5333 uint64_t Val = Bits.zextOrTrunc(64).getZExtValue();
5334 uint64_t Shift;
5335
 5336 if (AArch64_AM::isAdvSIMDModImmType5(Val)) {
 5337 Val = AArch64_AM::encodeAdvSIMDModImmType5(Val);
 5338 Shift = 0;
5339 } else if (AArch64_AM::isAdvSIMDModImmType6(Val)) {
 5340 Val = AArch64_AM::encodeAdvSIMDModImmType6(Val);
 5341 Shift = 8;
5342 } else
5343 return nullptr;
5344
5345 auto Mov = Builder.buildInstr(Op, {Dst}, {}).addImm(Val).addImm(Shift);
5347 return &*Mov;
5348}
5349
5350MachineInstr *AArch64InstructionSelector::tryAdvSIMDModImm32(
5351 Register Dst, unsigned DstSize, APInt Bits, MachineIRBuilder &Builder,
5352 bool Inv) {
5353
5354 unsigned int Op;
5355 if (DstSize == 128) {
5356 if (Bits.getHiBits(64) != Bits.getLoBits(64))
5357 return nullptr;
5358 Op = Inv ? AArch64::MVNIv4i32 : AArch64::MOVIv4i32;
5359 } else {
5360 Op = Inv ? AArch64::MVNIv2i32 : AArch64::MOVIv2i32;
5361 }
5362
5363 uint64_t Val = Bits.zextOrTrunc(64).getZExtValue();
5364 uint64_t Shift;
5365
 5366 if ((AArch64_AM::isAdvSIMDModImmType1(Val))) {
 5367 Val = AArch64_AM::encodeAdvSIMDModImmType1(Val);
 5368 Shift = 0;
5369 } else if ((AArch64_AM::isAdvSIMDModImmType2(Val))) {
 5370 Val = AArch64_AM::encodeAdvSIMDModImmType2(Val);
 5371 Shift = 8;
5372 } else if ((AArch64_AM::isAdvSIMDModImmType3(Val))) {
 5373 Val = AArch64_AM::encodeAdvSIMDModImmType3(Val);
 5374 Shift = 16;
5375 } else if ((AArch64_AM::isAdvSIMDModImmType4(Val))) {
 5376 Val = AArch64_AM::encodeAdvSIMDModImmType4(Val);
 5377 Shift = 24;
5378 } else
5379 return nullptr;
5380
5381 auto Mov = Builder.buildInstr(Op, {Dst}, {}).addImm(Val).addImm(Shift);
5383 return &*Mov;
5384}
5385
5386MachineInstr *AArch64InstructionSelector::tryAdvSIMDModImm64(
5387 Register Dst, unsigned DstSize, APInt Bits, MachineIRBuilder &Builder) {
5388
5389 unsigned int Op;
5390 if (DstSize == 128) {
5391 if (Bits.getHiBits(64) != Bits.getLoBits(64))
5392 return nullptr;
5393 Op = AArch64::MOVIv2d_ns;
5394 } else {
5395 Op = AArch64::MOVID;
5396 }
5397
5398 uint64_t Val = Bits.zextOrTrunc(64).getZExtValue();
 5399 if (AArch64_AM::isAdvSIMDModImmType10(Val)) {
 5400 Val = AArch64_AM::encodeAdvSIMDModImmType10(Val);
 5401 auto Mov = Builder.buildInstr(Op, {Dst}, {}).addImm(Val);
 5402 constrainSelectedInstRegOperands(*Mov, TII, TRI, RBI);
 5403 return &*Mov;
5404 }
5405 return nullptr;
5406}
5407
5408MachineInstr *AArch64InstructionSelector::tryAdvSIMDModImm321s(
5409 Register Dst, unsigned DstSize, APInt Bits, MachineIRBuilder &Builder,
5410 bool Inv) {
5411
5412 unsigned int Op;
5413 if (DstSize == 128) {
5414 if (Bits.getHiBits(64) != Bits.getLoBits(64))
5415 return nullptr;
5416 Op = Inv ? AArch64::MVNIv4s_msl : AArch64::MOVIv4s_msl;
5417 } else {
5418 Op = Inv ? AArch64::MVNIv2s_msl : AArch64::MOVIv2s_msl;
5419 }
5420
5421 uint64_t Val = Bits.zextOrTrunc(64).getZExtValue();
5422 uint64_t Shift;
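 // Note (illustrative, not from the source): this is the MSL ("shift ones")
 // variant; ModImmType7 corresponds to MSL #8 (Shift = 264 below) and
 // ModImmType8 to MSL #16 (Shift = 272), matching the MOVI/MVNI .2s/.4s forms.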
5423
 5424 if (AArch64_AM::isAdvSIMDModImmType7(Val)) {
 5425 Val = AArch64_AM::encodeAdvSIMDModImmType7(Val);
 5426 Shift = 264;
5427 } else if (AArch64_AM::isAdvSIMDModImmType8(Val)) {
 5428 Val = AArch64_AM::encodeAdvSIMDModImmType8(Val);
 5429 Shift = 272;
5430 } else
5431 return nullptr;
5432
5433 auto Mov = Builder.buildInstr(Op, {Dst}, {}).addImm(Val).addImm(Shift);
5435 return &*Mov;
5436}
5437
5438MachineInstr *AArch64InstructionSelector::tryAdvSIMDModImmFP(
5439 Register Dst, unsigned DstSize, APInt Bits, MachineIRBuilder &Builder) {
5440
5441 unsigned int Op;
5442 bool IsWide = false;
5443 if (DstSize == 128) {
5444 if (Bits.getHiBits(64) != Bits.getLoBits(64))
5445 return nullptr;
5446 Op = AArch64::FMOVv4f32_ns;
5447 IsWide = true;
5448 } else {
5449 Op = AArch64::FMOVv2f32_ns;
5450 }
5451
5452 uint64_t Val = Bits.zextOrTrunc(64).getZExtValue();
5453
 5454 if (AArch64_AM::isAdvSIMDModImmType11(Val)) {
 5455 Val = AArch64_AM::encodeAdvSIMDModImmType11(Val);
 5456 } else if (IsWide && AArch64_AM::isAdvSIMDModImmType12(Val)) {
 5457 Val = AArch64_AM::encodeAdvSIMDModImmType12(Val);
 5458 Op = AArch64::FMOVv2f64_ns;
5459 } else
5460 return nullptr;
5461
5462 auto Mov = Builder.buildInstr(Op, {Dst}, {}).addImm(Val);
5464 return &*Mov;
5465}
5466
5467bool AArch64InstructionSelector::selectIndexedExtLoad(
5468 MachineInstr &MI, MachineRegisterInfo &MRI) {
5469 auto &ExtLd = cast<GIndexedAnyExtLoad>(MI);
5470 Register Dst = ExtLd.getDstReg();
5471 Register WriteBack = ExtLd.getWritebackReg();
5472 Register Base = ExtLd.getBaseReg();
5473 Register Offset = ExtLd.getOffsetReg();
5474 LLT Ty = MRI.getType(Dst);
5475 assert(Ty.getSizeInBits() <= 64); // Only for scalar GPRs.
5476 unsigned MemSizeBits = ExtLd.getMMO().getMemoryType().getSizeInBits();
5477 bool IsPre = ExtLd.isPre();
5478 bool IsSExt = isa<GIndexedSExtLoad>(ExtLd);
5479 unsigned InsertIntoSubReg = 0;
5480 bool IsDst64 = Ty.getSizeInBits() == 64;
5481
 5482 // ZExt/SExt should be on GPR, but we can also handle extload and zextload of
 5483 // FPR, as long as they are scalar.
5484 bool IsFPR = RBI.getRegBank(Dst, MRI, TRI)->getID() == AArch64::FPRRegBankID;
5485 if ((IsSExt && IsFPR) || Ty.isVector())
5486 return false;
5487
5488 unsigned Opc = 0;
5489 LLT NewLdDstTy;
5490 LLT s32 = LLT::scalar(32);
5491 LLT s64 = LLT::scalar(64);
5492
5493 if (MemSizeBits == 8) {
5494 if (IsSExt) {
5495 if (IsDst64)
5496 Opc = IsPre ? AArch64::LDRSBXpre : AArch64::LDRSBXpost;
5497 else
5498 Opc = IsPre ? AArch64::LDRSBWpre : AArch64::LDRSBWpost;
5499 NewLdDstTy = IsDst64 ? s64 : s32;
5500 } else if (IsFPR) {
5501 Opc = IsPre ? AArch64::LDRBpre : AArch64::LDRBpost;
5502 InsertIntoSubReg = AArch64::bsub;
5503 NewLdDstTy = LLT::scalar(MemSizeBits);
5504 } else {
5505 Opc = IsPre ? AArch64::LDRBBpre : AArch64::LDRBBpost;
5506 InsertIntoSubReg = IsDst64 ? AArch64::sub_32 : 0;
5507 NewLdDstTy = s32;
5508 }
5509 } else if (MemSizeBits == 16) {
5510 if (IsSExt) {
5511 if (IsDst64)
5512 Opc = IsPre ? AArch64::LDRSHXpre : AArch64::LDRSHXpost;
5513 else
5514 Opc = IsPre ? AArch64::LDRSHWpre : AArch64::LDRSHWpost;
5515 NewLdDstTy = IsDst64 ? s64 : s32;
5516 } else if (IsFPR) {
5517 Opc = IsPre ? AArch64::LDRHpre : AArch64::LDRHpost;
5518 InsertIntoSubReg = AArch64::hsub;
5519 NewLdDstTy = LLT::scalar(MemSizeBits);
5520 } else {
5521 Opc = IsPre ? AArch64::LDRHHpre : AArch64::LDRHHpost;
5522 InsertIntoSubReg = IsDst64 ? AArch64::sub_32 : 0;
5523 NewLdDstTy = s32;
5524 }
5525 } else if (MemSizeBits == 32) {
5526 if (IsSExt) {
5527 Opc = IsPre ? AArch64::LDRSWpre : AArch64::LDRSWpost;
5528 NewLdDstTy = s64;
5529 } else if (IsFPR) {
5530 Opc = IsPre ? AArch64::LDRSpre : AArch64::LDRSpost;
5531 InsertIntoSubReg = AArch64::ssub;
5532 NewLdDstTy = LLT::scalar(MemSizeBits);
5533 } else {
5534 Opc = IsPre ? AArch64::LDRWpre : AArch64::LDRWpost;
5535 InsertIntoSubReg = IsDst64 ? AArch64::sub_32 : 0;
5536 NewLdDstTy = s32;
5537 }
5538 } else {
5539 llvm_unreachable("Unexpected size for indexed load");
5540 }
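 // Illustrative (not from the source): a post-indexed any-extending load of an
 // s8 into an s64 GPR picks LDRBBpost (a 32-bit load), and the result is then
 // widened into the 64-bit destination with SUBREG_TO_REG via sub_32 below.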
5541
5542 auto Cst = getIConstantVRegVal(Offset, MRI);
5543 if (!Cst)
5544 return false; // Shouldn't happen, but just in case.
5545
5546 auto LdMI = MIB.buildInstr(Opc, {WriteBack, NewLdDstTy}, {Base})
5547 .addImm(Cst->getSExtValue());
5548 LdMI.cloneMemRefs(ExtLd);
5550 // Make sure to select the load with the MemTy as the dest type, and then
5551 // insert into a larger reg if needed.
5552 if (InsertIntoSubReg) {
5553 // Generate a SUBREG_TO_REG.
5554 auto SubToReg = MIB.buildInstr(TargetOpcode::SUBREG_TO_REG, {Dst}, {})
5555 .addUse(LdMI.getReg(1))
5556 .addImm(InsertIntoSubReg);
 5557 RBI.constrainGenericRegister(
 5558 SubToReg.getReg(0),
5559 *getRegClassForTypeOnBank(MRI.getType(Dst),
5560 *RBI.getRegBank(Dst, MRI, TRI)),
5561 MRI);
5562 } else {
5563 auto Copy = MIB.buildCopy(Dst, LdMI.getReg(1));
5564 selectCopy(*Copy, TII, MRI, TRI, RBI);
5565 }
5566 MI.eraseFromParent();
5567
5568 return true;
5569}
5570
5571bool AArch64InstructionSelector::selectIndexedLoad(MachineInstr &MI,
5572 MachineRegisterInfo &MRI) {
5573 auto &Ld = cast<GIndexedLoad>(MI);
5574 Register Dst = Ld.getDstReg();
5575 Register WriteBack = Ld.getWritebackReg();
5576 Register Base = Ld.getBaseReg();
5577 Register Offset = Ld.getOffsetReg();
5578 assert(MRI.getType(Dst).getSizeInBits() <= 128 &&
5579 "Unexpected type for indexed load");
5580 unsigned MemSize = Ld.getMMO().getMemoryType().getSizeInBytes();
5581
5582 if (MemSize < MRI.getType(Dst).getSizeInBytes())
5583 return selectIndexedExtLoad(MI, MRI);
5584
5585 unsigned Opc = 0;
5586 if (Ld.isPre()) {
5587 static constexpr unsigned GPROpcodes[] = {
5588 AArch64::LDRBBpre, AArch64::LDRHHpre, AArch64::LDRWpre,
5589 AArch64::LDRXpre};
5590 static constexpr unsigned FPROpcodes[] = {
5591 AArch64::LDRBpre, AArch64::LDRHpre, AArch64::LDRSpre, AArch64::LDRDpre,
5592 AArch64::LDRQpre};
5593 Opc = (RBI.getRegBank(Dst, MRI, TRI)->getID() == AArch64::FPRRegBankID)
5594 ? FPROpcodes[Log2_32(MemSize)]
5595 : GPROpcodes[Log2_32(MemSize)];
5597 } else {
5598 static constexpr unsigned GPROpcodes[] = {
5599 AArch64::LDRBBpost, AArch64::LDRHHpost, AArch64::LDRWpost,
5600 AArch64::LDRXpost};
5601 static constexpr unsigned FPROpcodes[] = {
5602 AArch64::LDRBpost, AArch64::LDRHpost, AArch64::LDRSpost,
5603 AArch64::LDRDpost, AArch64::LDRQpost};
5604 Opc = (RBI.getRegBank(Dst, MRI, TRI)->getID() == AArch64::FPRRegBankID)
5605 ? FPROpcodes[Log2_32(MemSize)]
5606 : GPROpcodes[Log2_32(MemSize)];
5608 }
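 // Illustrative (not from the source): the tables are indexed by
 // Log2_32(MemSize), so e.g. a 4-byte pre-indexed GPR load picks
 // GPROpcodes[2], which is AArch64::LDRWpre.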
5609 auto Cst = getIConstantVRegVal(Offset, MRI);
5610 if (!Cst)
5611 return false; // Shouldn't happen, but just in case.
5612 auto LdMI =
5613 MIB.buildInstr(Opc, {WriteBack, Dst}, {Base}).addImm(Cst->getSExtValue());
5614 LdMI.cloneMemRefs(Ld);
5616 MI.eraseFromParent();
5617 return true;
5618}
5619
5620bool AArch64InstructionSelector::selectIndexedStore(GIndexedStore &I,
5621 MachineRegisterInfo &MRI) {
5622 Register Dst = I.getWritebackReg();
5623 Register Val = I.getValueReg();
5624 Register Base = I.getBaseReg();
5625 Register Offset = I.getOffsetReg();
5626 assert(MRI.getType(Val).getSizeInBits() <= 128 &&
5627 "Unexpected type for indexed store");
5628
5629 LocationSize MemSize = I.getMMO().getSize();
5630 unsigned MemSizeInBytes = MemSize.getValue();
5631
5632 assert(MemSizeInBytes && MemSizeInBytes <= 16 &&
5633 "Unexpected indexed store size");
5634 unsigned MemSizeLog2 = Log2_32(MemSizeInBytes);
5635
5636 unsigned Opc = 0;
5637 if (I.isPre()) {
5638 static constexpr unsigned GPROpcodes[] = {
5639 AArch64::STRBBpre, AArch64::STRHHpre, AArch64::STRWpre,
5640 AArch64::STRXpre};
5641 static constexpr unsigned FPROpcodes[] = {
5642 AArch64::STRBpre, AArch64::STRHpre, AArch64::STRSpre, AArch64::STRDpre,
5643 AArch64::STRQpre};
5644
5645 if (RBI.getRegBank(Val, MRI, TRI)->getID() == AArch64::FPRRegBankID)
5646 Opc = FPROpcodes[MemSizeLog2];
5647 else
5648 Opc = GPROpcodes[MemSizeLog2];
5649 } else {
5650 static constexpr unsigned GPROpcodes[] = {
5651 AArch64::STRBBpost, AArch64::STRHHpost, AArch64::STRWpost,
5652 AArch64::STRXpost};
5653 static constexpr unsigned FPROpcodes[] = {
5654 AArch64::STRBpost, AArch64::STRHpost, AArch64::STRSpost,
5655 AArch64::STRDpost, AArch64::STRQpost};
5656
5657 if (RBI.getRegBank(Val, MRI, TRI)->getID() == AArch64::FPRRegBankID)
5658 Opc = FPROpcodes[MemSizeLog2];
5659 else
5660 Opc = GPROpcodes[MemSizeLog2];
5661 }
5662
5663 auto Cst = getIConstantVRegVal(Offset, MRI);
5664 if (!Cst)
5665 return false; // Shouldn't happen, but just in case.
5666 auto Str =
5667 MIB.buildInstr(Opc, {Dst}, {Val, Base}).addImm(Cst->getSExtValue());
5668 Str.cloneMemRefs(I);
5670 I.eraseFromParent();
5671 return true;
5672}
5673
5674MachineInstr *
5675AArch64InstructionSelector::emitConstantVector(Register Dst, Constant *CV,
5676 MachineIRBuilder &MIRBuilder,
5677 MachineRegisterInfo &MRI) {
5678 LLT DstTy = MRI.getType(Dst);
5679 unsigned DstSize = DstTy.getSizeInBits();
5680 assert((DstSize == 64 || DstSize == 128) &&
5681 "Unexpected vector constant size");
5682
5683 if (CV->isNullValue()) {
5684 if (DstSize == 128) {
5685 auto Mov =
5686 MIRBuilder.buildInstr(AArch64::MOVIv2d_ns, {Dst}, {}).addImm(0);
5688 return &*Mov;
5689 }
5690
5691 if (DstSize == 64) {
5692 auto Mov =
5693 MIRBuilder
5694 .buildInstr(AArch64::MOVIv2d_ns, {&AArch64::FPR128RegClass}, {})
5695 .addImm(0);
5696 auto Copy = MIRBuilder.buildInstr(TargetOpcode::COPY, {Dst}, {})
5697 .addReg(Mov.getReg(0), {}, AArch64::dsub);
5698 RBI.constrainGenericRegister(Dst, AArch64::FPR64RegClass, MRI);
5699 return &*Copy;
5700 }
5701 }
5702
5703 if (Constant *SplatValue = CV->getSplatValue()) {
5704 APInt SplatValueAsInt =
5705 isa<ConstantFP>(SplatValue)
5706 ? cast<ConstantFP>(SplatValue)->getValueAPF().bitcastToAPInt()
5707 : SplatValue->getUniqueInteger();
5708 APInt DefBits = APInt::getSplat(
5709 DstSize, SplatValueAsInt.trunc(DstTy.getScalarSizeInBits()));
5710 auto TryMOVIWithBits = [&](APInt DefBits) -> MachineInstr * {
5711 MachineInstr *NewOp;
5712 bool Inv = false;
5713 if ((NewOp = tryAdvSIMDModImm64(Dst, DstSize, DefBits, MIRBuilder)) ||
5714 (NewOp =
5715 tryAdvSIMDModImm32(Dst, DstSize, DefBits, MIRBuilder, Inv)) ||
5716 (NewOp =
5717 tryAdvSIMDModImm321s(Dst, DstSize, DefBits, MIRBuilder, Inv)) ||
5718 (NewOp =
5719 tryAdvSIMDModImm16(Dst, DstSize, DefBits, MIRBuilder, Inv)) ||
5720 (NewOp = tryAdvSIMDModImm8(Dst, DstSize, DefBits, MIRBuilder)) ||
5721 (NewOp = tryAdvSIMDModImmFP(Dst, DstSize, DefBits, MIRBuilder)))
5722 return NewOp;
5723
5724 DefBits = ~DefBits;
5725 Inv = true;
5726 if ((NewOp =
5727 tryAdvSIMDModImm32(Dst, DstSize, DefBits, MIRBuilder, Inv)) ||
5728 (NewOp =
5729 tryAdvSIMDModImm321s(Dst, DstSize, DefBits, MIRBuilder, Inv)) ||
5730 (NewOp = tryAdvSIMDModImm16(Dst, DstSize, DefBits, MIRBuilder, Inv)))
5731 return NewOp;
5732 return nullptr;
5733 };
5734
5735 if (auto *NewOp = TryMOVIWithBits(DefBits))
5736 return NewOp;
5737
5738 // See if a fneg of the constant can be materialized with a MOVI, etc
5739 auto TryWithFNeg = [&](APInt DefBits, int NumBits,
5740 unsigned NegOpc) -> MachineInstr * {
5741 // FNegate each sub-element of the constant
5742 APInt Neg = APInt::getHighBitsSet(NumBits, 1).zext(DstSize);
5743 APInt NegBits(DstSize, 0);
5744 unsigned NumElts = DstSize / NumBits;
5745 for (unsigned i = 0; i < NumElts; i++)
5746 NegBits |= Neg << (NumBits * i);
5747 NegBits = DefBits ^ NegBits;
5748
5749 // Try to create the new constants with MOVI, and if so generate a fneg
5750 // for it.
5751 if (auto *NewOp = TryMOVIWithBits(NegBits)) {
5752 Register NewDst = MRI.createVirtualRegister(
5753 DstSize == 64 ? &AArch64::FPR64RegClass : &AArch64::FPR128RegClass);
5754 NewOp->getOperand(0).setReg(NewDst);
5755 return MIRBuilder.buildInstr(NegOpc, {Dst}, {NewDst});
5756 }
5757 return nullptr;
5758 };
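 // Illustrative note (not from the source): when the splat bits themselves have
 // no MOVI/MVNI/FMOV encoding but the same bits with every element's sign bit
 // flipped do, the selector materializes the sign-flipped constant with a MOVI
 // and then restores the signs with a single FNEG of the appropriate width.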
5759 MachineInstr *R;
5760 if ((R = TryWithFNeg(DefBits, 32,
5761 DstSize == 64 ? AArch64::FNEGv2f32
5762 : AArch64::FNEGv4f32)) ||
5763 (R = TryWithFNeg(DefBits, 64,
5764 DstSize == 64 ? AArch64::FNEGDr
5765 : AArch64::FNEGv2f64)) ||
5766 (STI.hasFullFP16() &&
5767 (R = TryWithFNeg(DefBits, 16,
5768 DstSize == 64 ? AArch64::FNEGv4f16
5769 : AArch64::FNEGv8f16))))
5770 return R;
5771 }
5772
5773 auto *CPLoad = emitLoadFromConstantPool(CV, MIRBuilder);
5774 if (!CPLoad) {
5775 LLVM_DEBUG(dbgs() << "Could not generate cp load for constant vector!");
5776 return nullptr;
5777 }
5778
5779 auto Copy = MIRBuilder.buildCopy(Dst, CPLoad->getOperand(0));
 5780 RBI.constrainGenericRegister(
 5781 Dst, *MRI.getRegClass(CPLoad->getOperand(0).getReg()), MRI);
5782 return &*Copy;
5783}
5784
5785bool AArch64InstructionSelector::tryOptConstantBuildVec(
5786 MachineInstr &I, LLT DstTy, MachineRegisterInfo &MRI) {
5787 assert(I.getOpcode() == TargetOpcode::G_BUILD_VECTOR);
5788 unsigned DstSize = DstTy.getSizeInBits();
5789 assert(DstSize <= 128 && "Unexpected build_vec type!");
5790 if (DstSize < 32)
5791 return false;
5792 // Check if we're building a constant vector, in which case we want to
5793 // generate a constant pool load instead of a vector insert sequence.
 5794 SmallVector<Constant *, 16> Csts;
 5795 for (unsigned Idx = 1; Idx < I.getNumOperands(); ++Idx) {
5796 Register OpReg = I.getOperand(Idx).getReg();
5797 if (auto AnyConst = getAnyConstantVRegValWithLookThrough(
5798 OpReg, MRI, /*LookThroughInstrs=*/true,
5799 /*LookThroughAnyExt=*/true)) {
5800 MachineInstr *DefMI = MRI.getVRegDef(AnyConst->VReg);
5801
5802 if (DefMI->getOpcode() == TargetOpcode::G_CONSTANT) {
5803 Csts.emplace_back(
5804 ConstantInt::get(MIB.getMF().getFunction().getContext(),
5805 std::move(AnyConst->Value)));
5806 continue;
5807 }
5808
5809 if (DefMI->getOpcode() == TargetOpcode::G_FCONSTANT) {
5810 Csts.emplace_back(
5811 const_cast<ConstantFP *>(DefMI->getOperand(1).getFPImm()));
5812 continue;
5813 }
5814 }
5815 return false;
5816 }
5817 Constant *CV = ConstantVector::get(Csts);
5818 if (!emitConstantVector(I.getOperand(0).getReg(), CV, MIB, MRI))
5819 return false;
5820 I.eraseFromParent();
5821 return true;
5822}
5823
5824bool AArch64InstructionSelector::tryOptBuildVecToSubregToReg(
5825 MachineInstr &I, MachineRegisterInfo &MRI) {
5826 // Given:
5827 // %vec = G_BUILD_VECTOR %elt, %undef, %undef, ... %undef
5828 //
5829 // Select the G_BUILD_VECTOR as a SUBREG_TO_REG from %elt.
5830 Register Dst = I.getOperand(0).getReg();
5831 Register EltReg = I.getOperand(1).getReg();
5832 LLT EltTy = MRI.getType(EltReg);
5833 // If the index isn't on the same bank as its elements, then this can't be a
5834 // SUBREG_TO_REG.
5835 const RegisterBank &EltRB = *RBI.getRegBank(EltReg, MRI, TRI);
5836 const RegisterBank &DstRB = *RBI.getRegBank(Dst, MRI, TRI);
5837 if (EltRB != DstRB)
5838 return false;
5839 if (any_of(drop_begin(I.operands(), 2), [&MRI](const MachineOperand &Op) {
5840 return !getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, Op.getReg(), MRI);
5841 }))
5842 return false;
5843 unsigned SubReg;
5844 const TargetRegisterClass *EltRC = getRegClassForTypeOnBank(EltTy, EltRB);
5845 if (!EltRC)
5846 return false;
5847 const TargetRegisterClass *DstRC =
5848 getRegClassForTypeOnBank(MRI.getType(Dst), DstRB);
5849 if (!DstRC)
5850 return false;
5851 if (!getSubRegForClass(EltRC, TRI, SubReg))
5852 return false;
5853 auto SubregToReg = MIB.buildInstr(AArch64::SUBREG_TO_REG, {Dst}, {})
5854 .addUse(EltReg)
5855 .addImm(SubReg);
5856 I.eraseFromParent();
5857 constrainSelectedInstRegOperands(*SubregToReg, TII, TRI, RBI);
5858 return RBI.constrainGenericRegister(Dst, *DstRC, MRI);
5859}
5860
5861bool AArch64InstructionSelector::selectBuildVector(MachineInstr &I,
5862 MachineRegisterInfo &MRI) {
5863 assert(I.getOpcode() == TargetOpcode::G_BUILD_VECTOR);
5864 // Until we port more of the optimized selections, for now just use a vector
5865 // insert sequence.
5866 const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
5867 const LLT EltTy = MRI.getType(I.getOperand(1).getReg());
5868 unsigned EltSize = EltTy.getSizeInBits();
5869
5870 if (tryOptConstantBuildVec(I, DstTy, MRI))
5871 return true;
5872 if (tryOptBuildVecToSubregToReg(I, MRI))
5873 return true;
5874
5875 if (EltSize != 8 && EltSize != 16 && EltSize != 32 && EltSize != 64)
5876 return false; // Don't support all element types yet.
5877 const RegisterBank &RB = *RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI);
5878
5879 const TargetRegisterClass *DstRC = &AArch64::FPR128RegClass;
5880 MachineInstr *ScalarToVec =
5881 emitScalarToVector(DstTy.getElementType().getSizeInBits(), DstRC,
5882 I.getOperand(1).getReg(), MIB);
5883 if (!ScalarToVec)
5884 return false;
5885
5886 Register DstVec = ScalarToVec->getOperand(0).getReg();
5887 unsigned DstSize = DstTy.getSizeInBits();
5888
5889 // Keep track of the last MI we inserted. Later on, we might be able to save
5890 // a copy using it.
5891 MachineInstr *PrevMI = ScalarToVec;
5892 for (unsigned i = 2, e = DstSize / EltSize + 1; i < e; ++i) {
5893 // Note that if we don't do a subregister copy, we can end up making an
5894 // extra register.
5895 Register OpReg = I.getOperand(i).getReg();
5896 // Do not emit inserts for undefs
5897 if (!getOpcodeDef<GImplicitDef>(OpReg, MRI)) {
5898 PrevMI = &*emitLaneInsert(std::nullopt, DstVec, OpReg, i - 1, RB, MIB);
5899 DstVec = PrevMI->getOperand(0).getReg();
5900 }
5901 }
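 // Illustrative (not from the source): a G_BUILD_VECTOR of four s32 GPR values
 // becomes one scalar-to-vector insert for element 0 followed by INSvi32gpr
 // lane inserts for lanes 1..3; undef operands simply skip their insert.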
5902
5903 // If DstTy's size in bits is less than 128, then emit a subregister copy
5904 // from DstVec to the last register we've defined.
5905 if (DstSize < 128) {
5906 // Force this to be FPR using the destination vector.
5907 const TargetRegisterClass *RC =
5908 getRegClassForTypeOnBank(DstTy, *RBI.getRegBank(DstVec, MRI, TRI));
5909 if (!RC)
5910 return false;
5911 if (RC != &AArch64::FPR32RegClass && RC != &AArch64::FPR64RegClass) {
5912 LLVM_DEBUG(dbgs() << "Unsupported register class!\n");
5913 return false;
5914 }
5915
5916 unsigned SubReg = 0;
5917 if (!getSubRegForClass(RC, TRI, SubReg))
5918 return false;
5919 if (SubReg != AArch64::ssub && SubReg != AArch64::dsub) {
5920 LLVM_DEBUG(dbgs() << "Unsupported destination size! (" << DstSize
 5921 << ")\n");
5922 return false;
5923 }
5924
 5925 Register Reg = MRI.createVirtualRegister(RC);
 5926 Register DstReg = I.getOperand(0).getReg();
5927
5928 MIB.buildInstr(TargetOpcode::COPY, {DstReg}, {}).addReg(DstVec, {}, SubReg);
5929 MachineOperand &RegOp = I.getOperand(1);
5930 RegOp.setReg(Reg);
5931 RBI.constrainGenericRegister(DstReg, *RC, MRI);
5932 } else {
5933 // We either have a vector with all elements (except the first one) undef or
5934 // at least one non-undef non-first element. In the first case, we need to
5935 // constrain the output register ourselves as we may have generated an
5936 // INSERT_SUBREG operation which is a generic operation for which the
5937 // output regclass cannot be automatically chosen.
5938 //
5939 // In the second case, there is no need to do this as it may generate an
5940 // instruction like INSvi32gpr where the regclass can be automatically
5941 // chosen.
5942 //
5943 // Also, we save a copy by re-using the destination register on the final
5944 // insert.
5945 PrevMI->getOperand(0).setReg(I.getOperand(0).getReg());
5947
5948 Register DstReg = PrevMI->getOperand(0).getReg();
5949 if (PrevMI == ScalarToVec && DstReg.isVirtual()) {
5950 const TargetRegisterClass *RC =
5951 getRegClassForTypeOnBank(DstTy, *RBI.getRegBank(DstVec, MRI, TRI));
5952 RBI.constrainGenericRegister(DstReg, *RC, MRI);
5953 }
5954 }
5955
 5956 I.eraseFromParent();
 5957 return true;
5958}
5959
5960bool AArch64InstructionSelector::selectVectorLoadIntrinsic(unsigned Opc,
5961 unsigned NumVecs,
5962 MachineInstr &I) {
5963 assert(I.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS);
5964 assert(Opc && "Expected an opcode?");
5965 assert(NumVecs > 1 && NumVecs < 5 && "Only support 2, 3, or 4 vectors");
5966 auto &MRI = *MIB.getMRI();
5967 LLT Ty = MRI.getType(I.getOperand(0).getReg());
5968 unsigned Size = Ty.getSizeInBits();
5969 assert((Size == 64 || Size == 128) &&
5970 "Destination must be 64 bits or 128 bits?");
5971 unsigned SubReg = Size == 64 ? AArch64::dsub0 : AArch64::qsub0;
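 // The NumVecs results of a tuple load live in consecutive sub-registers of the
 // single tuple def. Illustrative (not from the source): aarch64_neon_ld2 on
 // <4 x s32> selects LD2Twov4s, and the two destination vectors are copied out
 // of qsub0 and qsub1 below.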
5972 auto Ptr = I.getOperand(I.getNumOperands() - 1).getReg();
5973 assert(MRI.getType(Ptr).isPointer() && "Expected a pointer type?");
5974 auto Load = MIB.buildInstr(Opc, {Ty}, {Ptr});
5975 Load.cloneMemRefs(I);
5977 Register SelectedLoadDst = Load->getOperand(0).getReg();
5978 for (unsigned Idx = 0; Idx < NumVecs; ++Idx) {
5979 auto Vec = MIB.buildInstr(TargetOpcode::COPY, {I.getOperand(Idx)}, {})
5980 .addReg(SelectedLoadDst, {}, SubReg + Idx);
5981 // Emit the subreg copies and immediately select them.
5982 // FIXME: We should refactor our copy code into an emitCopy helper and
5983 // clean up uses of this pattern elsewhere in the selector.
5984 selectCopy(*Vec, TII, MRI, TRI, RBI);
5985 }
5986 return true;
5987}
5988
5989bool AArch64InstructionSelector::selectVectorLoadLaneIntrinsic(
5990 unsigned Opc, unsigned NumVecs, MachineInstr &I) {
5991 assert(I.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS);
5992 assert(Opc && "Expected an opcode?");
5993 assert(NumVecs > 1 && NumVecs < 5 && "Only support 2, 3, or 4 vectors");
5994 auto &MRI = *MIB.getMRI();
5995 LLT Ty = MRI.getType(I.getOperand(0).getReg());
5996 bool Narrow = Ty.getSizeInBits() == 64;
5997
5998 auto FirstSrcRegIt = I.operands_begin() + NumVecs + 1;
5999 SmallVector<Register, 4> Regs(NumVecs);
6000 std::transform(FirstSrcRegIt, FirstSrcRegIt + NumVecs, Regs.begin(),
6001 [](auto MO) { return MO.getReg(); });
6002
6003 if (Narrow) {
6004 transform(Regs, Regs.begin(), [this](Register Reg) {
6005 return emitScalarToVector(64, &AArch64::FPR128RegClass, Reg, MIB)
6006 ->getOperand(0)
6007 .getReg();
6008 });
6009 Ty = Ty.multiplyElements(2);
6010 }
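 // Illustrative (not from the source): for 64-bit source vectors the lane-load
 // instructions still operate on Q registers, so each input is widened to an
 // FPR128 first, the lane load is performed on the Q tuple, and
 // emitNarrowVector copies the low 64 bits back out afterwards.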
6011
6012 Register Tuple = createQTuple(Regs, MIB);
6013 auto LaneNo = getIConstantVRegVal((FirstSrcRegIt + NumVecs)->getReg(), MRI);
6014 if (!LaneNo)
6015 return false;
6016
6017 Register Ptr = (FirstSrcRegIt + NumVecs + 1)->getReg();
6018 auto Load = MIB.buildInstr(Opc, {Ty}, {})
6019 .addReg(Tuple)
6020 .addImm(LaneNo->getZExtValue())
6021 .addReg(Ptr);
6022 Load.cloneMemRefs(I);
6024 Register SelectedLoadDst = Load->getOperand(0).getReg();
6025 unsigned SubReg = AArch64::qsub0;
6026 for (unsigned Idx = 0; Idx < NumVecs; ++Idx) {
6027 auto Vec = MIB.buildInstr(TargetOpcode::COPY,
6028 {Narrow ? DstOp(&AArch64::FPR128RegClass)
6029 : DstOp(I.getOperand(Idx).getReg())},
6030 {})
6031 .addReg(SelectedLoadDst, {}, SubReg + Idx);
6032 Register WideReg = Vec.getReg(0);
6033 // Emit the subreg copies and immediately select them.
6034 selectCopy(*Vec, TII, MRI, TRI, RBI);
6035 if (Narrow &&
6036 !emitNarrowVector(I.getOperand(Idx).getReg(), WideReg, MIB, MRI))
6037 return false;
6038 }
6039 return true;
6040}
6041
6042void AArch64InstructionSelector::selectVectorStoreIntrinsic(MachineInstr &I,
6043 unsigned NumVecs,
6044 unsigned Opc) {
6045 MachineRegisterInfo &MRI = I.getParent()->getParent()->getRegInfo();
6046 LLT Ty = MRI.getType(I.getOperand(1).getReg());
6047 Register Ptr = I.getOperand(1 + NumVecs).getReg();
6048
6049 SmallVector<Register, 2> Regs(NumVecs);
6050 std::transform(I.operands_begin() + 1, I.operands_begin() + 1 + NumVecs,
6051 Regs.begin(), [](auto MO) { return MO.getReg(); });
6052
6053 Register Tuple = Ty.getSizeInBits() == 128 ? createQTuple(Regs, MIB)
6054 : createDTuple(Regs, MIB);
6055 auto Store = MIB.buildInstr(Opc, {}, {Tuple, Ptr});
6056 Store.cloneMemRefs(I);
6058}
6059
6060bool AArch64InstructionSelector::selectVectorStoreLaneIntrinsic(
6061 MachineInstr &I, unsigned NumVecs, unsigned Opc) {
6062 MachineRegisterInfo &MRI = I.getParent()->getParent()->getRegInfo();
6063 LLT Ty = MRI.getType(I.getOperand(1).getReg());
6064 bool Narrow = Ty.getSizeInBits() == 64;
6065
6066 SmallVector<Register, 2> Regs(NumVecs);
6067 std::transform(I.operands_begin() + 1, I.operands_begin() + 1 + NumVecs,
6068 Regs.begin(), [](auto MO) { return MO.getReg(); });
6069
6070 if (Narrow)
6071 transform(Regs, Regs.begin(), [this](Register Reg) {
6072 return emitScalarToVector(64, &AArch64::FPR128RegClass, Reg, MIB)
6073 ->getOperand(0)
6074 .getReg();
6075 });
6076
6077 Register Tuple = createQTuple(Regs, MIB);
6078
6079 auto LaneNo = getIConstantVRegVal(I.getOperand(1 + NumVecs).getReg(), MRI);
6080 if (!LaneNo)
6081 return false;
6082 Register Ptr = I.getOperand(1 + NumVecs + 1).getReg();
6083 auto Store = MIB.buildInstr(Opc, {}, {})
6084 .addReg(Tuple)
6085 .addImm(LaneNo->getZExtValue())
6086 .addReg(Ptr);
6087 Store.cloneMemRefs(I);
6089 return true;
6090}
6091
6092bool AArch64InstructionSelector::selectIntrinsicWithSideEffects(
6093 MachineInstr &I, MachineRegisterInfo &MRI) {
6094 // Find the intrinsic ID.
6095 unsigned IntrinID = cast<GIntrinsic>(I).getIntrinsicID();
6096
6097 const LLT S8 = LLT::scalar(8);
6098 const LLT S16 = LLT::scalar(16);
6099 const LLT S32 = LLT::scalar(32);
6100 const LLT S64 = LLT::scalar(64);
6101 const LLT P0 = LLT::pointer(0, 64);
6102 // Select the instruction.
6103 switch (IntrinID) {
6104 default:
6105 return false;
6106 case Intrinsic::aarch64_ldxp:
6107 case Intrinsic::aarch64_ldaxp: {
6108 auto NewI = MIB.buildInstr(
6109 IntrinID == Intrinsic::aarch64_ldxp ? AArch64::LDXPX : AArch64::LDAXPX,
6110 {I.getOperand(0).getReg(), I.getOperand(1).getReg()},
6111 {I.getOperand(3)});
6112 NewI.cloneMemRefs(I);
6114 break;
6115 }
6116 case Intrinsic::aarch64_neon_ld1x2: {
6117 LLT Ty = MRI.getType(I.getOperand(0).getReg());
6118 unsigned Opc = 0;
6119 if (Ty == LLT::fixed_vector(8, S8))
6120 Opc = AArch64::LD1Twov8b;
6121 else if (Ty == LLT::fixed_vector(16, S8))
6122 Opc = AArch64::LD1Twov16b;
6123 else if (Ty == LLT::fixed_vector(4, S16))
6124 Opc = AArch64::LD1Twov4h;
6125 else if (Ty == LLT::fixed_vector(8, S16))
6126 Opc = AArch64::LD1Twov8h;
6127 else if (Ty == LLT::fixed_vector(2, S32))
6128 Opc = AArch64::LD1Twov2s;
6129 else if (Ty == LLT::fixed_vector(4, S32))
6130 Opc = AArch64::LD1Twov4s;
6131 else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
6132 Opc = AArch64::LD1Twov2d;
6133 else if (Ty == S64 || Ty == P0)
6134 Opc = AArch64::LD1Twov1d;
6135 else
6136 llvm_unreachable("Unexpected type for ld1x2!");
6137 selectVectorLoadIntrinsic(Opc, 2, I);
6138 break;
6139 }
6140 case Intrinsic::aarch64_neon_ld1x3: {
6141 LLT Ty = MRI.getType(I.getOperand(0).getReg());
6142 unsigned Opc = 0;
6143 if (Ty == LLT::fixed_vector(8, S8))
6144 Opc = AArch64::LD1Threev8b;
6145 else if (Ty == LLT::fixed_vector(16, S8))
6146 Opc = AArch64::LD1Threev16b;
6147 else if (Ty == LLT::fixed_vector(4, S16))
6148 Opc = AArch64::LD1Threev4h;
6149 else if (Ty == LLT::fixed_vector(8, S16))
6150 Opc = AArch64::LD1Threev8h;
6151 else if (Ty == LLT::fixed_vector(2, S32))
6152 Opc = AArch64::LD1Threev2s;
6153 else if (Ty == LLT::fixed_vector(4, S32))
6154 Opc = AArch64::LD1Threev4s;
6155 else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
6156 Opc = AArch64::LD1Threev2d;
6157 else if (Ty == S64 || Ty == P0)
6158 Opc = AArch64::LD1Threev1d;
6159 else
6160 llvm_unreachable("Unexpected type for ld1x3!");
6161 selectVectorLoadIntrinsic(Opc, 3, I);
6162 break;
6163 }
6164 case Intrinsic::aarch64_neon_ld1x4: {
6165 LLT Ty = MRI.getType(I.getOperand(0).getReg());
6166 unsigned Opc = 0;
6167 if (Ty == LLT::fixed_vector(8, S8))
6168 Opc = AArch64::LD1Fourv8b;
6169 else if (Ty == LLT::fixed_vector(16, S8))
6170 Opc = AArch64::LD1Fourv16b;
6171 else if (Ty == LLT::fixed_vector(4, S16))
6172 Opc = AArch64::LD1Fourv4h;
6173 else if (Ty == LLT::fixed_vector(8, S16))
6174 Opc = AArch64::LD1Fourv8h;
6175 else if (Ty == LLT::fixed_vector(2, S32))
6176 Opc = AArch64::LD1Fourv2s;
6177 else if (Ty == LLT::fixed_vector(4, S32))
6178 Opc = AArch64::LD1Fourv4s;
6179 else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
6180 Opc = AArch64::LD1Fourv2d;
6181 else if (Ty == S64 || Ty == P0)
6182 Opc = AArch64::LD1Fourv1d;
6183 else
6184 llvm_unreachable("Unexpected type for ld1x4!");
6185 selectVectorLoadIntrinsic(Opc, 4, I);
6186 break;
6187 }
6188 case Intrinsic::aarch64_neon_ld2: {
6189 LLT Ty = MRI.getType(I.getOperand(0).getReg());
6190 unsigned Opc = 0;
6191 if (Ty == LLT::fixed_vector(8, S8))
6192 Opc = AArch64::LD2Twov8b;
6193 else if (Ty == LLT::fixed_vector(16, S8))
6194 Opc = AArch64::LD2Twov16b;
6195 else if (Ty == LLT::fixed_vector(4, S16))
6196 Opc = AArch64::LD2Twov4h;
6197 else if (Ty == LLT::fixed_vector(8, S16))
6198 Opc = AArch64::LD2Twov8h;
6199 else if (Ty == LLT::fixed_vector(2, S32))
6200 Opc = AArch64::LD2Twov2s;
6201 else if (Ty == LLT::fixed_vector(4, S32))
6202 Opc = AArch64::LD2Twov4s;
6203 else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
6204 Opc = AArch64::LD2Twov2d;
6205 else if (Ty == S64 || Ty == P0)
6206 Opc = AArch64::LD1Twov1d;
6207 else
6208 llvm_unreachable("Unexpected type for ld2!");
6209 selectVectorLoadIntrinsic(Opc, 2, I);
6210 break;
6211 }
6212 case Intrinsic::aarch64_neon_ld2lane: {
6213 LLT Ty = MRI.getType(I.getOperand(0).getReg());
6214 unsigned Opc;
6215 if (Ty == LLT::fixed_vector(8, S8) || Ty == LLT::fixed_vector(16, S8))
6216 Opc = AArch64::LD2i8;
6217 else if (Ty == LLT::fixed_vector(4, S16) || Ty == LLT::fixed_vector(8, S16))
6218 Opc = AArch64::LD2i16;
6219 else if (Ty == LLT::fixed_vector(2, S32) || Ty == LLT::fixed_vector(4, S32))
6220 Opc = AArch64::LD2i32;
6221 else if (Ty == LLT::fixed_vector(2, S64) ||
6222 Ty == LLT::fixed_vector(2, P0) || Ty == S64 || Ty == P0)
6223 Opc = AArch64::LD2i64;
6224 else
 6225 llvm_unreachable("Unexpected type for ld2lane!");
6226 if (!selectVectorLoadLaneIntrinsic(Opc, 2, I))
6227 return false;
6228 break;
6229 }
6230 case Intrinsic::aarch64_neon_ld2r: {
6231 LLT Ty = MRI.getType(I.getOperand(0).getReg());
6232 unsigned Opc = 0;
6233 if (Ty == LLT::fixed_vector(8, S8))
6234 Opc = AArch64::LD2Rv8b;
6235 else if (Ty == LLT::fixed_vector(16, S8))
6236 Opc = AArch64::LD2Rv16b;
6237 else if (Ty == LLT::fixed_vector(4, S16))
6238 Opc = AArch64::LD2Rv4h;
6239 else if (Ty == LLT::fixed_vector(8, S16))
6240 Opc = AArch64::LD2Rv8h;
6241 else if (Ty == LLT::fixed_vector(2, S32))
6242 Opc = AArch64::LD2Rv2s;
6243 else if (Ty == LLT::fixed_vector(4, S32))
6244 Opc = AArch64::LD2Rv4s;
6245 else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
6246 Opc = AArch64::LD2Rv2d;
6247 else if (Ty == S64 || Ty == P0)
6248 Opc = AArch64::LD2Rv1d;
6249 else
6250 llvm_unreachable("Unexpected type for ld2r!");
6251 selectVectorLoadIntrinsic(Opc, 2, I);
6252 break;
6253 }
6254 case Intrinsic::aarch64_neon_ld3: {
6255 LLT Ty = MRI.getType(I.getOperand(0).getReg());
6256 unsigned Opc = 0;
6257 if (Ty == LLT::fixed_vector(8, S8))
6258 Opc = AArch64::LD3Threev8b;
6259 else if (Ty == LLT::fixed_vector(16, S8))
6260 Opc = AArch64::LD3Threev16b;
6261 else if (Ty == LLT::fixed_vector(4, S16))
6262 Opc = AArch64::LD3Threev4h;
6263 else if (Ty == LLT::fixed_vector(8, S16))
6264 Opc = AArch64::LD3Threev8h;
6265 else if (Ty == LLT::fixed_vector(2, S32))
6266 Opc = AArch64::LD3Threev2s;
6267 else if (Ty == LLT::fixed_vector(4, S32))
6268 Opc = AArch64::LD3Threev4s;
6269 else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
6270 Opc = AArch64::LD3Threev2d;
6271 else if (Ty == S64 || Ty == P0)
6272 Opc = AArch64::LD1Threev1d;
6273 else
6274 llvm_unreachable("Unexpected type for ld3!");
6275 selectVectorLoadIntrinsic(Opc, 3, I);
6276 break;
6277 }
6278 case Intrinsic::aarch64_neon_ld3lane: {
6279 LLT Ty = MRI.getType(I.getOperand(0).getReg());
6280 unsigned Opc;
6281 if (Ty == LLT::fixed_vector(8, S8) || Ty == LLT::fixed_vector(16, S8))
6282 Opc = AArch64::LD3i8;
6283 else if (Ty == LLT::fixed_vector(4, S16) || Ty == LLT::fixed_vector(8, S16))
6284 Opc = AArch64::LD3i16;
6285 else if (Ty == LLT::fixed_vector(2, S32) || Ty == LLT::fixed_vector(4, S32))
6286 Opc = AArch64::LD3i32;
6287 else if (Ty == LLT::fixed_vector(2, S64) ||
6288 Ty == LLT::fixed_vector(2, P0) || Ty == S64 || Ty == P0)
6289 Opc = AArch64::LD3i64;
6290 else
 6291 llvm_unreachable("Unexpected type for ld3lane!");
6292 if (!selectVectorLoadLaneIntrinsic(Opc, 3, I))
6293 return false;
6294 break;
6295 }
6296 case Intrinsic::aarch64_neon_ld3r: {
6297 LLT Ty = MRI.getType(I.getOperand(0).getReg());
6298 unsigned Opc = 0;
6299 if (Ty == LLT::fixed_vector(8, S8))
6300 Opc = AArch64::LD3Rv8b;
6301 else if (Ty == LLT::fixed_vector(16, S8))
6302 Opc = AArch64::LD3Rv16b;
6303 else if (Ty == LLT::fixed_vector(4, S16))
6304 Opc = AArch64::LD3Rv4h;
6305 else if (Ty == LLT::fixed_vector(8, S16))
6306 Opc = AArch64::LD3Rv8h;
6307 else if (Ty == LLT::fixed_vector(2, S32))
6308 Opc = AArch64::LD3Rv2s;
6309 else if (Ty == LLT::fixed_vector(4, S32))
6310 Opc = AArch64::LD3Rv4s;
6311 else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
6312 Opc = AArch64::LD3Rv2d;
6313 else if (Ty == S64 || Ty == P0)
6314 Opc = AArch64::LD3Rv1d;
6315 else
6316 llvm_unreachable("Unexpected type for ld3r!");
6317 selectVectorLoadIntrinsic(Opc, 3, I);
6318 break;
6319 }
6320 case Intrinsic::aarch64_neon_ld4: {
6321 LLT Ty = MRI.getType(I.getOperand(0).getReg());
6322 unsigned Opc = 0;
6323 if (Ty == LLT::fixed_vector(8, S8))
6324 Opc = AArch64::LD4Fourv8b;
6325 else if (Ty == LLT::fixed_vector(16, S8))
6326 Opc = AArch64::LD4Fourv16b;
6327 else if (Ty == LLT::fixed_vector(4, S16))
6328 Opc = AArch64::LD4Fourv4h;
6329 else if (Ty == LLT::fixed_vector(8, S16))
6330 Opc = AArch64::LD4Fourv8h;
6331 else if (Ty == LLT::fixed_vector(2, S32))
6332 Opc = AArch64::LD4Fourv2s;
6333 else if (Ty == LLT::fixed_vector(4, S32))
6334 Opc = AArch64::LD4Fourv4s;
6335 else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
6336 Opc = AArch64::LD4Fourv2d;
6337 else if (Ty == S64 || Ty == P0)
6338 Opc = AArch64::LD1Fourv1d;
6339 else
6340 llvm_unreachable("Unexpected type for ld4!");
6341 selectVectorLoadIntrinsic(Opc, 4, I);
6342 break;
6343 }
6344 case Intrinsic::aarch64_neon_ld4lane: {
6345 LLT Ty = MRI.getType(I.getOperand(0).getReg());
6346 unsigned Opc;
6347 if (Ty == LLT::fixed_vector(8, S8) || Ty == LLT::fixed_vector(16, S8))
6348 Opc = AArch64::LD4i8;
6349 else if (Ty == LLT::fixed_vector(4, S16) || Ty == LLT::fixed_vector(8, S16))
6350 Opc = AArch64::LD4i16;
6351 else if (Ty == LLT::fixed_vector(2, S32) || Ty == LLT::fixed_vector(4, S32))
6352 Opc = AArch64::LD4i32;
6353 else if (Ty == LLT::fixed_vector(2, S64) ||
6354 Ty == LLT::fixed_vector(2, P0) || Ty == S64 || Ty == P0)
6355 Opc = AArch64::LD4i64;
6356 else
6357 llvm_unreachable("Unexpected type for ld4lane!");
6358 if (!selectVectorLoadLaneIntrinsic(Opc, 4, I))
6359 return false;
6360 break;
6361 }
6362 case Intrinsic::aarch64_neon_ld4r: {
6363 LLT Ty = MRI.getType(I.getOperand(0).getReg());
6364 unsigned Opc = 0;
6365 if (Ty == LLT::fixed_vector(8, S8))
6366 Opc = AArch64::LD4Rv8b;
6367 else if (Ty == LLT::fixed_vector(16, S8))
6368 Opc = AArch64::LD4Rv16b;
6369 else if (Ty == LLT::fixed_vector(4, S16))
6370 Opc = AArch64::LD4Rv4h;
6371 else if (Ty == LLT::fixed_vector(8, S16))
6372 Opc = AArch64::LD4Rv8h;
6373 else if (Ty == LLT::fixed_vector(2, S32))
6374 Opc = AArch64::LD4Rv2s;
6375 else if (Ty == LLT::fixed_vector(4, S32))
6376 Opc = AArch64::LD4Rv4s;
6377 else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
6378 Opc = AArch64::LD4Rv2d;
6379 else if (Ty == S64 || Ty == P0)
6380 Opc = AArch64::LD4Rv1d;
6381 else
6382 llvm_unreachable("Unexpected type for ld4r!");
6383 selectVectorLoadIntrinsic(Opc, 4, I);
6384 break;
6385 }
6386 case Intrinsic::aarch64_neon_st1x2: {
6387 LLT Ty = MRI.getType(I.getOperand(1).getReg());
6388 unsigned Opc;
6389 if (Ty == LLT::fixed_vector(8, S8))
6390 Opc = AArch64::ST1Twov8b;
6391 else if (Ty == LLT::fixed_vector(16, S8))
6392 Opc = AArch64::ST1Twov16b;
6393 else if (Ty == LLT::fixed_vector(4, S16))
6394 Opc = AArch64::ST1Twov4h;
6395 else if (Ty == LLT::fixed_vector(8, S16))
6396 Opc = AArch64::ST1Twov8h;
6397 else if (Ty == LLT::fixed_vector(2, S32))
6398 Opc = AArch64::ST1Twov2s;
6399 else if (Ty == LLT::fixed_vector(4, S32))
6400 Opc = AArch64::ST1Twov4s;
6401 else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
6402 Opc = AArch64::ST1Twov2d;
6403 else if (Ty == S64 || Ty == P0)
6404 Opc = AArch64::ST1Twov1d;
6405 else
6406 llvm_unreachable("Unexpected type for st1x2!");
6407 selectVectorStoreIntrinsic(I, 2, Opc);
6408 break;
6409 }
6410 case Intrinsic::aarch64_neon_st1x3: {
6411 LLT Ty = MRI.getType(I.getOperand(1).getReg());
6412 unsigned Opc;
6413 if (Ty == LLT::fixed_vector(8, S8))
6414 Opc = AArch64::ST1Threev8b;
6415 else if (Ty == LLT::fixed_vector(16, S8))
6416 Opc = AArch64::ST1Threev16b;
6417 else if (Ty == LLT::fixed_vector(4, S16))
6418 Opc = AArch64::ST1Threev4h;
6419 else if (Ty == LLT::fixed_vector(8, S16))
6420 Opc = AArch64::ST1Threev8h;
6421 else if (Ty == LLT::fixed_vector(2, S32))
6422 Opc = AArch64::ST1Threev2s;
6423 else if (Ty == LLT::fixed_vector(4, S32))
6424 Opc = AArch64::ST1Threev4s;
6425 else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
6426 Opc = AArch64::ST1Threev2d;
6427 else if (Ty == S64 || Ty == P0)
6428 Opc = AArch64::ST1Threev1d;
6429 else
6430 llvm_unreachable("Unexpected type for st1x3!");
6431 selectVectorStoreIntrinsic(I, 3, Opc);
6432 break;
6433 }
6434 case Intrinsic::aarch64_neon_st1x4: {
6435 LLT Ty = MRI.getType(I.getOperand(1).getReg());
6436 unsigned Opc;
6437 if (Ty == LLT::fixed_vector(8, S8))
6438 Opc = AArch64::ST1Fourv8b;
6439 else if (Ty == LLT::fixed_vector(16, S8))
6440 Opc = AArch64::ST1Fourv16b;
6441 else if (Ty == LLT::fixed_vector(4, S16))
6442 Opc = AArch64::ST1Fourv4h;
6443 else if (Ty == LLT::fixed_vector(8, S16))
6444 Opc = AArch64::ST1Fourv8h;
6445 else if (Ty == LLT::fixed_vector(2, S32))
6446 Opc = AArch64::ST1Fourv2s;
6447 else if (Ty == LLT::fixed_vector(4, S32))
6448 Opc = AArch64::ST1Fourv4s;
6449 else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
6450 Opc = AArch64::ST1Fourv2d;
6451 else if (Ty == S64 || Ty == P0)
6452 Opc = AArch64::ST1Fourv1d;
6453 else
6454 llvm_unreachable("Unexpected type for st1x4!");
6455 selectVectorStoreIntrinsic(I, 4, Opc);
6456 break;
6457 }
6458 case Intrinsic::aarch64_neon_st2: {
6459 LLT Ty = MRI.getType(I.getOperand(1).getReg());
6460 unsigned Opc;
6461 if (Ty == LLT::fixed_vector(8, S8))
6462 Opc = AArch64::ST2Twov8b;
6463 else if (Ty == LLT::fixed_vector(16, S8))
6464 Opc = AArch64::ST2Twov16b;
6465 else if (Ty == LLT::fixed_vector(4, S16))
6466 Opc = AArch64::ST2Twov4h;
6467 else if (Ty == LLT::fixed_vector(8, S16))
6468 Opc = AArch64::ST2Twov8h;
6469 else if (Ty == LLT::fixed_vector(2, S32))
6470 Opc = AArch64::ST2Twov2s;
6471 else if (Ty == LLT::fixed_vector(4, S32))
6472 Opc = AArch64::ST2Twov4s;
6473 else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
6474 Opc = AArch64::ST2Twov2d;
6475 else if (Ty == S64 || Ty == P0)
6476 Opc = AArch64::ST1Twov1d;
6477 else
6478 llvm_unreachable("Unexpected type for st2!");
6479 selectVectorStoreIntrinsic(I, 2, Opc);
6480 break;
6481 }
6482 case Intrinsic::aarch64_neon_st3: {
6483 LLT Ty = MRI.getType(I.getOperand(1).getReg());
6484 unsigned Opc;
6485 if (Ty == LLT::fixed_vector(8, S8))
6486 Opc = AArch64::ST3Threev8b;
6487 else if (Ty == LLT::fixed_vector(16, S8))
6488 Opc = AArch64::ST3Threev16b;
6489 else if (Ty == LLT::fixed_vector(4, S16))
6490 Opc = AArch64::ST3Threev4h;
6491 else if (Ty == LLT::fixed_vector(8, S16))
6492 Opc = AArch64::ST3Threev8h;
6493 else if (Ty == LLT::fixed_vector(2, S32))
6494 Opc = AArch64::ST3Threev2s;
6495 else if (Ty == LLT::fixed_vector(4, S32))
6496 Opc = AArch64::ST3Threev4s;
6497 else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
6498 Opc = AArch64::ST3Threev2d;
6499 else if (Ty == S64 || Ty == P0)
6500 Opc = AArch64::ST1Threev1d;
6501 else
6502 llvm_unreachable("Unexpected type for st3!");
6503 selectVectorStoreIntrinsic(I, 3, Opc);
6504 break;
6505 }
6506 case Intrinsic::aarch64_neon_st4: {
6507 LLT Ty = MRI.getType(I.getOperand(1).getReg());
6508 unsigned Opc;
6509 if (Ty == LLT::fixed_vector(8, S8))
6510 Opc = AArch64::ST4Fourv8b;
6511 else if (Ty == LLT::fixed_vector(16, S8))
6512 Opc = AArch64::ST4Fourv16b;
6513 else if (Ty == LLT::fixed_vector(4, S16))
6514 Opc = AArch64::ST4Fourv4h;
6515 else if (Ty == LLT::fixed_vector(8, S16))
6516 Opc = AArch64::ST4Fourv8h;
6517 else if (Ty == LLT::fixed_vector(2, S32))
6518 Opc = AArch64::ST4Fourv2s;
6519 else if (Ty == LLT::fixed_vector(4, S32))
6520 Opc = AArch64::ST4Fourv4s;
6521 else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
6522 Opc = AArch64::ST4Fourv2d;
6523 else if (Ty == S64 || Ty == P0)
6524 Opc = AArch64::ST1Fourv1d;
6525 else
6526 llvm_unreachable("Unexpected type for st4!");
6527 selectVectorStoreIntrinsic(I, 4, Opc);
6528 break;
6529 }
6530 case Intrinsic::aarch64_neon_st2lane: {
6531 LLT Ty = MRI.getType(I.getOperand(1).getReg());
6532 unsigned Opc;
6533 if (Ty == LLT::fixed_vector(8, S8) || Ty == LLT::fixed_vector(16, S8))
6534 Opc = AArch64::ST2i8;
6535 else if (Ty == LLT::fixed_vector(4, S16) || Ty == LLT::fixed_vector(8, S16))
6536 Opc = AArch64::ST2i16;
6537 else if (Ty == LLT::fixed_vector(2, S32) || Ty == LLT::fixed_vector(4, S32))
6538 Opc = AArch64::ST2i32;
6539 else if (Ty == LLT::fixed_vector(2, S64) ||
6540 Ty == LLT::fixed_vector(2, P0) || Ty == S64 || Ty == P0)
6541 Opc = AArch64::ST2i64;
6542 else
6543 llvm_unreachable("Unexpected type for st2lane!");
6544 if (!selectVectorStoreLaneIntrinsic(I, 2, Opc))
6545 return false;
6546 break;
6547 }
6548 case Intrinsic::aarch64_neon_st3lane: {
6549 LLT Ty = MRI.getType(I.getOperand(1).getReg());
6550 unsigned Opc;
6551 if (Ty == LLT::fixed_vector(8, S8) || Ty == LLT::fixed_vector(16, S8))
6552 Opc = AArch64::ST3i8;
6553 else if (Ty == LLT::fixed_vector(4, S16) || Ty == LLT::fixed_vector(8, S16))
6554 Opc = AArch64::ST3i16;
6555 else if (Ty == LLT::fixed_vector(2, S32) || Ty == LLT::fixed_vector(4, S32))
6556 Opc = AArch64::ST3i32;
6557 else if (Ty == LLT::fixed_vector(2, S64) ||
6558 Ty == LLT::fixed_vector(2, P0) || Ty == S64 || Ty == P0)
6559 Opc = AArch64::ST3i64;
6560 else
6561 llvm_unreachable("Unexpected type for st3lane!");
6562 if (!selectVectorStoreLaneIntrinsic(I, 3, Opc))
6563 return false;
6564 break;
6565 }
6566 case Intrinsic::aarch64_neon_st4lane: {
6567 LLT Ty = MRI.getType(I.getOperand(1).getReg());
6568 unsigned Opc;
6569 if (Ty == LLT::fixed_vector(8, S8) || Ty == LLT::fixed_vector(16, S8))
6570 Opc = AArch64::ST4i8;
6571 else if (Ty == LLT::fixed_vector(4, S16) || Ty == LLT::fixed_vector(8, S16))
6572 Opc = AArch64::ST4i16;
6573 else if (Ty == LLT::fixed_vector(2, S32) || Ty == LLT::fixed_vector(4, S32))
6574 Opc = AArch64::ST4i32;
6575 else if (Ty == LLT::fixed_vector(2, S64) ||
6576 Ty == LLT::fixed_vector(2, P0) || Ty == S64 || Ty == P0)
6577 Opc = AArch64::ST4i64;
6578 else
6579 llvm_unreachable("Unexpected type for st4lane!");
6580 if (!selectVectorStoreLaneIntrinsic(I, 4, Opc))
6581 return false;
6582 break;
6583 }
6584 case Intrinsic::aarch64_mops_memset_tag: {
6585 // Transform
6586 // %dst:gpr(p0) = \
6587 // G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aarch64.mops.memset.tag),
6588 // \ %dst:gpr(p0), %val:gpr(s64), %n:gpr(s64)
6589 // where %dst is updated, into
6590 // %Rd:GPR64common, %Rn:GPR64) = \
6591 // MOPSMemorySetTaggingPseudo \
6592 // %Rd:GPR64common, %Rn:GPR64, %Rm:GPR64
6593 // where Rd and Rn are tied.
6594 // It is expected that %val has been extended to s64 in legalization.
6595 // Note that the order of the size/value operands is swapped.
6596
6597 Register DstDef = I.getOperand(0).getReg();
6598 // I.getOperand(1) is the intrinsic function
6599 Register DstUse = I.getOperand(2).getReg();
6600 Register ValUse = I.getOperand(3).getReg();
6601 Register SizeUse = I.getOperand(4).getReg();
6602
6603 // MOPSMemorySetTaggingPseudo has two defs; the intrinsic call has only one.
6604 // Therefore an additional virtual register is required for the updated size
6605 // operand. This value is not accessible via the semantics of the intrinsic.
6606 Register SizeDef = MRI.createGenericVirtualRegister(LLT::scalar(64));
6607
6608 auto Memset = MIB.buildInstr(AArch64::MOPSMemorySetTaggingPseudo,
6609 {DstDef, SizeDef}, {DstUse, SizeUse, ValUse});
6610 Memset.cloneMemRefs(I);
6611 constrainSelectedInstRegOperands(*Memset, TII, TRI, RBI);
6612 break;
6613 }
6614 case Intrinsic::ptrauth_resign_load_relative: {
6615 Register DstReg = I.getOperand(0).getReg();
6616 Register ValReg = I.getOperand(2).getReg();
6617 uint64_t AUTKey = I.getOperand(3).getImm();
6618 Register AUTDisc = I.getOperand(4).getReg();
6619 uint64_t PACKey = I.getOperand(5).getImm();
6620 Register PACDisc = I.getOperand(6).getReg();
6621 int64_t Addend = I.getOperand(7).getImm();
6622
6623 Register AUTAddrDisc = AUTDisc;
6624 uint16_t AUTConstDiscC = 0;
6625 std::tie(AUTConstDiscC, AUTAddrDisc) =
6626 extractPtrauthBlendDiscriminators(AUTDisc, MRI);
6627
6628 Register PACAddrDisc = PACDisc;
6629 uint16_t PACConstDiscC = 0;
6630 std::tie(PACConstDiscC, PACAddrDisc) =
6631 extractPtrauthBlendDiscriminators(PACDisc, MRI);
6632
6633 MIB.buildCopy({AArch64::X16}, {ValReg});
6634
6635 MIB.buildInstr(AArch64::AUTRELLOADPAC)
6636 .addImm(AUTKey)
6637 .addImm(AUTConstDiscC)
6638 .addUse(AUTAddrDisc)
6639 .addImm(PACKey)
6640 .addImm(PACConstDiscC)
6641 .addUse(PACAddrDisc)
6642 .addImm(Addend)
6643 .constrainAllUses(TII, TRI, RBI);
6644 MIB.buildCopy({DstReg}, Register(AArch64::X16));
6645
6646 RBI.constrainGenericRegister(DstReg, AArch64::GPR64RegClass, MRI);
6647 I.eraseFromParent();
6648 return true;
6649 }
6650 }
6651
6652 I.eraseFromParent();
6653 return true;
6654}
6655
6656bool AArch64InstructionSelector::selectIntrinsic(MachineInstr &I,
6657 MachineRegisterInfo &MRI) {
6658 unsigned IntrinID = cast<GIntrinsic>(I).getIntrinsicID();
6659
6660 switch (IntrinID) {
6661 default:
6662 break;
6663 case Intrinsic::ptrauth_resign: {
6664 Register DstReg = I.getOperand(0).getReg();
6665 Register ValReg = I.getOperand(2).getReg();
6666 uint64_t AUTKey = I.getOperand(3).getImm();
6667 Register AUTDisc = I.getOperand(4).getReg();
6668 uint64_t PACKey = I.getOperand(5).getImm();
6669 Register PACDisc = I.getOperand(6).getReg();
6670
6671 Register AUTAddrDisc = AUTDisc;
6672 uint16_t AUTConstDiscC = 0;
6673 std::tie(AUTConstDiscC, AUTAddrDisc) =
6674 extractPtrauthBlendDiscriminators(AUTDisc, MRI);
6675
6676 Register PACAddrDisc = PACDisc;
6677 uint16_t PACConstDiscC = 0;
6678 std::tie(PACConstDiscC, PACAddrDisc) =
6679 extractPtrauthBlendDiscriminators(PACDisc, MRI);
6680
6681 MIB.buildCopy({AArch64::X16}, {ValReg});
6682 MIB.buildInstr(TargetOpcode::IMPLICIT_DEF, {AArch64::X17}, {});
6683 MIB.buildInstr(AArch64::AUTPAC)
6684 .addImm(AUTKey)
6685 .addImm(AUTConstDiscC)
6686 .addUse(AUTAddrDisc)
6687 .addImm(PACKey)
6688 .addImm(PACConstDiscC)
6689 .addUse(PACAddrDisc)
6690 .constrainAllUses(TII, TRI, RBI);
6691 MIB.buildCopy({DstReg}, Register(AArch64::X16));
6692
6693 RBI.constrainGenericRegister(DstReg, AArch64::GPR64RegClass, MRI);
6694 I.eraseFromParent();
6695 return true;
6696 }
6697 case Intrinsic::ptrauth_auth: {
6698 Register DstReg = I.getOperand(0).getReg();
6699 Register ValReg = I.getOperand(2).getReg();
6700 uint64_t AUTKey = I.getOperand(3).getImm();
6701 Register AUTDisc = I.getOperand(4).getReg();
6702
6703 Register AUTAddrDisc = AUTDisc;
6704 uint16_t AUTConstDiscC = 0;
6705 std::tie(AUTConstDiscC, AUTAddrDisc) =
6706 extractPtrauthBlendDiscriminators(AUTDisc, MRI);
6707
6708 if (STI.isX16X17Safer()) {
6709 MIB.buildCopy({AArch64::X16}, {ValReg});
6710 MIB.buildInstr(TargetOpcode::IMPLICIT_DEF, {AArch64::X17}, {});
6711 MIB.buildInstr(AArch64::AUTx16x17)
6712 .addImm(AUTKey)
6713 .addImm(AUTConstDiscC)
6714 .addUse(AUTAddrDisc)
6715 .constrainAllUses(TII, TRI, RBI);
6716 MIB.buildCopy({DstReg}, Register(AArch64::X16));
6717 } else {
6718 Register ScratchReg =
6719 MRI.createVirtualRegister(&AArch64::GPR64commonRegClass);
6720 MIB.buildInstr(AArch64::AUTxMxN)
6721 .addDef(DstReg)
6722 .addDef(ScratchReg)
6723 .addUse(ValReg)
6724 .addImm(AUTKey)
6725 .addImm(AUTConstDiscC)
6726 .addUse(AUTAddrDisc)
6727 .constrainAllUses(TII, TRI, RBI);
6728 }
6729
6730 RBI.constrainGenericRegister(DstReg, AArch64::GPR64RegClass, MRI);
6731 I.eraseFromParent();
6732 return true;
6733 }
6734 case Intrinsic::frameaddress:
6735 case Intrinsic::returnaddress: {
6736 MachineFunction &MF = *I.getParent()->getParent();
6737 MachineFrameInfo &MFI = MF.getFrameInfo();
6738
6739 unsigned Depth = I.getOperand(2).getImm();
6740 Register DstReg = I.getOperand(0).getReg();
6741 RBI.constrainGenericRegister(DstReg, AArch64::GPR64RegClass, MRI);
6742
6743 if (Depth == 0 && IntrinID == Intrinsic::returnaddress) {
6744 if (!MFReturnAddr) {
6745 // Insert the copy from LR/X30 into the entry block, before it can be
6746 // clobbered by anything.
6747 MFI.setReturnAddressIsTaken(true);
6748 MFReturnAddr = getFunctionLiveInPhysReg(
6749 MF, TII, AArch64::LR, AArch64::GPR64RegClass, I.getDebugLoc());
6750 }
6751
6752 if (STI.hasPAuth()) {
6753 MIB.buildInstr(AArch64::XPACI, {DstReg}, {MFReturnAddr});
6754 } else {
6755 MIB.buildCopy({Register(AArch64::LR)}, {MFReturnAddr});
6756 MIB.buildInstr(AArch64::XPACLRI);
6757 MIB.buildCopy({DstReg}, {Register(AArch64::LR)});
6758 }
6759
6760 I.eraseFromParent();
6761 return true;
6762 }
6763
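 // For illustration: an AAPCS64 frame record is a {previous FP, return address}
 // pair, so each iteration below loads the saved FP at [FrameAddr, #0], and the
 // return-address load further down uses LDRXui #1, i.e. a byte offset of 8
 // into the same record.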
6764 MFI.setFrameAddressIsTaken(true);
6765 Register FrameAddr(AArch64::FP);
6766 while (Depth--) {
6767 Register NextFrame = MRI.createVirtualRegister(&AArch64::GPR64spRegClass);
6768 auto Ldr =
6769 MIB.buildInstr(AArch64::LDRXui, {NextFrame}, {FrameAddr}).addImm(0);
6770 constrainSelectedInstRegOperands(*Ldr, TII, TRI, RBI);
6771 FrameAddr = NextFrame;
6772 }
6773
6774 if (IntrinID == Intrinsic::frameaddress)
6775 MIB.buildCopy({DstReg}, {FrameAddr});
6776 else {
6777 MFI.setReturnAddressIsTaken(true);
6778
6779 if (STI.hasPAuth()) {
6780 Register TmpReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
6781 MIB.buildInstr(AArch64::LDRXui, {TmpReg}, {FrameAddr}).addImm(1);
6782 MIB.buildInstr(AArch64::XPACI, {DstReg}, {TmpReg});
6783 } else {
6784 MIB.buildInstr(AArch64::LDRXui, {Register(AArch64::LR)}, {FrameAddr})
6785 .addImm(1);
6786 MIB.buildInstr(AArch64::XPACLRI);
6787 MIB.buildCopy({DstReg}, {Register(AArch64::LR)});
6788 }
6789 }
6790
6791 I.eraseFromParent();
6792 return true;
6793 }
6794 case Intrinsic::aarch64_neon_tbl2:
6795 SelectTable(I, MRI, 2, AArch64::TBLv8i8Two, AArch64::TBLv16i8Two, false);
6796 return true;
6797 case Intrinsic::aarch64_neon_tbl3:
6798 SelectTable(I, MRI, 3, AArch64::TBLv8i8Three, AArch64::TBLv16i8Three,
6799 false);
6800 return true;
6801 case Intrinsic::aarch64_neon_tbl4:
6802 SelectTable(I, MRI, 4, AArch64::TBLv8i8Four, AArch64::TBLv16i8Four, false);
6803 return true;
6804 case Intrinsic::aarch64_neon_tbx2:
6805 SelectTable(I, MRI, 2, AArch64::TBXv8i8Two, AArch64::TBXv16i8Two, true);
6806 return true;
6807 case Intrinsic::aarch64_neon_tbx3:
6808 SelectTable(I, MRI, 3, AArch64::TBXv8i8Three, AArch64::TBXv16i8Three, true);
6809 return true;
6810 case Intrinsic::aarch64_neon_tbx4:
6811 SelectTable(I, MRI, 4, AArch64::TBXv8i8Four, AArch64::TBXv16i8Four, true);
6812 return true;
6813 case Intrinsic::swift_async_context_addr:
6814 auto Sub = MIB.buildInstr(AArch64::SUBXri, {I.getOperand(0).getReg()},
6815 {Register(AArch64::FP)})
6816 .addImm(8)
6817 .addImm(0);
6818 constrainSelectedInstRegOperands(*Sub, TII, TRI, RBI);
6819
6821 MF->getInfo<AArch64FunctionInfo>()->setHasSwiftAsyncContext(true);
6822 I.eraseFromParent();
6823 return true;
6824 }
6825 return false;
6826}
6827
6828// G_PTRAUTH_GLOBAL_VALUE lowering
6829//
6830// We have 3 lowering alternatives to choose from:
6831// - MOVaddrPAC: similar to MOVaddr, with added PAC.
6832// If the GV doesn't need a GOT load (i.e., is locally defined)
6833// materialize the pointer using adrp+add+pac. See LowerMOVaddrPAC.
6834//
6835// - LOADgotPAC: similar to LOADgot, with added PAC.
6836// If the GV needs a GOT load, materialize the pointer using the usual
6837 // GOT adrp+ldr, +pac. Pointers in the GOT are assumed to be not signed, and the
6838 // GOT section is assumed to be read-only (for example, via the relro mechanism). See
6839// LowerMOVaddrPAC.
6840//
6841// - LOADauthptrstatic: similar to LOADgot, but use a
6842// special stub slot instead of a GOT slot.
6843// Load a signed pointer for symbol 'sym' from a stub slot named
6844// 'sym$auth_ptr$key$disc' filled by dynamic linker during relocation
6845// resolving. This usually lowers to adrp+ldr, but also emits an entry into
6846// .data with an
6847// @AUTH relocation. See LowerLOADauthptrstatic.
6848//
6849 // All 3 are pseudos that are expanded late into longer sequences: this lets us
6850// provide integrity guarantees on the to-be-signed intermediate values.
6851//
6852// LOADauthptrstatic is undesirable because it requires a large section filled
6853// with often similarly-signed pointers, making it a good harvesting target.
6854// Thus, it's only used for ptrauth references to extern_weak to avoid null
6855// checks.
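// For illustration, a ptrauth-qualified reference such as
//   ptrauth (ptr @g, i32 2, i64 1234)
// takes the MOVaddrPAC path when @g is locally defined and the LOADgotPAC path
// when the reference must go through the GOT.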
6856
6857bool AArch64InstructionSelector::selectPtrAuthGlobalValue(
6858 MachineInstr &I, MachineRegisterInfo &MRI) const {
6859 Register DefReg = I.getOperand(0).getReg();
6860 Register Addr = I.getOperand(1).getReg();
6861 uint64_t Key = I.getOperand(2).getImm();
6862 Register AddrDisc = I.getOperand(3).getReg();
6863 uint64_t Disc = I.getOperand(4).getImm();
6864 int64_t Offset = 0;
6865
6866 if (Key > AArch64PACKey::LAST)
6867 report_fatal_error("key in ptrauth global out of range [0, " +
6868 Twine((int)AArch64PACKey::LAST) + "]");
6869
6870 // Blend only works if the integer discriminator is 16-bit wide.
6871 if (!isUInt<16>(Disc))
6873 "constant discriminator in ptrauth global out of range [0, 0xffff]");
6874
6875 // Choosing between 3 lowering alternatives is target-specific.
6876 if (!STI.isTargetELF() && !STI.isTargetMachO())
6877 report_fatal_error("ptrauth global lowering only supported on MachO/ELF");
6878
6879 if (!MRI.hasOneDef(Addr))
6880 return false;
6881
6882 // First match any offset we take from the real global.
6883 const MachineInstr *DefMI = &*MRI.def_instr_begin(Addr);
6884 if (DefMI->getOpcode() == TargetOpcode::G_PTR_ADD) {
6885 Register OffsetReg = DefMI->getOperand(2).getReg();
6886 if (!MRI.hasOneDef(OffsetReg))
6887 return false;
6888 const MachineInstr &OffsetMI = *MRI.def_instr_begin(OffsetReg);
6889 if (OffsetMI.getOpcode() != TargetOpcode::G_CONSTANT)
6890 return false;
6891
6892 Addr = DefMI->getOperand(1).getReg();
6893 if (!MRI.hasOneDef(Addr))
6894 return false;
6895
6896 DefMI = &*MRI.def_instr_begin(Addr);
6897 Offset = OffsetMI.getOperand(1).getCImm()->getSExtValue();
6898 }
6899
6900 // We should be left with a genuine unauthenticated GlobalValue.
6901 const GlobalValue *GV;
6902 if (DefMI->getOpcode() == TargetOpcode::G_GLOBAL_VALUE) {
6903 GV = DefMI->getOperand(1).getGlobal();
6904 Offset += DefMI->getOperand(1).getOffset();
6905 } else if (DefMI->getOpcode() == AArch64::G_ADD_LOW) {
6906 GV = DefMI->getOperand(2).getGlobal();
6907 Offset += DefMI->getOperand(2).getOffset();
6908 } else {
6909 return false;
6910 }
6911
6912 MachineIRBuilder MIB(I);
6913
6914 // Classify the reference to determine whether it needs a GOT load.
6915 unsigned OpFlags = STI.ClassifyGlobalReference(GV, TM);
6916 const bool NeedsGOTLoad = ((OpFlags & AArch64II::MO_GOT) != 0);
6917 assert(((OpFlags & (~AArch64II::MO_GOT)) == 0) &&
6918 "unsupported non-GOT op flags on ptrauth global reference");
6919 assert((!GV->hasExternalWeakLinkage() || NeedsGOTLoad) &&
6920 "unsupported non-GOT reference to weak ptrauth global");
6921
6922 std::optional<APInt> AddrDiscVal = getIConstantVRegVal(AddrDisc, MRI);
6923 bool HasAddrDisc = !AddrDiscVal || *AddrDiscVal != 0;
6924
6925 // Non-extern_weak:
6926 // - No GOT load needed -> MOVaddrPAC
6927 // - GOT load for non-extern_weak -> LOADgotPAC
6928 // Note that we disallow extern_weak refs to avoid null checks later.
6929 if (!GV->hasExternalWeakLinkage()) {
6930 MIB.buildInstr(TargetOpcode::IMPLICIT_DEF, {AArch64::X16}, {});
6931 MIB.buildInstr(TargetOpcode::IMPLICIT_DEF, {AArch64::X17}, {});
6932 MIB.buildInstr(NeedsGOTLoad ? AArch64::LOADgotPAC : AArch64::MOVaddrPAC)
6933 .addGlobalAddress(GV, Offset)
6934 .addImm(Key)
6935 .addReg(HasAddrDisc ? AddrDisc : AArch64::XZR)
6936 .addImm(Disc)
6937 .constrainAllUses(TII, TRI, RBI);
6938 MIB.buildCopy(DefReg, Register(AArch64::X16));
6939 RBI.constrainGenericRegister(DefReg, AArch64::GPR64RegClass, MRI);
6940 I.eraseFromParent();
6941 return true;
6942 }
6943
6944 // extern_weak -> LOADauthptrstatic
6945
6946 // Offsets and extern_weak don't mix well: ptrauth aside, you'd get the
6947 // offset alone as a pointer if the symbol wasn't available, which would
6948 // probably break null checks in users. Ptrauth complicates things further:
6949 // error out.
6950 if (Offset != 0)
6952 "unsupported non-zero offset in weak ptrauth global reference");
6953
6954 if (HasAddrDisc)
6955 report_fatal_error("unsupported weak addr-div ptrauth global");
6956
6957 MIB.buildInstr(AArch64::LOADauthptrstatic, {DefReg}, {})
6958 .addGlobalAddress(GV, Offset)
6959 .addImm(Key)
6960 .addImm(Disc);
6961 RBI.constrainGenericRegister(DefReg, AArch64::GPR64RegClass, MRI);
6962
6963 I.eraseFromParent();
6964 return true;
6965}
6966
6967void AArch64InstructionSelector::SelectTable(MachineInstr &I,
6968 MachineRegisterInfo &MRI,
6969 unsigned NumVec, unsigned Opc1,
6970 unsigned Opc2, bool isExt) {
6971 Register DstReg = I.getOperand(0).getReg();
6972 unsigned Opc = MRI.getType(DstReg) == LLT::fixed_vector(8, 8) ? Opc1 : Opc2;
6973
6974 // Create the REG_SEQUENCE
6975 SmallVector<Register, 4> Regs;
6976 for (unsigned i = 0; i < NumVec; i++)
6977 Regs.push_back(I.getOperand(i + 2 + isExt).getReg());
6978 Register RegSeq = createQTuple(Regs, MIB);
6979
6980 Register IdxReg = I.getOperand(2 + NumVec + isExt).getReg();
6981 MachineInstrBuilder Instr;
6982 if (isExt) {
6983 Register Reg = I.getOperand(2).getReg();
6984 Instr = MIB.buildInstr(Opc, {DstReg}, {Reg, RegSeq, IdxReg});
6985 } else
6986 Instr = MIB.buildInstr(Opc, {DstReg}, {RegSeq, IdxReg});
6987 constrainSelectedInstRegOperands(*Instr, TII, TRI, RBI);
6988 I.eraseFromParent();
6989}
6990
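// The helpers below render complementary shift immediates: for a constant
// shift amount N, selectShiftA_* produces (BitWidth - N) & (BitWidth - 1) and
// selectShiftB_* produces BitWidth - 1 - N (e.g. N = 3 in the 32-bit case
// yields 29 and 28).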
6991InstructionSelector::ComplexRendererFns
6992AArch64InstructionSelector::selectShiftA_32(const MachineOperand &Root) const {
6993 auto MaybeImmed = getImmedFromMO(Root);
6994 if (MaybeImmed == std::nullopt || *MaybeImmed > 31)
6995 return std::nullopt;
6996 uint64_t Enc = (32 - *MaybeImmed) & 0x1f;
6997 return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Enc); }}};
6998}
6999
7000InstructionSelector::ComplexRendererFns
7001AArch64InstructionSelector::selectShiftB_32(const MachineOperand &Root) const {
7002 auto MaybeImmed = getImmedFromMO(Root);
7003 if (MaybeImmed == std::nullopt || *MaybeImmed > 31)
7004 return std::nullopt;
7005 uint64_t Enc = 31 - *MaybeImmed;
7006 return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Enc); }}};
7007}
7008
7009InstructionSelector::ComplexRendererFns
7010AArch64InstructionSelector::selectShiftA_64(const MachineOperand &Root) const {
7011 auto MaybeImmed = getImmedFromMO(Root);
7012 if (MaybeImmed == std::nullopt || *MaybeImmed > 63)
7013 return std::nullopt;
7014 uint64_t Enc = (64 - *MaybeImmed) & 0x3f;
7015 return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Enc); }}};
7016}
7017
7018InstructionSelector::ComplexRendererFns
7019AArch64InstructionSelector::selectShiftB_64(const MachineOperand &Root) const {
7020 auto MaybeImmed = getImmedFromMO(Root);
7021 if (MaybeImmed == std::nullopt || *MaybeImmed > 63)
7022 return std::nullopt;
7023 uint64_t Enc = 63 - *MaybeImmed;
7024 return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Enc); }}};
7025}
7026
7027/// Helper to select an immediate value that can be represented as a 12-bit
7028/// value shifted left by either 0 or 12. If it is possible to do so, return
7029/// the immediate and shift value. If not, return std::nullopt.
7030///
7031/// Used by selectArithImmed and selectNegArithImmed.
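///
/// For example (illustrative): 0x123 selects as (0x123, LSL #0), 0x123000 as
/// (0x123, LSL #12), and 0x123456 is rejected because it has bits set both in
/// and above the low 12 bits.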
7032InstructionSelector::ComplexRendererFns
7033AArch64InstructionSelector::select12BitValueWithLeftShift(
7034 uint64_t Immed) const {
7035 unsigned ShiftAmt;
7036 if (Immed >> 12 == 0) {
7037 ShiftAmt = 0;
7038 } else if ((Immed & 0xfff) == 0 && Immed >> 24 == 0) {
7039 ShiftAmt = 12;
7040 Immed = Immed >> 12;
7041 } else
7042 return std::nullopt;
7043
7044 unsigned ShVal = AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftAmt);
7045 return {{
7046 [=](MachineInstrBuilder &MIB) { MIB.addImm(Immed); },
7047 [=](MachineInstrBuilder &MIB) { MIB.addImm(ShVal); },
7048 }};
7049}
7050
7051/// SelectArithImmed - Select an immediate value that can be represented as
7052/// a 12-bit value shifted left by either 0 or 12. If so, return true with
7053/// Val set to the 12-bit value and Shift set to the shifter operand.
7054InstructionSelector::ComplexRendererFns
7055AArch64InstructionSelector::selectArithImmed(MachineOperand &Root) const {
7056 // This function is called from the addsub_shifted_imm ComplexPattern,
7057 // which lists [imm] as the list of opcodes it's interested in; however,
7058 // we still need to check whether the operand is actually an immediate
7059 // here because the ComplexPattern opcode list is only used in
7060 // root-level opcode matching.
7061 auto MaybeImmed = getImmedFromMO(Root);
7062 if (MaybeImmed == std::nullopt)
7063 return std::nullopt;
7064 return select12BitValueWithLeftShift(*MaybeImmed);
7065}
7066
7067/// SelectNegArithImmed - As above, but negates the value before trying to
7068/// select it.
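/// For example (illustrative), a 32-bit compare against -16 negates to 16,
/// which fits the 12-bit form and can be selected as "cmn wN, #16".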
7069InstructionSelector::ComplexRendererFns
7070AArch64InstructionSelector::selectNegArithImmed(MachineOperand &Root) const {
7071 // We need a register here, because we need to know if we have a 64 or 32
7072 // bit immediate.
7073 if (!Root.isReg())
7074 return std::nullopt;
7075 auto MaybeImmed = getImmedFromMO(Root);
7076 if (MaybeImmed == std::nullopt)
7077 return std::nullopt;
7078 uint64_t Immed = *MaybeImmed;
7079
7080 // This negation is almost always valid, but "cmp wN, #0" and "cmn wN, #0"
7081 // have the opposite effect on the C flag, so this pattern mustn't match under
7082 // those circumstances.
7083 if (Immed == 0)
7084 return std::nullopt;
7085
7086 // Check whether the root is a 32-bit or a 64-bit type, since that determines
7087 // how the negation wraps.
7088 MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();
7089 if (MRI.getType(Root.getReg()).getSizeInBits() == 32)
7090 Immed = ~((uint32_t)Immed) + 1;
7091 else
7092 Immed = ~Immed + 1ULL;
7093
7094 if (Immed & 0xFFFFFFFFFF000000ULL)
7095 return std::nullopt;
7096
7097 Immed &= 0xFFFFFFULL;
7098 return select12BitValueWithLeftShift(Immed);
7099}
7100
7101/// Checks if we are sure that folding MI into load/store addressing mode is
7102/// beneficial or not.
7103///
7104/// Returns:
7105/// - true if folding MI would be beneficial.
7106/// - false if folding MI would be bad.
7107/// - std::nullopt if it is not sure whether folding MI is beneficial.
7108///
7109/// \p MI can be the offset operand of G_PTR_ADD, e.g. G_SHL in the example:
7110///
7111/// %13:gpr(s64) = G_CONSTANT i64 1
7112/// %8:gpr(s64) = G_SHL %6, %13(s64)
7113/// %9:gpr(p0) = G_PTR_ADD %0, %8(s64)
7114/// %12:gpr(s32) = G_LOAD %9(p0) :: (load (s16))
7115std::optional<bool> AArch64InstructionSelector::isWorthFoldingIntoAddrMode(
7116 const MachineInstr &MI, const MachineRegisterInfo &MRI) const {
7117 if (MI.getOpcode() == AArch64::G_SHL) {
7118 // Address operands with shifts are free, except when running on subtargets
7119 // with AddrLSLSlow14.
7120 if (const auto ValAndVReg = getIConstantVRegValWithLookThrough(
7121 MI.getOperand(2).getReg(), MRI)) {
7122 const APInt ShiftVal = ValAndVReg->Value;
7123
7124 // Don't fold if we know this will be slow.
7125 return !(STI.hasAddrLSLSlow14() && (ShiftVal == 1 || ShiftVal == 4));
7126 }
7127 }
7128 return std::nullopt;
7129}
7130
7131/// Return true if it is worth folding MI into an extended register. That is,
7132/// if it's safe to pull it into the addressing mode of a load or store as a
7133/// shift.
7134/// \p IsAddrOperand whether the def of MI is used as an address operand
7135/// (e.g. feeding into an LDR/STR).
7136bool AArch64InstructionSelector::isWorthFoldingIntoExtendedReg(
7137 const MachineInstr &MI, const MachineRegisterInfo &MRI,
7138 bool IsAddrOperand) const {
7139
7140 // Always fold if there is one use, or if we're optimizing for size.
7141 Register DefReg = MI.getOperand(0).getReg();
7142 if (MRI.hasOneNonDBGUse(DefReg) ||
7143 MI.getParent()->getParent()->getFunction().hasOptSize())
7144 return true;
7145
7146 if (IsAddrOperand) {
7147 // If we are already sure that folding MI is good or bad, return the result.
7148 if (const auto Worth = isWorthFoldingIntoAddrMode(MI, MRI))
7149 return *Worth;
7150
7151 // Fold G_PTR_ADD if its offset operand can be folded
7152 if (MI.getOpcode() == AArch64::G_PTR_ADD) {
7153 MachineInstr *OffsetInst =
7154 getDefIgnoringCopies(MI.getOperand(2).getReg(), MRI);
7155
7156 // Note, we already know G_PTR_ADD is used by at least two instructions.
7157 // If we are also sure about whether folding is beneficial or not,
7158 // return the result.
7159 if (const auto Worth = isWorthFoldingIntoAddrMode(*OffsetInst, MRI))
7160 return *Worth;
7161 }
7162 }
7163
7164 // FIXME: Consider checking HasALULSLFast as appropriate.
7165
7166 // We have a fastpath, so folding a shift in and potentially computing it
7167 // many times may be beneficial. Check if this is only used in memory ops.
7168 // If it is, then we should fold.
7169 return all_of(MRI.use_nodbg_instructions(DefReg),
7170 [](MachineInstr &Use) { return Use.mayLoadOrStore(); });
7171}
7172
7173InstructionSelector::ComplexRendererFns
7174AArch64InstructionSelector::selectExtendedSHL(
7175 MachineOperand &Root, MachineOperand &Base, MachineOperand &Offset,
7176 unsigned SizeInBytes, bool WantsExt) const {
7177 assert(Base.isReg() && "Expected base to be a register operand");
7178 assert(Offset.isReg() && "Expected offset to be a register operand");
7179
7180 MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();
7181 MachineInstr *OffsetInst = MRI.getVRegDef(Offset.getReg());
7182
7183 unsigned OffsetOpc = OffsetInst->getOpcode();
7184 bool LookedThroughZExt = false;
7185 if (OffsetOpc != TargetOpcode::G_SHL && OffsetOpc != TargetOpcode::G_MUL) {
7186 // Try to look through a ZEXT.
7187 if (OffsetOpc != TargetOpcode::G_ZEXT || !WantsExt)
7188 return std::nullopt;
7189
7190 OffsetInst = MRI.getVRegDef(OffsetInst->getOperand(1).getReg());
7191 OffsetOpc = OffsetInst->getOpcode();
7192 LookedThroughZExt = true;
7193
7194 if (OffsetOpc != TargetOpcode::G_SHL && OffsetOpc != TargetOpcode::G_MUL)
7195 return std::nullopt;
7196 }
7197 // Make sure that the memory op is a valid size.
7198 int64_t LegalShiftVal = Log2_32(SizeInBytes);
7199 if (LegalShiftVal == 0)
7200 return std::nullopt;
7201 if (!isWorthFoldingIntoExtendedReg(*OffsetInst, MRI, true))
7202 return std::nullopt;
7203
7204 // Now, try to find the specific G_CONSTANT. Start by assuming that the
7205 // register we will offset is the LHS, and the register containing the
7206 // constant is the RHS.
7207 Register OffsetReg = OffsetInst->getOperand(1).getReg();
7208 Register ConstantReg = OffsetInst->getOperand(2).getReg();
7209 auto ValAndVReg = getIConstantVRegValWithLookThrough(ConstantReg, MRI);
7210 if (!ValAndVReg) {
7211 // We didn't get a constant on the RHS. If the opcode is a shift, then
7212 // we're done.
7213 if (OffsetOpc == TargetOpcode::G_SHL)
7214 return std::nullopt;
7215
7216 // If we have a G_MUL, we can use either register. Try looking at the RHS.
7217 std::swap(OffsetReg, ConstantReg);
7218 ValAndVReg = getIConstantVRegValWithLookThrough(ConstantReg, MRI);
7219 if (!ValAndVReg)
7220 return std::nullopt;
7221 }
7222
7223 // The value must fit into 3 bits, and must be positive. Make sure that is
7224 // true.
7225 int64_t ImmVal = ValAndVReg->Value.getSExtValue();
7226
7227 // Since we're going to pull this into a shift, the constant value must be
7228 // a power of 2. If we got a multiply, then we need to check this.
7229 if (OffsetOpc == TargetOpcode::G_MUL) {
7230 if (!llvm::has_single_bit<uint32_t>(ImmVal))
7231 return std::nullopt;
7232
7233 // Got a power of 2. So, the amount we'll shift is the log base-2 of that.
7234 ImmVal = Log2_32(ImmVal);
7235 }
7236
7237 if ((ImmVal & 0x7) != ImmVal)
7238 return std::nullopt;
7239
7240 // We are only allowed to shift by LegalShiftVal. This shift value is built
7241 // into the instruction, so we can't just use whatever we want.
7242 if (ImmVal != LegalShiftVal)
7243 return std::nullopt;
7244
7245 unsigned SignExtend = 0;
7246 if (WantsExt) {
7247 // Check if the offset is defined by an extend, unless we looked through a
7248 // G_ZEXT earlier.
7249 if (!LookedThroughZExt) {
7250 MachineInstr *ExtInst = getDefIgnoringCopies(OffsetReg, MRI);
7251 auto Ext = getExtendTypeForInst(*ExtInst, MRI, true);
7252 if (Ext == AArch64_AM::InvalidShiftExtend)
7253 return std::nullopt;
7254
7255 SignExtend = AArch64_AM::isSignExtendShiftType(Ext) ? 1 : 0;
7256 // We only support SXTW for signed extension here.
7257 if (SignExtend && Ext != AArch64_AM::SXTW)
7258 return std::nullopt;
7259 OffsetReg = ExtInst->getOperand(1).getReg();
7260 }
7261
7262 // Need a 32-bit wide register here.
7263 MachineIRBuilder MIB(*MRI.getVRegDef(Root.getReg()));
7264 OffsetReg = moveScalarRegClass(OffsetReg, AArch64::GPR32RegClass, MIB);
7265 }
7266
7267 // We can use the LHS of the GEP as the base, and the LHS of the shift as an
7268 // offset. Signify that we are shifting by setting the shift flag to 1.
7269 return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(Base.getReg()); },
7270 [=](MachineInstrBuilder &MIB) { MIB.addUse(OffsetReg); },
7271 [=](MachineInstrBuilder &MIB) {
7272 // Need to add both immediates here to make sure that they are both
7273 // added to the instruction.
7274 MIB.addImm(SignExtend);
7275 MIB.addImm(1);
7276 }}};
7277}
7278
7279/// This is used for computing addresses like this:
7280///
7281/// ldr x1, [x2, x3, lsl #3]
7282///
7283/// Where x2 is the base register, and x3 is an offset register. The shift-left
7284/// is a constant value specific to this load instruction. That is, we'll never
7285/// see anything other than a 3 here (which corresponds to the size of the
7286 /// element being loaded).
7287InstructionSelector::ComplexRendererFns
7288AArch64InstructionSelector::selectAddrModeShiftedExtendXReg(
7289 MachineOperand &Root, unsigned SizeInBytes) const {
7290 if (!Root.isReg())
7291 return std::nullopt;
7292 MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();
7293
7294 // We want to find something like this:
7295 //
7296 // val = G_CONSTANT LegalShiftVal
7297 // shift = G_SHL off_reg val
7298 // ptr = G_PTR_ADD base_reg shift
7299 // x = G_LOAD ptr
7300 //
7301 // And fold it into this addressing mode:
7302 //
7303 // ldr x, [base_reg, off_reg, lsl #LegalShiftVal]
7304
7305 // Check if we can find the G_PTR_ADD.
7306 MachineInstr *PtrAdd =
7307 getOpcodeDef(TargetOpcode::G_PTR_ADD, Root.getReg(), MRI);
7308 if (!PtrAdd || !isWorthFoldingIntoExtendedReg(*PtrAdd, MRI, true))
7309 return std::nullopt;
7310
7311 // Now, try to match an opcode which will match our specific offset.
7312 // We want a G_SHL or a G_MUL.
7313 MachineInstr *OffsetInst =
7314 getDefIgnoringCopies(PtrAdd->getOperand(2).getReg(), MRI);
7315 return selectExtendedSHL(Root, PtrAdd->getOperand(1),
7316 OffsetInst->getOperand(0), SizeInBytes,
7317 /*WantsExt=*/false);
7318}
7319
7320/// This is used for computing addresses like this:
7321///
7322/// ldr x1, [x2, x3]
7323///
7324/// Where x2 is the base register, and x3 is an offset register.
7325///
7326/// When possible (or profitable) to fold a G_PTR_ADD into the address
7327/// calculation, this will do so. Otherwise, it will return std::nullopt.
7328InstructionSelector::ComplexRendererFns
7329AArch64InstructionSelector::selectAddrModeRegisterOffset(
7330 MachineOperand &Root) const {
7331 MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();
7332
7333 // We need a GEP.
7334 MachineInstr *Gep = MRI.getVRegDef(Root.getReg());
7335 if (Gep->getOpcode() != TargetOpcode::G_PTR_ADD)
7336 return std::nullopt;
7337
7338 // If this is used more than once, let's not bother folding.
7339 // TODO: Check if they are memory ops. If they are, then we can still fold
7340 // without having to recompute anything.
7341 if (!MRI.hasOneNonDBGUse(Gep->getOperand(0).getReg()))
7342 return std::nullopt;
7343
7344 // Base is the GEP's LHS, offset is its RHS.
7345 return {{[=](MachineInstrBuilder &MIB) {
7346 MIB.addUse(Gep->getOperand(1).getReg());
7347 },
7348 [=](MachineInstrBuilder &MIB) {
7349 MIB.addUse(Gep->getOperand(2).getReg());
7350 },
7351 [=](MachineInstrBuilder &MIB) {
7352 // Need to add both immediates here to make sure that they are both
7353 // added to the instruction.
7354 MIB.addImm(0);
7355 MIB.addImm(0);
7356 }}};
7357}
7358
7359/// This is intended to be equivalent to selectAddrModeXRO in
7360/// AArch64ISelDAGtoDAG. It's used for selecting X register offset loads.
7361InstructionSelector::ComplexRendererFns
7362AArch64InstructionSelector::selectAddrModeXRO(MachineOperand &Root,
7363 unsigned SizeInBytes) const {
7364 MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();
7365 if (!Root.isReg())
7366 return std::nullopt;
7367 MachineInstr *PtrAdd =
7368 getOpcodeDef(TargetOpcode::G_PTR_ADD, Root.getReg(), MRI);
7369 if (!PtrAdd)
7370 return std::nullopt;
7371
7372 // Check for an immediate which cannot be encoded in the [base + imm]
7373 // addressing mode, and can't be encoded in an add/sub. If this happens, we'll
7374 // end up with code like:
7375 //
7376 // mov x0, wide
7377 // add x1 base, x0
7378 // ldr x2, [x1, x0]
7379 //
7380 // In this situation, we can use the [base, xreg] addressing mode to save an
7381 // add/sub:
7382 //
7383 // mov x0, wide
7384 // ldr x2, [base, x0]
7385 auto ValAndVReg =
7386 getIConstantVRegValWithLookThrough(PtrAdd->getOperand(2).getReg(), MRI);
7387 if (ValAndVReg) {
7388 unsigned Scale = Log2_32(SizeInBytes);
7389 int64_t ImmOff = ValAndVReg->Value.getSExtValue();
7390
7391 // Skip immediates that can be selected in the load/store addressing
7392 // mode.
7393 if (ImmOff % SizeInBytes == 0 && ImmOff >= 0 &&
7394 ImmOff < (0x1000 << Scale))
7395 return std::nullopt;
7396
7397 // Helper lambda to decide whether or not it is preferable to emit an add.
7398 auto isPreferredADD = [](int64_t ImmOff) {
7399 // Constants in [0x0, 0xfff] can be encoded in an add.
7400 if ((ImmOff & 0xfffffffffffff000LL) == 0x0LL)
7401 return true;
7402
7403 // Can it be encoded in an add lsl #12?
7404 if ((ImmOff & 0xffffffffff000fffLL) != 0x0LL)
7405 return false;
7406
7407 // It can be encoded in an add lsl #12, but we may not want to. If it is
7408 // possible to select this as a single movz, then prefer that. A single
7409 // movz is faster than an add with a shift.
7410 return (ImmOff & 0xffffffffff00ffffLL) != 0x0LL &&
7411 (ImmOff & 0xffffffffffff0fffLL) != 0x0LL;
7412 };
7413
7414 // If the immediate can be encoded in a single add/sub, then bail out.
7415 if (isPreferredADD(ImmOff) || isPreferredADD(-ImmOff))
7416 return std::nullopt;
7417 }
7418
7419 // Try to fold shifts into the addressing mode.
7420 auto AddrModeFns = selectAddrModeShiftedExtendXReg(Root, SizeInBytes);
7421 if (AddrModeFns)
7422 return AddrModeFns;
7423
7424 // If that doesn't work, see if it's possible to fold in registers from
7425 // a GEP.
7426 return selectAddrModeRegisterOffset(Root);
7427}
7428
7429/// This is used for computing addresses like this:
7430///
7431/// ldr x0, [xBase, wOffset, sxtw #LegalShiftVal]
7432///
7433/// Where we have a 64-bit base register, a 32-bit offset register, and an
7434/// extend (which may or may not be signed).
7435InstructionSelector::ComplexRendererFns
7436AArch64InstructionSelector::selectAddrModeWRO(MachineOperand &Root,
7437 unsigned SizeInBytes) const {
7438 MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();
7439
7440 MachineInstr *PtrAdd =
7441 getOpcodeDef(TargetOpcode::G_PTR_ADD, Root.getReg(), MRI);
7442 if (!PtrAdd || !isWorthFoldingIntoExtendedReg(*PtrAdd, MRI, true))
7443 return std::nullopt;
7444
7445 MachineOperand &LHS = PtrAdd->getOperand(1);
7446 MachineOperand &RHS = PtrAdd->getOperand(2);
7447 MachineInstr *OffsetInst = getDefIgnoringCopies(RHS.getReg(), MRI);
7448
7449 // The first case is the same as selectAddrModeXRO, except we need an extend.
7450 // In this case, we try to find a shift and extend, and fold them into the
7451 // addressing mode.
7452 //
7453 // E.g.
7454 //
7455 // off_reg = G_Z/S/ANYEXT ext_reg
7456 // val = G_CONSTANT LegalShiftVal
7457 // shift = G_SHL off_reg val
7458 // ptr = G_PTR_ADD base_reg shift
7459 // x = G_LOAD ptr
7460 //
7461 // In this case we can get a load like this:
7462 //
7463 // ldr x0, [base_reg, ext_reg, sxtw #LegalShiftVal]
7464 auto ExtendedShl = selectExtendedSHL(Root, LHS, OffsetInst->getOperand(0),
7465 SizeInBytes, /*WantsExt=*/true);
7466 if (ExtendedShl)
7467 return ExtendedShl;
7468
7469 // There was no shift. We can try and fold a G_Z/S/ANYEXT in alone though.
7470 //
7471 // e.g.
7472 // ldr something, [base_reg, ext_reg, sxtw]
7473 if (!isWorthFoldingIntoExtendedReg(*OffsetInst, MRI, true))
7474 return std::nullopt;
7475
7476 // Check if this is an extend. We'll get an extend type if it is.
7477 AArch64_AM::ShiftExtendType Ext =
7478 getExtendTypeForInst(*OffsetInst, MRI, /*IsLoadStore=*/true);
7479 if (Ext == AArch64_AM::InvalidShiftExtend)
7480 return std::nullopt;
7481
7482 // Need a 32-bit wide register.
7483 MachineIRBuilder MIB(*PtrAdd);
7484 Register ExtReg = moveScalarRegClass(OffsetInst->getOperand(1).getReg(),
7485 AArch64::GPR32RegClass, MIB);
7486 unsigned SignExtend = Ext == AArch64_AM::SXTW;
7487
7488 // Base is LHS, offset is ExtReg.
7489 return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(LHS.getReg()); },
7490 [=](MachineInstrBuilder &MIB) { MIB.addUse(ExtReg); },
7491 [=](MachineInstrBuilder &MIB) {
7492 MIB.addImm(SignExtend);
7493 MIB.addImm(0);
7494 }}};
7495}
7496
7497/// Select a "register plus unscaled signed 9-bit immediate" address. This
7498/// should only match when there is an offset that is not valid for a scaled
7499/// immediate addressing mode. The "Size" argument is the size in bytes of the
7500/// memory reference, which is needed here to know what is valid for a scaled
7501/// immediate.
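///
/// For example (illustrative), with Size == 8 an offset of -17 cannot use the
/// scaled [reg, #imm12] form (negative, not a multiple of 8) but fits the
/// signed 9-bit unscaled LDUR/STUR encoding.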
7502InstructionSelector::ComplexRendererFns
7503AArch64InstructionSelector::selectAddrModeUnscaled(MachineOperand &Root,
7504 unsigned Size) const {
7505 MachineRegisterInfo &MRI =
7506 Root.getParent()->getParent()->getParent()->getRegInfo();
7507
7508 if (!Root.isReg())
7509 return std::nullopt;
7510
7511 if (!isBaseWithConstantOffset(Root, MRI))
7512 return std::nullopt;
7513
7514 MachineInstr *RootDef = MRI.getVRegDef(Root.getReg());
7515
7516 MachineOperand &OffImm = RootDef->getOperand(2);
7517 if (!OffImm.isReg())
7518 return std::nullopt;
7519 MachineInstr *RHS = MRI.getVRegDef(OffImm.getReg());
7520 if (RHS->getOpcode() != TargetOpcode::G_CONSTANT)
7521 return std::nullopt;
7522 int64_t RHSC;
7523 MachineOperand &RHSOp1 = RHS->getOperand(1);
7524 if (!RHSOp1.isCImm() || RHSOp1.getCImm()->getBitWidth() > 64)
7525 return std::nullopt;
7526 RHSC = RHSOp1.getCImm()->getSExtValue();
7527
7528 if (RHSC >= -256 && RHSC < 256) {
7529 MachineOperand &Base = RootDef->getOperand(1);
7530 return {{
7531 [=](MachineInstrBuilder &MIB) { MIB.add(Base); },
7532 [=](MachineInstrBuilder &MIB) { MIB.addImm(RHSC); },
7533 }};
7534 }
7535 return std::nullopt;
7536}
7537
7538InstructionSelector::ComplexRendererFns
7539AArch64InstructionSelector::tryFoldAddLowIntoImm(MachineInstr &RootDef,
7540 unsigned Size,
7541 MachineRegisterInfo &MRI) const {
7542 if (RootDef.getOpcode() != AArch64::G_ADD_LOW)
7543 return std::nullopt;
7544 MachineInstr &Adrp = *MRI.getVRegDef(RootDef.getOperand(1).getReg());
7545 if (Adrp.getOpcode() != AArch64::ADRP)
7546 return std::nullopt;
7547
7548 // TODO: add heuristics like isWorthFoldingADDlow() from SelectionDAG.
7549 auto Offset = Adrp.getOperand(1).getOffset();
7550 if (Offset % Size != 0)
7551 return std::nullopt;
7552
7553 auto GV = Adrp.getOperand(1).getGlobal();
7554 if (GV->isThreadLocal())
7555 return std::nullopt;
7556
7557 auto &MF = *RootDef.getParent()->getParent();
7558 if (GV->getPointerAlignment(MF.getDataLayout()) < Size)
7559 return std::nullopt;
7560
7561 unsigned OpFlags = STI.ClassifyGlobalReference(GV, MF.getTarget());
7562 MachineIRBuilder MIRBuilder(RootDef);
7563 Register AdrpReg = Adrp.getOperand(0).getReg();
7564 return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(AdrpReg); },
7565 [=](MachineInstrBuilder &MIB) {
7566 MIB.addGlobalAddress(GV, Offset,
7567 OpFlags | AArch64II::MO_PAGEOFF |
7568 AArch64II::MO_NC);
7569 }}};
7570}
7571
7572/// Select a "register plus scaled unsigned 12-bit immediate" address. The
7573/// "Size" argument is the size in bytes of the memory reference, which
7574/// determines the scale.
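///
/// For example (illustrative), with Size == 8 a byte offset of 32 is rendered
/// as the scaled immediate 4, i.e. the operands of "ldr x0, [x1, #32]".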
7575InstructionSelector::ComplexRendererFns
7576AArch64InstructionSelector::selectAddrModeIndexed(MachineOperand &Root,
7577 unsigned Size) const {
7578 MachineFunction &MF = *Root.getParent()->getParent()->getParent();
7579 MachineRegisterInfo &MRI = MF.getRegInfo();
7580
7581 if (!Root.isReg())
7582 return std::nullopt;
7583
7584 MachineInstr *RootDef = MRI.getVRegDef(Root.getReg());
7585 if (RootDef->getOpcode() == TargetOpcode::G_FRAME_INDEX) {
7586 return {{
7587 [=](MachineInstrBuilder &MIB) { MIB.add(RootDef->getOperand(1)); },
7588 [=](MachineInstrBuilder &MIB) { MIB.addImm(0); },
7589 }};
7590 }
7591
7592 CodeModel::Model CM = MF.getTarget().getCodeModel();
7593 // Check if we can fold in the ADD of small code model ADRP + ADD address.
7594 // HACK: ld64 on Darwin doesn't support relocations on PRFM, so we can't fold
7595 // globals into the offset.
7596 MachineInstr *RootParent = Root.getParent();
7597 if (CM == CodeModel::Small &&
7598 !(RootParent->getOpcode() == AArch64::G_AARCH64_PREFETCH &&
7599 STI.isTargetDarwin())) {
7600 auto OpFns = tryFoldAddLowIntoImm(*RootDef, Size, MRI);
7601 if (OpFns)
7602 return OpFns;
7603 }
7604
7605 if (isBaseWithConstantOffset(Root, MRI)) {
7606 MachineOperand &LHS = RootDef->getOperand(1);
7607 MachineOperand &RHS = RootDef->getOperand(2);
7608 MachineInstr *LHSDef = MRI.getVRegDef(LHS.getReg());
7609 MachineInstr *RHSDef = MRI.getVRegDef(RHS.getReg());
7610
7611 int64_t RHSC = (int64_t)RHSDef->getOperand(1).getCImm()->getZExtValue();
7612 unsigned Scale = Log2_32(Size);
7613 if ((RHSC & (Size - 1)) == 0 && RHSC >= 0 && RHSC < (0x1000 << Scale)) {
7614 if (LHSDef->getOpcode() == TargetOpcode::G_FRAME_INDEX)
7615 return {{
7616 [=](MachineInstrBuilder &MIB) { MIB.add(LHSDef->getOperand(1)); },
7617 [=](MachineInstrBuilder &MIB) { MIB.addImm(RHSC >> Scale); },
7618 }};
7619
7620 return {{
7621 [=](MachineInstrBuilder &MIB) { MIB.add(LHS); },
7622 [=](MachineInstrBuilder &MIB) { MIB.addImm(RHSC >> Scale); },
7623 }};
7624 }
7625 }
7626
7627 // Before falling back to our general case, check if the unscaled
7628 // instructions can handle this. If so, that's preferable.
7629 if (selectAddrModeUnscaled(Root, Size))
7630 return std::nullopt;
7631
7632 return {{
7633 [=](MachineInstrBuilder &MIB) { MIB.add(Root); },
7634 [=](MachineInstrBuilder &MIB) { MIB.addImm(0); },
7635 }};
7636}
7637
7638/// Given a shift instruction, return the correct shift type for that
7639/// instruction.
7640 static AArch64_AM::ShiftExtendType getShiftTypeForInst(MachineInstr &MI) {
7641 switch (MI.getOpcode()) {
7642 default:
7643 return AArch64_AM::InvalidShiftExtend;
7644 case TargetOpcode::G_SHL:
7645 return AArch64_AM::LSL;
7646 case TargetOpcode::G_LSHR:
7647 return AArch64_AM::LSR;
7648 case TargetOpcode::G_ASHR:
7649 return AArch64_AM::ASR;
7650 case TargetOpcode::G_ROTR:
7651 return AArch64_AM::ROR;
7652 }
7653}
7654
7655/// Select a "shifted register" operand. If the value is not shifted, set the
7656/// shift operand to a default value of "lsl 0".
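///
/// For example (illustrative), a G_SHL of the operand by a constant 3 folds
/// into something like "add x0, x1, x2, lsl #3".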
7657InstructionSelector::ComplexRendererFns
7658AArch64InstructionSelector::selectShiftedRegister(MachineOperand &Root,
7659 bool AllowROR) const {
7660 if (!Root.isReg())
7661 return std::nullopt;
7662 MachineRegisterInfo &MRI =
7663 Root.getParent()->getParent()->getParent()->getRegInfo();
7664
7665 // Check if the operand is defined by an instruction which corresponds to
7666 // a ShiftExtendType. E.g. a G_SHL, G_LSHR, etc.
7667 MachineInstr *ShiftInst = MRI.getVRegDef(Root.getReg());
7668 AArch64_AM::ShiftExtendType ShType = getShiftTypeForInst(*ShiftInst);
7669 if (ShType == AArch64_AM::InvalidShiftExtend)
7670 return std::nullopt;
7671 if (ShType == AArch64_AM::ROR && !AllowROR)
7672 return std::nullopt;
7673 if (!isWorthFoldingIntoExtendedReg(*ShiftInst, MRI, false))
7674 return std::nullopt;
7675
7676 // Need an immediate on the RHS.
7677 MachineOperand &ShiftRHS = ShiftInst->getOperand(2);
7678 auto Immed = getImmedFromMO(ShiftRHS);
7679 if (!Immed)
7680 return std::nullopt;
7681
7682 // We have something that we can fold. Fold in the shift's LHS and RHS into
7683 // the instruction.
7684 MachineOperand &ShiftLHS = ShiftInst->getOperand(1);
7685 Register ShiftReg = ShiftLHS.getReg();
7686
7687 unsigned NumBits = MRI.getType(ShiftReg).getSizeInBits();
7688 unsigned Val = *Immed & (NumBits - 1);
7689 unsigned ShiftVal = AArch64_AM::getShifterImm(ShType, Val);
7690
7691 return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(ShiftReg); },
7692 [=](MachineInstrBuilder &MIB) { MIB.addImm(ShiftVal); }}};
7693}
7694
7695AArch64_AM::ShiftExtendType AArch64InstructionSelector::getExtendTypeForInst(
7696 MachineInstr &MI, MachineRegisterInfo &MRI, bool IsLoadStore) const {
7697 unsigned Opc = MI.getOpcode();
7698
7699 // Handle explicit extend instructions first.
7700 if (Opc == TargetOpcode::G_SEXT || Opc == TargetOpcode::G_SEXT_INREG) {
7701 unsigned Size;
7702 if (Opc == TargetOpcode::G_SEXT)
7703 Size = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
7704 else
7705 Size = MI.getOperand(2).getImm();
7706 assert(Size != 64 && "Extend from 64 bits?");
7707 switch (Size) {
7708 case 8:
7709 return IsLoadStore ? AArch64_AM::InvalidShiftExtend : AArch64_AM::SXTB;
7710 case 16:
7711 return IsLoadStore ? AArch64_AM::InvalidShiftExtend : AArch64_AM::SXTH;
7712 case 32:
7713 return AArch64_AM::SXTW;
7714 default:
7715 return AArch64_AM::InvalidShiftExtend;
7716 }
7717 }
7718
7719 if (Opc == TargetOpcode::G_ZEXT || Opc == TargetOpcode::G_ANYEXT) {
7720 unsigned Size = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
7721 assert(Size != 64 && "Extend from 64 bits?");
7722 switch (Size) {
7723 case 8:
7724 return IsLoadStore ? AArch64_AM::InvalidShiftExtend : AArch64_AM::UXTB;
7725 case 16:
7726 return IsLoadStore ? AArch64_AM::InvalidShiftExtend : AArch64_AM::UXTH;
7727 case 32:
7728 return AArch64_AM::UXTW;
7729 default:
7730 return AArch64_AM::InvalidShiftExtend;
7731 }
7732 }
7733
7734 // Don't have an explicit extend. Try to handle a G_AND with a constant mask
7735 // on the RHS.
7736 if (Opc != TargetOpcode::G_AND)
7737 return AArch64_AM::InvalidShiftExtend;
7738
7739 std::optional<uint64_t> MaybeAndMask = getImmedFromMO(MI.getOperand(2));
7740 if (!MaybeAndMask)
7741 return AArch64_AM::InvalidShiftExtend;
7742 uint64_t AndMask = *MaybeAndMask;
7743 switch (AndMask) {
7744 default:
7745 return AArch64_AM::InvalidShiftExtend;
7746 case 0xFF:
7747 return !IsLoadStore ? AArch64_AM::UXTB : AArch64_AM::InvalidShiftExtend;
7748 case 0xFFFF:
7749 return !IsLoadStore ? AArch64_AM::UXTH : AArch64_AM::InvalidShiftExtend;
7750 case 0xFFFFFFFF:
7751 return AArch64_AM::UXTW;
7752 }
7753}
7754
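/// Return \p Reg unchanged if its type already matches the size of \p RC;
/// otherwise emit (and immediately select) a COPY of \p Reg into \p RC and
/// return the copy's result.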
7755Register AArch64InstructionSelector::moveScalarRegClass(
7756 Register Reg, const TargetRegisterClass &RC, MachineIRBuilder &MIB) const {
7757 MachineRegisterInfo &MRI = *MIB.getMRI();
7758 auto Ty = MRI.getType(Reg);
7759 assert(!Ty.isVector() && "Expected scalars only!");
7760 if (Ty.getSizeInBits() == TRI.getRegSizeInBits(RC))
7761 return Reg;
7762
7763 // Create a copy and immediately select it.
7764 // FIXME: We should have an emitCopy function?
7765 auto Copy = MIB.buildCopy({&RC}, {Reg});
7766 selectCopy(*Copy, TII, MRI, TRI, RBI);
7767 return Copy.getReg(0);
7768}
7769
7770/// Select an "extended register" operand. This operand folds in an extend
7771/// followed by an optional left shift.
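///
/// For example (illustrative), a G_ZEXT from s32 followed by a G_SHL by 2
/// folds into something like "add x0, x1, w2, uxtw #2".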
7772InstructionSelector::ComplexRendererFns
7773AArch64InstructionSelector::selectArithExtendedRegister(
7774 MachineOperand &Root) const {
7775 if (!Root.isReg())
7776 return std::nullopt;
7777 MachineRegisterInfo &MRI =
7778 Root.getParent()->getParent()->getParent()->getRegInfo();
7779
7780 uint64_t ShiftVal = 0;
7781 Register ExtReg;
7782 AArch64_AM::ShiftExtendType Ext;
7783 MachineInstr *RootDef = getDefIgnoringCopies(Root.getReg(), MRI);
7784 if (!RootDef)
7785 return std::nullopt;
7786
7787 if (!isWorthFoldingIntoExtendedReg(*RootDef, MRI, false))
7788 return std::nullopt;
7789
7790 // Check if we can fold a shift and an extend.
7791 if (RootDef->getOpcode() == TargetOpcode::G_SHL) {
7792 // Look for a constant on the RHS of the shift.
7793 MachineOperand &RHS = RootDef->getOperand(2);
7794 std::optional<uint64_t> MaybeShiftVal = getImmedFromMO(RHS);
7795 if (!MaybeShiftVal)
7796 return std::nullopt;
7797 ShiftVal = *MaybeShiftVal;
7798 if (ShiftVal > 4)
7799 return std::nullopt;
7800 // Look for a valid extend instruction on the LHS of the shift.
7801 MachineOperand &LHS = RootDef->getOperand(1);
7802 MachineInstr *ExtDef = getDefIgnoringCopies(LHS.getReg(), MRI);
7803 if (!ExtDef)
7804 return std::nullopt;
7805 Ext = getExtendTypeForInst(*ExtDef, MRI);
7806 if (Ext == AArch64_AM::InvalidShiftExtend)
7807 return std::nullopt;
7808 ExtReg = ExtDef->getOperand(1).getReg();
7809 } else {
7810 // Didn't get a shift. Try just folding an extend.
7811 Ext = getExtendTypeForInst(*RootDef, MRI);
7812 if (Ext == AArch64_AM::InvalidShiftExtend)
7813 return std::nullopt;
7814 ExtReg = RootDef->getOperand(1).getReg();
7815
7816 // If we have a 32 bit instruction which zeroes out the high half of a
7817 // register, we get an implicit zero extend for free. Check if we have one.
7818 // FIXME: We actually emit the extend right now even though we don't have
7819 // to.
7820 if (Ext == AArch64_AM::UXTW && MRI.getType(ExtReg).getSizeInBits() == 32) {
7821 MachineInstr *ExtInst = MRI.getVRegDef(ExtReg);
7822 if (isDef32(*ExtInst))
7823 return std::nullopt;
7824 }
7825 }
7826
7827 // We require a GPR32 here. Narrow the ExtReg if needed using a subregister
7828 // copy.
7829 MachineIRBuilder MIB(*RootDef);
7830 ExtReg = moveScalarRegClass(ExtReg, AArch64::GPR32RegClass, MIB);
7831
7832 return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(ExtReg); },
7833 [=](MachineInstrBuilder &MIB) {
7834 MIB.addImm(getArithExtendImm(Ext, ShiftVal));
7835 }}};
7836}
7837
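/// Match an operand that is the high half of a 128-bit vector: the second
/// result of a two-result G_UNMERGE_VALUES, a G_EXTRACT_VECTOR_ELT of lane 1
/// of a v2s64, or a G_EXTRACT_SUBVECTOR starting at the upper half, looking
/// through bitcasts on little-endian targets.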
7838InstructionSelector::ComplexRendererFns
7839AArch64InstructionSelector::selectExtractHigh(MachineOperand &Root) const {
7840 if (!Root.isReg())
7841 return std::nullopt;
7842 MachineRegisterInfo &MRI =
7843 Root.getParent()->getParent()->getParent()->getRegInfo();
7844
7845 auto Extract = getDefSrcRegIgnoringCopies(Root.getReg(), MRI);
7846 while (Extract && Extract->MI->getOpcode() == TargetOpcode::G_BITCAST &&
7847 STI.isLittleEndian())
7848 Extract =
7849 getDefSrcRegIgnoringCopies(Extract->MI->getOperand(1).getReg(), MRI);
7850 if (!Extract)
7851 return std::nullopt;
7852
7853 if (auto *Unmerge = dyn_cast<GUnmerge>(Extract->MI)) {
7854 if (Unmerge->getNumDefs() == 2 &&
7855 Extract->Reg == Unmerge->getOperand(1).getReg()) {
7856 Register ExtReg = Unmerge->getSourceReg();
7857 return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(ExtReg); }}};
7858 }
7859 }
7860 if (auto *ExtElt = dyn_cast<GExtractVectorElement>(Extract->MI)) {
7861 LLT SrcTy = MRI.getType(ExtElt->getVectorReg());
7862 auto LaneIdx =
7863 getIConstantVRegValWithLookThrough(ExtElt->getIndexReg(), MRI);
7864 if (LaneIdx && SrcTy == LLT::fixed_vector(2, 64) &&
7865 LaneIdx->Value.getSExtValue() == 1) {
7866 Register ExtReg = ExtElt->getVectorReg();
7867 return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(ExtReg); }}};
7868 }
7869 }
7870 if (auto *Subvec = dyn_cast<GExtractSubvector>(Extract->MI)) {
7871 LLT SrcTy = MRI.getType(Subvec->getSrcVec());
7872 auto LaneIdx = Subvec->getIndexImm();
7873 if (LaneIdx == SrcTy.getNumElements() / 2) {
7874 Register ExtReg = Subvec->getSrcVec();
7875 return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(ExtReg); }}};
7876 }
7877 }
7878
7879 return std::nullopt;
7880}
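// Illustrative example (an assumption about the intended use, not from the
// source): given
//   %lo:fpr(<2 x s32>), %hi:fpr(<2 x s32>) = G_UNMERGE_VALUES %v:fpr(<4 x s32>)
// a consumer of %hi can be fed %v directly, so "high half" instructions such
// as smull2/umull2, which read the upper 64 bits of the Q register, can be
// selected without an explicit extract.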
7881
7882InstructionSelector::ComplexRendererFns
7883AArch64InstructionSelector::selectCVTFixedPointVecBase(
7884 const MachineOperand &Root, bool isReciprocal) const {
7885 if (!Root.isReg())
7886 return std::nullopt;
7887 const MachineRegisterInfo &MRI =
7888 Root.getParent()->getParent()->getParent()->getRegInfo();
7889
7890 MachineInstr *Dup = getDefIgnoringCopies(Root.getReg(), MRI);
7891 if (Dup->getOpcode() != AArch64::G_DUP)
7892 return std::nullopt;
7893 std::optional<ValueAndVReg> CstVal =
7894 getAnyConstantVRegValWithLookThrough(Dup->getOperand(1).getReg(), MRI);
7895 if (!CstVal)
7896 return std::nullopt;
7897
7898 unsigned RegWidth = MRI.getType(Root.getReg()).getScalarSizeInBits();
7899 APFloat FVal(0.0);
7900 switch (RegWidth) {
7901 case 16:
7902 FVal = APFloat(APFloat::IEEEhalf(), CstVal->Value);
7903 break;
7904 case 32:
7905 FVal = APFloat(APFloat::IEEEsingle(), CstVal->Value);
7906 break;
7907 case 64:
7908 FVal = APFloat(APFloat::IEEEdouble(), CstVal->Value);
7909 break;
7910 default:
7911 return std::nullopt;
7912 };
7913 if (unsigned FBits =
7914 CheckFixedPointOperandConstant(FVal, RegWidth, isReciprocal))
7915 return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(FBits); }}};
7916
7917 return std::nullopt;
7918}
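// Sketch of the expected matching behaviour (an interpretation, not verified
// against CheckFixedPointOperandConstant): the splatted constant must be an
// exact power of two, 2^FBits for the direct form and 2^-FBits for the
// reciprocal form, so e.g. a G_DUP of 16.0 feeding a float-to-fixed-point
// convert would render the fractional-bit immediate #4.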
7919
7920InstructionSelector::ComplexRendererFns
7921AArch64InstructionSelector::selectCVTFixedPointVec(MachineOperand &Root) const {
7922 return selectCVTFixedPointVecBase(Root, /*isReciprocal*/ false);
7923}
7924
7925InstructionSelector::ComplexRendererFns
7926AArch64InstructionSelector::selectCVTFixedPosRecipOperandVec(
7927 MachineOperand &Root) const {
7928 return selectCVTFixedPointVecBase(Root, /*isReciprocal*/ true);
7929}
7930
7931void AArch64InstructionSelector::renderFixedPointXForm(MachineInstrBuilder &MIB,
7932 const MachineInstr &MI,
7933 int OpIdx) const {
7934 // FIXME: This is only needed to satisfy the type checking in tablegen, and
7935 // should be able to reuse the Renderers already calculated by
7936 // selectCVTFixedPointVecBase.
7937 InstructionSelector::ComplexRendererFns Renderer =
7938 selectCVTFixedPointVecBase(MI.getOperand(2), /*isReciprocal*/ false);
7939 assert((Renderer && Renderer->size() == 1) &&
7940 "Expected selectCVTFixedPointVec to provide a function\n");
7941 (Renderer->front())(MIB);
7942}
7943
7944void AArch64InstructionSelector::renderFixedPointRecipXForm(
7945 MachineInstrBuilder &MIB, const MachineInstr &MI, int OpIdx) const {
7946 InstructionSelector::ComplexRendererFns Renderer =
7947 selectCVTFixedPointVecBase(MI.getOperand(2), /*isReciprocal*/ true);
7948 assert((Renderer && Renderer->size() == 1) &&
7949 "Expected selectCVTFixedPosRecipOperandVec to provide a function\n");
7950 (Renderer->front())(MIB);
7951}
7952
7953void AArch64InstructionSelector::renderTruncImm(MachineInstrBuilder &MIB,
7954 const MachineInstr &MI,
7955 int OpIdx) const {
7956 const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
7957 assert(MI.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 &&
7958 "Expected G_CONSTANT");
7959 std::optional<int64_t> CstVal =
7960 getIConstantVRegSExtVal(MI.getOperand(0).getReg(), MRI);
7961 assert(CstVal && "Expected constant value");
7962 MIB.addImm(*CstVal);
7963}
7964
7965void AArch64InstructionSelector::renderLogicalImm32(
7966 MachineInstrBuilder &MIB, const MachineInstr &I, int OpIdx) const {
7967 assert(I.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 &&
7968 "Expected G_CONSTANT");
7969 uint64_t CstVal = I.getOperand(1).getCImm()->getZExtValue();
7970 uint64_t Enc = AArch64_AM::encodeLogicalImmediate(CstVal, 32);
7971 MIB.addImm(Enc);
7972}
7973
7974void AArch64InstructionSelector::renderLogicalImm64(
7975 MachineInstrBuilder &MIB, const MachineInstr &I, int OpIdx) const {
7976 assert(I.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 &&
7977 "Expected G_CONSTANT");
7978 uint64_t CstVal = I.getOperand(1).getCImm()->getZExtValue();
7979 uint64_t Enc = AArch64_AM::encodeLogicalImmediate(CstVal, 64);
7980 MIB.addImm(Enc);
7981}
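// For illustration (values chosen here, not from the source): a G_CONSTANT
// i32 0x00ff00ff is a run of ones replicated across 16-bit elements, so it is
// a valid AArch64 logical immediate; the renderer emits its packed
// (N:immr:imms) encoding, which is what ANDWri/ORRWri/EORWri expect as their
// immediate operand.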
7982
7983void AArch64InstructionSelector::renderUbsanTrap(MachineInstrBuilder &MIB,
7984 const MachineInstr &MI,
7985 int OpIdx) const {
7986 assert(MI.getOpcode() == TargetOpcode::G_UBSANTRAP && OpIdx == 0 &&
7987 "Expected G_UBSANTRAP");
7988 MIB.addImm(MI.getOperand(0).getImm() | ('U' << 8));
7989}
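// The rendered payload follows the BRK-immediate convention used for
// __builtin_ubsantrap on AArch64: 'U' is 0x55, so a check kind K becomes the
// 16-bit immediate 0x5500 | K (e.g. kind 0 -> BRK #0x5500).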
7990
7991void AArch64InstructionSelector::renderFPImm16(MachineInstrBuilder &MIB,
7992 const MachineInstr &MI,
7993 int OpIdx) const {
7994 assert(MI.getOpcode() == TargetOpcode::G_FCONSTANT && OpIdx == -1 &&
7995 "Expected G_FCONSTANT");
7996 MIB.addImm(
7997 AArch64_AM::getFP16Imm(MI.getOperand(1).getFPImm()->getValueAPF()));
7998}
7999
8000void AArch64InstructionSelector::renderFPImm32(MachineInstrBuilder &MIB,
8001 const MachineInstr &MI,
8002 int OpIdx) const {
8003 assert(MI.getOpcode() == TargetOpcode::G_FCONSTANT && OpIdx == -1 &&
8004 "Expected G_FCONSTANT");
8005 MIB.addImm(
8006 AArch64_AM::getFP32Imm(MI.getOperand(1).getFPImm()->getValueAPF()));
8007}
8008
8009void AArch64InstructionSelector::renderFPImm64(MachineInstrBuilder &MIB,
8010 const MachineInstr &MI,
8011 int OpIdx) const {
8012 assert(MI.getOpcode() == TargetOpcode::G_FCONSTANT && OpIdx == -1 &&
8013 "Expected G_FCONSTANT");
8014 MIB.addImm(
8015 AArch64_AM::getFP64Imm(MI.getOperand(1).getFPImm()->getValueAPF()));
8016}
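// These three renderers assume the constant already fits the 8-bit FMOV
// immediate encoding, i.e. values of the form +/-(16..31)/16 * 2^e with e in
// [-3, 4] (for example 1.0, 0.5 or -3.0); the getFP16Imm/getFP32Imm/getFP64Imm
// helpers return -1 for anything else, so the selection patterns are expected
// to have checked representability first.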
8017
8018void AArch64InstructionSelector::renderFPImm32SIMDModImmType4(
8019 MachineInstrBuilder &MIB, const MachineInstr &MI, int OpIdx) const {
8020 assert(MI.getOpcode() == TargetOpcode::G_FCONSTANT && OpIdx == -1 &&
8021 "Expected G_FCONSTANT");
8022 MIB.addImm(AArch64_AM::encodeAdvSIMDModImmType4(MI.getOperand(1)
8023                          .getFPImm()
8024 ->getValueAPF()
8025 .bitcastToAPInt()
8026 .getZExtValue()));
8027}
8028
8029bool AArch64InstructionSelector::isLoadStoreOfNumBytes(
8030 const MachineInstr &MI, unsigned NumBytes) const {
8031 if (!MI.mayLoadOrStore())
8032 return false;
8033 assert(MI.hasOneMemOperand() &&
8034 "Expected load/store to have only one mem op!");
8035 return (*MI.memoperands_begin())->getSize() == NumBytes;
8036}
8037
8038bool AArch64InstructionSelector::isDef32(const MachineInstr &MI) const {
8039 const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
8040 if (MRI.getType(MI.getOperand(0).getReg()).getSizeInBits() != 32)
8041 return false;
8042
8043 // Only return true if we know the operation will zero-out the high half of
8044 // the 64-bit register. Truncates can be subregister copies, which don't
8045 // zero out the high bits. Copies and other copy-like instructions can be
8046 // fed by truncates, or could be lowered as subregister copies.
8047 switch (MI.getOpcode()) {
8048 default:
8049 return true;
8050 case TargetOpcode::COPY:
8051 case TargetOpcode::G_BITCAST:
8052 case TargetOpcode::G_TRUNC:
8053 case TargetOpcode::G_PHI:
8054 return false;
8055 }
8056}
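// For example (illustrative): a %w:gpr(s32) = G_ADD ... is selected to a
// W-register instruction, and writing a W register zeroes bits [63:32] of the
// corresponding X register, so a following zero-extend is free; a G_TRUNC may
// lower to a plain subregister copy and therefore cannot make that guarantee.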
8057
8058
8059// Perform fixups on the given PHI instruction's operands to force them all
8060// to be the same as the destination regbank.
8061static void fixupPHIOpBanks(MachineInstr &MI, MachineRegisterInfo &MRI,
8062                            const AArch64RegisterBankInfo &RBI) {
8063 assert(MI.getOpcode() == TargetOpcode::G_PHI && "Expected a G_PHI");
8064 Register DstReg = MI.getOperand(0).getReg();
8065 const RegisterBank *DstRB = MRI.getRegBankOrNull(DstReg);
8066 assert(DstRB && "Expected PHI dst to have regbank assigned");
8067 MachineIRBuilder MIB(MI);
8068
8069 // Go through each operand and ensure it has the same regbank.
8070 for (MachineOperand &MO : llvm::drop_begin(MI.operands())) {
8071 if (!MO.isReg())
8072 continue;
8073 Register OpReg = MO.getReg();
8074 const RegisterBank *RB = MRI.getRegBankOrNull(OpReg);
8075 if (RB != DstRB) {
8076 // Insert a cross-bank copy.
8077 auto *OpDef = MRI.getVRegDef(OpReg);
8078 const LLT &Ty = MRI.getType(OpReg);
8079 MachineBasicBlock &OpDefBB = *OpDef->getParent();
8080
8081 // Any instruction we insert must appear after all PHIs in the block
8082 // for the block to be valid MIR.
8083 MachineBasicBlock::iterator InsertPt = std::next(OpDef->getIterator());
8084 if (InsertPt != OpDefBB.end() && InsertPt->isPHI())
8085 InsertPt = OpDefBB.getFirstNonPHI();
8086 MIB.setInsertPt(*OpDef->getParent(), InsertPt);
8087 auto Copy = MIB.buildCopy(Ty, OpReg);
8088 MRI.setRegBank(Copy.getReg(0), *DstRB);
8089 MO.setReg(Copy.getReg(0));
8090 }
8091 }
8092}
8093
8094void AArch64InstructionSelector::processPHIs(MachineFunction &MF) {
8095 // We're looking for PHIs, build a list so we don't invalidate iterators.
8096 MachineRegisterInfo &MRI = MF.getRegInfo();
8097 SmallVector<MachineInstr *, 32> Phis;
8098 for (auto &BB : MF) {
8099 for (auto &MI : BB) {
8100 if (MI.getOpcode() == TargetOpcode::G_PHI)
8101 Phis.emplace_back(&MI);
8102 }
8103 }
8104
8105 for (auto *MI : Phis) {
8106 // We need to do some work here if the operand types are < 16 bit and they
8107 // are split across fpr/gpr banks. Since all types <32b on gpr
8108 // end up being assigned gpr32 regclasses, we can end up with PHIs here
8109 // which try to select between a gpr32 and an fpr16. Ideally RBS shouldn't
8110 // be selecting heterogeneous regbanks for operands if possible, but we
8111 // still need to be able to deal with it here.
8112 //
8113 // To fix this, if we have a gpr-bank operand < 32b in size and at least
8114 // one other operand is on the fpr bank, then we add cross-bank copies
8115 // to homogenize the operand banks. For simplicity the bank that we choose
8116 // to settle on is whatever bank the def operand has. For example:
8117 //
8118 // %endbb:
8119 // %dst:gpr(s16) = G_PHI %in1:gpr(s16), %bb1, %in2:fpr(s16), %bb2
8120 // =>
8121 // %bb2:
8122 // ...
8123 // %in2_copy:gpr(s16) = COPY %in2:fpr(s16)
8124 // ...
8125 // %endbb:
8126 // %dst:gpr(s16) = G_PHI %in1:gpr(s16), %bb1, %in2_copy:gpr(s16), %bb2
8127 bool HasGPROp = false, HasFPROp = false;
8128 for (const MachineOperand &MO : llvm::drop_begin(MI->operands())) {
8129 if (!MO.isReg())
8130 continue;
8131 const LLT &Ty = MRI.getType(MO.getReg());
8132 if (!Ty.isValid() || !Ty.isScalar())
8133 break;
8134 if (Ty.getSizeInBits() >= 32)
8135 break;
8136 const RegisterBank *RB = MRI.getRegBankOrNull(MO.getReg());
8137 // If for some reason we don't have a regbank yet, don't try anything.
8138 if (!RB)
8139 break;
8140
8141 if (RB->getID() == AArch64::GPRRegBankID)
8142 HasGPROp = true;
8143 else
8144 HasFPROp = true;
8145 }
8146 // We have heterogeneous regbanks; fix them up.
8147 if (HasGPROp && HasFPROp)
8148 fixupPHIOpBanks(*MI, MRI, RBI);
8149 }
8150}
8151
8152namespace llvm {
8153InstructionSelector *
8154createAArch64InstructionSelector(const AArch64TargetMachine &TM,
8155                                 const AArch64Subtarget &Subtarget,
8156 const AArch64RegisterBankInfo &RBI) {
8157 return new AArch64InstructionSelector(TM, Subtarget, RBI);
8158}
8159}