1//===- AArch64InstructionSelector.cpp ----------------------------*- C++ -*-==//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8/// \file
9/// This file implements the targeting of the InstructionSelector class for
10/// AArch64.
11/// \todo This should be generated by TableGen.
12//===----------------------------------------------------------------------===//
13
15#include "AArch64InstrInfo.h"
18#include "AArch64RegisterInfo.h"
19#include "AArch64Subtarget.h"
41#include "llvm/IR/Constants.h"
44#include "llvm/IR/IntrinsicsAArch64.h"
45#include "llvm/IR/Type.h"
46#include "llvm/Pass.h"
47#include "llvm/Support/Debug.h"
49#include <optional>
50
51#define DEBUG_TYPE "aarch64-isel"
52
53using namespace llvm;
54using namespace MIPatternMatch;
55using namespace AArch64GISelUtils;
56
57namespace llvm {
58class BlockFrequencyInfo;
59class ProfileSummaryInfo;
60}
61
62namespace {
63
64#define GET_GLOBALISEL_PREDICATE_BITSET
65#include "AArch64GenGlobalISel.inc"
66#undef GET_GLOBALISEL_PREDICATE_BITSET
67
68
69class AArch64InstructionSelector : public InstructionSelector {
70public:
71 AArch64InstructionSelector(const AArch64TargetMachine &TM,
72 const AArch64Subtarget &STI,
73 const AArch64RegisterBankInfo &RBI);
74
75 bool select(MachineInstr &I) override;
76 static const char *getName() { return DEBUG_TYPE; }
77
78 void setupMF(MachineFunction &MF, GISelKnownBits *KB,
79 CodeGenCoverage *CoverageInfo, ProfileSummaryInfo *PSI,
80 BlockFrequencyInfo *BFI) override {
81 InstructionSelector::setupMF(MF, KB, CoverageInfo, PSI, BFI);
82 MIB.setMF(MF);
83
84 // hasFnAttribute() is expensive to call on every BRCOND selection, so
85 // cache it here for each run of the selector.
86 ProduceNonFlagSettingCondBr =
87 !MF.getFunction().hasFnAttribute(Attribute::SpeculativeLoadHardening);
88 MFReturnAddr = Register();
89
90 processPHIs(MF);
91 }
92
93private:
94 /// tblgen-erated 'select' implementation, used as the initial selector for
95 /// the patterns that don't require complex C++.
96 bool selectImpl(MachineInstr &I, CodeGenCoverage &CoverageInfo) const;
97
98 // A lowering phase that runs before any selection attempts.
99 // Returns true if the instruction was modified.
100 bool preISelLower(MachineInstr &I);
101
102 // An early selection function that runs before the selectImpl() call.
103 bool earlySelect(MachineInstr &I);
104
105 /// Save state that is shared between select calls, call select on \p I and
106 /// then restore the saved state. This can be used to recursively call select
107 /// within a select call.
108 bool selectAndRestoreState(MachineInstr &I);
109
110 // Do some preprocessing of G_PHIs before we begin selection.
111 void processPHIs(MachineFunction &MF);
112
113 bool earlySelectSHL(MachineInstr &I, MachineRegisterInfo &MRI);
114
115 /// Eliminate same-sized cross-bank copies into stores before selectImpl().
116 bool contractCrossBankCopyIntoStore(MachineInstr &I,
117 MachineRegisterInfo &MRI);
118
119 bool convertPtrAddToAdd(MachineInstr &I, MachineRegisterInfo &MRI);
120
121 bool selectVaStartAAPCS(MachineInstr &I, MachineFunction &MF,
122 MachineRegisterInfo &MRI) const;
123 bool selectVaStartDarwin(MachineInstr &I, MachineFunction &MF,
124 MachineRegisterInfo &MRI) const;
125
126 ///@{
127 /// Helper functions for selectCompareBranch.
128 bool selectCompareBranchFedByFCmp(MachineInstr &I, MachineInstr &FCmp,
129 MachineIRBuilder &MIB) const;
130 bool selectCompareBranchFedByICmp(MachineInstr &I, MachineInstr &ICmp,
131 MachineIRBuilder &MIB) const;
132 bool tryOptCompareBranchFedByICmp(MachineInstr &I, MachineInstr &ICmp,
133 MachineIRBuilder &MIB) const;
134 bool tryOptAndIntoCompareBranch(MachineInstr &AndInst, bool Invert,
135 MachineBasicBlock *DstMBB,
136 MachineIRBuilder &MIB) const;
137 ///@}
138
139 bool selectCompareBranch(MachineInstr &I, MachineFunction &MF,
140 MachineRegisterInfo &MRI);
141
142 bool selectVectorAshrLshr(MachineInstr &I, MachineRegisterInfo &MRI);
143 bool selectVectorSHL(MachineInstr &I, MachineRegisterInfo &MRI);
144
145 // Helper to generate an equivalent of scalar_to_vector into a new register,
146 // returned via 'Dst'.
147 MachineInstr *emitScalarToVector(unsigned EltSize,
148 const TargetRegisterClass *DstRC,
149 Register Scalar,
150 MachineIRBuilder &MIRBuilder) const;
151 /// Helper to narrow vector that was widened by emitScalarToVector.
152 /// Copy lowest part of 128-bit or 64-bit vector to 64-bit or 32-bit
153 /// vector, respectively.
154 MachineInstr *emitNarrowVector(Register DstReg, Register SrcReg,
155 MachineIRBuilder &MIRBuilder,
156 MachineRegisterInfo &MRI) const;
157
158 /// Emit a lane insert into \p DstReg, or a new vector register if
159 /// std::nullopt is provided.
160 ///
161 /// The lane inserted into is defined by \p LaneIdx. The vector source
162 /// register is given by \p SrcReg. The register containing the element is
163 /// given by \p EltReg.
164 MachineInstr *emitLaneInsert(std::optional<Register> DstReg, Register SrcReg,
165 Register EltReg, unsigned LaneIdx,
166 const RegisterBank &RB,
167 MachineIRBuilder &MIRBuilder) const;
168
169 /// Emit a sequence of instructions representing a constant \p CV for a
170 /// vector register \p Dst. (E.g. a MOV, or a load from a constant pool.)
171 ///
172 /// \returns the last instruction in the sequence on success, and nullptr
173 /// otherwise.
174 MachineInstr *emitConstantVector(Register Dst, Constant *CV,
175 MachineIRBuilder &MIRBuilder,
176 MachineRegisterInfo &MRI);
177
178 MachineInstr *tryAdvSIMDModImm8(Register Dst, unsigned DstSize, APInt Bits,
179 MachineIRBuilder &MIRBuilder);
180
181 MachineInstr *tryAdvSIMDModImm16(Register Dst, unsigned DstSize, APInt Bits,
182 MachineIRBuilder &MIRBuilder, bool Inv);
183
184 MachineInstr *tryAdvSIMDModImm32(Register Dst, unsigned DstSize, APInt Bits,
185 MachineIRBuilder &MIRBuilder, bool Inv);
186 MachineInstr *tryAdvSIMDModImm64(Register Dst, unsigned DstSize, APInt Bits,
187 MachineIRBuilder &MIRBuilder);
188 MachineInstr *tryAdvSIMDModImm321s(Register Dst, unsigned DstSize, APInt Bits,
189 MachineIRBuilder &MIRBuilder, bool Inv);
190 MachineInstr *tryAdvSIMDModImmFP(Register Dst, unsigned DstSize, APInt Bits,
191 MachineIRBuilder &MIRBuilder);
192
193 bool tryOptConstantBuildVec(MachineInstr &MI, LLT DstTy,
194 MachineRegisterInfo &MRI);
195 /// \returns true if a G_BUILD_VECTOR instruction \p MI can be selected as a
196 /// SUBREG_TO_REG.
197 bool tryOptBuildVecToSubregToReg(MachineInstr &MI, MachineRegisterInfo &MRI);
198 bool selectBuildVector(MachineInstr &I, MachineRegisterInfo &MRI);
199 bool selectMergeValues(MachineInstr &I, MachineRegisterInfo &MRI);
200 bool selectUnmergeValues(MachineInstr &I, MachineRegisterInfo &MRI);
201
202 bool selectShuffleVector(MachineInstr &I, MachineRegisterInfo &MRI);
203 bool selectExtractElt(MachineInstr &I, MachineRegisterInfo &MRI);
204 bool selectConcatVectors(MachineInstr &I, MachineRegisterInfo &MRI);
205 bool selectSplitVectorUnmerge(MachineInstr &I, MachineRegisterInfo &MRI);
206
207 /// Helper function to select vector load intrinsics like
208 /// @llvm.aarch64.neon.ld2.*, @llvm.aarch64.neon.ld4.*, etc.
209 /// \p Opc is the opcode that the selected instruction should use.
210 /// \p NumVecs is the number of vector destinations for the instruction.
211 /// \p I is the original G_INTRINSIC_W_SIDE_EFFECTS instruction.
212 bool selectVectorLoadIntrinsic(unsigned Opc, unsigned NumVecs,
213 MachineInstr &I);
214 bool selectVectorLoadLaneIntrinsic(unsigned Opc, unsigned NumVecs,
215 MachineInstr &I);
216 void selectVectorStoreIntrinsic(MachineInstr &I, unsigned NumVecs,
217 unsigned Opc);
218 bool selectVectorStoreLaneIntrinsic(MachineInstr &I, unsigned NumVecs,
219 unsigned Opc);
220 bool selectIntrinsicWithSideEffects(MachineInstr &I,
221 MachineRegisterInfo &MRI);
222 bool selectIntrinsic(MachineInstr &I, MachineRegisterInfo &MRI);
223 bool selectJumpTable(MachineInstr &I, MachineRegisterInfo &MRI);
224 bool selectBrJT(MachineInstr &I, MachineRegisterInfo &MRI);
225 bool selectTLSGlobalValue(MachineInstr &I, MachineRegisterInfo &MRI);
226 bool selectPtrAuthGlobalValue(MachineInstr &I,
227 MachineRegisterInfo &MRI) const;
228 bool selectReduction(MachineInstr &I, MachineRegisterInfo &MRI);
229 bool selectMOPS(MachineInstr &I, MachineRegisterInfo &MRI);
230 bool selectUSMovFromExtend(MachineInstr &I, MachineRegisterInfo &MRI);
231 void SelectTable(MachineInstr &I, MachineRegisterInfo &MRI, unsigned NumVecs,
232 unsigned Opc1, unsigned Opc2, bool isExt);
233
234 bool selectIndexedExtLoad(MachineInstr &I, MachineRegisterInfo &MRI);
235 bool selectIndexedLoad(MachineInstr &I, MachineRegisterInfo &MRI);
236 bool selectIndexedStore(GIndexedStore &I, MachineRegisterInfo &MRI);
237
238 unsigned emitConstantPoolEntry(const Constant *CPVal,
239 MachineFunction &MF) const;
240 MachineInstr *emitLoadFromConstantPool(const Constant *CPVal,
241 MachineIRBuilder &MIRBuilder) const;
242
243 // Emit a vector concat operation.
244 MachineInstr *emitVectorConcat(std::optional<Register> Dst, Register Op1,
245 Register Op2,
246 MachineIRBuilder &MIRBuilder) const;
247
248 // Emit an integer compare between LHS and RHS, which checks for Predicate.
249 MachineInstr *emitIntegerCompare(MachineOperand &LHS, MachineOperand &RHS,
250 MachineOperand &Predicate,
251 MachineIRBuilder &MIRBuilder) const;
252
253 /// Emit a floating point comparison between \p LHS and \p RHS.
254 /// \p Pred if given is the intended predicate to use.
255 MachineInstr *
256 emitFPCompare(Register LHS, Register RHS, MachineIRBuilder &MIRBuilder,
257 std::optional<CmpInst::Predicate> = std::nullopt) const;
258
259 MachineInstr *
260 emitInstr(unsigned Opcode, std::initializer_list<llvm::DstOp> DstOps,
261 std::initializer_list<llvm::SrcOp> SrcOps,
262 MachineIRBuilder &MIRBuilder,
263 const ComplexRendererFns &RenderFns = std::nullopt) const;
264 /// Helper function to emit an add or sub instruction.
265 ///
266 /// \p AddrModeAndSizeToOpcode must contain each of the opcode variants above
267 /// in a specific order.
268 ///
269 /// Below is an example of the expected input to \p AddrModeAndSizeToOpcode.
270 ///
271 /// \code
272 /// const std::array<std::array<unsigned, 2>, 5> Table {
273 /// {{AArch64::ADDXri, AArch64::ADDWri},
274 /// {AArch64::ADDXrs, AArch64::ADDWrs},
275 /// {AArch64::ADDXrr, AArch64::ADDWrr},
276 /// {AArch64::SUBXri, AArch64::SUBWri},
277 /// {AArch64::ADDXrx, AArch64::ADDWrx}}};
278 /// \endcode
279 ///
280 /// Each row in the table corresponds to a different addressing mode. Each
281 /// column corresponds to a different register size.
282 ///
283 /// \attention Rows must be structured as follows:
284 /// - Row 0: The ri opcode variants
285 /// - Row 1: The rs opcode variants
286 /// - Row 2: The rr opcode variants
287 /// - Row 3: The ri opcode variants for negative immediates
288 /// - Row 4: The rx opcode variants
289 ///
290 /// \attention Columns must be structured as follows:
291 /// - Column 0: The 64-bit opcode variants
292 /// - Column 1: The 32-bit opcode variants
293 ///
294 /// \p Dst is the destination register of the binop to emit.
295 /// \p LHS is the left-hand operand of the binop to emit.
296 /// \p RHS is the right-hand operand of the binop to emit.
297 MachineInstr *emitAddSub(
298 const std::array<std::array<unsigned, 2>, 5> &AddrModeAndSizeToOpcode,
299 Register Dst, MachineOperand &LHS, MachineOperand &RHS,
300 MachineIRBuilder &MIRBuilder) const;
301 MachineInstr *emitADD(Register DefReg, MachineOperand &LHS,
302 MachineOperand &RHS,
303 MachineIRBuilder &MIRBuilder) const;
304 MachineInstr *emitADDS(Register Dst, MachineOperand &LHS, MachineOperand &RHS,
305 MachineIRBuilder &MIRBuilder) const;
306 MachineInstr *emitSUBS(Register Dst, MachineOperand &LHS, MachineOperand &RHS,
307 MachineIRBuilder &MIRBuilder) const;
308 MachineInstr *emitADCS(Register Dst, MachineOperand &LHS, MachineOperand &RHS,
309 MachineIRBuilder &MIRBuilder) const;
310 MachineInstr *emitSBCS(Register Dst, MachineOperand &LHS, MachineOperand &RHS,
311 MachineIRBuilder &MIRBuilder) const;
312 MachineInstr *emitCMN(MachineOperand &LHS, MachineOperand &RHS,
313 MachineIRBuilder &MIRBuilder) const;
314 MachineInstr *emitTST(MachineOperand &LHS, MachineOperand &RHS,
315 MachineIRBuilder &MIRBuilder) const;
316 MachineInstr *emitSelect(Register Dst, Register LHS, Register RHS,
317 AArch64CC::CondCode CC,
318 MachineIRBuilder &MIRBuilder) const;
319 MachineInstr *emitExtractVectorElt(std::optional<Register> DstReg,
320 const RegisterBank &DstRB, LLT ScalarTy,
321 Register VecReg, unsigned LaneIdx,
322 MachineIRBuilder &MIRBuilder) const;
323 MachineInstr *emitCSINC(Register Dst, Register Src1, Register Src2,
324 AArch64CC::CondCode Pred,
325 MachineIRBuilder &MIRBuilder) const;
326 /// Emit a CSet for a FP compare.
327 ///
328 /// \p Dst is expected to be a 32-bit scalar register.
329 MachineInstr *emitCSetForFCmp(Register Dst, CmpInst::Predicate Pred,
330 MachineIRBuilder &MIRBuilder) const;
331
332 /// Emit an instruction that sets NZCV to the carry-in expected by \p I.
333 /// Might elide the instruction if the previous instruction already sets NZCV
334 /// correctly.
335 MachineInstr *emitCarryIn(MachineInstr &I, Register CarryReg);
336
337 /// Emit the overflow op for \p Opcode.
338 ///
339 /// \p Opcode is expected to be an overflow op's opcode, e.g. G_UADDO,
340 /// G_USUBO, etc.
341 std::pair<MachineInstr *, AArch64CC::CondCode>
342 emitOverflowOp(unsigned Opcode, Register Dst, MachineOperand &LHS,
343 MachineOperand &RHS, MachineIRBuilder &MIRBuilder) const;
344
345 bool selectOverflowOp(MachineInstr &I, MachineRegisterInfo &MRI);
346
347 /// Emit expression as a conjunction (a series of CCMP/CFCMP ops).
348 /// In some cases this is even possible with OR operations in the expression.
349 MachineInstr *emitConjunction(Register Val, AArch64CC::CondCode &OutCC,
350 MachineIRBuilder &MIB) const;
351 MachineInstr *emitConditionalComparison(Register LHS, Register RHS,
352 CmpInst::Predicate CC,
353 AArch64CC::CondCode Predicate,
354 AArch64CC::CondCode OutCC,
355 MachineIRBuilder &MIB) const;
356 MachineInstr *emitConjunctionRec(Register Val, AArch64CC::CondCode &OutCC,
357 bool Negate, Register CCOp,
358 AArch64CC::CondCode Predicate,
359 MachineIRBuilder &MIB) const;
360
361 /// Emit a TB(N)Z instruction which tests \p Bit in \p TestReg.
362 /// \p IsNegative is true if the test should be "not zero".
363 /// This will also optimize the test bit instruction when possible.
364 MachineInstr *emitTestBit(Register TestReg, uint64_t Bit, bool IsNegative,
365 MachineBasicBlock *DstMBB,
366 MachineIRBuilder &MIB) const;
367
368 /// Emit a CB(N)Z instruction which branches to \p DestMBB.
369 MachineInstr *emitCBZ(Register CompareReg, bool IsNegative,
370 MachineBasicBlock *DestMBB,
371 MachineIRBuilder &MIB) const;
372
373 // Equivalent to the i32shift_a and friends from AArch64InstrInfo.td.
374 // We use these manually instead of using the importer since it doesn't
375 // support SDNodeXForm.
376 ComplexRendererFns selectShiftA_32(const MachineOperand &Root) const;
377 ComplexRendererFns selectShiftB_32(const MachineOperand &Root) const;
378 ComplexRendererFns selectShiftA_64(const MachineOperand &Root) const;
379 ComplexRendererFns selectShiftB_64(const MachineOperand &Root) const;
380
381 ComplexRendererFns select12BitValueWithLeftShift(uint64_t Immed) const;
382 ComplexRendererFns selectArithImmed(MachineOperand &Root) const;
383 ComplexRendererFns selectNegArithImmed(MachineOperand &Root) const;
384
385 ComplexRendererFns selectAddrModeUnscaled(MachineOperand &Root,
386 unsigned Size) const;
387
388 ComplexRendererFns selectAddrModeUnscaled8(MachineOperand &Root) const {
389 return selectAddrModeUnscaled(Root, 1);
390 }
391 ComplexRendererFns selectAddrModeUnscaled16(MachineOperand &Root) const {
392 return selectAddrModeUnscaled(Root, 2);
393 }
394 ComplexRendererFns selectAddrModeUnscaled32(MachineOperand &Root) const {
395 return selectAddrModeUnscaled(Root, 4);
396 }
397 ComplexRendererFns selectAddrModeUnscaled64(MachineOperand &Root) const {
398 return selectAddrModeUnscaled(Root, 8);
399 }
400 ComplexRendererFns selectAddrModeUnscaled128(MachineOperand &Root) const {
401 return selectAddrModeUnscaled(Root, 16);
402 }
403
404 /// Helper to try to fold in a GISEL_ADD_LOW into an immediate, to be used
405 /// from complex pattern matchers like selectAddrModeIndexed().
406 ComplexRendererFns tryFoldAddLowIntoImm(MachineInstr &RootDef, unsigned Size,
407 MachineRegisterInfo &MRI) const;
408
409 ComplexRendererFns selectAddrModeIndexed(MachineOperand &Root,
410 unsigned Size) const;
411 template <int Width>
412 ComplexRendererFns selectAddrModeIndexed(MachineOperand &Root) const {
413 return selectAddrModeIndexed(Root, Width / 8);
414 }
415
416 std::optional<bool>
417 isWorthFoldingIntoAddrMode(MachineInstr &MI,
418 const MachineRegisterInfo &MRI) const;
419
420 bool isWorthFoldingIntoExtendedReg(MachineInstr &MI,
421 const MachineRegisterInfo &MRI,
422 bool IsAddrOperand) const;
423 ComplexRendererFns
424 selectAddrModeShiftedExtendXReg(MachineOperand &Root,
425 unsigned SizeInBytes) const;
426
427 /// Returns a \p ComplexRendererFns which contains a base, offset, and whether
428 /// or not a shift + extend should be folded into an addressing mode. Returns
429 /// std::nullopt when this is not profitable or possible.
430 ComplexRendererFns
431 selectExtendedSHL(MachineOperand &Root, MachineOperand &Base,
432 MachineOperand &Offset, unsigned SizeInBytes,
433 bool WantsExt) const;
434 ComplexRendererFns selectAddrModeRegisterOffset(MachineOperand &Root) const;
435 ComplexRendererFns selectAddrModeXRO(MachineOperand &Root,
436 unsigned SizeInBytes) const;
437 template <int Width>
438 ComplexRendererFns selectAddrModeXRO(MachineOperand &Root) const {
439 return selectAddrModeXRO(Root, Width / 8);
440 }
441
442 ComplexRendererFns selectAddrModeWRO(MachineOperand &Root,
443 unsigned SizeInBytes) const;
444 template <int Width>
445 ComplexRendererFns selectAddrModeWRO(MachineOperand &Root) const {
446 return selectAddrModeWRO(Root, Width / 8);
447 }
448
449 ComplexRendererFns selectShiftedRegister(MachineOperand &Root,
450 bool AllowROR = false) const;
451
452 ComplexRendererFns selectArithShiftedRegister(MachineOperand &Root) const {
453 return selectShiftedRegister(Root);
454 }
455
456 ComplexRendererFns selectLogicalShiftedRegister(MachineOperand &Root) const {
457 return selectShiftedRegister(Root, true);
458 }
459
460 /// Given an extend instruction, determine the correct shift-extend type for
461 /// that instruction.
462 ///
463 /// If the instruction is going to be used in a load or store, pass
464 /// \p IsLoadStore = true.
465 AArch64_AM::ShiftExtendType
466 getExtendTypeForInst(MachineInstr &MI, MachineRegisterInfo &MRI,
467 bool IsLoadStore = false) const;
468
469 /// Move \p Reg to \p RC if \p Reg is not already on \p RC.
470 ///
471 /// \returns Either \p Reg if no change was necessary, or the new register
472 /// created by moving \p Reg.
473 ///
474 /// Note: This uses emitCopy right now.
475 Register moveScalarRegClass(Register Reg, const TargetRegisterClass &RC,
476 MachineIRBuilder &MIB) const;
477
478 ComplexRendererFns selectArithExtendedRegister(MachineOperand &Root) const;
479
480 ComplexRendererFns selectExtractHigh(MachineOperand &Root) const;
481
482 void renderTruncImm(MachineInstrBuilder &MIB, const MachineInstr &MI,
483 int OpIdx = -1) const;
484 void renderLogicalImm32(MachineInstrBuilder &MIB, const MachineInstr &I,
485 int OpIdx = -1) const;
486 void renderLogicalImm64(MachineInstrBuilder &MIB, const MachineInstr &I,
487 int OpIdx = -1) const;
488 void renderUbsanTrap(MachineInstrBuilder &MIB, const MachineInstr &MI,
489 int OpIdx) const;
490 void renderFPImm16(MachineInstrBuilder &MIB, const MachineInstr &MI,
491 int OpIdx = -1) const;
492 void renderFPImm32(MachineInstrBuilder &MIB, const MachineInstr &MI,
493 int OpIdx = -1) const;
494 void renderFPImm64(MachineInstrBuilder &MIB, const MachineInstr &MI,
495 int OpIdx = -1) const;
496 void renderFPImm32SIMDModImmType4(MachineInstrBuilder &MIB,
497 const MachineInstr &MI,
498 int OpIdx = -1) const;
499
500 // Materialize a GlobalValue or BlockAddress using a movz+movk sequence.
501 void materializeLargeCMVal(MachineInstr &I, const Value *V, unsigned OpFlags);
502
503 // Optimization methods.
504 bool tryOptSelect(GSelect &Sel);
505 bool tryOptSelectConjunction(GSelect &Sel, MachineInstr &CondMI);
506 MachineInstr *tryFoldIntegerCompare(MachineOperand &LHS, MachineOperand &RHS,
507 MachineOperand &Predicate,
508 MachineIRBuilder &MIRBuilder) const;
509
510 /// Return true if \p MI is a load or store of \p NumBytes bytes.
511 bool isLoadStoreOfNumBytes(const MachineInstr &MI, unsigned NumBytes) const;
512
513 /// Returns true if \p MI is guaranteed to have the high-half of a 64-bit
514 /// register zeroed out. In other words, the result of MI has been explicitly
515 /// zero extended.
516 bool isDef32(const MachineInstr &MI) const;
517
518 const AArch64TargetMachine &TM;
519 const AArch64Subtarget &STI;
520 const AArch64InstrInfo &TII;
521 const AArch64RegisterInfo &TRI;
522 const AArch64RegisterBankInfo &RBI;
523
524 bool ProduceNonFlagSettingCondBr = false;
525
526 // Some cached values used during selection.
527 // We use LR as a live-in register, and we keep track of it here as it can be
528 // clobbered by calls.
529 Register MFReturnAddr;
530
531 MachineIRBuilder MIB;
532
533#define GET_GLOBALISEL_PREDICATES_DECL
534#include "AArch64GenGlobalISel.inc"
535#undef GET_GLOBALISEL_PREDICATES_DECL
536
537// We declare the temporaries used by selectImpl() in the class to minimize the
538// cost of constructing placeholder values.
539#define GET_GLOBALISEL_TEMPORARIES_DECL
540#include "AArch64GenGlobalISel.inc"
541#undef GET_GLOBALISEL_TEMPORARIES_DECL
542};
543
544} // end anonymous namespace
545
546#define GET_GLOBALISEL_IMPL
547#include "AArch64GenGlobalISel.inc"
548#undef GET_GLOBALISEL_IMPL
549
550AArch64InstructionSelector::AArch64InstructionSelector(
551 const AArch64TargetMachine &TM, const AArch64Subtarget &STI,
552 const AArch64RegisterBankInfo &RBI)
553 : TM(TM), STI(STI), TII(*STI.getInstrInfo()), TRI(*STI.getRegisterInfo()),
554 RBI(RBI),
555#define GET_GLOBALISEL_PREDICATES_INIT
556#include "AArch64GenGlobalISel.inc"
557#undef GET_GLOBALISEL_PREDICATES_INIT
558#define GET_GLOBALISEL_TEMPORARIES_INIT
559#include "AArch64GenGlobalISel.inc"
560#undef GET_GLOBALISEL_TEMPORARIES_INIT
561{
562}
563
564// FIXME: This should be target-independent, inferred from the types declared
565// for each class in the bank.
566//
567/// Given a register bank, and a type, return the smallest register class that
568/// can represent that combination.
569static const TargetRegisterClass *
570getRegClassForTypeOnBank(LLT Ty, const RegisterBank &RB,
571 bool GetAllRegSet = false) {
572 if (RB.getID() == AArch64::GPRRegBankID) {
573 if (Ty.getSizeInBits() <= 32)
574 return GetAllRegSet ? &AArch64::GPR32allRegClass
575 : &AArch64::GPR32RegClass;
576 if (Ty.getSizeInBits() == 64)
577 return GetAllRegSet ? &AArch64::GPR64allRegClass
578 : &AArch64::GPR64RegClass;
579 if (Ty.getSizeInBits() == 128)
580 return &AArch64::XSeqPairsClassRegClass;
581 return nullptr;
582 }
583
584 if (RB.getID() == AArch64::FPRRegBankID) {
585 switch (Ty.getSizeInBits()) {
586 case 8:
587 return &AArch64::FPR8RegClass;
588 case 16:
589 return &AArch64::FPR16RegClass;
590 case 32:
591 return &AArch64::FPR32RegClass;
592 case 64:
593 return &AArch64::FPR64RegClass;
594 case 128:
595 return &AArch64::FPR128RegClass;
596 }
597 return nullptr;
598 }
599
600 return nullptr;
601}
602
603/// Given a register bank, and size in bits, return the smallest register class
604/// that can represent that combination.
605static const TargetRegisterClass *
606getMinClassForRegBank(const RegisterBank &RB, TypeSize SizeInBits,
607 bool GetAllRegSet = false) {
608 if (SizeInBits.isScalable()) {
609 assert(RB.getID() == AArch64::FPRRegBankID &&
610 "Expected FPR regbank for scalable type size");
611 return &AArch64::ZPRRegClass;
612 }
613
614 unsigned RegBankID = RB.getID();
615
616 if (RegBankID == AArch64::GPRRegBankID) {
617 assert(!SizeInBits.isScalable() && "Unexpected scalable register size");
618 if (SizeInBits <= 32)
619 return GetAllRegSet ? &AArch64::GPR32allRegClass
620 : &AArch64::GPR32RegClass;
621 if (SizeInBits == 64)
622 return GetAllRegSet ? &AArch64::GPR64allRegClass
623 : &AArch64::GPR64RegClass;
624 if (SizeInBits == 128)
625 return &AArch64::XSeqPairsClassRegClass;
626 }
627
628 if (RegBankID == AArch64::FPRRegBankID) {
629 if (SizeInBits.isScalable()) {
630 assert(SizeInBits == TypeSize::getScalable(128) &&
631 "Unexpected scalable register size");
632 return &AArch64::ZPRRegClass;
633 }
634
635 switch (SizeInBits) {
636 default:
637 return nullptr;
638 case 8:
639 return &AArch64::FPR8RegClass;
640 case 16:
641 return &AArch64::FPR16RegClass;
642 case 32:
643 return &AArch64::FPR32RegClass;
644 case 64:
645 return &AArch64::FPR64RegClass;
646 case 128:
647 return &AArch64::FPR128RegClass;
648 }
649 }
650
651 return nullptr;
652}
653
654/// Returns the correct subregister to use for a given register class.
655static bool getSubRegForClass(const TargetRegisterClass *RC,
656 const TargetRegisterInfo &TRI, unsigned &SubReg) {
657 switch (TRI.getRegSizeInBits(*RC)) {
658 case 8:
659 SubReg = AArch64::bsub;
660 break;
661 case 16:
662 SubReg = AArch64::hsub;
663 break;
664 case 32:
665 if (RC != &AArch64::FPR32RegClass)
666 SubReg = AArch64::sub_32;
667 else
668 SubReg = AArch64::ssub;
669 break;
670 case 64:
671 SubReg = AArch64::dsub;
672 break;
673 default:
674 LLVM_DEBUG(
675 dbgs() << "Couldn't find appropriate subregister for register class.");
676 return false;
677 }
678
679 return true;
680}
681
682/// Returns the minimum size the given register bank can hold.
683static unsigned getMinSizeForRegBank(const RegisterBank &RB) {
684 switch (RB.getID()) {
685 case AArch64::GPRRegBankID:
686 return 32;
687 case AArch64::FPRRegBankID:
688 return 8;
689 default:
690 llvm_unreachable("Tried to get minimum size for unknown register bank.");
691 }
692}
693
694/// Create a REG_SEQUENCE instruction using the registers in \p Regs.
695/// Helper function for functions like createDTuple and createQTuple.
696///
697/// \p RegClassIDs - The list of register class IDs available for some tuple of
698/// a scalar class. E.g. QQRegClassID, QQQRegClassID, QQQQRegClassID. This is
699/// expected to contain between 2 and 4 tuple classes.
700///
701/// \p SubRegs - The list of subregister classes associated with each register
702/// class ID in \p RegClassIDs. E.g., QQRegClassID should use the qsub0
703/// subregister class. The index of each subregister class is expected to
704/// correspond with the index of each register class.
705///
706/// \returns Either the destination register of REG_SEQUENCE instruction that
707/// was created, or the 0th element of \p Regs if \p Regs contains a single
708/// element.
709static Register createTuple(ArrayRef<Register> Regs,
710 const unsigned RegClassIDs[],
711 const unsigned SubRegs[], MachineIRBuilder &MIB) {
712 unsigned NumRegs = Regs.size();
713 if (NumRegs == 1)
714 return Regs[0];
715 assert(NumRegs >= 2 && NumRegs <= 4 &&
716 "Only support between two and 4 registers in a tuple!");
717 const TargetRegisterInfo *TRI = MIB.getMF().getSubtarget().getRegisterInfo();
718 auto *DesiredClass = TRI->getRegClass(RegClassIDs[NumRegs - 2]);
719 auto RegSequence =
720 MIB.buildInstr(TargetOpcode::REG_SEQUENCE, {DesiredClass}, {});
721 for (unsigned I = 0, E = Regs.size(); I < E; ++I) {
722 RegSequence.addUse(Regs[I]);
723 RegSequence.addImm(SubRegs[I]);
724 }
725 return RegSequence.getReg(0);
726}
727
728/// Create a tuple of D-registers using the registers in \p Regs.
729static Register createDTuple(ArrayRef<Register> Regs, MachineIRBuilder &MIB) {
730 static const unsigned RegClassIDs[] = {
731 AArch64::DDRegClassID, AArch64::DDDRegClassID, AArch64::DDDDRegClassID};
732 static const unsigned SubRegs[] = {AArch64::dsub0, AArch64::dsub1,
733 AArch64::dsub2, AArch64::dsub3};
734 return createTuple(Regs, RegClassIDs, SubRegs, MIB);
735}
736
737/// Create a tuple of Q-registers using the registers in \p Regs.
738static Register createQTuple(ArrayRef<Register> Regs, MachineIRBuilder &MIB) {
739 static const unsigned RegClassIDs[] = {
740 AArch64::QQRegClassID, AArch64::QQQRegClassID, AArch64::QQQQRegClassID};
741 static const unsigned SubRegs[] = {AArch64::qsub0, AArch64::qsub1,
742 AArch64::qsub2, AArch64::qsub3};
743 return createTuple(Regs, RegClassIDs, SubRegs, MIB);
744}
745
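/// Editorial note: the helper below is undocumented upstream; based on its
/// body, it extracts a constant value from \p Root, whether \p Root is an
/// immediate, a ConstantInt operand, or a vreg defined by a constant, and
/// returns std::nullopt if no constant can be found.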
746static std::optional<uint64_t> getImmedFromMO(const MachineOperand &Root) {
747 auto &MI = *Root.getParent();
748 auto &MBB = *MI.getParent();
749 auto &MF = *MBB.getParent();
750 auto &MRI = MF.getRegInfo();
751 uint64_t Immed;
752 if (Root.isImm())
753 Immed = Root.getImm();
754 else if (Root.isCImm())
755 Immed = Root.getCImm()->getZExtValue();
756 else if (Root.isReg()) {
757 auto ValAndVReg =
758 getIConstantVRegValWithLookThrough(Root.getReg(), MRI);
759 if (!ValAndVReg)
760 return std::nullopt;
761 Immed = ValAndVReg->Value.getSExtValue();
762 } else
763 return std::nullopt;
764 return Immed;
765}
766
767/// Check whether \p I is a currently unsupported binary operation:
768/// - it has an unsized type
769/// - an operand is not a vreg
770/// - not all operands are in the same bank
771/// These are checks that should someday live in the verifier, but right now,
772/// these are mostly limitations of the aarch64 selector.
773static bool unsupportedBinOp(const MachineInstr &I,
774 const AArch64RegisterBankInfo &RBI,
775 const MachineRegisterInfo &MRI,
776 const AArch64RegisterInfo &TRI) {
777 LLT Ty = MRI.getType(I.getOperand(0).getReg());
778 if (!Ty.isValid()) {
779 LLVM_DEBUG(dbgs() << "Generic binop register should be typed\n");
780 return true;
781 }
782
783 const RegisterBank *PrevOpBank = nullptr;
784 for (auto &MO : I.operands()) {
785 // FIXME: Support non-register operands.
786 if (!MO.isReg()) {
787 LLVM_DEBUG(dbgs() << "Generic inst non-reg operands are unsupported\n");
788 return true;
789 }
790
791 // FIXME: Can generic operations have physical registers operands? If
792 // so, this will need to be taught about that, and we'll need to get the
793 // bank out of the minimal class for the register.
794 // Either way, this needs to be documented (and possibly verified).
795 if (!MO.getReg().isVirtual()) {
796 LLVM_DEBUG(dbgs() << "Generic inst has physical register operand\n");
797 return true;
798 }
799
800 const RegisterBank *OpBank = RBI.getRegBank(MO.getReg(), MRI, TRI);
801 if (!OpBank) {
802 LLVM_DEBUG(dbgs() << "Generic register has no bank or class\n");
803 return true;
804 }
805
806 if (PrevOpBank && OpBank != PrevOpBank) {
807 LLVM_DEBUG(dbgs() << "Generic inst operands have different banks\n");
808 return true;
809 }
810 PrevOpBank = OpBank;
811 }
812 return false;
813}
814
815/// Select the AArch64 opcode for the basic binary operation \p GenericOpc
816/// (such as G_OR or G_SDIV), appropriate for the register bank \p RegBankID
817/// and of size \p OpSize.
818/// \returns \p GenericOpc if the combination is unsupported.
819static unsigned selectBinaryOp(unsigned GenericOpc, unsigned RegBankID,
820 unsigned OpSize) {
821 switch (RegBankID) {
822 case AArch64::GPRRegBankID:
823 if (OpSize == 32) {
824 switch (GenericOpc) {
825 case TargetOpcode::G_SHL:
826 return AArch64::LSLVWr;
827 case TargetOpcode::G_LSHR:
828 return AArch64::LSRVWr;
829 case TargetOpcode::G_ASHR:
830 return AArch64::ASRVWr;
831 default:
832 return GenericOpc;
833 }
834 } else if (OpSize == 64) {
835 switch (GenericOpc) {
836 case TargetOpcode::G_PTR_ADD:
837 return AArch64::ADDXrr;
838 case TargetOpcode::G_SHL:
839 return AArch64::LSLVXr;
840 case TargetOpcode::G_LSHR:
841 return AArch64::LSRVXr;
842 case TargetOpcode::G_ASHR:
843 return AArch64::ASRVXr;
844 default:
845 return GenericOpc;
846 }
847 }
848 break;
849 case AArch64::FPRRegBankID:
850 switch (OpSize) {
851 case 32:
852 switch (GenericOpc) {
853 case TargetOpcode::G_FADD:
854 return AArch64::FADDSrr;
855 case TargetOpcode::G_FSUB:
856 return AArch64::FSUBSrr;
857 case TargetOpcode::G_FMUL:
858 return AArch64::FMULSrr;
859 case TargetOpcode::G_FDIV:
860 return AArch64::FDIVSrr;
861 default:
862 return GenericOpc;
863 }
864 case 64:
865 switch (GenericOpc) {
866 case TargetOpcode::G_FADD:
867 return AArch64::FADDDrr;
868 case TargetOpcode::G_FSUB:
869 return AArch64::FSUBDrr;
870 case TargetOpcode::G_FMUL:
871 return AArch64::FMULDrr;
872 case TargetOpcode::G_FDIV:
873 return AArch64::FDIVDrr;
874 case TargetOpcode::G_OR:
875 return AArch64::ORRv8i8;
876 default:
877 return GenericOpc;
878 }
879 }
880 break;
881 }
882 return GenericOpc;
883}
884
885/// Select the AArch64 opcode for the G_LOAD or G_STORE operation \p GenericOpc,
886/// appropriate for the (value) register bank \p RegBankID and of memory access
887/// size \p OpSize. This returns the variant with the base+unsigned-immediate
888/// addressing mode (e.g., LDRXui).
889/// \returns \p GenericOpc if the combination is unsupported.
890static unsigned selectLoadStoreUIOp(unsigned GenericOpc, unsigned RegBankID,
891 unsigned OpSize) {
892 const bool isStore = GenericOpc == TargetOpcode::G_STORE;
893 switch (RegBankID) {
894 case AArch64::GPRRegBankID:
895 switch (OpSize) {
896 case 8:
897 return isStore ? AArch64::STRBBui : AArch64::LDRBBui;
898 case 16:
899 return isStore ? AArch64::STRHHui : AArch64::LDRHHui;
900 case 32:
901 return isStore ? AArch64::STRWui : AArch64::LDRWui;
902 case 64:
903 return isStore ? AArch64::STRXui : AArch64::LDRXui;
904 }
905 break;
906 case AArch64::FPRRegBankID:
907 switch (OpSize) {
908 case 8:
909 return isStore ? AArch64::STRBui : AArch64::LDRBui;
910 case 16:
911 return isStore ? AArch64::STRHui : AArch64::LDRHui;
912 case 32:
913 return isStore ? AArch64::STRSui : AArch64::LDRSui;
914 case 64:
915 return isStore ? AArch64::STRDui : AArch64::LDRDui;
916 case 128:
917 return isStore ? AArch64::STRQui : AArch64::LDRQui;
918 }
919 break;
920 }
921 return GenericOpc;
922}
923
924/// Helper function for selectCopy. Inserts a subregister copy from \p SrcReg
925/// to \p *To.
926///
927/// E.g "To = COPY SrcReg:SubReg"
928static bool copySubReg(MachineInstr &I, MachineRegisterInfo &MRI,
929 const RegisterBankInfo &RBI, Register SrcReg,
930 const TargetRegisterClass *To, unsigned SubReg) {
931 assert(SrcReg.isValid() && "Expected a valid source register?");
932 assert(To && "Destination register class cannot be null");
933 assert(SubReg && "Expected a valid subregister");
934
935 MachineIRBuilder MIB(I);
936 auto SubRegCopy =
937 MIB.buildInstr(TargetOpcode::COPY, {To}, {}).addReg(SrcReg, 0, SubReg);
938 MachineOperand &RegOp = I.getOperand(1);
939 RegOp.setReg(SubRegCopy.getReg(0));
940
941 // It's possible that the destination register won't be constrained. Make
942 // sure that happens.
943 if (!I.getOperand(0).getReg().isPhysical())
944 RBI.constrainGenericRegister(I.getOperand(0).getReg(), *To, MRI);
945
946 return true;
947}
948
949/// Helper function to get the source and destination register classes for a
950/// copy. Returns a std::pair containing the source register class for the
951/// copy, and the destination register class for the copy. If a register class
952/// cannot be determined, then it will be nullptr.
953static std::pair<const TargetRegisterClass *, const TargetRegisterClass *>
954getRegClassesForCopy(MachineInstr &I, const TargetInstrInfo &TII,
955 MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI,
956 const RegisterBankInfo &RBI) {
957 Register DstReg = I.getOperand(0).getReg();
958 Register SrcReg = I.getOperand(1).getReg();
959 const RegisterBank &DstRegBank = *RBI.getRegBank(DstReg, MRI, TRI);
960 const RegisterBank &SrcRegBank = *RBI.getRegBank(SrcReg, MRI, TRI);
961
962 TypeSize DstSize = RBI.getSizeInBits(DstReg, MRI, TRI);
963 TypeSize SrcSize = RBI.getSizeInBits(SrcReg, MRI, TRI);
964
965 // Special casing for cross-bank copies of s1s. We can technically represent
966 // a 1-bit value with any size of register. The minimum size for a GPR is 32
967 // bits. So, we need to put the FPR on 32 bits as well.
968 //
969 // FIXME: I'm not sure if this case holds true outside of copies. If it does,
970 // then we can pull it into the helpers that get the appropriate class for a
971 // register bank. Or make a new helper that carries along some constraint
972 // information.
973 if (SrcRegBank != DstRegBank &&
974 (DstSize == TypeSize::getFixed(1) && SrcSize == TypeSize::getFixed(1)))
975 SrcSize = DstSize = TypeSize::getFixed(32);
976
977 return {getMinClassForRegBank(SrcRegBank, SrcSize, true),
978 getMinClassForRegBank(DstRegBank, DstSize, true)};
979}
980
981// FIXME: We need some sort of API in RBI/TRI to allow generic code to
982// constrain operands of simple instructions given a TargetRegisterClass
983// and LLT
984static bool selectDebugInstr(MachineInstr &I, MachineRegisterInfo &MRI,
985 const RegisterBankInfo &RBI) {
986 for (MachineOperand &MO : I.operands()) {
987 if (!MO.isReg())
988 continue;
989 Register Reg = MO.getReg();
990 if (!Reg)
991 continue;
992 if (Reg.isPhysical())
993 continue;
994 LLT Ty = MRI.getType(Reg);
995 const RegClassOrRegBank &RegClassOrBank = MRI.getRegClassOrRegBank(Reg);
996 const TargetRegisterClass *RC =
997 dyn_cast<const TargetRegisterClass *>(RegClassOrBank);
998 if (!RC) {
999 const RegisterBank &RB = *cast<const RegisterBank *>(RegClassOrBank);
1000 RC = getRegClassForTypeOnBank(Ty, RB);
1001 if (!RC) {
1002 LLVM_DEBUG(
1003 dbgs() << "Warning: DBG_VALUE operand has unexpected size/bank\n");
1004 break;
1005 }
1006 }
1007 RBI.constrainGenericRegister(Reg, *RC, MRI);
1008 }
1009
1010 return true;
1011}
1012
1013static bool selectCopy(MachineInstr &I, const TargetInstrInfo &TII,
1014 MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI,
1015 const RegisterBankInfo &RBI) {
1016 Register DstReg = I.getOperand(0).getReg();
1017 Register SrcReg = I.getOperand(1).getReg();
1018 const RegisterBank &DstRegBank = *RBI.getRegBank(DstReg, MRI, TRI);
1019 const RegisterBank &SrcRegBank = *RBI.getRegBank(SrcReg, MRI, TRI);
1020
1021 // Find the correct register classes for the source and destination registers.
1022 const TargetRegisterClass *SrcRC;
1023 const TargetRegisterClass *DstRC;
1024 std::tie(SrcRC, DstRC) = getRegClassesForCopy(I, TII, MRI, TRI, RBI);
1025
1026 if (!DstRC) {
1027 LLVM_DEBUG(dbgs() << "Unexpected dest size "
1028 << RBI.getSizeInBits(DstReg, MRI, TRI) << '\n');
1029 return false;
1030 }
1031
1032 // Is this a copy? If so, then we may need to insert a subregister copy.
1033 if (I.isCopy()) {
1034 // Yes. Check if there's anything to fix up.
1035 if (!SrcRC) {
1036 LLVM_DEBUG(dbgs() << "Couldn't determine source register class\n");
1037 return false;
1038 }
1039
1040 const TypeSize SrcSize = TRI.getRegSizeInBits(*SrcRC);
1041 const TypeSize DstSize = TRI.getRegSizeInBits(*DstRC);
1042 unsigned SubReg;
1043
1044 // If the source bank doesn't support a subregister copy small enough,
1045 // then we first need to copy to the destination bank.
1046 if (getMinSizeForRegBank(SrcRegBank) > DstSize) {
1047 const TargetRegisterClass *DstTempRC =
1048 getMinClassForRegBank(DstRegBank, SrcSize, /* GetAllRegSet */ true);
1049 getSubRegForClass(DstRC, TRI, SubReg);
1050
1051 MachineIRBuilder MIB(I);
1052 auto Copy = MIB.buildCopy({DstTempRC}, {SrcReg});
1053 copySubReg(I, MRI, RBI, Copy.getReg(0), DstRC, SubReg);
1054 } else if (SrcSize > DstSize) {
1055 // If the source register is bigger than the destination we need to
1056 // perform a subregister copy.
1057 const TargetRegisterClass *SubRegRC =
1058 getMinClassForRegBank(SrcRegBank, DstSize, /* GetAllRegSet */ true);
1059 getSubRegForClass(SubRegRC, TRI, SubReg);
1060 copySubReg(I, MRI, RBI, SrcReg, DstRC, SubReg);
1061 } else if (DstSize > SrcSize) {
1062 // If the destination register is bigger than the source we need to do
1063 // a promotion using SUBREG_TO_REG.
1064 const TargetRegisterClass *PromotionRC =
1065 getMinClassForRegBank(SrcRegBank, DstSize, /* GetAllRegSet */ true);
1066 getSubRegForClass(SrcRC, TRI, SubReg);
1067
1068 Register PromoteReg = MRI.createVirtualRegister(PromotionRC);
1069 BuildMI(*I.getParent(), I, I.getDebugLoc(),
1070 TII.get(AArch64::SUBREG_TO_REG), PromoteReg)
1071 .addImm(0)
1072 .addUse(SrcReg)
1073 .addImm(SubReg);
1074 MachineOperand &RegOp = I.getOperand(1);
1075 RegOp.setReg(PromoteReg);
1076 }
1077
1078 // If the destination is a physical register, then there's nothing to
1079 // change, so we're done.
1080 if (DstReg.isPhysical())
1081 return true;
1082 }
1083
1084 // No need to constrain SrcReg. It will get constrained when we hit another
1085 // one of its uses or defs. Copies do not have constraints.
1086 if (!RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) {
1087 LLVM_DEBUG(dbgs() << "Failed to constrain " << TII.getName(I.getOpcode())
1088 << " operand\n");
1089 return false;
1090 }
1091
1092 // If this is a GPR ZEXT, we want to just reduce it down into a copy.
1093 // The sizes will be mismatched with the source < 32b, but that's ok.
1094 if (I.getOpcode() == TargetOpcode::G_ZEXT) {
1095 I.setDesc(TII.get(AArch64::COPY));
1096 assert(SrcRegBank.getID() == AArch64::GPRRegBankID);
1097 return selectCopy(I, TII, MRI, TRI, RBI);
1098 }
1099
1100 I.setDesc(TII.get(AArch64::COPY));
1101 return true;
1102}
1103
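/// Editorial note: the helper below is undocumented upstream; based on its
/// body, it selects the AArch64 opcode for a scalar FP <-> integer conversion
/// (G_SITOFP, G_UITOFP, G_FPTOSI, G_FPTOUI) given the source and destination
/// scalar sizes, returning \p GenericOpc if the combination is unsupported.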
1104static unsigned selectFPConvOpc(unsigned GenericOpc, LLT DstTy, LLT SrcTy) {
1105 if (!DstTy.isScalar() || !SrcTy.isScalar())
1106 return GenericOpc;
1107
1108 const unsigned DstSize = DstTy.getSizeInBits();
1109 const unsigned SrcSize = SrcTy.getSizeInBits();
1110
1111 switch (DstSize) {
1112 case 32:
1113 switch (SrcSize) {
1114 case 32:
1115 switch (GenericOpc) {
1116 case TargetOpcode::G_SITOFP:
1117 return AArch64::SCVTFUWSri;
1118 case TargetOpcode::G_UITOFP:
1119 return AArch64::UCVTFUWSri;
1120 case TargetOpcode::G_FPTOSI:
1121 return AArch64::FCVTZSUWSr;
1122 case TargetOpcode::G_FPTOUI:
1123 return AArch64::FCVTZUUWSr;
1124 default:
1125 return GenericOpc;
1126 }
1127 case 64:
1128 switch (GenericOpc) {
1129 case TargetOpcode::G_SITOFP:
1130 return AArch64::SCVTFUXSri;
1131 case TargetOpcode::G_UITOFP:
1132 return AArch64::UCVTFUXSri;
1133 case TargetOpcode::G_FPTOSI:
1134 return AArch64::FCVTZSUWDr;
1135 case TargetOpcode::G_FPTOUI:
1136 return AArch64::FCVTZUUWDr;
1137 default:
1138 return GenericOpc;
1139 }
1140 default:
1141 return GenericOpc;
1142 }
1143 case 64:
1144 switch (SrcSize) {
1145 case 32:
1146 switch (GenericOpc) {
1147 case TargetOpcode::G_SITOFP:
1148 return AArch64::SCVTFUWDri;
1149 case TargetOpcode::G_UITOFP:
1150 return AArch64::UCVTFUWDri;
1151 case TargetOpcode::G_FPTOSI:
1152 return AArch64::FCVTZSUXSr;
1153 case TargetOpcode::G_FPTOUI:
1154 return AArch64::FCVTZUUXSr;
1155 default:
1156 return GenericOpc;
1157 }
1158 case 64:
1159 switch (GenericOpc) {
1160 case TargetOpcode::G_SITOFP:
1161 return AArch64::SCVTFUXDri;
1162 case TargetOpcode::G_UITOFP:
1163 return AArch64::UCVTFUXDri;
1164 case TargetOpcode::G_FPTOSI:
1165 return AArch64::FCVTZSUXDr;
1166 case TargetOpcode::G_FPTOUI:
1167 return AArch64::FCVTZUUXDr;
1168 default:
1169 return GenericOpc;
1170 }
1171 default:
1172 return GenericOpc;
1173 }
1174 default:
1175 return GenericOpc;
1176 };
1177 return GenericOpc;
1178}
1179
1180MachineInstr *
1181AArch64InstructionSelector::emitSelect(Register Dst, Register True,
1182 Register False, AArch64CC::CondCode CC,
1183 MachineIRBuilder &MIB) const {
1184 MachineRegisterInfo &MRI = *MIB.getMRI();
1185 assert(RBI.getRegBank(False, MRI, TRI)->getID() ==
1186 RBI.getRegBank(True, MRI, TRI)->getID() &&
1187 "Expected both select operands to have the same regbank?");
1188 LLT Ty = MRI.getType(True);
1189 if (Ty.isVector())
1190 return nullptr;
1191 const unsigned Size = Ty.getSizeInBits();
1192 assert((Size == 32 || Size == 64) &&
1193 "Expected 32 bit or 64 bit select only?");
1194 const bool Is32Bit = Size == 32;
1195 if (RBI.getRegBank(True, MRI, TRI)->getID() != AArch64::GPRRegBankID) {
1196 unsigned Opc = Is32Bit ? AArch64::FCSELSrrr : AArch64::FCSELDrrr;
1197 auto FCSel = MIB.buildInstr(Opc, {Dst}, {True, False}).addImm(CC);
1198 constrainSelectedInstRegOperands(*FCSel, TII, TRI, RBI);
1199 return &*FCSel;
1200 }
1201
1202 // By default, we'll try and emit a CSEL.
1203 unsigned Opc = Is32Bit ? AArch64::CSELWr : AArch64::CSELXr;
1204 bool Optimized = false;
1205 auto TryFoldBinOpIntoSelect = [&Opc, Is32Bit, &CC, &MRI,
1206 &Optimized](Register &Reg, Register &OtherReg,
1207 bool Invert) {
1208 if (Optimized)
1209 return false;
1210
1211 // Attempt to fold:
1212 //
1213 // %sub = G_SUB 0, %x
1214 // %select = G_SELECT cc, %reg, %sub
1215 //
1216 // Into:
1217 // %select = CSNEG %reg, %x, cc
1218 Register MatchReg;
1219 if (mi_match(Reg, MRI, m_Neg(m_Reg(MatchReg)))) {
1220 Opc = Is32Bit ? AArch64::CSNEGWr : AArch64::CSNEGXr;
1221 Reg = MatchReg;
1222 if (Invert) {
1223 CC = AArch64CC::getInvertedCondCode(CC);
1224 std::swap(Reg, OtherReg);
1225 }
1226 return true;
1227 }
1228
1229 // Attempt to fold:
1230 //
1231 // %xor = G_XOR %x, -1
1232 // %select = G_SELECT cc, %reg, %xor
1233 //
1234 // Into:
1235 // %select = CSINV %reg, %x, cc
1236 if (mi_match(Reg, MRI, m_Not(m_Reg(MatchReg)))) {
1237 Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;
1238 Reg = MatchReg;
1239 if (Invert) {
1240 CC = AArch64CC::getInvertedCondCode(CC);
1241 std::swap(Reg, OtherReg);
1242 }
1243 return true;
1244 }
1245
1246 // Attempt to fold:
1247 //
1248 // %add = G_ADD %x, 1
1249 // %select = G_SELECT cc, %reg, %add
1250 //
1251 // Into:
1252 // %select = CSINC %reg, %x, cc
1253 if (mi_match(Reg, MRI,
1254 m_any_of(m_GAdd(m_Reg(MatchReg), m_SpecificICst(1)),
1255 m_GPtrAdd(m_Reg(MatchReg), m_SpecificICst(1))))) {
1256 Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;
1257 Reg = MatchReg;
1258 if (Invert) {
1259 CC = AArch64CC::getInvertedCondCode(CC);
1260 std::swap(Reg, OtherReg);
1261 }
1262 return true;
1263 }
1264
1265 return false;
1266 };
1267
1268 // Helper lambda which tries to use CSINC/CSINV for the instruction when its
1269 // true/false values are constants.
1270 // FIXME: All of these patterns already exist in tablegen. We should be
1271 // able to import these.
1272 auto TryOptSelectCst = [&Opc, &True, &False, &CC, Is32Bit, &MRI,
1273 &Optimized]() {
1274 if (Optimized)
1275 return false;
1276 auto TrueCst = getIConstantVRegValWithLookThrough(True, MRI);
1277 auto FalseCst = getIConstantVRegValWithLookThrough(False, MRI);
1278 if (!TrueCst && !FalseCst)
1279 return false;
1280
1281 Register ZReg = Is32Bit ? AArch64::WZR : AArch64::XZR;
1282 if (TrueCst && FalseCst) {
1283 int64_t T = TrueCst->Value.getSExtValue();
1284 int64_t F = FalseCst->Value.getSExtValue();
1285
1286 if (T == 0 && F == 1) {
1287 // G_SELECT cc, 0, 1 -> CSINC zreg, zreg, cc
1288 Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;
1289 True = ZReg;
1290 False = ZReg;
1291 return true;
1292 }
1293
1294 if (T == 0 && F == -1) {
1295 // G_SELECT cc 0, -1 -> CSINV zreg, zreg cc
1296 Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;
1297 True = ZReg;
1298 False = ZReg;
1299 return true;
1300 }
1301 }
1302
1303 if (TrueCst) {
1304 int64_t T = TrueCst->Value.getSExtValue();
1305 if (T == 1) {
1306 // G_SELECT cc, 1, f -> CSINC f, zreg, inv_cc
1307 Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;
1308 True = False;
1309 False = ZReg;
1310 CC = AArch64CC::getInvertedCondCode(CC);
1311 return true;
1312 }
1313
1314 if (T == -1) {
1315 // G_SELECT cc, -1, f -> CSINV f, zreg, inv_cc
1316 Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;
1317 True = False;
1318 False = ZReg;
1319 CC = AArch64CC::getInvertedCondCode(CC);
1320 return true;
1321 }
1322 }
1323
1324 if (FalseCst) {
1325 int64_t F = FalseCst->Value.getSExtValue();
1326 if (F == 1) {
1327 // G_SELECT cc, t, 1 -> CSINC t, zreg, cc
1328 Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;
1329 False = ZReg;
1330 return true;
1331 }
1332
1333 if (F == -1) {
1334 // G_SELECT cc, t, -1 -> CSINC t, zreg, cc
1335 Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;
1336 False = ZReg;
1337 return true;
1338 }
1339 }
1340 return false;
1341 };
1342
1343 Optimized |= TryFoldBinOpIntoSelect(False, True, /*Invert = */ false);
1344 Optimized |= TryFoldBinOpIntoSelect(True, False, /*Invert = */ true);
1345 Optimized |= TryOptSelectCst();
1346 auto SelectInst = MIB.buildInstr(Opc, {Dst}, {True, False}).addImm(CC);
1347 constrainSelectedInstRegOperands(*SelectInst, TII, TRI, RBI);
1348 return &*SelectInst;
1349}
1350
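/// Editorial note: based on the switch below, this helper converts an IR
/// integer comparison predicate to the corresponding AArch64 condition code.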
1351static AArch64CC::CondCode changeICMPPredToAArch64CC(CmpInst::Predicate P) {
1352 switch (P) {
1353 default:
1354 llvm_unreachable("Unknown condition code!");
1355 case CmpInst::ICMP_NE:
1356 return AArch64CC::NE;
1357 case CmpInst::ICMP_EQ:
1358 return AArch64CC::EQ;
1359 case CmpInst::ICMP_SGT:
1360 return AArch64CC::GT;
1361 case CmpInst::ICMP_SGE:
1362 return AArch64CC::GE;
1363 case CmpInst::ICMP_SLT:
1364 return AArch64CC::LT;
1365 case CmpInst::ICMP_SLE:
1366 return AArch64CC::LE;
1367 case CmpInst::ICMP_UGT:
1368 return AArch64CC::HI;
1369 case CmpInst::ICMP_UGE:
1370 return AArch64CC::HS;
1371 case CmpInst::ICMP_ULT:
1372 return AArch64CC::LO;
1373 case CmpInst::ICMP_ULE:
1374 return AArch64CC::LS;
1375 }
1376}
1377
1378/// changeFPCCToORAArch64CC - Convert an IR fp condition code to an AArch64 CC.
1379static void changeFPCCToORAArch64CC(CmpInst::Predicate CC,
1380 AArch64CC::CondCode &CondCode,
1381 AArch64CC::CondCode &CondCode2) {
1382 CondCode2 = AArch64CC::AL;
1383 switch (CC) {
1384 default:
1385 llvm_unreachable("Unknown FP condition!");
1386 case CmpInst::FCMP_OEQ:
1387 CondCode = AArch64CC::EQ;
1388 break;
1389 case CmpInst::FCMP_OGT:
1390 CondCode = AArch64CC::GT;
1391 break;
1392 case CmpInst::FCMP_OGE:
1393 CondCode = AArch64CC::GE;
1394 break;
1395 case CmpInst::FCMP_OLT:
1396 CondCode = AArch64CC::MI;
1397 break;
1398 case CmpInst::FCMP_OLE:
1399 CondCode = AArch64CC::LS;
1400 break;
1401 case CmpInst::FCMP_ONE:
1402 CondCode = AArch64CC::MI;
1403 CondCode2 = AArch64CC::GT;
1404 break;
1405 case CmpInst::FCMP_ORD:
1406 CondCode = AArch64CC::VC;
1407 break;
1408 case CmpInst::FCMP_UNO:
1409 CondCode = AArch64CC::VS;
1410 break;
1411 case CmpInst::FCMP_UEQ:
1412 CondCode = AArch64CC::EQ;
1413 CondCode2 = AArch64CC::VS;
1414 break;
1415 case CmpInst::FCMP_UGT:
1416 CondCode = AArch64CC::HI;
1417 break;
1418 case CmpInst::FCMP_UGE:
1419 CondCode = AArch64CC::PL;
1420 break;
1421 case CmpInst::FCMP_ULT:
1422 CondCode = AArch64CC::LT;
1423 break;
1424 case CmpInst::FCMP_ULE:
1425 CondCode = AArch64CC::LE;
1426 break;
1427 case CmpInst::FCMP_UNE:
1428 CondCode = AArch64CC::NE;
1429 break;
1430 }
1431}
1432
1433/// Convert an IR fp condition code to an AArch64 CC.
1434/// This differs from changeFPCCToORAArch64CC in that it returns cond codes
1435/// that should be AND'ed instead of OR'ed.
1436static void changeFCMPPredToAArch64CC(CmpInst::Predicate CC,
1437 AArch64CC::CondCode &CondCode,
1438 AArch64CC::CondCode &CondCode2) {
1439 CondCode2 = AArch64CC::AL;
1440 switch (CC) {
1441 default:
1442 changeFPCCToORAArch64CC(CC, CondCode, CondCode2);
1443 assert(CondCode2 == AArch64CC::AL);
1444 break;
1445 case CmpInst::FCMP_ONE:
1446 // (a one b)
1447 // == ((a olt b) || (a ogt b))
1448 // == ((a ord b) && (a une b))
1449 CondCode = AArch64CC::VC;
1450 CondCode2 = AArch64CC::NE;
1451 break;
1452 case CmpInst::FCMP_UEQ:
1453 // (a ueq b)
1454 // == ((a uno b) || (a oeq b))
1455 // == ((a ule b) && (a uge b))
1456 CondCode = AArch64CC::PL;
1457 CondCode2 = AArch64CC::LE;
1458 break;
1459 }
1460}
1461
1462/// Return a register which can be used as a bit to test in a TB(N)Z.
1463static Register getTestBitReg(Register Reg, uint64_t &Bit, bool &Invert,
1464 MachineRegisterInfo &MRI) {
1465 assert(Reg.isValid() && "Expected valid register!");
1466 bool HasZext = false;
1467 while (MachineInstr *MI = getDefIgnoringCopies(Reg, MRI)) {
1468 unsigned Opc = MI->getOpcode();
1469
1470 if (!MI->getOperand(0).isReg() ||
1471 !MRI.hasOneNonDBGUse(MI->getOperand(0).getReg()))
1472 break;
1473
1474 // (tbz (any_ext x), b) -> (tbz x, b) if we don't use the extended bits.
1475 //
1476 // (tbz (trunc x), b) -> (tbz x, b) is always safe, because the bit number
1477 // on the truncated x is the same as the bit number on x.
1478 if (Opc == TargetOpcode::G_ANYEXT || Opc == TargetOpcode::G_ZEXT ||
1479 Opc == TargetOpcode::G_TRUNC) {
1480 if (Opc == TargetOpcode::G_ZEXT)
1481 HasZext = true;
1482
1483 Register NextReg = MI->getOperand(1).getReg();
1484 // Did we find something worth folding?
1485 if (!NextReg.isValid() || !MRI.hasOneNonDBGUse(NextReg))
1486 break;
1487
1488 // NextReg is worth folding. Keep looking.
1489 Reg = NextReg;
1490 continue;
1491 }
1492
1493 // Attempt to find a suitable operation with a constant on one side.
1494 std::optional<uint64_t> C;
1495 Register TestReg;
1496 switch (Opc) {
1497 default:
1498 break;
1499 case TargetOpcode::G_AND:
1500 case TargetOpcode::G_XOR: {
1501 TestReg = MI->getOperand(1).getReg();
1502 Register ConstantReg = MI->getOperand(2).getReg();
1503 auto VRegAndVal = getIConstantVRegValWithLookThrough(ConstantReg, MRI);
1504 if (!VRegAndVal) {
1505 // AND commutes, check the other side for a constant.
1506 // FIXME: Can we canonicalize the constant so that it's always on the
1507 // same side at some point earlier?
1508 std::swap(ConstantReg, TestReg);
1509 VRegAndVal = getIConstantVRegValWithLookThrough(ConstantReg, MRI);
1510 }
1511 if (VRegAndVal) {
1512 if (HasZext)
1513 C = VRegAndVal->Value.getZExtValue();
1514 else
1515 C = VRegAndVal->Value.getSExtValue();
1516 }
1517 break;
1518 }
1519 case TargetOpcode::G_ASHR:
1520 case TargetOpcode::G_LSHR:
1521 case TargetOpcode::G_SHL: {
1522 TestReg = MI->getOperand(1).getReg();
1523 auto VRegAndVal =
1524 getIConstantVRegValWithLookThrough(MI->getOperand(2).getReg(), MRI);
1525 if (VRegAndVal)
1526 C = VRegAndVal->Value.getSExtValue();
1527 break;
1528 }
1529 }
1530
1531 // Didn't find a constant or viable register. Bail out of the loop.
1532 if (!C || !TestReg.isValid())
1533 break;
1534
1535 // We found a suitable instruction with a constant. Check to see if we can
1536 // walk through the instruction.
1537 Register NextReg;
1538 unsigned TestRegSize = MRI.getType(TestReg).getSizeInBits();
1539 switch (Opc) {
1540 default:
1541 break;
1542 case TargetOpcode::G_AND:
1543 // (tbz (and x, m), b) -> (tbz x, b) when the b-th bit of m is set.
1544 if ((*C >> Bit) & 1)
1545 NextReg = TestReg;
1546 break;
1547 case TargetOpcode::G_SHL:
1548 // (tbz (shl x, c), b) -> (tbz x, b-c) when b-c is positive and fits in
1549 // the type of the register.
1550 if (*C <= Bit && (Bit - *C) < TestRegSize) {
1551 NextReg = TestReg;
1552 Bit = Bit - *C;
1553 }
1554 break;
1555 case TargetOpcode::G_ASHR:
1556 // (tbz (ashr x, c), b) -> (tbz x, b+c) or (tbz x, msb) if b+c is > # bits
1557 // in x
1558 NextReg = TestReg;
1559 Bit = Bit + *C;
1560 if (Bit >= TestRegSize)
1561 Bit = TestRegSize - 1;
1562 break;
1563 case TargetOpcode::G_LSHR:
1564 // (tbz (lshr x, c), b) -> (tbz x, b+c) when b + c is < # bits in x
1565 if ((Bit + *C) < TestRegSize) {
1566 NextReg = TestReg;
1567 Bit = Bit + *C;
1568 }
1569 break;
1570 case TargetOpcode::G_XOR:
1571 // We can walk through a G_XOR by inverting whether we use tbz/tbnz when
1572 // appropriate.
1573 //
1574 // e.g. If x' = xor x, c, and the b-th bit is set in c then
1575 //
1576 // tbz x', b -> tbnz x, b
1577 //
1578 // Because x' only has the b-th bit set if x does not.
1579 if ((*C >> Bit) & 1)
1580 Invert = !Invert;
1581 NextReg = TestReg;
1582 break;
1583 }
1584
1585 // Check if we found anything worth folding.
1586 if (!NextReg.isValid())
1587 return Reg;
1588 Reg = NextReg;
1589 }
1590
1591 return Reg;
1592}
1593
1594MachineInstr *AArch64InstructionSelector::emitTestBit(
1595 Register TestReg, uint64_t Bit, bool IsNegative, MachineBasicBlock *DstMBB,
1596 MachineIRBuilder &MIB) const {
1597 assert(TestReg.isValid());
1598 assert(ProduceNonFlagSettingCondBr &&
1599 "Cannot emit TB(N)Z with speculation tracking!");
1600 MachineRegisterInfo &MRI = *MIB.getMRI();
1601
1602 // Attempt to optimize the test bit by walking over instructions.
1603 TestReg = getTestBitReg(TestReg, Bit, IsNegative, MRI);
1604 LLT Ty = MRI.getType(TestReg);
1605 unsigned Size = Ty.getSizeInBits();
1606 assert(!Ty.isVector() && "Expected a scalar!");
1607 assert(Bit < 64 && "Bit is too large!");
1608
1609 // When the test register is a 64-bit register, we have to narrow to make
1610 // TBNZW work.
1611 bool UseWReg = Bit < 32;
1612 unsigned NecessarySize = UseWReg ? 32 : 64;
1613 if (Size != NecessarySize)
1614 TestReg = moveScalarRegClass(
1615 TestReg, UseWReg ? AArch64::GPR32RegClass : AArch64::GPR64RegClass,
1616 MIB);
1617
1618 static const unsigned OpcTable[2][2] = {{AArch64::TBZX, AArch64::TBNZX},
1619 {AArch64::TBZW, AArch64::TBNZW}};
1620 unsigned Opc = OpcTable[UseWReg][IsNegative];
1621 auto TestBitMI =
1622 MIB.buildInstr(Opc).addReg(TestReg).addImm(Bit).addMBB(DstMBB);
1623 constrainSelectedInstRegOperands(*TestBitMI, TII, TRI, RBI);
1624 return &*TestBitMI;
1625}
1626
1627bool AArch64InstructionSelector::tryOptAndIntoCompareBranch(
1628 MachineInstr &AndInst, bool Invert, MachineBasicBlock *DstMBB,
1629 MachineIRBuilder &MIB) const {
1630 assert(AndInst.getOpcode() == TargetOpcode::G_AND && "Expected G_AND only?");
1631 // Given something like this:
1632 //
1633 // %x = ...Something...
1634 // %one = G_CONSTANT i64 1
1635 // %zero = G_CONSTANT i64 0
1636 // %and = G_AND %x, %one
1637 // %cmp = G_ICMP intpred(ne), %and, %zero
1638 // %cmp_trunc = G_TRUNC %cmp
1639 // G_BRCOND %cmp_trunc, %bb.3
1640 //
1641 // We want to try and fold the AND into the G_BRCOND and produce either a
1642 // TBNZ (when we have intpred(ne)) or a TBZ (when we have intpred(eq)).
1643 //
1644 // In this case, we'd get
1645 //
1646 // TBNZ %x %bb.3
1647 //
1648
1649 // Check if the AND has a constant on its RHS which we can use as a mask.
1650 // If it's a power of 2, then it's the same as checking a specific bit.
1651 // (e.g, ANDing with 8 == ANDing with 000...100 == testing if bit 3 is set)
1652 auto MaybeBit = getIConstantVRegValWithLookThrough(
1653 AndInst.getOperand(2).getReg(), *MIB.getMRI());
1654 if (!MaybeBit)
1655 return false;
1656
1657 int32_t Bit = MaybeBit->Value.exactLogBase2();
1658 if (Bit < 0)
1659 return false;
1660
1661 Register TestReg = AndInst.getOperand(1).getReg();
1662
1663 // Emit a TB(N)Z.
1664 emitTestBit(TestReg, Bit, Invert, DstMBB, MIB);
1665 return true;
1666}
1667
1668MachineInstr *AArch64InstructionSelector::emitCBZ(Register CompareReg,
1669 bool IsNegative,
1670 MachineBasicBlock *DestMBB,
1671 MachineIRBuilder &MIB) const {
1672 assert(ProduceNonFlagSettingCondBr && "CBZ does not set flags!");
1673 MachineRegisterInfo &MRI = *MIB.getMRI();
1674 assert(RBI.getRegBank(CompareReg, MRI, TRI)->getID() ==
1675 AArch64::GPRRegBankID &&
1676 "Expected GPRs only?");
1677 auto Ty = MRI.getType(CompareReg);
1678 unsigned Width = Ty.getSizeInBits();
1679 assert(!Ty.isVector() && "Expected scalar only?");
1680 assert(Width <= 64 && "Expected width to be at most 64?");
1681 static const unsigned OpcTable[2][2] = {{AArch64::CBZW, AArch64::CBZX},
1682 {AArch64::CBNZW, AArch64::CBNZX}};
1683 unsigned Opc = OpcTable[IsNegative][Width == 64];
1684 auto BranchMI = MIB.buildInstr(Opc, {}, {CompareReg}).addMBB(DestMBB);
1685 constrainSelectedInstRegOperands(*BranchMI, TII, TRI, RBI);
1686 return &*BranchMI;
1687}
1688
1689bool AArch64InstructionSelector::selectCompareBranchFedByFCmp(
1690 MachineInstr &I, MachineInstr &FCmp, MachineIRBuilder &MIB) const {
1691 assert(FCmp.getOpcode() == TargetOpcode::G_FCMP);
1692 assert(I.getOpcode() == TargetOpcode::G_BRCOND);
1693 // Unfortunately, the mapping of LLVM FP CC's onto AArch64 CC's isn't
1694 // totally clean. Some of them require two branches to implement.
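// (Illustrative: an unordered-or-equal compare has no single AArch64 condition,
// so it is branched as "b.eq <bb>" plus "b.vs <bb>", i.e. taken when the operands
// compared equal or when the comparison was unordered.)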
1695 auto Pred = (CmpInst::Predicate)FCmp.getOperand(1).getPredicate();
1696 emitFPCompare(FCmp.getOperand(2).getReg(), FCmp.getOperand(3).getReg(), MIB,
1697 Pred);
1698 AArch64CC::CondCode CC1, CC2;
1699 changeFCMPPredToAArch64CC(static_cast<CmpInst::Predicate>(Pred), CC1, CC2);
1700 MachineBasicBlock *DestMBB = I.getOperand(1).getMBB();
1701 MIB.buildInstr(AArch64::Bcc, {}, {}).addImm(CC1).addMBB(DestMBB);
1702 if (CC2 != AArch64CC::AL)
1703 MIB.buildInstr(AArch64::Bcc, {}, {}).addImm(CC2).addMBB(DestMBB);
1704 I.eraseFromParent();
1705 return true;
1706}
1707
1708bool AArch64InstructionSelector::tryOptCompareBranchFedByICmp(
1709 MachineInstr &I, MachineInstr &ICmp, MachineIRBuilder &MIB) const {
1710 assert(ICmp.getOpcode() == TargetOpcode::G_ICMP);
1711 assert(I.getOpcode() == TargetOpcode::G_BRCOND);
1712 // Attempt to optimize the G_BRCOND + G_ICMP into a TB(N)Z/CB(N)Z.
1713 //
1714 // Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z
1715 // instructions will not be produced, as they are conditional branch
1716 // instructions that do not set flags.
1717 if (!ProduceNonFlagSettingCondBr)
1718 return false;
1719
1720 MachineRegisterInfo &MRI = *MIB.getMRI();
1721 MachineBasicBlock *DestMBB = I.getOperand(1).getMBB();
1722 auto Pred =
1723 static_cast<CmpInst::Predicate>(ICmp.getOperand(1).getPredicate());
1724 Register LHS = ICmp.getOperand(2).getReg();
1725 Register RHS = ICmp.getOperand(3).getReg();
1726
1727 // We're allowed to emit a TB(N)Z/CB(N)Z. Try to do that.
1728 auto VRegAndVal = getIConstantVRegValWithLookThrough(RHS, MRI);
1729 MachineInstr *AndInst = getOpcodeDef(TargetOpcode::G_AND, LHS, MRI);
1730
1731 // When we can emit a TB(N)Z, prefer that.
1732 //
1733 // Handle non-commutative condition codes first.
1734 // Note that we don't want to do this when we have a G_AND because it can
1735 // become a tst. The tst will make the test bit in the TB(N)Z redundant.
1736 if (VRegAndVal && !AndInst) {
1737 int64_t C = VRegAndVal->Value.getSExtValue();
1738
1739 // When we have a greater-than comparison, we can just test if the msb is
1740 // zero.
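// (Illustrative, with hypothetical vregs: "G_ICMP intpred(sgt), %x:gpr(s64), -1"
// feeding the branch is selected as "TBZ %x, #63, %bb", since x > -1 exactly
// when the sign bit is clear.)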
1741 if (C == -1 && Pred == CmpInst::ICMP_SGT) {
1742 uint64_t Bit = MRI.getType(LHS).getSizeInBits() - 1;
1743 emitTestBit(LHS, Bit, /*IsNegative = */ false, DestMBB, MIB);
1744 I.eraseFromParent();
1745 return true;
1746 }
1747
1748 // When we have a less than comparison, we can just test if the msb is not
1749 // zero.
1750 if (C == 0 && Pred == CmpInst::ICMP_SLT) {
1751 uint64_t Bit = MRI.getType(LHS).getSizeInBits() - 1;
1752 emitTestBit(LHS, Bit, /*IsNegative = */ true, DestMBB, MIB);
1753 I.eraseFromParent();
1754 return true;
1755 }
1756
1757 // Inversely, if we have a signed greater-than-or-equal comparison to zero,
1758 // we can test if the msb is zero.
1759 if (C == 0 && Pred == CmpInst::ICMP_SGE) {
1760 uint64_t Bit = MRI.getType(LHS).getSizeInBits() - 1;
1761 emitTestBit(LHS, Bit, /*IsNegative = */ false, DestMBB, MIB);
1762 I.eraseFromParent();
1763 return true;
1764 }
1765 }
1766
1767 // Attempt to handle commutative condition codes. Right now, that's only
1768 // eq/ne.
1769 if (ICmpInst::isEquality(Pred)) {
1770 if (!VRegAndVal) {
1771 std::swap(RHS, LHS);
1772 VRegAndVal = getIConstantVRegValWithLookThrough(RHS, MRI);
1773 AndInst = getOpcodeDef(TargetOpcode::G_AND, LHS, MRI);
1774 }
1775
1776 if (VRegAndVal && VRegAndVal->Value == 0) {
1777 // If there's a G_AND feeding into this branch, try to fold it away by
1778 // emitting a TB(N)Z instead.
1779 //
1780 // Note: If we have LT, then it *is* possible to fold, but it wouldn't be
1781 // beneficial. When we have an AND and LT, we need a TST/ANDS, so folding
1782 // would be redundant.
1783 if (AndInst &&
1784 tryOptAndIntoCompareBranch(
1785 *AndInst, /*Invert = */ Pred == CmpInst::ICMP_NE, DestMBB, MIB)) {
1786 I.eraseFromParent();
1787 return true;
1788 }
1789
1790 // Otherwise, try to emit a CB(N)Z instead.
1791 auto LHSTy = MRI.getType(LHS);
1792 if (!LHSTy.isVector() && LHSTy.getSizeInBits() <= 64) {
1793 emitCBZ(LHS, /*IsNegative = */ Pred == CmpInst::ICMP_NE, DestMBB, MIB);
1794 I.eraseFromParent();
1795 return true;
1796 }
1797 }
1798 }
1799
1800 return false;
1801}
1802
1803bool AArch64InstructionSelector::selectCompareBranchFedByICmp(
1804 MachineInstr &I, MachineInstr &ICmp, MachineIRBuilder &MIB) const {
1805 assert(ICmp.getOpcode() == TargetOpcode::G_ICMP);
1806 assert(I.getOpcode() == TargetOpcode::G_BRCOND);
1807 if (tryOptCompareBranchFedByICmp(I, ICmp, MIB))
1808 return true;
1809
1810 // Couldn't optimize. Emit a compare + a Bcc.
1811 MachineBasicBlock *DestMBB = I.getOperand(1).getMBB();
1812 auto PredOp = ICmp.getOperand(1);
1813 emitIntegerCompare(ICmp.getOperand(2), ICmp.getOperand(3), PredOp, MIB);
1814 const AArch64CC::CondCode CC = changeICMPPredToAArch64CC(
1815 static_cast<CmpInst::Predicate>(PredOp.getPredicate()));
1816 MIB.buildInstr(AArch64::Bcc, {}, {}).addImm(CC).addMBB(DestMBB);
1817 I.eraseFromParent();
1818 return true;
1819}
1820
1821 bool AArch64InstructionSelector::selectCompareBranch(
1822 MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) {
1823 Register CondReg = I.getOperand(0).getReg();
1824 MachineInstr *CCMI = MRI.getVRegDef(CondReg);
1825 // Try to select the G_BRCOND using whatever is feeding the condition if
1826 // possible.
1827 unsigned CCMIOpc = CCMI->getOpcode();
1828 if (CCMIOpc == TargetOpcode::G_FCMP)
1829 return selectCompareBranchFedByFCmp(I, *CCMI, MIB);
1830 if (CCMIOpc == TargetOpcode::G_ICMP)
1831 return selectCompareBranchFedByICmp(I, *CCMI, MIB);
1832
1833 // Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z
1834 // instructions will not be produced, as they are conditional branch
1835 // instructions that do not set flags.
1836 if (ProduceNonFlagSettingCondBr) {
1837 emitTestBit(CondReg, /*Bit = */ 0, /*IsNegative = */ true,
1838 I.getOperand(1).getMBB(), MIB);
1839 I.eraseFromParent();
1840 return true;
1841 }
1842
1843 // Can't emit TB(N)Z/CB(N)Z. Emit a tst + bcc instead.
1844 auto TstMI =
1845 MIB.buildInstr(AArch64::ANDSWri, {LLT::scalar(32)}, {CondReg}).addImm(1);
1846 constrainSelectedInstRegOperands(*TstMI, TII, TRI, RBI);
1847 auto Bcc = MIB.buildInstr(AArch64::Bcc)
1848 .addImm(AArch64CC::NE)
1849 .addMBB(I.getOperand(1).getMBB());
1850 I.eraseFromParent();
1851 return constrainSelectedInstRegOperands(*Bcc, TII, TRI, RBI);
1852}
1853
1854/// Returns the element immediate value of a vector shift operand if found.
1855/// This needs to detect a splat-like operation, e.g. a G_BUILD_VECTOR.
1856 static std::optional<int64_t> getVectorShiftImm(Register Reg,
1857 MachineRegisterInfo &MRI) {
1858 assert(MRI.getType(Reg).isVector() && "Expected a *vector* shift operand");
1859 MachineInstr *OpMI = MRI.getVRegDef(Reg);
1860 return getAArch64VectorSplatScalar(*OpMI, MRI);
1861}
1862
1863/// Matches and returns the shift immediate value for a SHL instruction given
1864/// a shift operand.
1865 static std::optional<int64_t> getVectorSHLImm(LLT SrcTy, Register Reg,
1866 MachineRegisterInfo &MRI) {
1867 std::optional<int64_t> ShiftImm = getVectorShiftImm(Reg, MRI);
1868 if (!ShiftImm)
1869 return std::nullopt;
1870 // Check the immediate is in range for a SHL.
1871 int64_t Imm = *ShiftImm;
1872 if (Imm < 0)
1873 return std::nullopt;
1874 switch (SrcTy.getElementType().getSizeInBits()) {
1875 default:
1876 LLVM_DEBUG(dbgs() << "Unhandled element type for vector shift");
1877 return std::nullopt;
1878 case 8:
1879 if (Imm > 7)
1880 return std::nullopt;
1881 break;
1882 case 16:
1883 if (Imm > 15)
1884 return std::nullopt;
1885 break;
1886 case 32:
1887 if (Imm > 31)
1888 return std::nullopt;
1889 break;
1890 case 64:
1891 if (Imm > 63)
1892 return std::nullopt;
1893 break;
1894 }
1895 return Imm;
1896}
1897
1898 bool AArch64InstructionSelector::selectVectorSHL(MachineInstr &I,
1899 MachineRegisterInfo &MRI) {
1900 assert(I.getOpcode() == TargetOpcode::G_SHL);
1901 Register DstReg = I.getOperand(0).getReg();
1902 const LLT Ty = MRI.getType(DstReg);
1903 Register Src1Reg = I.getOperand(1).getReg();
1904 Register Src2Reg = I.getOperand(2).getReg();
1905
1906 if (!Ty.isVector())
1907 return false;
1908
1909 // Check if we have a vector of constants on RHS that we can select as the
1910 // immediate form.
1911 std::optional<int64_t> ImmVal = getVectorSHLImm(Ty, Src2Reg, MRI);
1912
1913 unsigned Opc = 0;
1914 if (Ty == LLT::fixed_vector(2, 64)) {
1915 Opc = ImmVal ? AArch64::SHLv2i64_shift : AArch64::USHLv2i64;
1916 } else if (Ty == LLT::fixed_vector(4, 32)) {
1917 Opc = ImmVal ? AArch64::SHLv4i32_shift : AArch64::USHLv4i32;
1918 } else if (Ty == LLT::fixed_vector(2, 32)) {
1919 Opc = ImmVal ? AArch64::SHLv2i32_shift : AArch64::USHLv2i32;
1920 } else if (Ty == LLT::fixed_vector(4, 16)) {
1921 Opc = ImmVal ? AArch64::SHLv4i16_shift : AArch64::USHLv4i16;
1922 } else if (Ty == LLT::fixed_vector(8, 16)) {
1923 Opc = ImmVal ? AArch64::SHLv8i16_shift : AArch64::USHLv8i16;
1924 } else if (Ty == LLT::fixed_vector(16, 8)) {
1925 Opc = ImmVal ? AArch64::SHLv16i8_shift : AArch64::USHLv16i8;
1926 } else if (Ty == LLT::fixed_vector(8, 8)) {
1927 Opc = ImmVal ? AArch64::SHLv8i8_shift : AArch64::USHLv8i8;
1928 } else {
1929 LLVM_DEBUG(dbgs() << "Unhandled G_SHL type");
1930 return false;
1931 }
1932
1933 auto Shl = MIB.buildInstr(Opc, {DstReg}, {Src1Reg});
1934 if (ImmVal)
1935 Shl.addImm(*ImmVal);
1936 else
1937 Shl.addUse(Src2Reg);
1938 constrainSelectedInstRegOperands(*Shl, TII, TRI, RBI);
1939 I.eraseFromParent();
1940 return true;
1941}
1942
1943 bool AArch64InstructionSelector::selectVectorAshrLshr(
1944 MachineInstr &I, MachineRegisterInfo &MRI) {
1945 assert(I.getOpcode() == TargetOpcode::G_ASHR ||
1946 I.getOpcode() == TargetOpcode::G_LSHR);
1947 Register DstReg = I.getOperand(0).getReg();
1948 const LLT Ty = MRI.getType(DstReg);
1949 Register Src1Reg = I.getOperand(1).getReg();
1950 Register Src2Reg = I.getOperand(2).getReg();
1951
1952 if (!Ty.isVector())
1953 return false;
1954
1955 bool IsASHR = I.getOpcode() == TargetOpcode::G_ASHR;
1956
1957 // We expect the immediate case to be lowered in the PostLegalCombiner to
1958 // AArch64ISD::VASHR or AArch64ISD::VLSHR equivalents.
1959
1960 // There is no shift-right-by-register instruction, but the shift-left-by-
1961 // register instruction takes a signed shift amount, where a negative amount
1962 // specifies a right shift.
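// (Illustrative, with placeholder vreg names: a <4 x s32> G_ASHR by %amt is
// therefore emitted below as "NEGv4i32 %neg, %amt" followed by
// "SSHLv4i32 %dst, %src, %neg".)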
1963
1964 unsigned Opc = 0;
1965 unsigned NegOpc = 0;
1966 const TargetRegisterClass *RC =
1967 getRegClassForTypeOnBank(Ty, RBI.getRegBank(AArch64::FPRRegBankID));
1968 if (Ty == LLT::fixed_vector(2, 64)) {
1969 Opc = IsASHR ? AArch64::SSHLv2i64 : AArch64::USHLv2i64;
1970 NegOpc = AArch64::NEGv2i64;
1971 } else if (Ty == LLT::fixed_vector(4, 32)) {
1972 Opc = IsASHR ? AArch64::SSHLv4i32 : AArch64::USHLv4i32;
1973 NegOpc = AArch64::NEGv4i32;
1974 } else if (Ty == LLT::fixed_vector(2, 32)) {
1975 Opc = IsASHR ? AArch64::SSHLv2i32 : AArch64::USHLv2i32;
1976 NegOpc = AArch64::NEGv2i32;
1977 } else if (Ty == LLT::fixed_vector(4, 16)) {
1978 Opc = IsASHR ? AArch64::SSHLv4i16 : AArch64::USHLv4i16;
1979 NegOpc = AArch64::NEGv4i16;
1980 } else if (Ty == LLT::fixed_vector(8, 16)) {
1981 Opc = IsASHR ? AArch64::SSHLv8i16 : AArch64::USHLv8i16;
1982 NegOpc = AArch64::NEGv8i16;
1983 } else if (Ty == LLT::fixed_vector(16, 8)) {
1984 Opc = IsASHR ? AArch64::SSHLv16i8 : AArch64::USHLv16i8;
1985 NegOpc = AArch64::NEGv16i8;
1986 } else if (Ty == LLT::fixed_vector(8, 8)) {
1987 Opc = IsASHR ? AArch64::SSHLv8i8 : AArch64::USHLv8i8;
1988 NegOpc = AArch64::NEGv8i8;
1989 } else {
1990 LLVM_DEBUG(dbgs() << "Unhandled G_ASHR type");
1991 return false;
1992 }
1993
1994 auto Neg = MIB.buildInstr(NegOpc, {RC}, {Src2Reg});
1995 constrainSelectedInstRegOperands(*Neg, TII, TRI, RBI);
1996 auto SShl = MIB.buildInstr(Opc, {DstReg}, {Src1Reg, Neg});
1997 constrainSelectedInstRegOperands(*SShl, TII, TRI, RBI);
1998 I.eraseFromParent();
1999 return true;
2000}
2001
2002 bool AArch64InstructionSelector::selectVaStartAAPCS(
2003 MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const {
2004
2005 if (STI.isCallingConvWin64(MF.getFunction().getCallingConv(),
2006 MF.getFunction().isVarArg()))
2007 return false;
2008
2009 // The layout of the va_list struct is specified in the AArch64 Procedure Call
2010 // Standard, section 10.1.5.
2011
2012 const AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
2013 const unsigned PtrSize = STI.isTargetILP32() ? 4 : 8;
2014 const auto *PtrRegClass =
2015 STI.isTargetILP32() ? &AArch64::GPR32RegClass : &AArch64::GPR64RegClass;
2016
2017 const MCInstrDesc &MCIDAddAddr =
2018 TII.get(STI.isTargetILP32() ? AArch64::ADDWri : AArch64::ADDXri);
2019 const MCInstrDesc &MCIDStoreAddr =
2020 TII.get(STI.isTargetILP32() ? AArch64::STRWui : AArch64::STRXui);
2021
2022 /*
2023 * typedef struct va_list {
2024 * void * stack; // next stack param
2025 * void * gr_top; // end of GP arg reg save area
2026 * void * vr_top; // end of FP/SIMD arg reg save area
2027 * int gr_offs; // offset from gr_top to next GP register arg
2028 * int vr_offs; // offset from vr_top to next FP/SIMD register arg
2029 * } va_list;
2030 */
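// With the LP64 layout used below, the fields land at byte offsets 0, 8, 16,
// 24 and 28 (0, 4, 8, 12 and 16 on ILP32), matching the 32/20 byte totals
// asserted at the end of this function.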
2031 const auto VAList = I.getOperand(0).getReg();
2032
2033 // Our current offset in bytes from the va_list struct (VAList).
2034 unsigned OffsetBytes = 0;
2035
2036 // Helper function to store (FrameIndex + Imm) to VAList at offset OffsetBytes
2037 // and increment OffsetBytes by PtrSize.
2038 const auto PushAddress = [&](const int FrameIndex, const int64_t Imm) {
2039 const Register Top = MRI.createVirtualRegister(PtrRegClass);
2040 auto MIB = BuildMI(*I.getParent(), I, I.getDebugLoc(), MCIDAddAddr)
2041 .addDef(Top)
2042 .addFrameIndex(FrameIndex)
2043 .addImm(Imm)
2044 .addImm(0);
2045 constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
2046
2047 const auto *MMO = *I.memoperands_begin();
2048 MIB = BuildMI(*I.getParent(), I, I.getDebugLoc(), MCIDStoreAddr)
2049 .addUse(Top)
2050 .addUse(VAList)
2051 .addImm(OffsetBytes / PtrSize)
2052 .addMemOperand(MF.getMachineMemOperand(
2053 MMO->getPointerInfo().getWithOffset(OffsetBytes),
2054 MachineMemOperand::MOStore, PtrSize, MMO->getBaseAlign()));
2055 constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
2056
2057 OffsetBytes += PtrSize;
2058 };
2059
2060 // void* stack at offset 0
2061 PushAddress(FuncInfo->getVarArgsStackIndex(), 0);
2062
2063 // void* gr_top at offset 8 (4 on ILP32)
2064 const unsigned GPRSize = FuncInfo->getVarArgsGPRSize();
2065 PushAddress(FuncInfo->getVarArgsGPRIndex(), GPRSize);
2066
2067 // void* vr_top at offset 16 (8 on ILP32)
2068 const unsigned FPRSize = FuncInfo->getVarArgsFPRSize();
2069 PushAddress(FuncInfo->getVarArgsFPRIndex(), FPRSize);
2070
2071 // Helper function to store a 4-byte integer constant to VAList at offset
2072 // OffsetBytes, and increment OffsetBytes by 4.
2073 const auto PushIntConstant = [&](const int32_t Value) {
2074 constexpr int IntSize = 4;
2075 const Register Temp = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
2076 auto MIB =
2077 BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::MOVi32imm))
2078 .addDef(Temp)
2079 .addImm(Value);
2080 constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
2081
2082 const auto *MMO = *I.memoperands_begin();
2083 MIB = BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::STRWui))
2084 .addUse(Temp)
2085 .addUse(VAList)
2086 .addImm(OffsetBytes / IntSize)
2087 .addMemOperand(MF.getMachineMemOperand(
2088 MMO->getPointerInfo().getWithOffset(OffsetBytes),
2089 MachineMemOperand::MOStore, IntSize, MMO->getBaseAlign()));
2090 constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
2091 OffsetBytes += IntSize;
2092 };
2093
2094 // int gr_offs at offset 24 (12 on ILP32)
2095 PushIntConstant(-static_cast<int32_t>(GPRSize));
2096
2097 // int vr_offs at offset 28 (16 on ILP32)
2098 PushIntConstant(-static_cast<int32_t>(FPRSize));
2099
2100 assert(OffsetBytes == (STI.isTargetILP32() ? 20 : 32) && "Unexpected offset");
2101
2102 I.eraseFromParent();
2103 return true;
2104}
2105
2106 bool AArch64InstructionSelector::selectVaStartDarwin(
2107 MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const {
2108 AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
2109 Register ListReg = I.getOperand(0).getReg();
2110
2111 Register ArgsAddrReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
2112
2113 int FrameIdx = FuncInfo->getVarArgsStackIndex();
2114 if (MF.getSubtarget<AArch64Subtarget>().isCallingConvWin64(
2115 MF.getFunction().getCallingConv(), MF.getFunction().isVarArg())) {
2116 FrameIdx = FuncInfo->getVarArgsGPRSize() > 0
2117 ? FuncInfo->getVarArgsGPRIndex()
2118 : FuncInfo->getVarArgsStackIndex();
2119 }
2120
2121 auto MIB =
2122 BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::ADDXri))
2123 .addDef(ArgsAddrReg)
2124 .addFrameIndex(FrameIdx)
2125 .addImm(0)
2126 .addImm(0);
2127
2128 constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
2129
2130 MIB = BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::STRXui))
2131 .addUse(ArgsAddrReg)
2132 .addUse(ListReg)
2133 .addImm(0)
2134 .addMemOperand(*I.memoperands_begin());
2135
2136 constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
2137 I.eraseFromParent();
2138 return true;
2139}
2140
2141void AArch64InstructionSelector::materializeLargeCMVal(
2142 MachineInstr &I, const Value *V, unsigned OpFlags) {
2143 MachineBasicBlock &MBB = *I.getParent();
2144 MachineFunction &MF = *MBB.getParent();
2145 MachineRegisterInfo &MRI = MF.getRegInfo();
2146
2147 auto MovZ = MIB.buildInstr(AArch64::MOVZXi, {&AArch64::GPR64RegClass}, {});
2148 MovZ->addOperand(MF, I.getOperand(1));
2149 MovZ->getOperand(1).setTargetFlags(OpFlags | AArch64II::MO_G0 |
2150 AArch64II::MO_NC);
2151 MovZ->addOperand(MF, MachineOperand::CreateImm(0));
2152 constrainSelectedInstRegOperands(*MovZ, TII, TRI, RBI);
2153
2154 auto BuildMovK = [&](Register SrcReg, unsigned char Flags, unsigned Offset,
2155 Register ForceDstReg) {
2156 Register DstReg = ForceDstReg
2157 ? ForceDstReg
2158 : MRI.createVirtualRegister(&AArch64::GPR64RegClass);
2159 auto MovI = MIB.buildInstr(AArch64::MOVKXi).addDef(DstReg).addUse(SrcReg);
2160 if (auto *GV = dyn_cast<GlobalValue>(V)) {
2161 MovI->addOperand(MF, MachineOperand::CreateGA(
2162 GV, MovZ->getOperand(1).getOffset(), Flags));
2163 } else {
2164 MovI->addOperand(
2165 MF, MachineOperand::CreateBA(cast<BlockAddress>(V),
2166 MovZ->getOperand(1).getOffset(), Flags));
2167 }
2168 MovI->addOperand(MF, MachineOperand::CreateImm(Offset));
2169 constrainSelectedInstRegOperands(*MovI, TII, TRI, RBI);
2170 return DstReg;
2171 };
2172 Register DstReg = BuildMovK(MovZ.getReg(0),
2173 AArch64II::MO_G1 | AArch64II::MO_NC, 16, 0);
2174 DstReg = BuildMovK(DstReg, AArch64II::MO_G2 | AArch64II::MO_NC, 32, 0);
2175 BuildMovK(DstReg, AArch64II::MO_G3, 48, I.getOperand(0).getReg());
2176}
2177
2178bool AArch64InstructionSelector::preISelLower(MachineInstr &I) {
2179 MachineBasicBlock &MBB = *I.getParent();
2180 MachineFunction &MF = *MBB.getParent();
2181 MachineRegisterInfo &MRI = MF.getRegInfo();
2182
2183 switch (I.getOpcode()) {
2184 case TargetOpcode::G_STORE: {
2185 bool Changed = contractCrossBankCopyIntoStore(I, MRI);
2186 MachineOperand &SrcOp = I.getOperand(0);
2187 if (MRI.getType(SrcOp.getReg()).isPointer()) {
2188 // Allow matching with imported patterns for stores of pointers. Unlike
2189 // G_LOAD/G_PTR_ADD, we may not have selected all users. So, emit a copy
2190 // and constrain.
2191 auto Copy = MIB.buildCopy(LLT::scalar(64), SrcOp);
2192 Register NewSrc = Copy.getReg(0);
2193 SrcOp.setReg(NewSrc);
2194 RBI.constrainGenericRegister(NewSrc, AArch64::GPR64RegClass, MRI);
2195 Changed = true;
2196 }
2197 return Changed;
2198 }
2199 case TargetOpcode::G_PTR_ADD:
2200 return convertPtrAddToAdd(I, MRI);
2201 case TargetOpcode::G_LOAD: {
2202 // For scalar loads of pointers, we try to convert the dest type from p0
2203 // to s64 so that our imported patterns can match. Like with the G_PTR_ADD
2204 // conversion, this should be ok because all users should have been
2205 // selected already, so the type doesn't matter for them.
2206 Register DstReg = I.getOperand(0).getReg();
2207 const LLT DstTy = MRI.getType(DstReg);
2208 if (!DstTy.isPointer())
2209 return false;
2210 MRI.setType(DstReg, LLT::scalar(64));
2211 return true;
2212 }
2213 case AArch64::G_DUP: {
2214 // Convert the type from p0 to s64 to help selection.
2215 LLT DstTy = MRI.getType(I.getOperand(0).getReg());
2216 if (!DstTy.isPointerVector())
2217 return false;
2218 auto NewSrc = MIB.buildCopy(LLT::scalar(64), I.getOperand(1).getReg());
2219 MRI.setType(I.getOperand(0).getReg(),
2220 DstTy.changeElementType(LLT::scalar(64)));
2221 MRI.setRegClass(NewSrc.getReg(0), &AArch64::GPR64RegClass);
2222 I.getOperand(1).setReg(NewSrc.getReg(0));
2223 return true;
2224 }
2225 case AArch64::G_INSERT_VECTOR_ELT: {
2226 // Convert the type from p0 to s64 to help selection.
2227 LLT DstTy = MRI.getType(I.getOperand(0).getReg());
2228 LLT SrcVecTy = MRI.getType(I.getOperand(1).getReg());
2229 if (!SrcVecTy.isPointerVector())
2230 return false;
2231 auto NewSrc = MIB.buildCopy(LLT::scalar(64), I.getOperand(2).getReg());
2232 MRI.setType(I.getOperand(1).getReg(),
2233 DstTy.changeElementType(LLT::scalar(64)));
2234 MRI.setType(I.getOperand(0).getReg(),
2235 DstTy.changeElementType(LLT::scalar(64)));
2236 MRI.setRegClass(NewSrc.getReg(0), &AArch64::GPR64RegClass);
2237 I.getOperand(2).setReg(NewSrc.getReg(0));
2238 return true;
2239 }
2240 case TargetOpcode::G_UITOFP:
2241 case TargetOpcode::G_SITOFP: {
2242 // If both source and destination regbanks are FPR, then convert the opcode
2243 // to G_SITOF so that the importer can select it to an fpr variant.
2244 // Otherwise, it ends up matching an fpr/gpr variant and adding a cross-bank
2245 // copy.
2246 Register SrcReg = I.getOperand(1).getReg();
2247 LLT SrcTy = MRI.getType(SrcReg);
2248 LLT DstTy = MRI.getType(I.getOperand(0).getReg());
2249 if (SrcTy.isVector() || SrcTy.getSizeInBits() != DstTy.getSizeInBits())
2250 return false;
2251
2252 if (RBI.getRegBank(SrcReg, MRI, TRI)->getID() == AArch64::FPRRegBankID) {
2253 if (I.getOpcode() == TargetOpcode::G_SITOFP)
2254 I.setDesc(TII.get(AArch64::G_SITOF));
2255 else
2256 I.setDesc(TII.get(AArch64::G_UITOF));
2257 return true;
2258 }
2259 return false;
2260 }
2261 default:
2262 return false;
2263 }
2264}
2265
2266/// This lowering tries to look for G_PTR_ADD instructions and then converts
2267/// them to a standard G_ADD with a COPY on the source.
2268///
2269/// The motivation behind this is to expose the add semantics to the imported
2270/// tablegen patterns. We shouldn't need to check for uses being loads/stores,
2271/// because the selector works bottom up, uses before defs. By the time we
2272/// end up trying to select a G_PTR_ADD, we should have already attempted to
2273/// fold this into addressing modes and were therefore unsuccessful.
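///
/// For example (hypothetical MIR): "%d:gpr(p0) = G_PTR_ADD %base, %off(s64)" is
/// rewritten below into "%c:gpr(s64) = G_PTRTOINT %base" followed by
/// "%d(s64) = G_ADD %c, %off", or a G_SUB when %off is a 0-x negation idiom.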
2274 bool AArch64InstructionSelector::convertPtrAddToAdd(
2275 MachineInstr &I, MachineRegisterInfo &MRI) {
2276 assert(I.getOpcode() == TargetOpcode::G_PTR_ADD && "Expected G_PTR_ADD");
2277 Register DstReg = I.getOperand(0).getReg();
2278 Register AddOp1Reg = I.getOperand(1).getReg();
2279 const LLT PtrTy = MRI.getType(DstReg);
2280 if (PtrTy.getAddressSpace() != 0)
2281 return false;
2282
2283 const LLT CastPtrTy =
2284 PtrTy.isVector() ? LLT::fixed_vector(2, 64) : LLT::scalar(64);
2285 auto PtrToInt = MIB.buildPtrToInt(CastPtrTy, AddOp1Reg);
2286 // Set regbanks on the registers.
2287 if (PtrTy.isVector())
2288 MRI.setRegBank(PtrToInt.getReg(0), RBI.getRegBank(AArch64::FPRRegBankID));
2289 else
2290 MRI.setRegBank(PtrToInt.getReg(0), RBI.getRegBank(AArch64::GPRRegBankID));
2291
2292 // Now turn the %dst(p0) = G_PTR_ADD %base, off into:
2293 // %dst(intty) = G_ADD %intbase, off
2294 I.setDesc(TII.get(TargetOpcode::G_ADD));
2295 MRI.setType(DstReg, CastPtrTy);
2296 I.getOperand(1).setReg(PtrToInt.getReg(0));
2297 if (!select(*PtrToInt)) {
2298 LLVM_DEBUG(dbgs() << "Failed to select G_PTRTOINT in convertPtrAddToAdd");
2299 return false;
2300 }
2301
2302 // Also take the opportunity here to try to do some optimization.
2303 // Try to convert this into a G_SUB if the offset is a 0-x negate idiom.
2304 Register NegatedReg;
2305 if (!mi_match(I.getOperand(2).getReg(), MRI, m_Neg(m_Reg(NegatedReg))))
2306 return true;
2307 I.getOperand(2).setReg(NegatedReg);
2308 I.setDesc(TII.get(TargetOpcode::G_SUB));
2309 return true;
2310}
2311
2312 bool AArch64InstructionSelector::earlySelectSHL(MachineInstr &I,
2313 MachineRegisterInfo &MRI) {
2314 // We try to match the immediate variant of LSL, which is actually an alias
2315 // for a special case of UBFM. Otherwise, we fall back to the imported
2316 // selector which will match the register variant.
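// (Illustrative: "lsl x0, x1, #3" is the alias of "ubfm x0, x1, #61, #60",
// i.e. UBFM with immr = (64 - shift) % 64 and imms = 63 - shift.)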
2317 assert(I.getOpcode() == TargetOpcode::G_SHL && "unexpected op");
2318 const auto &MO = I.getOperand(2);
2319 auto VRegAndVal = getIConstantVRegVal(MO.getReg(), MRI);
2320 if (!VRegAndVal)
2321 return false;
2322
2323 const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
2324 if (DstTy.isVector())
2325 return false;
2326 bool Is64Bit = DstTy.getSizeInBits() == 64;
2327 auto Imm1Fn = Is64Bit ? selectShiftA_64(MO) : selectShiftA_32(MO);
2328 auto Imm2Fn = Is64Bit ? selectShiftB_64(MO) : selectShiftB_32(MO);
2329
2330 if (!Imm1Fn || !Imm2Fn)
2331 return false;
2332
2333 auto NewI =
2334 MIB.buildInstr(Is64Bit ? AArch64::UBFMXri : AArch64::UBFMWri,
2335 {I.getOperand(0).getReg()}, {I.getOperand(1).getReg()});
2336
2337 for (auto &RenderFn : *Imm1Fn)
2338 RenderFn(NewI);
2339 for (auto &RenderFn : *Imm2Fn)
2340 RenderFn(NewI);
2341
2342 I.eraseFromParent();
2343 return constrainSelectedInstRegOperands(*NewI, TII, TRI, RBI);
2344}
2345
2346 bool AArch64InstructionSelector::contractCrossBankCopyIntoStore(
2347 MachineInstr &I, MachineRegisterInfo &MRI) {
2348 assert(I.getOpcode() == TargetOpcode::G_STORE && "Expected G_STORE");
2349 // If we're storing a scalar, it doesn't matter what register bank that
2350 // scalar is on. All that matters is the size.
2351 //
2352 // So, if we see something like this (with a 32-bit scalar as an example):
2353 //
2354 // %x:gpr(s32) = ... something ...
2355 // %y:fpr(s32) = COPY %x:gpr(s32)
2356 // G_STORE %y:fpr(s32)
2357 //
2358 // We can fix this up into something like this:
2359 //
2360 // G_STORE %x:gpr(s32)
2361 //
2362 // And then continue the selection process normally.
2363 Register DefDstReg = getSrcRegIgnoringCopies(I.getOperand(0).getReg(), MRI);
2364 if (!DefDstReg.isValid())
2365 return false;
2366 LLT DefDstTy = MRI.getType(DefDstReg);
2367 Register StoreSrcReg = I.getOperand(0).getReg();
2368 LLT StoreSrcTy = MRI.getType(StoreSrcReg);
2369
2370 // If we get something strange like a physical register, then we shouldn't
2371 // go any further.
2372 if (!DefDstTy.isValid())
2373 return false;
2374
2375 // Are the source and dst types the same size?
2376 if (DefDstTy.getSizeInBits() != StoreSrcTy.getSizeInBits())
2377 return false;
2378
2379 if (RBI.getRegBank(StoreSrcReg, MRI, TRI) ==
2380 RBI.getRegBank(DefDstReg, MRI, TRI))
2381 return false;
2382
2383 // We have a cross-bank copy, which is entering a store. Let's fold it.
2384 I.getOperand(0).setReg(DefDstReg);
2385 return true;
2386}
2387
2388bool AArch64InstructionSelector::earlySelect(MachineInstr &I) {
2389 assert(I.getParent() && "Instruction should be in a basic block!");
2390 assert(I.getParent()->getParent() && "Instruction should be in a function!");
2391
2392 MachineBasicBlock &MBB = *I.getParent();
2393 MachineFunction &MF = *MBB.getParent();
2394 MachineRegisterInfo &MRI = MF.getRegInfo();
2395
2396 switch (I.getOpcode()) {
2397 case AArch64::G_DUP: {
2398 // Before selecting a DUP instruction, check if it is better selected as a
2399 // MOV or load from a constant pool.
2400 Register Src = I.getOperand(1).getReg();
2401 auto ValAndVReg = getAnyConstantVRegValWithLookThrough(Src, MRI);
2402 if (!ValAndVReg)
2403 return false;
2404 LLVMContext &Ctx = MF.getFunction().getContext();
2405 Register Dst = I.getOperand(0).getReg();
2406 auto *CV = ConstantDataVector::getSplat(
2407 MRI.getType(Dst).getNumElements(),
2408 ConstantInt::get(
2409 Type::getIntNTy(Ctx, MRI.getType(Dst).getScalarSizeInBits()),
2410 ValAndVReg->Value.trunc(MRI.getType(Dst).getScalarSizeInBits())));
2411 if (!emitConstantVector(Dst, CV, MIB, MRI))
2412 return false;
2413 I.eraseFromParent();
2414 return true;
2415 }
2416 case TargetOpcode::G_SEXT:
2417 // Check for i64 sext(i32 vector_extract) prior to tablegen to select SMOV
2418 // over a normal extend.
2419 if (selectUSMovFromExtend(I, MRI))
2420 return true;
2421 return false;
2422 case TargetOpcode::G_BR:
2423 return false;
2424 case TargetOpcode::G_SHL:
2425 return earlySelectSHL(I, MRI);
2426 case TargetOpcode::G_CONSTANT: {
2427 bool IsZero = false;
2428 if (I.getOperand(1).isCImm())
2429 IsZero = I.getOperand(1).getCImm()->isZero();
2430 else if (I.getOperand(1).isImm())
2431 IsZero = I.getOperand(1).getImm() == 0;
2432
2433 if (!IsZero)
2434 return false;
2435
2436 Register DefReg = I.getOperand(0).getReg();
2437 LLT Ty = MRI.getType(DefReg);
2438 if (Ty.getSizeInBits() == 64) {
2439 I.getOperand(1).ChangeToRegister(AArch64::XZR, false);
2440 RBI.constrainGenericRegister(DefReg, AArch64::GPR64RegClass, MRI);
2441 } else if (Ty.getSizeInBits() == 32) {
2442 I.getOperand(1).ChangeToRegister(AArch64::WZR, false);
2443 RBI.constrainGenericRegister(DefReg, AArch64::GPR32RegClass, MRI);
2444 } else
2445 return false;
2446
2447 I.setDesc(TII.get(TargetOpcode::COPY));
2448 return true;
2449 }
2450
2451 case TargetOpcode::G_ADD: {
2452 // Check if this is being fed by a G_ICMP on either side.
2453 //
2454 // (cmp pred, x, y) + z
2455 //
2456 // In the above case, when the cmp is true, we increment z by 1. So, we can
2457 // fold the add into the cset for the cmp by using cinc.
2458 //
2459 // FIXME: This would probably be a lot nicer in PostLegalizerLowering.
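// (Illustrative, with hypothetical vregs: "%d = G_ADD %z, (G_ICMP eq, %x, %y)"
// becomes a compare followed by "csinc %d, %z, %z, ne", which is the alias
// "cinc %d, %z, eq".)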
2460 Register AddDst = I.getOperand(0).getReg();
2461 Register AddLHS = I.getOperand(1).getReg();
2462 Register AddRHS = I.getOperand(2).getReg();
2463 // Only handle scalars.
2464 LLT Ty = MRI.getType(AddLHS);
2465 if (Ty.isVector())
2466 return false;
2467 // Since G_ICMP is modeled as ADDS/SUBS/ANDS, we can handle 32 bits or 64
2468 // bits.
2469 unsigned Size = Ty.getSizeInBits();
2470 if (Size != 32 && Size != 64)
2471 return false;
2472 auto MatchCmp = [&](Register Reg) -> MachineInstr * {
2473 if (!MRI.hasOneNonDBGUse(Reg))
2474 return nullptr;
2475 // If the LHS of the add is 32 bits, then we want to fold a 32-bit
2476 // compare.
2477 if (Size == 32)
2478 return getOpcodeDef(TargetOpcode::G_ICMP, Reg, MRI);
2479 // We model scalar compares using 32-bit destinations right now.
2480 // If it's a 64-bit compare, it'll have 64-bit sources.
2481 Register ZExt;
2482 if (!mi_match(Reg, MRI,
2483 m_OneNonDBGUse(m_GZExt(m_OneNonDBGUse(m_Reg(ZExt))))))
2484 return nullptr;
2485 auto *Cmp = getOpcodeDef(TargetOpcode::G_ICMP, ZExt, MRI);
2486 if (!Cmp ||
2487 MRI.getType(Cmp->getOperand(2).getReg()).getSizeInBits() != 64)
2488 return nullptr;
2489 return Cmp;
2490 };
2491 // Try to match
2492 // z + (cmp pred, x, y)
2493 MachineInstr *Cmp = MatchCmp(AddRHS);
2494 if (!Cmp) {
2495 // (cmp pred, x, y) + z
2496 std::swap(AddLHS, AddRHS);
2497 Cmp = MatchCmp(AddRHS);
2498 if (!Cmp)
2499 return false;
2500 }
2501 auto &PredOp = Cmp->getOperand(1);
2502 auto Pred = static_cast<CmpInst::Predicate>(PredOp.getPredicate());
2503 const AArch64CC::CondCode InvCC =
2504 changeICMPPredToAArch64CC(CmpInst::getInversePredicate(Pred));
2506 emitIntegerCompare(/*LHS=*/Cmp->getOperand(2),
2507 /*RHS=*/Cmp->getOperand(3), PredOp, MIB);
2508 emitCSINC(/*Dst=*/AddDst, /*Src =*/AddLHS, /*Src2=*/AddLHS, InvCC, MIB);
2509 I.eraseFromParent();
2510 return true;
2511 }
2512 case TargetOpcode::G_OR: {
2513 // Look for operations that take the lower `Width=Size-ShiftImm` bits of
2514 // `ShiftSrc` and insert them into the upper `Width` bits of `MaskSrc` via
2515 // shifting and masking that we can replace with a BFI (encoded as a BFM).
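// (Illustrative: with Size = 32 and ShiftImm = 8, "or (shl %x, 8), (and %y, 0xff)"
// becomes "bfi %y, %x, #8, #24", i.e. the BFMWri emitted below with immr = 24
// and imms = 23.)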
2516 Register Dst = I.getOperand(0).getReg();
2517 LLT Ty = MRI.getType(Dst);
2518
2519 if (!Ty.isScalar())
2520 return false;
2521
2522 unsigned Size = Ty.getSizeInBits();
2523 if (Size != 32 && Size != 64)
2524 return false;
2525
2526 Register ShiftSrc;
2527 int64_t ShiftImm;
2528 Register MaskSrc;
2529 int64_t MaskImm;
2530 if (!mi_match(
2531 Dst, MRI,
2532 m_GOr(m_OneNonDBGUse(m_GShl(m_Reg(ShiftSrc), m_ICst(ShiftImm))),
2533 m_OneNonDBGUse(m_GAnd(m_Reg(MaskSrc), m_ICst(MaskImm))))))
2534 return false;
2535
2536 if (ShiftImm > Size || ((1ULL << ShiftImm) - 1ULL) != uint64_t(MaskImm))
2537 return false;
2538
2539 int64_t Immr = Size - ShiftImm;
2540 int64_t Imms = Size - ShiftImm - 1;
2541 unsigned Opc = Size == 32 ? AArch64::BFMWri : AArch64::BFMXri;
2542 emitInstr(Opc, {Dst}, {MaskSrc, ShiftSrc, Immr, Imms}, MIB);
2543 I.eraseFromParent();
2544 return true;
2545 }
2546 case TargetOpcode::G_FENCE: {
2547 if (I.getOperand(1).getImm() == 0)
2548 BuildMI(MBB, I, MIMetadata(I), TII.get(TargetOpcode::MEMBARRIER));
2549 else
2550 BuildMI(MBB, I, MIMetadata(I), TII.get(AArch64::DMB))
2551 .addImm(I.getOperand(0).getImm() == 4 ? 0x9 : 0xb);
2552 I.eraseFromParent();
2553 return true;
2554 }
2555 default:
2556 return false;
2557 }
2558}
2559
2560bool AArch64InstructionSelector::select(MachineInstr &I) {
2561 assert(I.getParent() && "Instruction should be in a basic block!");
2562 assert(I.getParent()->getParent() && "Instruction should be in a function!");
2563
2564 MachineBasicBlock &MBB = *I.getParent();
2565 MachineFunction &MF = *MBB.getParent();
2566 MachineRegisterInfo &MRI = MF.getRegInfo();
2567
2568 const AArch64Subtarget *Subtarget = &MF.getSubtarget<AArch64Subtarget>();
2569 if (Subtarget->requiresStrictAlign()) {
2570 // We don't support this feature yet.
2571 LLVM_DEBUG(dbgs() << "AArch64 GISel does not support strict-align yet\n");
2572 return false;
2573 }
2574
2575 MIB.setInstrAndDebugLoc(I);
2576
2577 unsigned Opcode = I.getOpcode();
2578 // G_PHI requires same handling as PHI
2579 if (!I.isPreISelOpcode() || Opcode == TargetOpcode::G_PHI) {
2580 // Certain non-generic instructions also need some special handling.
2581
2582 if (Opcode == TargetOpcode::LOAD_STACK_GUARD)
2583 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2584
2585 if (Opcode == TargetOpcode::PHI || Opcode == TargetOpcode::G_PHI) {
2586 const Register DefReg = I.getOperand(0).getReg();
2587 const LLT DefTy = MRI.getType(DefReg);
2588
2589 const RegClassOrRegBank &RegClassOrBank =
2590 MRI.getRegClassOrRegBank(DefReg);
2591
2592 const TargetRegisterClass *DefRC =
2593 dyn_cast<const TargetRegisterClass *>(RegClassOrBank);
2594 if (!DefRC) {
2595 if (!DefTy.isValid()) {
2596 LLVM_DEBUG(dbgs() << "PHI operand has no type, not a gvreg?\n");
2597 return false;
2598 }
2599 const RegisterBank &RB = *cast<const RegisterBank *>(RegClassOrBank);
2600 DefRC = getRegClassForTypeOnBank(DefTy, RB);
2601 if (!DefRC) {
2602 LLVM_DEBUG(dbgs() << "PHI operand has unexpected size/bank\n");
2603 return false;
2604 }
2605 }
2606
2607 I.setDesc(TII.get(TargetOpcode::PHI));
2608
2609 return RBI.constrainGenericRegister(DefReg, *DefRC, MRI);
2610 }
2611
2612 if (I.isCopy())
2613 return selectCopy(I, TII, MRI, TRI, RBI);
2614
2615 if (I.isDebugInstr())
2616 return selectDebugInstr(I, MRI, RBI);
2617
2618 return true;
2619 }
2620
2621
2622 if (I.getNumOperands() != I.getNumExplicitOperands()) {
2623 LLVM_DEBUG(
2624 dbgs() << "Generic instruction has unexpected implicit operands\n");
2625 return false;
2626 }
2627
2628 // Try to do some lowering before we start instruction selecting. These
2629 // lowerings are purely transformations on the input G_MIR and so selection
2630 // must continue after any modification of the instruction.
2631 if (preISelLower(I)) {
2632 Opcode = I.getOpcode(); // The opcode may have been modified, refresh it.
2633 }
2634
2635 // There may be patterns that the importer can't handle optimally but still
2636 // selects to a suboptimal sequence, so our custom C++ selection code later
2637 // never gets a chance to work on them. Therefore, we have an early selection
2638 // attempt here to give priority to certain selection routines over the
2639 // imported ones.
2640 if (earlySelect(I))
2641 return true;
2642
2643 if (selectImpl(I, *CoverageInfo))
2644 return true;
2645
2646 LLT Ty =
2647 I.getOperand(0).isReg() ? MRI.getType(I.getOperand(0).getReg()) : LLT{};
2648
2649 switch (Opcode) {
2650 case TargetOpcode::G_SBFX:
2651 case TargetOpcode::G_UBFX: {
2652 static const unsigned OpcTable[2][2] = {
2653 {AArch64::UBFMWri, AArch64::UBFMXri},
2654 {AArch64::SBFMWri, AArch64::SBFMXri}};
2655 bool IsSigned = Opcode == TargetOpcode::G_SBFX;
2656 unsigned Size = Ty.getSizeInBits();
2657 unsigned Opc = OpcTable[IsSigned][Size == 64];
2658 auto Cst1 =
2659 getIConstantVRegValWithLookThrough(I.getOperand(2).getReg(), MRI);
2660 assert(Cst1 && "Should have gotten a constant for src 1?");
2661 auto Cst2 =
2662 getIConstantVRegValWithLookThrough(I.getOperand(3).getReg(), MRI);
2663 assert(Cst2 && "Should have gotten a constant for src 2?");
2664 auto LSB = Cst1->Value.getZExtValue();
2665 auto Width = Cst2->Value.getZExtValue();
2666 auto BitfieldInst =
2667 MIB.buildInstr(Opc, {I.getOperand(0)}, {I.getOperand(1)})
2668 .addImm(LSB)
2669 .addImm(LSB + Width - 1);
2670 I.eraseFromParent();
2671 return constrainSelectedInstRegOperands(*BitfieldInst, TII, TRI, RBI);
2672 }
2673 case TargetOpcode::G_BRCOND:
2674 return selectCompareBranch(I, MF, MRI);
2675
2676 case TargetOpcode::G_BRINDIRECT: {
2677 const Function &Fn = MF.getFunction();
2678 if (std::optional<uint16_t> BADisc =
2679 STI.getPtrAuthBlockAddressDiscriminatorIfEnabled(Fn)) {
2680 auto MI = MIB.buildInstr(AArch64::BRA, {}, {I.getOperand(0).getReg()});
2681 MI.addImm(AArch64PACKey::IA);
2682 MI.addImm(*BADisc);
2683 MI.addReg(/*AddrDisc=*/AArch64::XZR);
2684 I.eraseFromParent();
2685 return constrainSelectedInstRegOperands(*MI, TII, TRI, RBI);
2686 }
2687 I.setDesc(TII.get(AArch64::BR));
2688 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2689 }
2690
2691 case TargetOpcode::G_BRJT:
2692 return selectBrJT(I, MRI);
2693
2694 case AArch64::G_ADD_LOW: {
2695 // This op may have been separated from its ADRP companion by the localizer
2696 // or some other code motion pass. Given that many CPUs will try to
2697 // macro fuse these operations anyway, select this into a MOVaddr pseudo
2698 // which will later be expanded into an ADRP+ADD pair after scheduling.
2699 MachineInstr *BaseMI = MRI.getVRegDef(I.getOperand(1).getReg());
2700 if (BaseMI->getOpcode() != AArch64::ADRP) {
2701 I.setDesc(TII.get(AArch64::ADDXri));
2702 I.addOperand(MachineOperand::CreateImm(0));
2703 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2704 }
2705 assert(TM.getCodeModel() == CodeModel::Small &&
2706 "Expected small code model");
2707 auto Op1 = BaseMI->getOperand(1);
2708 auto Op2 = I.getOperand(2);
2709 auto MovAddr = MIB.buildInstr(AArch64::MOVaddr, {I.getOperand(0)}, {})
2710 .addGlobalAddress(Op1.getGlobal(), Op1.getOffset(),
2711 Op1.getTargetFlags())
2712 .addGlobalAddress(Op2.getGlobal(), Op2.getOffset(),
2713 Op2.getTargetFlags());
2714 I.eraseFromParent();
2715 return constrainSelectedInstRegOperands(*MovAddr, TII, TRI, RBI);
2716 }
2717
2718 case TargetOpcode::G_FCONSTANT:
2719 case TargetOpcode::G_CONSTANT: {
2720 const bool isFP = Opcode == TargetOpcode::G_FCONSTANT;
2721
2722 const LLT s8 = LLT::scalar(8);
2723 const LLT s16 = LLT::scalar(16);
2724 const LLT s32 = LLT::scalar(32);
2725 const LLT s64 = LLT::scalar(64);
2726 const LLT s128 = LLT::scalar(128);
2727 const LLT p0 = LLT::pointer(0, 64);
2728
2729 const Register DefReg = I.getOperand(0).getReg();
2730 const LLT DefTy = MRI.getType(DefReg);
2731 const unsigned DefSize = DefTy.getSizeInBits();
2732 const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI);
2733
2734 // FIXME: Redundant check, but even less readable when factored out.
2735 if (isFP) {
2736 if (Ty != s16 && Ty != s32 && Ty != s64 && Ty != s128) {
2737 LLVM_DEBUG(dbgs() << "Unable to materialize FP " << Ty
2738 << " constant, expected: " << s16 << " or " << s32
2739 << " or " << s64 << " or " << s128 << '\n');
2740 return false;
2741 }
2742
2743 if (RB.getID() != AArch64::FPRRegBankID) {
2744 LLVM_DEBUG(dbgs() << "Unable to materialize FP " << Ty
2745 << " constant on bank: " << RB
2746 << ", expected: FPR\n");
2747 return false;
2748 }
2749
2750 // The case when we have 0.0 is covered by tablegen. Reject it here so we
2751 // can be sure tablegen works correctly and isn't rescued by this code.
2752 // 0.0 is not covered by tablegen for FP128. So we will handle this
2753 // scenario in the code here.
2754 if (DefSize != 128 && I.getOperand(1).getFPImm()->isExactlyValue(0.0))
2755 return false;
2756 } else {
2757 // s32 and s64 are covered by tablegen.
2758 if (Ty != p0 && Ty != s8 && Ty != s16) {
2759 LLVM_DEBUG(dbgs() << "Unable to materialize integer " << Ty
2760 << " constant, expected: " << s32 << ", " << s64
2761 << ", or " << p0 << '\n');
2762 return false;
2763 }
2764
2765 if (RB.getID() != AArch64::GPRRegBankID) {
2766 LLVM_DEBUG(dbgs() << "Unable to materialize integer " << Ty
2767 << " constant on bank: " << RB
2768 << ", expected: GPR\n");
2769 return false;
2770 }
2771 }
2772
2773 if (isFP) {
2774 const TargetRegisterClass &FPRRC = *getRegClassForTypeOnBank(DefTy, RB);
2775 // For 16, 64, and 128b values, emit a constant pool load.
2776 switch (DefSize) {
2777 default:
2778 llvm_unreachable("Unexpected destination size for G_FCONSTANT?");
2779 case 32:
2780 case 64: {
2781 bool OptForSize = shouldOptForSize(&MF);
2782 const auto &TLI = MF.getSubtarget().getTargetLowering();
2783 // If TLI says that this fpimm is illegal, then we'll expand to a
2784 // constant pool load.
2785 if (TLI->isFPImmLegal(I.getOperand(1).getFPImm()->getValueAPF(),
2786 EVT::getFloatingPointVT(DefSize), OptForSize))
2787 break;
2788 [[fallthrough]];
2789 }
2790 case 16:
2791 case 128: {
2792 auto *FPImm = I.getOperand(1).getFPImm();
2793 auto *LoadMI = emitLoadFromConstantPool(FPImm, MIB);
2794 if (!LoadMI) {
2795 LLVM_DEBUG(dbgs() << "Failed to load double constant pool entry\n");
2796 return false;
2797 }
2798 MIB.buildCopy({DefReg}, {LoadMI->getOperand(0).getReg()});
2799 I.eraseFromParent();
2800 return RBI.constrainGenericRegister(DefReg, FPRRC, MRI);
2801 }
2802 }
2803
2804 assert((DefSize == 32 || DefSize == 64) && "Unexpected const def size");
2805 // Either emit a FMOV, or emit a copy to emit a normal mov.
2806 const Register DefGPRReg = MRI.createVirtualRegister(
2807 DefSize == 32 ? &AArch64::GPR32RegClass : &AArch64::GPR64RegClass);
2808 MachineOperand &RegOp = I.getOperand(0);
2809 RegOp.setReg(DefGPRReg);
2810 MIB.setInsertPt(MIB.getMBB(), std::next(I.getIterator()));
2811 MIB.buildCopy({DefReg}, {DefGPRReg});
2812
2813 if (!RBI.constrainGenericRegister(DefReg, FPRRC, MRI)) {
2814 LLVM_DEBUG(dbgs() << "Failed to constrain G_FCONSTANT def operand\n");
2815 return false;
2816 }
2817
2818 MachineOperand &ImmOp = I.getOperand(1);
2819 // FIXME: Is going through int64_t always correct?
2820 ImmOp.ChangeToImmediate(
2821 ImmOp.getFPImm()->getValueAPF().bitcastToAPInt().getZExtValue());
2822 } else if (I.getOperand(1).isCImm()) {
2823 uint64_t Val = I.getOperand(1).getCImm()->getZExtValue();
2824 I.getOperand(1).ChangeToImmediate(Val);
2825 } else if (I.getOperand(1).isImm()) {
2826 uint64_t Val = I.getOperand(1).getImm();
2827 I.getOperand(1).ChangeToImmediate(Val);
2828 }
2829
2830 const unsigned MovOpc =
2831 DefSize == 64 ? AArch64::MOVi64imm : AArch64::MOVi32imm;
2832 I.setDesc(TII.get(MovOpc));
2833 constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2834 return true;
2835 }
2836 case TargetOpcode::G_EXTRACT: {
2837 Register DstReg = I.getOperand(0).getReg();
2838 Register SrcReg = I.getOperand(1).getReg();
2839 LLT SrcTy = MRI.getType(SrcReg);
2840 LLT DstTy = MRI.getType(DstReg);
2841 (void)DstTy;
2842 unsigned SrcSize = SrcTy.getSizeInBits();
2843
2844 if (SrcTy.getSizeInBits() > 64) {
2845 // This should be an extract of an s128, which is like a vector extract.
2846 if (SrcTy.getSizeInBits() != 128)
2847 return false;
2848 // Only support extracting 64 bits from an s128 at the moment.
2849 if (DstTy.getSizeInBits() != 64)
2850 return false;
2851
2852 unsigned Offset = I.getOperand(2).getImm();
2853 if (Offset % 64 != 0)
2854 return false;
2855
2856 // Check we have the right regbank always.
2857 const RegisterBank &SrcRB = *RBI.getRegBank(SrcReg, MRI, TRI);
2858 const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
2859 assert(SrcRB.getID() == DstRB.getID() && "Wrong extract regbank!");
2860
2861 if (SrcRB.getID() == AArch64::GPRRegBankID) {
2862 auto NewI =
2863 MIB.buildInstr(TargetOpcode::COPY, {DstReg}, {})
2864 .addUse(SrcReg, 0,
2865 Offset == 0 ? AArch64::sube64 : AArch64::subo64);
2866 constrainOperandRegClass(MF, TRI, MRI, TII, RBI, *NewI,
2867 AArch64::GPR64RegClass, NewI->getOperand(0));
2868 I.eraseFromParent();
2869 return true;
2870 }
2871
2872 // Emit the same code as a vector extract.
2873 // Offset must be a multiple of 64.
2874 unsigned LaneIdx = Offset / 64;
2875 MachineInstr *Extract = emitExtractVectorElt(
2876 DstReg, DstRB, LLT::scalar(64), SrcReg, LaneIdx, MIB);
2877 if (!Extract)
2878 return false;
2879 I.eraseFromParent();
2880 return true;
2881 }
2882
2883 I.setDesc(TII.get(SrcSize == 64 ? AArch64::UBFMXri : AArch64::UBFMWri));
2884 MachineInstrBuilder(MF, I).addImm(I.getOperand(2).getImm() +
2885 Ty.getSizeInBits() - 1);
2886
2887 if (SrcSize < 64) {
2888 assert(SrcSize == 32 && DstTy.getSizeInBits() == 16 &&
2889 "unexpected G_EXTRACT types");
2890 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2891 }
2892
2893 DstReg = MRI.createGenericVirtualRegister(LLT::scalar(64));
2894 MIB.setInsertPt(MIB.getMBB(), std::next(I.getIterator()));
2895 MIB.buildInstr(TargetOpcode::COPY, {I.getOperand(0).getReg()}, {})
2896 .addReg(DstReg, 0, AArch64::sub_32);
2897 RBI.constrainGenericRegister(I.getOperand(0).getReg(),
2898 AArch64::GPR32RegClass, MRI);
2899 I.getOperand(0).setReg(DstReg);
2900
2901 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2902 }
2903
2904 case TargetOpcode::G_INSERT: {
2905 LLT SrcTy = MRI.getType(I.getOperand(2).getReg());
2906 LLT DstTy = MRI.getType(I.getOperand(0).getReg());
2907 unsigned DstSize = DstTy.getSizeInBits();
2908 // Larger inserts are vectors, same-size ones should be something else by
2909 // now (split up or turned into COPYs).
2910 if (Ty.getSizeInBits() > 64 || SrcTy.getSizeInBits() > 32)
2911 return false;
2912
2913 I.setDesc(TII.get(DstSize == 64 ? AArch64::BFMXri : AArch64::BFMWri));
2914 unsigned LSB = I.getOperand(3).getImm();
2915 unsigned Width = MRI.getType(I.getOperand(2).getReg()).getSizeInBits();
2916 I.getOperand(3).setImm((DstSize - LSB) % DstSize);
2917 MachineInstrBuilder(MF, I).addImm(Width - 1);
2918
2919 if (DstSize < 64) {
2920 assert(DstSize == 32 && SrcTy.getSizeInBits() == 16 &&
2921 "unexpected G_INSERT types");
2923 }
2924
2925 Register SrcReg = MRI.createGenericVirtualRegister(LLT::scalar(64));
2926 BuildMI(MBB, I.getIterator(), I.getDebugLoc(),
2927 TII.get(AArch64::SUBREG_TO_REG))
2928 .addDef(SrcReg)
2929 .addImm(0)
2930 .addUse(I.getOperand(2).getReg())
2931 .addImm(AArch64::sub_32);
2932 RBI.constrainGenericRegister(I.getOperand(2).getReg(),
2933 AArch64::GPR32RegClass, MRI);
2934 I.getOperand(2).setReg(SrcReg);
2935
2936 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2937 }
2938 case TargetOpcode::G_FRAME_INDEX: {
2939 // allocas and G_FRAME_INDEX are only supported in addrspace(0).
2940 if (Ty != LLT::pointer(0, 64)) {
2941 LLVM_DEBUG(dbgs() << "G_FRAME_INDEX pointer has type: " << Ty
2942 << ", expected: " << LLT::pointer(0, 64) << '\n');
2943 return false;
2944 }
2945 I.setDesc(TII.get(AArch64::ADDXri));
2946
2947 // MOs for a #0 shifted immediate.
2948 I.addOperand(MachineOperand::CreateImm(0));
2949 I.addOperand(MachineOperand::CreateImm(0));
2950
2951 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2952 }
2953
2954 case TargetOpcode::G_GLOBAL_VALUE: {
2955 const GlobalValue *GV = nullptr;
2956 unsigned OpFlags;
2957 if (I.getOperand(1).isSymbol()) {
2958 OpFlags = I.getOperand(1).getTargetFlags();
2959 // Currently only used by "RtLibUseGOT".
2960 assert(OpFlags == AArch64II::MO_GOT);
2961 } else {
2962 GV = I.getOperand(1).getGlobal();
2963 if (GV->isThreadLocal())
2964 return selectTLSGlobalValue(I, MRI);
2965 OpFlags = STI.ClassifyGlobalReference(GV, TM);
2966 }
2967
2968 if (OpFlags & AArch64II::MO_GOT) {
2969 I.setDesc(TII.get(MF.getInfo<AArch64FunctionInfo>()->hasELFSignedGOT()
2970 ? AArch64::LOADgotAUTH
2971 : AArch64::LOADgot));
2972 I.getOperand(1).setTargetFlags(OpFlags);
2973 } else if (TM.getCodeModel() == CodeModel::Large &&
2974 !TM.isPositionIndependent()) {
2975 // Materialize the global using movz/movk instructions.
2976 materializeLargeCMVal(I, GV, OpFlags);
2977 I.eraseFromParent();
2978 return true;
2979 } else if (TM.getCodeModel() == CodeModel::Tiny) {
2980 I.setDesc(TII.get(AArch64::ADR));
2981 I.getOperand(1).setTargetFlags(OpFlags);
2982 } else {
2983 I.setDesc(TII.get(AArch64::MOVaddr));
2984 I.getOperand(1).setTargetFlags(OpFlags | AArch64II::MO_PAGE);
2985 MachineInstrBuilder MIB(MF, I);
2986 MIB.addGlobalAddress(GV, I.getOperand(1).getOffset(),
2987 OpFlags | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
2988 }
2989 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2990 }
2991
2992 case TargetOpcode::G_PTRAUTH_GLOBAL_VALUE:
2993 return selectPtrAuthGlobalValue(I, MRI);
2994
2995 case TargetOpcode::G_ZEXTLOAD:
2996 case TargetOpcode::G_LOAD:
2997 case TargetOpcode::G_STORE: {
2998 GLoadStore &LdSt = cast<GLoadStore>(I);
2999 bool IsZExtLoad = I.getOpcode() == TargetOpcode::G_ZEXTLOAD;
3000 LLT PtrTy = MRI.getType(LdSt.getPointerReg());
3001
3002 if (PtrTy != LLT::pointer(0, 64)) {
3003 LLVM_DEBUG(dbgs() << "Load/Store pointer has type: " << PtrTy
3004 << ", expected: " << LLT::pointer(0, 64) << '\n');
3005 return false;
3006 }
3007
3008 uint64_t MemSizeInBytes = LdSt.getMemSize().getValue();
3009 unsigned MemSizeInBits = LdSt.getMemSizeInBits().getValue();
3010 AtomicOrdering Order = LdSt.getMMO().getSuccessOrdering();
3011
3012 // Need special instructions for atomics that affect ordering.
3013 if (Order != AtomicOrdering::NotAtomic &&
3014 Order != AtomicOrdering::Unordered &&
3015 Order != AtomicOrdering::Monotonic) {
3016 assert(!isa<GZExtLoad>(LdSt));
3017 assert(MemSizeInBytes <= 8 &&
3018 "128-bit atomics should already be custom-legalized");
3019
3020 if (isa<GLoad>(LdSt)) {
3021 static constexpr unsigned LDAPROpcodes[] = {
3022 AArch64::LDAPRB, AArch64::LDAPRH, AArch64::LDAPRW, AArch64::LDAPRX};
3023 static constexpr unsigned LDAROpcodes[] = {
3024 AArch64::LDARB, AArch64::LDARH, AArch64::LDARW, AArch64::LDARX};
3025 ArrayRef<unsigned> Opcodes =
3026 STI.hasRCPC() && Order != AtomicOrdering::SequentiallyConsistent
3027 ? LDAPROpcodes
3028 : LDAROpcodes;
3029 I.setDesc(TII.get(Opcodes[Log2_32(MemSizeInBytes)]));
3030 } else {
3031 static constexpr unsigned Opcodes[] = {AArch64::STLRB, AArch64::STLRH,
3032 AArch64::STLRW, AArch64::STLRX};
3033 Register ValReg = LdSt.getReg(0);
3034 if (MRI.getType(ValReg).getSizeInBits() == 64 && MemSizeInBits != 64) {
3035 // Emit a subreg copy of 32 bits.
3036 Register NewVal = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
3037 MIB.buildInstr(TargetOpcode::COPY, {NewVal}, {})
3038 .addReg(I.getOperand(0).getReg(), 0, AArch64::sub_32);
3039 I.getOperand(0).setReg(NewVal);
3040 }
3041 I.setDesc(TII.get(Opcodes[Log2_32(MemSizeInBytes)]));
3042 }
3043 constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3044 return true;
3045 }
3046
3047#ifndef NDEBUG
3048 const Register PtrReg = LdSt.getPointerReg();
3049 const RegisterBank &PtrRB = *RBI.getRegBank(PtrReg, MRI, TRI);
3050 // Check that the pointer register is valid.
3051 assert(PtrRB.getID() == AArch64::GPRRegBankID &&
3052 "Load/Store pointer operand isn't a GPR");
3053 assert(MRI.getType(PtrReg).isPointer() &&
3054 "Load/Store pointer operand isn't a pointer");
3055#endif
3056
3057 const Register ValReg = LdSt.getReg(0);
3058 const RegisterBank &RB = *RBI.getRegBank(ValReg, MRI, TRI);
3059 LLT ValTy = MRI.getType(ValReg);
3060
3061 // The code below doesn't support truncating stores, so we need to split it
3062 // again.
3063 if (isa<GStore>(LdSt) && ValTy.getSizeInBits() > MemSizeInBits) {
3064 unsigned SubReg;
3065 LLT MemTy = LdSt.getMMO().getMemoryType();
3066 auto *RC = getRegClassForTypeOnBank(MemTy, RB);
3067 if (!getSubRegForClass(RC, TRI, SubReg))
3068 return false;
3069
3070 // Generate a subreg copy.
3071 auto Copy = MIB.buildInstr(TargetOpcode::COPY, {MemTy}, {})
3072 .addReg(ValReg, 0, SubReg)
3073 .getReg(0);
3074 RBI.constrainGenericRegister(Copy, *RC, MRI);
3075 LdSt.getOperand(0).setReg(Copy);
3076 } else if (isa<GLoad>(LdSt) && ValTy.getSizeInBits() > MemSizeInBits) {
3077 // If this is an any-extending load from the FPR bank, split it into a regular
3078 // load + extend.
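// (Illustrative: "%d:fpr(s32) = G_LOAD %p :: (load (s16))" becomes a plain s16
// FPR load into a fresh vreg that is then widened to %d by the SUBREG_TO_REG
// built below.)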
3079 if (RB.getID() == AArch64::FPRRegBankID) {
3080 unsigned SubReg;
3081 LLT MemTy = LdSt.getMMO().getMemoryType();
3082 auto *RC = getRegClassForTypeOnBank(MemTy, RB);
3083 if (!getSubRegForClass(RC, TRI, SubReg))
3084 return false;
3085 Register OldDst = LdSt.getReg(0);
3086 Register NewDst =
3087 MRI.createGenericVirtualRegister(LdSt.getMMO().getMemoryType());
3088 LdSt.getOperand(0).setReg(NewDst);
3089 MRI.setRegBank(NewDst, RB);
3090 // Generate a SUBREG_TO_REG to extend it.
3091 MIB.setInsertPt(MIB.getMBB(), std::next(LdSt.getIterator()));
3092 MIB.buildInstr(AArch64::SUBREG_TO_REG, {OldDst}, {})
3093 .addImm(0)
3094 .addUse(NewDst)
3095 .addImm(SubReg);
3096 auto SubRegRC = getRegClassForTypeOnBank(MRI.getType(OldDst), RB);
3097 RBI.constrainGenericRegister(OldDst, *SubRegRC, MRI);
3098 MIB.setInstr(LdSt);
3099 ValTy = MemTy; // This is no longer an extending load.
3100 }
3101 }
3102
3103 // Helper lambda for partially selecting I. Either returns the original
3104 // instruction with an updated opcode, or a new instruction.
3105 auto SelectLoadStoreAddressingMode = [&]() -> MachineInstr * {
3106 bool IsStore = isa<GStore>(I);
3107 const unsigned NewOpc =
3108 selectLoadStoreUIOp(I.getOpcode(), RB.getID(), MemSizeInBits);
3109 if (NewOpc == I.getOpcode())
3110 return nullptr;
3111 // Check if we can fold anything into the addressing mode.
3112 auto AddrModeFns =
3113 selectAddrModeIndexed(I.getOperand(1), MemSizeInBytes);
3114 if (!AddrModeFns) {
3115 // Can't fold anything. Use the original instruction.
3116 I.setDesc(TII.get(NewOpc));
3117 I.addOperand(MachineOperand::CreateImm(0));
3118 return &I;
3119 }
3120
3121 // Folded something. Create a new instruction and return it.
3122 auto NewInst = MIB.buildInstr(NewOpc, {}, {}, I.getFlags());
3123 Register CurValReg = I.getOperand(0).getReg();
3124 IsStore ? NewInst.addUse(CurValReg) : NewInst.addDef(CurValReg);
3125 NewInst.cloneMemRefs(I);
3126 for (auto &Fn : *AddrModeFns)
3127 Fn(NewInst);
3128 I.eraseFromParent();
3129 return &*NewInst;
3130 };
3131
3132 MachineInstr *LoadStore = SelectLoadStoreAddressingMode();
3133 if (!LoadStore)
3134 return false;
3135
3136 // If we're storing a 0, use WZR/XZR.
3137 if (Opcode == TargetOpcode::G_STORE) {
3138 auto CVal = getIConstantVRegValWithLookThrough(
3139 LoadStore->getOperand(0).getReg(), MRI);
3140 if (CVal && CVal->Value == 0) {
3141 switch (LoadStore->getOpcode()) {
3142 case AArch64::STRWui:
3143 case AArch64::STRHHui:
3144 case AArch64::STRBBui:
3145 LoadStore->getOperand(0).setReg(AArch64::WZR);
3146 break;
3147 case AArch64::STRXui:
3148 LoadStore->getOperand(0).setReg(AArch64::XZR);
3149 break;
3150 }
3151 }
3152 }
3153
3154 if (IsZExtLoad || (Opcode == TargetOpcode::G_LOAD &&
3155 ValTy == LLT::scalar(64) && MemSizeInBits == 32)) {
3156 // The any/zextload from a smaller type to i32 should be handled by the
3157 // importer.
3158 if (MRI.getType(LoadStore->getOperand(0).getReg()).getSizeInBits() != 64)
3159 return false;
3160 // If we have an extending load then change the load's type to be a
3161 // narrower reg and zero_extend with SUBREG_TO_REG.
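// (Illustrative: "%d:gpr(s64) = G_ZEXTLOAD %p :: (load (s32))" is selected as a
// 32-bit load, e.g. an LDRWui, into a fresh W register %w followed by
// "%d = SUBREG_TO_REG 0, %w, sub_32".)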
3162 Register LdReg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
3163 Register DstReg = LoadStore->getOperand(0).getReg();
3164 LoadStore->getOperand(0).setReg(LdReg);
3165
3166 MIB.setInsertPt(MIB.getMBB(), std::next(LoadStore->getIterator()));
3167 MIB.buildInstr(AArch64::SUBREG_TO_REG, {DstReg}, {})
3168 .addImm(0)
3169 .addUse(LdReg)
3170 .addImm(AArch64::sub_32);
3171 constrainSelectedInstRegOperands(*LoadStore, TII, TRI, RBI);
3172 return RBI.constrainGenericRegister(DstReg, AArch64::GPR64allRegClass,
3173 MRI);
3174 }
3175 return constrainSelectedInstRegOperands(*LoadStore, TII, TRI, RBI);
3176 }
3177
3178 case TargetOpcode::G_INDEXED_ZEXTLOAD:
3179 case TargetOpcode::G_INDEXED_SEXTLOAD:
3180 return selectIndexedExtLoad(I, MRI);
3181 case TargetOpcode::G_INDEXED_LOAD:
3182 return selectIndexedLoad(I, MRI);
3183 case TargetOpcode::G_INDEXED_STORE:
3184 return selectIndexedStore(cast<GIndexedStore>(I), MRI);
3185
3186 case TargetOpcode::G_LSHR:
3187 case TargetOpcode::G_ASHR:
3188 if (MRI.getType(I.getOperand(0).getReg()).isVector())
3189 return selectVectorAshrLshr(I, MRI);
3190 [[fallthrough]];
3191 case TargetOpcode::G_SHL:
3192 if (Opcode == TargetOpcode::G_SHL &&
3193 MRI.getType(I.getOperand(0).getReg()).isVector())
3194 return selectVectorSHL(I, MRI);
3195
3196 // These shifts were legalized to have 64-bit shift amounts because we
3197 // want to take advantage of the selection patterns that assume the
3198 // immediates are s64s. However, selectBinaryOp will assume both operands
3199 // have the same bit size.
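// (Illustrative: for "%d:gpr(s32) = G_SHL %x:gpr(s32), %amt:gpr(s64)" the block
// below copies the sub_32 subregister of %amt so that both operands are 32 bits
// before the imported patterns run.)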
3200 {
3201 Register SrcReg = I.getOperand(1).getReg();
3202 Register ShiftReg = I.getOperand(2).getReg();
3203 const LLT ShiftTy = MRI.getType(ShiftReg);
3204 const LLT SrcTy = MRI.getType(SrcReg);
3205 if (!SrcTy.isVector() && SrcTy.getSizeInBits() == 32 &&
3206 ShiftTy.getSizeInBits() == 64) {
3207 assert(!ShiftTy.isVector() && "unexpected vector shift ty");
3208 // Insert a subregister copy to implement a 64->32 trunc
3209 auto Trunc = MIB.buildInstr(TargetOpcode::COPY, {SrcTy}, {})
3210 .addReg(ShiftReg, 0, AArch64::sub_32);
3211 MRI.setRegBank(Trunc.getReg(0), RBI.getRegBank(AArch64::GPRRegBankID));
3212 I.getOperand(2).setReg(Trunc.getReg(0));
3213 }
3214 }
3215 [[fallthrough]];
3216 case TargetOpcode::G_OR: {
3217 // Reject the various things we don't support yet.
3218 if (unsupportedBinOp(I, RBI, MRI, TRI))
3219 return false;
3220
3221 const unsigned OpSize = Ty.getSizeInBits();
3222
3223 const Register DefReg = I.getOperand(0).getReg();
3224 const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI);
3225
3226 const unsigned NewOpc = selectBinaryOp(I.getOpcode(), RB.getID(), OpSize);
3227 if (NewOpc == I.getOpcode())
3228 return false;
3229
3230 I.setDesc(TII.get(NewOpc));
3231 // FIXME: Should the type be always reset in setDesc?
3232
3233 // Now that we selected an opcode, we need to constrain the register
3234 // operands to use appropriate classes.
3235 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3236 }
3237
3238 case TargetOpcode::G_PTR_ADD: {
3239 emitADD(I.getOperand(0).getReg(), I.getOperand(1), I.getOperand(2), MIB);
3240 I.eraseFromParent();
3241 return true;
3242 }
3243
3244 case TargetOpcode::G_SADDE:
3245 case TargetOpcode::G_UADDE:
3246 case TargetOpcode::G_SSUBE:
3247 case TargetOpcode::G_USUBE:
3248 case TargetOpcode::G_SADDO:
3249 case TargetOpcode::G_UADDO:
3250 case TargetOpcode::G_SSUBO:
3251 case TargetOpcode::G_USUBO:
3252 return selectOverflowOp(I, MRI);
3253
3254 case TargetOpcode::G_PTRMASK: {
3255 Register MaskReg = I.getOperand(2).getReg();
3256 std::optional<int64_t> MaskVal = getIConstantVRegSExtVal(MaskReg, MRI);
3257 // TODO: Implement arbitrary cases
3258 if (!MaskVal || !isShiftedMask_64(*MaskVal))
3259 return false;
3260
3261 uint64_t Mask = *MaskVal;
3262 I.setDesc(TII.get(AArch64::ANDXri));
3263 I.getOperand(2).ChangeToImmediate(
3264 AArch64_AM::encodeLogicalImmediate(Mask, 64));
3265
3266 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3267 }
3268 case TargetOpcode::G_PTRTOINT:
3269 case TargetOpcode::G_TRUNC: {
3270 const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
3271 const LLT SrcTy = MRI.getType(I.getOperand(1).getReg());
3272
3273 const Register DstReg = I.getOperand(0).getReg();
3274 const Register SrcReg = I.getOperand(1).getReg();
3275
3276 const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
3277 const RegisterBank &SrcRB = *RBI.getRegBank(SrcReg, MRI, TRI);
3278
3279 if (DstRB.getID() != SrcRB.getID()) {
3280 LLVM_DEBUG(
3281 dbgs() << "G_TRUNC/G_PTRTOINT input/output on different banks\n");
3282 return false;
3283 }
3284
3285 if (DstRB.getID() == AArch64::GPRRegBankID) {
3286 const TargetRegisterClass *DstRC = getRegClassForTypeOnBank(DstTy, DstRB);
3287 if (!DstRC)
3288 return false;
3289
3290 const TargetRegisterClass *SrcRC = getRegClassForTypeOnBank(SrcTy, SrcRB);
3291 if (!SrcRC)
3292 return false;
3293
3294 if (!RBI.constrainGenericRegister(SrcReg, *SrcRC, MRI) ||
3295 !RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) {
3296 LLVM_DEBUG(dbgs() << "Failed to constrain G_TRUNC/G_PTRTOINT\n");
3297 return false;
3298 }
3299
3300 if (DstRC == SrcRC) {
3301 // Nothing to be done
3302 } else if (Opcode == TargetOpcode::G_TRUNC && DstTy == LLT::scalar(32) &&
3303 SrcTy == LLT::scalar(64)) {
3304 llvm_unreachable("TableGen can import this case");
3305 return false;
3306 } else if (DstRC == &AArch64::GPR32RegClass &&
3307 SrcRC == &AArch64::GPR64RegClass) {
3308 I.getOperand(1).setSubReg(AArch64::sub_32);
3309 } else {
3310 LLVM_DEBUG(
3311 dbgs() << "Unhandled mismatched classes in G_TRUNC/G_PTRTOINT\n");
3312 return false;
3313 }
3314
3315 I.setDesc(TII.get(TargetOpcode::COPY));
3316 return true;
3317 } else if (DstRB.getID() == AArch64::FPRRegBankID) {
3318 if (DstTy == LLT::fixed_vector(4, 16) &&
3319 SrcTy == LLT::fixed_vector(4, 32)) {
3320 I.setDesc(TII.get(AArch64::XTNv4i16));
3321 constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3322 return true;
3323 }
3324
3325 if (!SrcTy.isVector() && SrcTy.getSizeInBits() == 128) {
3326 MachineInstr *Extract = emitExtractVectorElt(
3327 DstReg, DstRB, LLT::scalar(DstTy.getSizeInBits()), SrcReg, 0, MIB);
3328 if (!Extract)
3329 return false;
3330 I.eraseFromParent();
3331 return true;
3332 }
3333
3334 // We might have a vector G_PTRTOINT, in which case just emit a COPY.
3335 if (Opcode == TargetOpcode::G_PTRTOINT) {
3336 assert(DstTy.isVector() && "Expected an FPR ptrtoint to be a vector");
3337 I.setDesc(TII.get(TargetOpcode::COPY));
3338 return selectCopy(I, TII, MRI, TRI, RBI);
3339 }
3340 }
3341
3342 return false;
3343 }
3344
3345 case TargetOpcode::G_ANYEXT: {
3346 if (selectUSMovFromExtend(I, MRI))
3347 return true;
3348
3349 const Register DstReg = I.getOperand(0).getReg();
3350 const Register SrcReg = I.getOperand(1).getReg();
3351
3352 const RegisterBank &RBDst = *RBI.getRegBank(DstReg, MRI, TRI);
3353 if (RBDst.getID() != AArch64::GPRRegBankID) {
3354 LLVM_DEBUG(dbgs() << "G_ANYEXT on bank: " << RBDst
3355 << ", expected: GPR\n");
3356 return false;
3357 }
3358
3359 const RegisterBank &RBSrc = *RBI.getRegBank(SrcReg, MRI, TRI);
3360 if (RBSrc.getID() != AArch64::GPRRegBankID) {
3361 LLVM_DEBUG(dbgs() << "G_ANYEXT on bank: " << RBSrc
3362 << ", expected: GPR\n");
3363 return false;
3364 }
3365
3366 const unsigned DstSize = MRI.getType(DstReg).getSizeInBits();
3367
3368 if (DstSize == 0) {
3369 LLVM_DEBUG(dbgs() << "G_ANYEXT operand has no size, not a gvreg?\n");
3370 return false;
3371 }
3372
3373 if (DstSize != 64 && DstSize > 32) {
3374 LLVM_DEBUG(dbgs() << "G_ANYEXT to size: " << DstSize
3375 << ", expected: 32 or 64\n");
3376 return false;
3377 }
3378 // At this point G_ANYEXT is just like a plain COPY, but we need
3379 // to explicitly form the 64-bit value if any.
3380 if (DstSize > 32) {
3381 Register ExtSrc = MRI.createVirtualRegister(&AArch64::GPR64allRegClass);
3382 BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::SUBREG_TO_REG))
3383 .addDef(ExtSrc)
3384 .addImm(0)
3385 .addUse(SrcReg)
3386 .addImm(AArch64::sub_32);
3387 I.getOperand(1).setReg(ExtSrc);
3388 }
3389 return selectCopy(I, TII, MRI, TRI, RBI);
3390 }
3391
3392 case TargetOpcode::G_ZEXT:
3393 case TargetOpcode::G_SEXT_INREG:
3394 case TargetOpcode::G_SEXT: {
3395 if (selectUSMovFromExtend(I, MRI))
3396 return true;
3397
3398 unsigned Opcode = I.getOpcode();
3399 const bool IsSigned = Opcode != TargetOpcode::G_ZEXT;
3400 const Register DefReg = I.getOperand(0).getReg();
3401 Register SrcReg = I.getOperand(1).getReg();
3402 const LLT DstTy = MRI.getType(DefReg);
3403 const LLT SrcTy = MRI.getType(SrcReg);
3404 unsigned DstSize = DstTy.getSizeInBits();
3405 unsigned SrcSize = SrcTy.getSizeInBits();
3406
3407 // SEXT_INREG has the same src reg size as dst, the size of the value to be
3408 // extended is encoded in the imm.
3409 if (Opcode == TargetOpcode::G_SEXT_INREG)
3410 SrcSize = I.getOperand(2).getImm();
3411
3412 if (DstTy.isVector())
3413 return false; // Should be handled by imported patterns.
3414
3415 assert((*RBI.getRegBank(DefReg, MRI, TRI)).getID() ==
3416 AArch64::GPRRegBankID &&
3417 "Unexpected ext regbank");
3418
3419 MachineInstr *ExtI;
3420
3421 // First check if we're extending the result of a load which has a dest type
3422 // smaller than 32 bits, then this zext is redundant. GPR32 is the smallest
3423 // GPR register on AArch64 and all loads which are smaller automatically
3424 // zero-extend the upper bits. E.g.
3425 // %v(s8) = G_LOAD %p, :: (load 1)
3426 // %v2(s32) = G_ZEXT %v(s8)
3427 if (!IsSigned) {
3428 auto *LoadMI = getOpcodeDef(TargetOpcode::G_LOAD, SrcReg, MRI);
3429 bool IsGPR =
3430 RBI.getRegBank(SrcReg, MRI, TRI)->getID() == AArch64::GPRRegBankID;
3431 if (LoadMI && IsGPR) {
3432 const MachineMemOperand *MemOp = *LoadMI->memoperands_begin();
3433 unsigned BytesLoaded = MemOp->getSize().getValue();
3434 if (BytesLoaded < 4 && SrcTy.getSizeInBytes() == BytesLoaded)
3435 return selectCopy(I, TII, MRI, TRI, RBI);
3436 }
3437
3438 // For the 32-bit -> 64-bit case, we can emit a mov (ORRWrs)
3439 // + SUBREG_TO_REG.
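  // Roughly (placeholder vregs):
  //   %t:gpr32 = ORRWrs $wzr, %src, 0
  //   %dst:gpr64 = SUBREG_TO_REG 0, %t, %subreg.sub_32
  // Writing the W register implicitly zeroes the upper 32 bits of the X
  // register, so no explicit mask is needed.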
3440 if (IsGPR && SrcSize == 32 && DstSize == 64) {
3441 Register SubregToRegSrc =
3442 MRI.createVirtualRegister(&AArch64::GPR32RegClass);
3443 const Register ZReg = AArch64::WZR;
3444 MIB.buildInstr(AArch64::ORRWrs, {SubregToRegSrc}, {ZReg, SrcReg})
3445 .addImm(0);
3446
3447 MIB.buildInstr(AArch64::SUBREG_TO_REG, {DefReg}, {})
3448 .addImm(0)
3449 .addUse(SubregToRegSrc)
3450 .addImm(AArch64::sub_32);
3451
3452 if (!RBI.constrainGenericRegister(DefReg, AArch64::GPR64RegClass,
3453 MRI)) {
3454 LLVM_DEBUG(dbgs() << "Failed to constrain G_ZEXT destination\n");
3455 return false;
3456 }
3457
3458 if (!RBI.constrainGenericRegister(SrcReg, AArch64::GPR32RegClass,
3459 MRI)) {
3460 LLVM_DEBUG(dbgs() << "Failed to constrain G_ZEXT source\n");
3461 return false;
3462 }
3463
3464 I.eraseFromParent();
3465 return true;
3466 }
3467 }
3468
3469 if (DstSize == 64) {
3470 if (Opcode != TargetOpcode::G_SEXT_INREG) {
3471 // FIXME: Can we avoid manually doing this?
3472 if (!RBI.constrainGenericRegister(SrcReg, AArch64::GPR32RegClass,
3473 MRI)) {
3474 LLVM_DEBUG(dbgs() << "Failed to constrain " << TII.getName(Opcode)
3475 << " operand\n");
3476 return false;
3477 }
3478 SrcReg = MIB.buildInstr(AArch64::SUBREG_TO_REG,
3479 {&AArch64::GPR64RegClass}, {})
3480 .addImm(0)
3481 .addUse(SrcReg)
3482 .addImm(AArch64::sub_32)
3483 .getReg(0);
3484 }
3485
3486 ExtI = MIB.buildInstr(IsSigned ? AArch64::SBFMXri : AArch64::UBFMXri,
3487 {DefReg}, {SrcReg})
3488 .addImm(0)
3489 .addImm(SrcSize - 1);
3490 } else if (DstSize <= 32) {
3491 ExtI = MIB.buildInstr(IsSigned ? AArch64::SBFMWri : AArch64::UBFMWri,
3492 {DefReg}, {SrcReg})
3493 .addImm(0)
3494 .addImm(SrcSize - 1);
3495 } else {
3496 return false;
3497 }
3498
3499 constrainSelectedInstRegOperands(*ExtI, TII, TRI, RBI);
3500 I.eraseFromParent();
3501 return true;
3502 }
3503
3504 case TargetOpcode::G_SITOFP:
3505 case TargetOpcode::G_UITOFP:
3506 case TargetOpcode::G_FPTOSI:
3507 case TargetOpcode::G_FPTOUI: {
3508 const LLT DstTy = MRI.getType(I.getOperand(0).getReg()),
3509 SrcTy = MRI.getType(I.getOperand(1).getReg());
3510 const unsigned NewOpc = selectFPConvOpc(Opcode, DstTy, SrcTy);
3511 if (NewOpc == Opcode)
3512 return false;
3513
3514 I.setDesc(TII.get(NewOpc));
3515 constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3516 I.setFlags(MachineInstr::NoFPExcept);
3517
3518 return true;
3519 }
3520
3521 case TargetOpcode::G_FREEZE:
3522 return selectCopy(I, TII, MRI, TRI, RBI);
3523
3524 case TargetOpcode::G_INTTOPTR:
3525 // The importer is currently unable to import pointer types since they
3526 // didn't exist in SelectionDAG.
3527 return selectCopy(I, TII, MRI, TRI, RBI);
3528
3529 case TargetOpcode::G_BITCAST:
3530 // Imported SelectionDAG rules can handle every bitcast except those that
3531 // bitcast from a type to the same type. Ideally, these shouldn't occur
3532 // but we might not run an optimizer that deletes them. The other exception
3533 // is bitcasts involving pointer types, as SelectionDAG has no knowledge
3534 // of them.
3535 return selectCopy(I, TII, MRI, TRI, RBI);
3536
3537 case TargetOpcode::G_SELECT: {
3538 auto &Sel = cast<GSelect>(I);
3539 const Register CondReg = Sel.getCondReg();
3540 const Register TReg = Sel.getTrueReg();
3541 const Register FReg = Sel.getFalseReg();
3542
3543 if (tryOptSelect(Sel))
3544 return true;
3545
3546 // Make sure to use an unused vreg instead of wzr, so that the peephole
3547 // optimizations will be able to optimize these.
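  // Sketch of the sequence built below (placeholder vregs):
  //   %dead:gpr32 = ANDSWri %cond, <logical imm for 0x1>, implicit-def $nzcv
  //   %dst = CSELWr %t, %f, NE, implicit $nzcv
  // (emitSelect may fold the operands into a CSINC/CSINV/CSNEG form instead.)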
3548 Register DeadVReg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
3549 auto TstMI = MIB.buildInstr(AArch64::ANDSWri, {DeadVReg}, {CondReg})
3550 .addImm(AArch64_AM::encodeLogicalImmediate(1, 32));
3551 constrainSelectedInstRegOperands(*TstMI, TII, TRI, RBI);
3552 if (!emitSelect(Sel.getReg(0), TReg, FReg, AArch64CC::NE, MIB))
3553 return false;
3554 Sel.eraseFromParent();
3555 return true;
3556 }
3557 case TargetOpcode::G_ICMP: {
3558 if (Ty.isVector())
3559 return false;
3560
3561 if (Ty != LLT::scalar(32)) {
3562 LLVM_DEBUG(dbgs() << "G_ICMP result has type: " << Ty
3563 << ", expected: " << LLT::scalar(32) << '\n');
3564 return false;
3565 }
3566
3567 auto Pred = static_cast<CmpInst::Predicate>(I.getOperand(1).getPredicate());
3568 const AArch64CC::CondCode InvCC =
3569 changeICMPPredToAArch64CC(CmpInst::getInversePredicate(Pred));
3570 emitIntegerCompare(I.getOperand(2), I.getOperand(3), I.getOperand(1), MIB);
3571 emitCSINC(/*Dst=*/I.getOperand(0).getReg(), /*Src1=*/AArch64::WZR,
3572 /*Src2=*/AArch64::WZR, InvCC, MIB);
3573 I.eraseFromParent();
3574 return true;
3575 }
3576
3577 case TargetOpcode::G_FCMP: {
3578 CmpInst::Predicate Pred =
3579 static_cast<CmpInst::Predicate>(I.getOperand(1).getPredicate());
3580 if (!emitFPCompare(I.getOperand(2).getReg(), I.getOperand(3).getReg(), MIB,
3581 Pred) ||
3582 !emitCSetForFCmp(I.getOperand(0).getReg(), Pred, MIB))
3583 return false;
3584 I.eraseFromParent();
3585 return true;
3586 }
3587 case TargetOpcode::G_VASTART:
3588 return STI.isTargetDarwin() ? selectVaStartDarwin(I, MF, MRI)
3589 : selectVaStartAAPCS(I, MF, MRI);
3590 case TargetOpcode::G_INTRINSIC:
3591 return selectIntrinsic(I, MRI);
3592 case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS:
3593 return selectIntrinsicWithSideEffects(I, MRI);
3594 case TargetOpcode::G_IMPLICIT_DEF: {
3595 I.setDesc(TII.get(TargetOpcode::IMPLICIT_DEF));
3596 const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
3597 const Register DstReg = I.getOperand(0).getReg();
3598 const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
3599 const TargetRegisterClass *DstRC = getRegClassForTypeOnBank(DstTy, DstRB);
3600 RBI.constrainGenericRegister(DstReg, *DstRC, MRI);
3601 return true;
3602 }
3603 case TargetOpcode::G_BLOCK_ADDR: {
3604 Function *BAFn = I.getOperand(1).getBlockAddress()->getFunction();
3605 if (std::optional<uint16_t> BADisc =
3606 STI.getPtrAuthBlockAddressDiscriminatorIfEnabled(*BAFn)) {
3607 MIB.buildInstr(TargetOpcode::IMPLICIT_DEF, {AArch64::X16}, {});
3608 MIB.buildInstr(TargetOpcode::IMPLICIT_DEF, {AArch64::X17}, {});
3609 MIB.buildInstr(AArch64::MOVaddrPAC)
3610 .addBlockAddress(I.getOperand(1).getBlockAddress())
3611 .addImm(AArch64PACKey::IA)
3612 .addReg(/*AddrDisc=*/AArch64::XZR)
3613 .addImm(*BADisc)
3614 .constrainAllUses(TII, TRI, RBI);
3615 MIB.buildCopy(I.getOperand(0).getReg(), Register(AArch64::X16));
3616 RBI.constrainGenericRegister(I.getOperand(0).getReg(),
3617 AArch64::GPR64RegClass, MRI);
3618 I.eraseFromParent();
3619 return true;
3620 }
3621 if (TM.getCodeModel() == CodeModel::Large && !TM.isPositionIndependent()) {
3622 materializeLargeCMVal(I, I.getOperand(1).getBlockAddress(), 0);
3623 I.eraseFromParent();
3624 return true;
3625 } else {
3626 I.setDesc(TII.get(AArch64::MOVaddrBA));
3627 auto MovMI = BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::MOVaddrBA),
3628 I.getOperand(0).getReg())
3629 .addBlockAddress(I.getOperand(1).getBlockAddress(),
3630 /* Offset */ 0, AArch64II::MO_PAGE)
3631 .addBlockAddress(
3632 I.getOperand(1).getBlockAddress(), /* Offset */ 0,
3633 AArch64II::MO_NC | AArch64II::MO_PAGEOFF);
3634 I.eraseFromParent();
3635 return constrainSelectedInstRegOperands(*MovMI, TII, TRI, RBI);
3636 }
3637 }
3638 case AArch64::G_DUP: {
3639 // When the scalar of G_DUP is an s8/s16 gpr, they can't be selected by
3640 // imported patterns. Do it manually here. Avoiding generating s16 gpr is
3641 // difficult because at RBS we may end up pessimizing the fpr case if we
3642 // decided to add an anyextend to fix this. Manual selection is the most
3643 // robust solution for now.
3644 if (RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI)->getID() !=
3645 AArch64::GPRRegBankID)
3646 return false; // We expect the fpr regbank case to be imported.
3647 LLT VecTy = MRI.getType(I.getOperand(0).getReg());
3648 if (VecTy == LLT::fixed_vector(8, 8))
3649 I.setDesc(TII.get(AArch64::DUPv8i8gpr));
3650 else if (VecTy == LLT::fixed_vector(16, 8))
3651 I.setDesc(TII.get(AArch64::DUPv16i8gpr));
3652 else if (VecTy == LLT::fixed_vector(4, 16))
3653 I.setDesc(TII.get(AArch64::DUPv4i16gpr));
3654 else if (VecTy == LLT::fixed_vector(8, 16))
3655 I.setDesc(TII.get(AArch64::DUPv8i16gpr));
3656 else
3657 return false;
3658 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3659 }
3660 case TargetOpcode::G_BUILD_VECTOR:
3661 return selectBuildVector(I, MRI);
3662 case TargetOpcode::G_MERGE_VALUES:
3663 return selectMergeValues(I, MRI);
3664 case TargetOpcode::G_UNMERGE_VALUES:
3665 return selectUnmergeValues(I, MRI);
3666 case TargetOpcode::G_SHUFFLE_VECTOR:
3667 return selectShuffleVector(I, MRI);
3668 case TargetOpcode::G_EXTRACT_VECTOR_ELT:
3669 return selectExtractElt(I, MRI);
3670 case TargetOpcode::G_CONCAT_VECTORS:
3671 return selectConcatVectors(I, MRI);
3672 case TargetOpcode::G_JUMP_TABLE:
3673 return selectJumpTable(I, MRI);
3674 case TargetOpcode::G_MEMCPY:
3675 case TargetOpcode::G_MEMCPY_INLINE:
3676 case TargetOpcode::G_MEMMOVE:
3677 case TargetOpcode::G_MEMSET:
3678 assert(STI.hasMOPS() && "Shouldn't get here without +mops feature");
3679 return selectMOPS(I, MRI);
3680 }
3681
3682 return false;
3683}
3684
3685bool AArch64InstructionSelector::selectAndRestoreState(MachineInstr &I) {
3686 MachineIRBuilderState OldMIBState = MIB.getState();
3687 bool Success = select(I);
3688 MIB.setState(OldMIBState);
3689 return Success;
3690}
3691
3692bool AArch64InstructionSelector::selectMOPS(MachineInstr &GI,
3693 MachineRegisterInfo &MRI) {
3694 unsigned Mopcode;
3695 switch (GI.getOpcode()) {
3696 case TargetOpcode::G_MEMCPY:
3697 case TargetOpcode::G_MEMCPY_INLINE:
3698 Mopcode = AArch64::MOPSMemoryCopyPseudo;
3699 break;
3700 case TargetOpcode::G_MEMMOVE:
3701 Mopcode = AArch64::MOPSMemoryMovePseudo;
3702 break;
3703 case TargetOpcode::G_MEMSET:
3704 // For tagged memset see llvm.aarch64.mops.memset.tag
3705 Mopcode = AArch64::MOPSMemorySetPseudo;
3706 break;
3707 }
3708
3709 auto &DstPtr = GI.getOperand(0);
3710 auto &SrcOrVal = GI.getOperand(1);
3711 auto &Size = GI.getOperand(2);
3712
3713 // Create copies of the registers that can be clobbered.
3714 const Register DstPtrCopy = MRI.cloneVirtualRegister(DstPtr.getReg());
3715 const Register SrcValCopy = MRI.cloneVirtualRegister(SrcOrVal.getReg());
3716 const Register SizeCopy = MRI.cloneVirtualRegister(Size.getReg());
3717
3718 const bool IsSet = Mopcode == AArch64::MOPSMemorySetPseudo;
3719 const auto &SrcValRegClass =
3720 IsSet ? AArch64::GPR64RegClass : AArch64::GPR64commonRegClass;
3721
3722 // Constrain to specific registers
3723 RBI.constrainGenericRegister(DstPtrCopy, AArch64::GPR64commonRegClass, MRI);
3724 RBI.constrainGenericRegister(SrcValCopy, SrcValRegClass, MRI);
3725 RBI.constrainGenericRegister(SizeCopy, AArch64::GPR64RegClass, MRI);
3726
3727 MIB.buildCopy(DstPtrCopy, DstPtr);
3728 MIB.buildCopy(SrcValCopy, SrcOrVal);
3729 MIB.buildCopy(SizeCopy, Size);
3730
3731 // New instruction uses the copied registers because it must update them.
3732 // The defs are not used since they don't exist in G_MEM*. They are still
3733 // tied.
3734 // Note: order of operands is different from G_MEMSET, G_MEMCPY, G_MEMMOVE
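  // For example, a G_MEMSET(%dst, %val, %n) is built roughly as (placeholder
  // vregs):
  //   %dstOut:gpr64common, %nOut:gpr64 = MOPSMemorySetPseudo %dstCopy, %nCopy, %valCopy
  // and the pseudo is later expanded to the architectural SETP/SETM/SETE
  // sequence.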
3735 Register DefDstPtr = MRI.createVirtualRegister(&AArch64::GPR64commonRegClass);
3736 Register DefSize = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
3737 if (IsSet) {
3738 MIB.buildInstr(Mopcode, {DefDstPtr, DefSize},
3739 {DstPtrCopy, SizeCopy, SrcValCopy});
3740 } else {
3741 Register DefSrcPtr = MRI.createVirtualRegister(&SrcValRegClass);
3742 MIB.buildInstr(Mopcode, {DefDstPtr, DefSrcPtr, DefSize},
3743 {DstPtrCopy, SrcValCopy, SizeCopy});
3744 }
3745
3746 GI.eraseFromParent();
3747 return true;
3748}
3749
3750bool AArch64InstructionSelector::selectBrJT(MachineInstr &I,
3751 MachineRegisterInfo &MRI) {
3752 assert(I.getOpcode() == TargetOpcode::G_BRJT && "Expected G_BRJT");
3753 Register JTAddr = I.getOperand(0).getReg();
3754 unsigned JTI = I.getOperand(1).getIndex();
3755 Register Index = I.getOperand(2).getReg();
3756
3757 MF->getInfo<AArch64FunctionInfo>()->setJumpTableEntryInfo(JTI, 4, nullptr);
3758
3759 // With aarch64-jump-table-hardening, we only expand the jump table dispatch
3760 // sequence later, to guarantee the integrity of the intermediate values.
3761 if (MF->getFunction().hasFnAttribute("aarch64-jump-table-hardening")) {
3762 CodeModel::Model CM = TM.getCodeModel();
3763 if (STI.isTargetMachO()) {
3764 if (CM != CodeModel::Small && CM != CodeModel::Large)
3765 report_fatal_error("Unsupported code-model for hardened jump-table");
3766 } else {
3767 // Note that COFF support would likely also need JUMP_TABLE_DEBUG_INFO.
3768 assert(STI.isTargetELF() &&
3769 "jump table hardening only supported on MachO/ELF");
3770 if (CM != CodeModel::Small)
3771 report_fatal_error("Unsupported code-model for hardened jump-table");
3772 }
3773
3774 MIB.buildCopy({AArch64::X16}, I.getOperand(2).getReg());
3775 MIB.buildInstr(AArch64::BR_JumpTable)
3776 .addJumpTableIndex(I.getOperand(1).getIndex());
3777 I.eraseFromParent();
3778 return true;
3779 }
3780
3781 Register TargetReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
3782 Register ScratchReg = MRI.createVirtualRegister(&AArch64::GPR64spRegClass);
3783
3784 auto JumpTableInst = MIB.buildInstr(AArch64::JumpTableDest32,
3785 {TargetReg, ScratchReg}, {JTAddr, Index})
3786 .addJumpTableIndex(JTI);
3787 // Save the jump table info.
3788 MIB.buildInstr(TargetOpcode::JUMP_TABLE_DEBUG_INFO, {},
3789 {static_cast<int64_t>(JTI)});
3790 // Build the indirect branch.
3791 MIB.buildInstr(AArch64::BR, {}, {TargetReg});
3792 I.eraseFromParent();
3793 return constrainSelectedInstRegOperands(*JumpTableInst, TII, TRI, RBI);
3794}
3795
3796bool AArch64InstructionSelector::selectJumpTable(MachineInstr &I,
3797 MachineRegisterInfo &MRI) {
3798 assert(I.getOpcode() == TargetOpcode::G_JUMP_TABLE && "Expected jump table");
3799 assert(I.getOperand(1).isJTI() && "Jump table op should have a JTI!");
3800
3801 Register DstReg = I.getOperand(0).getReg();
3802 unsigned JTI = I.getOperand(1).getIndex();
3803 // We generate a MOVaddrJT which will get expanded to an ADRP + ADD later.
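  // Roughly:
  //   adrp x8, .LJTI0_0
  //   add  x8, x8, :lo12:.LJTI0_0
  // (x8 and the jump-table label are illustrative.)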
3804 auto MovMI =
3805 MIB.buildInstr(AArch64::MOVaddrJT, {DstReg}, {})
3806 .addJumpTableIndex(JTI, AArch64II::MO_PAGE)
3807 .addJumpTableIndex(JTI, AArch64II::MO_NC | AArch64II::MO_PAGEOFF);
3808 I.eraseFromParent();
3809 return constrainSelectedInstRegOperands(*MovMI, TII, TRI, RBI);
3810}
3811
3812bool AArch64InstructionSelector::selectTLSGlobalValue(
3813 MachineInstr &I, MachineRegisterInfo &MRI) {
3814 if (!STI.isTargetMachO())
3815 return false;
3816 MachineFunction &MF = *I.getParent()->getParent();
3817 MF.getFrameInfo().setAdjustsStack(true);
3818
3819 const auto &GlobalOp = I.getOperand(1);
3820 assert(GlobalOp.getOffset() == 0 &&
3821 "Shouldn't have an offset on TLS globals!");
3822 const GlobalValue &GV = *GlobalOp.getGlobal();
3823
3824 auto LoadGOT =
3825 MIB.buildInstr(AArch64::LOADgot, {&AArch64::GPR64commonRegClass}, {})
3826 .addGlobalAddress(&GV, 0, AArch64II::MO_TLS);
3827
3828 auto Load = MIB.buildInstr(AArch64::LDRXui, {&AArch64::GPR64commonRegClass},
3829 {LoadGOT.getReg(0)})
3830 .addImm(0);
3831
3832 MIB.buildCopy(Register(AArch64::X0), LoadGOT.getReg(0));
3833 // TLS calls preserve all registers except those that absolutely must be
3834 // trashed: X0 (it takes an argument), LR (it's a call) and NZCV (let's not be
3835 // silly).
3836 unsigned Opcode = getBLRCallOpcode(MF);
3837
3838 // With ptrauth-calls, the tlv access thunk pointer is authenticated (IA, 0).
3839 if (MF.getFunction().hasFnAttribute("ptrauth-calls")) {
3840 assert(Opcode == AArch64::BLR);
3841 Opcode = AArch64::BLRAAZ;
3842 }
3843
3844 MIB.buildInstr(Opcode, {}, {Load})
3845 .addUse(AArch64::X0, RegState::Implicit)
3846 .addDef(AArch64::X0, RegState::Implicit)
3847 .addRegMask(TRI.getTLSCallPreservedMask());
3848
3849 MIB.buildCopy(I.getOperand(0).getReg(), Register(AArch64::X0));
3850 RBI.constrainGenericRegister(I.getOperand(0).getReg(), AArch64::GPR64RegClass,
3851 MRI);
3852 I.eraseFromParent();
3853 return true;
3854}
3855
3856MachineInstr *AArch64InstructionSelector::emitScalarToVector(
3857 unsigned EltSize, const TargetRegisterClass *DstRC, Register Scalar,
3858 MachineIRBuilder &MIRBuilder) const {
3859 auto Undef = MIRBuilder.buildInstr(TargetOpcode::IMPLICIT_DEF, {DstRC}, {});
3860
3861 auto BuildFn = [&](unsigned SubregIndex) {
3862 auto Ins =
3863 MIRBuilder
3864 .buildInstr(TargetOpcode::INSERT_SUBREG, {DstRC}, {Undef, Scalar})
3865 .addImm(SubregIndex);
3866 constrainSelectedInstRegOperands(*Undef, TII, TRI, RBI);
3867 constrainSelectedInstRegOperands(*Ins, TII, TRI, RBI);
3868 return &*Ins;
3869 };
3870
3871 switch (EltSize) {
3872 case 8:
3873 return BuildFn(AArch64::bsub);
3874 case 16:
3875 return BuildFn(AArch64::hsub);
3876 case 32:
3877 return BuildFn(AArch64::ssub);
3878 case 64:
3879 return BuildFn(AArch64::dsub);
3880 default:
3881 return nullptr;
3882 }
3883}
3884
3885 MachineInstr *
3886 AArch64InstructionSelector::emitNarrowVector(Register DstReg, Register SrcReg,
3887 MachineIRBuilder &MIB,
3888 MachineRegisterInfo &MRI) const {
3889 LLT DstTy = MRI.getType(DstReg);
3890 const TargetRegisterClass *RC =
3891 getRegClassForTypeOnBank(DstTy, *RBI.getRegBank(SrcReg, MRI, TRI));
3892 if (RC != &AArch64::FPR32RegClass && RC != &AArch64::FPR64RegClass) {
3893 LLVM_DEBUG(dbgs() << "Unsupported register class!\n");
3894 return nullptr;
3895 }
3896 unsigned SubReg = 0;
3897 if (!getSubRegForClass(RC, TRI, SubReg))
3898 return nullptr;
3899 if (SubReg != AArch64::ssub && SubReg != AArch64::dsub) {
3900 LLVM_DEBUG(dbgs() << "Unsupported destination size! ("
3901 << DstTy.getSizeInBits() << "\n");
3902 return nullptr;
3903 }
3904 auto Copy = MIB.buildInstr(TargetOpcode::COPY, {DstReg}, {})
3905 .addReg(SrcReg, 0, SubReg);
3906 RBI.constrainGenericRegister(DstReg, *RC, MRI);
3907 return Copy;
3908}
3909
3910bool AArch64InstructionSelector::selectMergeValues(
3911 MachineInstr &I, MachineRegisterInfo &MRI) {
3912 assert(I.getOpcode() == TargetOpcode::G_MERGE_VALUES && "unexpected opcode");
3913 const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
3914 const LLT SrcTy = MRI.getType(I.getOperand(1).getReg());
3915 assert(!DstTy.isVector() && !SrcTy.isVector() && "invalid merge operation");
3916 const RegisterBank &RB = *RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI);
3917
3918 if (I.getNumOperands() != 3)
3919 return false;
3920
3921 // Merging 2 s64s into an s128.
3922 if (DstTy == LLT::scalar(128)) {
3923 if (SrcTy.getSizeInBits() != 64)
3924 return false;
3925 Register DstReg = I.getOperand(0).getReg();
3926 Register Src1Reg = I.getOperand(1).getReg();
3927 Register Src2Reg = I.getOperand(2).getReg();
3928 auto Tmp = MIB.buildInstr(TargetOpcode::IMPLICIT_DEF, {DstTy}, {});
3929 MachineInstr *InsMI = emitLaneInsert(std::nullopt, Tmp.getReg(0), Src1Reg,
3930 /* LaneIdx */ 0, RB, MIB);
3931 if (!InsMI)
3932 return false;
3933 MachineInstr *Ins2MI = emitLaneInsert(DstReg, InsMI->getOperand(0).getReg(),
3934 Src2Reg, /* LaneIdx */ 1, RB, MIB);
3935 if (!Ins2MI)
3936 return false;
3937 constrainSelectedInstRegOperands(*InsMI, TII, TRI, RBI);
3938 constrainSelectedInstRegOperands(*Ins2MI, TII, TRI, RBI);
3939 I.eraseFromParent();
3940 return true;
3941 }
3942
3943 if (RB.getID() != AArch64::GPRRegBankID)
3944 return false;
3945
3946 if (DstTy.getSizeInBits() != 64 || SrcTy.getSizeInBits() != 32)
3947 return false;
3948
3949 auto *DstRC = &AArch64::GPR64RegClass;
3950 Register SubToRegDef = MRI.createVirtualRegister(DstRC);
3951 MachineInstr &SubRegMI = *BuildMI(*I.getParent(), I, I.getDebugLoc(),
3952 TII.get(TargetOpcode::SUBREG_TO_REG))
3953 .addDef(SubToRegDef)
3954 .addImm(0)
3955 .addUse(I.getOperand(1).getReg())
3956 .addImm(AArch64::sub_32);
3957 Register SubToRegDef2 = MRI.createVirtualRegister(DstRC);
3958 // Need to anyext the second scalar before we can use bfm
3959 MachineInstr &SubRegMI2 = *BuildMI(*I.getParent(), I, I.getDebugLoc(),
3960 TII.get(TargetOpcode::SUBREG_TO_REG))
3961 .addDef(SubToRegDef2)
3962 .addImm(0)
3963 .addUse(I.getOperand(2).getReg())
3964 .addImm(AArch64::sub_32);
3965 MachineInstr &BFM =
3966 *BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::BFMXri))
3967 .addDef(I.getOperand(0).getReg())
3968 .addUse(SubToRegDef)
3969 .addUse(SubToRegDef2)
3970 .addImm(32)
3971 .addImm(31);
3972 constrainSelectedInstRegOperands(SubRegMI, TII, TRI, RBI);
3973 constrainSelectedInstRegOperands(SubRegMI2, TII, TRI, RBI);
3974 constrainSelectedInstRegOperands(BFM, TII, TRI, RBI);
3975 I.eraseFromParent();
3976 return true;
3977}
3978
3979static bool getLaneCopyOpcode(unsigned &CopyOpc, unsigned &ExtractSubReg,
3980 const unsigned EltSize) {
3981 // Choose a lane copy opcode and subregister based off of the size of the
3982 // vector's elements.
3983 switch (EltSize) {
3984 case 8:
3985 CopyOpc = AArch64::DUPi8;
3986 ExtractSubReg = AArch64::bsub;
3987 break;
3988 case 16:
3989 CopyOpc = AArch64::DUPi16;
3990 ExtractSubReg = AArch64::hsub;
3991 break;
3992 case 32:
3993 CopyOpc = AArch64::DUPi32;
3994 ExtractSubReg = AArch64::ssub;
3995 break;
3996 case 64:
3997 CopyOpc = AArch64::DUPi64;
3998 ExtractSubReg = AArch64::dsub;
3999 break;
4000 default:
4001 // Unknown size, bail out.
4002 LLVM_DEBUG(dbgs() << "Elt size '" << EltSize << "' unsupported.\n");
4003 return false;
4004 }
4005 return true;
4006}
4007
4008MachineInstr *AArch64InstructionSelector::emitExtractVectorElt(
4009 std::optional<Register> DstReg, const RegisterBank &DstRB, LLT ScalarTy,
4010 Register VecReg, unsigned LaneIdx, MachineIRBuilder &MIRBuilder) const {
4011 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
4012 unsigned CopyOpc = 0;
4013 unsigned ExtractSubReg = 0;
4014 if (!getLaneCopyOpcode(CopyOpc, ExtractSubReg, ScalarTy.getSizeInBits())) {
4015 LLVM_DEBUG(
4016 dbgs() << "Couldn't determine lane copy opcode for instruction.\n");
4017 return nullptr;
4018 }
4019
4020 const TargetRegisterClass *DstRC =
4021 getRegClassForTypeOnBank(ScalarTy, DstRB, true);
4022 if (!DstRC) {
4023 LLVM_DEBUG(dbgs() << "Could not determine destination register class.\n");
4024 return nullptr;
4025 }
4026
4027 const RegisterBank &VecRB = *RBI.getRegBank(VecReg, MRI, TRI);
4028 const LLT &VecTy = MRI.getType(VecReg);
4029 const TargetRegisterClass *VecRC =
4030 getRegClassForTypeOnBank(VecTy, VecRB, true);
4031 if (!VecRC) {
4032 LLVM_DEBUG(dbgs() << "Could not determine source register class.\n");
4033 return nullptr;
4034 }
4035
4036 // The register that we're going to copy into.
4037 Register InsertReg = VecReg;
4038 if (!DstReg)
4039 DstReg = MRI.createVirtualRegister(DstRC);
4040 // If the lane index is 0, we just use a subregister COPY.
4041 if (LaneIdx == 0) {
4042 auto Copy = MIRBuilder.buildInstr(TargetOpcode::COPY, {*DstReg}, {})
4043 .addReg(VecReg, 0, ExtractSubReg);
4044 RBI.constrainGenericRegister(*DstReg, *DstRC, MRI);
4045 return &*Copy;
4046 }
4047
4048 // Lane copies require 128-bit wide registers. If we're dealing with an
4049 // unpacked vector, then we need to move up to that width. Insert an implicit
4050 // def and a subregister insert to get us there.
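  // For a 64-bit source vector and a 32-bit element this looks roughly like
  // (placeholder vregs):
  //   %undef:fpr128 = IMPLICIT_DEF
  //   %wide:fpr128 = INSERT_SUBREG %undef, %vec:fpr64, %subreg.dsub
  //   %elt:fpr32 = DUPi32 %wide, <lane>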
4051 if (VecTy.getSizeInBits() != 128) {
4052 MachineInstr *ScalarToVector = emitScalarToVector(
4053 VecTy.getSizeInBits(), &AArch64::FPR128RegClass, VecReg, MIRBuilder);
4054 if (!ScalarToVector)
4055 return nullptr;
4056 InsertReg = ScalarToVector->getOperand(0).getReg();
4057 }
4058
4059 MachineInstr *LaneCopyMI =
4060 MIRBuilder.buildInstr(CopyOpc, {*DstReg}, {InsertReg}).addImm(LaneIdx);
4061 constrainSelectedInstRegOperands(*LaneCopyMI, TII, TRI, RBI);
4062
4063 // Make sure that we actually constrain the initial copy.
4064 RBI.constrainGenericRegister(*DstReg, *DstRC, MRI);
4065 return LaneCopyMI;
4066}
4067
4068bool AArch64InstructionSelector::selectExtractElt(
4069 MachineInstr &I, MachineRegisterInfo &MRI) {
4070 assert(I.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT &&
4071 "unexpected opcode!");
4072 Register DstReg = I.getOperand(0).getReg();
4073 const LLT NarrowTy = MRI.getType(DstReg);
4074 const Register SrcReg = I.getOperand(1).getReg();
4075 const LLT WideTy = MRI.getType(SrcReg);
4076 (void)WideTy;
4077 assert(WideTy.getSizeInBits() >= NarrowTy.getSizeInBits() &&
4078 "source register size too small!");
4079 assert(!NarrowTy.isVector() && "cannot extract vector into vector!");
4080
4081 // Need the lane index to determine the correct copy opcode.
4082 MachineOperand &LaneIdxOp = I.getOperand(2);
4083 assert(LaneIdxOp.isReg() && "Lane index operand was not a register?");
4084
4085 if (RBI.getRegBank(DstReg, MRI, TRI)->getID() != AArch64::FPRRegBankID) {
4086 LLVM_DEBUG(dbgs() << "Cannot extract into GPR.\n");
4087 return false;
4088 }
4089
4090 // Find the index to extract from.
4091 auto VRegAndVal = getIConstantVRegValWithLookThrough(LaneIdxOp.getReg(), MRI);
4092 if (!VRegAndVal)
4093 return false;
4094 unsigned LaneIdx = VRegAndVal->Value.getSExtValue();
4095
4096
4097 const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
4098 MachineInstr *Extract = emitExtractVectorElt(DstReg, DstRB, NarrowTy, SrcReg,
4099 LaneIdx, MIB);
4100 if (!Extract)
4101 return false;
4102
4103 I.eraseFromParent();
4104 return true;
4105}
4106
4107bool AArch64InstructionSelector::selectSplitVectorUnmerge(
4108 MachineInstr &I, MachineRegisterInfo &MRI) {
4109 unsigned NumElts = I.getNumOperands() - 1;
4110 Register SrcReg = I.getOperand(NumElts).getReg();
4111 const LLT NarrowTy = MRI.getType(I.getOperand(0).getReg());
4112 const LLT SrcTy = MRI.getType(SrcReg);
4113
4114 assert(NarrowTy.isVector() && "Expected an unmerge into vectors");
4115 if (SrcTy.getSizeInBits() > 128) {
4116 LLVM_DEBUG(dbgs() << "Unexpected vector type for vec split unmerge");
4117 return false;
4118 }
4119
4120 // We implement a split vector operation by treating the sub-vectors as
4121 // scalars and extracting them.
4122 const RegisterBank &DstRB =
4123 *RBI.getRegBank(I.getOperand(0).getReg(), MRI, TRI);
4124 for (unsigned OpIdx = 0; OpIdx < NumElts; ++OpIdx) {
4125 Register Dst = I.getOperand(OpIdx).getReg();
4126 MachineInstr *Extract =
4127 emitExtractVectorElt(Dst, DstRB, NarrowTy, SrcReg, OpIdx, MIB);
4128 if (!Extract)
4129 return false;
4130 }
4131 I.eraseFromParent();
4132 return true;
4133}
4134
4135bool AArch64InstructionSelector::selectUnmergeValues(MachineInstr &I,
4136 MachineRegisterInfo &MRI) {
4137 assert(I.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
4138 "unexpected opcode");
4139
4140 // TODO: Handle unmerging into GPRs and from scalars to scalars.
4141 if (RBI.getRegBank(I.getOperand(0).getReg(), MRI, TRI)->getID() !=
4142 AArch64::FPRRegBankID ||
4143 RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI)->getID() !=
4144 AArch64::FPRRegBankID) {
4145 LLVM_DEBUG(dbgs() << "Unmerging vector-to-gpr and scalar-to-scalar "
4146 "currently unsupported.\n");
4147 return false;
4148 }
4149
4150 // The last operand is the vector source register, and every other operand is
4151 // a register to unpack into.
4152 unsigned NumElts = I.getNumOperands() - 1;
4153 Register SrcReg = I.getOperand(NumElts).getReg();
4154 const LLT NarrowTy = MRI.getType(I.getOperand(0).getReg());
4155 const LLT WideTy = MRI.getType(SrcReg);
4156 (void)WideTy;
4157 assert((WideTy.isVector() || WideTy.getSizeInBits() == 128) &&
4158 "can only unmerge from vector or s128 types!");
4159 assert(WideTy.getSizeInBits() > NarrowTy.getSizeInBits() &&
4160 "source register size too small!");
4161
4162 if (!NarrowTy.isScalar())
4163 return selectSplitVectorUnmerge(I, MRI);
4164
4165 // Choose a lane copy opcode and subregister based off of the size of the
4166 // vector's elements.
4167 unsigned CopyOpc = 0;
4168 unsigned ExtractSubReg = 0;
4169 if (!getLaneCopyOpcode(CopyOpc, ExtractSubReg, NarrowTy.getSizeInBits()))
4170 return false;
4171
4172 // Set up for the lane copies.
4173 MachineBasicBlock &MBB = *I.getParent();
4174
4175 // Stores the registers we'll be copying from.
4176 SmallVector<Register, 4> InsertRegs;
4177
4178 // We'll use the first register twice, so we only need NumElts-1 registers.
4179 unsigned NumInsertRegs = NumElts - 1;
4180
4181 // If our elements fit into exactly 128 bits, then we can copy from the source
4182 // directly. Otherwise, we need to do a bit of setup with some subregister
4183 // inserts.
4184 if (NarrowTy.getSizeInBits() * NumElts == 128) {
4185 InsertRegs = SmallVector<Register, 4>(NumInsertRegs, SrcReg);
4186 } else {
4187 // No. We have to perform subregister inserts. For each insert, create an
4188 // implicit def and a subregister insert, and save the register we create.
4189 const TargetRegisterClass *RC = getRegClassForTypeOnBank(
4190 LLT::fixed_vector(NumElts, WideTy.getScalarSizeInBits()),
4191 *RBI.getRegBank(SrcReg, MRI, TRI));
4192 unsigned SubReg = 0;
4193 bool Found = getSubRegForClass(RC, TRI, SubReg);
4194 (void)Found;
4195 assert(Found && "expected to find last operand's subreg idx");
4196 for (unsigned Idx = 0; Idx < NumInsertRegs; ++Idx) {
4197 Register ImpDefReg = MRI.createVirtualRegister(&AArch64::FPR128RegClass);
4198 MachineInstr &ImpDefMI =
4199 *BuildMI(MBB, I, I.getDebugLoc(), TII.get(TargetOpcode::IMPLICIT_DEF),
4200 ImpDefReg);
4201
4202 // Now, create the subregister insert from SrcReg.
4203 Register InsertReg = MRI.createVirtualRegister(&AArch64::FPR128RegClass);
4204 MachineInstr &InsMI =
4205 *BuildMI(MBB, I, I.getDebugLoc(),
4206 TII.get(TargetOpcode::INSERT_SUBREG), InsertReg)
4207 .addUse(ImpDefReg)
4208 .addUse(SrcReg)
4209 .addImm(SubReg);
4210
4211 constrainSelectedInstRegOperands(ImpDefMI, TII, TRI, RBI);
4212 constrainSelectedInstRegOperands(InsMI, TII, TRI, RBI);
4213
4214 // Save the register so that we can copy from it after.
4215 InsertRegs.push_back(InsertReg);
4216 }
4217 }
4218
4219 // Now that we've created any necessary subregister inserts, we can
4220 // create the copies.
4221 //
4222 // Perform the first copy separately as a subregister copy.
4223 Register CopyTo = I.getOperand(0).getReg();
4224 auto FirstCopy = MIB.buildInstr(TargetOpcode::COPY, {CopyTo}, {})
4225 .addReg(InsertRegs[0], 0, ExtractSubReg);
4226 constrainSelectedInstRegOperands(*FirstCopy, TII, TRI, RBI);
4227
4228 // Now, perform the remaining copies as vector lane copies.
4229 unsigned LaneIdx = 1;
4230 for (Register InsReg : InsertRegs) {
4231 Register CopyTo = I.getOperand(LaneIdx).getReg();
4232 MachineInstr &CopyInst =
4233 *BuildMI(MBB, I, I.getDebugLoc(), TII.get(CopyOpc), CopyTo)
4234 .addUse(InsReg)
4235 .addImm(LaneIdx);
4236 constrainSelectedInstRegOperands(CopyInst, TII, TRI, RBI);
4237 ++LaneIdx;
4238 }
4239
4240 // Separately constrain the first copy's destination. Because of the
4241 // limitation in constrainOperandRegClass, we can't guarantee that this will
4242 // actually be constrained. So, do it ourselves using the second operand.
4243 const TargetRegisterClass *RC =
4244 MRI.getRegClassOrNull(I.getOperand(1).getReg());
4245 if (!RC) {
4246 LLVM_DEBUG(dbgs() << "Couldn't constrain copy destination.\n");
4247 return false;
4248 }
4249
4250 RBI.constrainGenericRegister(CopyTo, *RC, MRI);
4251 I.eraseFromParent();
4252 return true;
4253}
4254
4255bool AArch64InstructionSelector::selectConcatVectors(
4256 MachineInstr &I, MachineRegisterInfo &MRI) {
4257 assert(I.getOpcode() == TargetOpcode::G_CONCAT_VECTORS &&
4258 "Unexpected opcode");
4259 Register Dst = I.getOperand(0).getReg();
4260 Register Op1 = I.getOperand(1).getReg();
4261 Register Op2 = I.getOperand(2).getReg();
4262 MachineInstr *ConcatMI = emitVectorConcat(Dst, Op1, Op2, MIB);
4263 if (!ConcatMI)
4264 return false;
4265 I.eraseFromParent();
4266 return true;
4267}
4268
4269unsigned
4270AArch64InstructionSelector::emitConstantPoolEntry(const Constant *CPVal,
4271 MachineFunction &MF) const {
4272 Type *CPTy = CPVal->getType();
4273 Align Alignment = MF.getDataLayout().getPrefTypeAlign(CPTy);
4274
4275 MachineConstantPool *MCP = MF.getConstantPool();
4276 return MCP->getConstantPoolIndex(CPVal, Alignment);
4277}
4278
4279MachineInstr *AArch64InstructionSelector::emitLoadFromConstantPool(
4280 const Constant *CPVal, MachineIRBuilder &MIRBuilder) const {
4281 const TargetRegisterClass *RC;
4282 unsigned Opc;
4283 bool IsTiny = TM.getCodeModel() == CodeModel::Tiny;
4284 unsigned Size = MIRBuilder.getDataLayout().getTypeStoreSize(CPVal->getType());
4285 switch (Size) {
4286 case 16:
4287 RC = &AArch64::FPR128RegClass;
4288 Opc = IsTiny ? AArch64::LDRQl : AArch64::LDRQui;
4289 break;
4290 case 8:
4291 RC = &AArch64::FPR64RegClass;
4292 Opc = IsTiny ? AArch64::LDRDl : AArch64::LDRDui;
4293 break;
4294 case 4:
4295 RC = &AArch64::FPR32RegClass;
4296 Opc = IsTiny ? AArch64::LDRSl : AArch64::LDRSui;
4297 break;
4298 case 2:
4299 RC = &AArch64::FPR16RegClass;
4300 Opc = AArch64::LDRHui;
4301 break;
4302 default:
4303 LLVM_DEBUG(dbgs() << "Could not load from constant pool of type "
4304 << *CPVal->getType());
4305 return nullptr;
4306 }
4307
4308 MachineInstr *LoadMI = nullptr;
4309 auto &MF = MIRBuilder.getMF();
4310 unsigned CPIdx = emitConstantPoolEntry(CPVal, MF);
4311 if (IsTiny && (Size == 16 || Size == 8 || Size == 4)) {
4312 // Use load(literal) for tiny code model.
4313 LoadMI = &*MIRBuilder.buildInstr(Opc, {RC}, {}).addConstantPoolIndex(CPIdx);
4314 } else {
4315 auto Adrp =
4316 MIRBuilder.buildInstr(AArch64::ADRP, {&AArch64::GPR64RegClass}, {})
4317 .addConstantPoolIndex(CPIdx, 0, AArch64II::MO_PAGE);
4318
4319 LoadMI = &*MIRBuilder.buildInstr(Opc, {RC}, {Adrp})
4320 .addConstantPoolIndex(
4321 CPIdx, 0, AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
4322
4323 constrainSelectedInstRegOperands(*Adrp, TII, TRI, RBI);
4324 }
4325
4326 MachinePointerInfo PtrInfo = MachinePointerInfo::getConstantPool(MF);
4327 LoadMI->addMemOperand(MF, MF.getMachineMemOperand(PtrInfo,
4328 MachineMemOperand::MOLoad,
4329 Size, Align(Size)));
4330 constrainSelectedInstRegOperands(*LoadMI, TII, TRI, RBI);
4331 return LoadMI;
4332}
4333
4334/// Return an <Opcode, SubregIndex> pair to do an vector elt insert of a given
4335/// size and RB.
4336static std::pair<unsigned, unsigned>
4337getInsertVecEltOpInfo(const RegisterBank &RB, unsigned EltSize) {
4338 unsigned Opc, SubregIdx;
4339 if (RB.getID() == AArch64::GPRRegBankID) {
4340 if (EltSize == 8) {
4341 Opc = AArch64::INSvi8gpr;
4342 SubregIdx = AArch64::bsub;
4343 } else if (EltSize == 16) {
4344 Opc = AArch64::INSvi16gpr;
4345 SubregIdx = AArch64::ssub;
4346 } else if (EltSize == 32) {
4347 Opc = AArch64::INSvi32gpr;
4348 SubregIdx = AArch64::ssub;
4349 } else if (EltSize == 64) {
4350 Opc = AArch64::INSvi64gpr;
4351 SubregIdx = AArch64::dsub;
4352 } else {
4353 llvm_unreachable("invalid elt size!");
4354 }
4355 } else {
4356 if (EltSize == 8) {
4357 Opc = AArch64::INSvi8lane;
4358 SubregIdx = AArch64::bsub;
4359 } else if (EltSize == 16) {
4360 Opc = AArch64::INSvi16lane;
4361 SubregIdx = AArch64::hsub;
4362 } else if (EltSize == 32) {
4363 Opc = AArch64::INSvi32lane;
4364 SubregIdx = AArch64::ssub;
4365 } else if (EltSize == 64) {
4366 Opc = AArch64::INSvi64lane;
4367 SubregIdx = AArch64::dsub;
4368 } else {
4369 llvm_unreachable("invalid elt size!");
4370 }
4371 }
4372 return std::make_pair(Opc, SubregIdx);
4373}
4374
4375MachineInstr *AArch64InstructionSelector::emitInstr(
4376 unsigned Opcode, std::initializer_list<llvm::DstOp> DstOps,
4377 std::initializer_list<llvm::SrcOp> SrcOps, MachineIRBuilder &MIRBuilder,
4378 const ComplexRendererFns &RenderFns) const {
4379 assert(Opcode && "Expected an opcode?");
4380 assert(!isPreISelGenericOpcode(Opcode) &&
4381 "Function should only be used to produce selected instructions!");
4382 auto MI = MIRBuilder.buildInstr(Opcode, DstOps, SrcOps);
4383 if (RenderFns)
4384 for (auto &Fn : *RenderFns)
4385 Fn(MI);
4386 constrainSelectedInstRegOperands(*MI, TII, TRI, RBI);
4387 return &*MI;
4388}
4389
4390MachineInstr *AArch64InstructionSelector::emitAddSub(
4391 const std::array<std::array<unsigned, 2>, 5> &AddrModeAndSizeToOpcode,
4392 Register Dst, MachineOperand &LHS, MachineOperand &RHS,
4393 MachineIRBuilder &MIRBuilder) const {
4394 MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
4395 assert(LHS.isReg() && RHS.isReg() && "Expected register operands?");
4396 auto Ty = MRI.getType(LHS.getReg());
4397 assert(!Ty.isVector() && "Expected a scalar or pointer?");
4398 unsigned Size = Ty.getSizeInBits();
4399 assert((Size == 32 || Size == 64) && "Expected a 32-bit or 64-bit type only");
4400 bool Is32Bit = Size == 32;
4401
4402 // INSTRri form with positive arithmetic immediate.
4403 if (auto Fns = selectArithImmed(RHS))
4404 return emitInstr(AddrModeAndSizeToOpcode[0][Is32Bit], {Dst}, {LHS},
4405 MIRBuilder, Fns);
4406
4407 // INSTRri form with negative arithmetic immediate.
4408 if (auto Fns = selectNegArithImmed(RHS))
4409 return emitInstr(AddrModeAndSizeToOpcode[3][Is32Bit], {Dst}, {LHS},
4410 MIRBuilder, Fns);
4411
4412 // INSTRrx form.
4413 if (auto Fns = selectArithExtendedRegister(RHS))
4414 return emitInstr(AddrModeAndSizeToOpcode[4][Is32Bit], {Dst}, {LHS},
4415 MIRBuilder, Fns);
4416
4417 // INSTRrs form.
4418 if (auto Fns = selectShiftedRegister(RHS))
4419 return emitInstr(AddrModeAndSizeToOpcode[1][Is32Bit], {Dst}, {LHS},
4420 MIRBuilder, Fns);
4421 return emitInstr(AddrModeAndSizeToOpcode[2][Is32Bit], {Dst}, {LHS, RHS},
4422 MIRBuilder);
4423}
4424
4425 MachineInstr *
4426 AArch64InstructionSelector::emitADD(Register DefReg, MachineOperand &LHS,
4427 MachineOperand &RHS,
4428 MachineIRBuilder &MIRBuilder) const {
4429 const std::array<std::array<unsigned, 2>, 5> OpcTable{
4430 {{AArch64::ADDXri, AArch64::ADDWri},
4431 {AArch64::ADDXrs, AArch64::ADDWrs},
4432 {AArch64::ADDXrr, AArch64::ADDWrr},
4433 {AArch64::SUBXri, AArch64::SUBWri},
4434 {AArch64::ADDXrx, AArch64::ADDWrx}}};
4435 return emitAddSub(OpcTable, DefReg, LHS, RHS, MIRBuilder);
4436}
4437
4438 MachineInstr *
4439 AArch64InstructionSelector::emitADDS(Register Dst, MachineOperand &LHS,
4440 MachineOperand &RHS,
4441 MachineIRBuilder &MIRBuilder) const {
4442 const std::array<std::array<unsigned, 2>, 5> OpcTable{
4443 {{AArch64::ADDSXri, AArch64::ADDSWri},
4444 {AArch64::ADDSXrs, AArch64::ADDSWrs},
4445 {AArch64::ADDSXrr, AArch64::ADDSWrr},
4446 {AArch64::SUBSXri, AArch64::SUBSWri},
4447 {AArch64::ADDSXrx, AArch64::ADDSWrx}}};
4448 return emitAddSub(OpcTable, Dst, LHS, RHS, MIRBuilder);
4449}
4450
4451 MachineInstr *
4452 AArch64InstructionSelector::emitSUBS(Register Dst, MachineOperand &LHS,
4453 MachineOperand &RHS,
4454 MachineIRBuilder &MIRBuilder) const {
4455 const std::array<std::array<unsigned, 2>, 5> OpcTable{
4456 {{AArch64::SUBSXri, AArch64::SUBSWri},
4457 {AArch64::SUBSXrs, AArch64::SUBSWrs},
4458 {AArch64::SUBSXrr, AArch64::SUBSWrr},
4459 {AArch64::ADDSXri, AArch64::ADDSWri},
4460 {AArch64::SUBSXrx, AArch64::SUBSWrx}}};
4461 return emitAddSub(OpcTable, Dst, LHS, RHS, MIRBuilder);
4462}
4463
4464 MachineInstr *
4465 AArch64InstructionSelector::emitADCS(Register Dst, MachineOperand &LHS,
4466 MachineOperand &RHS,
4467 MachineIRBuilder &MIRBuilder) const {
4468 assert(LHS.isReg() && RHS.isReg() && "Expected register operands?");
4469 MachineRegisterInfo *MRI = MIRBuilder.getMRI();
4470 bool Is32Bit = (MRI->getType(LHS.getReg()).getSizeInBits() == 32);
4471 static const unsigned OpcTable[2] = {AArch64::ADCSXr, AArch64::ADCSWr};
4472 return emitInstr(OpcTable[Is32Bit], {Dst}, {LHS, RHS}, MIRBuilder);
4473}
4474
4475 MachineInstr *
4476 AArch64InstructionSelector::emitSBCS(Register Dst, MachineOperand &LHS,
4477 MachineOperand &RHS,
4478 MachineIRBuilder &MIRBuilder) const {
4479 assert(LHS.isReg() && RHS.isReg() && "Expected register operands?");
4480 MachineRegisterInfo *MRI = MIRBuilder.getMRI();
4481 bool Is32Bit = (MRI->getType(LHS.getReg()).getSizeInBits() == 32);
4482 static const unsigned OpcTable[2] = {AArch64::SBCSXr, AArch64::SBCSWr};
4483 return emitInstr(OpcTable[Is32Bit], {Dst}, {LHS, RHS}, MIRBuilder);
4484}
4485
4486 MachineInstr *
4487 AArch64InstructionSelector::emitCMN(MachineOperand &LHS, MachineOperand &RHS,
4488 MachineIRBuilder &MIRBuilder) const {
4489 MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
4490 bool Is32Bit = (MRI.getType(LHS.getReg()).getSizeInBits() == 32);
4491 auto RC = Is32Bit ? &AArch64::GPR32RegClass : &AArch64::GPR64RegClass;
4492 return emitADDS(MRI.createVirtualRegister(RC), LHS, RHS, MIRBuilder);
4493}
4494
4495 MachineInstr *
4496 AArch64InstructionSelector::emitTST(MachineOperand &LHS, MachineOperand &RHS,
4497 MachineIRBuilder &MIRBuilder) const {
4498 assert(LHS.isReg() && RHS.isReg() && "Expected register operands?");
4499 MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
4500 LLT Ty = MRI.getType(LHS.getReg());
4501 unsigned RegSize = Ty.getSizeInBits();
4502 bool Is32Bit = (RegSize == 32);
4503 const unsigned OpcTable[3][2] = {{AArch64::ANDSXri, AArch64::ANDSWri},
4504 {AArch64::ANDSXrs, AArch64::ANDSWrs},
4505 {AArch64::ANDSXrr, AArch64::ANDSWrr}};
4506 // ANDS needs a logical immediate for its immediate form. Check if we can
4507 // fold one in.
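  // For example, a test against 0xff (a valid logical immediate) becomes
  // roughly (placeholder vregs):
  //   %dead:gpr32 = ANDSWri %lhs, <encoded 0xff>, implicit-def $nzcv
  // instead of materializing 0xff in a register first.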
4508 if (auto ValAndVReg = getIConstantVRegValWithLookThrough(RHS.getReg(), MRI)) {
4509 int64_t Imm = ValAndVReg->Value.getSExtValue();
4510
4511 if (AArch64_AM::isLogicalImmediate(Imm, RegSize)) {
4512 auto TstMI = MIRBuilder.buildInstr(OpcTable[0][Is32Bit], {Ty}, {LHS});
4513 TstMI.addImm(AArch64_AM::encodeLogicalImmediate(Imm, RegSize));
4514 constrainSelectedInstRegOperands(*TstMI, TII, TRI, RBI);
4515 return &*TstMI;
4516 }
4517 }
4518
4519 if (auto Fns = selectLogicalShiftedRegister(RHS))
4520 return emitInstr(OpcTable[1][Is32Bit], {Ty}, {LHS}, MIRBuilder, Fns);
4521 return emitInstr(OpcTable[2][Is32Bit], {Ty}, {LHS, RHS}, MIRBuilder);
4522}
4523
4524MachineInstr *AArch64InstructionSelector::emitIntegerCompare(
4525 MachineOperand &LHS, MachineOperand &RHS, MachineOperand &Predicate,
4526 MachineIRBuilder &MIRBuilder) const {
4527 assert(LHS.isReg() && RHS.isReg() && "Expected LHS and RHS to be registers!");
4528 assert(Predicate.isPredicate() && "Expected predicate?");
4529 MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
4530 LLT CmpTy = MRI.getType(LHS.getReg());
4531 assert(!CmpTy.isVector() && "Expected scalar or pointer");
4532 unsigned Size = CmpTy.getSizeInBits();
4533 (void)Size;
4534 assert((Size == 32 || Size == 64) && "Expected a 32-bit or 64-bit LHS/RHS?");
4535 // Fold the compare into a cmn or tst if possible.
4536 if (auto FoldCmp = tryFoldIntegerCompare(LHS, RHS, Predicate, MIRBuilder))
4537 return FoldCmp;
4538 auto Dst = MRI.cloneVirtualRegister(LHS.getReg());
4539 return emitSUBS(Dst, LHS, RHS, MIRBuilder);
4540}
4541
4542MachineInstr *AArch64InstructionSelector::emitCSetForFCmp(
4543 Register Dst, CmpInst::Predicate Pred, MachineIRBuilder &MIRBuilder) const {
4544 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
4545#ifndef NDEBUG
4546 LLT Ty = MRI.getType(Dst);
4547 assert(!Ty.isVector() && Ty.getSizeInBits() == 32 &&
4548 "Expected a 32-bit scalar register?");
4549#endif
4550 const Register ZReg = AArch64::WZR;
4551 AArch64CC::CondCode CC1, CC2;
4552 changeFCMPPredToAArch64CC(Pred, CC1, CC2);
4553 auto InvCC1 = AArch64CC::getInvertedCondCode(CC1);
4554 if (CC2 == AArch64CC::AL)
4555 return emitCSINC(/*Dst=*/Dst, /*Src1=*/ZReg, /*Src2=*/ZReg, InvCC1,
4556 MIRBuilder);
4557 const TargetRegisterClass *RC = &AArch64::GPR32RegClass;
4558 Register Def1Reg = MRI.createVirtualRegister(RC);
4559 Register Def2Reg = MRI.createVirtualRegister(RC);
4560 auto InvCC2 = AArch64CC::getInvertedCondCode(CC2);
4561 emitCSINC(/*Dst=*/Def1Reg, /*Src1=*/ZReg, /*Src2=*/ZReg, InvCC1, MIRBuilder);
4562 emitCSINC(/*Dst=*/Def2Reg, /*Src1=*/ZReg, /*Src2=*/ZReg, InvCC2, MIRBuilder);
4563 auto OrMI = MIRBuilder.buildInstr(AArch64::ORRWrr, {Dst}, {Def1Reg, Def2Reg});
4564 constrainSelectedInstRegOperands(*OrMI, TII, TRI, RBI);
4565 return &*OrMI;
4566}
4567
4568MachineInstr *AArch64InstructionSelector::emitFPCompare(
4569 Register LHS, Register RHS, MachineIRBuilder &MIRBuilder,
4570 std::optional<CmpInst::Predicate> Pred) const {
4571 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
4572 LLT Ty = MRI.getType(LHS);
4573 if (Ty.isVector())
4574 return nullptr;
4575 unsigned OpSize = Ty.getSizeInBits();
4576 assert(OpSize == 16 || OpSize == 32 || OpSize == 64);
4577
4578 // If this is a compare against +0.0, then we don't have
4579 // to explicitly materialize a constant.
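  // For example, comparing an FPR32 value against +0.0 becomes roughly
  //   FCMPSri %lhs, implicit-def $nzcv
  // (the "fcmp s0, #0.0" form) rather than loading +0.0 into a register.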
4580 const ConstantFP *FPImm = getConstantFPVRegVal(RHS, MRI);
4581 bool ShouldUseImm = FPImm && (FPImm->isZero() && !FPImm->isNegative());
4582
4583 auto IsEqualityPred = [](CmpInst::Predicate P) {
4584 return P == CmpInst::FCMP_OEQ || P == CmpInst::FCMP_ONE ||
4585 P == CmpInst::FCMP_UEQ || P == CmpInst::FCMP_UNE;
4586 };
4587 if (!ShouldUseImm && Pred && IsEqualityPred(*Pred)) {
4588 // Try commutating the operands.
4589 const ConstantFP *LHSImm = getConstantFPVRegVal(LHS, MRI);
4590 if (LHSImm && (LHSImm->isZero() && !LHSImm->isNegative())) {
4591 ShouldUseImm = true;
4592 std::swap(LHS, RHS);
4593 }
4594 }
4595 unsigned CmpOpcTbl[2][3] = {
4596 {AArch64::FCMPHrr, AArch64::FCMPSrr, AArch64::FCMPDrr},
4597 {AArch64::FCMPHri, AArch64::FCMPSri, AArch64::FCMPDri}};
4598 unsigned CmpOpc =
4599 CmpOpcTbl[ShouldUseImm][OpSize == 16 ? 0 : (OpSize == 32 ? 1 : 2)];
4600
4601 // Partially build the compare. Decide if we need to add a use for the
4602 // third operand based off whether or not we're comparing against 0.0.
4603 auto CmpMI = MIRBuilder.buildInstr(CmpOpc).addUse(LHS);
4604 CmpMI.setMIFlags(MachineInstr::NoFPExcept);
4605 if (!ShouldUseImm)
4606 CmpMI.addUse(RHS);
4607 constrainSelectedInstRegOperands(*CmpMI, TII, TRI, RBI);
4608 return &*CmpMI;
4609}
4610
4611MachineInstr *AArch64InstructionSelector::emitVectorConcat(
4612 std::optional<Register> Dst, Register Op1, Register Op2,
4613 MachineIRBuilder &MIRBuilder) const {
4614 // We implement a vector concat by:
4615 // 1. Use scalar_to_vector to insert the lower vector into the larger dest
4616 // 2. Insert the upper vector into the destination's upper element
4617 // TODO: some of this code is common with G_BUILD_VECTOR handling.
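  // For two 64-bit sources this looks roughly like (placeholder vregs):
  //   %w1:fpr128 = INSERT_SUBREG (IMPLICIT_DEF), %op1:fpr64, %subreg.dsub
  //   %w2:fpr128 = INSERT_SUBREG (IMPLICIT_DEF), %op2:fpr64, %subreg.dsub
  //   %dst:fpr128 = INSvi64lane %w1, 1, %w2, 0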
4618 MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
4619
4620 const LLT Op1Ty = MRI.getType(Op1);
4621 const LLT Op2Ty = MRI.getType(Op2);
4622
4623 if (Op1Ty != Op2Ty) {
4624 LLVM_DEBUG(dbgs() << "Could not do vector concat of differing vector tys");
4625 return nullptr;
4626 }
4627 assert(Op1Ty.isVector() && "Expected a vector for vector concat");
4628
4629 if (Op1Ty.getSizeInBits() >= 128) {
4630 LLVM_DEBUG(dbgs() << "Vector concat not supported for full size vectors");
4631 return nullptr;
4632 }
4633
4634 // At the moment we just support 64 bit vector concats.
4635 if (Op1Ty.getSizeInBits() != 64) {
4636 LLVM_DEBUG(dbgs() << "Vector concat supported for 64b vectors");
4637 return nullptr;
4638 }
4639
4640 const LLT ScalarTy = LLT::scalar(Op1Ty.getSizeInBits());
4641 const RegisterBank &FPRBank = *RBI.getRegBank(Op1, MRI, TRI);
4642 const TargetRegisterClass *DstRC =
4643 getRegClassForTypeOnBank(Op1Ty.multiplyElements(2), FPRBank);
4644
4645 MachineInstr *WidenedOp1 =
4646 emitScalarToVector(ScalarTy.getSizeInBits(), DstRC, Op1, MIRBuilder);
4647 MachineInstr *WidenedOp2 =
4648 emitScalarToVector(ScalarTy.getSizeInBits(), DstRC, Op2, MIRBuilder);
4649 if (!WidenedOp1 || !WidenedOp2) {
4650 LLVM_DEBUG(dbgs() << "Could not emit a vector from scalar value");
4651 return nullptr;
4652 }
4653
4654 // Now do the insert of the upper element.
4655 unsigned InsertOpc, InsSubRegIdx;
4656 std::tie(InsertOpc, InsSubRegIdx) =
4657 getInsertVecEltOpInfo(FPRBank, ScalarTy.getSizeInBits());
4658
4659 if (!Dst)
4660 Dst = MRI.createVirtualRegister(DstRC);
4661 auto InsElt =
4662 MIRBuilder
4663 .buildInstr(InsertOpc, {*Dst}, {WidenedOp1->getOperand(0).getReg()})
4664 .addImm(1) /* Lane index */
4665 .addUse(WidenedOp2->getOperand(0).getReg())
4666 .addImm(0);
4668 return &*InsElt;
4669}
4670
4671 MachineInstr *
4672 AArch64InstructionSelector::emitCSINC(Register Dst, Register Src1,
4673 Register Src2, AArch64CC::CondCode Pred,
4674 MachineIRBuilder &MIRBuilder) const {
4675 auto &MRI = *MIRBuilder.getMRI();
4676 const RegClassOrRegBank &RegClassOrBank = MRI.getRegClassOrRegBank(Dst);
4677 // If we used a register class, then this won't necessarily have an LLT.
4678 // Compute the size based off whether or not we have a class or bank.
4679 unsigned Size;
4680 if (const auto *RC = dyn_cast<const TargetRegisterClass *>(RegClassOrBank))
4681 Size = TRI.getRegSizeInBits(*RC);
4682 else
4683 Size = MRI.getType(Dst).getSizeInBits();
4684 // Some opcodes use s1.
4685 assert(Size <= 64 && "Expected 64 bits or less only!");
4686 static const unsigned OpcTable[2] = {AArch64::CSINCWr, AArch64::CSINCXr};
4687 unsigned Opc = OpcTable[Size == 64];
4688 auto CSINC = MIRBuilder.buildInstr(Opc, {Dst}, {Src1, Src2}).addImm(Pred);
4689 constrainSelectedInstRegOperands(*CSINC, TII, TRI, RBI);
4690 return &*CSINC;
4691}
4692
4693MachineInstr *AArch64InstructionSelector::emitCarryIn(MachineInstr &I,
4694 Register CarryReg) {
4695 MachineRegisterInfo *MRI = MIB.getMRI();
4696 unsigned Opcode = I.getOpcode();
4697
4698 // If the instruction is a SUB, we need to negate the carry,
4699 // because borrowing is indicated by carry-flag == 0.
4700 bool NeedsNegatedCarry =
4701 (Opcode == TargetOpcode::G_USUBE || Opcode == TargetOpcode::G_SSUBE);
4702
4703 // If the previous instruction will already produce the correct carry, do not
4704 // emit a carry generating instruction. E.g. for G_UADDE/G_USUBE sequences
4705 // generated during legalization of wide add/sub. This optimization depends on
4706 // these sequences not being interrupted by other instructions.
4707 // We have to select the previous instruction before the carry-using
4708 // instruction is deleted by the calling function, otherwise the previous
4709 // instruction might become dead and would get deleted.
4710 MachineInstr *SrcMI = MRI->getVRegDef(CarryReg);
4711 if (SrcMI == I.getPrevNode()) {
4712 if (auto *CarrySrcMI = dyn_cast<GAddSubCarryOut>(SrcMI)) {
4713 bool ProducesNegatedCarry = CarrySrcMI->isSub();
4714 if (NeedsNegatedCarry == ProducesNegatedCarry &&
4715 CarrySrcMI->isUnsigned() &&
4716 CarrySrcMI->getCarryOutReg() == CarryReg &&
4717 selectAndRestoreState(*SrcMI))
4718 return nullptr;
4719 }
4720 }
4721
4722 Register DeadReg = MRI->createVirtualRegister(&AArch64::GPR32RegClass);
4723
4724 if (NeedsNegatedCarry) {
4725 // (0 - Carry) sets !C in NZCV when Carry == 1
4726 Register ZReg = AArch64::WZR;
4727 return emitInstr(AArch64::SUBSWrr, {DeadReg}, {ZReg, CarryReg}, MIB);
4728 }
4729
4730 // (Carry - 1) sets !C in NZCV when Carry == 0
4731 auto Fns = select12BitValueWithLeftShift(1);
4732 return emitInstr(AArch64::SUBSWri, {DeadReg}, {CarryReg}, MIB, Fns);
4733}
4734
4735bool AArch64InstructionSelector::selectOverflowOp(MachineInstr &I,
4736 MachineRegisterInfo &MRI) {
4737 auto &CarryMI = cast<GAddSubCarryOut>(I);
4738
4739 if (auto *CarryInMI = dyn_cast<GAddSubCarryInOut>(&I)) {
4740 // Set NZCV carry according to carry-in VReg
4741 emitCarryIn(I, CarryInMI->getCarryInReg());
4742 }
4743
4744 // Emit the operation and get the correct condition code.
4745 auto OpAndCC = emitOverflowOp(I.getOpcode(), CarryMI.getDstReg(),
4746 CarryMI.getLHS(), CarryMI.getRHS(), MIB);
4747
4748 Register CarryOutReg = CarryMI.getCarryOutReg();
4749
4750 // Don't convert carry-out to VReg if it is never used
4751 if (!MRI.use_nodbg_empty(CarryOutReg)) {
4752 // Now, put the overflow result in the register given by the first operand
4753 // to the overflow op. CSINC increments the result when the predicate is
4754 // false, so to get the increment when it's true, we need to use the
4755 // inverse. In this case, we want to increment when carry is set.
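    // For example, for G_UADDO the condition is HS, so the carry-out is
    // materialized roughly as:
    //   %carry:gpr32 = CSINCWr $wzr, $wzr, lo
    // which is the "cset %carry, hs" expansion: 1 if the carry flag was set,
    // 0 otherwise.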
4756 Register ZReg = AArch64::WZR;
4757 emitCSINC(/*Dst=*/CarryOutReg, /*Src1=*/ZReg, /*Src2=*/ZReg,
4758 getInvertedCondCode(OpAndCC.second), MIB);
4759 }
4760
4761 I.eraseFromParent();
4762 return true;
4763}
4764
4765std::pair<MachineInstr *, AArch64CC::CondCode>
4766AArch64InstructionSelector::emitOverflowOp(unsigned Opcode, Register Dst,
4767 MachineOperand &LHS,
4768 MachineOperand &RHS,
4769 MachineIRBuilder &MIRBuilder) const {
4770 switch (Opcode) {
4771 default:
4772 llvm_unreachable("Unexpected opcode!");
4773 case TargetOpcode::G_SADDO:
4774 return std::make_pair(emitADDS(Dst, LHS, RHS, MIRBuilder), AArch64CC::VS);
4775 case TargetOpcode::G_UADDO:
4776 return std::make_pair(emitADDS(Dst, LHS, RHS, MIRBuilder), AArch64CC::HS);
4777 case TargetOpcode::G_SSUBO:
4778 return std::make_pair(emitSUBS(Dst, LHS, RHS, MIRBuilder), AArch64CC::VS);
4779 case TargetOpcode::G_USUBO:
4780 return std::make_pair(emitSUBS(Dst, LHS, RHS, MIRBuilder), AArch64CC::LO);
4781 case TargetOpcode::G_SADDE:
4782 return std::make_pair(emitADCS(Dst, LHS, RHS, MIRBuilder), AArch64CC::VS);
4783 case TargetOpcode::G_UADDE:
4784 return std::make_pair(emitADCS(Dst, LHS, RHS, MIRBuilder), AArch64CC::HS);
4785 case TargetOpcode::G_SSUBE:
4786 return std::make_pair(emitSBCS(Dst, LHS, RHS, MIRBuilder), AArch64CC::VS);
4787 case TargetOpcode::G_USUBE:
4788 return std::make_pair(emitSBCS(Dst, LHS, RHS, MIRBuilder), AArch64CC::LO);
4789 }
4790}
4791
4792/// Returns true if @p Val is a tree of AND/OR/CMP operations that can be
4793/// expressed as a conjunction.
4794/// \param CanNegate Set to true if we can negate the whole sub-tree just by
4795/// changing the conditions on the CMP tests.
4796/// (this means we can call emitConjunctionRec() with
4797/// Negate==true on this sub-tree)
4798/// \param MustBeFirst Set to true if this subtree needs to be negated and we
4799/// cannot do the negation naturally. We are required to
4800/// emit the subtree first in this case.
4801/// \param WillNegate Is true if are called when the result of this
4802/// subexpression must be negated. This happens when the
4803/// outer expression is an OR. We can use this fact to know
4804/// that we have a double negation (or (or ...) ...) that
4805/// can be implemented for free.
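/// For example, a tree such as (and (icmp ...) (icmp ...)) or
/// (or (icmp ...) (icmp ...)) can be emitted as one ordinary compare followed
/// by a chain of CCMP/FCCMP instructions, leaving a single condition code for
/// the user of the conjunction to test.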
4806static bool canEmitConjunction(Register Val, bool &CanNegate, bool &MustBeFirst,
4807 bool WillNegate, MachineRegisterInfo &MRI,
4808 unsigned Depth = 0) {
4809 if (!MRI.hasOneNonDBGUse(Val))
4810 return false;
4811 MachineInstr *ValDef = MRI.getVRegDef(Val);
4812 unsigned Opcode = ValDef->getOpcode();
4813 if (isa<GAnyCmp>(ValDef)) {
4814 CanNegate = true;
4815 MustBeFirst = false;
4816 return true;
4817 }
4818 // Protect against exponential runtime and stack overflow.
4819 if (Depth > 6)
4820 return false;
4821 if (Opcode == TargetOpcode::G_AND || Opcode == TargetOpcode::G_OR) {
4822 bool IsOR = Opcode == TargetOpcode::G_OR;
4823 Register O0 = ValDef->getOperand(1).getReg();
4824 Register O1 = ValDef->getOperand(2).getReg();
4825 bool CanNegateL;
4826 bool MustBeFirstL;
4827 if (!canEmitConjunction(O0, CanNegateL, MustBeFirstL, IsOR, MRI, Depth + 1))
4828 return false;
4829 bool CanNegateR;
4830 bool MustBeFirstR;
4831 if (!canEmitConjunction(O1, CanNegateR, MustBeFirstR, IsOR, MRI, Depth + 1))
4832 return false;
4833
4834 if (MustBeFirstL && MustBeFirstR)
4835 return false;
4836
4837 if (IsOR) {
4838 // For an OR expression we need to be able to naturally negate at least
4839 // one side or we cannot do the transformation at all.
4840 if (!CanNegateL && !CanNegateR)
4841 return false;
4842      // If the result of the OR will be negated and we can naturally negate
4843 // the leaves, then this sub-tree as a whole negates naturally.
4844 CanNegate = WillNegate && CanNegateL && CanNegateR;
4845 // If we cannot naturally negate the whole sub-tree, then this must be
4846 // emitted first.
4847 MustBeFirst = !CanNegate;
4848 } else {
4849 assert(Opcode == TargetOpcode::G_AND && "Must be G_AND");
4850 // We cannot naturally negate an AND operation.
4851 CanNegate = false;
4852 MustBeFirst = MustBeFirstL || MustBeFirstR;
4853 }
4854 return true;
4855 }
4856 return false;
4857}
4858
4859MachineInstr *AArch64InstructionSelector::emitConditionalComparison(
4860    Register LHS, Register RHS, CmpInst::Predicate CC,
4861    AArch64CC::CondCode Predicate, AArch64CC::CondCode OutCC,
4862    MachineIRBuilder &MIB) const {
4863 auto &MRI = *MIB.getMRI();
4864 LLT OpTy = MRI.getType(LHS);
4865 unsigned CCmpOpc;
4866 std::optional<ValueAndVReg> C;
4867  if (CmpInst::isIntPredicate(CC)) {
4868    assert(OpTy.getSizeInBits() == 32 || OpTy.getSizeInBits() == 64);
4869    C = getIConstantVRegValWithLookThrough(RHS, MRI);
4870 if (!C || C->Value.sgt(31) || C->Value.slt(-31))
4871 CCmpOpc = OpTy.getSizeInBits() == 32 ? AArch64::CCMPWr : AArch64::CCMPXr;
4872 else if (C->Value.ule(31))
4873 CCmpOpc = OpTy.getSizeInBits() == 32 ? AArch64::CCMPWi : AArch64::CCMPXi;
4874 else
4875 CCmpOpc = OpTy.getSizeInBits() == 32 ? AArch64::CCMNWi : AArch64::CCMNXi;
4876 } else {
4877 assert(OpTy.getSizeInBits() == 16 || OpTy.getSizeInBits() == 32 ||
4878 OpTy.getSizeInBits() == 64);
4879 switch (OpTy.getSizeInBits()) {
4880 case 16:
4881 assert(STI.hasFullFP16() && "Expected Full FP16 for fp16 comparisons");
4882 CCmpOpc = AArch64::FCCMPHrr;
4883 break;
4884 case 32:
4885 CCmpOpc = AArch64::FCCMPSrr;
4886 break;
4887 case 64:
4888 CCmpOpc = AArch64::FCCMPDrr;
4889 break;
4890 default:
4891 return nullptr;
4892 }
4893 }
4894  AArch64CC::CondCode InvOutCC = AArch64CC::getInvertedCondCode(OutCC);
4895  unsigned NZCV = AArch64CC::getNZCVToSatisfyCondCode(InvOutCC);
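  // The NZCV immediate is the flag state that the CCMP/CCMN/FCCMP installs
  // when Predicate does not hold; choosing flags that satisfy the inverted
  // condition makes OutCC evaluate to false in that case, so the conjunction
  // fails as soon as an earlier test in the chain fails.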
4896 auto CCmp =
4897 MIB.buildInstr(CCmpOpc, {}, {LHS});
4898 if (CCmpOpc == AArch64::CCMPWi || CCmpOpc == AArch64::CCMPXi)
4899 CCmp.addImm(C->Value.getZExtValue());
4900 else if (CCmpOpc == AArch64::CCMNWi || CCmpOpc == AArch64::CCMNXi)
4901 CCmp.addImm(C->Value.abs().getZExtValue());
4902 else
4903 CCmp.addReg(RHS);
4904 CCmp.addImm(NZCV).addImm(Predicate);
4906 return &*CCmp;
4907}
4908
4909MachineInstr *AArch64InstructionSelector::emitConjunctionRec(
4910 Register Val, AArch64CC::CondCode &OutCC, bool Negate, Register CCOp,
4911 AArch64CC::CondCode Predicate, MachineIRBuilder &MIB) const {
4912 // We're at a tree leaf, produce a conditional comparison operation.
4913 auto &MRI = *MIB.getMRI();
4914 MachineInstr *ValDef = MRI.getVRegDef(Val);
4915 unsigned Opcode = ValDef->getOpcode();
4916 if (auto *Cmp = dyn_cast<GAnyCmp>(ValDef)) {
4917 Register LHS = Cmp->getLHSReg();
4918 Register RHS = Cmp->getRHSReg();
4919 CmpInst::Predicate CC = Cmp->getCond();
4920 if (Negate)
4921      CC = CmpInst::getInversePredicate(CC);
4922    if (isa<GICmp>(Cmp)) {
4923      OutCC = changeICMPPredToAArch64CC(CC);
4924 } else {
4925 // Handle special FP cases.
4926 AArch64CC::CondCode ExtraCC;
4927 changeFPCCToANDAArch64CC(CC, OutCC, ExtraCC);
4928 // Some floating point conditions can't be tested with a single condition
4929 // code. Construct an additional comparison in this case.
4930 if (ExtraCC != AArch64CC::AL) {
4931 MachineInstr *ExtraCmp;
4932 if (!CCOp)
4933 ExtraCmp = emitFPCompare(LHS, RHS, MIB, CC);
4934 else
4935 ExtraCmp =
4936 emitConditionalComparison(LHS, RHS, CC, Predicate, ExtraCC, MIB);
4937 CCOp = ExtraCmp->getOperand(0).getReg();
4938 Predicate = ExtraCC;
4939 }
4940 }
4941
4942 // Produce a normal comparison if we are first in the chain
4943 if (!CCOp) {
4944 auto Dst = MRI.cloneVirtualRegister(LHS);
4945 if (isa<GICmp>(Cmp))
4946 return emitSUBS(Dst, Cmp->getOperand(2), Cmp->getOperand(3), MIB);
4947 return emitFPCompare(Cmp->getOperand(2).getReg(),
4948 Cmp->getOperand(3).getReg(), MIB);
4949 }
4950 // Otherwise produce a ccmp.
4951 return emitConditionalComparison(LHS, RHS, CC, Predicate, OutCC, MIB);
4952 }
4953 assert(MRI.hasOneNonDBGUse(Val) && "Valid conjunction/disjunction tree");
4954
4955 bool IsOR = Opcode == TargetOpcode::G_OR;
4956
4957 Register LHS = ValDef->getOperand(1).getReg();
4958 bool CanNegateL;
4959 bool MustBeFirstL;
4960 bool ValidL = canEmitConjunction(LHS, CanNegateL, MustBeFirstL, IsOR, MRI);
4961 assert(ValidL && "Valid conjunction/disjunction tree");
4962 (void)ValidL;
4963
4964 Register RHS = ValDef->getOperand(2).getReg();
4965 bool CanNegateR;
4966 bool MustBeFirstR;
4967 bool ValidR = canEmitConjunction(RHS, CanNegateR, MustBeFirstR, IsOR, MRI);
4968 assert(ValidR && "Valid conjunction/disjunction tree");
4969 (void)ValidR;
4970
4971 // Swap sub-tree that must come first to the right side.
4972 if (MustBeFirstL) {
4973 assert(!MustBeFirstR && "Valid conjunction/disjunction tree");
4974 std::swap(LHS, RHS);
4975 std::swap(CanNegateL, CanNegateR);
4976 std::swap(MustBeFirstL, MustBeFirstR);
4977 }
4978
4979 bool NegateR;
4980 bool NegateAfterR;
4981 bool NegateL;
4982 bool NegateAfterAll;
4983 if (Opcode == TargetOpcode::G_OR) {
4984 // Swap the sub-tree that we can negate naturally to the left.
4985 if (!CanNegateL) {
4986 assert(CanNegateR && "at least one side must be negatable");
4987 assert(!MustBeFirstR && "invalid conjunction/disjunction tree");
4988 assert(!Negate);
4989 std::swap(LHS, RHS);
4990 NegateR = false;
4991 NegateAfterR = true;
4992 } else {
4993 // Negate the left sub-tree if possible, otherwise negate the result.
4994 NegateR = CanNegateR;
4995 NegateAfterR = !CanNegateR;
4996 }
4997 NegateL = true;
4998 NegateAfterAll = !Negate;
4999 } else {
5000 assert(Opcode == TargetOpcode::G_AND &&
5001 "Valid conjunction/disjunction tree");
5002 assert(!Negate && "Valid conjunction/disjunction tree");
5003
5004 NegateL = false;
5005 NegateR = false;
5006 NegateAfterR = false;
5007 NegateAfterAll = false;
5008 }
5009
5010 // Emit sub-trees.
5011 AArch64CC::CondCode RHSCC;
5012 MachineInstr *CmpR =
5013 emitConjunctionRec(RHS, RHSCC, NegateR, CCOp, Predicate, MIB);
5014 if (NegateAfterR)
5015 RHSCC = AArch64CC::getInvertedCondCode(RHSCC);
5016  MachineInstr *CmpL = emitConjunctionRec(
5017      LHS, OutCC, NegateL, CmpR->getOperand(0).getReg(), RHSCC, MIB);
5018 if (NegateAfterAll)
5019 OutCC = AArch64CC::getInvertedCondCode(OutCC);
5020 return CmpL;
5021}
5022
5023MachineInstr *AArch64InstructionSelector::emitConjunction(
5024 Register Val, AArch64CC::CondCode &OutCC, MachineIRBuilder &MIB) const {
5025 bool DummyCanNegate;
5026 bool DummyMustBeFirst;
5027 if (!canEmitConjunction(Val, DummyCanNegate, DummyMustBeFirst, false,
5028 *MIB.getMRI()))
5029 return nullptr;
5030 return emitConjunctionRec(Val, OutCC, false, Register(), AArch64CC::AL, MIB);
5031}
5032
5033bool AArch64InstructionSelector::tryOptSelectConjunction(GSelect &SelI,
5034 MachineInstr &CondMI) {
5035 AArch64CC::CondCode AArch64CC;
5036 MachineInstr *ConjMI = emitConjunction(SelI.getCondReg(), AArch64CC, MIB);
5037 if (!ConjMI)
5038 return false;
5039
5040 emitSelect(SelI.getReg(0), SelI.getTrueReg(), SelI.getFalseReg(), AArch64CC, MIB);
5041 SelI.eraseFromParent();
5042 return true;
5043}
5044
5045bool AArch64InstructionSelector::tryOptSelect(GSelect &I) {
5046 MachineRegisterInfo &MRI = *MIB.getMRI();
5047 // We want to recognize this pattern:
5048 //
5049 // $z = G_FCMP pred, $x, $y
5050 // ...
5051 // $w = G_SELECT $z, $a, $b
5052 //
5053 // Where the value of $z is *only* ever used by the G_SELECT (possibly with
5054 // some copies/truncs in between.)
5055 //
5056 // If we see this, then we can emit something like this:
5057 //
5058 // fcmp $x, $y
5059 // fcsel $w, $a, $b, pred
5060 //
5061 // Rather than emitting both of the rather long sequences in the standard
5062 // G_FCMP/G_SELECT select methods.
5063
5064 // First, check if the condition is defined by a compare.
5065 MachineInstr *CondDef = MRI.getVRegDef(I.getOperand(1).getReg());
5066
5067 // We can only fold if all of the defs have one use.
5068 Register CondDefReg = CondDef->getOperand(0).getReg();
5069 if (!MRI.hasOneNonDBGUse(CondDefReg)) {
5070 // Unless it's another select.
5071 for (const MachineInstr &UI : MRI.use_nodbg_instructions(CondDefReg)) {
5072 if (CondDef == &UI)
5073 continue;
5074 if (UI.getOpcode() != TargetOpcode::G_SELECT)
5075 return false;
5076 }
5077 }
5078
5079 // Is the condition defined by a compare?
5080 unsigned CondOpc = CondDef->getOpcode();
5081 if (CondOpc != TargetOpcode::G_ICMP && CondOpc != TargetOpcode::G_FCMP) {
5082 if (tryOptSelectConjunction(I, *CondDef))
5083 return true;
5084 return false;
5085 }
5086
5087  AArch64CC::CondCode CondCode;
5088  if (CondOpc == TargetOpcode::G_ICMP) {
5089 auto Pred =
5090 static_cast<CmpInst::Predicate>(CondDef->getOperand(1).getPredicate());
5091    CondCode = changeICMPPredToAArch64CC(Pred);
5092    emitIntegerCompare(CondDef->getOperand(2), CondDef->getOperand(3),
5093 CondDef->getOperand(1), MIB);
5094 } else {
5095 // Get the condition code for the select.
5096 auto Pred =
5097 static_cast<CmpInst::Predicate>(CondDef->getOperand(1).getPredicate());
5098 AArch64CC::CondCode CondCode2;
5099 changeFCMPPredToAArch64CC(Pred, CondCode, CondCode2);
5100
5101 // changeFCMPPredToAArch64CC sets CondCode2 to AL when we require two
5102 // instructions to emit the comparison.
5103 // TODO: Handle FCMP_UEQ and FCMP_ONE. After that, this check will be
5104 // unnecessary.
5105 if (CondCode2 != AArch64CC::AL)
5106 return false;
5107
5108 if (!emitFPCompare(CondDef->getOperand(2).getReg(),
5109 CondDef->getOperand(3).getReg(), MIB)) {
5110 LLVM_DEBUG(dbgs() << "Couldn't emit compare for select!\n");
5111 return false;
5112 }
5113 }
5114
5115 // Emit the select.
5116 emitSelect(I.getOperand(0).getReg(), I.getOperand(2).getReg(),
5117 I.getOperand(3).getReg(), CondCode, MIB);
5118 I.eraseFromParent();
5119 return true;
5120}
5121
5122MachineInstr *AArch64InstructionSelector::tryFoldIntegerCompare(
5123 MachineOperand &LHS, MachineOperand &RHS, MachineOperand &Predicate,
5124 MachineIRBuilder &MIRBuilder) const {
5125 assert(LHS.isReg() && RHS.isReg() && Predicate.isPredicate() &&
5126 "Unexpected MachineOperand");
5127 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
5128 // We want to find this sort of thing:
5129 // x = G_SUB 0, y
5130 // G_ICMP z, x
5131 //
5132 // In this case, we can fold the G_SUB into the G_ICMP using a CMN instead.
5133 // e.g:
5134 //
5135 // cmn z, y
5136
5137 // Check if the RHS or LHS of the G_ICMP is defined by a SUB
5138 MachineInstr *LHSDef = getDefIgnoringCopies(LHS.getReg(), MRI);
5139 MachineInstr *RHSDef = getDefIgnoringCopies(RHS.getReg(), MRI);
5140 auto P = static_cast<CmpInst::Predicate>(Predicate.getPredicate());
5141 // Given this:
5142 //
5143 // x = G_SUB 0, y
5144 // G_ICMP x, z
5145 //
5146 // Produce this:
5147 //
5148 // cmn y, z
5149 if (isCMN(LHSDef, P, MRI))
5150 return emitCMN(LHSDef->getOperand(2), RHS, MIRBuilder);
5151
5152 // Same idea here, but with the RHS of the compare instead:
5153 //
5154 // Given this:
5155 //
5156 // x = G_SUB 0, y
5157 // G_ICMP z, x
5158 //
5159 // Produce this:
5160 //
5161 // cmn z, y
5162 if (isCMN(RHSDef, P, MRI))
5163 return emitCMN(LHS, RHSDef->getOperand(2), MIRBuilder);
5164
5165 // Given this:
5166 //
5167 // z = G_AND x, y
5168 // G_ICMP z, 0
5169 //
5170 // Produce this if the compare is signed:
5171 //
5172 // tst x, y
5173 if (!CmpInst::isUnsigned(P) && LHSDef &&
5174 LHSDef->getOpcode() == TargetOpcode::G_AND) {
5175 // Make sure that the RHS is 0.
5176 auto ValAndVReg = getIConstantVRegValWithLookThrough(RHS.getReg(), MRI);
5177 if (!ValAndVReg || ValAndVReg->Value != 0)
5178 return nullptr;
5179
5180 return emitTST(LHSDef->getOperand(1),
5181 LHSDef->getOperand(2), MIRBuilder);
5182 }
5183
5184 return nullptr;
5185}
5186
5187bool AArch64InstructionSelector::selectShuffleVector(
5188    MachineInstr &I, MachineRegisterInfo &MRI) {
5189  const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
5190 Register Src1Reg = I.getOperand(1).getReg();
5191 const LLT Src1Ty = MRI.getType(Src1Reg);
5192 Register Src2Reg = I.getOperand(2).getReg();
5193 const LLT Src2Ty = MRI.getType(Src2Reg);
5194 ArrayRef<int> Mask = I.getOperand(3).getShuffleMask();
5195
5196 MachineBasicBlock &MBB = *I.getParent();
5197 MachineFunction &MF = *MBB.getParent();
5198 LLVMContext &Ctx = MF.getFunction().getContext();
5199
5200 // G_SHUFFLE_VECTOR is weird in that the source operands can be scalars, if
5201 // it's originated from a <1 x T> type. Those should have been lowered into
5202 // G_BUILD_VECTOR earlier.
5203 if (!Src1Ty.isVector() || !Src2Ty.isVector()) {
5204 LLVM_DEBUG(dbgs() << "Could not select a \"scalar\" G_SHUFFLE_VECTOR\n");
5205 return false;
5206 }
5207
5208 unsigned BytesPerElt = DstTy.getElementType().getSizeInBits() / 8;
5209
5210  SmallVector<Constant *, 64> CstIdxs;
5211  for (int Val : Mask) {
5212    // For now, we'll just assume any undef index to be 0. This should be
5213    // optimized in the future, e.g. to select DUP etc.
5214 Val = Val < 0 ? 0 : Val;
5215 for (unsigned Byte = 0; Byte < BytesPerElt; ++Byte) {
5216 unsigned Offset = Byte + Val * BytesPerElt;
5217 CstIdxs.emplace_back(ConstantInt::get(Type::getInt8Ty(Ctx), Offset));
5218 }
5219 }
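  // For example, a <4 x s32> shuffle with mask <1,0,3,2> produces the byte
  // indices {4..7, 0..3, 12..15, 8..11} for the TBL lookup.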
5220
5221 // Use a constant pool to load the index vector for TBL.
5222 Constant *CPVal = ConstantVector::get(CstIdxs);
5223 MachineInstr *IndexLoad = emitLoadFromConstantPool(CPVal, MIB);
5224 if (!IndexLoad) {
5225 LLVM_DEBUG(dbgs() << "Could not load from a constant pool");
5226 return false;
5227 }
5228
5229 if (DstTy.getSizeInBits() != 128) {
5230 assert(DstTy.getSizeInBits() == 64 && "Unexpected shuffle result ty");
5231 // This case can be done with TBL1.
5232    MachineInstr *Concat =
5233        emitVectorConcat(std::nullopt, Src1Reg, Src2Reg, MIB);
5234 if (!Concat) {
5235 LLVM_DEBUG(dbgs() << "Could not do vector concat for tbl1");
5236 return false;
5237 }
5238
5239    // The constant pool load will be 64 bits, so we need to convert it to an FPR128 reg.
5240 IndexLoad = emitScalarToVector(64, &AArch64::FPR128RegClass,
5241 IndexLoad->getOperand(0).getReg(), MIB);
5242
5243 auto TBL1 = MIB.buildInstr(
5244 AArch64::TBLv16i8One, {&AArch64::FPR128RegClass},
5245 {Concat->getOperand(0).getReg(), IndexLoad->getOperand(0).getReg()});
5247
5248 auto Copy =
5249 MIB.buildInstr(TargetOpcode::COPY, {I.getOperand(0).getReg()}, {})
5250 .addReg(TBL1.getReg(0), 0, AArch64::dsub);
5251 RBI.constrainGenericRegister(Copy.getReg(0), AArch64::FPR64RegClass, MRI);
5252 I.eraseFromParent();
5253 return true;
5254 }
5255
5256 // For TBL2 we need to emit a REG_SEQUENCE to tie together two consecutive
5257 // Q registers for regalloc.
5258 SmallVector<Register, 2> Regs = {Src1Reg, Src2Reg};
5259 auto RegSeq = createQTuple(Regs, MIB);
5260 auto TBL2 = MIB.buildInstr(AArch64::TBLv16i8Two, {I.getOperand(0)},
5261 {RegSeq, IndexLoad->getOperand(0)});
5263 I.eraseFromParent();
5264 return true;
5265}
5266
5267MachineInstr *AArch64InstructionSelector::emitLaneInsert(
5268 std::optional<Register> DstReg, Register SrcReg, Register EltReg,
5269 unsigned LaneIdx, const RegisterBank &RB,
5270 MachineIRBuilder &MIRBuilder) const {
5271 MachineInstr *InsElt = nullptr;
5272 const TargetRegisterClass *DstRC = &AArch64::FPR128RegClass;
5273 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
5274
5275 // Create a register to define with the insert if one wasn't passed in.
5276 if (!DstReg)
5277 DstReg = MRI.createVirtualRegister(DstRC);
5278
5279 unsigned EltSize = MRI.getType(EltReg).getSizeInBits();
5280 unsigned Opc = getInsertVecEltOpInfo(RB, EltSize).first;
5281
5282 if (RB.getID() == AArch64::FPRRegBankID) {
5283 auto InsSub = emitScalarToVector(EltSize, DstRC, EltReg, MIRBuilder);
5284 InsElt = MIRBuilder.buildInstr(Opc, {*DstReg}, {SrcReg})
5285 .addImm(LaneIdx)
5286 .addUse(InsSub->getOperand(0).getReg())
5287 .addImm(0);
5288 } else {
5289 InsElt = MIRBuilder.buildInstr(Opc, {*DstReg}, {SrcReg})
5290 .addImm(LaneIdx)
5291 .addUse(EltReg);
5292 }
5293
5295 return InsElt;
5296}
5297
5298bool AArch64InstructionSelector::selectUSMovFromExtend(
5299    MachineInstr &MI, MachineRegisterInfo &MRI) {
5300  if (MI.getOpcode() != TargetOpcode::G_SEXT &&
5301 MI.getOpcode() != TargetOpcode::G_ZEXT &&
5302 MI.getOpcode() != TargetOpcode::G_ANYEXT)
5303 return false;
5304 bool IsSigned = MI.getOpcode() == TargetOpcode::G_SEXT;
5305 const Register DefReg = MI.getOperand(0).getReg();
5306 const LLT DstTy = MRI.getType(DefReg);
5307 unsigned DstSize = DstTy.getSizeInBits();
5308
5309 if (DstSize != 32 && DstSize != 64)
5310 return false;
5311
5312 MachineInstr *Extract = getOpcodeDef(TargetOpcode::G_EXTRACT_VECTOR_ELT,
5313 MI.getOperand(1).getReg(), MRI);
5314 int64_t Lane;
5315 if (!Extract || !mi_match(Extract->getOperand(2).getReg(), MRI, m_ICst(Lane)))
5316 return false;
5317 Register Src0 = Extract->getOperand(1).getReg();
5318
5319 const LLT VecTy = MRI.getType(Src0);
5320 if (VecTy.isScalableVector())
5321 return false;
5322
5323 if (VecTy.getSizeInBits() != 128) {
5324 const MachineInstr *ScalarToVector = emitScalarToVector(
5325 VecTy.getSizeInBits(), &AArch64::FPR128RegClass, Src0, MIB);
5326 assert(ScalarToVector && "Didn't expect emitScalarToVector to fail!");
5327 Src0 = ScalarToVector->getOperand(0).getReg();
5328 }
5329
5330 unsigned Opcode;
5331 if (DstSize == 64 && VecTy.getScalarSizeInBits() == 32)
5332 Opcode = IsSigned ? AArch64::SMOVvi32to64 : AArch64::UMOVvi32;
5333 else if (DstSize == 64 && VecTy.getScalarSizeInBits() == 16)
5334 Opcode = IsSigned ? AArch64::SMOVvi16to64 : AArch64::UMOVvi16;
5335 else if (DstSize == 64 && VecTy.getScalarSizeInBits() == 8)
5336 Opcode = IsSigned ? AArch64::SMOVvi8to64 : AArch64::UMOVvi8;
5337 else if (DstSize == 32 && VecTy.getScalarSizeInBits() == 16)
5338 Opcode = IsSigned ? AArch64::SMOVvi16to32 : AArch64::UMOVvi16;
5339 else if (DstSize == 32 && VecTy.getScalarSizeInBits() == 8)
5340 Opcode = IsSigned ? AArch64::SMOVvi8to32 : AArch64::UMOVvi8;
5341 else
5342 llvm_unreachable("Unexpected type combo for S/UMov!");
5343
5344 // We may need to generate one of these, depending on the type and sign of the
5345 // input:
5346 // DstReg = SMOV Src0, Lane;
5347 // NewReg = UMOV Src0, Lane; DstReg = SUBREG_TO_REG NewReg, sub_32;
5348 MachineInstr *ExtI = nullptr;
5349 if (DstSize == 64 && !IsSigned) {
5350 Register NewReg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
5351 MIB.buildInstr(Opcode, {NewReg}, {Src0}).addImm(Lane);
5352 ExtI = MIB.buildInstr(AArch64::SUBREG_TO_REG, {DefReg}, {})
5353 .addImm(0)
5354 .addUse(NewReg)
5355 .addImm(AArch64::sub_32);
5356 RBI.constrainGenericRegister(DefReg, AArch64::GPR64RegClass, MRI);
5357 } else
5358 ExtI = MIB.buildInstr(Opcode, {DefReg}, {Src0}).addImm(Lane);
5359
5361 MI.eraseFromParent();
5362 return true;
5363}
5364
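/// The tryAdvSIMDModImm* helpers below each try to materialize the replicated
/// constant in \p Bits with a single MOVI/MVNI/FMOV (vector, immediate)
/// instruction. Each helper handles one AdvSIMD modified-immediate encoding
/// class and returns nullptr if the value cannot be encoded that way.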
5365MachineInstr *AArch64InstructionSelector::tryAdvSIMDModImm8(
5366 Register Dst, unsigned DstSize, APInt Bits, MachineIRBuilder &Builder) {
5367 unsigned int Op;
5368 if (DstSize == 128) {
5369 if (Bits.getHiBits(64) != Bits.getLoBits(64))
5370 return nullptr;
5371 Op = AArch64::MOVIv16b_ns;
5372 } else {
5373 Op = AArch64::MOVIv8b_ns;
5374 }
5375
5376 uint64_t Val = Bits.zextOrTrunc(64).getZExtValue();
5377
5378  if (AArch64_AM::isAdvSIMDModImmType9(Val)) {
5379    Val = AArch64_AM::encodeAdvSIMDModImmType9(Val);
5380 auto Mov = Builder.buildInstr(Op, {Dst}, {}).addImm(Val);
5382 return &*Mov;
5383 }
5384 return nullptr;
5385}
5386
5387MachineInstr *AArch64InstructionSelector::tryAdvSIMDModImm16(
5388 Register Dst, unsigned DstSize, APInt Bits, MachineIRBuilder &Builder,
5389 bool Inv) {
5390
5391 unsigned int Op;
5392 if (DstSize == 128) {
5393 if (Bits.getHiBits(64) != Bits.getLoBits(64))
5394 return nullptr;
5395 Op = Inv ? AArch64::MVNIv8i16 : AArch64::MOVIv8i16;
5396 } else {
5397 Op = Inv ? AArch64::MVNIv4i16 : AArch64::MOVIv4i16;
5398 }
5399
5400 uint64_t Val = Bits.zextOrTrunc(64).getZExtValue();
5401 uint64_t Shift;
5402
5403  if (AArch64_AM::isAdvSIMDModImmType5(Val)) {
5404    Val = AArch64_AM::encodeAdvSIMDModImmType5(Val);
5405    Shift = 0;
5406  } else if (AArch64_AM::isAdvSIMDModImmType6(Val)) {
5407    Val = AArch64_AM::encodeAdvSIMDModImmType6(Val);
5408    Shift = 8;
5409 } else
5410 return nullptr;
5411
5412 auto Mov = Builder.buildInstr(Op, {Dst}, {}).addImm(Val).addImm(Shift);
5414 return &*Mov;
5415}
5416
5417MachineInstr *AArch64InstructionSelector::tryAdvSIMDModImm32(
5418 Register Dst, unsigned DstSize, APInt Bits, MachineIRBuilder &Builder,
5419 bool Inv) {
5420
5421 unsigned int Op;
5422 if (DstSize == 128) {
5423 if (Bits.getHiBits(64) != Bits.getLoBits(64))
5424 return nullptr;
5425 Op = Inv ? AArch64::MVNIv4i32 : AArch64::MOVIv4i32;
5426 } else {
5427 Op = Inv ? AArch64::MVNIv2i32 : AArch64::MOVIv2i32;
5428 }
5429
5430 uint64_t Val = Bits.zextOrTrunc(64).getZExtValue();
5431 uint64_t Shift;
5432
5433  if ((AArch64_AM::isAdvSIMDModImmType1(Val))) {
5434    Val = AArch64_AM::encodeAdvSIMDModImmType1(Val);
5435    Shift = 0;
5436  } else if ((AArch64_AM::isAdvSIMDModImmType2(Val))) {
5437    Val = AArch64_AM::encodeAdvSIMDModImmType2(Val);
5438    Shift = 8;
5439  } else if ((AArch64_AM::isAdvSIMDModImmType3(Val))) {
5440    Val = AArch64_AM::encodeAdvSIMDModImmType3(Val);
5441    Shift = 16;
5442  } else if ((AArch64_AM::isAdvSIMDModImmType4(Val))) {
5443    Val = AArch64_AM::encodeAdvSIMDModImmType4(Val);
5444    Shift = 24;
5445 } else
5446 return nullptr;
5447
5448 auto Mov = Builder.buildInstr(Op, {Dst}, {}).addImm(Val).addImm(Shift);
5450 return &*Mov;
5451}
5452
5453MachineInstr *AArch64InstructionSelector::tryAdvSIMDModImm64(
5454 Register Dst, unsigned DstSize, APInt Bits, MachineIRBuilder &Builder) {
5455
5456 unsigned int Op;
5457 if (DstSize == 128) {
5458 if (Bits.getHiBits(64) != Bits.getLoBits(64))
5459 return nullptr;
5460 Op = AArch64::MOVIv2d_ns;
5461 } else {
5462 Op = AArch64::MOVID;
5463 }
5464
5465 uint64_t Val = Bits.zextOrTrunc(64).getZExtValue();
5466  if (AArch64_AM::isAdvSIMDModImmType10(Val)) {
5467    Val = AArch64_AM::encodeAdvSIMDModImmType10(Val);
5468    auto Mov = Builder.buildInstr(Op, {Dst}, {}).addImm(Val);
5470 return &*Mov;
5471 }
5472 return nullptr;
5473}
5474
5475MachineInstr *AArch64InstructionSelector::tryAdvSIMDModImm321s(
5476 Register Dst, unsigned DstSize, APInt Bits, MachineIRBuilder &Builder,
5477 bool Inv) {
5478
5479 unsigned int Op;
5480 if (DstSize == 128) {
5481 if (Bits.getHiBits(64) != Bits.getLoBits(64))
5482 return nullptr;
5483 Op = Inv ? AArch64::MVNIv4s_msl : AArch64::MOVIv4s_msl;
5484 } else {
5485 Op = Inv ? AArch64::MVNIv2s_msl : AArch64::MOVIv2s_msl;
5486 }
5487
5488 uint64_t Val = Bits.zextOrTrunc(64).getZExtValue();
5489 uint64_t Shift;
5490
5491  if (AArch64_AM::isAdvSIMDModImmType7(Val)) {
5492    Val = AArch64_AM::encodeAdvSIMDModImmType7(Val);
5493    Shift = 264;
5494  } else if (AArch64_AM::isAdvSIMDModImmType8(Val)) {
5495    Val = AArch64_AM::encodeAdvSIMDModImmType8(Val);
5496    Shift = 272;
5497 } else
5498 return nullptr;
5499
5500 auto Mov = Builder.buildInstr(Op, {Dst}, {}).addImm(Val).addImm(Shift);
5502 return &*Mov;
5503}
5504
5505MachineInstr *AArch64InstructionSelector::tryAdvSIMDModImmFP(
5506 Register Dst, unsigned DstSize, APInt Bits, MachineIRBuilder &Builder) {
5507
5508 unsigned int Op;
5509 bool IsWide = false;
5510 if (DstSize == 128) {
5511 if (Bits.getHiBits(64) != Bits.getLoBits(64))
5512 return nullptr;
5513 Op = AArch64::FMOVv4f32_ns;
5514 IsWide = true;
5515 } else {
5516 Op = AArch64::FMOVv2f32_ns;
5517 }
5518
5519 uint64_t Val = Bits.zextOrTrunc(64).getZExtValue();
5520
5521  if (AArch64_AM::isAdvSIMDModImmType11(Val)) {
5522    Val = AArch64_AM::encodeAdvSIMDModImmType11(Val);
5523  } else if (IsWide && AArch64_AM::isAdvSIMDModImmType12(Val)) {
5524    Val = AArch64_AM::encodeAdvSIMDModImmType12(Val);
5525    Op = AArch64::FMOVv2f64_ns;
5526 } else
5527 return nullptr;
5528
5529 auto Mov = Builder.buildInstr(Op, {Dst}, {}).addImm(Val);
5531 return &*Mov;
5532}
5533
5534bool AArch64InstructionSelector::selectIndexedExtLoad(
5535    MachineInstr &MI, MachineRegisterInfo &MRI) {
5536  auto &ExtLd = cast<GIndexedAnyExtLoad>(MI);
5537 Register Dst = ExtLd.getDstReg();
5538 Register WriteBack = ExtLd.getWritebackReg();
5539 Register Base = ExtLd.getBaseReg();
5540 Register Offset = ExtLd.getOffsetReg();
5541 LLT Ty = MRI.getType(Dst);
5542 assert(Ty.getSizeInBits() <= 64); // Only for scalar GPRs.
5543 unsigned MemSizeBits = ExtLd.getMMO().getMemoryType().getSizeInBits();
5544 bool IsPre = ExtLd.isPre();
5545 bool IsSExt = isa<GIndexedSExtLoad>(ExtLd);
5546 bool InsertIntoXReg = false;
5547 bool IsDst64 = Ty.getSizeInBits() == 64;
5548
5549 unsigned Opc = 0;
5550 LLT NewLdDstTy;
5551 LLT s32 = LLT::scalar(32);
5552 LLT s64 = LLT::scalar(64);
5553
5554 if (MemSizeBits == 8) {
5555 if (IsSExt) {
5556 if (IsDst64)
5557 Opc = IsPre ? AArch64::LDRSBXpre : AArch64::LDRSBXpost;
5558 else
5559 Opc = IsPre ? AArch64::LDRSBWpre : AArch64::LDRSBWpost;
5560 NewLdDstTy = IsDst64 ? s64 : s32;
5561 } else {
5562 Opc = IsPre ? AArch64::LDRBBpre : AArch64::LDRBBpost;
5563 InsertIntoXReg = IsDst64;
5564 NewLdDstTy = s32;
5565 }
5566 } else if (MemSizeBits == 16) {
5567 if (IsSExt) {
5568 if (IsDst64)
5569 Opc = IsPre ? AArch64::LDRSHXpre : AArch64::LDRSHXpost;
5570 else
5571 Opc = IsPre ? AArch64::LDRSHWpre : AArch64::LDRSHWpost;
5572 NewLdDstTy = IsDst64 ? s64 : s32;
5573 } else {
5574 Opc = IsPre ? AArch64::LDRHHpre : AArch64::LDRHHpost;
5575 InsertIntoXReg = IsDst64;
5576 NewLdDstTy = s32;
5577 }
5578 } else if (MemSizeBits == 32) {
5579 if (IsSExt) {
5580 Opc = IsPre ? AArch64::LDRSWpre : AArch64::LDRSWpost;
5581 NewLdDstTy = s64;
5582 } else {
5583 Opc = IsPre ? AArch64::LDRWpre : AArch64::LDRWpost;
5584 InsertIntoXReg = IsDst64;
5585 NewLdDstTy = s32;
5586 }
5587 } else {
5588 llvm_unreachable("Unexpected size for indexed load");
5589 }
5590
5591 if (RBI.getRegBank(Dst, MRI, TRI)->getID() == AArch64::FPRRegBankID)
5592 return false; // We should be on gpr.
5593
5594 auto Cst = getIConstantVRegVal(Offset, MRI);
5595 if (!Cst)
5596 return false; // Shouldn't happen, but just in case.
5597
5598 auto LdMI = MIB.buildInstr(Opc, {WriteBack, NewLdDstTy}, {Base})
5599 .addImm(Cst->getSExtValue());
5600 LdMI.cloneMemRefs(ExtLd);
5602 // Make sure to select the load with the MemTy as the dest type, and then
5603 // insert into X reg if needed.
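  // Writing a W register implicitly zeroes bits [63:32] of the corresponding X
  // register, so for zero/any-extending loads into a 64-bit destination the
  // 32-bit load result only needs a SUBREG_TO_REG; the sign-extending cases
  // use the dedicated LDRS* opcodes selected above instead.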
5604 if (InsertIntoXReg) {
5605 // Generate a SUBREG_TO_REG.
5606 auto SubToReg = MIB.buildInstr(TargetOpcode::SUBREG_TO_REG, {Dst}, {})
5607 .addImm(0)
5608 .addUse(LdMI.getReg(1))
5609 .addImm(AArch64::sub_32);
5610 RBI.constrainGenericRegister(SubToReg.getReg(0), AArch64::GPR64RegClass,
5611 MRI);
5612 } else {
5613 auto Copy = MIB.buildCopy(Dst, LdMI.getReg(1));
5614 selectCopy(*Copy, TII, MRI, TRI, RBI);
5615 }
5616 MI.eraseFromParent();
5617
5618 return true;
5619}
5620
5621bool AArch64InstructionSelector::selectIndexedLoad(MachineInstr &MI,
5622                                                   MachineRegisterInfo &MRI) {
5623  auto &Ld = cast<GIndexedLoad>(MI);
5624 Register Dst = Ld.getDstReg();
5625 Register WriteBack = Ld.getWritebackReg();
5626 Register Base = Ld.getBaseReg();
5627 Register Offset = Ld.getOffsetReg();
5628 assert(MRI.getType(Dst).getSizeInBits() <= 128 &&
5629 "Unexpected type for indexed load");
5630 unsigned MemSize = Ld.getMMO().getMemoryType().getSizeInBytes();
5631
5632 if (MemSize < MRI.getType(Dst).getSizeInBytes())
5633 return selectIndexedExtLoad(MI, MRI);
5634
5635 unsigned Opc = 0;
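  // The opcode tables below are indexed by Log2 of the access size in bytes
  // (B, H, W/S, X/D, Q).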
5636 if (Ld.isPre()) {
5637 static constexpr unsigned GPROpcodes[] = {
5638 AArch64::LDRBBpre, AArch64::LDRHHpre, AArch64::LDRWpre,
5639 AArch64::LDRXpre};
5640 static constexpr unsigned FPROpcodes[] = {
5641 AArch64::LDRBpre, AArch64::LDRHpre, AArch64::LDRSpre, AArch64::LDRDpre,
5642 AArch64::LDRQpre};
5643 if (RBI.getRegBank(Dst, MRI, TRI)->getID() == AArch64::FPRRegBankID)
5644 Opc = FPROpcodes[Log2_32(MemSize)];
5645 else
5646 Opc = GPROpcodes[Log2_32(MemSize)];
5647 } else {
5648 static constexpr unsigned GPROpcodes[] = {
5649 AArch64::LDRBBpost, AArch64::LDRHHpost, AArch64::LDRWpost,
5650 AArch64::LDRXpost};
5651 static constexpr unsigned FPROpcodes[] = {
5652 AArch64::LDRBpost, AArch64::LDRHpost, AArch64::LDRSpost,
5653 AArch64::LDRDpost, AArch64::LDRQpost};
5654 if (RBI.getRegBank(Dst, MRI, TRI)->getID() == AArch64::FPRRegBankID)
5655 Opc = FPROpcodes[Log2_32(MemSize)];
5656 else
5657 Opc = GPROpcodes[Log2_32(MemSize)];
5658 }
5659 auto Cst = getIConstantVRegVal(Offset, MRI);
5660 if (!Cst)
5661 return false; // Shouldn't happen, but just in case.
5662 auto LdMI =
5663 MIB.buildInstr(Opc, {WriteBack, Dst}, {Base}).addImm(Cst->getSExtValue());
5664 LdMI.cloneMemRefs(Ld);
5666 MI.eraseFromParent();
5667 return true;
5668}
5669
5670bool AArch64InstructionSelector::selectIndexedStore(GIndexedStore &I,
5671                                                    MachineRegisterInfo &MRI) {
5672  Register Dst = I.getWritebackReg();
5673 Register Val = I.getValueReg();
5674 Register Base = I.getBaseReg();
5675 Register Offset = I.getOffsetReg();
5676 LLT ValTy = MRI.getType(Val);
5677 assert(ValTy.getSizeInBits() <= 128 && "Unexpected type for indexed store");
5678
5679 unsigned Opc = 0;
5680 if (I.isPre()) {
5681 static constexpr unsigned GPROpcodes[] = {
5682 AArch64::STRBBpre, AArch64::STRHHpre, AArch64::STRWpre,
5683 AArch64::STRXpre};
5684 static constexpr unsigned FPROpcodes[] = {
5685 AArch64::STRBpre, AArch64::STRHpre, AArch64::STRSpre, AArch64::STRDpre,
5686 AArch64::STRQpre};
5687
5688 if (RBI.getRegBank(Val, MRI, TRI)->getID() == AArch64::FPRRegBankID)
5689 Opc = FPROpcodes[Log2_32(ValTy.getSizeInBytes())];
5690 else
5691 Opc = GPROpcodes[Log2_32(ValTy.getSizeInBytes())];
5692 } else {
5693 static constexpr unsigned GPROpcodes[] = {
5694 AArch64::STRBBpost, AArch64::STRHHpost, AArch64::STRWpost,
5695 AArch64::STRXpost};
5696 static constexpr unsigned FPROpcodes[] = {
5697 AArch64::STRBpost, AArch64::STRHpost, AArch64::STRSpost,
5698 AArch64::STRDpost, AArch64::STRQpost};
5699
5700 if (RBI.getRegBank(Val, MRI, TRI)->getID() == AArch64::FPRRegBankID)
5701 Opc = FPROpcodes[Log2_32(ValTy.getSizeInBytes())];
5702 else
5703 Opc = GPROpcodes[Log2_32(ValTy.getSizeInBytes())];
5704 }
5705
5706 auto Cst = getIConstantVRegVal(Offset, MRI);
5707 if (!Cst)
5708 return false; // Shouldn't happen, but just in case.
5709 auto Str =
5710 MIB.buildInstr(Opc, {Dst}, {Val, Base}).addImm(Cst->getSExtValue());
5711 Str.cloneMemRefs(I);
5713 I.eraseFromParent();
5714 return true;
5715}
5716
5717 MachineInstr *
5718 AArch64InstructionSelector::emitConstantVector(Register Dst, Constant *CV,
5719                                                MachineIRBuilder &MIRBuilder,
5720                                                MachineRegisterInfo &MRI) {
5721  LLT DstTy = MRI.getType(Dst);
5722 unsigned DstSize = DstTy.getSizeInBits();
5723 if (CV->isNullValue()) {
5724 if (DstSize == 128) {
5725 auto Mov =
5726 MIRBuilder.buildInstr(AArch64::MOVIv2d_ns, {Dst}, {}).addImm(0);
5728 return &*Mov;
5729 }
5730
5731 if (DstSize == 64) {
5732 auto Mov =
5733 MIRBuilder
5734 .buildInstr(AArch64::MOVIv2d_ns, {&AArch64::FPR128RegClass}, {})
5735 .addImm(0);
5736 auto Copy = MIRBuilder.buildInstr(TargetOpcode::COPY, {Dst}, {})
5737 .addReg(Mov.getReg(0), 0, AArch64::dsub);
5738 RBI.constrainGenericRegister(Dst, AArch64::FPR64RegClass, MRI);
5739 return &*Copy;
5740 }
5741 }
5742
5743 if (CV->getSplatValue()) {
5744 APInt DefBits = APInt::getSplat(
5745 DstSize, CV->getUniqueInteger().trunc(DstTy.getScalarSizeInBits()));
5746 auto TryMOVIWithBits = [&](APInt DefBits) -> MachineInstr * {
5747 MachineInstr *NewOp;
5748 bool Inv = false;
5749 if ((NewOp = tryAdvSIMDModImm64(Dst, DstSize, DefBits, MIRBuilder)) ||
5750 (NewOp =
5751 tryAdvSIMDModImm32(Dst, DstSize, DefBits, MIRBuilder, Inv)) ||
5752 (NewOp =
5753 tryAdvSIMDModImm321s(Dst, DstSize, DefBits, MIRBuilder, Inv)) ||
5754 (NewOp =
5755 tryAdvSIMDModImm16(Dst, DstSize, DefBits, MIRBuilder, Inv)) ||
5756 (NewOp = tryAdvSIMDModImm8(Dst, DstSize, DefBits, MIRBuilder)) ||
5757 (NewOp = tryAdvSIMDModImmFP(Dst, DstSize, DefBits, MIRBuilder)))
5758 return NewOp;
5759
5760 DefBits = ~DefBits;
5761 Inv = true;
5762 if ((NewOp =
5763 tryAdvSIMDModImm32(Dst, DstSize, DefBits, MIRBuilder, Inv)) ||
5764 (NewOp =
5765 tryAdvSIMDModImm321s(Dst, DstSize, DefBits, MIRBuilder, Inv)) ||
5766 (NewOp = tryAdvSIMDModImm16(Dst, DstSize, DefBits, MIRBuilder, Inv)))
5767 return NewOp;
5768 return nullptr;
5769 };
5770
5771 if (auto *NewOp = TryMOVIWithBits(DefBits))
5772 return NewOp;
5773
5774 // See if a fneg of the constant can be materialized with a MOVI, etc
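  // (e.g. a splat of -0.0f is not a valid MOVI immediate, but flipping each
  // element's sign bit gives +0.0f, which is, and an FNEG then restores the
  // original bit pattern).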
5775 auto TryWithFNeg = [&](APInt DefBits, int NumBits,
5776 unsigned NegOpc) -> MachineInstr * {
5777 // FNegate each sub-element of the constant
5778 APInt Neg = APInt::getHighBitsSet(NumBits, 1).zext(DstSize);
5779 APInt NegBits(DstSize, 0);
5780 unsigned NumElts = DstSize / NumBits;
5781 for (unsigned i = 0; i < NumElts; i++)
5782 NegBits |= Neg << (NumBits * i);
5783 NegBits = DefBits ^ NegBits;
5784
5785 // Try to create the new constants with MOVI, and if so generate a fneg
5786 // for it.
5787 if (auto *NewOp = TryMOVIWithBits(NegBits)) {
5788 Register NewDst = MRI.createVirtualRegister(&AArch64::FPR128RegClass);
5789 NewOp->getOperand(0).setReg(NewDst);
5790 return MIRBuilder.buildInstr(NegOpc, {Dst}, {NewDst});
5791 }
5792 return nullptr;
5793 };
5794 MachineInstr *R;
5795 if ((R = TryWithFNeg(DefBits, 32, AArch64::FNEGv4f32)) ||
5796 (R = TryWithFNeg(DefBits, 64, AArch64::FNEGv2f64)) ||
5797 (STI.hasFullFP16() &&
5798 (R = TryWithFNeg(DefBits, 16, AArch64::FNEGv8f16))))
5799 return R;
5800 }
5801
5802 auto *CPLoad = emitLoadFromConstantPool(CV, MIRBuilder);
5803 if (!CPLoad) {
5804 LLVM_DEBUG(dbgs() << "Could not generate cp load for constant vector!");
5805 return nullptr;
5806 }
5807
5808 auto Copy = MIRBuilder.buildCopy(Dst, CPLoad->getOperand(0));
5809 RBI.constrainGenericRegister(
5810 Dst, *MRI.getRegClass(CPLoad->getOperand(0).getReg()), MRI);
5811 return &*Copy;
5812}
5813
5814bool AArch64InstructionSelector::tryOptConstantBuildVec(
5815    MachineInstr &I, LLT DstTy, MachineRegisterInfo &MRI) {
5816  assert(I.getOpcode() == TargetOpcode::G_BUILD_VECTOR);
5817 unsigned DstSize = DstTy.getSizeInBits();
5818 assert(DstSize <= 128 && "Unexpected build_vec type!");
5819 if (DstSize < 32)
5820 return false;
5821 // Check if we're building a constant vector, in which case we want to
5822 // generate a constant pool load instead of a vector insert sequence.
5823  SmallVector<Constant *, 16> Csts;
5824  for (unsigned Idx = 1; Idx < I.getNumOperands(); ++Idx) {
5825 // Try to find G_CONSTANT or G_FCONSTANT
5826 auto *OpMI =
5827 getOpcodeDef(TargetOpcode::G_CONSTANT, I.getOperand(Idx).getReg(), MRI);
5828 if (OpMI)
5829 Csts.emplace_back(
5830 const_cast<ConstantInt *>(OpMI->getOperand(1).getCImm()));
5831 else if ((OpMI = getOpcodeDef(TargetOpcode::G_FCONSTANT,
5832 I.getOperand(Idx).getReg(), MRI)))
5833 Csts.emplace_back(
5834 const_cast<ConstantFP *>(OpMI->getOperand(1).getFPImm()));
5835 else
5836 return false;
5837 }
5838 Constant *CV = ConstantVector::get(Csts);
5839 if (!emitConstantVector(I.getOperand(0).getReg(), CV, MIB, MRI))
5840 return false;
5841 I.eraseFromParent();
5842 return true;
5843}
5844
5845bool AArch64InstructionSelector::tryOptBuildVecToSubregToReg(
5846    MachineInstr &I, MachineRegisterInfo &MRI) {
5847  // Given:
5848 // %vec = G_BUILD_VECTOR %elt, %undef, %undef, ... %undef
5849 //
5850 // Select the G_BUILD_VECTOR as a SUBREG_TO_REG from %elt.
5851 Register Dst = I.getOperand(0).getReg();
5852 Register EltReg = I.getOperand(1).getReg();
5853 LLT EltTy = MRI.getType(EltReg);
5854 // If the index isn't on the same bank as its elements, then this can't be a
5855 // SUBREG_TO_REG.
5856 const RegisterBank &EltRB = *RBI.getRegBank(EltReg, MRI, TRI);
5857 const RegisterBank &DstRB = *RBI.getRegBank(Dst, MRI, TRI);
5858 if (EltRB != DstRB)
5859 return false;
5860 if (any_of(drop_begin(I.operands(), 2), [&MRI](const MachineOperand &Op) {
5861 return !getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, Op.getReg(), MRI);
5862 }))
5863 return false;
5864 unsigned SubReg;
5865 const TargetRegisterClass *EltRC = getRegClassForTypeOnBank(EltTy, EltRB);
5866 if (!EltRC)
5867 return false;
5868 const TargetRegisterClass *DstRC =
5869 getRegClassForTypeOnBank(MRI.getType(Dst), DstRB);
5870 if (!DstRC)
5871 return false;
5872 if (!getSubRegForClass(EltRC, TRI, SubReg))
5873 return false;
5874 auto SubregToReg = MIB.buildInstr(AArch64::SUBREG_TO_REG, {Dst}, {})
5875 .addImm(0)
5876 .addUse(EltReg)
5877 .addImm(SubReg);
5878 I.eraseFromParent();
5879 constrainSelectedInstRegOperands(*SubregToReg, TII, TRI, RBI);
5880 return RBI.constrainGenericRegister(Dst, *DstRC, MRI);
5881}
5882
5883bool AArch64InstructionSelector::selectBuildVector(MachineInstr &I,
5884                                                   MachineRegisterInfo &MRI) {
5885  assert(I.getOpcode() == TargetOpcode::G_BUILD_VECTOR);
5886 // Until we port more of the optimized selections, for now just use a vector
5887 // insert sequence.
5888 const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
5889 const LLT EltTy = MRI.getType(I.getOperand(1).getReg());
5890 unsigned EltSize = EltTy.getSizeInBits();
5891
5892 if (tryOptConstantBuildVec(I, DstTy, MRI))
5893 return true;
5894 if (tryOptBuildVecToSubregToReg(I, MRI))
5895 return true;
5896
5897 if (EltSize != 8 && EltSize != 16 && EltSize != 32 && EltSize != 64)
5898 return false; // Don't support all element types yet.
5899 const RegisterBank &RB = *RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI);
5900
5901 const TargetRegisterClass *DstRC = &AArch64::FPR128RegClass;
5902 MachineInstr *ScalarToVec =
5903 emitScalarToVector(DstTy.getElementType().getSizeInBits(), DstRC,
5904 I.getOperand(1).getReg(), MIB);
5905 if (!ScalarToVec)
5906 return false;
5907
5908 Register DstVec = ScalarToVec->getOperand(0).getReg();
5909 unsigned DstSize = DstTy.getSizeInBits();
5910
5911 // Keep track of the last MI we inserted. Later on, we might be able to save
5912 // a copy using it.
5913 MachineInstr *PrevMI = ScalarToVec;
5914 for (unsigned i = 2, e = DstSize / EltSize + 1; i < e; ++i) {
5915 // Note that if we don't do a subregister copy, we can end up making an
5916 // extra register.
5917 Register OpReg = I.getOperand(i).getReg();
5918 // Do not emit inserts for undefs
5919 if (!getOpcodeDef<GImplicitDef>(OpReg, MRI)) {
5920 PrevMI = &*emitLaneInsert(std::nullopt, DstVec, OpReg, i - 1, RB, MIB);
5921 DstVec = PrevMI->getOperand(0).getReg();
5922 }
5923 }
5924
5925 // If DstTy's size in bits is less than 128, then emit a subregister copy
5926 // from DstVec to the last register we've defined.
5927 if (DstSize < 128) {
5928 // Force this to be FPR using the destination vector.
5929 const TargetRegisterClass *RC =
5930 getRegClassForTypeOnBank(DstTy, *RBI.getRegBank(DstVec, MRI, TRI));
5931 if (!RC)
5932 return false;
5933 if (RC != &AArch64::FPR32RegClass && RC != &AArch64::FPR64RegClass) {
5934 LLVM_DEBUG(dbgs() << "Unsupported register class!\n");
5935 return false;
5936 }
5937
5938 unsigned SubReg = 0;
5939 if (!getSubRegForClass(RC, TRI, SubReg))
5940 return false;
5941 if (SubReg != AArch64::ssub && SubReg != AArch64::dsub) {
5942 LLVM_DEBUG(dbgs() << "Unsupported destination size! (" << DstSize
5943                       << ")\n");
5944 return false;
5945 }
5946
5947 Register Reg = MRI.createVirtualRegister(RC);
5948 Register DstReg = I.getOperand(0).getReg();
5949
5950 MIB.buildInstr(TargetOpcode::COPY, {DstReg}, {}).addReg(DstVec, 0, SubReg);
5951 MachineOperand &RegOp = I.getOperand(1);
5952 RegOp.setReg(Reg);
5953 RBI.constrainGenericRegister(DstReg, *RC, MRI);
5954 } else {
5955 // We either have a vector with all elements (except the first one) undef or
5956 // at least one non-undef non-first element. In the first case, we need to
5957 // constrain the output register ourselves as we may have generated an
5958 // INSERT_SUBREG operation which is a generic operation for which the
5959 // output regclass cannot be automatically chosen.
5960 //
5961 // In the second case, there is no need to do this as it may generate an
5962 // instruction like INSvi32gpr where the regclass can be automatically
5963 // chosen.
5964 //
5965 // Also, we save a copy by re-using the destination register on the final
5966 // insert.
5967 PrevMI->getOperand(0).setReg(I.getOperand(0).getReg());
5969
5970 Register DstReg = PrevMI->getOperand(0).getReg();
5971 if (PrevMI == ScalarToVec && DstReg.isVirtual()) {
5972 const TargetRegisterClass *RC =
5973 getRegClassForTypeOnBank(DstTy, *RBI.getRegBank(DstVec, MRI, TRI));
5974 RBI.constrainGenericRegister(DstReg, *RC, MRI);
5975 }
5976 }
5977
5978 I.eraseFromParent();
5979 return true;
5980}
5981
5982bool AArch64InstructionSelector::selectVectorLoadIntrinsic(unsigned Opc,
5983 unsigned NumVecs,
5984 MachineInstr &I) {
5985 assert(I.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS);
5986 assert(Opc && "Expected an opcode?");
5987 assert(NumVecs > 1 && NumVecs < 5 && "Only support 2, 3, or 4 vectors");
5988 auto &MRI = *MIB.getMRI();
5989 LLT Ty = MRI.getType(I.getOperand(0).getReg());
5990 unsigned Size = Ty.getSizeInBits();
5991 assert((Size == 64 || Size == 128) &&
5992 "Destination must be 64 bits or 128 bits?");
5993 unsigned SubReg = Size == 64 ? AArch64::dsub0 : AArch64::qsub0;
5994 auto Ptr = I.getOperand(I.getNumOperands() - 1).getReg();
5995 assert(MRI.getType(Ptr).isPointer() && "Expected a pointer type?");
5996 auto Load = MIB.buildInstr(Opc, {Ty}, {Ptr});
5997 Load.cloneMemRefs(I);
5999 Register SelectedLoadDst = Load->getOperand(0).getReg();
6000 for (unsigned Idx = 0; Idx < NumVecs; ++Idx) {
6001 auto Vec = MIB.buildInstr(TargetOpcode::COPY, {I.getOperand(Idx)}, {})
6002 .addReg(SelectedLoadDst, 0, SubReg + Idx);
6003 // Emit the subreg copies and immediately select them.
6004 // FIXME: We should refactor our copy code into an emitCopy helper and
6005 // clean up uses of this pattern elsewhere in the selector.
6006 selectCopy(*Vec, TII, MRI, TRI, RBI);
6007 }
6008 return true;
6009}
6010
6011bool AArch64InstructionSelector::selectVectorLoadLaneIntrinsic(
6012 unsigned Opc, unsigned NumVecs, MachineInstr &I) {
6013 assert(I.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS);
6014 assert(Opc && "Expected an opcode?");
6015 assert(NumVecs > 1 && NumVecs < 5 && "Only support 2, 3, or 4 vectors");
6016 auto &MRI = *MIB.getMRI();
6017 LLT Ty = MRI.getType(I.getOperand(0).getReg());
6018 bool Narrow = Ty.getSizeInBits() == 64;
6019
6020 auto FirstSrcRegIt = I.operands_begin() + NumVecs + 1;
6021 SmallVector<Register, 4> Regs(NumVecs);
6022 std::transform(FirstSrcRegIt, FirstSrcRegIt + NumVecs, Regs.begin(),
6023 [](auto MO) { return MO.getReg(); });
6024
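  // The lane-indexed load instructions only operate on Q-register tuples, so
  // 64-bit sources are widened to 128-bit vectors here and the results are
  // narrowed back down via emitNarrowVector below.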
6025 if (Narrow) {
6026 transform(Regs, Regs.begin(), [this](Register Reg) {
6027 return emitScalarToVector(64, &AArch64::FPR128RegClass, Reg, MIB)
6028 ->getOperand(0)
6029 .getReg();
6030 });
6031 Ty = Ty.multiplyElements(2);
6032 }
6033
6034 Register Tuple = createQTuple(Regs, MIB);
6035 auto LaneNo = getIConstantVRegVal((FirstSrcRegIt + NumVecs)->getReg(), MRI);
6036 if (!LaneNo)
6037 return false;
6038
6039 Register Ptr = (FirstSrcRegIt + NumVecs + 1)->getReg();
6040 auto Load = MIB.buildInstr(Opc, {Ty}, {})
6041 .addReg(Tuple)
6042 .addImm(LaneNo->getZExtValue())
6043 .addReg(Ptr);
6044 Load.cloneMemRefs(I);
6046 Register SelectedLoadDst = Load->getOperand(0).getReg();
6047 unsigned SubReg = AArch64::qsub0;
6048 for (unsigned Idx = 0; Idx < NumVecs; ++Idx) {
6049 auto Vec = MIB.buildInstr(TargetOpcode::COPY,
6050 {Narrow ? DstOp(&AArch64::FPR128RegClass)
6051 : DstOp(I.getOperand(Idx).getReg())},
6052 {})
6053 .addReg(SelectedLoadDst, 0, SubReg + Idx);
6054 Register WideReg = Vec.getReg(0);
6055 // Emit the subreg copies and immediately select them.
6056 selectCopy(*Vec, TII, MRI, TRI, RBI);
6057 if (Narrow &&
6058 !emitNarrowVector(I.getOperand(Idx).getReg(), WideReg, MIB, MRI))
6059 return false;
6060 }
6061 return true;
6062}
6063
6064void AArch64InstructionSelector::selectVectorStoreIntrinsic(MachineInstr &I,
6065 unsigned NumVecs,
6066 unsigned Opc) {
6067 MachineRegisterInfo &MRI = I.getParent()->getParent()->getRegInfo();
6068 LLT Ty = MRI.getType(I.getOperand(1).getReg());
6069 Register Ptr = I.getOperand(1 + NumVecs).getReg();
6070
6071 SmallVector<Register, 2> Regs(NumVecs);
6072 std::transform(I.operands_begin() + 1, I.operands_begin() + 1 + NumVecs,
6073 Regs.begin(), [](auto MO) { return MO.getReg(); });
6074
6075 Register Tuple = Ty.getSizeInBits() == 128 ? createQTuple(Regs, MIB)
6076 : createDTuple(Regs, MIB);
6077 auto Store = MIB.buildInstr(Opc, {}, {Tuple, Ptr});
6078 Store.cloneMemRefs(I);
6080}
6081
6082bool AArch64InstructionSelector::selectVectorStoreLaneIntrinsic(
6083 MachineInstr &I, unsigned NumVecs, unsigned Opc) {
6084 MachineRegisterInfo &MRI = I.getParent()->getParent()->getRegInfo();
6085 LLT Ty = MRI.getType(I.getOperand(1).getReg());
6086 bool Narrow = Ty.getSizeInBits() == 64;
6087
6088 SmallVector<Register, 2> Regs(NumVecs);
6089 std::transform(I.operands_begin() + 1, I.operands_begin() + 1 + NumVecs,
6090 Regs.begin(), [](auto MO) { return MO.getReg(); });
6091
6092 if (Narrow)
6093 transform(Regs, Regs.begin(), [this](Register Reg) {
6094 return emitScalarToVector(64, &AArch64::FPR128RegClass, Reg, MIB)
6095 ->getOperand(0)
6096 .getReg();
6097 });
6098
6099 Register Tuple = createQTuple(Regs, MIB);
6100
6101 auto LaneNo = getIConstantVRegVal(I.getOperand(1 + NumVecs).getReg(), MRI);
6102 if (!LaneNo)
6103 return false;
6104 Register Ptr = I.getOperand(1 + NumVecs + 1).getReg();
6105 auto Store = MIB.buildInstr(Opc, {}, {})
6106 .addReg(Tuple)
6107 .addImm(LaneNo->getZExtValue())
6108 .addReg(Ptr);
6109 Store.cloneMemRefs(I);
6111 return true;
6112}
6113
6114bool AArch64InstructionSelector::selectIntrinsicWithSideEffects(
6115    MachineInstr &I, MachineRegisterInfo &MRI) {
6116  // Find the intrinsic ID.
6117 unsigned IntrinID = cast<GIntrinsic>(I).getIntrinsicID();
6118
6119 const LLT S8 = LLT::scalar(8);
6120 const LLT S16 = LLT::scalar(16);
6121 const LLT S32 = LLT::scalar(32);
6122 const LLT S64 = LLT::scalar(64);
6123 const LLT P0 = LLT::pointer(0, 64);
6124 // Select the instruction.
6125 switch (IntrinID) {
6126 default:
6127 return false;
6128 case Intrinsic::aarch64_ldxp:
6129 case Intrinsic::aarch64_ldaxp: {
6130 auto NewI = MIB.buildInstr(
6131 IntrinID == Intrinsic::aarch64_ldxp ? AArch64::LDXPX : AArch64::LDAXPX,
6132 {I.getOperand(0).getReg(), I.getOperand(1).getReg()},
6133 {I.getOperand(3)});
6134 NewI.cloneMemRefs(I);
6136 break;
6137 }
6138 case Intrinsic::aarch64_neon_ld1x2: {
6139 LLT Ty = MRI.getType(I.getOperand(0).getReg());
6140 unsigned Opc = 0;
6141 if (Ty == LLT::fixed_vector(8, S8))
6142 Opc = AArch64::LD1Twov8b;
6143 else if (Ty == LLT::fixed_vector(16, S8))
6144 Opc = AArch64::LD1Twov16b;
6145 else if (Ty == LLT::fixed_vector(4, S16))
6146 Opc = AArch64::LD1Twov4h;
6147 else if (Ty == LLT::fixed_vector(8, S16))
6148 Opc = AArch64::LD1Twov8h;
6149 else if (Ty == LLT::fixed_vector(2, S32))
6150 Opc = AArch64::LD1Twov2s;
6151 else if (Ty == LLT::fixed_vector(4, S32))
6152 Opc = AArch64::LD1Twov4s;
6153 else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
6154 Opc = AArch64::LD1Twov2d;
6155 else if (Ty == S64 || Ty == P0)
6156 Opc = AArch64::LD1Twov1d;
6157 else
6158 llvm_unreachable("Unexpected type for ld1x2!");
6159 selectVectorLoadIntrinsic(Opc, 2, I);
6160 break;
6161 }
6162 case Intrinsic::aarch64_neon_ld1x3: {
6163 LLT Ty = MRI.getType(I.getOperand(0).getReg());
6164 unsigned Opc = 0;
6165 if (Ty == LLT::fixed_vector(8, S8))
6166 Opc = AArch64::LD1Threev8b;
6167 else if (Ty == LLT::fixed_vector(16, S8))
6168 Opc = AArch64::LD1Threev16b;
6169 else if (Ty == LLT::fixed_vector(4, S16))
6170 Opc = AArch64::LD1Threev4h;
6171 else if (Ty == LLT::fixed_vector(8, S16))
6172 Opc = AArch64::LD1Threev8h;
6173 else if (Ty == LLT::fixed_vector(2, S32))
6174 Opc = AArch64::LD1Threev2s;
6175 else if (Ty == LLT::fixed_vector(4, S32))
6176 Opc = AArch64::LD1Threev4s;
6177 else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
6178 Opc = AArch64::LD1Threev2d;
6179 else if (Ty == S64 || Ty == P0)
6180 Opc = AArch64::LD1Threev1d;
6181 else
6182 llvm_unreachable("Unexpected type for ld1x3!");
6183 selectVectorLoadIntrinsic(Opc, 3, I);
6184 break;
6185 }
6186 case Intrinsic::aarch64_neon_ld1x4: {
6187 LLT Ty = MRI.getType(I.getOperand(0).getReg());
6188 unsigned Opc = 0;
6189 if (Ty == LLT::fixed_vector(8, S8))
6190 Opc = AArch64::LD1Fourv8b;
6191 else if (Ty == LLT::fixed_vector(16, S8))
6192 Opc = AArch64::LD1Fourv16b;
6193 else if (Ty == LLT::fixed_vector(4, S16))
6194 Opc = AArch64::LD1Fourv4h;
6195 else if (Ty == LLT::fixed_vector(8, S16))
6196 Opc = AArch64::LD1Fourv8h;
6197 else if (Ty == LLT::fixed_vector(2, S32))
6198 Opc = AArch64::LD1Fourv2s;
6199 else if (Ty == LLT::fixed_vector(4, S32))
6200 Opc = AArch64::LD1Fourv4s;
6201 else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
6202 Opc = AArch64::LD1Fourv2d;
6203 else if (Ty == S64 || Ty == P0)
6204 Opc = AArch64::LD1Fourv1d;
6205 else
6206 llvm_unreachable("Unexpected type for ld1x4!");
6207 selectVectorLoadIntrinsic(Opc, 4, I);
6208 break;
6209 }
6210 case Intrinsic::aarch64_neon_ld2: {
6211 LLT Ty = MRI.getType(I.getOperand(0).getReg());
6212 unsigned Opc = 0;
6213 if (Ty == LLT::fixed_vector(8, S8))
6214 Opc = AArch64::LD2Twov8b;
6215 else if (Ty == LLT::fixed_vector(16, S8))
6216 Opc = AArch64::LD2Twov16b;
6217 else if (Ty == LLT::fixed_vector(4, S16))
6218 Opc = AArch64::LD2Twov4h;
6219 else if (Ty == LLT::fixed_vector(8, S16))
6220 Opc = AArch64::LD2Twov8h;
6221 else if (Ty == LLT::fixed_vector(2, S32))
6222 Opc = AArch64::LD2Twov2s;
6223 else if (Ty == LLT::fixed_vector(4, S32))
6224 Opc = AArch64::LD2Twov4s;
6225 else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
6226 Opc = AArch64::LD2Twov2d;
6227 else if (Ty == S64 || Ty == P0)
6228 Opc = AArch64::LD1Twov1d;
6229 else
6230 llvm_unreachable("Unexpected type for ld2!");
6231 selectVectorLoadIntrinsic(Opc, 2, I);
6232 break;
6233 }
6234 case Intrinsic::aarch64_neon_ld2lane: {
6235 LLT Ty = MRI.getType(I.getOperand(0).getReg());
6236 unsigned Opc;
6237 if (Ty == LLT::fixed_vector(8, S8) || Ty == LLT::fixed_vector(16, S8))
6238 Opc = AArch64::LD2i8;
6239 else if (Ty == LLT::fixed_vector(4, S16) || Ty == LLT::fixed_vector(8, S16))
6240 Opc = AArch64::LD2i16;
6241 else if (Ty == LLT::fixed_vector(2, S32) || Ty == LLT::fixed_vector(4, S32))
6242 Opc = AArch64::LD2i32;
6243 else if (Ty == LLT::fixed_vector(2, S64) ||
6244 Ty == LLT::fixed_vector(2, P0) || Ty == S64 || Ty == P0)
6245 Opc = AArch64::LD2i64;
6246 else
6247      llvm_unreachable("Unexpected type for ld2lane!");
6248 if (!selectVectorLoadLaneIntrinsic(Opc, 2, I))
6249 return false;
6250 break;
6251 }
6252 case Intrinsic::aarch64_neon_ld2r: {
6253 LLT Ty = MRI.getType(I.getOperand(0).getReg());
6254 unsigned Opc = 0;
6255 if (Ty == LLT::fixed_vector(8, S8))
6256 Opc = AArch64::LD2Rv8b;
6257 else if (Ty == LLT::fixed_vector(16, S8))
6258 Opc = AArch64::LD2Rv16b;
6259 else if (Ty == LLT::fixed_vector(4, S16))
6260 Opc = AArch64::LD2Rv4h;
6261 else if (Ty == LLT::fixed_vector(8, S16))
6262 Opc = AArch64::LD2Rv8h;
6263 else if (Ty == LLT::fixed_vector(2, S32))
6264 Opc = AArch64::LD2Rv2s;
6265 else if (Ty == LLT::fixed_vector(4, S32))
6266 Opc = AArch64::LD2Rv4s;
6267 else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
6268 Opc = AArch64::LD2Rv2d;
6269 else if (Ty == S64 || Ty == P0)
6270 Opc = AArch64::LD2Rv1d;
6271 else
6272 llvm_unreachable("Unexpected type for ld2r!");
6273 selectVectorLoadIntrinsic(Opc, 2, I);
6274 break;
6275 }
6276 case Intrinsic::aarch64_neon_ld3: {
6277 LLT Ty = MRI.getType(I.getOperand(0).getReg());
6278 unsigned Opc = 0;
6279 if (Ty == LLT::fixed_vector(8, S8))
6280 Opc = AArch64::LD3Threev8b;
6281 else if (Ty == LLT::fixed_vector(16, S8))
6282 Opc = AArch64::LD3Threev16b;
6283 else if (Ty == LLT::fixed_vector(4, S16))
6284 Opc = AArch64::LD3Threev4h;
6285 else if (Ty == LLT::fixed_vector(8, S16))
6286 Opc = AArch64::LD3Threev8h;
6287 else if (Ty == LLT::fixed_vector(2, S32))
6288 Opc = AArch64::LD3Threev2s;
6289 else if (Ty == LLT::fixed_vector(4, S32))
6290 Opc = AArch64::LD3Threev4s;
6291 else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
6292 Opc = AArch64::LD3Threev2d;
6293 else if (Ty == S64 || Ty == P0)
6294 Opc = AArch64::LD1Threev1d;
6295 else
6296 llvm_unreachable("Unexpected type for ld3!");
6297 selectVectorLoadIntrinsic(Opc, 3, I);
6298 break;
6299 }
6300 case Intrinsic::aarch64_neon_ld3lane: {
6301 LLT Ty = MRI.getType(I.getOperand(0).getReg());
6302 unsigned Opc;
6303 if (Ty == LLT::fixed_vector(8, S8) || Ty == LLT::fixed_vector(16, S8))
6304 Opc = AArch64::LD3i8;
6305 else if (Ty == LLT::fixed_vector(4, S16) || Ty == LLT::fixed_vector(8, S16))
6306 Opc = AArch64::LD3i16;
6307 else if (Ty == LLT::fixed_vector(2, S32) || Ty == LLT::fixed_vector(4, S32))
6308 Opc = AArch64::LD3i32;
6309 else if (Ty == LLT::fixed_vector(2, S64) ||
6310 Ty == LLT::fixed_vector(2, P0) || Ty == S64 || Ty == P0)
6311 Opc = AArch64::LD3i64;
6312 else
6313      llvm_unreachable("Unexpected type for ld3lane!");
6314 if (!selectVectorLoadLaneIntrinsic(Opc, 3, I))
6315 return false;
6316 break;
6317 }
6318 case Intrinsic::aarch64_neon_ld3r: {
6319 LLT Ty = MRI.getType(I.getOperand(0).getReg());
6320 unsigned Opc = 0;
6321 if (Ty == LLT::fixed_vector(8, S8))
6322 Opc = AArch64::LD3Rv8b;
6323 else if (Ty == LLT::fixed_vector(16, S8))
6324 Opc = AArch64::LD3Rv16b;
6325 else if (Ty == LLT::fixed_vector(4, S16))
6326 Opc = AArch64::LD3Rv4h;
6327 else if (Ty == LLT::fixed_vector(8, S16))
6328 Opc = AArch64::LD3Rv8h;
6329 else if (Ty == LLT::fixed_vector(2, S32))
6330 Opc = AArch64::LD3Rv2s;
6331 else if (Ty == LLT::fixed_vector(4, S32))
6332 Opc = AArch64::LD3Rv4s;
6333 else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
6334 Opc = AArch64::LD3Rv2d;
6335 else if (Ty == S64 || Ty == P0)
6336 Opc = AArch64::LD3Rv1d;
6337 else
6338 llvm_unreachable("Unexpected type for ld3r!");
6339 selectVectorLoadIntrinsic(Opc, 3, I);
6340 break;
6341 }
6342 case Intrinsic::aarch64_neon_ld4: {
6343 LLT Ty = MRI.getType(I.getOperand(0).getReg());
6344 unsigned Opc = 0;
6345 if (Ty == LLT::fixed_vector(8, S8))
6346 Opc = AArch64::LD4Fourv8b;
6347 else if (Ty == LLT::fixed_vector(16, S8))
6348 Opc = AArch64::LD4Fourv16b;
6349 else if (Ty == LLT::fixed_vector(4, S16))
6350 Opc = AArch64::LD4Fourv4h;
6351 else if (Ty == LLT::fixed_vector(8, S16))
6352 Opc = AArch64::LD4Fourv8h;
6353 else if (Ty == LLT::fixed_vector(2, S32))
6354 Opc = AArch64::LD4Fourv2s;
6355 else if (Ty == LLT::fixed_vector(4, S32))
6356 Opc = AArch64::LD4Fourv4s;
6357 else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
6358 Opc = AArch64::LD4Fourv2d;
6359 else if (Ty == S64 || Ty == P0)
6360 Opc = AArch64::LD1Fourv1d;
6361 else
6362 llvm_unreachable("Unexpected type for ld4!");
6363 selectVectorLoadIntrinsic(Opc, 4, I);
6364 break;
6365 }
6366 case Intrinsic::aarch64_neon_ld4lane: {
6367 LLT Ty = MRI.getType(I.getOperand(0).getReg());
6368 unsigned Opc;
6369 if (Ty == LLT::fixed_vector(8, S8) || Ty == LLT::fixed_vector(16, S8))
6370 Opc = AArch64::LD4i8;
6371 else if (Ty == LLT::fixed_vector(4, S16) || Ty == LLT::fixed_vector(8, S16))
6372 Opc = AArch64::LD4i16;
6373 else if (Ty == LLT::fixed_vector(2, S32) || Ty == LLT::fixed_vector(4, S32))
6374 Opc = AArch64::LD4i32;
6375 else if (Ty == LLT::fixed_vector(2, S64) ||
6376 Ty == LLT::fixed_vector(2, P0) || Ty == S64 || Ty == P0)
6377 Opc = AArch64::LD4i64;
6378 else
6379      llvm_unreachable("Unexpected type for ld4lane!");
6380 if (!selectVectorLoadLaneIntrinsic(Opc, 4, I))
6381 return false;
6382 break;
6383 }
6384 case Intrinsic::aarch64_neon_ld4r: {
6385 LLT Ty = MRI.getType(I.getOperand(0).getReg());
6386 unsigned Opc = 0;
6387 if (Ty == LLT::fixed_vector(8, S8))
6388 Opc = AArch64::LD4Rv8b;
6389 else if (Ty == LLT::fixed_vector(16, S8))
6390 Opc = AArch64::LD4Rv16b;
6391 else if (Ty == LLT::fixed_vector(4, S16))
6392 Opc = AArch64::LD4Rv4h;
6393 else if (Ty == LLT::fixed_vector(8, S16))
6394 Opc = AArch64::LD4Rv8h;
6395 else if (Ty == LLT::fixed_vector(2, S32))
6396 Opc = AArch64::LD4Rv2s;
6397 else if (Ty == LLT::fixed_vector(4, S32))
6398 Opc = AArch64::LD4Rv4s;
6399 else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
6400 Opc = AArch64::LD4Rv2d;
6401 else if (Ty == S64 || Ty == P0)
6402 Opc = AArch64::LD4Rv1d;
6403 else
6404 llvm_unreachable("Unexpected type for ld4r!");
6405 selectVectorLoadIntrinsic(Opc, 4, I);
6406 break;
6407 }
6408 case Intrinsic::aarch64_neon_st1x2: {
6409 LLT Ty = MRI.getType(I.getOperand(1).getReg());
6410 unsigned Opc;
6411 if (Ty == LLT::fixed_vector(8, S8))
6412 Opc = AArch64::ST1Twov8b;
6413 else if (Ty == LLT::fixed_vector(16, S8))
6414 Opc = AArch64::ST1Twov16b;
6415 else if (Ty == LLT::fixed_vector(4, S16))
6416 Opc = AArch64::ST1Twov4h;
6417 else if (Ty == LLT::fixed_vector(8, S16))
6418 Opc = AArch64::ST1Twov8h;
6419 else if (Ty == LLT::fixed_vector(2, S32))
6420 Opc = AArch64::ST1Twov2s;
6421 else if (Ty == LLT::fixed_vector(4, S32))
6422 Opc = AArch64::ST1Twov4s;
6423 else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
6424 Opc = AArch64::ST1Twov2d;
6425 else if (Ty == S64 || Ty == P0)
6426 Opc = AArch64::ST1Twov1d;
6427 else
6428 llvm_unreachable("Unexpected type for st1x2!");
6429 selectVectorStoreIntrinsic(I, 2, Opc);
6430 break;
6431 }
6432 case Intrinsic::aarch64_neon_st1x3: {
6433 LLT Ty = MRI.getType(I.getOperand(1).getReg());
6434 unsigned Opc;
6435 if (Ty == LLT::fixed_vector(8, S8))
6436 Opc = AArch64::ST1Threev8b;
6437 else if (Ty == LLT::fixed_vector(16, S8))
6438 Opc = AArch64::ST1Threev16b;
6439 else if (Ty == LLT::fixed_vector(4, S16))
6440 Opc = AArch64::ST1Threev4h;
6441 else if (Ty == LLT::fixed_vector(8, S16))
6442 Opc = AArch64::ST1Threev8h;
6443 else if (Ty == LLT::fixed_vector(2, S32))
6444 Opc = AArch64::ST1Threev2s;
6445 else if (Ty == LLT::fixed_vector(4, S32))
6446 Opc = AArch64::ST1Threev4s;
6447 else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
6448 Opc = AArch64::ST1Threev2d;
6449 else if (Ty == S64 || Ty == P0)
6450 Opc = AArch64::ST1Threev1d;
6451 else
6452 llvm_unreachable("Unexpected type for st1x3!");
6453 selectVectorStoreIntrinsic(I, 3, Opc);
6454 break;
6455 }
6456 case Intrinsic::aarch64_neon_st1x4: {
6457 LLT Ty = MRI.getType(I.getOperand(1).getReg());
6458 unsigned Opc;
6459 if (Ty == LLT::fixed_vector(8, S8))
6460 Opc = AArch64::ST1Fourv8b;
6461 else if (Ty == LLT::fixed_vector(16, S8))
6462 Opc = AArch64::ST1Fourv16b;
6463 else if (Ty == LLT::fixed_vector(4, S16))
6464 Opc = AArch64::ST1Fourv4h;
6465 else if (Ty == LLT::fixed_vector(8, S16))
6466 Opc = AArch64::ST1Fourv8h;
6467 else if (Ty == LLT::fixed_vector(2, S32))
6468 Opc = AArch64::ST1Fourv2s;
6469 else if (Ty == LLT::fixed_vector(4, S32))
6470 Opc = AArch64::ST1Fourv4s;
6471 else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
6472 Opc = AArch64::ST1Fourv2d;
6473 else if (Ty == S64 || Ty == P0)
6474 Opc = AArch64::ST1Fourv1d;
6475 else
6476 llvm_unreachable("Unexpected type for st1x4!");
6477 selectVectorStoreIntrinsic(I, 4, Opc);
6478 break;
6479 }
6480 case Intrinsic::aarch64_neon_st2: {
6481 LLT Ty = MRI.getType(I.getOperand(1).getReg());
6482 unsigned Opc;
6483 if (Ty == LLT::fixed_vector(8, S8))
6484 Opc = AArch64::ST2Twov8b;
6485 else if (Ty == LLT::fixed_vector(16, S8))
6486 Opc = AArch64::ST2Twov16b;
6487 else if (Ty == LLT::fixed_vector(4, S16))
6488 Opc = AArch64::ST2Twov4h;
6489 else if (Ty == LLT::fixed_vector(8, S16))
6490 Opc = AArch64::ST2Twov8h;
6491 else if (Ty == LLT::fixed_vector(2, S32))
6492 Opc = AArch64::ST2Twov2s;
6493 else if (Ty == LLT::fixed_vector(4, S32))
6494 Opc = AArch64::ST2Twov4s;
6495 else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
6496 Opc = AArch64::ST2Twov2d;
6497 else if (Ty == S64 || Ty == P0)
6498 Opc = AArch64::ST1Twov1d;
6499 else
6500 llvm_unreachable("Unexpected type for st2!");
6501 selectVectorStoreIntrinsic(I, 2, Opc);
6502 break;
6503 }
6504 case Intrinsic::aarch64_neon_st3: {
6505 LLT Ty = MRI.getType(I.getOperand(1).getReg());
6506 unsigned Opc;
6507 if (Ty == LLT::fixed_vector(8, S8))
6508 Opc = AArch64::ST3Threev8b;
6509 else if (Ty == LLT::fixed_vector(16, S8))
6510 Opc = AArch64::ST3Threev16b;
6511 else if (Ty == LLT::fixed_vector(4, S16))
6512 Opc = AArch64::ST3Threev4h;
6513 else if (Ty == LLT::fixed_vector(8, S16))
6514 Opc = AArch64::ST3Threev8h;
6515 else if (Ty == LLT::fixed_vector(2, S32))
6516 Opc = AArch64::ST3Threev2s;
6517 else if (Ty == LLT::fixed_vector(4, S32))
6518 Opc = AArch64::ST3Threev4s;
6519 else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
6520 Opc = AArch64::ST3Threev2d;
6521 else if (Ty == S64 || Ty == P0)
6522 Opc = AArch64::ST1Threev1d;
6523 else
6524 llvm_unreachable("Unexpected type for st3!");
6525 selectVectorStoreIntrinsic(I, 3, Opc);
6526 break;
6527 }
6528 case Intrinsic::aarch64_neon_st4: {
6529 LLT Ty = MRI.getType(I.getOperand(1).getReg());
6530 unsigned Opc;
6531 if (Ty == LLT::fixed_vector(8, S8))
6532 Opc = AArch64::ST4Fourv8b;
6533 else if (Ty == LLT::fixed_vector(16, S8))
6534 Opc = AArch64::ST4Fourv16b;
6535 else if (Ty == LLT::fixed_vector(4, S16))
6536 Opc = AArch64::ST4Fourv4h;
6537 else if (Ty == LLT::fixed_vector(8, S16))
6538 Opc = AArch64::ST4Fourv8h;
6539 else if (Ty == LLT::fixed_vector(2, S32))
6540 Opc = AArch64::ST4Fourv2s;
6541 else if (Ty == LLT::fixed_vector(4, S32))
6542 Opc = AArch64::ST4Fourv4s;
6543 else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
6544 Opc = AArch64::ST4Fourv2d;
6545 else if (Ty == S64 || Ty == P0)
6546 Opc = AArch64::ST1Fourv1d;
6547 else
6548 llvm_unreachable("Unexpected type for st4!");
6549 selectVectorStoreIntrinsic(I, 4, Opc);
6550 break;
6551 }
6552 case Intrinsic::aarch64_neon_st2lane: {
6553 LLT Ty = MRI.getType(I.getOperand(1).getReg());
6554 unsigned Opc;
6555 if (Ty == LLT::fixed_vector(8, S8) || Ty == LLT::fixed_vector(16, S8))
6556 Opc = AArch64::ST2i8;
6557 else if (Ty == LLT::fixed_vector(4, S16) || Ty == LLT::fixed_vector(8, S16))
6558 Opc = AArch64::ST2i16;
6559 else if (Ty == LLT::fixed_vector(2, S32) || Ty == LLT::fixed_vector(4, S32))
6560 Opc = AArch64::ST2i32;
6561 else if (Ty == LLT::fixed_vector(2, S64) ||
6562 Ty == LLT::fixed_vector(2, P0) || Ty == S64 || Ty == P0)
6563 Opc = AArch64::ST2i64;
6564 else
6565 llvm_unreachable("Unexpected type for st2lane!");
6566 if (!selectVectorStoreLaneIntrinsic(I, 2, Opc))
6567 return false;
6568 break;
6569 }
6570 case Intrinsic::aarch64_neon_st3lane: {
6571 LLT Ty = MRI.getType(I.getOperand(1).getReg());
6572 unsigned Opc;
6573 if (Ty == LLT::fixed_vector(8, S8) || Ty == LLT::fixed_vector(16, S8))
6574 Opc = AArch64::ST3i8;
6575 else if (Ty == LLT::fixed_vector(4, S16) || Ty == LLT::fixed_vector(8, S16))
6576 Opc = AArch64::ST3i16;
6577 else if (Ty == LLT::fixed_vector(2, S32) || Ty == LLT::fixed_vector(4, S32))
6578 Opc = AArch64::ST3i32;
6579 else if (Ty == LLT::fixed_vector(2, S64) ||
6580 Ty == LLT::fixed_vector(2, P0) || Ty == S64 || Ty == P0)
6581 Opc = AArch64::ST3i64;
6582 else
6583 llvm_unreachable("Unexpected type for st3lane!");
6584 if (!selectVectorStoreLaneIntrinsic(I, 3, Opc))
6585 return false;
6586 break;
6587 }
6588 case Intrinsic::aarch64_neon_st4lane: {
6589 LLT Ty = MRI.getType(I.getOperand(1).getReg());
6590 unsigned Opc;
6591 if (Ty == LLT::fixed_vector(8, S8) || Ty == LLT::fixed_vector(16, S8))
6592 Opc = AArch64::ST4i8;
6593 else if (Ty == LLT::fixed_vector(4, S16) || Ty == LLT::fixed_vector(8, S16))
6594 Opc = AArch64::ST4i16;
6595 else if (Ty == LLT::fixed_vector(2, S32) || Ty == LLT::fixed_vector(4, S32))
6596 Opc = AArch64::ST4i32;
6597 else if (Ty == LLT::fixed_vector(2, S64) ||
6598 Ty == LLT::fixed_vector(2, P0) || Ty == S64 || Ty == P0)
6599 Opc = AArch64::ST4i64;
6600 else
6601 llvm_unreachable("Unexpected type for st4lane!");
6602 if (!selectVectorStoreLaneIntrinsic(I, 4, Opc))
6603 return false;
6604 break;
6605 }
6606 case Intrinsic::aarch64_mops_memset_tag: {
6607 // Transform
6608 // %dst:gpr(p0) = \
6609 // G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aarch64.mops.memset.tag),
6610 // \ %dst:gpr(p0), %val:gpr(s64), %n:gpr(s64)
6611 // where %dst is updated, into
6612   //   %Rd:GPR64common, %Rn:GPR64 = \
6613 // MOPSMemorySetTaggingPseudo \
6614 // %Rd:GPR64common, %Rn:GPR64, %Rm:GPR64
6615 // where Rd and Rn are tied.
6616 // It is expected that %val has been extended to s64 in legalization.
6617   // Note that the order of the size/value operands is swapped.
6618
6619 Register DstDef = I.getOperand(0).getReg();
6620 // I.getOperand(1) is the intrinsic function
6621 Register DstUse = I.getOperand(2).getReg();
6622 Register ValUse = I.getOperand(3).getReg();
6623 Register SizeUse = I.getOperand(4).getReg();
6624
6625 // MOPSMemorySetTaggingPseudo has two defs; the intrinsic call has only one.
6626   // Therefore an additional virtual register is required for the updated size
6627 // operand. This value is not accessible via the semantics of the intrinsic.
6628 Register SizeDef = MRI.createGenericVirtualRegister(LLT::scalar(64));
6629
6630 auto Memset = MIB.buildInstr(AArch64::MOPSMemorySetTaggingPseudo,
6631 {DstDef, SizeDef}, {DstUse, SizeUse, ValUse});
6632 Memset.cloneMemRefs(I);
6634 break;
6635 }
6636 }
6637
6638 I.eraseFromParent();
6639 return true;
6640}
6641
6642bool AArch64InstructionSelector::selectIntrinsic(MachineInstr &I,
6644 unsigned IntrinID = cast<GIntrinsic>(I).getIntrinsicID();
6645
6646 switch (IntrinID) {
6647 default:
6648 break;
6649 case Intrinsic::aarch64_crypto_sha1h: {
6650 Register DstReg = I.getOperand(0).getReg();
6651 Register SrcReg = I.getOperand(2).getReg();
6652
6653 // FIXME: Should this be an assert?
6654 if (MRI.getType(DstReg).getSizeInBits() != 32 ||
6655 MRI.getType(SrcReg).getSizeInBits() != 32)
6656 return false;
6657
6658 // The operation has to happen on FPRs. Set up some new FPR registers for
6659 // the source and destination if they are on GPRs.
6660 if (RBI.getRegBank(SrcReg, MRI, TRI)->getID() != AArch64::FPRRegBankID) {
6661 SrcReg = MRI.createVirtualRegister(&AArch64::FPR32RegClass);
6662 MIB.buildCopy({SrcReg}, {I.getOperand(2)});
6663
6664 // Make sure the copy ends up getting constrained properly.
6665 RBI.constrainGenericRegister(I.getOperand(2).getReg(),
6666 AArch64::GPR32RegClass, MRI);
6667 }
6668
6669 if (RBI.getRegBank(DstReg, MRI, TRI)->getID() != AArch64::FPRRegBankID)
6670 DstReg = MRI.createVirtualRegister(&AArch64::FPR32RegClass);
6671
6672 // Actually insert the instruction.
6673 auto SHA1Inst = MIB.buildInstr(AArch64::SHA1Hrr, {DstReg}, {SrcReg});
6674 constrainSelectedInstRegOperands(*SHA1Inst, TII, TRI, RBI);
6675
6676 // Did we create a new register for the destination?
6677 if (DstReg != I.getOperand(0).getReg()) {
6678 // Yep. Copy the result of the instruction back into the original
6679 // destination.
6680 MIB.buildCopy({I.getOperand(0)}, {DstReg});
6681 RBI.constrainGenericRegister(I.getOperand(0).getReg(),
6682 AArch64::GPR32RegClass, MRI);
6683 }
6684
6685 I.eraseFromParent();
6686 return true;
6687 }
6688 case Intrinsic::ptrauth_resign: {
6689 Register DstReg = I.getOperand(0).getReg();
6690 Register ValReg = I.getOperand(2).getReg();
6691 uint64_t AUTKey = I.getOperand(3).getImm();
6692 Register AUTDisc = I.getOperand(4).getReg();
6693 uint64_t PACKey = I.getOperand(5).getImm();
6694 Register PACDisc = I.getOperand(6).getReg();
6695
6696 Register AUTAddrDisc = AUTDisc;
6697 uint16_t AUTConstDiscC = 0;
6698 std::tie(AUTConstDiscC, AUTAddrDisc) =
6700
6701 Register PACAddrDisc = PACDisc;
6702 uint16_t PACConstDiscC = 0;
6703 std::tie(PACConstDiscC, PACAddrDisc) =
6705
6706 MIB.buildCopy({AArch64::X16}, {ValReg});
6707 MIB.buildInstr(TargetOpcode::IMPLICIT_DEF, {AArch64::X17}, {});
6708 MIB.buildInstr(AArch64::AUTPAC)
6709 .addImm(AUTKey)
6710 .addImm(AUTConstDiscC)
6711 .addUse(AUTAddrDisc)
6712 .addImm(PACKey)
6713 .addImm(PACConstDiscC)
6714 .addUse(PACAddrDisc)
6715 .constrainAllUses(TII, TRI, RBI);
6716 MIB.buildCopy({DstReg}, Register(AArch64::X16));
6717
6718 RBI.constrainGenericRegister(DstReg, AArch64::GPR64RegClass, MRI);
6719 I.eraseFromParent();
6720 return true;
6721 }
6722 case Intrinsic::ptrauth_auth: {
6723 Register DstReg = I.getOperand(0).getReg();
6724 Register ValReg = I.getOperand(2).getReg();
6725 uint64_t AUTKey = I.getOperand(3).getImm();
6726 Register AUTDisc = I.getOperand(4).getReg();
6727
6728 Register AUTAddrDisc = AUTDisc;
6729 uint16_t AUTConstDiscC = 0;
6730 std::tie(AUTConstDiscC, AUTAddrDisc) =
6732
6733 MIB.buildCopy({AArch64::X16}, {ValReg});
6734 MIB.buildInstr(TargetOpcode::IMPLICIT_DEF, {AArch64::X17}, {});
6735 MIB.buildInstr(AArch64::AUT)
6736 .addImm(AUTKey)
6737 .addImm(AUTConstDiscC)
6738 .addUse(AUTAddrDisc)
6739 .constrainAllUses(TII, TRI, RBI);
6740 MIB.buildCopy({DstReg}, Register(AArch64::X16));
6741
6742 RBI.constrainGenericRegister(DstReg, AArch64::GPR64RegClass, MRI);
6743 I.eraseFromParent();
6744 return true;
6745 }
6746 case Intrinsic::frameaddress:
6747 case Intrinsic::returnaddress: {
6748 MachineFunction &MF = *I.getParent()->getParent();
6749 MachineFrameInfo &MFI = MF.getFrameInfo();
6750
6751 unsigned Depth = I.getOperand(2).getImm();
6752 Register DstReg = I.getOperand(0).getReg();
6753 RBI.constrainGenericRegister(DstReg, AArch64::GPR64RegClass, MRI);
6754
6755 if (Depth == 0 && IntrinID == Intrinsic::returnaddress) {
6756 if (!MFReturnAddr) {
6757 // Insert the copy from LR/X30 into the entry block, before it can be
6758 // clobbered by anything.
6759 MFI.setReturnAddressIsTaken(true);
6760 MFReturnAddr = getFunctionLiveInPhysReg(
6761 MF, TII, AArch64::LR, AArch64::GPR64RegClass, I.getDebugLoc());
6762 }
6763
6764 if (STI.hasPAuth()) {
6765 MIB.buildInstr(AArch64::XPACI, {DstReg}, {MFReturnAddr});
6766 } else {
6767 MIB.buildCopy({Register(AArch64::LR)}, {MFReturnAddr});
6768 MIB.buildInstr(AArch64::XPACLRI);
6769 MIB.buildCopy({DstReg}, {Register(AArch64::LR)});
6770 }
6771
6772 I.eraseFromParent();
6773 return true;
6774 }
6775
6776 MFI.setFrameAddressIsTaken(true);
6777 Register FrameAddr(AArch64::FP);
6778 while (Depth--) {
6779 Register NextFrame = MRI.createVirtualRegister(&AArch64::GPR64spRegClass);
6780 auto Ldr =
6781 MIB.buildInstr(AArch64::LDRXui, {NextFrame}, {FrameAddr}).addImm(0);
6783 FrameAddr = NextFrame;
6784 }
6785
6786 if (IntrinID == Intrinsic::frameaddress)
6787 MIB.buildCopy({DstReg}, {FrameAddr});
6788 else {
6789 MFI.setReturnAddressIsTaken(true);
6790
6791 if (STI.hasPAuth()) {
6792 Register TmpReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
6793 MIB.buildInstr(AArch64::LDRXui, {TmpReg}, {FrameAddr}).addImm(1);
6794 MIB.buildInstr(AArch64::XPACI, {DstReg}, {TmpReg});
6795 } else {
6796 MIB.buildInstr(AArch64::LDRXui, {Register(AArch64::LR)}, {FrameAddr})
6797 .addImm(1);
6798 MIB.buildInstr(AArch64::XPACLRI);
6799 MIB.buildCopy({DstReg}, {Register(AArch64::LR)});
6800 }
6801 }
6802
6803 I.eraseFromParent();
6804 return true;
6805 }
6806 case Intrinsic::aarch64_neon_tbl2:
6807 SelectTable(I, MRI, 2, AArch64::TBLv8i8Two, AArch64::TBLv16i8Two, false);
6808 return true;
6809 case Intrinsic::aarch64_neon_tbl3:
6810 SelectTable(I, MRI, 3, AArch64::TBLv8i8Three, AArch64::TBLv16i8Three,
6811 false);
6812 return true;
6813 case Intrinsic::aarch64_neon_tbl4:
6814 SelectTable(I, MRI, 4, AArch64::TBLv8i8Four, AArch64::TBLv16i8Four, false);
6815 return true;
6816 case Intrinsic::aarch64_neon_tbx2:
6817 SelectTable(I, MRI, 2, AArch64::TBXv8i8Two, AArch64::TBXv16i8Two, true);
6818 return true;
6819 case Intrinsic::aarch64_neon_tbx3:
6820 SelectTable(I, MRI, 3, AArch64::TBXv8i8Three, AArch64::TBXv16i8Three, true);
6821 return true;
6822 case Intrinsic::aarch64_neon_tbx4:
6823 SelectTable(I, MRI, 4, AArch64::TBXv8i8Four, AArch64::TBXv16i8Four, true);
6824 return true;
6825 case Intrinsic::swift_async_context_addr:
6826 auto Sub = MIB.buildInstr(AArch64::SUBXri, {I.getOperand(0).getReg()},
6827 {Register(AArch64::FP)})
6828 .addImm(8)
6829 .addImm(0);
6831
6833 MF->getInfo<AArch64FunctionInfo>()->setHasSwiftAsyncContext(true);
6834 I.eraseFromParent();
6835 return true;
6836 }
6837 return false;
6838}
6839
6840// G_PTRAUTH_GLOBAL_VALUE lowering
6841//
6842// We have 3 lowering alternatives to choose from:
6843// - MOVaddrPAC: similar to MOVaddr, with added PAC.
6844// If the GV doesn't need a GOT load (i.e., is locally defined)
6845// materialize the pointer using adrp+add+pac. See LowerMOVaddrPAC.
6846//
6847// - LOADgotPAC: similar to LOADgot, with added PAC.
6848// If the GV needs a GOT load, materialize the pointer using the usual
6849// GOT adrp+ldr, +pac. Pointers in GOT are assumed to be not signed, the GOT
6850// section is assumed to be read-only (for example, via relro mechanism). See
6851// LowerMOVaddrPAC.
6852//
6853// - LOADauthptrstatic: similar to LOADgot, but use a
6854// special stub slot instead of a GOT slot.
6855// Load a signed pointer for symbol 'sym' from a stub slot named
6856// 'sym$auth_ptr$key$disc' filled by dynamic linker during relocation
6857// resolving. This usually lowers to adrp+ldr, but also emits an entry into
6858// .data with an
6859// @AUTH relocation. See LowerLOADauthptrstatic.
6860//
6861// All 3 are pseudos that are expanded late into longer sequences: this lets us
6862// provide integrity guarantees on the to-be-signed intermediate values.
6863//
6864// LOADauthptrstatic is undesirable because it requires a large section filled
6865// with often similarly-signed pointers, making it a good harvesting target.
6866// Thus, it's only used for ptrauth references to extern_weak to avoid null
6867// checks.
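//
// For example, a signed reference like the following (names illustrative):
//   @g.auth = constant ptr ptrauth (ptr @g, i32 2, i64 1234)
// reaches this selector as a G_PTRAUTH_GLOBAL_VALUE of @g with key 2 and
// integer discriminator 1234; whether @g needs a GOT load and whether it is
// extern_weak then picks between the three pseudos above.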
6868
6869bool AArch64InstructionSelector::selectPtrAuthGlobalValue(
6871 Register DefReg = I.getOperand(0).getReg();
6872 Register Addr = I.getOperand(1).getReg();
6873 uint64_t Key = I.getOperand(2).getImm();
6874 Register AddrDisc = I.getOperand(3).getReg();
6875 uint64_t Disc = I.getOperand(4).getImm();
6876 int64_t Offset = 0;
6877
6878 if (Key > AArch64PACKey::LAST)
6879 report_fatal_error("key in ptrauth global out of range [0, " +
6880 Twine((int)AArch64PACKey::LAST) + "]");
6881
6882 // Blend only works if the integer discriminator is 16-bit wide.
6883 if (!isUInt<16>(Disc))
6885 "constant discriminator in ptrauth global out of range [0, 0xffff]");
6886
6887 // Choosing between 3 lowering alternatives is target-specific.
6888 if (!STI.isTargetELF() && !STI.isTargetMachO())
6889 report_fatal_error("ptrauth global lowering only supported on MachO/ELF");
6890
6891 if (!MRI.hasOneDef(Addr))
6892 return false;
6893
6894 // First match any offset we take from the real global.
6895 const MachineInstr *DefMI = &*MRI.def_instr_begin(Addr);
6896 if (DefMI->getOpcode() == TargetOpcode::G_PTR_ADD) {
6897 Register OffsetReg = DefMI->getOperand(2).getReg();
6898 if (!MRI.hasOneDef(OffsetReg))
6899 return false;
6900 const MachineInstr &OffsetMI = *MRI.def_instr_begin(OffsetReg);
6901 if (OffsetMI.getOpcode() != TargetOpcode::G_CONSTANT)
6902 return false;
6903
6904 Addr = DefMI->getOperand(1).getReg();
6905 if (!MRI.hasOneDef(Addr))
6906 return false;
6907
6908 DefMI = &*MRI.def_instr_begin(Addr);
6909 Offset = OffsetMI.getOperand(1).getCImm()->getSExtValue();
6910 }
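  // For example, "%addr:gpr(p0) = G_PTR_ADD %g, %cst" with
  // "%cst:gpr(s64) = G_CONSTANT i64 16" leaves DefMI as the definition of %g
  // and Offset == 16.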
6911
6912 // We should be left with a genuine unauthenticated GlobalValue.
6913 const GlobalValue *GV;
6914 if (DefMI->getOpcode() == TargetOpcode::G_GLOBAL_VALUE) {
6915 GV = DefMI->getOperand(1).getGlobal();
6917 } else if (DefMI->getOpcode() == AArch64::G_ADD_LOW) {
6918 GV = DefMI->getOperand(2).getGlobal();
6920 } else {
6921 return false;
6922 }
6923
6924 MachineIRBuilder MIB(I);
6925
6926 // Classify the reference to determine whether it needs a GOT load.
6927 unsigned OpFlags = STI.ClassifyGlobalReference(GV, TM);
6928 const bool NeedsGOTLoad = ((OpFlags & AArch64II::MO_GOT) != 0);
6929 assert(((OpFlags & (~AArch64II::MO_GOT)) == 0) &&
6930 "unsupported non-GOT op flags on ptrauth global reference");
6931 assert((!GV->hasExternalWeakLinkage() || NeedsGOTLoad) &&
6932 "unsupported non-GOT reference to weak ptrauth global");
6933
6934 std::optional<APInt> AddrDiscVal = getIConstantVRegVal(AddrDisc, MRI);
6935 bool HasAddrDisc = !AddrDiscVal || *AddrDiscVal != 0;
6936
6937 // Non-extern_weak:
6938 // - No GOT load needed -> MOVaddrPAC
6939 // - GOT load for non-extern_weak -> LOADgotPAC
6940 // Note that we disallow extern_weak refs to avoid null checks later.
6941 if (!GV->hasExternalWeakLinkage()) {
6942 MIB.buildInstr(TargetOpcode::IMPLICIT_DEF, {AArch64::X16}, {});
6943 MIB.buildInstr(TargetOpcode::IMPLICIT_DEF, {AArch64::X17}, {});
6944 MIB.buildInstr(NeedsGOTLoad ? AArch64::LOADgotPAC : AArch64::MOVaddrPAC)
6946 .addImm(Key)
6947 .addReg(HasAddrDisc ? AddrDisc : AArch64::XZR)
6948 .addImm(Disc)
6949 .constrainAllUses(TII, TRI, RBI);
6950 MIB.buildCopy(DefReg, Register(AArch64::X16));
6951 RBI.constrainGenericRegister(DefReg, AArch64::GPR64RegClass, MRI);
6952 I.eraseFromParent();
6953 return true;
6954 }
6955
6956 // extern_weak -> LOADauthptrstatic
6957
6958 // Offsets and extern_weak don't mix well: ptrauth aside, you'd get the
6959 // offset alone as a pointer if the symbol wasn't available, which would
6960 // probably break null checks in users. Ptrauth complicates things further:
6961 // error out.
6962 if (Offset != 0)
6964 "unsupported non-zero offset in weak ptrauth global reference");
6965
6966 if (HasAddrDisc)
6967 report_fatal_error("unsupported weak addr-div ptrauth global");
6968
6969 MIB.buildInstr(AArch64::LOADauthptrstatic, {DefReg}, {})
6970 .addGlobalAddress(GV, Offset)
6971 .addImm(Key)
6972 .addImm(Disc);
6973 RBI.constrainGenericRegister(DefReg, AArch64::GPR64RegClass, MRI);
6974
6975 I.eraseFromParent();
6976 return true;
6977}
6978
6979void AArch64InstructionSelector::SelectTable(MachineInstr &I,
6981 unsigned NumVec, unsigned Opc1,
6982 unsigned Opc2, bool isExt) {
6983 Register DstReg = I.getOperand(0).getReg();
6984 unsigned Opc = MRI.getType(DstReg) == LLT::fixed_vector(8, 8) ? Opc1 : Opc2;
6985
6986 // Create the REG_SEQUENCE
6988 for (unsigned i = 0; i < NumVec; i++)
6989 Regs.push_back(I.getOperand(i + 2 + isExt).getReg());
6990 Register RegSeq = createQTuple(Regs, MIB);
6991
6992 Register IdxReg = I.getOperand(2 + NumVec + isExt).getReg();
6994 if (isExt) {
6995 Register Reg = I.getOperand(2).getReg();
6996 Instr = MIB.buildInstr(Opc, {DstReg}, {Reg, RegSeq, IdxReg});
6997 } else
6998 Instr = MIB.buildInstr(Opc, {DstReg}, {RegSeq, IdxReg});
7000 I.eraseFromParent();
7001}
7002
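// The four shift renderers below compute the (immr, imms) bitfield operands
// used when an immediate left shift is selected as a UBFM bitfield move. For
// example, for a 32-bit shift by 5, selectShiftA_32 yields (32 - 5) & 0x1f = 27
// and selectShiftB_32 yields 31 - 5 = 26, matching the alias
//   lsl w0, w1, #5  ==  ubfm w0, w1, #27, #26
// The 64-bit variants do the same with 6-bit fields.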
7004AArch64InstructionSelector::selectShiftA_32(const MachineOperand &Root) const {
7005 auto MaybeImmed = getImmedFromMO(Root);
7006 if (MaybeImmed == std::nullopt || *MaybeImmed > 31)
7007 return std::nullopt;
7008 uint64_t Enc = (32 - *MaybeImmed) & 0x1f;
7009 return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Enc); }}};
7010}
7011
7013AArch64InstructionSelector::selectShiftB_32(const MachineOperand &Root) const {
7014 auto MaybeImmed = getImmedFromMO(Root);
7015 if (MaybeImmed == std::nullopt || *MaybeImmed > 31)
7016 return std::nullopt;
7017 uint64_t Enc = 31 - *MaybeImmed;
7018 return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Enc); }}};
7019}
7020
7022AArch64InstructionSelector::selectShiftA_64(const MachineOperand &Root) const {
7023 auto MaybeImmed = getImmedFromMO(Root);
7024 if (MaybeImmed == std::nullopt || *MaybeImmed > 63)
7025 return std::nullopt;
7026 uint64_t Enc = (64 - *MaybeImmed) & 0x3f;
7027 return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Enc); }}};
7028}
7029
7031AArch64InstructionSelector::selectShiftB_64(const MachineOperand &Root) const {
7032 auto MaybeImmed = getImmedFromMO(Root);
7033 if (MaybeImmed == std::nullopt || *MaybeImmed > 63)
7034 return std::nullopt;
7035 uint64_t Enc = 63 - *MaybeImmed;
7036 return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Enc); }}};
7037}
7038
7039/// Helper to select an immediate value that can be represented as a 12-bit
7040/// value shifted left by either 0 or 12. If it is possible to do so, return
7041/// the immediate and shift value. If not, return std::nullopt.
7042///
7043/// Used by selectArithImmed and selectNegArithImmed.
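///
/// For example, 0xabc yields (0xabc, LSL #0), 0xabc000 yields (0xabc, LSL #12),
/// and 0xabc001 cannot be represented, so std::nullopt is returned.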
7045AArch64InstructionSelector::select12BitValueWithLeftShift(
7046 uint64_t Immed) const {
7047 unsigned ShiftAmt;
7048 if (Immed >> 12 == 0) {
7049 ShiftAmt = 0;
7050 } else if ((Immed & 0xfff) == 0 && Immed >> 24 == 0) {
7051 ShiftAmt = 12;
7052 Immed = Immed >> 12;
7053 } else
7054 return std::nullopt;
7055
7056 unsigned ShVal = AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftAmt);
7057 return {{
7058 [=](MachineInstrBuilder &MIB) { MIB.addImm(Immed); },
7059 [=](MachineInstrBuilder &MIB) { MIB.addImm(ShVal); },
7060 }};
7061}
7062
7063/// SelectArithImmed - Select an immediate value that can be represented as
7064/// a 12-bit value shifted left by either 0 or 12. If so, return true with
7065/// Val set to the 12-bit value and Shift set to the shifter operand.
7067AArch64InstructionSelector::selectArithImmed(MachineOperand &Root) const {
7068 // This function is called from the addsub_shifted_imm ComplexPattern,
7069  // which lists [imm] as the list of opcodes it's interested in; however,
7070 // we still need to check whether the operand is actually an immediate
7071 // here because the ComplexPattern opcode list is only used in
7072 // root-level opcode matching.
7073 auto MaybeImmed = getImmedFromMO(Root);
7074 if (MaybeImmed == std::nullopt)
7075 return std::nullopt;
7076 return select12BitValueWithLeftShift(*MaybeImmed);
7077}
7078
7079/// SelectNegArithImmed - As above, but negates the value before trying to
7080/// select it.
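///
/// For example, a 32-bit immediate of -4 (0xfffffffc) negates to 4, which fits
/// in 12 bits; this is what lets a compare against -4 be selected as the
/// cmn/adds form with #4.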
7082AArch64InstructionSelector::selectNegArithImmed(MachineOperand &Root) const {
7083 // We need a register here, because we need to know if we have a 64 or 32
7084 // bit immediate.
7085 if (!Root.isReg())
7086 return std::nullopt;
7087 auto MaybeImmed = getImmedFromMO(Root);
7088 if (MaybeImmed == std::nullopt)
7089 return std::nullopt;
7090 uint64_t Immed = *MaybeImmed;
7091
7092 // This negation is almost always valid, but "cmp wN, #0" and "cmn wN, #0"
7093 // have the opposite effect on the C flag, so this pattern mustn't match under
7094 // those circumstances.
7095 if (Immed == 0)
7096 return std::nullopt;
7097
7098   // Check whether we're dealing with a 32-bit or a 64-bit type on the
7099   // root.
7101 if (MRI.getType(Root.getReg()).getSizeInBits() == 32)
7102 Immed = ~((uint32_t)Immed) + 1;
7103 else
7104 Immed = ~Immed + 1ULL;
7105
7106 if (Immed & 0xFFFFFFFFFF000000ULL)
7107 return std::nullopt;
7108
7109 Immed &= 0xFFFFFFULL;
7110 return select12BitValueWithLeftShift(Immed);
7111}
7112
7113/// Checks if we are sure that folding MI into load/store addressing mode is
7114/// beneficial or not.
7115///
7116/// Returns:
7117/// - true if folding MI would be beneficial.
7118/// - false if folding MI would be bad.
7119/// - std::nullopt if it is unclear whether folding MI is beneficial.
7120///
7121/// \p MI can be the offset operand of G_PTR_ADD, e.g. G_SHL in the example:
7122///
7123/// %13:gpr(s64) = G_CONSTANT i64 1
7124/// %8:gpr(s64) = G_SHL %6, %13(s64)
7125/// %9:gpr(p0) = G_PTR_ADD %0, %8(s64)
7126/// %12:gpr(s32) = G_LOAD %9(p0) :: (load (s16))
7127std::optional<bool> AArch64InstructionSelector::isWorthFoldingIntoAddrMode(
7128 MachineInstr &MI, const MachineRegisterInfo &MRI) const {
7129 if (MI.getOpcode() == AArch64::G_SHL) {
7130    // Address operands with shifts are free, except when running on subtargets
7131 // with AddrLSLSlow14.
7132 if (const auto ValAndVeg = getIConstantVRegValWithLookThrough(
7133 MI.getOperand(2).getReg(), MRI)) {
7134 const APInt ShiftVal = ValAndVeg->Value;
7135
7136 // Don't fold if we know this will be slow.
7137 return !(STI.hasAddrLSLSlow14() && (ShiftVal == 1 || ShiftVal == 4));
7138 }
7139 }
7140 return std::nullopt;
7141}
7142
7143/// Return true if it is worth folding MI into an extended register. That is,
7144/// if it's safe to pull it into the addressing mode of a load or store as a
7145/// shift.
7146/// \p IsAddrOperand whether the def of MI is used as an address operand
7147/// (e.g. feeding into an LDR/STR).
7148bool AArch64InstructionSelector::isWorthFoldingIntoExtendedReg(
7150 bool IsAddrOperand) const {
7151
7152 // Always fold if there is one use, or if we're optimizing for size.
7153 Register DefReg = MI.getOperand(0).getReg();
7154 if (MRI.hasOneNonDBGUse(DefReg) ||
7155 MI.getParent()->getParent()->getFunction().hasOptSize())
7156 return true;
7157
7158 if (IsAddrOperand) {
7159 // If we are already sure that folding MI is good or bad, return the result.
7160 if (const auto Worth = isWorthFoldingIntoAddrMode(MI, MRI))
7161 return *Worth;
7162
7163 // Fold G_PTR_ADD if its offset operand can be folded
7164 if (MI.getOpcode() == AArch64::G_PTR_ADD) {
7165 MachineInstr *OffsetInst =
7166 getDefIgnoringCopies(MI.getOperand(2).getReg(), MRI);
7167
7168 // Note, we already know G_PTR_ADD is used by at least two instructions.
7169 // If we are also sure about whether folding is beneficial or not,
7170 // return the result.
7171 if (const auto Worth = isWorthFoldingIntoAddrMode(*OffsetInst, MRI))
7172 return *Worth;
7173 }
7174 }
7175
7176 // FIXME: Consider checking HasALULSLFast as appropriate.
7177
7178 // We have a fastpath, so folding a shift in and potentially computing it
7179 // many times may be beneficial. Check if this is only used in memory ops.
7180 // If it is, then we should fold.
7181 return all_of(MRI.use_nodbg_instructions(DefReg),
7182 [](MachineInstr &Use) { return Use.mayLoadOrStore(); });
7183}
7184
7186 switch (Type) {
7187 case AArch64_AM::SXTB:
7188 case AArch64_AM::SXTH:
7189 case AArch64_AM::SXTW:
7190 return true;
7191 default:
7192 return false;
7193 }
7194}
7195
7197AArch64InstructionSelector::selectExtendedSHL(
7199 unsigned SizeInBytes, bool WantsExt) const {
7200 assert(Base.isReg() && "Expected base to be a register operand");
7201 assert(Offset.isReg() && "Expected offset to be a register operand");
7202
7204 MachineInstr *OffsetInst = MRI.getVRegDef(Offset.getReg());
7205
7206 unsigned OffsetOpc = OffsetInst->getOpcode();
7207 bool LookedThroughZExt = false;
7208 if (OffsetOpc != TargetOpcode::G_SHL && OffsetOpc != TargetOpcode::G_MUL) {
7209 // Try to look through a ZEXT.
7210 if (OffsetOpc != TargetOpcode::G_ZEXT || !WantsExt)
7211 return std::nullopt;
7212
7213 OffsetInst = MRI.getVRegDef(OffsetInst->getOperand(1).getReg());
7214 OffsetOpc = OffsetInst->getOpcode();
7215 LookedThroughZExt = true;
7216
7217 if (OffsetOpc != TargetOpcode::G_SHL && OffsetOpc != TargetOpcode::G_MUL)
7218 return std::nullopt;
7219 }
7220 // Make sure that the memory op is a valid size.
7221 int64_t LegalShiftVal = Log2_32(SizeInBytes);
7222 if (LegalShiftVal == 0)
7223 return std::nullopt;
7224 if (!isWorthFoldingIntoExtendedReg(*OffsetInst, MRI, true))
7225 return std::nullopt;
7226
7227 // Now, try to find the specific G_CONSTANT. Start by assuming that the
7228 // register we will offset is the LHS, and the register containing the
7229 // constant is the RHS.
7230 Register OffsetReg = OffsetInst->getOperand(1).getReg();
7231 Register ConstantReg = OffsetInst->getOperand(2).getReg();
7232 auto ValAndVReg = getIConstantVRegValWithLookThrough(ConstantReg, MRI);
7233 if (!ValAndVReg) {
7234 // We didn't get a constant on the RHS. If the opcode is a shift, then
7235 // we're done.
7236 if (OffsetOpc == TargetOpcode::G_SHL)
7237 return std::nullopt;
7238
7239 // If we have a G_MUL, we can use either register. Try looking at the RHS.
7240 std::swap(OffsetReg, ConstantReg);
7241 ValAndVReg = getIConstantVRegValWithLookThrough(ConstantReg, MRI);
7242 if (!ValAndVReg)
7243 return std::nullopt;
7244 }
7245
7246 // The value must fit into 3 bits, and must be positive. Make sure that is
7247 // true.
7248 int64_t ImmVal = ValAndVReg->Value.getSExtValue();
7249
7250 // Since we're going to pull this into a shift, the constant value must be
7251 // a power of 2. If we got a multiply, then we need to check this.
7252 if (OffsetOpc == TargetOpcode::G_MUL) {
7253 if (!llvm::has_single_bit<uint32_t>(ImmVal))
7254 return std::nullopt;
7255
7256 // Got a power of 2. So, the amount we'll shift is the log base-2 of that.
7257 ImmVal = Log2_32(ImmVal);
7258 }
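  // For example, a G_MUL by 8 feeding an 8-byte access gives ImmVal = 3, which
  // matches LegalShiftVal = Log2_32(8) and can therefore be folded.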
7259
7260 if ((ImmVal & 0x7) != ImmVal)
7261 return std::nullopt;
7262
7263 // We are only allowed to shift by LegalShiftVal. This shift value is built
7264 // into the instruction, so we can't just use whatever we want.
7265 if (ImmVal != LegalShiftVal)
7266 return std::nullopt;
7267
7268 unsigned SignExtend = 0;
7269 if (WantsExt) {
7270 // Check if the offset is defined by an extend, unless we looked through a
7271 // G_ZEXT earlier.
7272 if (!LookedThroughZExt) {
7273 MachineInstr *ExtInst = getDefIgnoringCopies(OffsetReg, MRI);
7274 auto Ext = getExtendTypeForInst(*ExtInst, MRI, true);
7276 return std::nullopt;
7277
7278 SignExtend = isSignExtendShiftType(Ext) ? 1 : 0;
7279 // We only support SXTW for signed extension here.
7280 if (SignExtend && Ext != AArch64_AM::SXTW)
7281 return std::nullopt;
7282 OffsetReg = ExtInst->getOperand(1).getReg();
7283 }
7284
7285 // Need a 32-bit wide register here.
7286 MachineIRBuilder MIB(*MRI.getVRegDef(Root.getReg()));
7287 OffsetReg = moveScalarRegClass(OffsetReg, AArch64::GPR32RegClass, MIB);
7288 }
7289
7290 // We can use the LHS of the GEP as the base, and the LHS of the shift as an
7291 // offset. Signify that we are shifting by setting the shift flag to 1.
7292 return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(Base.getReg()); },
7293 [=](MachineInstrBuilder &MIB) { MIB.addUse(OffsetReg); },
7294 [=](MachineInstrBuilder &MIB) {
7295 // Need to add both immediates here to make sure that they are both
7296 // added to the instruction.
7297 MIB.addImm(SignExtend);
7298 MIB.addImm(1);
7299 }}};
7300}
7301
7302/// This is used for computing addresses like this:
7303///
7304/// ldr x1, [x2, x3, lsl #3]
7305///
7306/// Where x2 is the base register, and x3 is an offset register. The shift-left
7307/// is a constant value specific to this load instruction. That is, for this
7308/// 8-byte load we'll never see anything other than a 3 here, since the shift
7309/// amount is the log2 of the size of the element being loaded.
7311AArch64InstructionSelector::selectAddrModeShiftedExtendXReg(
7312 MachineOperand &Root, unsigned SizeInBytes) const {
7313 if (!Root.isReg())
7314 return std::nullopt;
7316
7317 // We want to find something like this:
7318 //
7319 // val = G_CONSTANT LegalShiftVal
7320 // shift = G_SHL off_reg val
7321 // ptr = G_PTR_ADD base_reg shift
7322 // x = G_LOAD ptr
7323 //
7324 // And fold it into this addressing mode:
7325 //
7326 // ldr x, [base_reg, off_reg, lsl #LegalShiftVal]
7327
7328 // Check if we can find the G_PTR_ADD.
7329 MachineInstr *PtrAdd =
7330 getOpcodeDef(TargetOpcode::G_PTR_ADD, Root.getReg(), MRI);
7331 if (!PtrAdd || !isWorthFoldingIntoExtendedReg(*PtrAdd, MRI, true))
7332 return std::nullopt;
7333
7334 // Now, try to match an opcode which will match our specific offset.
7335 // We want a G_SHL or a G_MUL.
7336 MachineInstr *OffsetInst =
7338 return selectExtendedSHL(Root, PtrAdd->getOperand(1),
7339 OffsetInst->getOperand(0), SizeInBytes,
7340 /*WantsExt=*/false);
7341}
7342
7343/// This is used for computing addresses like this:
7344///
7345/// ldr x1, [x2, x3]
7346///
7347/// Where x2 is the base register, and x3 is an offset register.
7348///
7349/// When possible (or profitable) to fold a G_PTR_ADD into the address
7350/// calculation, this will do so. Otherwise, it will return std::nullopt.
7352AArch64InstructionSelector::selectAddrModeRegisterOffset(
7353 MachineOperand &Root) const {
7355
7356 // We need a GEP.
7357 MachineInstr *Gep = MRI.getVRegDef(Root.getReg());
7358 if (Gep->getOpcode() != TargetOpcode::G_PTR_ADD)
7359 return std::nullopt;
7360
7361 // If this is used more than once, let's not bother folding.
7362 // TODO: Check if they are memory ops. If they are, then we can still fold
7363 // without having to recompute anything.
7364 if (!MRI.hasOneNonDBGUse(Gep->getOperand(0).getReg()))
7365 return std::nullopt;
7366
7367 // Base is the GEP's LHS, offset is its RHS.
7368 return {{[=](MachineInstrBuilder &MIB) {
7369 MIB.addUse(Gep->getOperand(1).getReg());
7370 },
7371 [=](MachineInstrBuilder &MIB) {
7372 MIB.addUse(Gep->getOperand(2).getReg());
7373 },
7374 [=](MachineInstrBuilder &MIB) {
7375 // Need to add both immediates here to make sure that they are both
7376 // added to the instruction.
7377 MIB.addImm(0);
7378 MIB.addImm(0);
7379 }}};
7380}
7381
7382/// This is intended to be equivalent to selectAddrModeXRO in
7383/// AArch64ISelDAGtoDAG. It's used for selecting X register offset loads.
7385AArch64InstructionSelector::selectAddrModeXRO(MachineOperand &Root,
7386 unsigned SizeInBytes) const {
7388 if (!Root.isReg())
7389 return std::nullopt;
7390 MachineInstr *PtrAdd =
7391 getOpcodeDef(TargetOpcode::G_PTR_ADD, Root.getReg(), MRI);
7392 if (!PtrAdd)
7393 return std::nullopt;
7394
7395  // Check for immediates which cannot be encoded in the [base + imm]
7396 // addressing mode, and can't be encoded in an add/sub. If this happens, we'll
7397 // end up with code like:
7398 //
7399 // mov x0, wide
7400 // add x1 base, x0
7401 // ldr x2, [x1, x0]
7402 //
7403 // In this situation, we can use the [base, xreg] addressing mode to save an
7404 // add/sub:
7405 //
7406 // mov x0, wide
7407 // ldr x2, [base, x0]
7408 auto ValAndVReg =
7410 if (ValAndVReg) {
7411 unsigned Scale = Log2_32(SizeInBytes);
7412 int64_t ImmOff = ValAndVReg->Value.getSExtValue();
7413
7414    // Skip immediates that can be selected in the load/store addressing
7415 // mode.
7416 if (ImmOff % SizeInBytes == 0 && ImmOff >= 0 &&
7417 ImmOff < (0x1000 << Scale))
7418 return std::nullopt;
7419
7420 // Helper lambda to decide whether or not it is preferable to emit an add.
7421 auto isPreferredADD = [](int64_t ImmOff) {
7422 // Constants in [0x0, 0xfff] can be encoded in an add.
7423 if ((ImmOff & 0xfffffffffffff000LL) == 0x0LL)
7424 return true;
7425
7426 // Can it be encoded in an add lsl #12?
7427 if ((ImmOff & 0xffffffffff000fffLL) != 0x0LL)
7428 return false;
7429
7430 // It can be encoded in an add lsl #12, but we may not want to. If it is
7431 // possible to select this as a single movz, then prefer that. A single
7432 // movz is faster than an add with a shift.
7433 return (ImmOff & 0xffffffffff00ffffLL) != 0x0LL &&
7434 (ImmOff & 0xffffffffffff0fffLL) != 0x0LL;
7435 };
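    // For example, 0x123000 can be encoded as "add ..., #0x123, lsl #12" and is
    // not a single movz, so isPreferredADD returns true and we bail out below;
    // 0x80000 is a single "movz ..., #0x8, lsl #16", so we keep the register
    // offset form instead.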
7436
7437 // If the immediate can be encoded in a single add/sub, then bail out.
7438 if (isPreferredADD(ImmOff) || isPreferredADD(-ImmOff))
7439 return std::nullopt;
7440 }
7441
7442 // Try to fold shifts into the addressing mode.
7443 auto AddrModeFns = selectAddrModeShiftedExtendXReg(Root, SizeInBytes);
7444 if (AddrModeFns)
7445 return AddrModeFns;
7446
7447 // If that doesn't work, see if it's possible to fold in registers from
7448 // a GEP.
7449 return selectAddrModeRegisterOffset(Root);
7450}
7451
7452/// This is used for computing addresses like this:
7453///
7454/// ldr x0, [xBase, wOffset, sxtw #LegalShiftVal]
7455///
7456/// Where we have a 64-bit base register, a 32-bit offset register, and an
7457/// extend (which may or may not be signed).
7459AArch64InstructionSelector::selectAddrModeWRO(MachineOperand &Root,
7460 unsigned SizeInBytes) const {
7462
7463 MachineInstr *PtrAdd =
7464 getOpcodeDef(TargetOpcode::G_PTR_ADD, Root.getReg(), MRI);
7465 if (!PtrAdd || !isWorthFoldingIntoExtendedReg(*PtrAdd, MRI, true))
7466 return std::nullopt;
7467
7468 MachineOperand &LHS = PtrAdd->getOperand(1);
7469 MachineOperand &RHS = PtrAdd->getOperand(2);
7470 MachineInstr *OffsetInst = getDefIgnoringCopies(RHS.getReg(), MRI);
7471
7472 // The first case is the same as selectAddrModeXRO, except we need an extend.
7473 // In this case, we try to find a shift and extend, and fold them into the
7474 // addressing mode.
7475 //
7476 // E.g.
7477 //
7478 // off_reg = G_Z/S/ANYEXT ext_reg
7479 // val = G_CONSTANT LegalShiftVal
7480 // shift = G_SHL off_reg val
7481 // ptr = G_PTR_ADD base_reg shift
7482 // x = G_LOAD ptr
7483 //
7484 // In this case we can get a load like this:
7485 //
7486 // ldr x0, [base_reg, ext_reg, sxtw #LegalShiftVal]
7487 auto ExtendedShl = selectExtendedSHL(Root, LHS, OffsetInst->getOperand(0),
7488 SizeInBytes, /*WantsExt=*/true);
7489 if (ExtendedShl)
7490 return ExtendedShl;
7491
7492 // There was no shift. We can try and fold a G_Z/S/ANYEXT in alone though.
7493 //
7494 // e.g.
7495 // ldr something, [base_reg, ext_reg, sxtw]
7496 if (!isWorthFoldingIntoExtendedReg(*OffsetInst, MRI, true))
7497 return std::nullopt;
7498
7499 // Check if this is an extend. We'll get an extend type if it is.
7501 getExtendTypeForInst(*OffsetInst, MRI, /*IsLoadStore=*/true);
7503 return std::nullopt;
7504
7505 // Need a 32-bit wide register.
7506 MachineIRBuilder MIB(*PtrAdd);
7507 Register ExtReg = moveScalarRegClass(OffsetInst->getOperand(1).getReg(),
7508 AArch64::GPR32RegClass, MIB);
7509 unsigned SignExtend = Ext == AArch64_AM::SXTW;
7510
7511 // Base is LHS, offset is ExtReg.
7512 return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(LHS.getReg()); },
7513 [=](MachineInstrBuilder &MIB) { MIB.addUse(ExtReg); },
7514 [=](MachineInstrBuilder &MIB) {
7515 MIB.addImm(SignExtend);
7516 MIB.addImm(0);
7517 }}};
7518}
7519
7520/// Select a "register plus unscaled signed 9-bit immediate" address. This
7521/// should only match when there is an offset that is not valid for a scaled
7522/// immediate addressing mode. The "Size" argument is the size in bytes of the
7523/// memory reference, which is needed here to know what is valid for a scaled
7524/// immediate.
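/// For example, an offset of -3 on a 4-byte load is not a multiple of 4, so it
/// cannot use the scaled form, but it fits the signed 9-bit range and can be
/// selected as "ldur w0, [x1, #-3]".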
7526AArch64InstructionSelector::selectAddrModeUnscaled(MachineOperand &Root,
7527 unsigned Size) const {
7529 Root.getParent()->getParent()->getParent()->getRegInfo();
7530
7531 if (!Root.isReg())
7532 return std::nullopt;
7533
7534 if (!isBaseWithConstantOffset(Root, MRI))
7535 return std::nullopt;
7536
7537 MachineInstr *RootDef = MRI.getVRegDef(Root.getReg());
7538
7539 MachineOperand &OffImm = RootDef->getOperand(2);
7540 if (!OffImm.isReg())
7541 return std::nullopt;
7542 MachineInstr *RHS = MRI.getVRegDef(OffImm.getReg());
7543 if (RHS->getOpcode() != TargetOpcode::G_CONSTANT)
7544 return std::nullopt;
7545 int64_t RHSC;
7546 MachineOperand &RHSOp1 = RHS->getOperand(1);
7547 if (!RHSOp1.isCImm() || RHSOp1.getCImm()->getBitWidth() > 64)
7548 return std::nullopt;
7549 RHSC = RHSOp1.getCImm()->getSExtValue();
7550
7551 if (RHSC >= -256 && RHSC < 256) {
7552 MachineOperand &Base = RootDef->getOperand(1);
7553 return {{
7554 [=](MachineInstrBuilder &MIB) { MIB.add(Base); },
7555 [=](MachineInstrBuilder &MIB) { MIB.addImm(RHSC); },
7556 }};
7557 }
7558 return std::nullopt;
7559}
7560
7562AArch64InstructionSelector::tryFoldAddLowIntoImm(MachineInstr &RootDef,
7563 unsigned Size,
7564 MachineRegisterInfo &MRI) const {
7565 if (RootDef.getOpcode() != AArch64::G_ADD_LOW)
7566 return std::nullopt;
7567 MachineInstr &Adrp = *MRI.getVRegDef(RootDef.getOperand(1).getReg());
7568 if (Adrp.getOpcode() != AArch64::ADRP)
7569 return std::nullopt;
7570
7571 // TODO: add heuristics like isWorthFoldingADDlow() from SelectionDAG.
7572 auto Offset = Adrp.getOperand(1).getOffset();
7573 if (Offset % Size != 0)
7574 return std::nullopt;
7575
7576 auto GV = Adrp.getOperand(1).getGlobal();
7577 if (GV->isThreadLocal())
7578 return std::nullopt;
7579
7580 auto &MF = *RootDef.getParent()->getParent();
7581 if (GV->getPointerAlignment(MF.getDataLayout()) < Size)
7582 return std::nullopt;
7583
7584 unsigned OpFlags = STI.ClassifyGlobalReference(GV, MF.getTarget());
7585 MachineIRBuilder MIRBuilder(RootDef);
7586 Register AdrpReg = Adrp.getOperand(0).getReg();
7587 return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(AdrpReg); },
7588 [=](MachineInstrBuilder &MIB) {
7589 MIB.addGlobalAddress(GV, Offset,
7590 OpFlags | AArch64II::MO_PAGEOFF |
7592 }}};
7593}
7594
7595/// Select a "register plus scaled unsigned 12-bit immediate" address. The
7596/// "Size" argument is the size in bytes of the memory reference, which
7597/// determines the scale.
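/// For example, with Size = 8 the valid offsets are multiples of 8 in
/// [0, 32760], so "ldr x0, [x1, #16]" is encoded with an immediate of 2.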
7599AArch64InstructionSelector::selectAddrModeIndexed(MachineOperand &Root,
7600 unsigned Size) const {
7601 MachineFunction &MF = *Root.getParent()->getParent()->getParent();
7603
7604 if (!Root.isReg())
7605 return std::nullopt;
7606
7607 MachineInstr *RootDef = MRI.getVRegDef(Root.getReg());
7608 if (RootDef->getOpcode() == TargetOpcode::G_FRAME_INDEX) {
7609 return {{
7610 [=](MachineInstrBuilder &MIB) { MIB.add(RootDef->getOperand(1)); },
7611 [=](MachineInstrBuilder &MIB) { MIB.addImm(0); },
7612 }};
7613 }
7614
7616 // Check if we can fold in the ADD of small code model ADRP + ADD address.
7617 if (CM == CodeModel::Small) {
7618 auto OpFns = tryFoldAddLowIntoImm(*RootDef, Size, MRI);
7619 if (OpFns)
7620 return OpFns;
7621 }
7622
7623 if (isBaseWithConstantOffset(Root, MRI)) {
7624 MachineOperand &LHS = RootDef->getOperand(1);
7625 MachineOperand &RHS = RootDef->getOperand(2);
7626 MachineInstr *LHSDef = MRI.getVRegDef(LHS.getReg());
7627 MachineInstr *RHSDef = MRI.getVRegDef(RHS.getReg());
7628
7629 int64_t RHSC = (int64_t)RHSDef->getOperand(1).getCImm()->getZExtValue();
7630 unsigned Scale = Log2_32(Size);
7631 if ((RHSC & (Size - 1)) == 0 && RHSC >= 0 && RHSC < (0x1000 << Scale)) {
7632 if (LHSDef->getOpcode() == TargetOpcode::G_FRAME_INDEX)
7633 return {{
7634 [=](MachineInstrBuilder &MIB) { MIB.add(LHSDef->getOperand(1)); },
7635 [=](MachineInstrBuilder &MIB) { MIB.addImm(RHSC >> Scale); },
7636 }};
7637
7638 return {{
7639 [=](MachineInstrBuilder &MIB) { MIB.add(LHS); },
7640 [=](MachineInstrBuilder &MIB) { MIB.addImm(RHSC >> Scale); },
7641 }};
7642 }
7643 }
7644
7645 // Before falling back to our general case, check if the unscaled
7646 // instructions can handle this. If so, that's preferable.
7647 if (selectAddrModeUnscaled(Root, Size))
7648 return std::nullopt;
7649
7650 return {{
7651 [=](MachineInstrBuilder &MIB) { MIB.add(Root); },
7652 [=](MachineInstrBuilder &MIB) { MIB.addImm(0); },
7653 }};
7654}
7655
7656/// Given a shift instruction, return the correct shift type for that
7657/// instruction.
7659 switch (MI.getOpcode()) {
7660 default:
7662 case TargetOpcode::G_SHL:
7663 return AArch64_AM::LSL;
7664 case TargetOpcode::G_LSHR:
7665 return AArch64_AM::LSR;
7666 case TargetOpcode::G_ASHR:
7667 return AArch64_AM::ASR;
7668 case TargetOpcode::G_ROTR:
7669 return AArch64_AM::ROR;
7670 }
7671}
7672
7673/// Select a "shifted register" operand. If the value is not shifted, set the
7674/// shift operand to a default value of "lsl 0".
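/// For example, a G_SHL by 3 feeding an add can be folded into the
/// shifted-register form "add x0, x1, x2, lsl #3".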
7676AArch64InstructionSelector::selectShiftedRegister(MachineOperand &Root,
7677 bool AllowROR) const {
7678 if (!Root.isReg())
7679 return std::nullopt;
7681 Root.getParent()->getParent()->getParent()->getRegInfo();
7682
7683 // Check if the operand is defined by an instruction which corresponds to
7684 // a ShiftExtendType. E.g. a G_SHL, G_LSHR, etc.
7685 MachineInstr *ShiftInst = MRI.getVRegDef(Root.getReg());
7687 if (ShType == AArch64_AM::InvalidShiftExtend)
7688 return std::nullopt;
7689 if (ShType == AArch64_AM::ROR && !AllowROR)
7690 return std::nullopt;
7691 if (!isWorthFoldingIntoExtendedReg(*ShiftInst, MRI, false))
7692 return std::nullopt;
7693
7694 // Need an immediate on the RHS.
7695 MachineOperand &ShiftRHS = ShiftInst->getOperand(2);
7696 auto Immed = getImmedFromMO(ShiftRHS);
7697 if (!Immed)
7698 return std::nullopt;
7699
7700 // We have something that we can fold. Fold in the shift's LHS and RHS into
7701 // the instruction.
7702 MachineOperand &ShiftLHS = ShiftInst->getOperand(1);
7703 Register ShiftReg = ShiftLHS.getReg();
7704
7705 unsigned NumBits = MRI.getType(ShiftReg).getSizeInBits();
7706 unsigned Val = *Immed & (NumBits - 1);
7707 unsigned ShiftVal = AArch64_AM::getShifterImm(ShType, Val);
7708
7709 return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(ShiftReg); },
7710 [=](MachineInstrBuilder &MIB) { MIB.addImm(ShiftVal); }}};
7711}
7712
7713AArch64_AM::ShiftExtendType AArch64InstructionSelector::getExtendTypeForInst(
7714 MachineInstr &MI, MachineRegisterInfo &MRI, bool IsLoadStore) const {
7715 unsigned Opc = MI.getOpcode();
7716
7717 // Handle explicit extend instructions first.
7718 if (Opc == TargetOpcode::G_SEXT || Opc == TargetOpcode::G_SEXT_INREG) {
7719 unsigned Size;
7720 if (Opc == TargetOpcode::G_SEXT)
7721 Size = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
7722 else
7723 Size = MI.getOperand(2).getImm();
7724 assert(Size != 64 && "Extend from 64 bits?");
7725 switch (Size) {
7726 case 8:
7727 return IsLoadStore ? AArch64_AM::InvalidShiftExtend : AArch64_AM::SXTB;
7728 case 16:
7729 return IsLoadStore ? AArch64_AM::InvalidShiftExtend : AArch64_AM::SXTH;
7730 case 32:
7731 return AArch64_AM::SXTW;
7732 default:
7734 }
7735 }
7736
7737 if (Opc == TargetOpcode::G_ZEXT || Opc == TargetOpcode::G_ANYEXT) {
7738 unsigned Size = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
7739 assert(Size != 64 && "Extend from 64 bits?");
7740 switch (Size) {
7741 case 8:
7742 return IsLoadStore ? AArch64_AM::InvalidShiftExtend : AArch64_AM::UXTB;
7743 case 16:
7744 return IsLoadStore ? AArch64_AM::InvalidShiftExtend : AArch64_AM::UXTH;
7745 case 32:
7746 return AArch64_AM::UXTW;
7747 default:
7749 }
7750 }
7751
7752 // Don't have an explicit extend. Try to handle a G_AND with a constant mask
7753 // on the RHS.
7754 if (Opc != TargetOpcode::G_AND)
7756
7757 std::optional<uint64_t> MaybeAndMask = getImmedFromMO(MI.getOperand(2));
7758 if (!MaybeAndMask)
7760 uint64_t AndMask = *MaybeAndMask;
7761 switch (AndMask) {
7762 default:
7764 case 0xFF:
7765 return !IsLoadStore ? AArch64_AM::UXTB : AArch64_AM::InvalidShiftExtend;
7766 case 0xFFFF:
7767 return !IsLoadStore ? AArch64_AM::UXTH : AArch64_AM::InvalidShiftExtend;
7768 case 0xFFFFFFFF:
7769 return AArch64_AM::UXTW;
7770 }
7771}
7772
7773Register AArch64InstructionSelector::moveScalarRegClass(
7774 Register Reg, const TargetRegisterClass &RC, MachineIRBuilder &MIB) const {
7775 MachineRegisterInfo &MRI = *MIB.getMRI();
7776 auto Ty = MRI.getType(Reg);
7777 assert(!Ty.isVector() && "Expected scalars only!");
7778 if (Ty.getSizeInBits() == TRI.getRegSizeInBits(RC))
7779 return Reg;
7780
7781 // Create a copy and immediately select it.
7782 // FIXME: We should have an emitCopy function?
7783 auto Copy = MIB.buildCopy({&RC}, {Reg});
7784 selectCopy(*Copy, TII, MRI, TRI, RBI);
7785 return Copy.getReg(0);
7786}
7787
7788/// Select an "extended register" operand. This operand folds in an extend
7789/// followed by an optional left shift.
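/// For example, a G_SEXT from s32 followed by a G_SHL by 2 can be folded into
/// the extended-register form "add x0, x1, w2, sxtw #2".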
7791AArch64InstructionSelector::selectArithExtendedRegister(
7792 MachineOperand &Root) const {
7793 if (!Root.isReg())
7794 return std::nullopt;
7796 Root.getParent()->getParent()->getParent()->getRegInfo();
7797
7798 uint64_t ShiftVal = 0;
7799 Register ExtReg;
7801 MachineInstr *RootDef = getDefIgnoringCopies(Root.getReg(), MRI);
7802 if (!RootDef)
7803 return std::nullopt;
7804
7805 if (!isWorthFoldingIntoExtendedReg(*RootDef, MRI, false))
7806 return std::nullopt;
7807
7808 // Check if we can fold a shift and an extend.
7809 if (RootDef->getOpcode() == TargetOpcode::G_SHL) {
7810 // Look for a constant on the RHS of the shift.
7811 MachineOperand &RHS = RootDef->getOperand(2);
7812 std::optional<uint64_t> MaybeShiftVal = getImmedFromMO(RHS);
7813 if (!MaybeShiftVal)
7814 return std::nullopt;
7815 ShiftVal = *MaybeShiftVal;
7816 if (ShiftVal > 4)
7817 return std::nullopt;
7818 // Look for a valid extend instruction on the LHS of the shift.
7819 MachineOperand &LHS = RootDef->getOperand(1);
7820 MachineInstr *ExtDef = getDefIgnoringCopies(LHS.getReg(), MRI);
7821 if (!ExtDef)
7822 return std::nullopt;
7823 Ext = getExtendTypeForInst(*ExtDef, MRI);
7825 return std::nullopt;
7826 ExtReg = ExtDef->getOperand(1).getReg();
7827 } else {
7828 // Didn't get a shift. Try just folding an extend.
7829 Ext = getExtendTypeForInst(*RootDef, MRI);
7831 return std::nullopt;
7832 ExtReg = RootDef->getOperand(1).getReg();
7833
7834 // If we have a 32 bit instruction which zeroes out the high half of a
7835 // register, we get an implicit zero extend for free. Check if we have one.
7836 // FIXME: We actually emit the extend right now even though we don't have
7837 // to.
7838 if (Ext == AArch64_AM::UXTW && MRI.getType(ExtReg).getSizeInBits() == 32) {
7839 MachineInstr *ExtInst = MRI.getVRegDef(ExtReg);
7840 if (isDef32(*ExtInst))
7841 return std::nullopt;
7842 }
7843 }
7844
7845 // We require a GPR32 here. Narrow the ExtReg if needed using a subregister
7846 // copy.
7847 MachineIRBuilder MIB(*RootDef);
7848 ExtReg = moveScalarRegClass(ExtReg, AArch64::GPR32RegClass, MIB);
7849
7850 return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(ExtReg); },
7851 [=](MachineInstrBuilder &MIB) {
7852 MIB.addImm(getArithExtendImm(Ext, ShiftVal));
7853 }}};
7854}
7855
7857AArch64InstructionSelector::selectExtractHigh(MachineOperand &Root) const {
7858 if (!Root.isReg())
7859 return std::nullopt;
7861 Root.getParent()->getParent()->getParent()->getRegInfo();
7862
7863 auto Extract = getDefSrcRegIgnoringCopies(Root.getReg(), MRI);
7864 while (Extract && Extract->MI->getOpcode() == TargetOpcode::G_BITCAST &&
7865 STI.isLittleEndian())
7866 Extract =
7867 getDefSrcRegIgnoringCopies(Extract->MI->getOperand(1).getReg(), MRI);
7868 if (!Extract)
7869 return std::nullopt;
7870
7871 if (Extract->MI->getOpcode() == TargetOpcode::G_UNMERGE_VALUES) {
7872 if (Extract->Reg == Extract->MI->getOperand(1).getReg()) {
7873 Register ExtReg = Extract->MI->getOperand(2).getReg();
7874 return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(ExtReg); }}};
7875 }
7876 }
7877 if (Extract->MI->getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT) {
7878 LLT SrcTy = MRI.getType(Extract->MI->getOperand(1).getReg());
7880 Extract->MI->getOperand(2).getReg(), MRI);
7881 if (LaneIdx && SrcTy == LLT::fixed_vector(2, 64) &&
7882 LaneIdx->Value.getSExtValue() == 1) {
7883 Register ExtReg = Extract->MI->getOperand(1).getReg();
7884 return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(ExtReg); }}};
7885 }
7886 }
7887
7888 return std::nullopt;
7889}
7890
7891void AArch64InstructionSelector::renderTruncImm(MachineInstrBuilder &MIB,
7892 const MachineInstr &MI,
7893 int OpIdx) const {
7894 const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
7895 assert(MI.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 &&
7896 "Expected G_CONSTANT");
7897 std::optional<int64_t> CstVal =
7898 getIConstantVRegSExtVal(MI.getOperand(0).getReg(), MRI);
7899 assert(CstVal && "Expected constant value");
7900 MIB.addImm(*CstVal);
7901}
7902
7903void AArch64InstructionSelector::renderLogicalImm32(
7904 MachineInstrBuilder &MIB, const MachineInstr &I, int OpIdx) const {
7905 assert(I.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 &&
7906 "Expected G_CONSTANT");
7907 uint64_t CstVal = I.getOperand(1).getCImm()->getZExtValue();
7909 MIB.addImm(Enc);
7910}
7911
7912void AArch64InstructionSelector::renderLogicalImm64(
7913 MachineInstrBuilder &MIB, const MachineInstr &I, int OpIdx) const {
7914 assert(I.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 &&
7915 "Expected G_CONSTANT");
7916 uint64_t CstVal = I.getOperand(1).getCImm()->getZExtValue();
7918 MIB.addImm(Enc);
7919}
7920
7921void AArch64InstructionSelector::renderUbsanTrap(MachineInstrBuilder &MIB,
7922 const MachineInstr &MI,
7923 int OpIdx) const {
7924 assert(MI.getOpcode() == TargetOpcode::G_UBSANTRAP && OpIdx == 0 &&
7925 "Expected G_UBSANTRAP");
7926 MIB.addImm(MI.getOperand(0).getImm() | ('U' << 8));
7927}
7928
7929void AArch64InstructionSelector::renderFPImm16(MachineInstrBuilder &MIB,
7930 const MachineInstr &MI,
7931 int OpIdx) const {
7932 assert(MI.getOpcode() == TargetOpcode::G_FCONSTANT && OpIdx == -1 &&
7933 "Expected G_FCONSTANT");
7934 MIB.addImm(
7935 AArch64_AM::getFP16Imm(MI.getOperand(1).getFPImm()->getValueAPF()));
7936}
7937
7938void AArch64InstructionSelector::renderFPImm32(MachineInstrBuilder &MIB,
7939 const MachineInstr &MI,
7940 int OpIdx) const {
7941 assert(MI.getOpcode() == TargetOpcode::G_FCONSTANT && OpIdx == -1 &&
7942 "Expected G_FCONSTANT");
7943 MIB.addImm(
7944 AArch64_AM::getFP32Imm(MI.getOperand(1).getFPImm()->getValueAPF()));
7945}
7946
7947void AArch64InstructionSelector::renderFPImm64(MachineInstrBuilder &MIB,
7948 const MachineInstr &MI,
7949 int OpIdx) const {
7950 assert(MI.getOpcode() == TargetOpcode::G_FCONSTANT && OpIdx == -1 &&
7951 "Expected G_FCONSTANT");
7952 MIB.addImm(
7953 AArch64_AM::getFP64Imm(MI.getOperand(1).getFPImm()->getValueAPF()));
7954}
7955
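// Renders the raw bit pattern of a 32-bit G_FCONSTANT as an AdvSIMD
// "modified immediate" of type 4 via encodeAdvSIMDModImmType4; presumably the
// matching pattern has already checked isAdvSIMDModImmType4 on the same bits.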
7956void AArch64InstructionSelector::renderFPImm32SIMDModImmType4(
7957 MachineInstrBuilder &MIB, const MachineInstr &MI, int OpIdx) const {
7958 assert(MI.getOpcode() == TargetOpcode::G_FCONSTANT && OpIdx == -1 &&
7959 "Expected G_FCONSTANT");
7961 .getFPImm()
7962 ->getValueAPF()
7963 .bitcastToAPInt()
7964 .getZExtValue()));
7965}
7966
7967bool AArch64InstructionSelector::isLoadStoreOfNumBytes(
7968 const MachineInstr &MI, unsigned NumBytes) const {
7969 if (!MI.mayLoadOrStore())
7970 return false;
7971 assert(MI.hasOneMemOperand() &&
7972 "Expected load/store to have only one mem op!");
7973 return (*MI.memoperands_begin())->getSize() == NumBytes;
7974}
7975
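// On AArch64, writing a W register zeroes bits [63:32] of the corresponding
// X register, so a 32-bit definition can usually be widened with a plain
// SUBREG_TO_REG instead of an explicit zero-extension; isDef32 reports whether
// that is safe for the instruction defining the value.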
7976bool AArch64InstructionSelector::isDef32(const MachineInstr &MI) const {
7977 const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
7978 if (MRI.getType(MI.getOperand(0).getReg()).getSizeInBits() != 32)
7979 return false;
7980
7981 // Only return true if we know the operation will zero-out the high half of
7982 // the 64-bit register. Truncates can be subregister copies, which don't
7983 // zero out the high bits. Copies and other copy-like instructions can be
7984 // fed by truncates, or could be lowered as subregister copies.
7985 switch (MI.getOpcode()) {
7986 default:
7987 return true;
7988 case TargetOpcode::COPY:
7989 case TargetOpcode::G_BITCAST:
7990 case TargetOpcode::G_TRUNC:
7991 case TargetOpcode::G_PHI:
7992 return false;
7993 }
7994}
7995
7996
7997// Perform fixups on the given PHI instruction's operands to force them all
7998 // to be the same as the destination regbank.
7999 static void fixupPHIOpBanks(MachineInstr &MI, MachineRegisterInfo &MRI,
8000 const AArch64RegisterBankInfo &RBI) {
8001 assert(MI.getOpcode() == TargetOpcode::G_PHI && "Expected a G_PHI");
8002 Register DstReg = MI.getOperand(0).getReg();
8003 const RegisterBank *DstRB = MRI.getRegBankOrNull(DstReg);
8004 assert(DstRB && "Expected PHI dst to have regbank assigned");
8005 MachineIRBuilder MIB(MI);
8006
8007 // Go through each operand and ensure it has the same regbank.
8008 for (MachineOperand &MO : llvm::drop_begin(MI.operands())) {
8009 if (!MO.isReg())
8010 continue;
8011 Register OpReg = MO.getReg();
8012 const RegisterBank *RB = MRI.getRegBankOrNull(OpReg);
8013 if (RB != DstRB) {
8014 // Insert a cross-bank copy.
8015 auto *OpDef = MRI.getVRegDef(OpReg);
8016 const LLT &Ty = MRI.getType(OpReg);
8017 MachineBasicBlock &OpDefBB = *OpDef->getParent();
8018
8019 // Any instruction we insert must appear after all PHIs in the block
8020 // for the block to be valid MIR.
8021 MachineBasicBlock::iterator InsertPt = std::next(OpDef->getIterator());
8022 if (InsertPt != OpDefBB.end() && InsertPt->isPHI())
8023 InsertPt = OpDefBB.getFirstNonPHI();
8024 MIB.setInsertPt(*OpDef->getParent(), InsertPt);
8025 auto Copy = MIB.buildCopy(Ty, OpReg);
8026 MRI.setRegBank(Copy.getReg(0), *DstRB);
8027 MO.setReg(Copy.getReg(0));
8028 }
8029 }
8030}
8031
8032void AArch64InstructionSelector::processPHIs(MachineFunction &MF) {
8033 // We're looking for PHIs, build a list so we don't invalidate iterators.
8034 MachineRegisterInfo &MRI = MF.getRegInfo();
8035 SmallVector<MachineInstr *, 32> Phis;
8036 for (auto &BB : MF) {
8037 for (auto &MI : BB) {
8038 if (MI.getOpcode() == TargetOpcode::G_PHI)
8039 Phis.emplace_back(&MI);
8040 }
8041 }
8042
8043 for (auto *MI : Phis) {
8044 // We need to do some work here if the operand types are < 16 bit and they
8045 // are split across fpr/gpr banks. Since all types <32b on gpr
8046 // end up being assigned gpr32 regclasses, we can end up with PHIs here
8047 // which try to select between a gpr32 and an fpr16. Ideally RBS shouldn't
8048 // be selecting heterogeneous regbanks for operands if possible, but we
8049 // still need to be able to deal with it here.
8050 //
8051 // To fix this, if we have a gpr-bank operand < 32b in size and at least
8052 // one other operand is on the fpr bank, then we add cross-bank copies
8053 // to homogenize the operand banks. For simplicity the bank that we choose
8054 // to settle on is whatever bank the def operand has. For example:
8055 //
8056 // %endbb:
8057 // %dst:gpr(s16) = G_PHI %in1:gpr(s16), %bb1, %in2:fpr(s16), %bb2
8058 // =>
8059 // %bb2:
8060 // ...
8061 // %in2_copy:gpr(s16) = COPY %in2:fpr(s16)
8062 // ...
8063 // %endbb:
8064 // %dst:gpr(s16) = G_PHI %in1:gpr(s16), %bb1, %in2_copy:gpr(s16), %bb2
8065 bool HasGPROp = false, HasFPROp = false;
8066 for (const MachineOperand &MO : llvm::drop_begin(MI->operands())) {
8067 if (!MO.isReg())
8068 continue;
8069 const LLT &Ty = MRI.getType(MO.getReg());
8070 if (!Ty.isValid() || !Ty.isScalar())
8071 break;
8072 if (Ty.getSizeInBits() >= 32)
8073 break;
8074 const RegisterBank *RB = MRI.getRegBankOrNull(MO.getReg());
8075 // If for some reason we don't have a regbank yet, don't try anything.
8076 if (!RB)
8077 break;
8078
8079 if (RB->getID() == AArch64::GPRRegBankID)
8080 HasGPROp = true;
8081 else
8082 HasFPROp = true;
8083 }
8084 // We have heterogeneous regbanks, need to fix up.
8085 if (HasGPROp && HasFPROp)
8086 fixupPHIOpBanks(*MI, MRI, RBI);
8087 }
8088}
8089
8090 namespace llvm {
8091 InstructionSelector *
8092 createAArch64InstructionSelector(const AArch64TargetMachine &TM,
8093 const AArch64Subtarget &Subtarget,
8094 const AArch64RegisterBankInfo &RBI) {
8095 return new AArch64InstructionSelector(TM, Subtarget, RBI);
8096}
8097}