LLVM 20.0.0git
AArch64InstructionSelector.cpp
Go to the documentation of this file.
1//===- AArch64InstructionSelector.cpp ----------------------------*- C++ -*-==//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8/// \file
9/// This file implements the targeting of the InstructionSelector class for
10/// AArch64.
11/// \todo This should be generated by TableGen.
12//===----------------------------------------------------------------------===//
13
15#include "AArch64InstrInfo.h"
18#include "AArch64RegisterInfo.h"
19#include "AArch64Subtarget.h"
41#include "llvm/IR/Constants.h"
44#include "llvm/IR/IntrinsicsAArch64.h"
46#include "llvm/IR/Type.h"
47#include "llvm/Pass.h"
48#include "llvm/Support/Debug.h"
50#include <optional>
51
52#define DEBUG_TYPE "aarch64-isel"
53
54using namespace llvm;
55using namespace MIPatternMatch;
56using namespace AArch64GISelUtils;
57
58namespace llvm {
61}
62
63namespace {
64
65#define GET_GLOBALISEL_PREDICATE_BITSET
66#include "AArch64GenGlobalISel.inc"
67#undef GET_GLOBALISEL_PREDICATE_BITSET
68
69
70class AArch64InstructionSelector : public InstructionSelector {
71public:
72 AArch64InstructionSelector(const AArch64TargetMachine &TM,
73 const AArch64Subtarget &STI,
74 const AArch64RegisterBankInfo &RBI);
75
76 bool select(MachineInstr &I) override;
77 static const char *getName() { return DEBUG_TYPE; }
78
79 void setupMF(MachineFunction &MF, GISelKnownBits *KB,
80 CodeGenCoverage *CoverageInfo, ProfileSummaryInfo *PSI,
81 BlockFrequencyInfo *BFI) override {
82 InstructionSelector::setupMF(MF, KB, CoverageInfo, PSI, BFI);
83 MIB.setMF(MF);
84
85 // hasFnAttribute() is expensive to call on every BRCOND selection, so
86 // cache it here for each run of the selector.
87 ProduceNonFlagSettingCondBr =
88 !MF.getFunction().hasFnAttribute(Attribute::SpeculativeLoadHardening);
89 MFReturnAddr = Register();
90
91 processPHIs(MF);
92 }
93
94private:
95 /// tblgen-erated 'select' implementation, used as the initial selector for
96 /// the patterns that don't require complex C++.
97 bool selectImpl(MachineInstr &I, CodeGenCoverage &CoverageInfo) const;
98
99 // A lowering phase that runs before any selection attempts.
100 // Returns true if the instruction was modified.
101 bool preISelLower(MachineInstr &I);
102
103 // An early selection function that runs before the selectImpl() call.
104 bool earlySelect(MachineInstr &I);
105
106 /// Save state that is shared between select calls, call select on \p I and
107 /// then restore the saved state. This can be used to recursively call select
108 /// within a select call.
109 bool selectAndRestoreState(MachineInstr &I);
110
111 // Do some preprocessing of G_PHIs before we begin selection.
112 void processPHIs(MachineFunction &MF);
113
114 bool earlySelectSHL(MachineInstr &I, MachineRegisterInfo &MRI);
115
116 /// Eliminate same-sized cross-bank copies into stores before selectImpl().
117 bool contractCrossBankCopyIntoStore(MachineInstr &I,
119
120 bool convertPtrAddToAdd(MachineInstr &I, MachineRegisterInfo &MRI);
121
122 bool selectVaStartAAPCS(MachineInstr &I, MachineFunction &MF,
123 MachineRegisterInfo &MRI) const;
124 bool selectVaStartDarwin(MachineInstr &I, MachineFunction &MF,
125 MachineRegisterInfo &MRI) const;
126
127 ///@{
128 /// Helper functions for selectCompareBranch.
129 bool selectCompareBranchFedByFCmp(MachineInstr &I, MachineInstr &FCmp,
130 MachineIRBuilder &MIB) const;
131 bool selectCompareBranchFedByICmp(MachineInstr &I, MachineInstr &ICmp,
132 MachineIRBuilder &MIB) const;
133 bool tryOptCompareBranchFedByICmp(MachineInstr &I, MachineInstr &ICmp,
134 MachineIRBuilder &MIB) const;
135 bool tryOptAndIntoCompareBranch(MachineInstr &AndInst, bool Invert,
136 MachineBasicBlock *DstMBB,
137 MachineIRBuilder &MIB) const;
138 ///@}
139
140 bool selectCompareBranch(MachineInstr &I, MachineFunction &MF,
142
143 bool selectVectorAshrLshr(MachineInstr &I, MachineRegisterInfo &MRI);
144 bool selectVectorSHL(MachineInstr &I, MachineRegisterInfo &MRI);
145
146 // Helper to generate an equivalent of scalar_to_vector into a new register,
147 // returned via 'Dst'.
148 MachineInstr *emitScalarToVector(unsigned EltSize,
149 const TargetRegisterClass *DstRC,
150 Register Scalar,
151 MachineIRBuilder &MIRBuilder) const;
152 /// Helper to narrow vector that was widened by emitScalarToVector.
153 /// Copy lowest part of 128-bit or 64-bit vector to 64-bit or 32-bit
154 /// vector, correspondingly.
155 MachineInstr *emitNarrowVector(Register DstReg, Register SrcReg,
156 MachineIRBuilder &MIRBuilder,
157 MachineRegisterInfo &MRI) const;
158
159 /// Emit a lane insert into \p DstReg, or a new vector register if
160 /// std::nullopt is provided.
161 ///
162 /// The lane inserted into is defined by \p LaneIdx. The vector source
163 /// register is given by \p SrcReg. The register containing the element is
164 /// given by \p EltReg.
165 MachineInstr *emitLaneInsert(std::optional<Register> DstReg, Register SrcReg,
166 Register EltReg, unsigned LaneIdx,
167 const RegisterBank &RB,
168 MachineIRBuilder &MIRBuilder) const;
169
170 /// Emit a sequence of instructions representing a constant \p CV for a
171 /// vector register \p Dst. (E.g. a MOV, or a load from a constant pool.)
172 ///
173 /// \returns the last instruction in the sequence on success, and nullptr
174 /// otherwise.
175 MachineInstr *emitConstantVector(Register Dst, Constant *CV,
176 MachineIRBuilder &MIRBuilder,
178
179 MachineInstr *tryAdvSIMDModImm8(Register Dst, unsigned DstSize, APInt Bits,
180 MachineIRBuilder &MIRBuilder);
181
182 MachineInstr *tryAdvSIMDModImm16(Register Dst, unsigned DstSize, APInt Bits,
183 MachineIRBuilder &MIRBuilder, bool Inv);
184
185 MachineInstr *tryAdvSIMDModImm32(Register Dst, unsigned DstSize, APInt Bits,
186 MachineIRBuilder &MIRBuilder, bool Inv);
187 MachineInstr *tryAdvSIMDModImm64(Register Dst, unsigned DstSize, APInt Bits,
188 MachineIRBuilder &MIRBuilder);
189 MachineInstr *tryAdvSIMDModImm321s(Register Dst, unsigned DstSize, APInt Bits,
190 MachineIRBuilder &MIRBuilder, bool Inv);
191 MachineInstr *tryAdvSIMDModImmFP(Register Dst, unsigned DstSize, APInt Bits,
192 MachineIRBuilder &MIRBuilder);
193
194 bool tryOptConstantBuildVec(MachineInstr &MI, LLT DstTy,
196 /// \returns true if a G_BUILD_VECTOR instruction \p MI can be selected as a
197 /// SUBREG_TO_REG.
198 bool tryOptBuildVecToSubregToReg(MachineInstr &MI, MachineRegisterInfo &MRI);
199 bool selectBuildVector(MachineInstr &I, MachineRegisterInfo &MRI);
202
203 bool selectShuffleVector(MachineInstr &I, MachineRegisterInfo &MRI);
204 bool selectExtractElt(MachineInstr &I, MachineRegisterInfo &MRI);
205 bool selectConcatVectors(MachineInstr &I, MachineRegisterInfo &MRI);
206 bool selectSplitVectorUnmerge(MachineInstr &I, MachineRegisterInfo &MRI);
207
208 /// Helper function to select vector load intrinsics like
209 /// @llvm.aarch64.neon.ld2.*, @llvm.aarch64.neon.ld4.*, etc.
210 /// \p Opc is the opcode that the selected instruction should use.
211 /// \p NumVecs is the number of vector destinations for the instruction.
212 /// \p I is the original G_INTRINSIC_W_SIDE_EFFECTS instruction.
213 bool selectVectorLoadIntrinsic(unsigned Opc, unsigned NumVecs,
214 MachineInstr &I);
215 bool selectVectorLoadLaneIntrinsic(unsigned Opc, unsigned NumVecs,
216 MachineInstr &I);
217 void selectVectorStoreIntrinsic(MachineInstr &I, unsigned NumVecs,
218 unsigned Opc);
219 bool selectVectorStoreLaneIntrinsic(MachineInstr &I, unsigned NumVecs,
220 unsigned Opc);
221 bool selectIntrinsicWithSideEffects(MachineInstr &I,
223 bool selectIntrinsic(MachineInstr &I, MachineRegisterInfo &MRI);
224 bool selectJumpTable(MachineInstr &I, MachineRegisterInfo &MRI);
225 bool selectBrJT(MachineInstr &I, MachineRegisterInfo &MRI);
226 bool selectTLSGlobalValue(MachineInstr &I, MachineRegisterInfo &MRI);
227 bool selectPtrAuthGlobalValue(MachineInstr &I,
228 MachineRegisterInfo &MRI) const;
229 bool selectReduction(MachineInstr &I, MachineRegisterInfo &MRI);
230 bool selectMOPS(MachineInstr &I, MachineRegisterInfo &MRI);
231 bool selectUSMovFromExtend(MachineInstr &I, MachineRegisterInfo &MRI);
232 void SelectTable(MachineInstr &I, MachineRegisterInfo &MRI, unsigned NumVecs,
233 unsigned Opc1, unsigned Opc2, bool isExt);
234
235 bool selectIndexedExtLoad(MachineInstr &I, MachineRegisterInfo &MRI);
236 bool selectIndexedLoad(MachineInstr &I, MachineRegisterInfo &MRI);
237 bool selectIndexedStore(GIndexedStore &I, MachineRegisterInfo &MRI);
238
239 unsigned emitConstantPoolEntry(const Constant *CPVal,
240 MachineFunction &MF) const;
242 MachineIRBuilder &MIRBuilder) const;
243
244 // Emit a vector concat operation.
245 MachineInstr *emitVectorConcat(std::optional<Register> Dst, Register Op1,
246 Register Op2,
247 MachineIRBuilder &MIRBuilder) const;
248
249 // Emit an integer compare between LHS and RHS, which checks for Predicate.
250 MachineInstr *emitIntegerCompare(MachineOperand &LHS, MachineOperand &RHS,
251 MachineOperand &Predicate,
252 MachineIRBuilder &MIRBuilder) const;
253
254 /// Emit a floating point comparison between \p LHS and \p RHS.
255 /// \p Pred if given is the intended predicate to use.
257 emitFPCompare(Register LHS, Register RHS, MachineIRBuilder &MIRBuilder,
258 std::optional<CmpInst::Predicate> = std::nullopt) const;
259
261 emitInstr(unsigned Opcode, std::initializer_list<llvm::DstOp> DstOps,
262 std::initializer_list<llvm::SrcOp> SrcOps,
263 MachineIRBuilder &MIRBuilder,
264 const ComplexRendererFns &RenderFns = std::nullopt) const;
265 /// Helper function to emit an add or sub instruction.
266 ///
267 /// \p AddrModeAndSizeToOpcode must contain each of the opcode variants above
268 /// in a specific order.
269 ///
270 /// Below is an example of the expected input to \p AddrModeAndSizeToOpcode.
271 ///
272 /// \code
273 /// const std::array<std::array<unsigned, 2>, 4> Table {
274 /// {{AArch64::ADDXri, AArch64::ADDWri},
275 /// {AArch64::ADDXrs, AArch64::ADDWrs},
276 /// {AArch64::ADDXrr, AArch64::ADDWrr},
277 /// {AArch64::SUBXri, AArch64::SUBWri},
278 /// {AArch64::ADDXrx, AArch64::ADDWrx}}};
279 /// \endcode
280 ///
281 /// Each row in the table corresponds to a different addressing mode. Each
282 /// column corresponds to a different register size.
283 ///
284 /// \attention Rows must be structured as follows:
285 /// - Row 0: The ri opcode variants
286 /// - Row 1: The rs opcode variants
287 /// - Row 2: The rr opcode variants
288 /// - Row 3: The ri opcode variants for negative immediates
289 /// - Row 4: The rx opcode variants
290 ///
291 /// \attention Columns must be structured as follows:
292 /// - Column 0: The 64-bit opcode variants
293 /// - Column 1: The 32-bit opcode variants
294 ///
295 /// \p Dst is the destination register of the binop to emit.
296 /// \p LHS is the left-hand operand of the binop to emit.
297 /// \p RHS is the right-hand operand of the binop to emit.
298 MachineInstr *emitAddSub(
299 const std::array<std::array<unsigned, 2>, 5> &AddrModeAndSizeToOpcode,
301 MachineIRBuilder &MIRBuilder) const;
302 MachineInstr *emitADD(Register DefReg, MachineOperand &LHS,
304 MachineIRBuilder &MIRBuilder) const;
306 MachineIRBuilder &MIRBuilder) const;
308 MachineIRBuilder &MIRBuilder) const;
310 MachineIRBuilder &MIRBuilder) const;
312 MachineIRBuilder &MIRBuilder) const;
314 MachineIRBuilder &MIRBuilder) const;
316 MachineIRBuilder &MIRBuilder) const;
317 MachineInstr *emitSelect(Register Dst, Register LHS, Register RHS,
319 MachineIRBuilder &MIRBuilder) const;
320 MachineInstr *emitExtractVectorElt(std::optional<Register> DstReg,
321 const RegisterBank &DstRB, LLT ScalarTy,
322 Register VecReg, unsigned LaneIdx,
323 MachineIRBuilder &MIRBuilder) const;
324 MachineInstr *emitCSINC(Register Dst, Register Src1, Register Src2,
326 MachineIRBuilder &MIRBuilder) const;
327 /// Emit a CSet for a FP compare.
328 ///
329 /// \p Dst is expected to be a 32-bit scalar register.
330 MachineInstr *emitCSetForFCmp(Register Dst, CmpInst::Predicate Pred,
331 MachineIRBuilder &MIRBuilder) const;
332
333 /// Emit an instruction that sets NZCV to the carry-in expected by \p I.
334 /// Might elide the instruction if the previous instruction already sets NZCV
335 /// correctly.
336 MachineInstr *emitCarryIn(MachineInstr &I, Register CarryReg);
337
338 /// Emit the overflow op for \p Opcode.
339 ///
340 /// \p Opcode is expected to be an overflow op's opcode, e.g. G_UADDO,
341 /// G_USUBO, etc.
342 std::pair<MachineInstr *, AArch64CC::CondCode>
343 emitOverflowOp(unsigned Opcode, Register Dst, MachineOperand &LHS,
344 MachineOperand &RHS, MachineIRBuilder &MIRBuilder) const;
345
346 bool selectOverflowOp(MachineInstr &I, MachineRegisterInfo &MRI);
347
348 /// Emit expression as a conjunction (a series of CCMP/CFCMP ops).
349 /// In some cases this is even possible with OR operations in the expression.
351 MachineIRBuilder &MIB) const;
354 AArch64CC::CondCode Predicate,
356 MachineIRBuilder &MIB) const;
358 bool Negate, Register CCOp,
359 AArch64CC::CondCode Predicate,
360 MachineIRBuilder &MIB) const;
361
362 /// Emit a TB(N)Z instruction which tests \p Bit in \p TestReg.
363 /// \p IsNegative is true if the test should be "not zero".
364 /// This will also optimize the test bit instruction when possible.
365 MachineInstr *emitTestBit(Register TestReg, uint64_t Bit, bool IsNegative,
366 MachineBasicBlock *DstMBB,
367 MachineIRBuilder &MIB) const;
368
369 /// Emit a CB(N)Z instruction which branches to \p DestMBB.
370 MachineInstr *emitCBZ(Register CompareReg, bool IsNegative,
371 MachineBasicBlock *DestMBB,
372 MachineIRBuilder &MIB) const;
373
374 // Equivalent to the i32shift_a and friends from AArch64InstrInfo.td.
375 // We use these manually instead of using the importer since it doesn't
376 // support SDNodeXForm.
377 ComplexRendererFns selectShiftA_32(const MachineOperand &Root) const;
378 ComplexRendererFns selectShiftB_32(const MachineOperand &Root) const;
379 ComplexRendererFns selectShiftA_64(const MachineOperand &Root) const;
380 ComplexRendererFns selectShiftB_64(const MachineOperand &Root) const;
381
382 ComplexRendererFns select12BitValueWithLeftShift(uint64_t Immed) const;
383 ComplexRendererFns selectArithImmed(MachineOperand &Root) const;
384 ComplexRendererFns selectNegArithImmed(MachineOperand &Root) const;
385
386 ComplexRendererFns selectAddrModeUnscaled(MachineOperand &Root,
387 unsigned Size) const;
388
389 ComplexRendererFns selectAddrModeUnscaled8(MachineOperand &Root) const {
390 return selectAddrModeUnscaled(Root, 1);
391 }
392 ComplexRendererFns selectAddrModeUnscaled16(MachineOperand &Root) const {
393 return selectAddrModeUnscaled(Root, 2);
394 }
395 ComplexRendererFns selectAddrModeUnscaled32(MachineOperand &Root) const {
396 return selectAddrModeUnscaled(Root, 4);
397 }
398 ComplexRendererFns selectAddrModeUnscaled64(MachineOperand &Root) const {
399 return selectAddrModeUnscaled(Root, 8);
400 }
401 ComplexRendererFns selectAddrModeUnscaled128(MachineOperand &Root) const {
402 return selectAddrModeUnscaled(Root, 16);
403 }
404
405 /// Helper to try to fold in a GISEL_ADD_LOW into an immediate, to be used
406 /// from complex pattern matchers like selectAddrModeIndexed().
407 ComplexRendererFns tryFoldAddLowIntoImm(MachineInstr &RootDef, unsigned Size,
408 MachineRegisterInfo &MRI) const;
409
410 ComplexRendererFns selectAddrModeIndexed(MachineOperand &Root,
411 unsigned Size) const;
412 template <int Width>
413 ComplexRendererFns selectAddrModeIndexed(MachineOperand &Root) const {
414 return selectAddrModeIndexed(Root, Width / 8);
415 }
416
417 std::optional<bool>
418 isWorthFoldingIntoAddrMode(MachineInstr &MI,
419 const MachineRegisterInfo &MRI) const;
420
421 bool isWorthFoldingIntoExtendedReg(MachineInstr &MI,
423 bool IsAddrOperand) const;
424 ComplexRendererFns
425 selectAddrModeShiftedExtendXReg(MachineOperand &Root,
426 unsigned SizeInBytes) const;
427
428 /// Returns a \p ComplexRendererFns which contains a base, offset, and whether
429 /// or not a shift + extend should be folded into an addressing mode. Returns
430 /// None when this is not profitable or possible.
431 ComplexRendererFns
432 selectExtendedSHL(MachineOperand &Root, MachineOperand &Base,
433 MachineOperand &Offset, unsigned SizeInBytes,
434 bool WantsExt) const;
435 ComplexRendererFns selectAddrModeRegisterOffset(MachineOperand &Root) const;
436 ComplexRendererFns selectAddrModeXRO(MachineOperand &Root,
437 unsigned SizeInBytes) const;
438 template <int Width>
439 ComplexRendererFns selectAddrModeXRO(MachineOperand &Root) const {
440 return selectAddrModeXRO(Root, Width / 8);
441 }
442
443 ComplexRendererFns selectAddrModeWRO(MachineOperand &Root,
444 unsigned SizeInBytes) const;
445 template <int Width>
446 ComplexRendererFns selectAddrModeWRO(MachineOperand &Root) const {
447 return selectAddrModeWRO(Root, Width / 8);
448 }
449
450 ComplexRendererFns selectShiftedRegister(MachineOperand &Root,
451 bool AllowROR = false) const;
452
453 ComplexRendererFns selectArithShiftedRegister(MachineOperand &Root) const {
454 return selectShiftedRegister(Root);
455 }
456
457 ComplexRendererFns selectLogicalShiftedRegister(MachineOperand &Root) const {
458 return selectShiftedRegister(Root, true);
459 }
460
461 /// Given an extend instruction, determine the correct shift-extend type for
462 /// that instruction.
463 ///
464 /// If the instruction is going to be used in a load or store, pass
465 /// \p IsLoadStore = true.
467 getExtendTypeForInst(MachineInstr &MI, MachineRegisterInfo &MRI,
468 bool IsLoadStore = false) const;
469
470 /// Move \p Reg to \p RC if \p Reg is not already on \p RC.
471 ///
472 /// \returns Either \p Reg if no change was necessary, or the new register
473 /// created by moving \p Reg.
474 ///
475 /// Note: This uses emitCopy right now.
476 Register moveScalarRegClass(Register Reg, const TargetRegisterClass &RC,
477 MachineIRBuilder &MIB) const;
478
479 ComplexRendererFns selectArithExtendedRegister(MachineOperand &Root) const;
480
481 ComplexRendererFns selectExtractHigh(MachineOperand &Root) const;
482
483 void renderTruncImm(MachineInstrBuilder &MIB, const MachineInstr &MI,
484 int OpIdx = -1) const;
485 void renderLogicalImm32(MachineInstrBuilder &MIB, const MachineInstr &I,
486 int OpIdx = -1) const;
487 void renderLogicalImm64(MachineInstrBuilder &MIB, const MachineInstr &I,
488 int OpIdx = -1) const;
489 void renderUbsanTrap(MachineInstrBuilder &MIB, const MachineInstr &MI,
490 int OpIdx) const;
491 void renderFPImm16(MachineInstrBuilder &MIB, const MachineInstr &MI,
492 int OpIdx = -1) const;
493 void renderFPImm32(MachineInstrBuilder &MIB, const MachineInstr &MI,
494 int OpIdx = -1) const;
495 void renderFPImm64(MachineInstrBuilder &MIB, const MachineInstr &MI,
496 int OpIdx = -1) const;
497 void renderFPImm32SIMDModImmType4(MachineInstrBuilder &MIB,
498 const MachineInstr &MI,
499 int OpIdx = -1) const;
500
501 // Materialize a GlobalValue or BlockAddress using a movz+movk sequence.
502 void materializeLargeCMVal(MachineInstr &I, const Value *V, unsigned OpFlags);
503
504 // Optimization methods.
505 bool tryOptSelect(GSelect &Sel);
506 bool tryOptSelectConjunction(GSelect &Sel, MachineInstr &CondMI);
507 MachineInstr *tryFoldIntegerCompare(MachineOperand &LHS, MachineOperand &RHS,
508 MachineOperand &Predicate,
509 MachineIRBuilder &MIRBuilder) const;
510
511 /// Return true if \p MI is a load or store of \p NumBytes bytes.
512 bool isLoadStoreOfNumBytes(const MachineInstr &MI, unsigned NumBytes) const;
513
514 /// Returns true if \p MI is guaranteed to have the high-half of a 64-bit
515 /// register zeroed out. In other words, the result of MI has been explicitly
516 /// zero extended.
517 bool isDef32(const MachineInstr &MI) const;
518
519 const AArch64TargetMachine &TM;
520 const AArch64Subtarget &STI;
521 const AArch64InstrInfo &TII;
523 const AArch64RegisterBankInfo &RBI;
524
525 bool ProduceNonFlagSettingCondBr = false;
526
527 // Some cached values used during selection.
528 // We use LR as a live-in register, and we keep track of it here as it can be
529 // clobbered by calls.
530 Register MFReturnAddr;
531
533
534#define GET_GLOBALISEL_PREDICATES_DECL
535#include "AArch64GenGlobalISel.inc"
536#undef GET_GLOBALISEL_PREDICATES_DECL
537
538// We declare the temporaries used by selectImpl() in the class to minimize the
539// cost of constructing placeholder values.
540#define GET_GLOBALISEL_TEMPORARIES_DECL
541#include "AArch64GenGlobalISel.inc"
542#undef GET_GLOBALISEL_TEMPORARIES_DECL
543};
544
545} // end anonymous namespace
546
547#define GET_GLOBALISEL_IMPL
548#include "AArch64GenGlobalISel.inc"
549#undef GET_GLOBALISEL_IMPL
550
551AArch64InstructionSelector::AArch64InstructionSelector(
552 const AArch64TargetMachine &TM, const AArch64Subtarget &STI,
553 const AArch64RegisterBankInfo &RBI)
554 : TM(TM), STI(STI), TII(*STI.getInstrInfo()), TRI(*STI.getRegisterInfo()),
555 RBI(RBI),
557#include "AArch64GenGlobalISel.inc"
560#include "AArch64GenGlobalISel.inc"
562{
563}
564
565// FIXME: This should be target-independent, inferred from the types declared
566// for each class in the bank.
567//
568/// Given a register bank, and a type, return the smallest register class that
569/// can represent that combination.
570static const TargetRegisterClass *
571getRegClassForTypeOnBank(LLT Ty, const RegisterBank &RB,
572 bool GetAllRegSet = false) {
573 if (RB.getID() == AArch64::GPRRegBankID) {
574 if (Ty.getSizeInBits() <= 32)
575 return GetAllRegSet ? &AArch64::GPR32allRegClass
576 : &AArch64::GPR32RegClass;
577 if (Ty.getSizeInBits() == 64)
578 return GetAllRegSet ? &AArch64::GPR64allRegClass
579 : &AArch64::GPR64RegClass;
580 if (Ty.getSizeInBits() == 128)
581 return &AArch64::XSeqPairsClassRegClass;
582 return nullptr;
583 }
584
585 if (RB.getID() == AArch64::FPRRegBankID) {
586 switch (Ty.getSizeInBits()) {
587 case 8:
588 return &AArch64::FPR8RegClass;
589 case 16:
590 return &AArch64::FPR16RegClass;
591 case 32:
592 return &AArch64::FPR32RegClass;
593 case 64:
594 return &AArch64::FPR64RegClass;
595 case 128:
596 return &AArch64::FPR128RegClass;
597 }
598 return nullptr;
599 }
600
601 return nullptr;
602}
603
604/// Given a register bank, and size in bits, return the smallest register class
605/// that can represent that combination.
606static const TargetRegisterClass *
608 bool GetAllRegSet = false) {
609 if (SizeInBits.isScalable()) {
610 assert(RB.getID() == AArch64::FPRRegBankID &&
611 "Expected FPR regbank for scalable type size");
612 return &AArch64::ZPRRegClass;
613 }
614
615 unsigned RegBankID = RB.getID();
616
617 if (RegBankID == AArch64::GPRRegBankID) {
618 if (SizeInBits <= 32)
619 return GetAllRegSet ? &AArch64::GPR32allRegClass
620 : &AArch64::GPR32RegClass;
621 if (SizeInBits == 64)
622 return GetAllRegSet ? &AArch64::GPR64allRegClass
623 : &AArch64::GPR64RegClass;
624 if (SizeInBits == 128)
625 return &AArch64::XSeqPairsClassRegClass;
626 }
627
628 if (RegBankID == AArch64::FPRRegBankID) {
629 switch (SizeInBits) {
630 default:
631 return nullptr;
632 case 8:
633 return &AArch64::FPR8RegClass;
634 case 16:
635 return &AArch64::FPR16RegClass;
636 case 32:
637 return &AArch64::FPR32RegClass;
638 case 64:
639 return &AArch64::FPR64RegClass;
640 case 128:
641 return &AArch64::FPR128RegClass;
642 }
643 }
644
645 return nullptr;
646}
647
648/// Returns the correct subregister to use for a given register class.
650 const TargetRegisterInfo &TRI, unsigned &SubReg) {
651 switch (TRI.getRegSizeInBits(*RC)) {
652 case 8:
653 SubReg = AArch64::bsub;
654 break;
655 case 16:
656 SubReg = AArch64::hsub;
657 break;
658 case 32:
659 if (RC != &AArch64::FPR32RegClass)
660 SubReg = AArch64::sub_32;
661 else
662 SubReg = AArch64::ssub;
663 break;
664 case 64:
665 SubReg = AArch64::dsub;
666 break;
667 default:
669 dbgs() << "Couldn't find appropriate subregister for register class.");
670 return false;
671 }
672
673 return true;
674}
675
676/// Returns the minimum size the given register bank can hold.
677static unsigned getMinSizeForRegBank(const RegisterBank &RB) {
678 switch (RB.getID()) {
679 case AArch64::GPRRegBankID:
680 return 32;
681 case AArch64::FPRRegBankID:
682 return 8;
683 default:
684 llvm_unreachable("Tried to get minimum size for unknown register bank.");
685 }
686}
687
688/// Create a REG_SEQUENCE instruction using the registers in \p Regs.
689/// Helper function for functions like createDTuple and createQTuple.
690///
691/// \p RegClassIDs - The list of register class IDs available for some tuple of
692/// a scalar class. E.g. QQRegClassID, QQQRegClassID, QQQQRegClassID. This is
693/// expected to contain between 2 and 4 tuple classes.
694///
695/// \p SubRegs - The list of subregister classes associated with each register
696/// class ID in \p RegClassIDs. E.g., QQRegClassID should use the qsub0
697/// subregister class. The index of each subregister class is expected to
698/// correspond with the index of each register class.
699///
700/// \returns Either the destination register of REG_SEQUENCE instruction that
701/// was created, or the 0th element of \p Regs if \p Regs contains a single
702/// element.
704 const unsigned RegClassIDs[],
705 const unsigned SubRegs[], MachineIRBuilder &MIB) {
706 unsigned NumRegs = Regs.size();
707 if (NumRegs == 1)
708 return Regs[0];
709 assert(NumRegs >= 2 && NumRegs <= 4 &&
710 "Only support between two and 4 registers in a tuple!");
712 auto *DesiredClass = TRI->getRegClass(RegClassIDs[NumRegs - 2]);
713 auto RegSequence =
714 MIB.buildInstr(TargetOpcode::REG_SEQUENCE, {DesiredClass}, {});
715 for (unsigned I = 0, E = Regs.size(); I < E; ++I) {
716 RegSequence.addUse(Regs[I]);
717 RegSequence.addImm(SubRegs[I]);
718 }
719 return RegSequence.getReg(0);
720}
721
722/// Create a tuple of D-registers using the registers in \p Regs.
724 static const unsigned RegClassIDs[] = {
725 AArch64::DDRegClassID, AArch64::DDDRegClassID, AArch64::DDDDRegClassID};
726 static const unsigned SubRegs[] = {AArch64::dsub0, AArch64::dsub1,
727 AArch64::dsub2, AArch64::dsub3};
728 return createTuple(Regs, RegClassIDs, SubRegs, MIB);
729}
730
731/// Create a tuple of Q-registers using the registers in \p Regs.
733 static const unsigned RegClassIDs[] = {
734 AArch64::QQRegClassID, AArch64::QQQRegClassID, AArch64::QQQQRegClassID};
735 static const unsigned SubRegs[] = {AArch64::qsub0, AArch64::qsub1,
736 AArch64::qsub2, AArch64::qsub3};
737 return createTuple(Regs, RegClassIDs, SubRegs, MIB);
738}
739
740static std::optional<uint64_t> getImmedFromMO(const MachineOperand &Root) {
741 auto &MI = *Root.getParent();
742 auto &MBB = *MI.getParent();
743 auto &MF = *MBB.getParent();
744 auto &MRI = MF.getRegInfo();
745 uint64_t Immed;
746 if (Root.isImm())
747 Immed = Root.getImm();
748 else if (Root.isCImm())
749 Immed = Root.getCImm()->getZExtValue();
750 else if (Root.isReg()) {
751 auto ValAndVReg =
753 if (!ValAndVReg)
754 return std::nullopt;
755 Immed = ValAndVReg->Value.getSExtValue();
756 } else
757 return std::nullopt;
758 return Immed;
759}
760
761/// Check whether \p I is a currently unsupported binary operation:
762/// - it has an unsized type
763/// - an operand is not a vreg
764/// - all operands are not in the same bank
765/// These are checks that should someday live in the verifier, but right now,
766/// these are mostly limitations of the aarch64 selector.
767static bool unsupportedBinOp(const MachineInstr &I,
768 const AArch64RegisterBankInfo &RBI,
770 const AArch64RegisterInfo &TRI) {
771 LLT Ty = MRI.getType(I.getOperand(0).getReg());
772 if (!Ty.isValid()) {
773 LLVM_DEBUG(dbgs() << "Generic binop register should be typed\n");
774 return true;
775 }
776
777 const RegisterBank *PrevOpBank = nullptr;
778 for (auto &MO : I.operands()) {
779 // FIXME: Support non-register operands.
780 if (!MO.isReg()) {
781 LLVM_DEBUG(dbgs() << "Generic inst non-reg operands are unsupported\n");
782 return true;
783 }
784
785 // FIXME: Can generic operations have physical registers operands? If
786 // so, this will need to be taught about that, and we'll need to get the
787 // bank out of the minimal class for the register.
788 // Either way, this needs to be documented (and possibly verified).
789 if (!MO.getReg().isVirtual()) {
790 LLVM_DEBUG(dbgs() << "Generic inst has physical register operand\n");
791 return true;
792 }
793
794 const RegisterBank *OpBank = RBI.getRegBank(MO.getReg(), MRI, TRI);
795 if (!OpBank) {
796 LLVM_DEBUG(dbgs() << "Generic register has no bank or class\n");
797 return true;
798 }
799
800 if (PrevOpBank && OpBank != PrevOpBank) {
801 LLVM_DEBUG(dbgs() << "Generic inst operands have different banks\n");
802 return true;
803 }
804 PrevOpBank = OpBank;
805 }
806 return false;
807}
808
809/// Select the AArch64 opcode for the basic binary operation \p GenericOpc
810/// (such as G_OR or G_SDIV), appropriate for the register bank \p RegBankID
811/// and of size \p OpSize.
812/// \returns \p GenericOpc if the combination is unsupported.
813static unsigned selectBinaryOp(unsigned GenericOpc, unsigned RegBankID,
814 unsigned OpSize) {
815 switch (RegBankID) {
816 case AArch64::GPRRegBankID:
817 if (OpSize == 32) {
818 switch (GenericOpc) {
819 case TargetOpcode::G_SHL:
820 return AArch64::LSLVWr;
821 case TargetOpcode::G_LSHR:
822 return AArch64::LSRVWr;
823 case TargetOpcode::G_ASHR:
824 return AArch64::ASRVWr;
825 default:
826 return GenericOpc;
827 }
828 } else if (OpSize == 64) {
829 switch (GenericOpc) {
830 case TargetOpcode::G_PTR_ADD:
831 return AArch64::ADDXrr;
832 case TargetOpcode::G_SHL:
833 return AArch64::LSLVXr;
834 case TargetOpcode::G_LSHR:
835 return AArch64::LSRVXr;
836 case TargetOpcode::G_ASHR:
837 return AArch64::ASRVXr;
838 default:
839 return GenericOpc;
840 }
841 }
842 break;
843 case AArch64::FPRRegBankID:
844 switch (OpSize) {
845 case 32:
846 switch (GenericOpc) {
847 case TargetOpcode::G_FADD:
848 return AArch64::FADDSrr;
849 case TargetOpcode::G_FSUB:
850 return AArch64::FSUBSrr;
851 case TargetOpcode::G_FMUL:
852 return AArch64::FMULSrr;
853 case TargetOpcode::G_FDIV:
854 return AArch64::FDIVSrr;
855 default:
856 return GenericOpc;
857 }
858 case 64:
859 switch (GenericOpc) {
860 case TargetOpcode::G_FADD:
861 return AArch64::FADDDrr;
862 case TargetOpcode::G_FSUB:
863 return AArch64::FSUBDrr;
864 case TargetOpcode::G_FMUL:
865 return AArch64::FMULDrr;
866 case TargetOpcode::G_FDIV:
867 return AArch64::FDIVDrr;
868 case TargetOpcode::G_OR:
869 return AArch64::ORRv8i8;
870 default:
871 return GenericOpc;
872 }
873 }
874 break;
875 }
876 return GenericOpc;
877}
878
879/// Select the AArch64 opcode for the G_LOAD or G_STORE operation \p GenericOpc,
880/// appropriate for the (value) register bank \p RegBankID and of memory access
881/// size \p OpSize. This returns the variant with the base+unsigned-immediate
882/// addressing mode (e.g., LDRXui).
883/// \returns \p GenericOpc if the combination is unsupported.
884static unsigned selectLoadStoreUIOp(unsigned GenericOpc, unsigned RegBankID,
885 unsigned OpSize) {
886 const bool isStore = GenericOpc == TargetOpcode::G_STORE;
887 switch (RegBankID) {
888 case AArch64::GPRRegBankID:
889 switch (OpSize) {
890 case 8:
891 return isStore ? AArch64::STRBBui : AArch64::LDRBBui;
892 case 16:
893 return isStore ? AArch64::STRHHui : AArch64::LDRHHui;
894 case 32:
895 return isStore ? AArch64::STRWui : AArch64::LDRWui;
896 case 64:
897 return isStore ? AArch64::STRXui : AArch64::LDRXui;
898 }
899 break;
900 case AArch64::FPRRegBankID:
901 switch (OpSize) {
902 case 8:
903 return isStore ? AArch64::STRBui : AArch64::LDRBui;
904 case 16:
905 return isStore ? AArch64::STRHui : AArch64::LDRHui;
906 case 32:
907 return isStore ? AArch64::STRSui : AArch64::LDRSui;
908 case 64:
909 return isStore ? AArch64::STRDui : AArch64::LDRDui;
910 case 128:
911 return isStore ? AArch64::STRQui : AArch64::LDRQui;
912 }
913 break;
914 }
915 return GenericOpc;
916}
917
/// Helper function for selectCopy. Inserts a subregister copy from \p SrcReg
/// to \p *To.
///
/// E.g "To = COPY SrcReg:SubReg"
                       const RegisterBankInfo &RBI, Register SrcReg,
                       const TargetRegisterClass *To, unsigned SubReg) {
  assert(SrcReg.isValid() && "Expected a valid source register?");
  assert(To && "Destination register class cannot be null");
  assert(SubReg && "Expected a valid subregister");

  // Insert "newreg = COPY SrcReg:SubReg" right before I, then make I read the
  // new (narrower) register instead of the original source.
  MachineIRBuilder MIB(I);
  auto SubRegCopy =
      MIB.buildInstr(TargetOpcode::COPY, {To}, {}).addReg(SrcReg, 0, SubReg);
  MachineOperand &RegOp = I.getOperand(1);
  RegOp.setReg(SubRegCopy.getReg(0));

  // It's possible that the destination register won't be constrained. Make
  // sure that happens.
  if (!I.getOperand(0).getReg().isPhysical())
    RBI.constrainGenericRegister(I.getOperand(0).getReg(), *To, MRI);

  return true;
}
942
/// Helper function to get the source and destination register classes for a
/// copy. Returns a std::pair containing the source register class for the
/// copy, and the destination register class for the copy. If a register class
/// cannot be determined, then it will be nullptr.
static std::pair<const TargetRegisterClass *, const TargetRegisterClass *>
                      const RegisterBankInfo &RBI) {
  Register DstReg = I.getOperand(0).getReg();
  Register SrcReg = I.getOperand(1).getReg();
  const RegisterBank &DstRegBank = *RBI.getRegBank(DstReg, MRI, TRI);
  const RegisterBank &SrcRegBank = *RBI.getRegBank(SrcReg, MRI, TRI);

  TypeSize DstSize = RBI.getSizeInBits(DstReg, MRI, TRI);
  TypeSize SrcSize = RBI.getSizeInBits(SrcReg, MRI, TRI);

  // Special casing for cross-bank copies of s1s. We can technically represent
  // a 1-bit value with any size of register. The minimum size for a GPR is 32
  // bits. So, we need to put the FPR on 32 bits as well.
  //
  // FIXME: I'm not sure if this case holds true outside of copies. If it does,
  // then we can pull it into the helpers that get the appropriate class for a
  // register bank. Or make a new helper that carries along some constraint
  // information.
  if (SrcRegBank != DstRegBank && (DstSize == 1 && SrcSize == 1))
    SrcSize = DstSize = TypeSize::getFixed(32);

  // Pick the smallest class on each bank that can hold the value; either may
  // be nullptr when no class on that bank is big enough.
  return {getMinClassForRegBank(SrcRegBank, SrcSize, true),
          getMinClassForRegBank(DstRegBank, DstSize, true)};
}
973
// FIXME: We need some sort of API in RBI/TRI to allow generic code to
// constrain operands of simple instructions given a TargetRegisterClass
// and LLT
                             const RegisterBankInfo &RBI) {
  // Constrain every virtual register operand of I to a concrete register
  // class, deriving one from the operand's register bank where necessary.
  for (MachineOperand &MO : I.operands()) {
    if (!MO.isReg())
      continue;
    Register Reg = MO.getReg();
    // Skip the $noreg placeholder.
    if (!Reg)
      continue;
    // Physical registers are already concrete; nothing to constrain.
    if (Reg.isPhysical())
      continue;
    LLT Ty = MRI.getType(Reg);
    const RegClassOrRegBank &RegClassOrBank = MRI.getRegClassOrRegBank(Reg);
    const TargetRegisterClass *RC =
        RegClassOrBank.dyn_cast<const TargetRegisterClass *>();
    if (!RC) {
      // Only a bank is assigned so far: pick a class that fits Ty on it.
      const RegisterBank &RB = *RegClassOrBank.get<const RegisterBank *>();
      RC = getRegClassForTypeOnBank(Ty, RB);
      if (!RC) {
          dbgs() << "Warning: DBG_VALUE operand has unexpected size/bank\n");
        break;
      }
    }
    RBI.constrainGenericRegister(Reg, *RC, MRI);
  }

  return true;
}
1005
                       const RegisterBankInfo &RBI) {
  Register DstReg = I.getOperand(0).getReg();
  Register SrcReg = I.getOperand(1).getReg();
  const RegisterBank &DstRegBank = *RBI.getRegBank(DstReg, MRI, TRI);
  const RegisterBank &SrcRegBank = *RBI.getRegBank(SrcReg, MRI, TRI);

  // Find the correct register classes for the source and destination registers.
  const TargetRegisterClass *SrcRC;
  const TargetRegisterClass *DstRC;
  std::tie(SrcRC, DstRC) = getRegClassesForCopy(I, TII, MRI, TRI, RBI);

  if (!DstRC) {
    LLVM_DEBUG(dbgs() << "Unexpected dest size "
                      << RBI.getSizeInBits(DstReg, MRI, TRI) << '\n');
    return false;
  }

  // Is this a copy? If so, then we may need to insert a subregister copy.
  if (I.isCopy()) {
    // Yes. Check if there's anything to fix up.
    if (!SrcRC) {
      LLVM_DEBUG(dbgs() << "Couldn't determine source register class\n");
      return false;
    }

    const TypeSize SrcSize = TRI.getRegSizeInBits(*SrcRC);
    const TypeSize DstSize = TRI.getRegSizeInBits(*DstRC);
    // Filled in by getSubRegForClass below before each use.
    unsigned SubReg;

    // If the source bank doesn't support a subregister copy small enough,
    // then we first need to copy to the destination bank.
    if (getMinSizeForRegBank(SrcRegBank) > DstSize) {
      const TargetRegisterClass *DstTempRC =
          getMinClassForRegBank(DstRegBank, SrcSize, /* GetAllRegSet */ true);
      getSubRegForClass(DstRC, TRI, SubReg);

      MachineIRBuilder MIB(I);
      auto Copy = MIB.buildCopy({DstTempRC}, {SrcReg});
      copySubReg(I, MRI, RBI, Copy.getReg(0), DstRC, SubReg);
    } else if (SrcSize > DstSize) {
      // If the source register is bigger than the destination we need to
      // perform a subregister copy.
      const TargetRegisterClass *SubRegRC =
          getMinClassForRegBank(SrcRegBank, DstSize, /* GetAllRegSet */ true);
      getSubRegForClass(SubRegRC, TRI, SubReg);
      copySubReg(I, MRI, RBI, SrcReg, DstRC, SubReg);
    } else if (DstSize > SrcSize) {
      // If the destination register is bigger than the source we need to do
      // a promotion using SUBREG_TO_REG.
      const TargetRegisterClass *PromotionRC =
          getMinClassForRegBank(SrcRegBank, DstSize, /* GetAllRegSet */ true);
      getSubRegForClass(SrcRC, TRI, SubReg);

      Register PromoteReg = MRI.createVirtualRegister(PromotionRC);
      BuildMI(*I.getParent(), I, I.getDebugLoc(),
              TII.get(AArch64::SUBREG_TO_REG), PromoteReg)
          .addImm(0)
          .addUse(SrcReg)
          .addImm(SubReg);
      MachineOperand &RegOp = I.getOperand(1);
      RegOp.setReg(PromoteReg);
    }

    // If the destination is a physical register, then there's nothing to
    // change, so we're done.
    if (DstReg.isPhysical())
      return true;
  }

  // No need to constrain SrcReg. It will get constrained when we hit another
  // of its use or its defs. Copies do not have constraints.
  if (!RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) {
    LLVM_DEBUG(dbgs() << "Failed to constrain " << TII.getName(I.getOpcode())
                      << " operand\n");
    return false;
  }

  // If this a GPR ZEXT that we want to just reduce down into a copy.
  // The sizes will be mismatched with the source < 32b but that's ok.
  if (I.getOpcode() == TargetOpcode::G_ZEXT) {
    I.setDesc(TII.get(AArch64::COPY));
    assert(SrcRegBank.getID() == AArch64::GPRRegBankID);
    // Re-run as a plain COPY so the subregister fix-ups above apply to it.
    return selectCopy(I, TII, MRI, TRI, RBI);
  }

  I.setDesc(TII.get(AArch64::COPY));
  return true;
}
1096
1097static unsigned selectFPConvOpc(unsigned GenericOpc, LLT DstTy, LLT SrcTy) {
1098 if (!DstTy.isScalar() || !SrcTy.isScalar())
1099 return GenericOpc;
1100
1101 const unsigned DstSize = DstTy.getSizeInBits();
1102 const unsigned SrcSize = SrcTy.getSizeInBits();
1103
1104 switch (DstSize) {
1105 case 32:
1106 switch (SrcSize) {
1107 case 32:
1108 switch (GenericOpc) {
1109 case TargetOpcode::G_SITOFP:
1110 return AArch64::SCVTFUWSri;
1111 case TargetOpcode::G_UITOFP:
1112 return AArch64::UCVTFUWSri;
1113 case TargetOpcode::G_FPTOSI:
1114 return AArch64::FCVTZSUWSr;
1115 case TargetOpcode::G_FPTOUI:
1116 return AArch64::FCVTZUUWSr;
1117 default:
1118 return GenericOpc;
1119 }
1120 case 64:
1121 switch (GenericOpc) {
1122 case TargetOpcode::G_SITOFP:
1123 return AArch64::SCVTFUXSri;
1124 case TargetOpcode::G_UITOFP:
1125 return AArch64::UCVTFUXSri;
1126 case TargetOpcode::G_FPTOSI:
1127 return AArch64::FCVTZSUWDr;
1128 case TargetOpcode::G_FPTOUI:
1129 return AArch64::FCVTZUUWDr;
1130 default:
1131 return GenericOpc;
1132 }
1133 default:
1134 return GenericOpc;
1135 }
1136 case 64:
1137 switch (SrcSize) {
1138 case 32:
1139 switch (GenericOpc) {
1140 case TargetOpcode::G_SITOFP:
1141 return AArch64::SCVTFUWDri;
1142 case TargetOpcode::G_UITOFP:
1143 return AArch64::UCVTFUWDri;
1144 case TargetOpcode::G_FPTOSI:
1145 return AArch64::FCVTZSUXSr;
1146 case TargetOpcode::G_FPTOUI:
1147 return AArch64::FCVTZUUXSr;
1148 default:
1149 return GenericOpc;
1150 }
1151 case 64:
1152 switch (GenericOpc) {
1153 case TargetOpcode::G_SITOFP:
1154 return AArch64::SCVTFUXDri;
1155 case TargetOpcode::G_UITOFP:
1156 return AArch64::UCVTFUXDri;
1157 case TargetOpcode::G_FPTOSI:
1158 return AArch64::FCVTZSUXDr;
1159 case TargetOpcode::G_FPTOUI:
1160 return AArch64::FCVTZUUXDr;
1161 default:
1162 return GenericOpc;
1163 }
1164 default:
1165 return GenericOpc;
1166 }
1167 default:
1168 return GenericOpc;
1169 };
1170 return GenericOpc;
1171}
1172
AArch64InstructionSelector::emitSelect(Register Dst, Register True,
                                       MachineIRBuilder &MIB) const {
  MachineRegisterInfo &MRI = *MIB.getMRI();
  assert(RBI.getRegBank(False, MRI, TRI)->getID() ==
             RBI.getRegBank(True, MRI, TRI)->getID() &&
         "Expected both select operands to have the same regbank?");
  LLT Ty = MRI.getType(True);
  if (Ty.isVector())
    return nullptr;
  const unsigned Size = Ty.getSizeInBits();
  assert((Size == 32 || Size == 64) &&
         "Expected 32 bit or 64 bit select only?");
  const bool Is32Bit = Size == 32;
  // Values not on the GPR bank select via FCSEL; the remainder of this
  // function handles the GPR case.
  if (RBI.getRegBank(True, MRI, TRI)->getID() != AArch64::GPRRegBankID) {
    unsigned Opc = Is32Bit ? AArch64::FCSELSrrr : AArch64::FCSELDrrr;
    auto FCSel = MIB.buildInstr(Opc, {Dst}, {True, False}).addImm(CC);
    return &*FCSel;
  }

  // By default, we'll try and emit a CSEL.
  unsigned Opc = Is32Bit ? AArch64::CSELWr : AArch64::CSELXr;
  bool Optimized = false;
  // Tries to rewrite the select into CSNEG/CSINV/CSINC by folding a unary-ish
  // operation feeding one of the operands. May rewrite Opc, Reg, OtherReg and
  // (when Invert is set) CC in place; returns true on success.
  auto TryFoldBinOpIntoSelect = [&Opc, Is32Bit, &CC, &MRI,
                                 &Optimized](Register &Reg, Register &OtherReg,
                                             bool Invert) {
    if (Optimized)
      return false;

    // Attempt to fold:
    //
    // %sub = G_SUB 0, %x
    // %select = G_SELECT cc, %reg, %sub
    //
    // Into:
    // %select = CSNEG %reg, %x, cc
    Register MatchReg;
    if (mi_match(Reg, MRI, m_Neg(m_Reg(MatchReg)))) {
      Opc = Is32Bit ? AArch64::CSNEGWr : AArch64::CSNEGXr;
      Reg = MatchReg;
      if (Invert) {
        std::swap(Reg, OtherReg);
      }
      return true;
    }

    // Attempt to fold:
    //
    // %xor = G_XOR %x, -1
    // %select = G_SELECT cc, %reg, %xor
    //
    // Into:
    // %select = CSINV %reg, %x, cc
    if (mi_match(Reg, MRI, m_Not(m_Reg(MatchReg)))) {
      Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;
      Reg = MatchReg;
      if (Invert) {
        std::swap(Reg, OtherReg);
      }
      return true;
    }

    // Attempt to fold:
    //
    // %add = G_ADD %x, 1
    // %select = G_SELECT cc, %reg, %add
    //
    // Into:
    // %select = CSINC %reg, %x, cc
    if (mi_match(Reg, MRI,
                 m_any_of(m_GAdd(m_Reg(MatchReg), m_SpecificICst(1)),
                          m_GPtrAdd(m_Reg(MatchReg), m_SpecificICst(1))))) {
      Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;
      Reg = MatchReg;
      if (Invert) {
        std::swap(Reg, OtherReg);
      }
      return true;
    }

    return false;
  };

  // Helper lambda which tries to use CSINC/CSINV for the instruction when its
  // true/false values are constants.
  // FIXME: All of these patterns already exist in tablegen. We should be
  // able to import these.
  auto TryOptSelectCst = [&Opc, &True, &False, &CC, Is32Bit, &MRI,
                          &Optimized]() {
    if (Optimized)
      return false;
    auto TrueCst = getIConstantVRegValWithLookThrough(True, MRI);
    auto FalseCst = getIConstantVRegValWithLookThrough(False, MRI);
    if (!TrueCst && !FalseCst)
      return false;

    Register ZReg = Is32Bit ? AArch64::WZR : AArch64::XZR;
    if (TrueCst && FalseCst) {
      int64_t T = TrueCst->Value.getSExtValue();
      int64_t F = FalseCst->Value.getSExtValue();

      if (T == 0 && F == 1) {
        // G_SELECT cc, 0, 1 -> CSINC zreg, zreg, cc
        Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;
        True = ZReg;
        False = ZReg;
        return true;
      }

      if (T == 0 && F == -1) {
        // G_SELECT cc, 0, -1 -> CSINV zreg, zreg, cc
        Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;
        True = ZReg;
        False = ZReg;
        return true;
      }
    }

    if (TrueCst) {
      int64_t T = TrueCst->Value.getSExtValue();
      if (T == 1) {
        // G_SELECT cc, 1, f -> CSINC f, zreg, inv_cc
        Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;
        True = False;
        False = ZReg;
        return true;
      }

      if (T == -1) {
        // G_SELECT cc, -1, f -> CSINV f, zreg, inv_cc
        Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;
        True = False;
        False = ZReg;
        return true;
      }
    }

    if (FalseCst) {
      int64_t F = FalseCst->Value.getSExtValue();
      if (F == 1) {
        // G_SELECT cc, t, 1 -> CSINC t, zreg, cc
        Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;
        False = ZReg;
        return true;
      }

      if (F == -1) {
        // G_SELECT cc, t, -1 -> CSINV t, zreg, cc
        Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;
        False = ZReg;
        return true;
      }
    }
    return false;
  };

  // Try the folds in order; each may rewrite Opc/True/False/CC in place, and
  // at most one of them fires.
  Optimized |= TryFoldBinOpIntoSelect(False, True, /*Invert = */ false);
  Optimized |= TryFoldBinOpIntoSelect(True, False, /*Invert = */ true);
  Optimized |= TryOptSelectCst();
  auto SelectInst = MIB.buildInstr(Opc, {Dst}, {True, False}).addImm(CC);
  return &*SelectInst;
}
1343
  // Map an integer-compare IR predicate onto the single AArch64 condition
  // code that implements it.
  switch (P) {
  default:
    llvm_unreachable("Unknown condition code!");
  case CmpInst::ICMP_NE:
    return AArch64CC::NE;
  case CmpInst::ICMP_EQ:
    return AArch64CC::EQ;
  case CmpInst::ICMP_SGT:
    return AArch64CC::GT;
  case CmpInst::ICMP_SGE:
    return AArch64CC::GE;
  case CmpInst::ICMP_SLT:
    return AArch64CC::LT;
  case CmpInst::ICMP_SLE:
    return AArch64CC::LE;
  case CmpInst::ICMP_UGT:
    return AArch64CC::HI;
  case CmpInst::ICMP_UGE:
    return AArch64CC::HS;
  case CmpInst::ICMP_ULT:
    return AArch64CC::LO;
  case CmpInst::ICMP_ULE:
    return AArch64CC::LS;
  }
}
1370
/// changeFPCCToORAArch64CC - Convert an IR fp condition code to an AArch64 CC.
/// Some FP predicates need two condition codes whose results are OR'ed
/// together; for those \p CondCode2 is set as well, otherwise it is left as
/// AArch64CC::AL (meaning "no second code").
                                  AArch64CC::CondCode &CondCode,
                                  AArch64CC::CondCode &CondCode2) {
  CondCode2 = AArch64CC::AL;
  switch (CC) {
  default:
    llvm_unreachable("Unknown FP condition!");
  case CmpInst::FCMP_OEQ:
    CondCode = AArch64CC::EQ;
    break;
  case CmpInst::FCMP_OGT:
    CondCode = AArch64CC::GT;
    break;
  case CmpInst::FCMP_OGE:
    CondCode = AArch64CC::GE;
    break;
  case CmpInst::FCMP_OLT:
    CondCode = AArch64CC::MI;
    break;
  case CmpInst::FCMP_OLE:
    CondCode = AArch64CC::LS;
    break;
  case CmpInst::FCMP_ONE:
    // one == (olt || ogt): needs two branches.
    CondCode = AArch64CC::MI;
    CondCode2 = AArch64CC::GT;
    break;
  case CmpInst::FCMP_ORD:
    CondCode = AArch64CC::VC;
    break;
  case CmpInst::FCMP_UNO:
    CondCode = AArch64CC::VS;
    break;
  case CmpInst::FCMP_UEQ:
    // ueq == (oeq || uno): needs two branches.
    CondCode = AArch64CC::EQ;
    CondCode2 = AArch64CC::VS;
    break;
  case CmpInst::FCMP_UGT:
    CondCode = AArch64CC::HI;
    break;
  case CmpInst::FCMP_UGE:
    CondCode = AArch64CC::PL;
    break;
  case CmpInst::FCMP_ULT:
    CondCode = AArch64CC::LT;
    break;
  case CmpInst::FCMP_ULE:
    CondCode = AArch64CC::LE;
    break;
  case CmpInst::FCMP_UNE:
    CondCode = AArch64CC::NE;
    break;
  }
}
1425
/// Convert an IR fp condition code to an AArch64 CC.
/// This differs from changeFPCCToAArch64CC in that it returns cond codes that
/// should be AND'ed instead of OR'ed.
                                   AArch64CC::CondCode &CondCode,
                                   AArch64CC::CondCode &CondCode2) {
  CondCode2 = AArch64CC::AL;
  switch (CC) {
  default:
    // Every other predicate maps to a single code, so the OR and AND forms
    // coincide; delegate and check no second code was produced.
    changeFPCCToORAArch64CC(CC, CondCode, CondCode2);
    assert(CondCode2 == AArch64CC::AL);
    break;
  case CmpInst::FCMP_ONE:
    // (a one b)
    // == ((a olt b) || (a ogt b))
    // == ((a ord b) && (a une b))
    CondCode = AArch64CC::VC;
    CondCode2 = AArch64CC::NE;
    break;
  case CmpInst::FCMP_UEQ:
    // (a ueq b)
    // == ((a uno b) || (a oeq b))
    // == ((a ule b) && (a uge b))
    CondCode = AArch64CC::PL;
    CondCode2 = AArch64CC::LE;
    break;
  }
}
1454
/// Return a register which can be used as a bit to test in a TB(N)Z.
static Register getTestBitReg(Register Reg, uint64_t &Bit, bool &Invert,
  assert(Reg.isValid() && "Expected valid register!");
  bool HasZext = false;
  // Walk up the single-use def chain of Reg, re-targeting the bit test onto
  // an earlier value whenever the defining instruction can be looked through.
  // Bit and Invert are updated in place along the way.
  while (MachineInstr *MI = getDefIgnoringCopies(Reg, MRI)) {
    unsigned Opc = MI->getOpcode();

    // Only fold single-use defs: otherwise the folded instruction must be
    // kept anyway and nothing is saved.
    if (!MI->getOperand(0).isReg() ||
        !MRI.hasOneNonDBGUse(MI->getOperand(0).getReg()))
      break;

    // (tbz (any_ext x), b) -> (tbz x, b) if we don't use the extended bits.
    //
    // (tbz (trunc x), b) -> (tbz x, b) is always safe, because the bit number
    // on the truncated x is the same as the bit number on x.
    if (Opc == TargetOpcode::G_ANYEXT || Opc == TargetOpcode::G_ZEXT ||
        Opc == TargetOpcode::G_TRUNC) {
      if (Opc == TargetOpcode::G_ZEXT)
        HasZext = true;

      Register NextReg = MI->getOperand(1).getReg();
      // Did we find something worth folding?
      if (!NextReg.isValid() || !MRI.hasOneNonDBGUse(NextReg))
        break;

      // NextReg is worth folding. Keep looking.
      Reg = NextReg;
      continue;
    }

    // Attempt to find a suitable operation with a constant on one side.
    std::optional<uint64_t> C;
    Register TestReg;
    switch (Opc) {
    default:
      break;
    case TargetOpcode::G_AND:
    case TargetOpcode::G_XOR: {
      TestReg = MI->getOperand(1).getReg();
      Register ConstantReg = MI->getOperand(2).getReg();
      auto VRegAndVal = getIConstantVRegValWithLookThrough(ConstantReg, MRI);
      if (!VRegAndVal) {
        // AND commutes, check the other side for a constant.
        // FIXME: Can we canonicalize the constant so that it's always on the
        // same side at some point earlier?
        std::swap(ConstantReg, TestReg);
        VRegAndVal = getIConstantVRegValWithLookThrough(ConstantReg, MRI);
      }
      if (VRegAndVal) {
        // After a zext only the low (unsigned) bits matter; otherwise use the
        // sign-extended value.
        if (HasZext)
          C = VRegAndVal->Value.getZExtValue();
        else
          C = VRegAndVal->Value.getSExtValue();
      }
      break;
    }
    case TargetOpcode::G_ASHR:
    case TargetOpcode::G_LSHR:
    case TargetOpcode::G_SHL: {
      TestReg = MI->getOperand(1).getReg();
      auto VRegAndVal =
          getIConstantVRegValWithLookThrough(MI->getOperand(2).getReg(), MRI);
      if (VRegAndVal)
        C = VRegAndVal->Value.getSExtValue();
      break;
    }
    }

    // Didn't find a constant or viable register. Bail out of the loop.
    if (!C || !TestReg.isValid())
      break;

    // We found a suitable instruction with a constant. Check to see if we can
    // walk through the instruction.
    Register NextReg;
    unsigned TestRegSize = MRI.getType(TestReg).getSizeInBits();
    switch (Opc) {
    default:
      break;
    case TargetOpcode::G_AND:
      // (tbz (and x, m), b) -> (tbz x, b) when the b-th bit of m is set.
      if ((*C >> Bit) & 1)
        NextReg = TestReg;
      break;
    case TargetOpcode::G_SHL:
      // (tbz (shl x, c), b) -> (tbz x, b-c) when b-c is positive and fits in
      // the type of the register.
      if (*C <= Bit && (Bit - *C) < TestRegSize) {
        NextReg = TestReg;
        Bit = Bit - *C;
      }
      break;
    case TargetOpcode::G_ASHR:
      // (tbz (ashr x, c), b) -> (tbz x, b+c) or (tbz x, msb) if b+c is > # bits
      // in x
      NextReg = TestReg;
      Bit = Bit + *C;
      if (Bit >= TestRegSize)
        Bit = TestRegSize - 1;
      break;
    case TargetOpcode::G_LSHR:
      // (tbz (lshr x, c), b) -> (tbz x, b+c) when b + c is < # bits in x
      if ((Bit + *C) < TestRegSize) {
        NextReg = TestReg;
        Bit = Bit + *C;
      }
      break;
    case TargetOpcode::G_XOR:
      // We can walk through a G_XOR by inverting whether we use tbz/tbnz when
      // appropriate.
      //
      // e.g. If x' = xor x, c, and the b-th bit is set in c then
      //
      // tbz x', b -> tbnz x, b
      //
      // Because x' only has the b-th bit set if x does not.
      if ((*C >> Bit) & 1)
        Invert = !Invert;
      NextReg = TestReg;
      break;
    }

    // Check if we found anything worth folding.
    if (!NextReg.isValid())
      return Reg;
    Reg = NextReg;
  }

  return Reg;
}
1586
1587MachineInstr *AArch64InstructionSelector::emitTestBit(
1588 Register TestReg, uint64_t Bit, bool IsNegative, MachineBasicBlock *DstMBB,
1589 MachineIRBuilder &MIB) const {
1590 assert(TestReg.isValid());
1591 assert(ProduceNonFlagSettingCondBr &&
1592 "Cannot emit TB(N)Z with speculation tracking!");
1593 MachineRegisterInfo &MRI = *MIB.getMRI();
1594
1595 // Attempt to optimize the test bit by walking over instructions.
1596 TestReg = getTestBitReg(TestReg, Bit, IsNegative, MRI);
1597 LLT Ty = MRI.getType(TestReg);
1598 unsigned Size = Ty.getSizeInBits();
1599 assert(!Ty.isVector() && "Expected a scalar!");
1600 assert(Bit < 64 && "Bit is too large!");
1601
1602 // When the test register is a 64-bit register, we have to narrow to make
1603 // TBNZW work.
1604 bool UseWReg = Bit < 32;
1605 unsigned NecessarySize = UseWReg ? 32 : 64;
1606 if (Size != NecessarySize)
1607 TestReg = moveScalarRegClass(
1608 TestReg, UseWReg ? AArch64::GPR32RegClass : AArch64::GPR64RegClass,
1609 MIB);
1610
1611 static const unsigned OpcTable[2][2] = {{AArch64::TBZX, AArch64::TBNZX},
1612 {AArch64::TBZW, AArch64::TBNZW}};
1613 unsigned Opc = OpcTable[UseWReg][IsNegative];
1614 auto TestBitMI =
1615 MIB.buildInstr(Opc).addReg(TestReg).addImm(Bit).addMBB(DstMBB);
1616 constrainSelectedInstRegOperands(*TestBitMI, TII, TRI, RBI);
1617 return &*TestBitMI;
1618}
1619
1620bool AArch64InstructionSelector::tryOptAndIntoCompareBranch(
1621 MachineInstr &AndInst, bool Invert, MachineBasicBlock *DstMBB,
1622 MachineIRBuilder &MIB) const {
1623 assert(AndInst.getOpcode() == TargetOpcode::G_AND && "Expected G_AND only?");
1624 // Given something like this:
1625 //
1626 // %x = ...Something...
1627 // %one = G_CONSTANT i64 1
1628 // %zero = G_CONSTANT i64 0
1629 // %and = G_AND %x, %one
1630 // %cmp = G_ICMP intpred(ne), %and, %zero
1631 // %cmp_trunc = G_TRUNC %cmp
1632 // G_BRCOND %cmp_trunc, %bb.3
1633 //
1634 // We want to try and fold the AND into the G_BRCOND and produce either a
1635 // TBNZ (when we have intpred(ne)) or a TBZ (when we have intpred(eq)).
1636 //
1637 // In this case, we'd get
1638 //
1639 // TBNZ %x %bb.3
1640 //
1641
1642 // Check if the AND has a constant on its RHS which we can use as a mask.
1643 // If it's a power of 2, then it's the same as checking a specific bit.
1644 // (e.g, ANDing with 8 == ANDing with 000...100 == testing if bit 3 is set)
1645 auto MaybeBit = getIConstantVRegValWithLookThrough(
1646 AndInst.getOperand(2).getReg(), *MIB.getMRI());
1647 if (!MaybeBit)
1648 return false;
1649
1650 int32_t Bit = MaybeBit->Value.exactLogBase2();
1651 if (Bit < 0)
1652 return false;
1653
1654 Register TestReg = AndInst.getOperand(1).getReg();
1655
1656 // Emit a TB(N)Z.
1657 emitTestBit(TestReg, Bit, Invert, DstMBB, MIB);
1658 return true;
1659}
1660
1661MachineInstr *AArch64InstructionSelector::emitCBZ(Register CompareReg,
1662 bool IsNegative,
1663 MachineBasicBlock *DestMBB,
1664 MachineIRBuilder &MIB) const {
1665 assert(ProduceNonFlagSettingCondBr && "CBZ does not set flags!");
1666 MachineRegisterInfo &MRI = *MIB.getMRI();
1667 assert(RBI.getRegBank(CompareReg, MRI, TRI)->getID() ==
1668 AArch64::GPRRegBankID &&
1669 "Expected GPRs only?");
1670 auto Ty = MRI.getType(CompareReg);
1671 unsigned Width = Ty.getSizeInBits();
1672 assert(!Ty.isVector() && "Expected scalar only?");
1673 assert(Width <= 64 && "Expected width to be at most 64?");
1674 static const unsigned OpcTable[2][2] = {{AArch64::CBZW, AArch64::CBZX},
1675 {AArch64::CBNZW, AArch64::CBNZX}};
1676 unsigned Opc = OpcTable[IsNegative][Width == 64];
1677 auto BranchMI = MIB.buildInstr(Opc, {}, {CompareReg}).addMBB(DestMBB);
1678 constrainSelectedInstRegOperands(*BranchMI, TII, TRI, RBI);
1679 return &*BranchMI;
1680}
1681
1682bool AArch64InstructionSelector::selectCompareBranchFedByFCmp(
1683 MachineInstr &I, MachineInstr &FCmp, MachineIRBuilder &MIB) const {
1684 assert(FCmp.getOpcode() == TargetOpcode::G_FCMP);
1685 assert(I.getOpcode() == TargetOpcode::G_BRCOND);
1686 // Unfortunately, the mapping of LLVM FP CC's onto AArch64 CC's isn't
1687 // totally clean. Some of them require two branches to implement.
1688 auto Pred = (CmpInst::Predicate)FCmp.getOperand(1).getPredicate();
1689 emitFPCompare(FCmp.getOperand(2).getReg(), FCmp.getOperand(3).getReg(), MIB,
1690 Pred);
1691 AArch64CC::CondCode CC1, CC2;
1692 changeFCMPPredToAArch64CC(static_cast<CmpInst::Predicate>(Pred), CC1, CC2);
1693 MachineBasicBlock *DestMBB = I.getOperand(1).getMBB();
1694 MIB.buildInstr(AArch64::Bcc, {}, {}).addImm(CC1).addMBB(DestMBB);
1695 if (CC2 != AArch64CC::AL)
1696 MIB.buildInstr(AArch64::Bcc, {}, {}).addImm(CC2).addMBB(DestMBB);
1697 I.eraseFromParent();
1698 return true;
1699}
1700
bool AArch64InstructionSelector::tryOptCompareBranchFedByICmp(
    MachineInstr &I, MachineInstr &ICmp, MachineIRBuilder &MIB) const {
  assert(ICmp.getOpcode() == TargetOpcode::G_ICMP);
  assert(I.getOpcode() == TargetOpcode::G_BRCOND);
  // Attempt to optimize the G_BRCOND + G_ICMP into a TB(N)Z/CB(N)Z.
  //
  // Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z
  // instructions will not be produced, as they are conditional branch
  // instructions that do not set flags.
  if (!ProduceNonFlagSettingCondBr)
    return false;

  MachineRegisterInfo &MRI = *MIB.getMRI();
  MachineBasicBlock *DestMBB = I.getOperand(1).getMBB();
  auto Pred =
      static_cast<CmpInst::Predicate>(ICmp.getOperand(1).getPredicate());
  Register LHS = ICmp.getOperand(2).getReg();
  Register RHS = ICmp.getOperand(3).getReg();

  // We're allowed to emit a TB(N)Z/CB(N)Z. Try to do that.
  auto VRegAndVal = getIConstantVRegValWithLookThrough(RHS, MRI);
  MachineInstr *AndInst = getOpcodeDef(TargetOpcode::G_AND, LHS, MRI);

  // When we can emit a TB(N)Z, prefer that.
  //
  // Handle non-commutative condition codes first.
  // Note that we don't want to do this when we have a G_AND because it can
  // become a tst. The tst will make the test bit in the TB(N)Z redundant.
  if (VRegAndVal && !AndInst) {
    int64_t C = VRegAndVal->Value.getSExtValue();

    // When we have a greater-than comparison, we can just test if the msb is
    // zero.
    if (C == -1 && Pred == CmpInst::ICMP_SGT) {
      uint64_t Bit = MRI.getType(LHS).getSizeInBits() - 1;
      emitTestBit(LHS, Bit, /*IsNegative = */ false, DestMBB, MIB);
      I.eraseFromParent();
      return true;
    }

    // When we have a less than comparison, we can just test if the msb is not
    // zero.
    if (C == 0 && Pred == CmpInst::ICMP_SLT) {
      uint64_t Bit = MRI.getType(LHS).getSizeInBits() - 1;
      emitTestBit(LHS, Bit, /*IsNegative = */ true, DestMBB, MIB);
      I.eraseFromParent();
      return true;
    }

    // Inversely, if we have a signed greater-than-or-equal comparison to zero,
    // we can test if the msb is zero.
    if (C == 0 && Pred == CmpInst::ICMP_SGE) {
      uint64_t Bit = MRI.getType(LHS).getSizeInBits() - 1;
      emitTestBit(LHS, Bit, /*IsNegative = */ false, DestMBB, MIB);
      I.eraseFromParent();
      return true;
    }
  }

  // Attempt to handle commutative condition codes. Right now, that's only
  // eq/ne.
  if (ICmpInst::isEquality(Pred)) {
    // Equality commutes: if the constant wasn't on the RHS, swap the operands
    // and re-query both the constant and any feeding G_AND.
    if (!VRegAndVal) {
      std::swap(RHS, LHS);
      VRegAndVal = getIConstantVRegValWithLookThrough(RHS, MRI);
      AndInst = getOpcodeDef(TargetOpcode::G_AND, LHS, MRI);
    }

    if (VRegAndVal && VRegAndVal->Value == 0) {
      // If there's a G_AND feeding into this branch, try to fold it away by
      // emitting a TB(N)Z instead.
      //
      // Note: If we have LT, then it *is* possible to fold, but it wouldn't be
      // beneficial. When we have an AND and LT, we need a TST/ANDS, so folding
      // would be redundant.
      if (AndInst &&
          tryOptAndIntoCompareBranch(
              *AndInst, /*Invert = */ Pred == CmpInst::ICMP_NE, DestMBB, MIB)) {
        I.eraseFromParent();
        return true;
      }

      // Otherwise, try to emit a CB(N)Z instead.
      auto LHSTy = MRI.getType(LHS);
      if (!LHSTy.isVector() && LHSTy.getSizeInBits() <= 64) {
        emitCBZ(LHS, /*IsNegative = */ Pred == CmpInst::ICMP_NE, DestMBB, MIB);
        I.eraseFromParent();
        return true;
      }
    }
  }

  // No profitable TB(N)Z/CB(N)Z form found; caller falls back to compare+Bcc.
  return false;
}
1795
bool AArch64InstructionSelector::selectCompareBranchFedByICmp(
    MachineInstr &I, MachineInstr &ICmp, MachineIRBuilder &MIB) const {
  assert(ICmp.getOpcode() == TargetOpcode::G_ICMP);
  assert(I.getOpcode() == TargetOpcode::G_BRCOND);
  // Prefer the flag-free TB(N)Z/CB(N)Z forms when they apply.
  if (tryOptCompareBranchFedByICmp(I, ICmp, MIB))
    return true;

  // Couldn't optimize. Emit a compare + a Bcc.
  MachineBasicBlock *DestMBB = I.getOperand(1).getMBB();
  auto PredOp = ICmp.getOperand(1);
  emitIntegerCompare(ICmp.getOperand(2), ICmp.getOperand(3), PredOp, MIB);
      static_cast<CmpInst::Predicate>(PredOp.getPredicate()));
  MIB.buildInstr(AArch64::Bcc, {}, {}).addImm(CC).addMBB(DestMBB);
  I.eraseFromParent();
  return true;
}
1813
bool AArch64InstructionSelector::selectCompareBranch(
  Register CondReg = I.getOperand(0).getReg();
  MachineInstr *CCMI = MRI.getVRegDef(CondReg);
  // Try to select the G_BRCOND using whatever is feeding the condition if
  // possible.
  unsigned CCMIOpc = CCMI->getOpcode();
  if (CCMIOpc == TargetOpcode::G_FCMP)
    return selectCompareBranchFedByFCmp(I, *CCMI, MIB);
  if (CCMIOpc == TargetOpcode::G_ICMP)
    return selectCompareBranchFedByICmp(I, *CCMI, MIB);

  // Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z
  // instructions will not be produced, as they are conditional branch
  // instructions that do not set flags.
  if (ProduceNonFlagSettingCondBr) {
    // Branch on bit 0 of the condition register (its truth bit).
    emitTestBit(CondReg, /*Bit = */ 0, /*IsNegative = */ true,
                I.getOperand(1).getMBB(), MIB);
    I.eraseFromParent();
    return true;
  }

  // Can't emit TB(N)Z/CB(N)Z. Emit a tst + bcc instead.
  auto TstMI =
      MIB.buildInstr(AArch64::ANDSWri, {LLT::scalar(32)}, {CondReg}).addImm(1);
  auto Bcc = MIB.buildInstr(AArch64::Bcc)
                 .addMBB(I.getOperand(1).getMBB());
  I.eraseFromParent();
  return constrainSelectedInstRegOperands(*Bcc, TII, TRI, RBI);
}
1846
/// Returns the element immediate value of a vector shift operand if found.
/// This needs to detect a splat-like operation, e.g. a G_BUILD_VECTOR.
static std::optional<int64_t> getVectorShiftImm(Register Reg,
  assert(MRI.getType(Reg).isVector() && "Expected a *vector* shift operand");
  MachineInstr *OpMI = MRI.getVRegDef(Reg);
  // Delegate to the common splat-scalar matcher.
  return getAArch64VectorSplatScalar(*OpMI, MRI);
}
1855
/// Matches and returns the shift immediate value for a SHL instruction given
/// a shift operand. Returns std::nullopt when the operand is not a splat
/// constant or the value is out of range for the element width.
static std::optional<int64_t> getVectorSHLImm(LLT SrcTy, Register Reg,
  std::optional<int64_t> ShiftImm = getVectorShiftImm(Reg, MRI);
  if (!ShiftImm)
    return std::nullopt;
  // Check the immediate is in range for a SHL.
  int64_t Imm = *ShiftImm;
  if (Imm < 0)
    return std::nullopt;
  // A SHL immediate must be strictly less than the element width.
  switch (SrcTy.getElementType().getSizeInBits()) {
  default:
    LLVM_DEBUG(dbgs() << "Unhandled element type for vector shift");
    return std::nullopt;
  case 8:
    if (Imm > 7)
      return std::nullopt;
    break;
  case 16:
    if (Imm > 15)
      return std::nullopt;
    break;
  case 32:
    if (Imm > 31)
      return std::nullopt;
    break;
  case 64:
    if (Imm > 63)
      return std::nullopt;
    break;
  }
  return Imm;
}
1890
/// Select a vector G_SHL to either the immediate-form SHL (when the RHS is a
/// constant splat in range) or the register-form USHL for the matching
/// fixed-vector type. Returns false for unsupported types so the caller can
/// fall back.
1891bool AArch64InstructionSelector::selectVectorSHL(MachineInstr &I,
// NOTE(review): the continuation line declaring the MachineRegisterInfo
// parameter appears to be missing from this extraction — confirm upstream.
1893  assert(I.getOpcode() == TargetOpcode::G_SHL);
1894  Register DstReg = I.getOperand(0).getReg();
1895  const LLT Ty = MRI.getType(DstReg);
1896  Register Src1Reg = I.getOperand(1).getReg();
1897  Register Src2Reg = I.getOperand(2).getReg();
1898
1899  if (!Ty.isVector())
1900    return false;
1901
1902  // Check if we have a vector of constants on RHS that we can select as the
1903  // immediate form.
1904  std::optional<int64_t> ImmVal = getVectorSHLImm(Ty, Src2Reg, MRI);
1905
  // Pick the opcode by exact fixed-vector type; ImmVal presence chooses the
  // immediate-shift encoding over the register-shift one.
1906  unsigned Opc = 0;
1907  if (Ty == LLT::fixed_vector(2, 64)) {
1908    Opc = ImmVal ? AArch64::SHLv2i64_shift : AArch64::USHLv2i64;
1909  } else if (Ty == LLT::fixed_vector(4, 32)) {
1910    Opc = ImmVal ? AArch64::SHLv4i32_shift : AArch64::USHLv4i32;
1911  } else if (Ty == LLT::fixed_vector(2, 32)) {
1912    Opc = ImmVal ? AArch64::SHLv2i32_shift : AArch64::USHLv2i32;
1913  } else if (Ty == LLT::fixed_vector(4, 16)) {
1914    Opc = ImmVal ? AArch64::SHLv4i16_shift : AArch64::USHLv4i16;
1915  } else if (Ty == LLT::fixed_vector(8, 16)) {
1916    Opc = ImmVal ? AArch64::SHLv8i16_shift : AArch64::USHLv8i16;
1917  } else if (Ty == LLT::fixed_vector(16, 8)) {
1918    Opc = ImmVal ? AArch64::SHLv16i8_shift : AArch64::USHLv16i8;
1919  } else if (Ty == LLT::fixed_vector(8, 8)) {
1920    Opc = ImmVal ? AArch64::SHLv8i8_shift : AArch64::USHLv8i8;
1921  } else {
1922    LLVM_DEBUG(dbgs() << "Unhandled G_SHL type");
1923    return false;
1924  }
1925
  // Second operand is either the immediate or the shift-amount register.
1926  auto Shl = MIB.buildInstr(Opc, {DstReg}, {Src1Reg});
1927  if (ImmVal)
1928    Shl.addImm(*ImmVal);
1929  else
1930    Shl.addUse(Src2Reg);
// NOTE(review): a constrainSelectedInstRegOperands call (original line 1931)
// appears to be missing from this extraction — confirm upstream.
1932  I.eraseFromParent();
1933  return true;
1934}
1935
/// Select a vector G_ASHR/G_LSHR. AArch64 has no right-shift-by-register
/// instruction, so this negates the shift amount and emits the signed or
/// unsigned left-shift-by-register (SSHL/USHL), which interprets negative
/// amounts as right shifts.
1936bool AArch64InstructionSelector::selectVectorAshrLshr(
// NOTE(review): the parameter-list continuation line appears to be missing
// from this extraction — confirm upstream.
1938  assert(I.getOpcode() == TargetOpcode::G_ASHR ||
1939         I.getOpcode() == TargetOpcode::G_LSHR);
1940  Register DstReg = I.getOperand(0).getReg();
1941  const LLT Ty = MRI.getType(DstReg);
1942  Register Src1Reg = I.getOperand(1).getReg();
1943  Register Src2Reg = I.getOperand(2).getReg();
1944
1945  if (!Ty.isVector())
1946    return false;
1947
1948  bool IsASHR = I.getOpcode() == TargetOpcode::G_ASHR;
1949
1950  // We expect the immediate case to be lowered in the PostLegalCombiner to
1951  // AArch64ISD::VASHR or AArch64ISD::VLSHR equivalents.
1952
1953  // There is not a shift right register instruction, but the shift left
1954  // register instruction takes a signed value, where negative numbers specify a
1955  // right shift.
1956
  // Opc: the SSHL/USHL variant for this type; NegOpc: the NEG used to flip
  // the sign of the shift amount. RC constrains the NEG's def on the FPR bank.
1957  unsigned Opc = 0;
1958  unsigned NegOpc = 0;
1959  const TargetRegisterClass *RC =
1960      getRegClassForTypeOnBank(Ty, RBI.getRegBank(AArch64::FPRRegBankID));
1961  if (Ty == LLT::fixed_vector(2, 64)) {
1962    Opc = IsASHR ? AArch64::SSHLv2i64 : AArch64::USHLv2i64;
1963    NegOpc = AArch64::NEGv2i64;
1964  } else if (Ty == LLT::fixed_vector(4, 32)) {
1965    Opc = IsASHR ? AArch64::SSHLv4i32 : AArch64::USHLv4i32;
1966    NegOpc = AArch64::NEGv4i32;
1967  } else if (Ty == LLT::fixed_vector(2, 32)) {
1968    Opc = IsASHR ? AArch64::SSHLv2i32 : AArch64::USHLv2i32;
1969    NegOpc = AArch64::NEGv2i32;
1970  } else if (Ty == LLT::fixed_vector(4, 16)) {
1971    Opc = IsASHR ? AArch64::SSHLv4i16 : AArch64::USHLv4i16;
1972    NegOpc = AArch64::NEGv4i16;
1973  } else if (Ty == LLT::fixed_vector(8, 16)) {
1974    Opc = IsASHR ? AArch64::SSHLv8i16 : AArch64::USHLv8i16;
1975    NegOpc = AArch64::NEGv8i16;
1976  } else if (Ty == LLT::fixed_vector(16, 8)) {
1977    Opc = IsASHR ? AArch64::SSHLv16i8 : AArch64::USHLv16i8;
1978    NegOpc = AArch64::NEGv16i8;
1979  } else if (Ty == LLT::fixed_vector(8, 8)) {
1980    Opc = IsASHR ? AArch64::SSHLv8i8 : AArch64::USHLv8i8;
1981    NegOpc = AArch64::NEGv8i8;
1982  } else {
1983    LLVM_DEBUG(dbgs() << "Unhandled G_ASHR type");
1984    return false;
1985  }
1986
  // Negate the shift amount, then shift left by the (now negative) amount.
1987  auto Neg = MIB.buildInstr(NegOpc, {RC}, {Src2Reg});
// NOTE(review): constrainSelectedInstRegOperands calls (original lines
// 1988/1990) appear to be missing from this extraction — confirm upstream.
1989  auto SShl = MIB.buildInstr(Opc, {DstReg}, {Src1Reg, Neg});
1991  I.eraseFromParent();
1992  return true;
1993}
1994
/// AAPCS va_start selection is not implemented in this selector; always
/// returns false so the caller falls back.
1995bool AArch64InstructionSelector::selectVaStartAAPCS(
// NOTE(review): the parameter-list continuation line appears to be missing
// from this extraction — confirm upstream.
1997  return false;
1998}
1999
/// Select G_VASTART for the Darwin ABI: materialize the address of the
/// varargs save area with an ADDXri off a frame index, then store that
/// address into the va_list object with STRXui.
2000bool AArch64InstructionSelector::selectVaStartDarwin(
// NOTE(review): the parameter-list continuation lines appear to be missing
// from this extraction — confirm upstream.
2003  Register ListReg = I.getOperand(0).getReg();
2004
2005  Register ArgsAddrReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
2006
  // Default to the stack varargs index; the conditional below (whose guard
  // line is not visible in this extraction) prefers the GPR save area when
  // GPR varargs bytes were saved.
2007  int FrameIdx = FuncInfo->getVarArgsStackIndex();
2010    FrameIdx = FuncInfo->getVarArgsGPRSize() > 0
2011                   ? FuncInfo->getVarArgsGPRIndex()
2012                   : FuncInfo->getVarArgsStackIndex();
2013  }
2014
  // ArgsAddrReg = frame-index + 0 (ADDXri with zero imm/shift).
2015  auto MIB =
2016      BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::ADDXri))
2017          .addDef(ArgsAddrReg)
2018          .addFrameIndex(FrameIdx)
2019          .addImm(0)
2020          .addImm(0);
2021
2023
  // Store the computed address through the va_list pointer, reusing the
  // G_VASTART's memory operand.
2024  MIB = BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::STRXui))
2025            .addUse(ArgsAddrReg)
2026            .addUse(ListReg)
2027            .addImm(0)
2028            .addMemOperand(*I.memoperands_begin());
2029
// NOTE(review): constrain calls (original lines 2022/2030) appear to be
// missing from this extraction — confirm upstream.
2031  I.eraseFromParent();
2032  return true;
2033}
2034
/// Materialize a 64-bit global/block address for the large code model as a
/// MOVZ of the low 16 bits followed by three MOVKs inserting bits 16-31,
/// 32-47 and 48-63 (target flags MO_G1..MO_G3), with the final MOVK writing
/// the instruction's original destination register.
2035void AArch64InstructionSelector::materializeLargeCMVal(
2036    MachineInstr &I, const Value *V, unsigned OpFlags) {
2037  MachineBasicBlock &MBB = *I.getParent();
2038  MachineFunction &MF = *MBB.getParent();
// NOTE(review): the local MRI declaration (original line 2039) appears to be
// missing from this extraction — confirm upstream.
2040
  // MOVZ seeds the low 16 bits; its operand 1 is copied from I's address
  // operand and retagged with the caller's flags plus MO_G0.
2041  auto MovZ = MIB.buildInstr(AArch64::MOVZXi, {&AArch64::GPR64RegClass}, {});
2042  MovZ->addOperand(MF, I.getOperand(1));
2043  MovZ->getOperand(1).setTargetFlags(OpFlags | AArch64II::MO_G0 |
2045  MovZ->addOperand(MF, MachineOperand::CreateImm(0));
2047
  // Helper: emit one MOVK inserting 16 bits at `Offset`, chained off SrcReg.
  // ForceDstReg, when valid, pins the def (used for the final MOVK so the
  // chain ends in I's original destination).
2048  auto BuildMovK = [&](Register SrcReg, unsigned char Flags, unsigned Offset,
2049                       Register ForceDstReg) {
2050    Register DstReg = ForceDstReg
2051                          ? ForceDstReg
2052                          : MRI.createVirtualRegister(&AArch64::GPR64RegClass);
2053    auto MovI = MIB.buildInstr(AArch64::MOVKXi).addDef(DstReg).addUse(SrcReg);
    // The addressed value is either a GlobalValue or a BlockAddress; reuse
    // the offset recorded on the MOVZ's operand in both cases.
2054    if (auto *GV = dyn_cast<GlobalValue>(V)) {
2056                                          GV, MovZ->getOperand(1).getOffset(), Flags));
2057    } else {
2058      MovI->addOperand(
2059          MF, MachineOperand::CreateBA(cast<BlockAddress>(V),
2060                                       MovZ->getOperand(1).getOffset(), Flags));
2061    }
2062    MovI->addOperand(MF, MachineOperand::CreateImm(Offset));
2064    return DstReg;
2065  };
  // Chain MOVKs for bits 16-31, 32-47, and finally 48-63 into I's dest.
2066  Register DstReg = BuildMovK(MovZ.getReg(0),
2068  DstReg = BuildMovK(DstReg, AArch64II::MO_G2 | AArch64II::MO_NC, 32, 0);
2069  BuildMovK(DstReg, AArch64II::MO_G3, 48, I.getOperand(0).getReg());
2070}
2071
/// Pre-selection lowering: small G_MIR rewrites that help the imported
/// tablegen patterns match. Returns true if the instruction was changed;
/// selection must continue on the (possibly rewritten) instruction.
2072bool AArch64InstructionSelector::preISelLower(MachineInstr &I) {
2073  MachineBasicBlock &MBB = *I.getParent();
2074  MachineFunction &MF = *MBB.getParent();
// NOTE(review): the local MRI declaration (original line 2075) appears to be
// missing from this extraction — confirm upstream.
2076
2077  switch (I.getOpcode()) {
2078  case TargetOpcode::G_STORE: {
    // Fold cross-bank copies into the store, and retype stored pointers to
    // s64 (via a COPY constrained to GPR64) so imported patterns can match.
2079    bool Changed = contractCrossBankCopyIntoStore(I, MRI);
2080    MachineOperand &SrcOp = I.getOperand(0);
2081    if (MRI.getType(SrcOp.getReg()).isPointer()) {
2082      // Allow matching with imported patterns for stores of pointers. Unlike
2083      // G_LOAD/G_PTR_ADD, we may not have selected all users. So, emit a copy
2084      // and constrain.
2085      auto Copy = MIB.buildCopy(LLT::scalar(64), SrcOp);
2086      Register NewSrc = Copy.getReg(0);
2087      SrcOp.setReg(NewSrc);
2088      RBI.constrainGenericRegister(NewSrc, AArch64::GPR64RegClass, MRI);
2089      Changed = true;
2090    }
2091    return Changed;
2092  }
2093  case TargetOpcode::G_PTR_ADD:
2094    return convertPtrAddToAdd(I, MRI);
2095  case TargetOpcode::G_LOAD: {
2096    // For scalar loads of pointers, we try to convert the dest type from p0
2097    // to s64 so that our imported patterns can match. Like with the G_PTR_ADD
2098    // conversion, this should be ok because all users should have been
2099    // selected already, so the type doesn't matter for them.
2100    Register DstReg = I.getOperand(0).getReg();
2101    const LLT DstTy = MRI.getType(DstReg);
2102    if (!DstTy.isPointer())
2103      return false;
2104    MRI.setType(DstReg, LLT::scalar(64));
2105    return true;
2106  }
2107  case AArch64::G_DUP: {
2108    // Convert the type from p0 to s64 to help selection.
2109    LLT DstTy = MRI.getType(I.getOperand(0).getReg());
2110    if (!DstTy.isPointerVector())
2111      return false;
    // Insert a p0->s64 copy on the scalar source and retype the vector dest
    // element-wise, so the DUP operates purely on s64.
2112    auto NewSrc = MIB.buildCopy(LLT::scalar(64), I.getOperand(1).getReg());
2113    MRI.setType(I.getOperand(0).getReg(),
2114                DstTy.changeElementType(LLT::scalar(64)));
2115    MRI.setRegClass(NewSrc.getReg(0), &AArch64::GPR64RegClass);
2116    I.getOperand(1).setReg(NewSrc.getReg(0));
2117    return true;
2118  }
2119  case TargetOpcode::G_UITOFP:
2120  case TargetOpcode::G_SITOFP: {
2121    // If both source and destination regbanks are FPR, then convert the opcode
2122    // to G_SITOF so that the importer can select it to an fpr variant.
2123    // Otherwise, it ends up matching an fpr/gpr variant and adding a cross-bank
2124    // copy.
2125    Register SrcReg = I.getOperand(1).getReg();
2126    LLT SrcTy = MRI.getType(SrcReg);
2127    LLT DstTy = MRI.getType(I.getOperand(0).getReg());
    // Only same-size scalar conversions qualify for the FPR-only opcodes.
2128    if (SrcTy.isVector() || SrcTy.getSizeInBits() != DstTy.getSizeInBits())
2129      return false;
2130
2131    if (RBI.getRegBank(SrcReg, MRI, TRI)->getID() == AArch64::FPRRegBankID) {
2132      if (I.getOpcode() == TargetOpcode::G_SITOFP)
2133        I.setDesc(TII.get(AArch64::G_SITOF));
2134      else
2135        I.setDesc(TII.get(AArch64::G_UITOF));
2136      return true;
2137    }
2138    return false;
2139  }
2140  default:
2141    return false;
2142  }
2143}
2144
2145/// This lowering tries to look for G_PTR_ADD instructions and then converts
2146/// them to a standard G_ADD with a COPY on the source.
2147///
2148/// The motivation behind this is to expose the add semantics to the imported
2149/// tablegen patterns. We shouldn't need to check for uses being loads/stores,
2150/// because the selector works bottom up, uses before defs. By the time we
2151/// end up trying to select a G_PTR_ADD, we should have already attempted to
2152/// fold this into addressing modes and were therefore unsuccessful.
2153bool AArch64InstructionSelector::convertPtrAddToAdd(
// NOTE(review): the parameter-list continuation line appears to be missing
// from this extraction — confirm upstream.
2155  assert(I.getOpcode() == TargetOpcode::G_PTR_ADD && "Expected G_PTR_ADD");
2156  Register DstReg = I.getOperand(0).getReg();
2157  Register AddOp1Reg = I.getOperand(1).getReg();
2158  const LLT PtrTy = MRI.getType(DstReg);
  // Only addrspace(0) pointers are handled by this rewrite.
2159  if (PtrTy.getAddressSpace() != 0)
2160    return false;
2161
  // Cast the pointer base to an integer of the same width: <2 x s64> for
  // pointer vectors, s64 for scalars; assign the matching register bank.
2162  const LLT CastPtrTy =
2163      PtrTy.isVector() ? LLT::fixed_vector(2, 64) : LLT::scalar(64);
2164  auto PtrToInt = MIB.buildPtrToInt(CastPtrTy, AddOp1Reg);
2165  // Set regbanks on the registers.
2166  if (PtrTy.isVector())
2167    MRI.setRegBank(PtrToInt.getReg(0), RBI.getRegBank(AArch64::FPRRegBankID));
2168  else
2169    MRI.setRegBank(PtrToInt.getReg(0), RBI.getRegBank(AArch64::GPRRegBankID));
2170
2171  // Now turn the %dst(p0) = G_PTR_ADD %base, off into:
2172  // %dst(intty) = G_ADD %intbase, off
2173  I.setDesc(TII.get(TargetOpcode::G_ADD));
2174  MRI.setType(DstReg, CastPtrTy);
2175  I.getOperand(1).setReg(PtrToInt.getReg(0));
  // Select the freshly inserted G_PTRTOINT now; I itself was already in the
  // selection queue, but the new instruction is not.
2176  if (!select(*PtrToInt)) {
2177    LLVM_DEBUG(dbgs() << "Failed to select G_PTRTOINT in convertPtrAddToAdd");
2178    return false;
2179  }
2180
2181  // Also take the opportunity here to try to do some optimization.
2182  // Try to convert this into a G_SUB if the offset is a 0-x negate idiom.
2183  Register NegatedReg;
2184  if (!mi_match(I.getOperand(2).getReg(), MRI, m_Neg(m_Reg(NegatedReg))))
2185    return true;
2186  I.getOperand(2).setReg(NegatedReg);
2187  I.setDesc(TII.get(TargetOpcode::G_SUB));
2188  return true;
2189}
2190
/// Early selection of a scalar G_SHL with a constant shift amount into the
/// UBFM form of LSL-immediate. Returns false (vector type, non-constant
/// amount, or immediates out of range) to defer to the imported selector.
2191bool AArch64InstructionSelector::earlySelectSHL(MachineInstr &I,
// NOTE(review): the continuation line declaring the MachineRegisterInfo
// parameter appears to be missing from this extraction — confirm upstream.
2193  // We try to match the immediate variant of LSL, which is actually an alias
2194  // for a special case of UBFM. Otherwise, we fall back to the imported
2195  // selector which will match the register variant.
2196  assert(I.getOpcode() == TargetOpcode::G_SHL && "unexpected op");
2197  const auto &MO = I.getOperand(2);
2198  auto VRegAndVal = getIConstantVRegVal(MO.getReg(), MRI);
2199  if (!VRegAndVal)
2200    return false;
2201
2202  const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
2203  if (DstTy.isVector())
2204    return false;
2205  bool Is64Bit = DstTy.getSizeInBits() == 64;
  // ShiftA/ShiftB produce the two UBFM immediates (immr/imms) encoding the
  // LSL; either returning nullopt means the constant can't be encoded.
2206  auto Imm1Fn = Is64Bit ? selectShiftA_64(MO) : selectShiftA_32(MO);
2207  auto Imm2Fn = Is64Bit ? selectShiftB_64(MO) : selectShiftB_32(MO);
2208
2209  if (!Imm1Fn || !Imm2Fn)
2210    return false;
2211
2212  auto NewI =
2213      MIB.buildInstr(Is64Bit ? AArch64::UBFMXri : AArch64::UBFMWri,
2214                     {I.getOperand(0).getReg()}, {I.getOperand(1).getReg()});
2215
  // The complex-renderer functions append the immediate operands.
2216  for (auto &RenderFn : *Imm1Fn)
2217    RenderFn(NewI);
2218  for (auto &RenderFn : *Imm2Fn)
2219    RenderFn(NewI);
2220
2221  I.eraseFromParent();
2222  return constrainSelectedInstRegOperands(*NewI, TII, TRI, RBI);
2223}
2224
/// If a G_STORE's value operand comes through a cross-register-bank COPY of
/// the same bit width, store the copy's source directly instead. Returns
/// true if the store was rewritten.
2225bool AArch64InstructionSelector::contractCrossBankCopyIntoStore(
// NOTE(review): the parameter-list continuation line appears to be missing
// from this extraction — confirm upstream.
2227  assert(I.getOpcode() == TargetOpcode::G_STORE && "Expected G_STORE");
2228  // If we're storing a scalar, it doesn't matter what register bank that
2229  // scalar is on. All that matters is the size.
2230  //
2231  // So, if we see something like this (with a 32-bit scalar as an example):
2232  //
2233  // %x:gpr(s32) = ... something ...
2234  // %y:fpr(s32) = COPY %x:gpr(s32)
2235  // G_STORE %y:fpr(s32)
2236  //
2237  // We can fix this up into something like this:
2238  //
2239  // G_STORE %x:gpr(s32)
2240  //
2241  // And then continue the selection process normally.
2242  Register DefDstReg = getSrcRegIgnoringCopies(I.getOperand(0).getReg(), MRI);
2243  if (!DefDstReg.isValid())
2244    return false;
2245  LLT DefDstTy = MRI.getType(DefDstReg);
2246  Register StoreSrcReg = I.getOperand(0).getReg();
2247  LLT StoreSrcTy = MRI.getType(StoreSrcReg);
2248
2249  // If we get something strange like a physical register, then we shouldn't
2250  // go any further.
2251  if (!DefDstTy.isValid())
2252    return false;
2253
2254  // Are the source and dst types the same size?
2255  if (DefDstTy.getSizeInBits() != StoreSrcTy.getSizeInBits())
2256    return false;
2257
  // Same bank means the copy isn't cross-bank; nothing to contract.
2258  if (RBI.getRegBank(StoreSrcReg, MRI, TRI) ==
2259      RBI.getRegBank(DefDstReg, MRI, TRI))
2260    return false;
2261
2262  // We have a cross-bank copy, which is entering a store. Let's fold it.
2263  I.getOperand(0).setReg(DefDstReg);
2264  return true;
2265}
2266
/// Custom C++ selection routines that must run before the imported tablegen
/// patterns, because the importer would otherwise match these opcodes
/// suboptimally. Returns true if I was fully selected (and usually erased).
2267bool AArch64InstructionSelector::earlySelect(MachineInstr &I) {
2268  assert(I.getParent() && "Instruction should be in a basic block!");
2269  assert(I.getParent()->getParent() && "Instruction should be in a function!");
2270
2271  MachineBasicBlock &MBB = *I.getParent();
2272  MachineFunction &MF = *MBB.getParent();
// NOTE(review): the local MRI declaration (original line 2273) appears to be
// missing from this extraction — confirm upstream.
2274
2275  switch (I.getOpcode()) {
2276  case AArch64::G_DUP: {
2277    // Before selecting a DUP instruction, check if it is better selected as a
2278    // MOV or load from a constant pool.
2279    Register Src = I.getOperand(1).getReg();
2280    auto ValAndVReg = getAnyConstantVRegValWithLookThrough(Src, MRI);
2281    if (!ValAndVReg)
2282      return false;
2283    LLVMContext &Ctx = MF.getFunction().getContext();
2284    Register Dst = I.getOperand(0).getReg();
    // Build a splat ConstantVector of the DUP'd value truncated to the
    // destination's element width, then emit it as a constant vector.
2286        MRI.getType(Dst).getNumElements(),
2287        ConstantInt::get(
2288            Type::getIntNTy(Ctx, MRI.getType(Dst).getScalarSizeInBits()),
2289            ValAndVReg->Value.trunc(MRI.getType(Dst).getScalarSizeInBits())));
2290    if (!emitConstantVector(Dst, CV, MIB, MRI))
2291      return false;
2292    I.eraseFromParent();
2293    return true;
2294  }
2295  case TargetOpcode::G_SEXT:
2296    // Check for i64 sext(i32 vector_extract) prior to tablegen to select SMOV
2297    // over a normal extend.
2298    if (selectUSMovFromExtend(I, MRI))
2299      return true;
2300    return false;
2301  case TargetOpcode::G_BR:
2302    return false;
2303  case TargetOpcode::G_SHL:
2304    return earlySelectSHL(I, MRI);
2305  case TargetOpcode::G_CONSTANT: {
    // Rewrite materialization of 0 into a COPY from WZR/XZR.
2306    bool IsZero = false;
2307    if (I.getOperand(1).isCImm())
2308      IsZero = I.getOperand(1).getCImm()->isZero();
2309    else if (I.getOperand(1).isImm())
2310      IsZero = I.getOperand(1).getImm() == 0;
2311
2312    if (!IsZero)
2313      return false;
2314
2315    Register DefReg = I.getOperand(0).getReg();
2316    LLT Ty = MRI.getType(DefReg);
2317    if (Ty.getSizeInBits() == 64) {
2318      I.getOperand(1).ChangeToRegister(AArch64::XZR, false);
2319      RBI.constrainGenericRegister(DefReg, AArch64::GPR64RegClass, MRI);
2320    } else if (Ty.getSizeInBits() == 32) {
2321      I.getOperand(1).ChangeToRegister(AArch64::WZR, false);
2322      RBI.constrainGenericRegister(DefReg, AArch64::GPR32RegClass, MRI);
2323    } else
2324      return false;
2325
2326    I.setDesc(TII.get(TargetOpcode::COPY));
2327    return true;
2328  }
2329
2330  case TargetOpcode::G_ADD: {
2331    // Check if this is being fed by a G_ICMP on either side.
2332    //
2333    // (cmp pred, x, y) + z
2334    //
2335    // In the above case, when the cmp is true, we increment z by 1. So, we can
2336    // fold the add into the cset for the cmp by using cinc.
2337    //
2338    // FIXME: This would probably be a lot nicer in PostLegalizerLowering.
2339    Register AddDst = I.getOperand(0).getReg();
2340    Register AddLHS = I.getOperand(1).getReg();
2341    Register AddRHS = I.getOperand(2).getReg();
2342    // Only handle scalars.
2343    LLT Ty = MRI.getType(AddLHS);
2344    if (Ty.isVector())
2345      return false;
2346    // Since G_ICMP is modeled as ADDS/SUBS/ANDS, we can handle 32 bits or 64
2347    // bits.
2348    unsigned Size = Ty.getSizeInBits();
2349    if (Size != 32 && Size != 64)
2350      return false;
    // Returns the feeding G_ICMP (single non-debug use only), looking
    // through the zext that 64-bit compares carry.
2351    auto MatchCmp = [&](Register Reg) -> MachineInstr * {
2352      if (!MRI.hasOneNonDBGUse(Reg))
2353        return nullptr;
2354      // If the LHS of the add is 32 bits, then we want to fold a 32-bit
2355      // compare.
2356      if (Size == 32)
2357        return getOpcodeDef(TargetOpcode::G_ICMP, Reg, MRI);
2358      // We model scalar compares using 32-bit destinations right now.
2359      // If it's a 64-bit compare, it'll have 64-bit sources.
2360      Register ZExt;
2361      if (!mi_match(Reg, MRI,
// NOTE(review): the m_GZExt matcher line (original line 2362) appears to be
// missing from this extraction — confirm upstream.
2363        return nullptr;
2364      auto *Cmp = getOpcodeDef(TargetOpcode::G_ICMP, ZExt, MRI);
2365      if (!Cmp ||
2366          MRI.getType(Cmp->getOperand(2).getReg()).getSizeInBits() != 64)
2367        return nullptr;
2368      return Cmp;
2369    };
2370    // Try to match
2371    // z + (cmp pred, x, y)
2372    MachineInstr *Cmp = MatchCmp(AddRHS);
2373    if (!Cmp) {
2374      // (cmp pred, x, y) + z
2375      std::swap(AddLHS, AddRHS);
2376      Cmp = MatchCmp(AddRHS);
2377      if (!Cmp)
2378        return false;
2379    }
    // Emit the compare to set flags, then CSINC with both sources = z and
    // the inverted condition: z + 1 when the compare holds, z otherwise.
2380    auto &PredOp = Cmp->getOperand(1);
2381    auto Pred = static_cast<CmpInst::Predicate>(PredOp.getPredicate());
2382    const AArch64CC::CondCode InvCC =
2385    emitIntegerCompare(/*LHS=*/Cmp->getOperand(2),
2386                       /*RHS=*/Cmp->getOperand(3), PredOp, MIB);
2387    emitCSINC(/*Dst=*/AddDst, /*Src =*/AddLHS, /*Src2=*/AddLHS, InvCC, MIB);
2388    I.eraseFromParent();
2389    return true;
2390  }
2391  case TargetOpcode::G_OR: {
2392    // Look for operations that take the lower `Width=Size-ShiftImm` bits of
2393    // `ShiftSrc` and insert them into the upper `Width` bits of `MaskSrc` via
2394    // shifting and masking that we can replace with a BFI (encoded as a BFM).
2395    Register Dst = I.getOperand(0).getReg();
2396    LLT Ty = MRI.getType(Dst);
2397
2398    if (!Ty.isScalar())
2399      return false;
2400
2401    unsigned Size = Ty.getSizeInBits();
2402    if (Size != 32 && Size != 64)
2403      return false;
2404
2405    Register ShiftSrc;
2406    int64_t ShiftImm;
2407    Register MaskSrc;
2408    int64_t MaskImm;
2409    if (!mi_match(
2410            Dst, MRI,
2411            m_GOr(m_OneNonDBGUse(m_GShl(m_Reg(ShiftSrc), m_ICst(ShiftImm))),
2412                  m_OneNonDBGUse(m_GAnd(m_Reg(MaskSrc), m_ICst(MaskImm))))))
2413      return false;
2414
    // The mask must cover exactly the low ShiftImm bits, so the shifted and
    // masked halves cannot overlap.
2415    if (ShiftImm > Size || ((1ULL << ShiftImm) - 1ULL) != uint64_t(MaskImm))
2416      return false;
2417
2418    int64_t Immr = Size - ShiftImm;
2419    int64_t Imms = Size - ShiftImm - 1;
2420    unsigned Opc = Size == 32 ? AArch64::BFMWri : AArch64::BFMXri;
2421    emitInstr(Opc, {Dst}, {MaskSrc, ShiftSrc, Immr, Imms}, MIB);
2422    I.eraseFromParent();
2423    return true;
2424  }
2425  case TargetOpcode::G_FENCE: {
    // singlethread fences (imm 0) become a compiler-only MEMBARRIER;
    // otherwise emit a DMB with ISHLD for acquire-only, ISH for the rest.
2426    if (I.getOperand(1).getImm() == 0)
2427      BuildMI(MBB, I, MIMetadata(I), TII.get(TargetOpcode::MEMBARRIER));
2428    else
2429      BuildMI(MBB, I, MIMetadata(I), TII.get(AArch64::DMB))
2430          .addImm(I.getOperand(0).getImm() == 4 ? 0x9 : 0xb);
2431    I.eraseFromParent();
2432    return true;
2433  }
2434  default:
2435    return false;
2436  }
2437}
2438
2439bool AArch64InstructionSelector::select(MachineInstr &I) {
2440 assert(I.getParent() && "Instruction should be in a basic block!");
2441 assert(I.getParent()->getParent() && "Instruction should be in a function!");
2442
2443 MachineBasicBlock &MBB = *I.getParent();
2444 MachineFunction &MF = *MBB.getParent();
2446
2447 const AArch64Subtarget *Subtarget = &MF.getSubtarget<AArch64Subtarget>();
2448 if (Subtarget->requiresStrictAlign()) {
2449 // We don't support this feature yet.
2450 LLVM_DEBUG(dbgs() << "AArch64 GISel does not support strict-align yet\n");
2451 return false;
2452 }
2453
2455
2456 unsigned Opcode = I.getOpcode();
2457 // G_PHI requires same handling as PHI
2458 if (!I.isPreISelOpcode() || Opcode == TargetOpcode::G_PHI) {
2459 // Certain non-generic instructions also need some special handling.
2460
2461 if (Opcode == TargetOpcode::LOAD_STACK_GUARD)
2463
2464 if (Opcode == TargetOpcode::PHI || Opcode == TargetOpcode::G_PHI) {
2465 const Register DefReg = I.getOperand(0).getReg();
2466 const LLT DefTy = MRI.getType(DefReg);
2467
2468 const RegClassOrRegBank &RegClassOrBank =
2469 MRI.getRegClassOrRegBank(DefReg);
2470
2471 const TargetRegisterClass *DefRC
2472 = RegClassOrBank.dyn_cast<const TargetRegisterClass *>();
2473 if (!DefRC) {
2474 if (!DefTy.isValid()) {
2475 LLVM_DEBUG(dbgs() << "PHI operand has no type, not a gvreg?\n");
2476 return false;
2477 }
2478 const RegisterBank &RB = *RegClassOrBank.get<const RegisterBank *>();
2479 DefRC = getRegClassForTypeOnBank(DefTy, RB);
2480 if (!DefRC) {
2481 LLVM_DEBUG(dbgs() << "PHI operand has unexpected size/bank\n");
2482 return false;
2483 }
2484 }
2485
2486 I.setDesc(TII.get(TargetOpcode::PHI));
2487
2488 return RBI.constrainGenericRegister(DefReg, *DefRC, MRI);
2489 }
2490
2491 if (I.isCopy())
2492 return selectCopy(I, TII, MRI, TRI, RBI);
2493
2494 if (I.isDebugInstr())
2495 return selectDebugInstr(I, MRI, RBI);
2496
2497 return true;
2498 }
2499
2500
2501 if (I.getNumOperands() != I.getNumExplicitOperands()) {
2502 LLVM_DEBUG(
2503 dbgs() << "Generic instruction has unexpected implicit operands\n");
2504 return false;
2505 }
2506
2507 // Try to do some lowering before we start instruction selecting. These
2508 // lowerings are purely transformations on the input G_MIR and so selection
2509 // must continue after any modification of the instruction.
2510 if (preISelLower(I)) {
2511 Opcode = I.getOpcode(); // The opcode may have been modified, refresh it.
2512 }
2513
2514 // There may be patterns where the importer can't deal with them optimally,
2515 // but does select it to a suboptimal sequence so our custom C++ selection
2516 // code later never has a chance to work on it. Therefore, we have an early
2517 // selection attempt here to give priority to certain selection routines
2518 // over the imported ones.
2519 if (earlySelect(I))
2520 return true;
2521
2522 if (selectImpl(I, *CoverageInfo))
2523 return true;
2524
2525 LLT Ty =
2526 I.getOperand(0).isReg() ? MRI.getType(I.getOperand(0).getReg()) : LLT{};
2527
2528 switch (Opcode) {
2529 case TargetOpcode::G_SBFX:
2530 case TargetOpcode::G_UBFX: {
2531 static const unsigned OpcTable[2][2] = {
2532 {AArch64::UBFMWri, AArch64::UBFMXri},
2533 {AArch64::SBFMWri, AArch64::SBFMXri}};
2534 bool IsSigned = Opcode == TargetOpcode::G_SBFX;
2535 unsigned Size = Ty.getSizeInBits();
2536 unsigned Opc = OpcTable[IsSigned][Size == 64];
2537 auto Cst1 =
2538 getIConstantVRegValWithLookThrough(I.getOperand(2).getReg(), MRI);
2539 assert(Cst1 && "Should have gotten a constant for src 1?");
2540 auto Cst2 =
2541 getIConstantVRegValWithLookThrough(I.getOperand(3).getReg(), MRI);
2542 assert(Cst2 && "Should have gotten a constant for src 2?");
2543 auto LSB = Cst1->Value.getZExtValue();
2544 auto Width = Cst2->Value.getZExtValue();
2545 auto BitfieldInst =
2546 MIB.buildInstr(Opc, {I.getOperand(0)}, {I.getOperand(1)})
2547 .addImm(LSB)
2548 .addImm(LSB + Width - 1);
2549 I.eraseFromParent();
2550 return constrainSelectedInstRegOperands(*BitfieldInst, TII, TRI, RBI);
2551 }
2552 case TargetOpcode::G_BRCOND:
2553 return selectCompareBranch(I, MF, MRI);
2554
2555 case TargetOpcode::G_BRINDIRECT: {
2556 const Function &Fn = MF.getFunction();
2557 if (std::optional<uint16_t> BADisc =
2558 STI.getPtrAuthBlockAddressDiscriminatorIfEnabled(Fn)) {
2559 auto MI = MIB.buildInstr(AArch64::BRA, {}, {I.getOperand(0).getReg()});
2560 MI.addImm(AArch64PACKey::IA);
2561 MI.addImm(*BADisc);
2562 MI.addReg(/*AddrDisc=*/AArch64::XZR);
2563 I.eraseFromParent();
2565 }
2566 I.setDesc(TII.get(AArch64::BR));
2568 }
2569
2570 case TargetOpcode::G_BRJT:
2571 return selectBrJT(I, MRI);
2572
2573 case AArch64::G_ADD_LOW: {
2574 // This op may have been separated from it's ADRP companion by the localizer
2575 // or some other code motion pass. Given that many CPUs will try to
2576 // macro fuse these operations anyway, select this into a MOVaddr pseudo
2577 // which will later be expanded into an ADRP+ADD pair after scheduling.
2578 MachineInstr *BaseMI = MRI.getVRegDef(I.getOperand(1).getReg());
2579 if (BaseMI->getOpcode() != AArch64::ADRP) {
2580 I.setDesc(TII.get(AArch64::ADDXri));
2581 I.addOperand(MachineOperand::CreateImm(0));
2583 }
2584 assert(TM.getCodeModel() == CodeModel::Small &&
2585 "Expected small code model");
2586 auto Op1 = BaseMI->getOperand(1);
2587 auto Op2 = I.getOperand(2);
2588 auto MovAddr = MIB.buildInstr(AArch64::MOVaddr, {I.getOperand(0)}, {})
2589 .addGlobalAddress(Op1.getGlobal(), Op1.getOffset(),
2590 Op1.getTargetFlags())
2591 .addGlobalAddress(Op2.getGlobal(), Op2.getOffset(),
2592 Op2.getTargetFlags());
2593 I.eraseFromParent();
2594 return constrainSelectedInstRegOperands(*MovAddr, TII, TRI, RBI);
2595 }
2596
2597 case TargetOpcode::G_FCONSTANT:
2598 case TargetOpcode::G_CONSTANT: {
2599 const bool isFP = Opcode == TargetOpcode::G_FCONSTANT;
2600
2601 const LLT s8 = LLT::scalar(8);
2602 const LLT s16 = LLT::scalar(16);
2603 const LLT s32 = LLT::scalar(32);
2604 const LLT s64 = LLT::scalar(64);
2605 const LLT s128 = LLT::scalar(128);
2606 const LLT p0 = LLT::pointer(0, 64);
2607
2608 const Register DefReg = I.getOperand(0).getReg();
2609 const LLT DefTy = MRI.getType(DefReg);
2610 const unsigned DefSize = DefTy.getSizeInBits();
2611 const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI);
2612
2613 // FIXME: Redundant check, but even less readable when factored out.
2614 if (isFP) {
2615 if (Ty != s16 && Ty != s32 && Ty != s64 && Ty != s128) {
2616 LLVM_DEBUG(dbgs() << "Unable to materialize FP " << Ty
2617 << " constant, expected: " << s16 << " or " << s32
2618 << " or " << s64 << " or " << s128 << '\n');
2619 return false;
2620 }
2621
2622 if (RB.getID() != AArch64::FPRRegBankID) {
2623 LLVM_DEBUG(dbgs() << "Unable to materialize FP " << Ty
2624 << " constant on bank: " << RB
2625 << ", expected: FPR\n");
2626 return false;
2627 }
2628
2629 // The case when we have 0.0 is covered by tablegen. Reject it here so we
2630 // can be sure tablegen works correctly and isn't rescued by this code.
2631 // 0.0 is not covered by tablegen for FP128. So we will handle this
2632 // scenario in the code here.
2633 if (DefSize != 128 && I.getOperand(1).getFPImm()->isExactlyValue(0.0))
2634 return false;
2635 } else {
2636 // s32 and s64 are covered by tablegen.
2637 if (Ty != p0 && Ty != s8 && Ty != s16) {
2638 LLVM_DEBUG(dbgs() << "Unable to materialize integer " << Ty
2639 << " constant, expected: " << s32 << ", " << s64
2640 << ", or " << p0 << '\n');
2641 return false;
2642 }
2643
2644 if (RB.getID() != AArch64::GPRRegBankID) {
2645 LLVM_DEBUG(dbgs() << "Unable to materialize integer " << Ty
2646 << " constant on bank: " << RB
2647 << ", expected: GPR\n");
2648 return false;
2649 }
2650 }
2651
2652 if (isFP) {
2653 const TargetRegisterClass &FPRRC = *getRegClassForTypeOnBank(DefTy, RB);
2654 // For 16, 64, and 128b values, emit a constant pool load.
2655 switch (DefSize) {
2656 default:
2657 llvm_unreachable("Unexpected destination size for G_FCONSTANT?");
2658 case 32:
2659 case 64: {
2660 bool OptForSize = shouldOptForSize(&MF);
2661 const auto &TLI = MF.getSubtarget().getTargetLowering();
2662 // If TLI says that this fpimm is illegal, then we'll expand to a
2663 // constant pool load.
2664 if (TLI->isFPImmLegal(I.getOperand(1).getFPImm()->getValueAPF(),
2665 EVT::getFloatingPointVT(DefSize), OptForSize))
2666 break;
2667 [[fallthrough]];
2668 }
2669 case 16:
2670 case 128: {
2671 auto *FPImm = I.getOperand(1).getFPImm();
2672 auto *LoadMI = emitLoadFromConstantPool(FPImm, MIB);
2673 if (!LoadMI) {
2674 LLVM_DEBUG(dbgs() << "Failed to load double constant pool entry\n");
2675 return false;
2676 }
2677 MIB.buildCopy({DefReg}, {LoadMI->getOperand(0).getReg()});
2678 I.eraseFromParent();
2679 return RBI.constrainGenericRegister(DefReg, FPRRC, MRI);
2680 }
2681 }
2682
2683 assert((DefSize == 32 || DefSize == 64) && "Unexpected const def size");
2684 // Either emit a FMOV, or emit a copy to emit a normal mov.
2685 const Register DefGPRReg = MRI.createVirtualRegister(
2686 DefSize == 32 ? &AArch64::GPR32RegClass : &AArch64::GPR64RegClass);
2687 MachineOperand &RegOp = I.getOperand(0);
2688 RegOp.setReg(DefGPRReg);
2689 MIB.setInsertPt(MIB.getMBB(), std::next(I.getIterator()));
2690 MIB.buildCopy({DefReg}, {DefGPRReg});
2691
2692 if (!RBI.constrainGenericRegister(DefReg, FPRRC, MRI)) {
2693 LLVM_DEBUG(dbgs() << "Failed to constrain G_FCONSTANT def operand\n");
2694 return false;
2695 }
2696
2697 MachineOperand &ImmOp = I.getOperand(1);
2698 // FIXME: Is going through int64_t always correct?
2699 ImmOp.ChangeToImmediate(
2701 } else if (I.getOperand(1).isCImm()) {
2702 uint64_t Val = I.getOperand(1).getCImm()->getZExtValue();
2703 I.getOperand(1).ChangeToImmediate(Val);
2704 } else if (I.getOperand(1).isImm()) {
2705 uint64_t Val = I.getOperand(1).getImm();
2706 I.getOperand(1).ChangeToImmediate(Val);
2707 }
2708
2709 const unsigned MovOpc =
2710 DefSize == 64 ? AArch64::MOVi64imm : AArch64::MOVi32imm;
2711 I.setDesc(TII.get(MovOpc));
2713 return true;
2714 }
2715 case TargetOpcode::G_EXTRACT: {
2716 Register DstReg = I.getOperand(0).getReg();
2717 Register SrcReg = I.getOperand(1).getReg();
2718 LLT SrcTy = MRI.getType(SrcReg);
2719 LLT DstTy = MRI.getType(DstReg);
2720 (void)DstTy;
2721 unsigned SrcSize = SrcTy.getSizeInBits();
2722
2723 if (SrcTy.getSizeInBits() > 64) {
2724 // This should be an extract of an s128, which is like a vector extract.
2725 if (SrcTy.getSizeInBits() != 128)
2726 return false;
2727 // Only support extracting 64 bits from an s128 at the moment.
2728 if (DstTy.getSizeInBits() != 64)
2729 return false;
2730
2731 unsigned Offset = I.getOperand(2).getImm();
2732 if (Offset % 64 != 0)
2733 return false;
2734
2735 // Check we have the right regbank always.
2736 const RegisterBank &SrcRB = *RBI.getRegBank(SrcReg, MRI, TRI);
2737 const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
2738 assert(SrcRB.getID() == DstRB.getID() && "Wrong extract regbank!");
2739
2740 if (SrcRB.getID() == AArch64::GPRRegBankID) {
2741 auto NewI =
2742 MIB.buildInstr(TargetOpcode::COPY, {DstReg}, {})
2743 .addUse(SrcReg, 0,
2744 Offset == 0 ? AArch64::sube64 : AArch64::subo64);
2745 constrainOperandRegClass(MF, TRI, MRI, TII, RBI, *NewI,
2746 AArch64::GPR64RegClass, NewI->getOperand(0));
2747 I.eraseFromParent();
2748 return true;
2749 }
2750
2751 // Emit the same code as a vector extract.
2752 // Offset must be a multiple of 64.
2753 unsigned LaneIdx = Offset / 64;
2754 MachineInstr *Extract = emitExtractVectorElt(
2755 DstReg, DstRB, LLT::scalar(64), SrcReg, LaneIdx, MIB);
2756 if (!Extract)
2757 return false;
2758 I.eraseFromParent();
2759 return true;
2760 }
2761
2762 I.setDesc(TII.get(SrcSize == 64 ? AArch64::UBFMXri : AArch64::UBFMWri));
2763 MachineInstrBuilder(MF, I).addImm(I.getOperand(2).getImm() +
2764 Ty.getSizeInBits() - 1);
2765
2766 if (SrcSize < 64) {
2767 assert(SrcSize == 32 && DstTy.getSizeInBits() == 16 &&
2768 "unexpected G_EXTRACT types");
2770 }
2771
2772 DstReg = MRI.createGenericVirtualRegister(LLT::scalar(64));
2773 MIB.setInsertPt(MIB.getMBB(), std::next(I.getIterator()));
2774 MIB.buildInstr(TargetOpcode::COPY, {I.getOperand(0).getReg()}, {})
2775 .addReg(DstReg, 0, AArch64::sub_32);
2776 RBI.constrainGenericRegister(I.getOperand(0).getReg(),
2777 AArch64::GPR32RegClass, MRI);
2778 I.getOperand(0).setReg(DstReg);
2779
2781 }
2782
2783 case TargetOpcode::G_INSERT: {
2784 LLT SrcTy = MRI.getType(I.getOperand(2).getReg());
2785 LLT DstTy = MRI.getType(I.getOperand(0).getReg());
2786 unsigned DstSize = DstTy.getSizeInBits();
2787 // Larger inserts are vectors, same-size ones should be something else by
2788 // now (split up or turned into COPYs).
2789 if (Ty.getSizeInBits() > 64 || SrcTy.getSizeInBits() > 32)
2790 return false;
2791
2792 I.setDesc(TII.get(DstSize == 64 ? AArch64::BFMXri : AArch64::BFMWri));
2793 unsigned LSB = I.getOperand(3).getImm();
2794 unsigned Width = MRI.getType(I.getOperand(2).getReg()).getSizeInBits();
2795 I.getOperand(3).setImm((DstSize - LSB) % DstSize);
2796 MachineInstrBuilder(MF, I).addImm(Width - 1);
2797
2798 if (DstSize < 64) {
2799 assert(DstSize == 32 && SrcTy.getSizeInBits() == 16 &&
2800 "unexpected G_INSERT types");
2802 }
2803
2804 Register SrcReg = MRI.createGenericVirtualRegister(LLT::scalar(64));
2805 BuildMI(MBB, I.getIterator(), I.getDebugLoc(),
2806 TII.get(AArch64::SUBREG_TO_REG))
2807 .addDef(SrcReg)
2808 .addImm(0)
2809 .addUse(I.getOperand(2).getReg())
2810 .addImm(AArch64::sub_32);
2811 RBI.constrainGenericRegister(I.getOperand(2).getReg(),
2812 AArch64::GPR32RegClass, MRI);
2813 I.getOperand(2).setReg(SrcReg);
2814
2816 }
2817 case TargetOpcode::G_FRAME_INDEX: {
2818 // allocas and G_FRAME_INDEX are only supported in addrspace(0).
2819 if (Ty != LLT::pointer(0, 64)) {
2820 LLVM_DEBUG(dbgs() << "G_FRAME_INDEX pointer has type: " << Ty
2821 << ", expected: " << LLT::pointer(0, 64) << '\n');
2822 return false;
2823 }
2824 I.setDesc(TII.get(AArch64::ADDXri));
2825
2826 // MOs for a #0 shifted immediate.
2827 I.addOperand(MachineOperand::CreateImm(0));
2828 I.addOperand(MachineOperand::CreateImm(0));
2829
2831 }
2832
2833 case TargetOpcode::G_GLOBAL_VALUE: {
2834 const GlobalValue *GV = nullptr;
2835 unsigned OpFlags;
2836 if (I.getOperand(1).isSymbol()) {
2837 OpFlags = I.getOperand(1).getTargetFlags();
2838 // Currently only used by "RtLibUseGOT".
2839 assert(OpFlags == AArch64II::MO_GOT);
2840 } else {
2841 GV = I.getOperand(1).getGlobal();
2842 if (GV->isThreadLocal())
2843 return selectTLSGlobalValue(I, MRI);
2844 OpFlags = STI.ClassifyGlobalReference(GV, TM);
2845 }
2846
2847 if (OpFlags & AArch64II::MO_GOT) {
2848 I.setDesc(TII.get(AArch64::LOADgot));
2849 I.getOperand(1).setTargetFlags(OpFlags);
2850 } else if (TM.getCodeModel() == CodeModel::Large &&
2851 !TM.isPositionIndependent()) {
2852 // Materialize the global using movz/movk instructions.
2853 materializeLargeCMVal(I, GV, OpFlags);
2854 I.eraseFromParent();
2855 return true;
2856 } else if (TM.getCodeModel() == CodeModel::Tiny) {
2857 I.setDesc(TII.get(AArch64::ADR));
2858 I.getOperand(1).setTargetFlags(OpFlags);
2859 } else {
2860 I.setDesc(TII.get(AArch64::MOVaddr));
2861 I.getOperand(1).setTargetFlags(OpFlags | AArch64II::MO_PAGE);
2862 MachineInstrBuilder MIB(MF, I);
2863 MIB.addGlobalAddress(GV, I.getOperand(1).getOffset(),
2865 }
2867 }
2868
2869 case TargetOpcode::G_PTRAUTH_GLOBAL_VALUE:
2870 return selectPtrAuthGlobalValue(I, MRI);
2871
2872 case TargetOpcode::G_ZEXTLOAD:
2873 case TargetOpcode::G_LOAD:
2874 case TargetOpcode::G_STORE: {
2875 GLoadStore &LdSt = cast<GLoadStore>(I);
2876 bool IsZExtLoad = I.getOpcode() == TargetOpcode::G_ZEXTLOAD;
2877 LLT PtrTy = MRI.getType(LdSt.getPointerReg());
2878
2879 if (PtrTy != LLT::pointer(0, 64)) {
2880 LLVM_DEBUG(dbgs() << "Load/Store pointer has type: " << PtrTy
2881 << ", expected: " << LLT::pointer(0, 64) << '\n');
2882 return false;
2883 }
2884
2885 uint64_t MemSizeInBytes = LdSt.getMemSize().getValue();
2886 unsigned MemSizeInBits = LdSt.getMemSizeInBits().getValue();
2887 AtomicOrdering Order = LdSt.getMMO().getSuccessOrdering();
2888
2889 // Need special instructions for atomics that affect ordering.
2890 if (Order != AtomicOrdering::NotAtomic &&
2891 Order != AtomicOrdering::Unordered &&
2892 Order != AtomicOrdering::Monotonic) {
2893 assert(!isa<GZExtLoad>(LdSt));
2894 assert(MemSizeInBytes <= 8 &&
2895 "128-bit atomics should already be custom-legalized");
2896
2897 if (isa<GLoad>(LdSt)) {
2898 static constexpr unsigned LDAPROpcodes[] = {
2899 AArch64::LDAPRB, AArch64::LDAPRH, AArch64::LDAPRW, AArch64::LDAPRX};
2900 static constexpr unsigned LDAROpcodes[] = {
2901 AArch64::LDARB, AArch64::LDARH, AArch64::LDARW, AArch64::LDARX};
2902 ArrayRef<unsigned> Opcodes =
2903 STI.hasRCPC() && Order != AtomicOrdering::SequentiallyConsistent
2904 ? LDAPROpcodes
2905 : LDAROpcodes;
2906 I.setDesc(TII.get(Opcodes[Log2_32(MemSizeInBytes)]));
2907 } else {
2908 static constexpr unsigned Opcodes[] = {AArch64::STLRB, AArch64::STLRH,
2909 AArch64::STLRW, AArch64::STLRX};
2910 Register ValReg = LdSt.getReg(0);
2911 if (MRI.getType(ValReg).getSizeInBits() == 64 && MemSizeInBits != 64) {
2912 // Emit a subreg copy of 32 bits.
2913 Register NewVal = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
2914 MIB.buildInstr(TargetOpcode::COPY, {NewVal}, {})
2915 .addReg(I.getOperand(0).getReg(), 0, AArch64::sub_32);
2916 I.getOperand(0).setReg(NewVal);
2917 }
2918 I.setDesc(TII.get(Opcodes[Log2_32(MemSizeInBytes)]));
2919 }
2921 return true;
2922 }
2923
2924#ifndef NDEBUG
2925 const Register PtrReg = LdSt.getPointerReg();
2926 const RegisterBank &PtrRB = *RBI.getRegBank(PtrReg, MRI, TRI);
2927 // Check that the pointer register is valid.
2928 assert(PtrRB.getID() == AArch64::GPRRegBankID &&
2929 "Load/Store pointer operand isn't a GPR");
2930 assert(MRI.getType(PtrReg).isPointer() &&
2931 "Load/Store pointer operand isn't a pointer");
2932#endif
2933
2934 const Register ValReg = LdSt.getReg(0);
2935 const LLT ValTy = MRI.getType(ValReg);
2936 const RegisterBank &RB = *RBI.getRegBank(ValReg, MRI, TRI);
2937
2938 // The code below doesn't support truncating stores, so we need to split it
2939 // again.
2940 if (isa<GStore>(LdSt) && ValTy.getSizeInBits() > MemSizeInBits) {
2941 unsigned SubReg;
2942 LLT MemTy = LdSt.getMMO().getMemoryType();
2943 auto *RC = getRegClassForTypeOnBank(MemTy, RB);
2944 if (!getSubRegForClass(RC, TRI, SubReg))
2945 return false;
2946
2947 // Generate a subreg copy.
2948 auto Copy = MIB.buildInstr(TargetOpcode::COPY, {MemTy}, {})
2949 .addReg(ValReg, 0, SubReg)
2950 .getReg(0);
2951 RBI.constrainGenericRegister(Copy, *RC, MRI);
2952 LdSt.getOperand(0).setReg(Copy);
2953 } else if (isa<GLoad>(LdSt) && ValTy.getSizeInBits() > MemSizeInBits) {
2954 // If this is an any-extending load from the FPR bank, split it into a regular
2955 // load + extend.
2956 if (RB.getID() == AArch64::FPRRegBankID) {
2957 unsigned SubReg;
2958 LLT MemTy = LdSt.getMMO().getMemoryType();
2959 auto *RC = getRegClassForTypeOnBank(MemTy, RB);
2960 if (!getSubRegForClass(RC, TRI, SubReg))
2961 return false;
2962 Register OldDst = LdSt.getReg(0);
2963 Register NewDst =
2964 MRI.createGenericVirtualRegister(LdSt.getMMO().getMemoryType());
2965 LdSt.getOperand(0).setReg(NewDst);
2966 MRI.setRegBank(NewDst, RB);
2967 // Generate a SUBREG_TO_REG to extend it.
2968 MIB.setInsertPt(MIB.getMBB(), std::next(LdSt.getIterator()));
2969 MIB.buildInstr(AArch64::SUBREG_TO_REG, {OldDst}, {})
2970 .addImm(0)
2971 .addUse(NewDst)
2972 .addImm(SubReg);
2973 auto SubRegRC = getRegClassForTypeOnBank(MRI.getType(OldDst), RB);
2974 RBI.constrainGenericRegister(OldDst, *SubRegRC, MRI);
2975 MIB.setInstr(LdSt);
2976 }
2977 }
2978
2979 // Helper lambda for partially selecting I. Either returns the original
2980 // instruction with an updated opcode, or a new instruction.
2981 auto SelectLoadStoreAddressingMode = [&]() -> MachineInstr * {
2982 bool IsStore = isa<GStore>(I);
2983 const unsigned NewOpc =
2984 selectLoadStoreUIOp(I.getOpcode(), RB.getID(), MemSizeInBits);
2985 if (NewOpc == I.getOpcode())
2986 return nullptr;
2987 // Check if we can fold anything into the addressing mode.
2988 auto AddrModeFns =
2989 selectAddrModeIndexed(I.getOperand(1), MemSizeInBytes);
2990 if (!AddrModeFns) {
2991 // Can't fold anything. Use the original instruction.
2992 I.setDesc(TII.get(NewOpc));
2993 I.addOperand(MachineOperand::CreateImm(0));
2994 return &I;
2995 }
2996
2997 // Folded something. Create a new instruction and return it.
2998 auto NewInst = MIB.buildInstr(NewOpc, {}, {}, I.getFlags());
2999 Register CurValReg = I.getOperand(0).getReg();
3000 IsStore ? NewInst.addUse(CurValReg) : NewInst.addDef(CurValReg);
3001 NewInst.cloneMemRefs(I);
3002 for (auto &Fn : *AddrModeFns)
3003 Fn(NewInst);
3004 I.eraseFromParent();
3005 return &*NewInst;
3006 };
3007
3008 MachineInstr *LoadStore = SelectLoadStoreAddressingMode();
3009 if (!LoadStore)
3010 return false;
3011
3012 // If we're storing a 0, use WZR/XZR.
3013 if (Opcode == TargetOpcode::G_STORE) {
3015 LoadStore->getOperand(0).getReg(), MRI);
3016 if (CVal && CVal->Value == 0) {
3017 switch (LoadStore->getOpcode()) {
3018 case AArch64::STRWui:
3019 case AArch64::STRHHui:
3020 case AArch64::STRBBui:
3021 LoadStore->getOperand(0).setReg(AArch64::WZR);
3022 break;
3023 case AArch64::STRXui:
3024 LoadStore->getOperand(0).setReg(AArch64::XZR);
3025 break;
3026 }
3027 }
3028 }
3029
3030 if (IsZExtLoad || (Opcode == TargetOpcode::G_LOAD &&
3031 ValTy == LLT::scalar(64) && MemSizeInBits == 32)) {
3032 // The any/zextload from a smaller type to i32 should be handled by the
3033 // importer.
3034 if (MRI.getType(LoadStore->getOperand(0).getReg()).getSizeInBits() != 64)
3035 return false;
3036 // If we have an extending load then change the load's type to be a
3037 // narrower reg and zero_extend with SUBREG_TO_REG.
3038 Register LdReg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
3039 Register DstReg = LoadStore->getOperand(0).getReg();
3040 LoadStore->getOperand(0).setReg(LdReg);
3041
3042 MIB.setInsertPt(MIB.getMBB(), std::next(LoadStore->getIterator()));
3043 MIB.buildInstr(AArch64::SUBREG_TO_REG, {DstReg}, {})
3044 .addImm(0)
3045 .addUse(LdReg)
3046 .addImm(AArch64::sub_32);
3047 constrainSelectedInstRegOperands(*LoadStore, TII, TRI, RBI);
3048 return RBI.constrainGenericRegister(DstReg, AArch64::GPR64allRegClass,
3049 MRI);
3050 }
3051 return constrainSelectedInstRegOperands(*LoadStore, TII, TRI, RBI);
3052 }
3053
3054 case TargetOpcode::G_INDEXED_ZEXTLOAD:
3055 case TargetOpcode::G_INDEXED_SEXTLOAD:
3056 return selectIndexedExtLoad(I, MRI);
3057 case TargetOpcode::G_INDEXED_LOAD:
3058 return selectIndexedLoad(I, MRI);
3059 case TargetOpcode::G_INDEXED_STORE:
3060 return selectIndexedStore(cast<GIndexedStore>(I), MRI);
3061
3062 case TargetOpcode::G_LSHR:
3063 case TargetOpcode::G_ASHR:
3064 if (MRI.getType(I.getOperand(0).getReg()).isVector())
3065 return selectVectorAshrLshr(I, MRI);
3066 [[fallthrough]];
3067 case TargetOpcode::G_SHL:
3068 if (Opcode == TargetOpcode::G_SHL &&
3069 MRI.getType(I.getOperand(0).getReg()).isVector())
3070 return selectVectorSHL(I, MRI);
3071
3072 // These shifts were legalized to have 64 bit shift amounts because we
3073 // want to take advantage of the selection patterns that assume the
3074 // immediates are s64s, however, selectBinaryOp will assume both operands
3075 // will have the same bit size.
3076 {
3077 Register SrcReg = I.getOperand(1).getReg();
3078 Register ShiftReg = I.getOperand(2).getReg();
3079 const LLT ShiftTy = MRI.getType(ShiftReg);
3080 const LLT SrcTy = MRI.getType(SrcReg);
3081 if (!SrcTy.isVector() && SrcTy.getSizeInBits() == 32 &&
3082 ShiftTy.getSizeInBits() == 64) {
3083 assert(!ShiftTy.isVector() && "unexpected vector shift ty");
3084 // Insert a subregister copy to implement a 64->32 trunc
3085 auto Trunc = MIB.buildInstr(TargetOpcode::COPY, {SrcTy}, {})
3086 .addReg(ShiftReg, 0, AArch64::sub_32);
3087 MRI.setRegBank(Trunc.getReg(0), RBI.getRegBank(AArch64::GPRRegBankID));
3088 I.getOperand(2).setReg(Trunc.getReg(0));
3089 }
3090 }
3091 [[fallthrough]];
3092 case TargetOpcode::G_OR: {
3093 // Reject the various things we don't support yet.
3094 if (unsupportedBinOp(I, RBI, MRI, TRI))
3095 return false;
3096
3097 const unsigned OpSize = Ty.getSizeInBits();
3098
3099 const Register DefReg = I.getOperand(0).getReg();
3100 const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI);
3101
3102 const unsigned NewOpc = selectBinaryOp(I.getOpcode(), RB.getID(), OpSize);
3103 if (NewOpc == I.getOpcode())
3104 return false;
3105
3106 I.setDesc(TII.get(NewOpc));
3107 // FIXME: Should the type be always reset in setDesc?
3108
3109 // Now that we selected an opcode, we need to constrain the register
3110 // operands to use appropriate classes.
3112 }
3113
3114 case TargetOpcode::G_PTR_ADD: {
3115 emitADD(I.getOperand(0).getReg(), I.getOperand(1), I.getOperand(2), MIB);
3116 I.eraseFromParent();
3117 return true;
3118 }
3119
3120 case TargetOpcode::G_SADDE:
3121 case TargetOpcode::G_UADDE:
3122 case TargetOpcode::G_SSUBE:
3123 case TargetOpcode::G_USUBE:
3124 case TargetOpcode::G_SADDO:
3125 case TargetOpcode::G_UADDO:
3126 case TargetOpcode::G_SSUBO:
3127 case TargetOpcode::G_USUBO:
3128 return selectOverflowOp(I, MRI);
3129
3130 case TargetOpcode::G_PTRMASK: {
3131 Register MaskReg = I.getOperand(2).getReg();
3132 std::optional<int64_t> MaskVal = getIConstantVRegSExtVal(MaskReg, MRI);
3133 // TODO: Implement arbitrary cases
3134 if (!MaskVal || !isShiftedMask_64(*MaskVal))
3135 return false;
3136
3137 uint64_t Mask = *MaskVal;
3138 I.setDesc(TII.get(AArch64::ANDXri));
3139 I.getOperand(2).ChangeToImmediate(
3141
3143 }
3144 case TargetOpcode::G_PTRTOINT:
3145 case TargetOpcode::G_TRUNC: {
3146 const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
3147 const LLT SrcTy = MRI.getType(I.getOperand(1).getReg());
3148
3149 const Register DstReg = I.getOperand(0).getReg();
3150 const Register SrcReg = I.getOperand(1).getReg();
3151
3152 const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
3153 const RegisterBank &SrcRB = *RBI.getRegBank(SrcReg, MRI, TRI);
3154
3155 if (DstRB.getID() != SrcRB.getID()) {
3156 LLVM_DEBUG(
3157 dbgs() << "G_TRUNC/G_PTRTOINT input/output on different banks\n");
3158 return false;
3159 }
3160
3161 if (DstRB.getID() == AArch64::GPRRegBankID) {
3162 const TargetRegisterClass *DstRC = getRegClassForTypeOnBank(DstTy, DstRB);
3163 if (!DstRC)
3164 return false;
3165
3166 const TargetRegisterClass *SrcRC = getRegClassForTypeOnBank(SrcTy, SrcRB);
3167 if (!SrcRC)
3168 return false;
3169
3170 if (!RBI.constrainGenericRegister(SrcReg, *SrcRC, MRI) ||
3171 !RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) {
3172 LLVM_DEBUG(dbgs() << "Failed to constrain G_TRUNC/G_PTRTOINT\n");
3173 return false;
3174 }
3175
3176 if (DstRC == SrcRC) {
3177 // Nothing to be done
3178 } else if (Opcode == TargetOpcode::G_TRUNC && DstTy == LLT::scalar(32) &&
3179 SrcTy == LLT::scalar(64)) {
3180 llvm_unreachable("TableGen can import this case");
3181 return false;
3182 } else if (DstRC == &AArch64::GPR32RegClass &&
3183 SrcRC == &AArch64::GPR64RegClass) {
3184 I.getOperand(1).setSubReg(AArch64::sub_32);
3185 } else {
3186 LLVM_DEBUG(
3187 dbgs() << "Unhandled mismatched classes in G_TRUNC/G_PTRTOINT\n");
3188 return false;
3189 }
3190
3191 I.setDesc(TII.get(TargetOpcode::COPY));
3192 return true;
3193 } else if (DstRB.getID() == AArch64::FPRRegBankID) {
3194 if (DstTy == LLT::fixed_vector(4, 16) &&
3195 SrcTy == LLT::fixed_vector(4, 32)) {
3196 I.setDesc(TII.get(AArch64::XTNv4i16));
3198 return true;
3199 }
3200
3201 if (!SrcTy.isVector() && SrcTy.getSizeInBits() == 128) {
3202 MachineInstr *Extract = emitExtractVectorElt(
3203 DstReg, DstRB, LLT::scalar(DstTy.getSizeInBits()), SrcReg, 0, MIB);
3204 if (!Extract)
3205 return false;
3206 I.eraseFromParent();
3207 return true;
3208 }
3209
3210 // We might have a vector G_PTRTOINT, in which case just emit a COPY.
3211 if (Opcode == TargetOpcode::G_PTRTOINT) {
3212 assert(DstTy.isVector() && "Expected an FPR ptrtoint to be a vector");
3213 I.setDesc(TII.get(TargetOpcode::COPY));
3214 return selectCopy(I, TII, MRI, TRI, RBI);
3215 }
3216 }
3217
3218 return false;
3219 }
3220
3221 case TargetOpcode::G_ANYEXT: {
3222 if (selectUSMovFromExtend(I, MRI))
3223 return true;
3224
3225 const Register DstReg = I.getOperand(0).getReg();
3226 const Register SrcReg = I.getOperand(1).getReg();
3227
3228 const RegisterBank &RBDst = *RBI.getRegBank(DstReg, MRI, TRI);
3229 if (RBDst.getID() != AArch64::GPRRegBankID) {
3230 LLVM_DEBUG(dbgs() << "G_ANYEXT on bank: " << RBDst
3231 << ", expected: GPR\n");
3232 return false;
3233 }
3234
3235 const RegisterBank &RBSrc = *RBI.getRegBank(SrcReg, MRI, TRI);
3236 if (RBSrc.getID() != AArch64::GPRRegBankID) {
3237 LLVM_DEBUG(dbgs() << "G_ANYEXT on bank: " << RBSrc
3238 << ", expected: GPR\n");
3239 return false;
3240 }
3241
3242 const unsigned DstSize = MRI.getType(DstReg).getSizeInBits();
3243
3244 if (DstSize == 0) {
3245 LLVM_DEBUG(dbgs() << "G_ANYEXT operand has no size, not a gvreg?\n");
3246 return false;
3247 }
3248
3249 if (DstSize != 64 && DstSize > 32) {
3250 LLVM_DEBUG(dbgs() << "G_ANYEXT to size: " << DstSize
3251 << ", expected: 32 or 64\n");
3252 return false;
3253 }
3254 // At this point G_ANYEXT is just like a plain COPY, but we need
3255 // to explicitly form the 64-bit value if any.
3256 if (DstSize > 32) {
3257 Register ExtSrc = MRI.createVirtualRegister(&AArch64::GPR64allRegClass);
3258 BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::SUBREG_TO_REG))
3259 .addDef(ExtSrc)
3260 .addImm(0)
3261 .addUse(SrcReg)
3262 .addImm(AArch64::sub_32);
3263 I.getOperand(1).setReg(ExtSrc);
3264 }
3265 return selectCopy(I, TII, MRI, TRI, RBI);
3266 }
3267
3268 case TargetOpcode::G_ZEXT:
3269 case TargetOpcode::G_SEXT_INREG:
3270 case TargetOpcode::G_SEXT: {
3271 if (selectUSMovFromExtend(I, MRI))
3272 return true;
3273
3274 unsigned Opcode = I.getOpcode();
3275 const bool IsSigned = Opcode != TargetOpcode::G_ZEXT;
3276 const Register DefReg = I.getOperand(0).getReg();
3277 Register SrcReg = I.getOperand(1).getReg();
3278 const LLT DstTy = MRI.getType(DefReg);
3279 const LLT SrcTy = MRI.getType(SrcReg);
3280 unsigned DstSize = DstTy.getSizeInBits();
3281 unsigned SrcSize = SrcTy.getSizeInBits();
3282
3283 // SEXT_INREG has the same src reg size as dst, the size of the value to be
3284 // extended is encoded in the imm.
3285 if (Opcode == TargetOpcode::G_SEXT_INREG)
3286 SrcSize = I.getOperand(2).getImm();
3287
3288 if (DstTy.isVector())
3289 return false; // Should be handled by imported patterns.
3290
3291 assert((*RBI.getRegBank(DefReg, MRI, TRI)).getID() ==
3292 AArch64::GPRRegBankID &&
3293 "Unexpected ext regbank");
3294
3295 MachineInstr *ExtI;
3296
3297 // First check if we're extending the result of a load which has a dest type
3298 // smaller than 32 bits, then this zext is redundant. GPR32 is the smallest
3299 // GPR register on AArch64 and all loads which are smaller automatically
3300 // zero-extend the upper bits. E.g.
3301 // %v(s8) = G_LOAD %p, :: (load 1)
3302 // %v2(s32) = G_ZEXT %v(s8)
3303 if (!IsSigned) {
3304 auto *LoadMI = getOpcodeDef(TargetOpcode::G_LOAD, SrcReg, MRI);
3305 bool IsGPR =
3306 RBI.getRegBank(SrcReg, MRI, TRI)->getID() == AArch64::GPRRegBankID;
3307 if (LoadMI && IsGPR) {
3308 const MachineMemOperand *MemOp = *LoadMI->memoperands_begin();
3309 unsigned BytesLoaded = MemOp->getSize().getValue();
3310 if (BytesLoaded < 4 && SrcTy.getSizeInBytes() == BytesLoaded)
3311 return selectCopy(I, TII, MRI, TRI, RBI);
3312 }
3313
3314 // For the 32-bit -> 64-bit case, we can emit a mov (ORRWrs)
3315 // + SUBREG_TO_REG.
3316 if (IsGPR && SrcSize == 32 && DstSize == 64) {
3317 Register SubregToRegSrc =
3318 MRI.createVirtualRegister(&AArch64::GPR32RegClass);
3319 const Register ZReg = AArch64::WZR;
3320 MIB.buildInstr(AArch64::ORRWrs, {SubregToRegSrc}, {ZReg, SrcReg})
3321 .addImm(0);
3322
3323 MIB.buildInstr(AArch64::SUBREG_TO_REG, {DefReg}, {})
3324 .addImm(0)
3325 .addUse(SubregToRegSrc)
3326 .addImm(AArch64::sub_32);
3327
3328 if (!RBI.constrainGenericRegister(DefReg, AArch64::GPR64RegClass,
3329 MRI)) {
3330 LLVM_DEBUG(dbgs() << "Failed to constrain G_ZEXT destination\n");
3331 return false;
3332 }
3333
3334 if (!RBI.constrainGenericRegister(SrcReg, AArch64::GPR32RegClass,
3335 MRI)) {
3336 LLVM_DEBUG(dbgs() << "Failed to constrain G_ZEXT source\n");
3337 return false;
3338 }
3339
3340 I.eraseFromParent();
3341 return true;
3342 }
3343 }
3344
3345 if (DstSize == 64) {
3346 if (Opcode != TargetOpcode::G_SEXT_INREG) {
3347 // FIXME: Can we avoid manually doing this?
3348 if (!RBI.constrainGenericRegister(SrcReg, AArch64::GPR32RegClass,
3349 MRI)) {
3350 LLVM_DEBUG(dbgs() << "Failed to constrain " << TII.getName(Opcode)
3351 << " operand\n");
3352 return false;
3353 }
3354 SrcReg = MIB.buildInstr(AArch64::SUBREG_TO_REG,
3355 {&AArch64::GPR64RegClass}, {})
3356 .addImm(0)
3357 .addUse(SrcReg)
3358 .addImm(AArch64::sub_32)
3359 .getReg(0);
3360 }
3361
3362 ExtI = MIB.buildInstr(IsSigned ? AArch64::SBFMXri : AArch64::UBFMXri,
3363 {DefReg}, {SrcReg})
3364 .addImm(0)
3365 .addImm(SrcSize - 1);
3366 } else if (DstSize <= 32) {
3367 ExtI = MIB.buildInstr(IsSigned ? AArch64::SBFMWri : AArch64::UBFMWri,
3368 {DefReg}, {SrcReg})
3369 .addImm(0)
3370 .addImm(SrcSize - 1);
3371 } else {
3372 return false;
3373 }
3374
3376 I.eraseFromParent();
3377 return true;
3378 }
3379
3380 case TargetOpcode::G_SITOFP:
3381 case TargetOpcode::G_UITOFP:
3382 case TargetOpcode::G_FPTOSI:
3383 case TargetOpcode::G_FPTOUI: {
3384 const LLT DstTy = MRI.getType(I.getOperand(0).getReg()),
3385 SrcTy = MRI.getType(I.getOperand(1).getReg());
3386 const unsigned NewOpc = selectFPConvOpc(Opcode, DstTy, SrcTy);
3387 if (NewOpc == Opcode)
3388 return false;
3389
3390 I.setDesc(TII.get(NewOpc));
3392 I.setFlags(MachineInstr::NoFPExcept);
3393
3394 return true;
3395 }
3396
3397 case TargetOpcode::G_FREEZE:
3398 return selectCopy(I, TII, MRI, TRI, RBI);
3399
3400 case TargetOpcode::G_INTTOPTR:
3401 // The importer is currently unable to import pointer types since they
3402 // didn't exist in SelectionDAG.
3403 return selectCopy(I, TII, MRI, TRI, RBI);
3404
3405 case TargetOpcode::G_BITCAST:
3406 // Imported SelectionDAG rules can handle every bitcast except those that
3407 // bitcast from a type to the same type. Ideally, these shouldn't occur
3408 // but we might not run an optimizer that deletes them. The other exception
3409 // is bitcasts involving pointer types, as SelectionDAG has no knowledge
3410 // of them.
3411 return selectCopy(I, TII, MRI, TRI, RBI);
3412
3413 case TargetOpcode::G_SELECT: {
3414 auto &Sel = cast<GSelect>(I);
3415 const Register CondReg = Sel.getCondReg();
3416 const Register TReg = Sel.getTrueReg();
3417 const Register FReg = Sel.getFalseReg();
3418
3419 if (tryOptSelect(Sel))
3420 return true;
3421
3422 // Make sure to use an unused vreg instead of wzr, so that the peephole
3423 // optimizations will be able to optimize these.
3424 Register DeadVReg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
3425 auto TstMI = MIB.buildInstr(AArch64::ANDSWri, {DeadVReg}, {CondReg})
3426 .addImm(AArch64_AM::encodeLogicalImmediate(1, 32));
3428 if (!emitSelect(Sel.getReg(0), TReg, FReg, AArch64CC::NE, MIB))
3429 return false;
3430 Sel.eraseFromParent();
3431 return true;
3432 }
3433 case TargetOpcode::G_ICMP: {
3434 if (Ty.isVector())
3435 return false;
3436
3437 if (Ty != LLT::scalar(32)) {
3438 LLVM_DEBUG(dbgs() << "G_ICMP result has type: " << Ty
3439 << ", expected: " << LLT::scalar(32) << '\n');
3440 return false;
3441 }
3442
3443 auto Pred = static_cast<CmpInst::Predicate>(I.getOperand(1).getPredicate());
3444 const AArch64CC::CondCode InvCC =
3446 emitIntegerCompare(I.getOperand(2), I.getOperand(3), I.getOperand(1), MIB);
3447 emitCSINC(/*Dst=*/I.getOperand(0).getReg(), /*Src1=*/AArch64::WZR,
3448 /*Src2=*/AArch64::WZR, InvCC, MIB);
3449 I.eraseFromParent();
3450 return true;
3451 }
3452
3453 case TargetOpcode::G_FCMP: {
3454 CmpInst::Predicate Pred =
3455 static_cast<CmpInst::Predicate>(I.getOperand(1).getPredicate());
3456 if (!emitFPCompare(I.getOperand(2).getReg(), I.getOperand(3).getReg(), MIB,
3457 Pred) ||
3458 !emitCSetForFCmp(I.getOperand(0).getReg(), Pred, MIB))
3459 return false;
3460 I.eraseFromParent();
3461 return true;
3462 }
3463 case TargetOpcode::G_VASTART:
3464 return STI.isTargetDarwin() ? selectVaStartDarwin(I, MF, MRI)
3465 : selectVaStartAAPCS(I, MF, MRI);
3466 case TargetOpcode::G_INTRINSIC:
3467 return selectIntrinsic(I, MRI);
3468 case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS:
3469 return selectIntrinsicWithSideEffects(I, MRI);
3470 case TargetOpcode::G_IMPLICIT_DEF: {
3471 I.setDesc(TII.get(TargetOpcode::IMPLICIT_DEF));
3472 const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
3473 const Register DstReg = I.getOperand(0).getReg();
3474 const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
3475 const TargetRegisterClass *DstRC = getRegClassForTypeOnBank(DstTy, DstRB);
3476 RBI.constrainGenericRegister(DstReg, *DstRC, MRI);
3477 return true;
3478 }
3479 case TargetOpcode::G_BLOCK_ADDR: {
3480 Function *BAFn = I.getOperand(1).getBlockAddress()->getFunction();
3481 if (std::optional<uint16_t> BADisc =
3482 STI.getPtrAuthBlockAddressDiscriminatorIfEnabled(*BAFn)) {
3483 MIB.buildInstr(TargetOpcode::IMPLICIT_DEF, {AArch64::X16}, {});
3484 MIB.buildInstr(TargetOpcode::IMPLICIT_DEF, {AArch64::X17}, {});
3485 MIB.buildInstr(AArch64::MOVaddrPAC)
3486 .addBlockAddress(I.getOperand(1).getBlockAddress())
3488 .addReg(/*AddrDisc=*/AArch64::XZR)
3489 .addImm(*BADisc)
3490 .constrainAllUses(TII, TRI, RBI);
3491 MIB.buildCopy(I.getOperand(0).getReg(), Register(AArch64::X16));
3492 RBI.constrainGenericRegister(I.getOperand(0).getReg(),
3493 AArch64::GPR64RegClass, MRI);
3494 I.eraseFromParent();
3495 return true;
3496 }
3497 if (TM.getCodeModel() == CodeModel::Large && !TM.isPositionIndependent()) {
3498 materializeLargeCMVal(I, I.getOperand(1).getBlockAddress(), 0);
3499 I.eraseFromParent();
3500 return true;
3501 } else {
3502 I.setDesc(TII.get(AArch64::MOVaddrBA));
3503 auto MovMI = BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::MOVaddrBA),
3504 I.getOperand(0).getReg())
3505 .addBlockAddress(I.getOperand(1).getBlockAddress(),
3506 /* Offset */ 0, AArch64II::MO_PAGE)
3508 I.getOperand(1).getBlockAddress(), /* Offset */ 0,
3510 I.eraseFromParent();
3511 return constrainSelectedInstRegOperands(*MovMI, TII, TRI, RBI);
3512 }
3513 }
3514 case AArch64::G_DUP: {
3515 // When the scalar of G_DUP is an s8/s16 gpr, they can't be selected by
3516 // imported patterns. Do it manually here. Avoiding generating s16 gpr is
3517 // difficult because at RBS we may end up pessimizing the fpr case if we
3518 // decided to add an anyextend to fix this. Manual selection is the most
3519 // robust solution for now.
3520 if (RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI)->getID() !=
3521 AArch64::GPRRegBankID)
3522 return false; // We expect the fpr regbank case to be imported.
3523 LLT VecTy = MRI.getType(I.getOperand(0).getReg());
3524 if (VecTy == LLT::fixed_vector(8, 8))
3525 I.setDesc(TII.get(AArch64::DUPv8i8gpr));
3526 else if (VecTy == LLT::fixed_vector(16, 8))
3527 I.setDesc(TII.get(AArch64::DUPv16i8gpr));
3528 else if (VecTy == LLT::fixed_vector(4, 16))
3529 I.setDesc(TII.get(AArch64::DUPv4i16gpr));
3530 else if (VecTy == LLT::fixed_vector(8, 16))
3531 I.setDesc(TII.get(AArch64::DUPv8i16gpr));
3532 else
3533 return false;
3535 }
3536 case TargetOpcode::G_BUILD_VECTOR:
3537 return selectBuildVector(I, MRI);
3538 case TargetOpcode::G_MERGE_VALUES:
3539 return selectMergeValues(I, MRI);
3540 case TargetOpcode::G_UNMERGE_VALUES:
3541 return selectUnmergeValues(I, MRI);
3542 case TargetOpcode::G_SHUFFLE_VECTOR:
3543 return selectShuffleVector(I, MRI);
3544 case TargetOpcode::G_EXTRACT_VECTOR_ELT:
3545 return selectExtractElt(I, MRI);
3546 case TargetOpcode::G_CONCAT_VECTORS:
3547 return selectConcatVectors(I, MRI);
3548 case TargetOpcode::G_JUMP_TABLE:
3549 return selectJumpTable(I, MRI);
3550 case TargetOpcode::G_MEMCPY:
3551 case TargetOpcode::G_MEMCPY_INLINE:
3552 case TargetOpcode::G_MEMMOVE:
3553 case TargetOpcode::G_MEMSET:
3554 assert(STI.hasMOPS() && "Shouldn't get here without +mops feature");
3555 return selectMOPS(I, MRI);
3556 }
3557
3558 return false;
3559}
3560
3561bool AArch64InstructionSelector::selectAndRestoreState(MachineInstr &I) {
3562 MachineIRBuilderState OldMIBState = MIB.getState();
3563 bool Success = select(I);
3564 MIB.setState(OldMIBState);
3565 return Success;
3566}
3567
3568bool AArch64InstructionSelector::selectMOPS(MachineInstr &GI,
3570 unsigned Mopcode;
3571 switch (GI.getOpcode()) {
3572 case TargetOpcode::G_MEMCPY:
3573 case TargetOpcode::G_MEMCPY_INLINE:
3574 Mopcode = AArch64::MOPSMemoryCopyPseudo;
3575 break;
3576 case TargetOpcode::G_MEMMOVE:
3577 Mopcode = AArch64::MOPSMemoryMovePseudo;
3578 break;
3579 case TargetOpcode::G_MEMSET:
3580 // For tagged memset see llvm.aarch64.mops.memset.tag
3581 Mopcode = AArch64::MOPSMemorySetPseudo;
3582 break;
3583 }
3584
3585 auto &DstPtr = GI.getOperand(0);
3586 auto &SrcOrVal = GI.getOperand(1);
3587 auto &Size = GI.getOperand(2);
3588
3589 // Create copies of the registers that can be clobbered.
3590 const Register DstPtrCopy = MRI.cloneVirtualRegister(DstPtr.getReg());
3591 const Register SrcValCopy = MRI.cloneVirtualRegister(SrcOrVal.getReg());
3592 const Register SizeCopy = MRI.cloneVirtualRegister(Size.getReg());
3593
3594 const bool IsSet = Mopcode == AArch64::MOPSMemorySetPseudo;
3595 const auto &SrcValRegClass =
3596 IsSet ? AArch64::GPR64RegClass : AArch64::GPR64commonRegClass;
3597
3598 // Constrain to specific registers
3599 RBI.constrainGenericRegister(DstPtrCopy, AArch64::GPR64commonRegClass, MRI);
3600 RBI.constrainGenericRegister(SrcValCopy, SrcValRegClass, MRI);
3601 RBI.constrainGenericRegister(SizeCopy, AArch64::GPR64RegClass, MRI);
3602
3603 MIB.buildCopy(DstPtrCopy, DstPtr);
3604 MIB.buildCopy(SrcValCopy, SrcOrVal);
3605 MIB.buildCopy(SizeCopy, Size);
3606
3607 // New instruction uses the copied registers because it must update them.
3608 // The defs are not used since they don't exist in G_MEM*. They are still
3609 // tied.
3610 // Note: order of operands is different from G_MEMSET, G_MEMCPY, G_MEMMOVE
3611 Register DefDstPtr = MRI.createVirtualRegister(&AArch64::GPR64commonRegClass);
3612 Register DefSize = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
3613 if (IsSet) {
3614 MIB.buildInstr(Mopcode, {DefDstPtr, DefSize},
3615 {DstPtrCopy, SizeCopy, SrcValCopy});
3616 } else {
3617 Register DefSrcPtr = MRI.createVirtualRegister(&SrcValRegClass);
3618 MIB.buildInstr(Mopcode, {DefDstPtr, DefSrcPtr, DefSize},
3619 {DstPtrCopy, SrcValCopy, SizeCopy});
3620 }
3621
3622 GI.eraseFromParent();
3623 return true;
3624}
3625
/// Select G_BRJT: an indirect branch through a jump table.  With the
/// "aarch64-jump-table-hardening" function attribute the dispatch is kept as
/// a BR_JumpTable pseudo (expanded later to preserve the integrity of the
/// intermediate values); otherwise a JumpTableDest32 pseudo computes the
/// target address, which is then branched to with BR.
3626bool AArch64InstructionSelector::selectBrJT(MachineInstr &I,
3628 assert(I.getOpcode() == TargetOpcode::G_BRJT && "Expected G_BRJT");
3629 Register JTAddr = I.getOperand(0).getReg();
3630 unsigned JTI = I.getOperand(1).getIndex();
3631 Register Index = I.getOperand(2).getReg();
3632
3633 MF->getInfo<AArch64FunctionInfo>()->setJumpTableEntryInfo(JTI, 4, nullptr);
3634
3635 // With aarch64-jump-table-hardening, we only expand the jump table dispatch
3636 // sequence later, to guarantee the integrity of the intermediate values.
3637 if (MF->getFunction().hasFnAttribute("aarch64-jump-table-hardening")) {
3638 CodeModel::Model CM = TM.getCodeModel();
3639 if (STI.isTargetMachO()) {
3640 if (CM != CodeModel::Small && CM != CodeModel::Large)
3641 report_fatal_error("Unsupported code-model for hardened jump-table");
3642 } else {
3643 // Note that COFF support would likely also need JUMP_TABLE_DEBUG_INFO.
3644 assert(STI.isTargetELF() &&
3645 "jump table hardening only supported on MachO/ELF");
3646 if (CM != CodeModel::Small)
3647 report_fatal_error("Unsupported code-model for hardened jump-table");
3648 }
3649
// The hardened pseudo consumes the index in X16 and carries the jump
// table index as an operand.
3650 MIB.buildCopy({AArch64::X16}, I.getOperand(2).getReg());
3651 MIB.buildInstr(AArch64::BR_JumpTable)
3652 .addJumpTableIndex(I.getOperand(1).getIndex());
3653 I.eraseFromParent();
3654 return true;
3655 }
3656
3657 Register TargetReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
3658 Register ScratchReg = MRI.createVirtualRegister(&AArch64::GPR64spRegClass);
3659
3660 auto JumpTableInst = MIB.buildInstr(AArch64::JumpTableDest32,
3661 {TargetReg, ScratchReg}, {JTAddr, Index})
3662 .addJumpTableIndex(JTI);
3663 // Save the jump table info.
3664 MIB.buildInstr(TargetOpcode::JUMP_TABLE_DEBUG_INFO, {},
3665 {static_cast<int64_t>(JTI)});
3666 // Build the indirect branch.
3667 MIB.buildInstr(AArch64::BR, {}, {TargetReg});
3668 I.eraseFromParent();
3669 return constrainSelectedInstRegOperands(*JumpTableInst, TII, TRI, RBI);
3670}
3671
/// Select G_JUMP_TABLE: materialize the address of a jump table with a
/// MOVaddrJT pseudo, which is expanded to an ADRP + ADD pair later.
3672bool AArch64InstructionSelector::selectJumpTable(MachineInstr &I,
3674 assert(I.getOpcode() == TargetOpcode::G_JUMP_TABLE && "Expected jump table");
3675 assert(I.getOperand(1).isJTI() && "Jump table op should have a JTI!");
3676
3677 Register DstReg = I.getOperand(0).getReg();
3678 unsigned JTI = I.getOperand(1).getIndex();
3679 // We generate a MOVaddrJT which will get expanded to an ADRP + ADD later.
3680 auto MovMI =
3681 MIB.buildInstr(AArch64::MOVaddrJT, {DstReg}, {})
3682 .addJumpTableIndex(JTI, AArch64II::MO_PAGE)
3684 I.eraseFromParent();
3685 return constrainSelectedInstRegOperands(*MovMI, TII, TRI, RBI);
3686}
3687
/// Select a thread-local G_GLOBAL_VALUE on MachO.  Loads the TLV descriptor
/// address from the GOT (MO_TLS), loads the access thunk pointer from the
/// descriptor, and calls the thunk with X0 pointing at the descriptor; the
/// variable's address is returned in X0.  With "ptrauth-calls" the thunk
/// pointer is authenticated (key IA, discriminator 0) via BLRAAZ.
3688bool AArch64InstructionSelector::selectTLSGlobalValue(
3690 if (!STI.isTargetMachO())
3691 return false;
3692 MachineFunction &MF = *I.getParent()->getParent();
3693 MF.getFrameInfo().setAdjustsStack(true);
3694
3695 const auto &GlobalOp = I.getOperand(1);
3696 assert(GlobalOp.getOffset() == 0 &&
3697 "Shouldn't have an offset on TLS globals!");
3698 const GlobalValue &GV = *GlobalOp.getGlobal();
3699
3700 auto LoadGOT =
3701 MIB.buildInstr(AArch64::LOADgot, {&AArch64::GPR64commonRegClass}, {})
3702 .addGlobalAddress(&GV, 0, AArch64II::MO_TLS);
3703
// The first word of the TLV descriptor is the access function pointer.
3704 auto Load = MIB.buildInstr(AArch64::LDRXui, {&AArch64::GPR64commonRegClass},
3705 {LoadGOT.getReg(0)})
3706 .addImm(0);
3707
3708 MIB.buildCopy(Register(AArch64::X0), LoadGOT.getReg(0));
3709 // TLS calls preserve all registers except those that absolutely must be
3710 // trashed: X0 (it takes an argument), LR (it's a call) and NZCV (let's not be
3711 // silly).
3712 unsigned Opcode = getBLRCallOpcode(MF);
3713
3714 // With ptrauth-calls, the tlv access thunk pointer is authenticated (IA, 0).
3715 if (MF.getFunction().hasFnAttribute("ptrauth-calls")) {
3716 assert(Opcode == AArch64::BLR);
3717 Opcode = AArch64::BLRAAZ;
3718 }
3719
3720 MIB.buildInstr(Opcode, {}, {Load})
3721 .addUse(AArch64::X0, RegState::Implicit)
3722 .addDef(AArch64::X0, RegState::Implicit)
3723 .addRegMask(TRI.getTLSCallPreservedMask());
3724
// The thunk returns the variable's address in X0.
3725 MIB.buildCopy(I.getOperand(0).getReg(), Register(AArch64::X0));
3726 RBI.constrainGenericRegister(I.getOperand(0).getReg(), AArch64::GPR64RegClass,
3727 MRI);
3728 I.eraseFromParent();
3729 return true;
3730}
3731
/// Widen a scalar of \p EltSize bits into vector class \p DstRC by inserting
/// it into an IMPLICIT_DEF via INSERT_SUBREG on the subregister matching the
/// element size (bsub/hsub/ssub/dsub).  Returns nullptr for unsupported
/// element sizes.
3732MachineInstr *AArch64InstructionSelector::emitScalarToVector(
3733 unsigned EltSize, const TargetRegisterClass *DstRC, Register Scalar,
3734 MachineIRBuilder &MIRBuilder) const {
3735 auto Undef = MIRBuilder.buildInstr(TargetOpcode::IMPLICIT_DEF, {DstRC}, {});
3736
3737 auto BuildFn = [&](unsigned SubregIndex) {
3738 auto Ins =
3739 MIRBuilder
3740 .buildInstr(TargetOpcode::INSERT_SUBREG, {DstRC}, {Undef, Scalar})
3741 .addImm(SubregIndex);
3744 return &*Ins;
3745 };
3746
3747 switch (EltSize) {
3748 case 8:
3749 return BuildFn(AArch64::bsub);
3750 case 16:
3751 return BuildFn(AArch64::hsub);
3752 case 32:
3753 return BuildFn(AArch64::ssub);
3754 case 64:
3755 return BuildFn(AArch64::dsub);
3756 default:
3757 return nullptr;
3758 }
3759}
3760
/// Narrow \p SrcReg into \p DstReg by copying the low 32-bit (ssub) or
/// 64-bit (dsub) subregister.  Returns the COPY, or nullptr when the
/// destination's register class or size is unsupported.
3762AArch64InstructionSelector::emitNarrowVector(Register DstReg, Register SrcReg,
3763 MachineIRBuilder &MIB,
3764 MachineRegisterInfo &MRI) const {
3765 LLT DstTy = MRI.getType(DstReg);
3766 const TargetRegisterClass *RC =
3767 getRegClassForTypeOnBank(DstTy, *RBI.getRegBank(SrcReg, MRI, TRI));
3768 if (RC != &AArch64::FPR32RegClass && RC != &AArch64::FPR64RegClass) {
3769 LLVM_DEBUG(dbgs() << "Unsupported register class!\n");
3770 return nullptr;
3771 }
3772 unsigned SubReg = 0;
3773 if (!getSubRegForClass(RC, TRI, SubReg))
3774 return nullptr;
3775 if (SubReg != AArch64::ssub && SubReg != AArch64::dsub) {
3776 LLVM_DEBUG(dbgs() << "Unsupported destination size! ("
3777 << DstTy.getSizeInBits() << "\n");
3778 return nullptr;
3779 }
3780 auto Copy = MIB.buildInstr(TargetOpcode::COPY, {DstReg}, {})
3781 .addReg(SrcReg, 0, SubReg);
3782 RBI.constrainGenericRegister(DstReg, *RC, MRI);
3783 return Copy;
3784}
3785
/// Select a two-operand G_MERGE_VALUES.  Handles two cases:
///  - s64 + s64 -> s128: two lane inserts into an IMPLICIT_DEF.
///  - GPR s32 + s32 -> s64: SUBREG_TO_REG for each half, then BFMXri to
///    place the second half into the top 32 bits.
3786bool AArch64InstructionSelector::selectMergeValues(
3788 assert(I.getOpcode() == TargetOpcode::G_MERGE_VALUES && "unexpected opcode");
3789 const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
3790 const LLT SrcTy = MRI.getType(I.getOperand(1).getReg());
3791 assert(!DstTy.isVector() && !SrcTy.isVector() && "invalid merge operation");
3792 const RegisterBank &RB = *RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI);
3793
3794 if (I.getNumOperands() != 3)
3795 return false;
3796
3797 // Merging 2 s64s into an s128.
3798 if (DstTy == LLT::scalar(128)) {
3799 if (SrcTy.getSizeInBits() != 64)
3800 return false;
3801 Register DstReg = I.getOperand(0).getReg();
3802 Register Src1Reg = I.getOperand(1).getReg();
3803 Register Src2Reg = I.getOperand(2).getReg();
3804 auto Tmp = MIB.buildInstr(TargetOpcode::IMPLICIT_DEF, {DstTy}, {});
3805 MachineInstr *InsMI = emitLaneInsert(std::nullopt, Tmp.getReg(0), Src1Reg,
3806 /* LaneIdx */ 0, RB, MIB);
3807 if (!InsMI)
3808 return false;
3809 MachineInstr *Ins2MI = emitLaneInsert(DstReg, InsMI->getOperand(0).getReg(),
3810 Src2Reg, /* LaneIdx */ 1, RB, MIB);
3811 if (!Ins2MI)
3812 return false;
3815 I.eraseFromParent();
3816 return true;
3817 }
3818
3819 if (RB.getID() != AArch64::GPRRegBankID)
3820 return false;
3821
3822 if (DstTy.getSizeInBits() != 64 || SrcTy.getSizeInBits() != 32)
3823 return false;
3824
3825 auto *DstRC = &AArch64::GPR64RegClass;
3826 Register SubToRegDef = MRI.createVirtualRegister(DstRC);
3827 MachineInstr &SubRegMI = *BuildMI(*I.getParent(), I, I.getDebugLoc(),
3828 TII.get(TargetOpcode::SUBREG_TO_REG))
3829 .addDef(SubToRegDef)
3830 .addImm(0)
3831 .addUse(I.getOperand(1).getReg())
3832 .addImm(AArch64::sub_32);
3833 Register SubToRegDef2 = MRI.createVirtualRegister(DstRC);
3834 // Need to anyext the second scalar before we can use bfm
3835 MachineInstr &SubRegMI2 = *BuildMI(*I.getParent(), I, I.getDebugLoc(),
3836 TII.get(TargetOpcode::SUBREG_TO_REG))
3837 .addDef(SubToRegDef2)
3838 .addImm(0)
3839 .addUse(I.getOperand(2).getReg())
3840 .addImm(AArch64::sub_32);
// BFMXri with immr=32, imms=31 inserts the second value into bits [63:32].
3841 MachineInstr &BFM =
3842 *BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::BFMXri))
3843 .addDef(I.getOperand(0).getReg())
3844 .addUse(SubToRegDef)
3845 .addUse(SubToRegDef2)
3846 .addImm(32)
3847 .addImm(31);
3848 constrainSelectedInstRegOperands(SubRegMI, TII, TRI, RBI);
3849 constrainSelectedInstRegOperands(SubRegMI2, TII, TRI, RBI);
3851 I.eraseFromParent();
3852 return true;
3853}
3854
3855static bool getLaneCopyOpcode(unsigned &CopyOpc, unsigned &ExtractSubReg,
3856 const unsigned EltSize) {
3857 // Choose a lane copy opcode and subregister based off of the size of the
3858 // vector's elements.
3859 switch (EltSize) {
3860 case 8:
3861 CopyOpc = AArch64::DUPi8;
3862 ExtractSubReg = AArch64::bsub;
3863 break;
3864 case 16:
3865 CopyOpc = AArch64::DUPi16;
3866 ExtractSubReg = AArch64::hsub;
3867 break;
3868 case 32:
3869 CopyOpc = AArch64::DUPi32;
3870 ExtractSubReg = AArch64::ssub;
3871 break;
3872 case 64:
3873 CopyOpc = AArch64::DUPi64;
3874 ExtractSubReg = AArch64::dsub;
3875 break;
3876 default:
3877 // Unknown size, bail out.
3878 LLVM_DEBUG(dbgs() << "Elt size '" << EltSize << "' unsupported.\n");
3879 return false;
3880 }
3881 return true;
3882}
3883
3884MachineInstr *AArch64InstructionSelector::emitExtractVectorElt(
3885 std::optional<Register> DstReg, const RegisterBank &DstRB, LLT ScalarTy,
3886 Register VecReg, unsigned LaneIdx, MachineIRBuilder &MIRBuilder) const {
3887 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
3888 unsigned CopyOpc = 0;
3889 unsigned ExtractSubReg = 0;
3890 if (!getLaneCopyOpcode(CopyOpc, ExtractSubReg, ScalarTy.getSizeInBits())) {
3891 LLVM_DEBUG(
3892 dbgs() << "Couldn't determine lane copy opcode for instruction.\n");
3893 return nullptr;
3894 }
3895
3896 const TargetRegisterClass *DstRC =
3897 getRegClassForTypeOnBank(ScalarTy, DstRB, true);
3898 if (!DstRC) {
3899 LLVM_DEBUG(dbgs() << "Could not determine destination register class.\n");
3900 return nullptr;
3901 }
3902
3903 const RegisterBank &VecRB = *RBI.getRegBank(VecReg, MRI, TRI);
3904 const LLT &VecTy = MRI.getType(VecReg);
3905 const TargetRegisterClass *VecRC =
3906 getRegClassForTypeOnBank(VecTy, VecRB, true);
3907 if (!VecRC) {
3908 LLVM_DEBUG(dbgs() << "Could not determine source register class.\n");
3909 return nullptr;
3910 }
3911
3912 // The register that we're going to copy into.
3913 Register InsertReg = VecReg;
3914 if (!DstReg)
3915 DstReg = MRI.createVirtualRegister(DstRC);
3916 // If the lane index is 0, we just use a subregister COPY.
3917 if (LaneIdx == 0) {
3918 auto Copy = MIRBuilder.buildInstr(TargetOpcode::COPY, {*DstReg}, {})
3919 .addReg(VecReg, 0, ExtractSubReg);
3920 RBI.constrainGenericRegister(*DstReg, *DstRC, MRI);
3921 return &*Copy;
3922 }
3923
3924 // Lane copies require 128-bit wide registers. If we're dealing with an
3925 // unpacked vector, then we need to move up to that width. Insert an implicit
3926 // def and a subregister insert to get us there.
3927 if (VecTy.getSizeInBits() != 128) {
3928 MachineInstr *ScalarToVector = emitScalarToVector(
3929 VecTy.getSizeInBits(), &AArch64::FPR128RegClass, VecReg, MIRBuilder);
3930 if (!ScalarToVector)
3931 return nullptr;
3932 InsertReg = ScalarToVector->getOperand(0).getReg();
3933 }
3934
3935 MachineInstr *LaneCopyMI =
3936 MIRBuilder.buildInstr(CopyOpc, {*DstReg}, {InsertReg}).addImm(LaneIdx);
3937 constrainSelectedInstRegOperands(*LaneCopyMI, TII, TRI, RBI);
3938
3939 // Make sure that we actually constrain the initial copy.
3940 RBI.constrainGenericRegister(*DstReg, *DstRC, MRI);
3941 return LaneCopyMI;
3942}
3943
/// Select G_EXTRACT_VECTOR_ELT.  Requires an FPR destination bank and a
/// constant lane index; the actual extraction is delegated to
/// emitExtractVectorElt.
3944bool AArch64InstructionSelector::selectExtractElt(
3946 assert(I.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT &&
3947 "unexpected opcode!");
3948 Register DstReg = I.getOperand(0).getReg();
3949 const LLT NarrowTy = MRI.getType(DstReg);
3950 const Register SrcReg = I.getOperand(1).getReg();
3951 const LLT WideTy = MRI.getType(SrcReg);
3952 (void)WideTy;
3953 assert(WideTy.getSizeInBits() >= NarrowTy.getSizeInBits() &&
3954 "source register size too small!");
3955 assert(!NarrowTy.isVector() && "cannot extract vector into vector!");
3956
3957 // Need the lane index to determine the correct copy opcode.
3958 MachineOperand &LaneIdxOp = I.getOperand(2);
3959 assert(LaneIdxOp.isReg() && "Lane index operand was not a register?");
3960
3961 if (RBI.getRegBank(DstReg, MRI, TRI)->getID() != AArch64::FPRRegBankID) {
3962 LLVM_DEBUG(dbgs() << "Cannot extract into GPR.\n");
3963 return false;
3964 }
3965
3966 // Find the index to extract from.
3967 auto VRegAndVal = getIConstantVRegValWithLookThrough(LaneIdxOp.getReg(), MRI);
3968 if (!VRegAndVal)
3969 return false;
3970 unsigned LaneIdx = VRegAndVal->Value.getSExtValue();
3971
3972
3973 const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
3974 MachineInstr *Extract = emitExtractVectorElt(DstReg, DstRB, NarrowTy, SrcReg,
3975 LaneIdx, MIB);
3976 if (!Extract)
3977 return false;
3978
3979 I.eraseFromParent();
3980 return true;
3981}
3982
/// Select a G_UNMERGE_VALUES whose results are vectors: split the (at most
/// 128-bit) source by treating each sub-vector as an extracted "element".
3983bool AArch64InstructionSelector::selectSplitVectorUnmerge(
3985 unsigned NumElts = I.getNumOperands() - 1;
3986 Register SrcReg = I.getOperand(NumElts).getReg();
3987 const LLT NarrowTy = MRI.getType(I.getOperand(0).getReg());
3988 const LLT SrcTy = MRI.getType(SrcReg);
3989
3990 assert(NarrowTy.isVector() && "Expected an unmerge into vectors");
3991 if (SrcTy.getSizeInBits() > 128) {
3992 LLVM_DEBUG(dbgs() << "Unexpected vector type for vec split unmerge");
3993 return false;
3994 }
3995
3996 // We implement a split vector operation by treating the sub-vectors as
3997 // scalars and extracting them.
3998 const RegisterBank &DstRB =
3999 *RBI.getRegBank(I.getOperand(0).getReg(), MRI, TRI);
4000 for (unsigned OpIdx = 0; OpIdx < NumElts; ++OpIdx) {
4001 Register Dst = I.getOperand(OpIdx).getReg();
4002 MachineInstr *Extract =
4003 emitExtractVectorElt(Dst, DstRB, NarrowTy, SrcReg, OpIdx, MIB);
4004 if (!Extract)
4005 return false;
4006 }
4007 I.eraseFromParent();
4008 return true;
4009}
4010
/// Select G_UNMERGE_VALUES (FPR banks only).  Vector results are handled by
/// selectSplitVectorUnmerge.  Scalar results become a subregister COPY for
/// lane 0 plus DUP lane copies for the remaining lanes; sub-128-bit sources
/// are first widened with IMPLICIT_DEF + INSERT_SUBREG since lane copies
/// need 128-bit wide registers.
4011bool AArch64InstructionSelector::selectUnmergeValues(MachineInstr &I,
4013 assert(I.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
4014 "unexpected opcode");
4015
4016 // TODO: Handle unmerging into GPRs and from scalars to scalars.
4017 if (RBI.getRegBank(I.getOperand(0).getReg(), MRI, TRI)->getID() !=
4018 AArch64::FPRRegBankID ||
4019 RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI)->getID() !=
4020 AArch64::FPRRegBankID) {
4021 LLVM_DEBUG(dbgs() << "Unmerging vector-to-gpr and scalar-to-scalar "
4022 "currently unsupported.\n");
4023 return false;
4024 }
4025
4026 // The last operand is the vector source register, and every other operand is
4027 // a register to unpack into.
4028 unsigned NumElts = I.getNumOperands() - 1;
4029 Register SrcReg = I.getOperand(NumElts).getReg();
4030 const LLT NarrowTy = MRI.getType(I.getOperand(0).getReg());
4031 const LLT WideTy = MRI.getType(SrcReg);
4032 (void)WideTy;
4033 assert((WideTy.isVector() || WideTy.getSizeInBits() == 128) &&
4034 "can only unmerge from vector or s128 types!");
4035 assert(WideTy.getSizeInBits() > NarrowTy.getSizeInBits() &&
4036 "source register size too small!");
4037
4038 if (!NarrowTy.isScalar())
4039 return selectSplitVectorUnmerge(I, MRI);
4040
4041 // Choose a lane copy opcode and subregister based off of the size of the
4042 // vector's elements.
4043 unsigned CopyOpc = 0;
4044 unsigned ExtractSubReg = 0;
4045 if (!getLaneCopyOpcode(CopyOpc, ExtractSubReg, NarrowTy.getSizeInBits()))
4046 return false;
4047
4048 // Set up for the lane copies.
4049 MachineBasicBlock &MBB = *I.getParent();
4050
4051 // Stores the registers we'll be copying from.
4052 SmallVector<Register, 4> InsertRegs;
4053
4054 // We'll use the first register twice, so we only need NumElts-1 registers.
4055 unsigned NumInsertRegs = NumElts - 1;
4056
4057 // If our elements fit into exactly 128 bits, then we can copy from the source
4058 // directly. Otherwise, we need to do a bit of setup with some subregister
4059 // inserts.
4060 if (NarrowTy.getSizeInBits() * NumElts == 128) {
4061 InsertRegs = SmallVector<Register, 4>(NumInsertRegs, SrcReg);
4062 } else {
4063 // No. We have to perform subregister inserts. For each insert, create an
4064 // implicit def and a subregister insert, and save the register we create.
4065 const TargetRegisterClass *RC = getRegClassForTypeOnBank(
4066 LLT::fixed_vector(NumElts, WideTy.getScalarSizeInBits()),
4067 *RBI.getRegBank(SrcReg, MRI, TRI));
4068 unsigned SubReg = 0;
4069 bool Found = getSubRegForClass(RC, TRI, SubReg);
4070 (void)Found;
4071 assert(Found && "expected to find last operand's subeg idx");
4072 for (unsigned Idx = 0; Idx < NumInsertRegs; ++Idx) {
4073 Register ImpDefReg = MRI.createVirtualRegister(&AArch64::FPR128RegClass);
4074 MachineInstr &ImpDefMI =
4075 *BuildMI(MBB, I, I.getDebugLoc(), TII.get(TargetOpcode::IMPLICIT_DEF),
4076 ImpDefReg);
4077
4078 // Now, create the subregister insert from SrcReg.
4079 Register InsertReg = MRI.createVirtualRegister(&AArch64::FPR128RegClass);
4080 MachineInstr &InsMI =
4081 *BuildMI(MBB, I, I.getDebugLoc(),
4082 TII.get(TargetOpcode::INSERT_SUBREG), InsertReg)
4083 .addUse(ImpDefReg)
4084 .addUse(SrcReg)
4085 .addImm(SubReg);
4086
4087 constrainSelectedInstRegOperands(ImpDefMI, TII, TRI, RBI);
4089
4090 // Save the register so that we can copy from it after.
4091 InsertRegs.push_back(InsertReg);
4092 }
4093 }
4094
4095 // Now that we've created any necessary subregister inserts, we can
4096 // create the copies.
4097 //
4098 // Perform the first copy separately as a subregister copy.
4099 Register CopyTo = I.getOperand(0).getReg();
4100 auto FirstCopy = MIB.buildInstr(TargetOpcode::COPY, {CopyTo}, {})
4101 .addReg(InsertRegs[0], 0, ExtractSubReg);
4102 constrainSelectedInstRegOperands(*FirstCopy, TII, TRI, RBI);
4103
4104 // Now, perform the remaining copies as vector lane copies.
4105 unsigned LaneIdx = 1;
4106 for (Register InsReg : InsertRegs) {
4107 Register CopyTo = I.getOperand(LaneIdx).getReg();
4108 MachineInstr &CopyInst =
4109 *BuildMI(MBB, I, I.getDebugLoc(), TII.get(CopyOpc), CopyTo)
4110 .addUse(InsReg)
4111 .addImm(LaneIdx);
4112 constrainSelectedInstRegOperands(CopyInst, TII, TRI, RBI);
4113 ++LaneIdx;
4114 }
4115
4116 // Separately constrain the first copy's destination. Because of the
4117 // limitation in constrainOperandRegClass, we can't guarantee that this will
4118 // actually be constrained. So, do it ourselves using the second operand.
4119 const TargetRegisterClass *RC =
4120 MRI.getRegClassOrNull(I.getOperand(1).getReg());
4121 if (!RC) {
4122 LLVM_DEBUG(dbgs() << "Couldn't constrain copy destination.\n");
4123 return false;
4124 }
4125
4126 RBI.constrainGenericRegister(CopyTo, *RC, MRI);
4127 I.eraseFromParent();
4128 return true;
4129}
4130
/// Select G_CONCAT_VECTORS by delegating the concatenation of the two source
/// vectors to emitVectorConcat.
4131bool AArch64InstructionSelector::selectConcatVectors(
4133 assert(I.getOpcode() == TargetOpcode::G_CONCAT_VECTORS &&
4134 "Unexpected opcode");
4135 Register Dst = I.getOperand(0).getReg();
4136 Register Op1 = I.getOperand(1).getReg();
4137 Register Op2 = I.getOperand(2).getReg();
4138 MachineInstr *ConcatMI = emitVectorConcat(Dst, Op1, Op2, MIB);
4139 if (!ConcatMI)
4140 return false;
4141 I.eraseFromParent();
4142 return true;
4143}
4144
/// Add \p CPVal to the function's constant pool at its preferred alignment
/// and return the resulting constant pool index.
4145unsigned
4146AArch64InstructionSelector::emitConstantPoolEntry(const Constant *CPVal,
4147 MachineFunction &MF) const {
4148 Type *CPTy = CPVal->getType();
4149 Align Alignment = MF.getDataLayout().getPrefTypeAlign(CPTy);
4150
4152 return MCP->getConstantPoolIndex(CPVal, Alignment);
4153}
4154
/// Materialize \p CPVal through a constant-pool load.  The FPR register
/// class and load opcode are chosen from the value's store size (2/4/8/16
/// bytes).  The tiny code model uses a single literal load for 4/8/16-byte
/// values; otherwise an ADRP + scaled-offset load pair is emitted.  Returns
/// nullptr for unsupported sizes.
4155MachineInstr *AArch64InstructionSelector::emitLoadFromConstantPool(
4156 const Constant *CPVal, MachineIRBuilder &MIRBuilder) const {
4157 const TargetRegisterClass *RC;
4158 unsigned Opc;
4159 bool IsTiny = TM.getCodeModel() == CodeModel::Tiny;
4160 unsigned Size = MIRBuilder.getDataLayout().getTypeStoreSize(CPVal->getType());
4161 switch (Size) {
4162 case 16:
4163 RC = &AArch64::FPR128RegClass;
4164 Opc = IsTiny ? AArch64::LDRQl : AArch64::LDRQui;
4165 break;
4166 case 8:
4167 RC = &AArch64::FPR64RegClass;
4168 Opc = IsTiny ? AArch64::LDRDl : AArch64::LDRDui;
4169 break;
4170 case 4:
4171 RC = &AArch64::FPR32RegClass;
4172 Opc = IsTiny ? AArch64::LDRSl : AArch64::LDRSui;
4173 break;
4174 case 2:
4175 RC = &AArch64::FPR16RegClass;
4176 Opc = AArch64::LDRHui;
4177 break;
4178 default:
4179 LLVM_DEBUG(dbgs() << "Could not load from constant pool of type "
4180 << *CPVal->getType());
4181 return nullptr;
4182 }
4183
4184 MachineInstr *LoadMI = nullptr;
4185 auto &MF = MIRBuilder.getMF();
4186 unsigned CPIdx = emitConstantPoolEntry(CPVal, MF);
4187 if (IsTiny && (Size == 16 || Size == 8 || Size == 4)) {
4188 // Use load(literal) for tiny code model.
4189 LoadMI = &*MIRBuilder.buildInstr(Opc, {RC}, {}).addConstantPoolIndex(CPIdx);
4190 } else {
4191 auto Adrp =
4192 MIRBuilder.buildInstr(AArch64::ADRP, {&AArch64::GPR64RegClass}, {})
4193 .addConstantPoolIndex(CPIdx, 0, AArch64II::MO_PAGE);
4194
4195 LoadMI = &*MIRBuilder.buildInstr(Opc, {RC}, {Adrp})
4196 .addConstantPoolIndex(
4198
4200 }
4201
// Attach a memory operand so later passes know this is a constant load.
4203 LoadMI->addMemOperand(MF, MF.getMachineMemOperand(PtrInfo,
4205 Size, Align(Size)));
4207 return LoadMI;
4208}
4209
4210/// Return an <Opcode, SubregIndex> pair to do an vector elt insert of a given
4211/// size and RB.
4212static std::pair<unsigned, unsigned>
4213getInsertVecEltOpInfo(const RegisterBank &RB, unsigned EltSize) {
4214 unsigned Opc, SubregIdx;
4215 if (RB.getID() == AArch64::GPRRegBankID) {
4216 if (EltSize == 8) {
4217 Opc = AArch64::INSvi8gpr;
4218 SubregIdx = AArch64::bsub;
4219 } else if (EltSize == 16) {
4220 Opc = AArch64::INSvi16gpr;
4221 SubregIdx = AArch64::ssub;
4222 } else if (EltSize == 32) {
4223 Opc = AArch64::INSvi32gpr;
4224 SubregIdx = AArch64::ssub;
4225 } else if (EltSize == 64) {
4226 Opc = AArch64::INSvi64gpr;
4227 SubregIdx = AArch64::dsub;
4228 } else {
4229 llvm_unreachable("invalid elt size!");
4230 }
4231 } else {
4232 if (EltSize == 8) {
4233 Opc = AArch64::INSvi8lane;
4234 SubregIdx = AArch64::bsub;
4235 } else if (EltSize == 16) {
4236 Opc = AArch64::INSvi16lane;
4237 SubregIdx = AArch64::hsub;
4238 } else if (EltSize == 32) {
4239 Opc = AArch64::INSvi32lane;
4240 SubregIdx = AArch64::ssub;
4241 } else if (EltSize == 64) {
4242 Opc = AArch64::INSvi64lane;
4243 SubregIdx = AArch64::dsub;
4244 } else {
4245 llvm_unreachable("invalid elt size!");
4246 }
4247 }
4248 return std::make_pair(Opc, SubregIdx);
4249}
4250
/// Build an already-selected (non-generic) instruction with the given
/// destination and source operands, applying any complex renderer functions
/// in \p RenderFns to it before returning.
4251MachineInstr *AArch64InstructionSelector::emitInstr(
4252 unsigned Opcode, std::initializer_list<llvm::DstOp> DstOps,
4253 std::initializer_list<llvm::SrcOp> SrcOps, MachineIRBuilder &MIRBuilder,
4254 const ComplexRendererFns &RenderFns) const {
4255 assert(Opcode && "Expected an opcode?");
4256 assert(!isPreISelGenericOpcode(Opcode) &&
4257 "Function should only be used to produce selected instructions!");
4258 auto MI = MIRBuilder.buildInstr(Opcode, DstOps, SrcOps);
4259 if (RenderFns)
4260 for (auto &Fn : *RenderFns)
4261 Fn(MI);
4263 return &*MI;
4264}
4265
4266MachineInstr *AArch64InstructionSelector::emitAddSub(
4267 const std::array<std::array<unsigned, 2>, 5> &AddrModeAndSizeToOpcode,
4268 Register Dst, MachineOperand &LHS, MachineOperand &RHS,
4269 MachineIRBuilder &MIRBuilder) const {
4270 MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
4271 assert(LHS.isReg() && RHS.isReg() && "Expected register operands?");
4272 auto Ty = MRI.getType(LHS.getReg());
4273 assert(!Ty.isVector() && "Expected a scalar or pointer?");
4274 unsigned Size = Ty.getSizeInBits();
4275 assert((Size == 32 || Size == 64) && "Expected a 32-bit or 64-bit type only");
4276 bool Is32Bit = Size == 32;
4277
4278 // INSTRri form with positive arithmetic immediate.
4279 if (auto Fns = selectArithImmed(RHS))
4280 return emitInstr(AddrModeAndSizeToOpcode[0][Is32Bit], {Dst}, {LHS},
4281 MIRBuilder, Fns);
4282
4283 // INSTRri form with negative arithmetic immediate.
4284 if (auto Fns = selectNegArithImmed(RHS))
4285 return emitInstr(AddrModeAndSizeToOpcode[3][Is32Bit], {Dst}, {LHS},
4286 MIRBuilder, Fns);
4287
4288 // INSTRrx form.
4289 if (auto Fns = selectArithExtendedRegister(RHS))
4290 return emitInstr(AddrModeAndSizeToOpcode[4][Is32Bit], {Dst}, {LHS},
4291 MIRBuilder, Fns);
4292
4293 // INSTRrs form.
4294 if (auto Fns = selectShiftedRegister(RHS))
4295 return emitInstr(AddrModeAndSizeToOpcode[1][Is32Bit], {Dst}, {LHS},
4296 MIRBuilder, Fns);
4297 return emitInstr(AddrModeAndSizeToOpcode[2][Is32Bit], {Dst}, {LHS, RHS},
4298 MIRBuilder);
4299}
4300
/// Emit an ADD of \p LHS and \p RHS into \p DefReg via emitAddSub's opcode
/// table (forms: ri, rs, rr, negated-ri -> SUB, rx).
4302AArch64InstructionSelector::emitADD(Register DefReg, MachineOperand &LHS,
4303 MachineOperand &RHS,
4304 MachineIRBuilder &MIRBuilder) const {
4305 const std::array<std::array<unsigned, 2>, 5> OpcTable{
4306 {{AArch64::ADDXri, AArch64::ADDWri},
4307 {AArch64::ADDXrs, AArch64::ADDWrs},
4308 {AArch64::ADDXrr, AArch64::ADDWrr},
4309 {AArch64::SUBXri, AArch64::SUBWri},
4310 {AArch64::ADDXrx, AArch64::ADDWrx}}};
4311 return emitAddSub(OpcTable, DefReg, LHS, RHS, MIRBuilder);
4312}
4313
/// Emit a flag-setting ADDS of \p LHS and \p RHS into \p Dst via
/// emitAddSub's opcode table (negated immediates fall back to SUBS).
4315AArch64InstructionSelector::emitADDS(Register Dst, MachineOperand &LHS,
4316 MachineOperand &RHS,
4317 MachineIRBuilder &MIRBuilder) const {
4318 const std::array<std::array<unsigned, 2>, 5> OpcTable{
4319 {{AArch64::ADDSXri, AArch64::ADDSWri},
4320 {AArch64::ADDSXrs, AArch64::ADDSWrs},
4321 {AArch64::ADDSXrr, AArch64::ADDSWrr},
4322 {AArch64::SUBSXri, AArch64::SUBSWri},
4323 {AArch64::ADDSXrx, AArch64::ADDSWrx}}};
4324 return emitAddSub(OpcTable, Dst, LHS, RHS, MIRBuilder);
4325}
4326
/// Emit a flag-setting SUBS of \p LHS and \p RHS into \p Dst via
/// emitAddSub's opcode table (negated immediates fall back to ADDS).
4328AArch64InstructionSelector::emitSUBS(Register Dst, MachineOperand &LHS,
4329 MachineOperand &RHS,
4330 MachineIRBuilder &MIRBuilder) const {
4331 const std::array<std::array<unsigned, 2>, 5> OpcTable{
4332 {{AArch64::SUBSXri, AArch64::SUBSWri},
4333 {AArch64::SUBSXrs, AArch64::SUBSWrs},
4334 {AArch64::SUBSXrr, AArch64::SUBSWrr},
4335 {AArch64::ADDSXri, AArch64::ADDSWri},
4336 {AArch64::SUBSXrx, AArch64::SUBSWrx}}};
4337 return emitAddSub(OpcTable, Dst, LHS, RHS, MIRBuilder);
4338}
4339
/// Emit a flag-setting add-with-carry (ADCS), choosing the 32- or 64-bit
/// form from the type of \p LHS.
4341AArch64InstructionSelector::emitADCS(Register Dst, MachineOperand &LHS,
4342 MachineOperand &RHS,
4343 MachineIRBuilder &MIRBuilder) const {
4344 assert(LHS.isReg() && RHS.isReg() && "Expected register operands?");
4345 MachineRegisterInfo *MRI = MIRBuilder.getMRI();
4346 bool Is32Bit = (MRI->getType(LHS.getReg()).getSizeInBits() == 32);
4347 static const unsigned OpcTable[2] = {AArch64::ADCSXr, AArch64::ADCSWr};
4348 return emitInstr(OpcTable[Is32Bit], {Dst}, {LHS, RHS}, MIRBuilder);
4349}
4350
/// Emit a flag-setting subtract-with-carry (SBCS), choosing the 32- or
/// 64-bit form from the type of \p LHS.
4352AArch64InstructionSelector::emitSBCS(Register Dst, MachineOperand &LHS,
4353 MachineOperand &RHS,
4354 MachineIRBuilder &MIRBuilder) const {
4355 assert(LHS.isReg() && RHS.isReg() && "Expected register operands?");
4356 MachineRegisterInfo *MRI = MIRBuilder.getMRI();
4357 bool Is32Bit = (MRI->getType(LHS.getReg()).getSizeInBits() == 32);
4358 static const unsigned OpcTable[2] = {AArch64::SBCSXr, AArch64::SBCSWr};
4359 return emitInstr(OpcTable[Is32Bit], {Dst}, {LHS, RHS}, MIRBuilder);
4360}
4361
/// Emit a compare-negative (CMN): an ADDS into a fresh scratch register,
/// used only for its NZCV flags.
4363AArch64InstructionSelector::emitCMN(MachineOperand &LHS, MachineOperand &RHS,
4364 MachineIRBuilder &MIRBuilder) const {
4365 MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
4366 bool Is32Bit = (MRI.getType(LHS.getReg()).getSizeInBits() == 32);
4367 auto RC = Is32Bit ? &AArch64::GPR32RegClass : &AArch64::GPR64RegClass;
4368 return emitADDS(MRI.createVirtualRegister(RC), LHS, RHS, MIRBuilder);
4369}
4370
/// Emit a TST (flag-setting AND with discarded result).  Uses the ANDS
/// immediate form when RHS is a constant that encodes as a logical
/// immediate, else the shifted-register or plain register form.
4372AArch64InstructionSelector::emitTST(MachineOperand &LHS, MachineOperand &RHS,
4373 MachineIRBuilder &MIRBuilder) const {
4374 assert(LHS.isReg() && RHS.isReg() && "Expected register operands?");
4375 MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
4376 LLT Ty = MRI.getType(LHS.getReg());
4377 unsigned RegSize = Ty.getSizeInBits();
4378 bool Is32Bit = (RegSize == 32);
4379 const unsigned OpcTable[3][2] = {{AArch64::ANDSXri, AArch64::ANDSWri},
4380 {AArch64::ANDSXrs, AArch64::ANDSWrs},
4381 {AArch64::ANDSXrr, AArch64::ANDSWrr}};
4382 // ANDS needs a logical immediate for its immediate form. Check if we can
4383 // fold one in.
4384 if (auto ValAndVReg = getIConstantVRegValWithLookThrough(RHS.getReg(), MRI)) {
4385 int64_t Imm = ValAndVReg->Value.getSExtValue();
4386
4388 auto TstMI = MIRBuilder.buildInstr(OpcTable[0][Is32Bit], {Ty}, {LHS});
4391 return &*TstMI;
4392 }
4393 }
4394
4395 if (auto Fns = selectLogicalShiftedRegister(RHS))
4396 return emitInstr(OpcTable[1][Is32Bit], {Ty}, {LHS}, MIRBuilder, Fns);
4397 return emitInstr(OpcTable[2][Is32Bit], {Ty}, {LHS, RHS}, MIRBuilder);
4398}
4399
4400MachineInstr *AArch64InstructionSelector::emitIntegerCompare(
4401 MachineOperand &LHS, MachineOperand &RHS, MachineOperand &Predicate,
4402 MachineIRBuilder &MIRBuilder) const {
4403 assert(LHS.isReg() && RHS.isReg() && "Expected LHS and RHS to be registers!");
4404 assert(Predicate.isPredicate() && "Expected predicate?");
4405 MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
4406 LLT CmpTy = MRI.getType(LHS.getReg());
4407 assert(!CmpTy.isVector() && "Expected scalar or pointer");
4408 unsigned Size = CmpTy.getSizeInBits();
4409 (void)Size;
4410 assert((Size == 32 || Size == 64) && "Expected a 32-bit or 64-bit LHS/RHS?");
4411 // Fold the compare into a cmn or tst if possible.
4412 if (auto FoldCmp = tryFoldIntegerCompare(LHS, RHS, Predicate, MIRBuilder))
4413 return FoldCmp;
4414 auto Dst = MRI.cloneVirtualRegister(LHS.getReg());
4415 return emitSUBS(Dst, LHS, RHS, MIRBuilder);
4416}
4417
/// Materialize a 0/1 value in \p Dst for floating-point predicate \p Pred,
/// assuming NZCV has already been set by a compare.  A single-condition
/// predicate becomes one CSINC; a two-condition predicate becomes two CSINCs
/// whose results are ORed together.
4418MachineInstr *AArch64InstructionSelector::emitCSetForFCmp(
4419 Register Dst, CmpInst::Predicate Pred, MachineIRBuilder &MIRBuilder) const {
4420 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
4421#ifndef NDEBUG
4422 LLT Ty = MRI.getType(Dst);
4423 assert(!Ty.isVector() && Ty.getSizeInBits() == 32 &&
4424 "Expected a 32-bit scalar register?");
4425#endif
4426 const Register ZReg = AArch64::WZR;
4427 AArch64CC::CondCode CC1, CC2;
4428 changeFCMPPredToAArch64CC(Pred, CC1, CC2);
// CSINC with inverted condition and both sources WZR yields 1 when the
// original condition holds, 0 otherwise.
4429 auto InvCC1 = AArch64CC::getInvertedCondCode(CC1);
4430 if (CC2 == AArch64CC::AL)
4431 return emitCSINC(/*Dst=*/Dst, /*Src1=*/ZReg, /*Src2=*/ZReg, InvCC1,
4432 MIRBuilder);
4433 const TargetRegisterClass *RC = &AArch64::GPR32RegClass;
4434 Register Def1Reg = MRI.createVirtualRegister(RC);
4435 Register Def2Reg = MRI.createVirtualRegister(RC);
4436 auto InvCC2 = AArch64CC::getInvertedCondCode(CC2);
4437 emitCSINC(/*Dst=*/Def1Reg, /*Src1=*/ZReg, /*Src2=*/ZReg, InvCC1, MIRBuilder);
4438 emitCSINC(/*Dst=*/Def2Reg, /*Src1=*/ZReg, /*Src2=*/ZReg, InvCC2, MIRBuilder);
4439 auto OrMI = MIRBuilder.buildInstr(AArch64::ORRWrr, {Dst}, {Def1Reg, Def2Reg});
4441 return &*OrMI;
4442}
4443
/// Emit a scalar floating-point compare of \p LHS and \p RHS (16-, 32- or
/// 64-bit).  A compare against +0.0 uses the immediate form (no constant
/// materialization); for equality predicates the operands may be commuted to
/// enable it.  Returns nullptr for vector types.
4444MachineInstr *AArch64InstructionSelector::emitFPCompare(
4445 Register LHS, Register RHS, MachineIRBuilder &MIRBuilder,
4446 std::optional<CmpInst::Predicate> Pred) const {
4447 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
4448 LLT Ty = MRI.getType(LHS);
4449 if (Ty.isVector())
4450 return nullptr;
4451 unsigned OpSize = Ty.getSizeInBits();
4452 assert(OpSize == 16 || OpSize == 32 || OpSize == 64);
4453
4454 // If this is a compare against +0.0, then we don't have
4455 // to explicitly materialize a constant.
4456 const ConstantFP *FPImm = getConstantFPVRegVal(RHS, MRI);
4457 bool ShouldUseImm = FPImm && (FPImm->isZero() && !FPImm->isNegative());
4458
4459 auto IsEqualityPred = [](CmpInst::Predicate P) {
4460 return P == CmpInst::FCMP_OEQ || P == CmpInst::FCMP_ONE ||
4462 };
4463 if (!ShouldUseImm && Pred && IsEqualityPred(*Pred)) {
4464 // Try commutating the operands.
4465 const ConstantFP *LHSImm = getConstantFPVRegVal(LHS, MRI);
4466 if (LHSImm && (LHSImm->isZero() && !LHSImm->isNegative())) {
4467 ShouldUseImm = true;
4468 std::swap(LHS, RHS);
4469 }
4470 }
// Table indexed [use-imm][size bucket: 16/32/64-bit].
4471 unsigned CmpOpcTbl[2][3] = {
4472 {AArch64::FCMPHrr, AArch64::FCMPSrr, AArch64::FCMPDrr},
4473 {AArch64::FCMPHri, AArch64::FCMPSri, AArch64::FCMPDri}};
4474 unsigned CmpOpc =
4475 CmpOpcTbl[ShouldUseImm][OpSize == 16 ? 0 : (OpSize == 32 ? 1 : 2)];
4476
4477 // Partially build the compare. Decide if we need to add a use for the
4478 // third operand based off whether or not we're comparing against 0.0.
4479 auto CmpMI = MIRBuilder.buildInstr(CmpOpc).addUse(LHS);
4481 if (!ShouldUseImm)
4482 CmpMI.addUse(RHS);
4484 return &*CmpMI;
4485}
4486
/// Concatenate two equal-typed 64-bit vectors \p Op1 and \p Op2 into a
/// 128-bit result: widen Op1 into the low half with scalar_to_vector, then
/// insert Op2 into the upper element.  Creates a destination register when
/// \p Dst is not provided; returns nullptr for unsupported types.
4487MachineInstr *AArch64InstructionSelector::emitVectorConcat(
4488 std::optional<Register> Dst, Register Op1, Register Op2,
4489 MachineIRBuilder &MIRBuilder) const {
4490 // We implement a vector concat by:
4491 // 1. Use scalar_to_vector to insert the lower vector into the larger dest
4492 // 2. Insert the upper vector into the destination's upper element
4493 // TODO: some of this code is common with G_BUILD_VECTOR handling.
4494 MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
4495
4496 const LLT Op1Ty = MRI.getType(Op1);
4497 const LLT Op2Ty = MRI.getType(Op2);
4498
4499 if (Op1Ty != Op2Ty) {
4500 LLVM_DEBUG(dbgs() << "Could not do vector concat of differing vector tys");
4501 return nullptr;
4502 }
4503 assert(Op1Ty.isVector() && "Expected a vector for vector concat");
4504
4505 if (Op1Ty.getSizeInBits() >= 128) {
4506 LLVM_DEBUG(dbgs() << "Vector concat not supported for full size vectors");
4507 return nullptr;
4508 }
4509
4510 // At the moment we just support 64 bit vector concats.
4511 if (Op1Ty.getSizeInBits() != 64) {
4512 LLVM_DEBUG(dbgs() << "Vector concat supported for 64b vectors");
4513 return nullptr;
4514 }
4515
// Treat each 64-bit operand as a single scalar lane of the wide vector.
4516 const LLT ScalarTy = LLT::scalar(Op1Ty.getSizeInBits());
4517 const RegisterBank &FPRBank = *RBI.getRegBank(Op1, MRI, TRI);
4518 const TargetRegisterClass *DstRC =
4519 getRegClassForTypeOnBank(Op1Ty.multiplyElements(2), FPRBank);
4520
4521 MachineInstr *WidenedOp1 =
4522 emitScalarToVector(ScalarTy.getSizeInBits(), DstRC, Op1, MIRBuilder);
4523 MachineInstr *WidenedOp2 =
4524 emitScalarToVector(ScalarTy.getSizeInBits(), DstRC, Op2, MIRBuilder);
4525 if (!WidenedOp1 || !WidenedOp2) {
4526 LLVM_DEBUG(dbgs() << "Could not emit a vector from scalar value");
4527 return nullptr;
4528 }
4529
4530 // Now do the insert of the upper element.
4531 unsigned InsertOpc, InsSubRegIdx;
4532 std::tie(InsertOpc, InsSubRegIdx) =
4533 getInsertVecEltOpInfo(FPRBank, ScalarTy.getSizeInBits());
4534
4535 if (!Dst)
4536 Dst = MRI.createVirtualRegister(DstRC);
4537 auto InsElt =
4538 MIRBuilder
4539 .buildInstr(InsertOpc, {*Dst}, {WidenedOp1->getOperand(0).getReg()})
4540 .addImm(1) /* Lane index */
4541 .addUse(WidenedOp2->getOperand(0).getReg())
4542 .addImm(0);
4544 return &*InsElt;
4545}
4546
/// Emit a CSINC Dst = CSINC Src1, Src2, Pred, selecting the W- or X-form
/// opcode from the width of \p Dst. The width comes from Dst's register
/// class when one is assigned, otherwise from its LLT.
/// NOTE(review): the `MachineInstr *` return-type line (original 4547) and
/// the constrainSelectedInstRegOperands call (original 4565) appear to have
/// been dropped from this excerpt — confirm against upstream.
4548AArch64InstructionSelector::emitCSINC(Register Dst, Register Src1,
4549 Register Src2, AArch64CC::CondCode Pred,
4550 MachineIRBuilder &MIRBuilder) const {
4551 auto &MRI = *MIRBuilder.getMRI();
4552 const RegClassOrRegBank &RegClassOrBank = MRI.getRegClassOrRegBank(Dst);
4553 // If we used a register class, then this won't necessarily have an LLT.
4554 // Compute the size based off whether or not we have a class or bank.
4555 unsigned Size;
4556 if (const auto *RC = RegClassOrBank.dyn_cast<const TargetRegisterClass *>())
4557 Size = TRI.getRegSizeInBits(*RC);
4558 else
4559 Size = MRI.getType(Dst).getSizeInBits();
4560 // Some opcodes use s1.
4561 assert(Size <= 64 && "Expected 64 bits or less only!");
// Index 0 = 32-bit CSINCWr, index 1 = 64-bit CSINCXr; any size < 64 (incl.
// s1) uses the W-form.
4562 static const unsigned OpcTable[2] = {AArch64::CSINCWr, AArch64::CSINCXr};
4563 unsigned Opc = OpcTable[Size == 64];
4564 auto CSINC = MIRBuilder.buildInstr(Opc, {Dst}, {Src1, Src2}).addImm(Pred);
4566 return &*CSINC;
4567}
4568
/// Materialize the carry-in for a carry-using op \p I (G_UADDE/G_SADDE/
/// G_USUBE/G_SSUBE) by setting NZCV from the \p CarryReg vreg. Returns the
/// flag-setting instruction, or nullptr when the immediately preceding
/// instruction already produces the needed carry and was selected in place.
/// NOTE(review): one line (original 4571) is missing from this excerpt;
/// `MRI` used below is presumably a selector member pointer — confirm.
4569MachineInstr *AArch64InstructionSelector::emitCarryIn(MachineInstr &I,
4570 Register CarryReg) {
4572 unsigned Opcode = I.getOpcode();
4573
4574 // If the instruction is a SUB, we need to negate the carry,
4575 // because borrowing is indicated by carry-flag == 0.
4576 bool NeedsNegatedCarry =
4577 (Opcode == TargetOpcode::G_USUBE || Opcode == TargetOpcode::G_SSUBE);
4578
4579 // If the previous instruction will already produce the correct carry, do not
4580 // emit a carry generating instruction. E.g. for G_UADDE/G_USUBE sequences
4581 // generated during legalization of wide add/sub. This optimization depends on
4582 // these sequences not being interrupted by other instructions.
4583 // We have to select the previous instruction before the carry-using
4584 // instruction is deleted by the calling function, otherwise the previous
4585 // instruction might become dead and would get deleted.
4586 MachineInstr *SrcMI = MRI->getVRegDef(CarryReg);
4587 if (SrcMI == I.getPrevNode()) {
4588 if (auto *CarrySrcMI = dyn_cast<GAddSubCarryOut>(SrcMI)) {
4589 bool ProducesNegatedCarry = CarrySrcMI->isSub();
4590 if (NeedsNegatedCarry == ProducesNegatedCarry &&
4591 CarrySrcMI->isUnsigned() &&
4592 CarrySrcMI->getCarryOutReg() == CarryReg &&
4593 selectAndRestoreState(*SrcMI))
4594 return nullptr;
4595 }
4596 }
4597
// The flag-setting SUBS result itself is unused; only NZCV matters.
4598 Register DeadReg = MRI->createVirtualRegister(&AArch64::GPR32RegClass);
4599
4600 if (NeedsNegatedCarry) {
4601 // (0 - Carry) sets !C in NZCV when Carry == 1
4602 Register ZReg = AArch64::WZR;
4603 return emitInstr(AArch64::SUBSWrr, {DeadReg}, {ZReg, CarryReg}, MIB);
4604 }
4605
4606 // (Carry - 1) sets !C in NZCV when Carry == 0
4607 auto Fns = select12BitValueWithLeftShift(1);
4608 return emitInstr(AArch64::SUBSWri, {DeadReg}, {CarryReg}, MIB, Fns);
4609}
4610
/// Select a G_[US]ADDO/[US]SUBO/[US]ADDE/[US]SUBE instruction: set up the
/// carry-in if required, emit the flag-setting arithmetic, then (if the
/// carry-out is used) convert NZCV back into a vreg with CSINC.
/// NOTE(review): the second parameter line (original 4612, presumably
/// `MachineRegisterInfo &MRI) {`) is missing from this excerpt — confirm.
4611bool AArch64InstructionSelector::selectOverflowOp(MachineInstr &I,
4613 auto &CarryMI = cast<GAddSubCarryOut>(I);
4614
4615 if (auto *CarryInMI = dyn_cast<GAddSubCarryInOut>(&I)) {
4616 // Set NZCV carry according to carry-in VReg
4617 emitCarryIn(I, CarryInMI->getCarryInReg());
4618 }
4619
4620 // Emit the operation and get the correct condition code.
4621 auto OpAndCC = emitOverflowOp(I.getOpcode(), CarryMI.getDstReg(),
4622 CarryMI.getLHS(), CarryMI.getRHS(), MIB);
4623
4624 Register CarryOutReg = CarryMI.getCarryOutReg();
4625
4626 // Don't convert carry-out to VReg if it is never used
4627 if (!MRI.use_nodbg_empty(CarryOutReg)) {
4628 // Now, put the overflow result in the register given by the first operand
4629 // to the overflow op. CSINC increments the result when the predicate is
4630 // false, so to get the increment when it's true, we need to use the
4631 // inverse. In this case, we want to increment when carry is set.
4632 Register ZReg = AArch64::WZR;
4633 emitCSINC(/*Dst=*/CarryOutReg, /*Src1=*/ZReg, /*Src2=*/ZReg,
4634 getInvertedCondCode(OpAndCC.second), MIB);
4635 }
4636
4637 I.eraseFromParent();
4638 return true;
4639}
4640
4641std::pair<MachineInstr *, AArch64CC::CondCode>
4642AArch64InstructionSelector::emitOverflowOp(unsigned Opcode, Register Dst,
4643 MachineOperand &LHS,
4644 MachineOperand &RHS,
4645 MachineIRBuilder &MIRBuilder) const {
4646 switch (Opcode) {
4647 default:
4648 llvm_unreachable("Unexpected opcode!");
4649 case TargetOpcode::G_SADDO:
4650 return std::make_pair(emitADDS(Dst, LHS, RHS, MIRBuilder), AArch64CC::VS);
4651 case TargetOpcode::G_UADDO:
4652 return std::make_pair(emitADDS(Dst, LHS, RHS, MIRBuilder), AArch64CC::HS);
4653 case TargetOpcode::G_SSUBO:
4654 return std::make_pair(emitSUBS(Dst, LHS, RHS, MIRBuilder), AArch64CC::VS);
4655 case TargetOpcode::G_USUBO:
4656 return std::make_pair(emitSUBS(Dst, LHS, RHS, MIRBuilder), AArch64CC::LO);
4657 case TargetOpcode::G_SADDE:
4658 return std::make_pair(emitADCS(Dst, LHS, RHS, MIRBuilder), AArch64CC::VS);
4659 case TargetOpcode::G_UADDE:
4660 return std::make_pair(emitADCS(Dst, LHS, RHS, MIRBuilder), AArch64CC::HS);
4661 case TargetOpcode::G_SSUBE:
4662 return std::make_pair(emitSBCS(Dst, LHS, RHS, MIRBuilder), AArch64CC::VS);
4663 case TargetOpcode::G_USUBE:
4664 return std::make_pair(emitSBCS(Dst, LHS, RHS, MIRBuilder), AArch64CC::LO);
4665 }
4666}
4667
4668/// Returns true if @p Val is a tree of AND/OR/CMP operations that can be
4669/// expressed as a conjunction.
4670/// \param CanNegate Set to true if we can negate the whole sub-tree just by
4671/// changing the conditions on the CMP tests.
4672/// (this means we can call emitConjunctionRec() with
4673/// Negate==true on this sub-tree)
4674/// \param MustBeFirst Set to true if this subtree needs to be negated and we
4675/// cannot do the negation naturally. We are required to
4676/// emit the subtree first in this case.
4677/// \param WillNegate Is true if are called when the result of this
4678/// subexpression must be negated. This happens when the
4679/// outer expression is an OR. We can use this fact to know
4680/// that we have a double negation (or (or ...) ...) that
4681/// can be implemented for free.
4682static bool canEmitConjunction(Register Val, bool &CanNegate, bool &MustBeFirst,
4683 bool WillNegate, MachineRegisterInfo &MRI,
4684 unsigned Depth = 0) {
4685 if (!MRI.hasOneNonDBGUse(Val))
4686 return false;
4687 MachineInstr *ValDef = MRI.getVRegDef(Val);
4688 unsigned Opcode = ValDef->getOpcode();
4689 if (isa<GAnyCmp>(ValDef)) {
4690 CanNegate = true;
4691 MustBeFirst = false;
4692 return true;
4693 }
4694 // Protect against exponential runtime and stack overflow.
4695 if (Depth > 6)
4696 return false;
4697 if (Opcode == TargetOpcode::G_AND || Opcode == TargetOpcode::G_OR) {
4698 bool IsOR = Opcode == TargetOpcode::G_OR;
4699 Register O0 = ValDef->getOperand(1).getReg();
4700 Register O1 = ValDef->getOperand(2).getReg();
4701 bool CanNegateL;
4702 bool MustBeFirstL;
4703 if (!canEmitConjunction(O0, CanNegateL, MustBeFirstL, IsOR, MRI, Depth + 1))
4704 return false;
4705 bool CanNegateR;
4706 bool MustBeFirstR;
4707 if (!canEmitConjunction(O1, CanNegateR, MustBeFirstR, IsOR, MRI, Depth + 1))
4708 return false;
4709
4710 if (MustBeFirstL && MustBeFirstR)
4711 return false;
4712
4713 if (IsOR) {
4714 // For an OR expression we need to be able to naturally negate at least
4715 // one side or we cannot do the transformation at all.
4716 if (!CanNegateL && !CanNegateR)
4717 return false;
4718 // If we the result of the OR will be negated and we can naturally negate
4719 // the leaves, then this sub-tree as a whole negates naturally.
4720 CanNegate = WillNegate && CanNegateL && CanNegateR;
4721 // If we cannot naturally negate the whole sub-tree, then this must be
4722 // emitted first.
4723 MustBeFirst = !CanNegate;
4724 } else {
4725 assert(Opcode == TargetOpcode::G_AND && "Must be G_AND");
4726 // We cannot naturally negate an AND operation.
4727 CanNegate = false;
4728 MustBeFirst = MustBeFirstL || MustBeFirstR;
4729 }
4730 return true;
4731 }
4732 return false;
4733}
4734
/// Emit a conditional compare (CCMP/CCMN/FCCMP) of \p LHS against \p RHS,
/// choosing a register or immediate form for integer compares based on the
/// RHS constant, and the FP form sized by the operand type. The NZCV operand
/// encodes the flags to set when \p Predicate is false.
/// NOTE(review): several lines are missing from this excerpt (originals
/// 4736-4737 with the remaining parameters, 4743 the integer/FP branch
/// header, 4745 the RHS constant lookup, 4770 the inverted-CC computation,
/// and 4781 the constrain call) — confirm against upstream.
4735MachineInstr *AArch64InstructionSelector::emitConditionalComparison(
4738    MachineIRBuilder &MIB) const {
4739  auto &MRI = *MIB.getMRI();
4740  LLT OpTy = MRI.getType(LHS);
4741  unsigned CCmpOpc;
4742  std::optional<ValueAndVReg> C;
4744    assert(OpTy.getSizeInBits() == 32 || OpTy.getSizeInBits() == 64);
// Constants out of [-31, 31] need the register form; 0..31 fits CCMPri;
// negative values use CCMN with the absolute value.
4746    if (!C || C->Value.sgt(31) || C->Value.slt(-31))
4747      CCmpOpc = OpTy.getSizeInBits() == 32 ? AArch64::CCMPWr : AArch64::CCMPXr;
4748    else if (C->Value.ule(31))
4749      CCmpOpc = OpTy.getSizeInBits() == 32 ? AArch64::CCMPWi : AArch64::CCMPXi;
4750    else
4751      CCmpOpc = OpTy.getSizeInBits() == 32 ? AArch64::CCMNWi : AArch64::CCMNXi;
4752  } else {
4753    assert(OpTy.getSizeInBits() == 16 || OpTy.getSizeInBits() == 32 ||
4754           OpTy.getSizeInBits() == 64);
4755    switch (OpTy.getSizeInBits()) {
4756    case 16:
4757      assert(STI.hasFullFP16() && "Expected Full FP16 for fp16 comparisons");
4758      CCmpOpc = AArch64::FCCMPHrr;
4759      break;
4760    case 32:
4761      CCmpOpc = AArch64::FCCMPSrr;
4762      break;
4763    case 64:
4764      CCmpOpc = AArch64::FCCMPDrr;
4765      break;
4766    default:
4767      return nullptr;
4768    }
4769  }
4771  unsigned NZCV = AArch64CC::getNZCVToSatisfyCondCode(InvOutCC);
4772  auto CCmp =
4773      MIB.buildInstr(CCmpOpc, {}, {LHS});
// Immediate forms take the constant (CCMN takes its magnitude); register
// forms take RHS directly.
4774  if (CCmpOpc == AArch64::CCMPWi || CCmpOpc == AArch64::CCMPXi)
4775    CCmp.addImm(C->Value.getZExtValue());
4776  else if (CCmpOpc == AArch64::CCMNWi || CCmpOpc == AArch64::CCMNXi)
4777    CCmp.addImm(C->Value.abs().getZExtValue());
4778  else
4779    CCmp.addReg(RHS);
4780  CCmp.addImm(NZCV).addImm(Predicate);
4782  return &*CCmp;
4783}
4784
/// Recursively lower a conjunction tree rooted at \p Val. Leaves (compares)
/// become a plain SUBS/FCMP when first in the chain, or a CCMP/FCCMP chained
/// off \p CCOp/\p Predicate otherwise. Interior G_AND/G_OR nodes emit the
/// right sub-tree first, then chain the left off it. \p OutCC receives the
/// condition that holds when the (possibly \p Negate-d) expression is true.
/// NOTE(review): this excerpt is missing the predicate-inversion lines under
/// `if (Negate)` (originals 4797/4799) and the `MachineInstr *CmpL =` line
/// (original 4892) — confirm against upstream.
4785MachineInstr *AArch64InstructionSelector::emitConjunctionRec(
4786    Register Val, AArch64CC::CondCode &OutCC, bool Negate, Register CCOp,
4787    AArch64CC::CondCode Predicate, MachineIRBuilder &MIB) const {
4788  // We're at a tree leaf, produce a conditional comparison operation.
4789  auto &MRI = *MIB.getMRI();
4790  MachineInstr *ValDef = MRI.getVRegDef(Val);
4791  unsigned Opcode = ValDef->getOpcode();
4792  if (auto *Cmp = dyn_cast<GAnyCmp>(ValDef)) {
4793    Register LHS = Cmp->getLHSReg();
4794    Register RHS = Cmp->getRHSReg();
4795    CmpInst::Predicate CC = Cmp->getCond();
4796    if (Negate)
4798    if (isa<GICmp>(Cmp)) {
4800    } else {
4801      // Handle special FP cases.
4802      AArch64CC::CondCode ExtraCC;
4803      changeFPCCToANDAArch64CC(CC, OutCC, ExtraCC);
4804      // Some floating point conditions can't be tested with a single condition
4805      // code. Construct an additional comparison in this case.
4806      if (ExtraCC != AArch64CC::AL) {
4807        MachineInstr *ExtraCmp;
4808        if (!CCOp)
4809          ExtraCmp = emitFPCompare(LHS, RHS, MIB, CC);
4810        else
4811          ExtraCmp =
4812              emitConditionalComparison(LHS, RHS, CC, Predicate, ExtraCC, MIB);
4813        CCOp = ExtraCmp->getOperand(0).getReg();
4814        Predicate = ExtraCC;
4815      }
4816    }
4817
4818    // Produce a normal comparison if we are first in the chain
4819    if (!CCOp) {
4820      auto Dst = MRI.cloneVirtualRegister(LHS);
4821      if (isa<GICmp>(Cmp))
4822        return emitSUBS(Dst, Cmp->getOperand(2), Cmp->getOperand(3), MIB);
4823      return emitFPCompare(Cmp->getOperand(2).getReg(),
4824                           Cmp->getOperand(3).getReg(), MIB);
4825    }
4826    // Otherwise produce a ccmp.
4827    return emitConditionalComparison(LHS, RHS, CC, Predicate, OutCC, MIB);
4828  }
4829  assert(MRI.hasOneNonDBGUse(Val) && "Valid conjunction/disjunction tree");
4830
4831  bool IsOR = Opcode == TargetOpcode::G_OR;
4832
// Re-derive the per-side properties canEmitConjunction computed; the caller
// guarantees the tree is valid, hence the asserts.
4833  Register LHS = ValDef->getOperand(1).getReg();
4834  bool CanNegateL;
4835  bool MustBeFirstL;
4836  bool ValidL = canEmitConjunction(LHS, CanNegateL, MustBeFirstL, IsOR, MRI);
4837  assert(ValidL && "Valid conjunction/disjunction tree");
4838  (void)ValidL;
4839
4840  Register RHS = ValDef->getOperand(2).getReg();
4841  bool CanNegateR;
4842  bool MustBeFirstR;
4843  bool ValidR = canEmitConjunction(RHS, CanNegateR, MustBeFirstR, IsOR, MRI);
4844  assert(ValidR && "Valid conjunction/disjunction tree");
4845  (void)ValidR;
4846
4847  // Swap sub-tree that must come first to the right side.
4848  if (MustBeFirstL) {
4849    assert(!MustBeFirstR && "Valid conjunction/disjunction tree");
4850    std::swap(LHS, RHS);
4851    std::swap(CanNegateL, CanNegateR);
4852    std::swap(MustBeFirstL, MustBeFirstR);
4853  }
4854
4855  bool NegateR;
4856  bool NegateAfterR;
4857  bool NegateL;
4858  bool NegateAfterAll;
4859  if (Opcode == TargetOpcode::G_OR) {
4860    // Swap the sub-tree that we can negate naturally to the left.
4861    if (!CanNegateL) {
4862      assert(CanNegateR && "at least one side must be negatable");
4863      assert(!MustBeFirstR && "invalid conjunction/disjunction tree");
4864      assert(!Negate);
4865      std::swap(LHS, RHS);
4866      NegateR = false;
4867      NegateAfterR = true;
4868    } else {
4869      // Negate the left sub-tree if possible, otherwise negate the result.
4870      NegateR = CanNegateR;
4871      NegateAfterR = !CanNegateR;
4872    }
4873    NegateL = true;
4874    NegateAfterAll = !Negate;
4875  } else {
4876    assert(Opcode == TargetOpcode::G_AND &&
4877           "Valid conjunction/disjunction tree");
4878    assert(!Negate && "Valid conjunction/disjunction tree");
4879
4880    NegateL = false;
4881    NegateR = false;
4882    NegateAfterR = false;
4883    NegateAfterAll = false;
4884  }
4885
4886  // Emit sub-trees.
4887  AArch64CC::CondCode RHSCC;
4888  MachineInstr *CmpR =
4889      emitConjunctionRec(RHS, RHSCC, NegateR, CCOp, Predicate, MIB);
4890  if (NegateAfterR)
4891    RHSCC = AArch64CC::getInvertedCondCode(RHSCC);
4893      LHS, OutCC, NegateL, CmpR->getOperand(0).getReg(), RHSCC, MIB);
4894  if (NegateAfterAll)
4895    OutCC = AArch64CC::getInvertedCondCode(OutCC);
4896  return CmpL;
4897}
4898
4899MachineInstr *AArch64InstructionSelector::emitConjunction(
4900 Register Val, AArch64CC::CondCode &OutCC, MachineIRBuilder &MIB) const {
4901 bool DummyCanNegate;
4902 bool DummyMustBeFirst;
4903 if (!canEmitConjunction(Val, DummyCanNegate, DummyMustBeFirst, false,
4904 *MIB.getMRI()))
4905 return nullptr;
4906 return emitConjunctionRec(Val, OutCC, false, Register(), AArch64CC::AL, MIB);
4907}
4908
4909bool AArch64InstructionSelector::tryOptSelectConjunction(GSelect &SelI,
4910 MachineInstr &CondMI) {
4911 AArch64CC::CondCode AArch64CC;
4912 MachineInstr *ConjMI = emitConjunction(SelI.getCondReg(), AArch64CC, MIB);
4913 if (!ConjMI)
4914 return false;
4915
4916 emitSelect(SelI.getReg(0), SelI.getTrueReg(), SelI.getFalseReg(), AArch64CC, MIB);
4917 SelI.eraseFromParent();
4918 return true;
4919}
4920
/// Try to fold the compare that defines a G_SELECT's condition directly into
/// the select, emitting cmp/fcmp + csel instead of materializing the i1.
/// Returns true (and erases \p I) on success.
/// NOTE(review): this excerpt is missing the `CondCode` declaration and the
/// ICMP condition-code assignment (originals 4963/4967) — confirm against
/// upstream.
4921bool AArch64InstructionSelector::tryOptSelect(GSelect &I) {
4922  MachineRegisterInfo &MRI = *MIB.getMRI();
4923  // We want to recognize this pattern:
4924  //
4925  // $z = G_FCMP pred, $x, $y
4926  // ...
4927  // $w = G_SELECT $z, $a, $b
4928  //
4929  // Where the value of $z is *only* ever used by the G_SELECT (possibly with
4930  // some copies/truncs in between.)
4931  //
4932  // If we see this, then we can emit something like this:
4933  //
4934  // fcmp $x, $y
4935  // fcsel $w, $a, $b, pred
4936  //
4937  // Rather than emitting both of the rather long sequences in the standard
4938  // G_FCMP/G_SELECT select methods.
4939
4940  // First, check if the condition is defined by a compare.
4941  MachineInstr *CondDef = MRI.getVRegDef(I.getOperand(1).getReg());
4942
4943  // We can only fold if all of the defs have one use.
4944  Register CondDefReg = CondDef->getOperand(0).getReg();
4945  if (!MRI.hasOneNonDBGUse(CondDefReg)) {
4946    // Unless it's another select.
4947    for (const MachineInstr &UI : MRI.use_nodbg_instructions(CondDefReg)) {
4948      if (CondDef == &UI)
4949        continue;
4950      if (UI.getOpcode() != TargetOpcode::G_SELECT)
4951        return false;
4952    }
4953  }
4954
4955  // Is the condition defined by a compare?
4956  unsigned CondOpc = CondDef->getOpcode();
4957  if (CondOpc != TargetOpcode::G_ICMP && CondOpc != TargetOpcode::G_FCMP) {
// Not a compare: the condition may still be an AND/OR conjunction tree.
4958    if (tryOptSelectConjunction(I, *CondDef))
4959      return true;
4960    return false;
4961  }
4962
4964  if (CondOpc == TargetOpcode::G_ICMP) {
4965    auto Pred =
4966        static_cast<CmpInst::Predicate>(CondDef->getOperand(1).getPredicate());
4968    emitIntegerCompare(CondDef->getOperand(2), CondDef->getOperand(3),
4969                       CondDef->getOperand(1), MIB);
4970  } else {
4971    // Get the condition code for the select.
4972    auto Pred =
4973        static_cast<CmpInst::Predicate>(CondDef->getOperand(1).getPredicate());
4974    AArch64CC::CondCode CondCode2;
4975    changeFCMPPredToAArch64CC(Pred, CondCode, CondCode2);
4976
4977    // changeFCMPPredToAArch64CC sets CondCode2 to AL when we require two
4978    // instructions to emit the comparison.
4979    // TODO: Handle FCMP_UEQ and FCMP_ONE. After that, this check will be
4980    // unnecessary.
4981    if (CondCode2 != AArch64CC::AL)
4982      return false;
4983
4984    if (!emitFPCompare(CondDef->getOperand(2).getReg(),
4985                       CondDef->getOperand(3).getReg(), MIB)) {
4986      LLVM_DEBUG(dbgs() << "Couldn't emit compare for select!\n");
4987      return false;
4988    }
4989  }
4990
4991  // Emit the select.
4992  emitSelect(I.getOperand(0).getReg(), I.getOperand(2).getReg(),
4993             I.getOperand(3).getReg(), CondCode, MIB);
4994  I.eraseFromParent();
4995  return true;
4996}
4997
/// Try to emit a compare as CMN (fold away a G_SUB-from-zero operand) or TST
/// (fold a G_AND compared against zero). Returns the folded instruction, or
/// nullptr when no fold applies.
/// NOTE(review): this excerpt is missing the line defining RHSDef (original
/// 5015, presumably `getDefIgnoringCopies(RHS.getReg(), MRI)`) — confirm
/// against upstream.
4998MachineInstr *AArch64InstructionSelector::tryFoldIntegerCompare(
4999    MachineOperand &LHS, MachineOperand &RHS, MachineOperand &Predicate,
5000    MachineIRBuilder &MIRBuilder) const {
5001  assert(LHS.isReg() && RHS.isReg() && Predicate.isPredicate() &&
5002         "Unexpected MachineOperand");
5003  MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
5004  // We want to find this sort of thing:
5005  // x = G_SUB 0, y
5006  // G_ICMP z, x
5007  //
5008  // In this case, we can fold the G_SUB into the G_ICMP using a CMN instead.
5009  // e.g:
5010  //
5011  // cmn z, y
5012
5013  // Check if the RHS or LHS of the G_ICMP is defined by a SUB
5014  MachineInstr *LHSDef = getDefIgnoringCopies(LHS.getReg(), MRI);
5016  auto P = static_cast<CmpInst::Predicate>(Predicate.getPredicate());
5017  // Given this:
5018  //
5019  // x = G_SUB 0, y
5020  // G_ICMP x, z
5021  //
5022  // Produce this:
5023  //
5024  // cmn y, z
5025  if (isCMN(LHSDef, P, MRI))
5026    return emitCMN(LHSDef->getOperand(2), RHS, MIRBuilder);
5027
5028  // Same idea here, but with the RHS of the compare instead:
5029  //
5030  // Given this:
5031  //
5032  // x = G_SUB 0, y
5033  // G_ICMP z, x
5034  //
5035  // Produce this:
5036  //
5037  // cmn z, y
5038  if (isCMN(RHSDef, P, MRI))
5039    return emitCMN(LHS, RHSDef->getOperand(2), MIRBuilder);
5040
5041  // Given this:
5042  //
5043  // z = G_AND x, y
5044  // G_ICMP z, 0
5045  //
5046  // Produce this if the compare is signed:
5047  //
5048  // tst x, y
5049  if (!CmpInst::isUnsigned(P) && LHSDef &&
5050      LHSDef->getOpcode() == TargetOpcode::G_AND) {
5051    // Make sure that the RHS is 0.
5052    auto ValAndVReg = getIConstantVRegValWithLookThrough(RHS.getReg(), MRI);
5053    if (!ValAndVReg || ValAndVReg->Value != 0)
5054      return nullptr;
5055
5056    return emitTST(LHSDef->getOperand(1),
5057                   LHSDef->getOperand(2), MIRBuilder);
5058  }
5059
// No CMN or TST fold applies; caller emits a plain compare.
5060  return nullptr;
5061}
5062
/// Select a G_SHUFFLE_VECTOR by materializing the shuffle mask as a byte
/// index vector in the constant pool and emitting a TBL1 (64-bit results,
/// over a concat of the sources) or TBL2 (128-bit results, over a Q-register
/// tuple).
/// NOTE(review): this excerpt is missing the remaining parameter line
/// (original 5064), the CstIdxs vector declaration (original 5086), the
/// Concat declaration (original 5108), and the constrain calls (originals
/// 5122/5138) — confirm against upstream.
5063bool AArch64InstructionSelector::selectShuffleVector(
5065  const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
5066  Register Src1Reg = I.getOperand(1).getReg();
5067  const LLT Src1Ty = MRI.getType(Src1Reg);
5068  Register Src2Reg = I.getOperand(2).getReg();
5069  const LLT Src2Ty = MRI.getType(Src2Reg);
5070  ArrayRef<int> Mask = I.getOperand(3).getShuffleMask();
5071
5072  MachineBasicBlock &MBB = *I.getParent();
5073  MachineFunction &MF = *MBB.getParent();
5074  LLVMContext &Ctx = MF.getFunction().getContext();
5075
5076  // G_SHUFFLE_VECTOR is weird in that the source operands can be scalars, if
5077  // it's originated from a <1 x T> type. Those should have been lowered into
5078  // G_BUILD_VECTOR earlier.
5079  if (!Src1Ty.isVector() || !Src2Ty.isVector()) {
5080    LLVM_DEBUG(dbgs() << "Could not select a \"scalar\" G_SHUFFLE_VECTOR\n");
5081    return false;
5082  }
5083
5084  unsigned BytesPerElt = DstTy.getElementType().getSizeInBits() / 8;
5085
// Expand each mask element into its BytesPerElt byte offsets for TBL.
5087  for (int Val : Mask) {
5088    // For now, any undef indexes we'll just assume to be 0. This should be
5089    // optimized in future, e.g. to select DUP etc.
5090    Val = Val < 0 ? 0 : Val;
5091    for (unsigned Byte = 0; Byte < BytesPerElt; ++Byte) {
5092      unsigned Offset = Byte + Val * BytesPerElt;
5093      CstIdxs.emplace_back(ConstantInt::get(Type::getInt8Ty(Ctx), Offset));
5094    }
5095  }
5096
5097  // Use a constant pool to load the index vector for TBL.
5098  Constant *CPVal = ConstantVector::get(CstIdxs);
5099  MachineInstr *IndexLoad = emitLoadFromConstantPool(CPVal, MIB);
5100  if (!IndexLoad) {
5101    LLVM_DEBUG(dbgs() << "Could not load from a constant pool");
5102    return false;
5103  }
5104
5105  if (DstTy.getSizeInBits() != 128) {
5106    assert(DstTy.getSizeInBits() == 64 && "Unexpected shuffle result ty");
5107    // This case can be done with TBL1.
5109        emitVectorConcat(std::nullopt, Src1Reg, Src2Reg, MIB);
5110    if (!Concat) {
5111      LLVM_DEBUG(dbgs() << "Could not do vector concat for tbl1");
5112      return false;
5113    }
5114
5115    // The constant pool load will be 64 bits, so need to convert to FPR128 reg.
5116    IndexLoad = emitScalarToVector(64, &AArch64::FPR128RegClass,
5117                                   IndexLoad->getOperand(0).getReg(), MIB);
5118
5119    auto TBL1 = MIB.buildInstr(
5120        AArch64::TBLv16i8One, {&AArch64::FPR128RegClass},
5121        {Concat->getOperand(0).getReg(), IndexLoad->getOperand(0).getReg()});
5123
// TBL1 produced a 128-bit value; copy out the low 64 bits via dsub.
5124    auto Copy =
5125        MIB.buildInstr(TargetOpcode::COPY, {I.getOperand(0).getReg()}, {})
5126            .addReg(TBL1.getReg(0), 0, AArch64::dsub);
5127    RBI.constrainGenericRegister(Copy.getReg(0), AArch64::FPR64RegClass, MRI);
5128    I.eraseFromParent();
5129    return true;
5130  }
5131
5132  // For TBL2 we need to emit a REG_SEQUENCE to tie together two consecutive
5133  // Q registers for regalloc.
5134  SmallVector<Register, 2> Regs = {Src1Reg, Src2Reg};
5135  auto RegSeq = createQTuple(Regs, MIB);
5136  auto TBL2 = MIB.buildInstr(AArch64::TBLv16i8Two, {I.getOperand(0)},
5137                             {RegSeq, IndexLoad->getOperand(0)});
5139  I.eraseFromParent();
5140  return true;
5141}
5142
/// Emit an instruction inserting \p EltReg into lane \p LaneIdx of
/// \p SrcReg, producing an FPR128-class result. FPR-bank elements are first
/// widened to a vector; GPR-bank elements are inserted directly. Creates a
/// destination register when \p DstReg is not provided.
/// NOTE(review): this excerpt appears to be missing the constrain call
/// before the return (original line 5170) — confirm against upstream.
5143MachineInstr *AArch64InstructionSelector::emitLaneInsert(
5144    std::optional<Register> DstReg, Register SrcReg, Register EltReg,
5145    unsigned LaneIdx, const RegisterBank &RB,
5146    MachineIRBuilder &MIRBuilder) const {
5147  MachineInstr *InsElt = nullptr;
5148  const TargetRegisterClass *DstRC = &AArch64::FPR128RegClass;
5149  MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
5150
5151  // Create a register to define with the insert if one wasn't passed in.
5152  if (!DstReg)
5153    DstReg = MRI.createVirtualRegister(DstRC);
5154
5155  unsigned EltSize = MRI.getType(EltReg).getSizeInBits();
5156  unsigned Opc = getInsertVecEltOpInfo(RB, EltSize).first;
5157
5158  if (RB.getID() == AArch64::FPRRegBankID) {
// FPR source: the element-form INS reads lane 0 of a vector register, so
// widen the scalar first.
5159    auto InsSub = emitScalarToVector(EltSize, DstRC, EltReg, MIRBuilder);
5160    InsElt = MIRBuilder.buildInstr(Opc, {*DstReg}, {SrcReg})
5161                 .addImm(LaneIdx)
5162                 .addUse(InsSub->getOperand(0).getReg())
5163                 .addImm(0);
5164  } else {
5165    InsElt = MIRBuilder.buildInstr(Opc, {*DstReg}, {SrcReg})
5166                 .addImm(LaneIdx)
5167                 .addUse(EltReg);
5168  }
5169
5171  return InsElt;
5172}
5173
/// Fold a G_SEXT/G_ZEXT/G_ANYEXT of a G_EXTRACT_VECTOR_ELT (with a constant
/// lane) into a single SMOV/UMOV lane move. Sub-128-bit source vectors are
/// widened to FPR128 first; a 64-bit unsigned result needs UMOV plus a
/// SUBREG_TO_REG zero-extension.
/// NOTE(review): this excerpt is missing the remaining parameter line
/// (original 5175) and the constrain call (original 5234) — confirm against
/// upstream.
5174bool AArch64InstructionSelector::selectUSMovFromExtend(
5176  if (MI.getOpcode() != TargetOpcode::G_SEXT &&
5177      MI.getOpcode() != TargetOpcode::G_ZEXT &&
5178      MI.getOpcode() != TargetOpcode::G_ANYEXT)
5179    return false;
5180  bool IsSigned = MI.getOpcode() == TargetOpcode::G_SEXT;
5181  const Register DefReg = MI.getOperand(0).getReg();
5182  const LLT DstTy = MRI.getType(DefReg);
5183  unsigned DstSize = DstTy.getSizeInBits();
5184
5185  if (DstSize != 32 && DstSize != 64)
5186    return false;
5187
// The extend's source must be a vector-element extract with a constant lane.
5188  MachineInstr *Extract = getOpcodeDef(TargetOpcode::G_EXTRACT_VECTOR_ELT,
5189                                       MI.getOperand(1).getReg(), MRI);
5190  int64_t Lane;
5191  if (!Extract || !mi_match(Extract->getOperand(2).getReg(), MRI, m_ICst(Lane)))
5192    return false;
5193  Register Src0 = Extract->getOperand(1).getReg();
5194
5195  const LLT &VecTy = MRI.getType(Src0);
5196
5197  if (VecTy.getSizeInBits() != 128) {
5198    const MachineInstr *ScalarToVector = emitScalarToVector(
5199        VecTy.getSizeInBits(), &AArch64::FPR128RegClass, Src0, MIB);
5200    assert(ScalarToVector && "Didn't expect emitScalarToVector to fail!");
5201    Src0 = ScalarToVector->getOperand(0).getReg();
5202  }
5203
// Pick the mov variant from (destination size, element size, signedness).
5204  unsigned Opcode;
5205  if (DstSize == 64 && VecTy.getScalarSizeInBits() == 32)
5206    Opcode = IsSigned ? AArch64::SMOVvi32to64 : AArch64::UMOVvi32;
5207  else if (DstSize == 64 && VecTy.getScalarSizeInBits() == 16)
5208    Opcode = IsSigned ? AArch64::SMOVvi16to64 : AArch64::UMOVvi16;
5209  else if (DstSize == 64 && VecTy.getScalarSizeInBits() == 8)
5210    Opcode = IsSigned ? AArch64::SMOVvi8to64 : AArch64::UMOVvi8;
5211  else if (DstSize == 32 && VecTy.getScalarSizeInBits() == 16)
5212    Opcode = IsSigned ? AArch64::SMOVvi16to32 : AArch64::UMOVvi16;
5213  else if (DstSize == 32 && VecTy.getScalarSizeInBits() == 8)
5214    Opcode = IsSigned ? AArch64::SMOVvi8to32 : AArch64::UMOVvi8;
5215  else
5216    llvm_unreachable("Unexpected type combo for S/UMov!");
5217
5218  // We may need to generate one of these, depending on the type and sign of the
5219  // input:
5220  //  DstReg = SMOV Src0, Lane;
5221  //  NewReg = UMOV Src0, Lane; DstReg = SUBREG_TO_REG NewReg, sub_32;
5222  MachineInstr *ExtI = nullptr;
5223  if (DstSize == 64 && !IsSigned) {
5224    Register NewReg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
5225    MIB.buildInstr(Opcode, {NewReg}, {Src0}).addImm(Lane);
5226    ExtI = MIB.buildInstr(AArch64::SUBREG_TO_REG, {DefReg}, {})
5227               .addImm(0)
5228               .addUse(NewReg)
5229               .addImm(AArch64::sub_32);
5230    RBI.constrainGenericRegister(DefReg, AArch64::GPR64RegClass, MRI);
5231  } else
5232    ExtI = MIB.buildInstr(Opcode, {DefReg}, {Src0}).addImm(Lane);
5233
5235  MI.eraseFromParent();
5236  return true;
5237}
5238
/// Try to materialize \p Bits as a MOVI byte-pattern immediate (MOVIv16b/
/// MOVIv8b). A 128-bit value must have identical halves. Returns nullptr
/// when the value is not encodable.
/// NOTE(review): the encodability check and encode call (originals
/// 5252-5253) plus the constrain call (5255) are missing from this excerpt —
/// confirm against upstream.
5239MachineInstr *AArch64InstructionSelector::tryAdvSIMDModImm8(
5240    Register Dst, unsigned DstSize, APInt Bits, MachineIRBuilder &Builder) {
5241  unsigned int Op;
5242  if (DstSize == 128) {
5243    if (Bits.getHiBits(64) != Bits.getLoBits(64))
5244      return nullptr;
5245    Op = AArch64::MOVIv16b_ns;
5246  } else {
5247    Op = AArch64::MOVIv8b_ns;
5248  }
5249
5250  uint64_t Val = Bits.zextOrTrunc(64).getZExtValue();
5251
5254    auto Mov = Builder.buildInstr(Op, {Dst}, {}).addImm(Val);
5256    return &*Mov;
5257  }
5258  return nullptr;
5259}
5260
/// Try to materialize \p Bits as a 16-bit-element MOVI/MVNI (shift 0 or 8).
/// \p Inv selects the MVNI (inverted) forms. 128-bit values must have
/// identical halves. Returns nullptr when not encodable.
/// NOTE(review): several encode/check lines (originals 5277-5278, 5281,
/// 5287) are missing from this excerpt — confirm against upstream.
5261MachineInstr *AArch64InstructionSelector::tryAdvSIMDModImm16(
5262    Register Dst, unsigned DstSize, APInt Bits, MachineIRBuilder &Builder,
5263    bool Inv) {
5264
5265  unsigned int Op;
5266  if (DstSize == 128) {
5267    if (Bits.getHiBits(64) != Bits.getLoBits(64))
5268      return nullptr;
5269    Op = Inv ? AArch64::MVNIv8i16 : AArch64::MOVIv8i16;
5270  } else {
5271    Op = Inv ? AArch64::MVNIv4i16 : AArch64::MOVIv4i16;
5272  }
5273
5274  uint64_t Val = Bits.zextOrTrunc(64).getZExtValue();
5275  uint64_t Shift;
5276
5279    Shift = 0;
5280  } else if (AArch64_AM::isAdvSIMDModImmType6(Val)) {
5282    Shift = 8;
5283  } else
5284    return nullptr;
5285
5286  auto Mov = Builder.buildInstr(Op, {Dst}, {}).addImm(Val).addImm(Shift);
5288  return &*Mov;
5289}
5290
/// Try to materialize \p Bits as a 32-bit-element MOVI/MVNI with LSL shift
/// 0, 8, 16 or 24. \p Inv selects the MVNI forms. 128-bit values must have
/// identical halves. Returns nullptr when not encodable.
/// NOTE(review): the Type1 check line and the encode calls (originals
/// 5307-5308, 5311, 5314, 5317, 5323) are missing from this excerpt —
/// confirm against upstream.
5291MachineInstr *AArch64InstructionSelector::tryAdvSIMDModImm32(
5292    Register Dst, unsigned DstSize, APInt Bits, MachineIRBuilder &Builder,
5293    bool Inv) {
5294
5295  unsigned int Op;
5296  if (DstSize == 128) {
5297    if (Bits.getHiBits(64) != Bits.getLoBits(64))
5298      return nullptr;
5299    Op = Inv ? AArch64::MVNIv4i32 : AArch64::MOVIv4i32;
5300  } else {
5301    Op = Inv ? AArch64::MVNIv2i32 : AArch64::MOVIv2i32;
5302  }
5303
5304  uint64_t Val = Bits.zextOrTrunc(64).getZExtValue();
5305  uint64_t Shift;
5306
5309    Shift = 0;
5310  } else if ((AArch64_AM::isAdvSIMDModImmType2(Val))) {
5312    Shift = 8;
5313  } else if ((AArch64_AM::isAdvSIMDModImmType3(Val))) {
5315    Shift = 16;
5316  } else if ((AArch64_AM::isAdvSIMDModImmType4(Val))) {
5318    Shift = 24;
5319  } else
5320    return nullptr;
5321
5322  auto Mov = Builder.buildInstr(Op, {Dst}, {}).addImm(Val).addImm(Shift);
5324  return &*Mov;
5325}
5326
/// Try to materialize \p Bits as a 64-bit-element MOVI (MOVIv2d for 128-bit
/// destinations, MOVID otherwise). 128-bit values must have identical
/// halves. Returns nullptr when not encodable.
/// NOTE(review): the encodability check and encode call (originals
/// 5340-5341) plus the constrain call (5343) are missing from this excerpt —
/// confirm against upstream.
5327MachineInstr *AArch64InstructionSelector::tryAdvSIMDModImm64(
5328    Register Dst, unsigned DstSize, APInt Bits, MachineIRBuilder &Builder) {
5329
5330  unsigned int Op;
5331  if (DstSize == 128) {
5332    if (Bits.getHiBits(64) != Bits.getLoBits(64))
5333      return nullptr;
5334    Op = AArch64::MOVIv2d_ns;
5335  } else {
5336    Op = AArch64::MOVID;
5337  }
5338
5339  uint64_t Val = Bits.zextOrTrunc(64).getZExtValue();
5342    auto Mov = Builder.buildInstr(Op, {Dst}, {}).addImm(Val);
5344    return &*Mov;
5345  }
5346  return nullptr;
5347}
5348
/// Try to materialize \p Bits as a 32-bit-element MOVI/MVNI with MSL
/// (shifting-ones) shift; the 264/272 immediates are the encoded MSL #8 and
/// MSL #16 forms. \p Inv selects MVNI. Returns nullptr when not encodable.
/// NOTE(review): the Type7 check line and the encode calls (originals
/// 5365-5366, 5369, 5375) are missing from this excerpt — confirm against
/// upstream.
5349MachineInstr *AArch64InstructionSelector::tryAdvSIMDModImm321s(
5350    Register Dst, unsigned DstSize, APInt Bits, MachineIRBuilder &Builder,
5351    bool Inv) {
5352
5353  unsigned int Op;
5354  if (DstSize == 128) {
5355    if (Bits.getHiBits(64) != Bits.getLoBits(64))
5356      return nullptr;
5357    Op = Inv ? AArch64::MVNIv4s_msl : AArch64::MOVIv4s_msl;
5358  } else {
5359    Op = Inv ? AArch64::MVNIv2s_msl : AArch64::MOVIv2s_msl;
5360  }
5361
5362  uint64_t Val = Bits.zextOrTrunc(64).getZExtValue();
5363  uint64_t Shift;
5364
5367    Shift = 264;
5368  } else if (AArch64_AM::isAdvSIMDModImmType8(Val)) {
5370    Shift = 272;
5371  } else
5372    return nullptr;
5373
5374  auto Mov = Builder.buildInstr(Op, {Dst}, {}).addImm(Val).addImm(Shift);
5376  return &*Mov;
5377}
5378
/// Try to materialize \p Bits as an FMOV vector immediate (f32 elements, or
/// f64 elements for 128-bit destinations when the f64 encoding matches).
/// 128-bit values must have identical halves. Returns nullptr when not
/// encodable.
/// NOTE(review): the Type11 check line and encode calls (originals
/// 5395-5396, 5398, 5404) are missing from this excerpt — confirm against
/// upstream.
5379MachineInstr *AArch64InstructionSelector::tryAdvSIMDModImmFP(
5380    Register Dst, unsigned DstSize, APInt Bits, MachineIRBuilder &Builder) {
5381
5382  unsigned int Op;
5383  bool IsWide = false;
5384  if (DstSize == 128) {
5385    if (Bits.getHiBits(64) != Bits.getLoBits(64))
5386      return nullptr;
5387    Op = AArch64::FMOVv4f32_ns;
5388    IsWide = true;
5389  } else {
5390    Op = AArch64::FMOVv2f32_ns;
5391  }
5392
5393  uint64_t Val = Bits.zextOrTrunc(64).getZExtValue();
5394
5397  } else if (IsWide && AArch64_AM::isAdvSIMDModImmType12(Val)) {
5399    Op = AArch64::FMOVv2f64_ns;
5400  } else
5401    return nullptr;
5402
5403  auto Mov = Builder.buildInstr(Op, {Dst}, {}).addImm(Val);
5405  return &*Mov;
5406}
5407
/// Select a pre/post-indexed extending scalar load (G_INDEXED_*EXTLOAD) into
/// the matching LDRS*/LDR* pre/post opcode, then widen the loaded value into
/// the destination with a SUBREG_TO_REG or a plain copy as needed.
/// NOTE(review): the remaining parameter line (original 5409) and the
/// constrain call after cloneMemRefs (original 5475) are missing from this
/// excerpt — confirm against upstream.
5408bool AArch64InstructionSelector::selectIndexedExtLoad(
5410  auto &ExtLd = cast<GIndexedAnyExtLoad>(MI);
5411  Register Dst = ExtLd.getDstReg();
5412  Register WriteBack = ExtLd.getWritebackReg();
5413  Register Base = ExtLd.getBaseReg();
5414  Register Offset = ExtLd.getOffsetReg();
5415  LLT Ty = MRI.getType(Dst);
5416  assert(Ty.getSizeInBits() <= 64); // Only for scalar GPRs.
5417  unsigned MemSizeBits = ExtLd.getMMO().getMemoryType().getSizeInBits();
5418  bool IsPre = ExtLd.isPre();
5419  bool IsSExt = isa<GIndexedSExtLoad>(ExtLd);
// Zero-extending to 64 bits is done by loading into a W register and
// wrapping it in SUBREG_TO_REG (InsertIntoXReg) rather than a dedicated
// load opcode.
5420  bool InsertIntoXReg = false;
5421  bool IsDst64 = Ty.getSizeInBits() == 64;
5422
5423  unsigned Opc = 0;
5424  LLT NewLdDstTy;
5425  LLT s32 = LLT::scalar(32);
5426  LLT s64 = LLT::scalar(64);
5427
// Dispatch on memory size; sign-extending loads have X-dest forms, the
// zero/any-extending ones always load into a 32-bit register.
5428  if (MemSizeBits == 8) {
5429    if (IsSExt) {
5430      if (IsDst64)
5431        Opc = IsPre ? AArch64::LDRSBXpre : AArch64::LDRSBXpost;
5432      else
5433        Opc = IsPre ? AArch64::LDRSBWpre : AArch64::LDRSBWpost;
5434      NewLdDstTy = IsDst64 ? s64 : s32;
5435    } else {
5436      Opc = IsPre ? AArch64::LDRBBpre : AArch64::LDRBBpost;
5437      InsertIntoXReg = IsDst64;
5438      NewLdDstTy = s32;
5439    }
5440  } else if (MemSizeBits == 16) {
5441    if (IsSExt) {
5442      if (IsDst64)
5443        Opc = IsPre ? AArch64::LDRSHXpre : AArch64::LDRSHXpost;
5444      else
5445        Opc = IsPre ? AArch64::LDRSHWpre : AArch64::LDRSHWpost;
5446      NewLdDstTy = IsDst64 ? s64 : s32;
5447    } else {
5448      Opc = IsPre ? AArch64::LDRHHpre : AArch64::LDRHHpost;
5449      InsertIntoXReg = IsDst64;
5450      NewLdDstTy = s32;
5451    }
5452  } else if (MemSizeBits == 32) {
5453    if (IsSExt) {
5454      Opc = IsPre ? AArch64::LDRSWpre : AArch64::LDRSWpost;
5455      NewLdDstTy = s64;
5456    } else {
5457      Opc = IsPre ? AArch64::LDRWpre : AArch64::LDRWpost;
5458      InsertIntoXReg = IsDst64;
5459      NewLdDstTy = s32;
5460    }
5461  } else {
5462    llvm_unreachable("Unexpected size for indexed load");
5463  }
5464
5465  if (RBI.getRegBank(Dst, MRI, TRI)->getID() == AArch64::FPRRegBankID)
5466    return false; // We should be on gpr.
5467
5468  auto Cst = getIConstantVRegVal(Offset, MRI);
5469  if (!Cst)
5470    return false; // Shouldn't happen, but just in case.
5471
5472  auto LdMI = MIB.buildInstr(Opc, {WriteBack, NewLdDstTy}, {Base})
5473                  .addImm(Cst->getSExtValue());
5474  LdMI.cloneMemRefs(ExtLd);
5476  // Make sure to select the load with the MemTy as the dest type, and then
5477  // insert into X reg if needed.
5478  if (InsertIntoXReg) {
5479    // Generate a SUBREG_TO_REG.
5480    auto SubToReg = MIB.buildInstr(TargetOpcode::SUBREG_TO_REG, {Dst}, {})
5481                        .addImm(0)
5482                        .addUse(LdMI.getReg(1))
5483                        .addImm(AArch64::sub_32);
5484    RBI.constrainGenericRegister(SubToReg.getReg(0), AArch64::GPR64RegClass,
5485                                 MRI);
5486  } else {
5487    auto Copy = MIB.buildCopy(Dst, LdMI.getReg(1));
5488    selectCopy(*Copy, TII, MRI, TRI, RBI);
5489  }
5490  MI.eraseFromParent();
5491
5492  return true;
5493}
5494
5495bool AArch64InstructionSelector::selectIndexedLoad(MachineInstr &MI,
// NOTE(review): original line 5496 (the rest of the parameter list) is
// missing from this extraction; the body references `MRI`, so it presumably
// declared MachineRegisterInfo &MRI -- confirm against upstream.
// Select a pre/post-indexed non-extending load (G_INDEXED_LOAD) into the
// matching LDR*(pre|post) instruction; extending loads are dispatched to
// selectIndexedExtLoad.
5497 auto &Ld = cast<GIndexedLoad>(MI);
5498 Register Dst = Ld.getDstReg();
5499 Register WriteBack = Ld.getWritebackReg();
5500 Register Base = Ld.getBaseReg();
5501 Register Offset = Ld.getOffsetReg();
5502 assert(MRI.getType(Dst).getSizeInBits() <= 128 &&
5503 "Unexpected type for indexed load");
5504 unsigned MemSize = Ld.getMMO().getMemoryType().getSizeInBytes();
5505
// If memory is narrower than the destination, this is really an extending
// load.
5506 if (MemSize < MRI.getType(Dst).getSizeInBytes())
5507 return selectIndexedExtLoad(MI, MRI);
5508
// Opcode tables are indexed by log2 of the memory size in bytes
// (B, H, W/S, X/D, Q).
5509 unsigned Opc = 0;
5510 if (Ld.isPre()) {
5511 static constexpr unsigned GPROpcodes[] = {
5512 AArch64::LDRBBpre, AArch64::LDRHHpre, AArch64::LDRWpre,
5513 AArch64::LDRXpre};
5514 static constexpr unsigned FPROpcodes[] = {
5515 AArch64::LDRBpre, AArch64::LDRHpre, AArch64::LDRSpre, AArch64::LDRDpre,
5516 AArch64::LDRQpre};
5517 if (RBI.getRegBank(Dst, MRI, TRI)->getID() == AArch64::FPRRegBankID)
5518 Opc = FPROpcodes[Log2_32(MemSize)];
5519 else
5520 Opc = GPROpcodes[Log2_32(MemSize)];
5521 } else {
5522 static constexpr unsigned GPROpcodes[] = {
5523 AArch64::LDRBBpost, AArch64::LDRHHpost, AArch64::LDRWpost,
5524 AArch64::LDRXpost};
5525 static constexpr unsigned FPROpcodes[] = {
5526 AArch64::LDRBpost, AArch64::LDRHpost, AArch64::LDRSpost,
5527 AArch64::LDRDpost, AArch64::LDRQpost};
5528 if (RBI.getRegBank(Dst, MRI, TRI)->getID() == AArch64::FPRRegBankID)
5529 Opc = FPROpcodes[Log2_32(MemSize)];
5530 else
5531 Opc = GPROpcodes[Log2_32(MemSize)];
5532 }
// The addressing mode takes an immediate offset.
5533 auto Cst = getIConstantVRegVal(Offset, MRI);
5534 if (!Cst)
5535 return false; // Shouldn't happen, but just in case.
5536 auto LdMI =
5537 MIB.buildInstr(Opc, {WriteBack, Dst}, {Base}).addImm(Cst->getSExtValue());
5538 LdMI.cloneMemRefs(Ld);
// NOTE(review): original line 5539 is missing from this extraction; sibling
// selectors constrain the new load's operands here -- confirm against
// upstream.
5540 MI.eraseFromParent();
5541 return true;
5542}
5543
5544bool AArch64InstructionSelector::selectIndexedStore(GIndexedStore &I,
// NOTE(review): original line 5545 (the rest of the parameter list) is
// missing from this extraction; the body references `MRI`, so it presumably
// declared MachineRegisterInfo &MRI -- confirm against upstream.
// Select a pre/post-indexed store (G_INDEXED_STORE) into the matching
// STR*(pre|post) instruction, choosing GPR vs FPR opcodes from the value's
// register bank.
5546 Register Dst = I.getWritebackReg();
5547 Register Val = I.getValueReg();
5548 Register Base = I.getBaseReg();
5549 Register Offset = I.getOffsetReg();
5550 LLT ValTy = MRI.getType(Val);
5551 assert(ValTy.getSizeInBits() <= 128 && "Unexpected type for indexed store");
5552
// Opcode tables are indexed by log2 of the stored value's size in bytes.
5553 unsigned Opc = 0;
5554 if (I.isPre()) {
5555 static constexpr unsigned GPROpcodes[] = {
5556 AArch64::STRBBpre, AArch64::STRHHpre, AArch64::STRWpre,
5557 AArch64::STRXpre};
5558 static constexpr unsigned FPROpcodes[] = {
5559 AArch64::STRBpre, AArch64::STRHpre, AArch64::STRSpre, AArch64::STRDpre,
5560 AArch64::STRQpre};
5561
5562 if (RBI.getRegBank(Val, MRI, TRI)->getID() == AArch64::FPRRegBankID)
5563 Opc = FPROpcodes[Log2_32(ValTy.getSizeInBytes())];
5564 else
5565 Opc = GPROpcodes[Log2_32(ValTy.getSizeInBytes())];
5566 } else {
5567 static constexpr unsigned GPROpcodes[] = {
5568 AArch64::STRBBpost, AArch64::STRHHpost, AArch64::STRWpost,
5569 AArch64::STRXpost};
5570 static constexpr unsigned FPROpcodes[] = {
5571 AArch64::STRBpost, AArch64::STRHpost, AArch64::STRSpost,
5572 AArch64::STRDpost, AArch64::STRQpost};
5573
5574 if (RBI.getRegBank(Val, MRI, TRI)->getID() == AArch64::FPRRegBankID)
5575 Opc = FPROpcodes[Log2_32(ValTy.getSizeInBytes())];
5576 else
5577 Opc = GPROpcodes[Log2_32(ValTy.getSizeInBytes())];
5578 }
5579
// The addressing mode takes an immediate offset.
5580 auto Cst = getIConstantVRegVal(Offset, MRI);
5581 if (!Cst)
5582 return false; // Shouldn't happen, but just in case.
5583 auto Str =
5584 MIB.buildInstr(Opc, {Dst}, {Val, Base}).addImm(Cst->getSExtValue());
5585 Str.cloneMemRefs(I);
// NOTE(review): original line 5586 is missing from this extraction; sibling
// selectors constrain the new store's operands here -- confirm against
// upstream.
5587 I.eraseFromParent();
5588 return true;
5589}
5590
// NOTE(review): the return-type line (original line 5591, presumably
// `MachineInstr *`) is missing from this extraction -- the function returns
// MachineInstr pointers below. Confirm against upstream.
// Materialize the vector constant CV into Dst. Strategy, in order: all-zero
// vectors via MOVIv2d_ns; splat constants via the MOVI/MVNI immediate
// helpers (including the inverted forms and an FNEG-of-MOVI trick);
// otherwise fall back to a constant-pool load. Returns the final
// instruction, or nullptr on failure.
5592AArch64InstructionSelector::emitConstantVector(Register Dst, Constant *CV,
5593 MachineIRBuilder &MIRBuilder,
// NOTE(review): original line 5594 (the rest of the parameter list; the body
// references `MRI`) is missing from this extraction.
5595 LLT DstTy = MRI.getType(Dst);
5596 unsigned DstSize = DstTy.getSizeInBits();
// All-zero vector: a single MOVI (64-bit element form) zeroes the register.
5597 if (CV->isNullValue()) {
5598 if (DstSize == 128) {
5599 auto Mov =
5600 MIRBuilder.buildInstr(AArch64::MOVIv2d_ns, {Dst}, {}).addImm(0);
// NOTE(review): original line 5601 is missing from this extraction; sibling
// emitters constrain the new instruction's operands at this point.
5602 return &*Mov;
5603 }
5604
// For a 64-bit destination, emit the 128-bit zeroing MOVI and copy out the
// low half via the dsub subregister.
5605 if (DstSize == 64) {
5606 auto Mov =
5607 MIRBuilder
5608 .buildInstr(AArch64::MOVIv2d_ns, {&AArch64::FPR128RegClass}, {})
5609 .addImm(0);
5610 auto Copy = MIRBuilder.buildInstr(TargetOpcode::COPY, {Dst}, {})
5611 .addReg(Mov.getReg(0), 0, AArch64::dsub);
5612 RBI.constrainGenericRegister(Dst, AArch64::FPR64RegClass, MRI);
5613 return &*Copy;
5614 }
5615 }
5616
// Splat constants: try the AdvSIMD modified-immediate encodings.
5617 if (CV->getSplatValue()) {
5618 APInt DefBits = APInt::getSplat(
5619 DstSize, CV->getUniqueInteger().trunc(DstTy.getScalarSizeInBits()));
// Try each MOVI/FMOV immediate form on the given bits, then retry the
// invertible forms (MVNI) on the complemented bits.
5620 auto TryMOVIWithBits = [&](APInt DefBits) -> MachineInstr * {
5621 MachineInstr *NewOp;
5622 bool Inv = false;
5623 if ((NewOp = tryAdvSIMDModImm64(Dst, DstSize, DefBits, MIRBuilder)) ||
5624 (NewOp =
5625 tryAdvSIMDModImm32(Dst, DstSize, DefBits, MIRBuilder, Inv)) ||
5626 (NewOp =
5627 tryAdvSIMDModImm321s(Dst, DstSize, DefBits, MIRBuilder, Inv)) ||
5628 (NewOp =
5629 tryAdvSIMDModImm16(Dst, DstSize, DefBits, MIRBuilder, Inv)) ||
5630 (NewOp = tryAdvSIMDModImm8(Dst, DstSize, DefBits, MIRBuilder)) ||
5631 (NewOp = tryAdvSIMDModImmFP(Dst, DstSize, DefBits, MIRBuilder)))
5632 return NewOp;
5633
5634 DefBits = ~DefBits;
5635 Inv = true;
5636 if ((NewOp =
5637 tryAdvSIMDModImm32(Dst, DstSize, DefBits, MIRBuilder, Inv)) ||
5638 (NewOp =
5639 tryAdvSIMDModImm321s(Dst, DstSize, DefBits, MIRBuilder, Inv)) ||
5640 (NewOp = tryAdvSIMDModImm16(Dst, DstSize, DefBits, MIRBuilder, Inv)))
5641 return NewOp;
5642 return nullptr;
5643 };
5644
5645 if (auto *NewOp = TryMOVIWithBits(DefBits))
5646 return NewOp;
5647
5648 // See if a fneg of the constant can be materialized with a MOVI, etc
5649 auto TryWithFNeg = [&](APInt DefBits, int NumBits,
5650 unsigned NegOpc) -> MachineInstr * {
5651 // FNegate each sub-element of the constant
// Flip the sign bit of every NumBits-wide lane, then see whether the
// sign-flipped constant is MOVI-encodable; if so, emit that plus an FNEG
// to recover the original value.
5652 APInt Neg = APInt::getHighBitsSet(NumBits, 1).zext(DstSize);
5653 APInt NegBits(DstSize, 0);
5654 unsigned NumElts = DstSize / NumBits;
5655 for (unsigned i = 0; i < NumElts; i++)
5656 NegBits |= Neg << (NumBits * i);
5657 NegBits = DefBits ^ NegBits;
5658
5659 // Try to create the new constants with MOVI, and if so generate a fneg
5660 // for it.
5661 if (auto *NewOp = TryMOVIWithBits(NegBits)) {
// Retarget the MOVI at a fresh register so the FNEG can define Dst.
5662 Register NewDst = MRI.createVirtualRegister(&AArch64::FPR128RegClass);
5663 NewOp->getOperand(0).setReg(NewDst);
5664 return MIRBuilder.buildInstr(NegOpc, {Dst}, {NewDst});
5665 }
5666 return nullptr;
5667 };
5668 MachineInstr *R;
// f16 FNEG requires full FP16 support.
5669 if ((R = TryWithFNeg(DefBits, 32, AArch64::FNEGv4f32)) ||
5670 (R = TryWithFNeg(DefBits, 64, AArch64::FNEGv2f64)) ||
5671 (STI.hasFullFP16() &&
5672 (R = TryWithFNeg(DefBits, 16, AArch64::FNEGv8f16))))
5673 return R;
5674 }
5675
// Fallback: load the constant from the constant pool.
5676 auto *CPLoad = emitLoadFromConstantPool(CV, MIRBuilder);
5677 if (!CPLoad) {
5678 LLVM_DEBUG(dbgs() << "Could not generate cp load for constant vector!");
5679 return nullptr;
5680 }
5681
5682 auto Copy = MIRBuilder.buildCopy(Dst, CPLoad->getOperand(0));
5683 RBI.constrainGenericRegister(
5684 Dst, *MRI.getRegClass(CPLoad->getOperand(0).getReg()), MRI);
5685 return &*Copy;
5686}
5687
5688bool AArch64InstructionSelector::tryOptConstantBuildVec(
// NOTE(review): the parameter-list line (original line 5689) is missing from
// this extraction. The body references `I`, `DstTy`, and `MRI`, so it
// presumably declared (MachineInstr &I, LLT DstTy, MachineRegisterInfo &MRI)
// -- confirm against upstream.
// If every G_BUILD_VECTOR operand is a G_CONSTANT/G_FCONSTANT, materialize
// the whole vector through emitConstantVector instead of an insert
// sequence. Returns true iff the instruction was selected.
5690 assert(I.getOpcode() == TargetOpcode::G_BUILD_VECTOR);
5691 unsigned DstSize = DstTy.getSizeInBits();
5692 assert(DstSize <= 128 && "Unexpected build_vec type!");
5693 if (DstSize < 32)
5694 return false;
5695 // Check if we're building a constant vector, in which case we want to
5696 // generate a constant pool load instead of a vector insert sequence.
// NOTE(review): original line 5697 is missing from this extraction; `Csts`
// is used below without a visible declaration, so the dropped line
// presumably declared a SmallVector<Constant *> named Csts.
5698 for (unsigned Idx = 1; Idx < I.getNumOperands(); ++Idx) {
5699 // Try to find G_CONSTANT or G_FCONSTANT
5700 auto *OpMI =
5701 getOpcodeDef(TargetOpcode::G_CONSTANT, I.getOperand(Idx).getReg(), MRI);
5702 if (OpMI)
5703 Csts.emplace_back(
5704 const_cast<ConstantInt *>(OpMI->getOperand(1).getCImm()));
5705 else if ((OpMI = getOpcodeDef(TargetOpcode::G_FCONSTANT,
5706 I.getOperand(Idx).getReg(), MRI)))
5707 Csts.emplace_back(
5708 const_cast<ConstantFP *>(OpMI->getOperand(1).getFPImm()));
5709 else
5710 return false;
5711 }
5712 Constant *CV = ConstantVector::get(Csts);
5713 if (!emitConstantVector(I.getOperand(0).getReg(), CV, MIB, MRI))
5714 return false;
5715 I.eraseFromParent();
5716 return true;
5717}
5718
5719bool AArch64InstructionSelector::tryOptBuildVecToSubregToReg(
// NOTE(review): the parameter-list line (original line 5720) is missing from
// this extraction. The body references `I` and `MRI`, so it presumably
// declared (MachineInstr &I, MachineRegisterInfo &MRI) -- confirm against
// upstream.
5721 // Given:
5722 // %vec = G_BUILD_VECTOR %elt, %undef, %undef, ... %undef
5723 //
5724 // Select the G_BUILD_VECTOR as a SUBREG_TO_REG from %elt.
5725 Register Dst = I.getOperand(0).getReg();
5726 Register EltReg = I.getOperand(1).getReg();
5727 LLT EltTy = MRI.getType(EltReg);
5728 // If the index isn't on the same bank as its elements, then this can't be a
5729 // SUBREG_TO_REG.
5730 const RegisterBank &EltRB = *RBI.getRegBank(EltReg, MRI, TRI);
5731 const RegisterBank &DstRB = *RBI.getRegBank(Dst, MRI, TRI);
5732 if (EltRB != DstRB)
5733 return false;
// Every operand after the first element must be an implicit def (undef).
5734 if (any_of(drop_begin(I.operands(), 2), [&MRI](const MachineOperand &Op) {
5735 return !getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, Op.getReg(), MRI);
5736 }))
5737 return false;
// Find register classes for the element and destination, plus the
// subregister index the element occupies within the destination class.
5738 unsigned SubReg;
5739 const TargetRegisterClass *EltRC = getRegClassForTypeOnBank(EltTy, EltRB);
5740 if (!EltRC)
5741 return false;
5742 const TargetRegisterClass *DstRC =
5743 getRegClassForTypeOnBank(MRI.getType(Dst), DstRB);
5744 if (!DstRC)
5745 return false;
5746 if (!getSubRegForClass(EltRC, TRI, SubReg))
5747 return false;
5748 auto SubregToReg = MIB.buildInstr(AArch64::SUBREG_TO_REG, {Dst}, {})
5749 .addImm(0)
5750 .addUse(EltReg)
5751 .addImm(SubReg);
5752 I.eraseFromParent();
5753 constrainSelectedInstRegOperands(*SubregToReg, TII, TRI, RBI);
5754 return RBI.constrainGenericRegister(Dst, *DstRC, MRI);
5755}
5756
5757bool AArch64InstructionSelector::selectBuildVector(MachineInstr &I,
// NOTE(review): original line 5758 (the rest of the parameter list) is
// missing from this extraction; the body references `MRI`, so it presumably
// declared MachineRegisterInfo &MRI -- confirm against upstream.
5759 assert(I.getOpcode() == TargetOpcode::G_BUILD_VECTOR);
5760 // Until we port more of the optimized selections, for now just use a vector
5761 // insert sequence.
5762 const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
5763 const LLT EltTy = MRI.getType(I.getOperand(1).getReg());
5764 unsigned EltSize = EltTy.getSizeInBits();
5765
// Fast paths: an all-constant vector, or a single element followed only by
// undefs.
5766 if (tryOptConstantBuildVec(I, DstTy, MRI))
5767 return true;
5768 if (tryOptBuildVecToSubregToReg(I, MRI))
5769 return true;
5770
5771 if (EltSize != 8 && EltSize != 16 && EltSize != 32 && EltSize != 64)
5772 return false; // Don't support all element types yet.
5773 const RegisterBank &RB = *RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI);
5774
// Move the first element into a 128-bit vector register, then insert the
// remaining elements lane by lane.
5775 const TargetRegisterClass *DstRC = &AArch64::FPR128RegClass;
5776 MachineInstr *ScalarToVec =
5777 emitScalarToVector(DstTy.getElementType().getSizeInBits(), DstRC,
5778 I.getOperand(1).getReg(), MIB);
5779 if (!ScalarToVec)
5780 return false;
5781
5782 Register DstVec = ScalarToVec->getOperand(0).getReg();
5783 unsigned DstSize = DstTy.getSizeInBits();
5784
5785 // Keep track of the last MI we inserted. Later on, we might be able to save
5786 // a copy using it.
5787 MachineInstr *PrevMI = ScalarToVec;
5788 for (unsigned i = 2, e = DstSize / EltSize + 1; i < e; ++i) {
5789 // Note that if we don't do a subregister copy, we can end up making an
5790 // extra register.
5791 Register OpReg = I.getOperand(i).getReg();
5792 // Do not emit inserts for undefs
5793 if (!getOpcodeDef<GImplicitDef>(OpReg, MRI)) {
5794 PrevMI = &*emitLaneInsert(std::nullopt, DstVec, OpReg, i - 1, RB, MIB);
5795 DstVec = PrevMI->getOperand(0).getReg();
5796 }
5797 }
5798
5799 // If DstTy's size in bits is less than 128, then emit a subregister copy
5800 // from DstVec to the last register we've defined.
5801 if (DstSize < 128) {
5802 // Force this to be FPR using the destination vector.
5803 const TargetRegisterClass *RC =
5804 getRegClassForTypeOnBank(DstTy, *RBI.getRegBank(DstVec, MRI, TRI));
5805 if (!RC)
5806 return false;
5807 if (RC != &AArch64::FPR32RegClass && RC != &AArch64::FPR64RegClass) {
5808 LLVM_DEBUG(dbgs() << "Unsupported register class!\n");
5809 return false;
5810 }
5811
5812 unsigned SubReg = 0;
5813 if (!getSubRegForClass(RC, TRI, SubReg))
5814 return false;
5815 if (SubReg != AArch64::ssub && SubReg != AArch64::dsub) {
5816 LLVM_DEBUG(dbgs() << "Unsupported destination size! (" << DstSize
5817 << "\n");
5818 return false;
5819 }
5820
5821 Register Reg = MRI.createVirtualRegister(RC);
5822 Register DstReg = I.getOperand(0).getReg();
5823
// Copy the low ssub/dsub lanes of the 128-bit working vector into the
// narrower destination.
5824 MIB.buildInstr(TargetOpcode::COPY, {DstReg}, {}).addReg(DstVec, 0, SubReg);
5825 MachineOperand &RegOp = I.getOperand(1);
5826 RegOp.setReg(Reg);
5827 RBI.constrainGenericRegister(DstReg, *RC, MRI);
5828 } else {
5829 // We either have a vector with all elements (except the first one) undef or
5830 // at least one non-undef non-first element. In the first case, we need to
5831 // constrain the output register ourselves as we may have generated an
5832 // INSERT_SUBREG operation which is a generic operation for which the
5833 // output regclass cannot be automatically chosen.
5834 //
5835 // In the second case, there is no need to do this as it may generate an
5836 // instruction like INSvi32gpr where the regclass can be automatically
5837 // chosen.
5838 //
5839 // Also, we save a copy by re-using the destination register on the final
5840 // insert.
5841 PrevMI->getOperand(0).setReg(I.getOperand(0).getReg());
// NOTE(review): original line 5842 is missing from this extraction --
// presumably a constrain call on PrevMI; confirm against upstream.
5843
5844 Register DstReg = PrevMI->getOperand(0).getReg();
5845 if (PrevMI == ScalarToVec && DstReg.isVirtual()) {
5846 const TargetRegisterClass *RC =
5847 getRegClassForTypeOnBank(DstTy, *RBI.getRegBank(DstVec, MRI, TRI));
5848 RBI.constrainGenericRegister(DstReg, *RC, MRI);
5849 }
5850 }
5851
5852 I.eraseFromParent();
5853 return true;
5854}
5855
// Select a multi-vector NEON load intrinsic (ld1x2/ld2/ld3/... ) with the
// given target opcode. The tuple load defines one register; each destination
// vector is then copied out of its dsub/qsub subregister slice.
5856bool AArch64InstructionSelector::selectVectorLoadIntrinsic(unsigned Opc,
5857 unsigned NumVecs,
5858 MachineInstr &I) {
5859 assert(I.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS);
5860 assert(Opc && "Expected an opcode?");
5861 assert(NumVecs > 1 && NumVecs < 5 && "Only support 2, 3, or 4 vectors");
5862 auto &MRI = *MIB.getMRI();
5863 LLT Ty = MRI.getType(I.getOperand(0).getReg());
5864 unsigned Size = Ty.getSizeInBits();
5865 assert((Size == 64 || Size == 128) &&
5866 "Destination must be 64 bits or 128 bits?");
// dsub0/qsub0 are consecutive subreg indices, so SubReg + Idx picks slice
// Idx of the tuple below.
5867 unsigned SubReg = Size == 64 ? AArch64::dsub0 : AArch64::qsub0;
// The pointer is the last operand of the intrinsic.
5868 auto Ptr = I.getOperand(I.getNumOperands() - 1).getReg();
5869 assert(MRI.getType(Ptr).isPointer() && "Expected a pointer type?");
5870 auto Load = MIB.buildInstr(Opc, {Ty}, {Ptr});
5871 Load.cloneMemRefs(I);
// NOTE(review): original line 5872 is missing from this extraction;
// presumably a constrain call on the new load -- confirm against upstream.
5873 Register SelectedLoadDst = Load->getOperand(0).getReg();
5874 for (unsigned Idx = 0; Idx < NumVecs; ++Idx) {
5875 auto Vec = MIB.buildInstr(TargetOpcode::COPY, {I.getOperand(Idx)}, {})
5876 .addReg(SelectedLoadDst, 0, SubReg + Idx);
5877 // Emit the subreg copies and immediately select them.
5878 // FIXME: We should refactor our copy code into an emitCopy helper and
5879 // clean up uses of this pattern elsewhere in the selector.
5880 selectCopy(*Vec, TII, MRI, TRI, RBI);
5881 }
5882 return true;
5883}
5884
// Select a NEON load-lane intrinsic (ld2lane/ld3lane/ld4lane). The source
// vectors are packed into a Q-register tuple; 64-bit vectors are first
// widened to 128 bits and the results narrowed back afterwards.
5885bool AArch64InstructionSelector::selectVectorLoadLaneIntrinsic(
5886 unsigned Opc, unsigned NumVecs, MachineInstr &I) {
5887 assert(I.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS);
5888 assert(Opc && "Expected an opcode?");
5889 assert(NumVecs > 1 && NumVecs < 5 && "Only support 2, 3, or 4 vectors");
5890 auto &MRI = *MIB.getMRI();
5891 LLT Ty = MRI.getType(I.getOperand(0).getReg());
5892 bool Narrow = Ty.getSizeInBits() == 64;
5893
// Source vector operands start after the NumVecs destination defs and the
// intrinsic-ID operand.
5894 auto FirstSrcRegIt = I.operands_begin() + NumVecs + 1;
5895 SmallVector<Register, 4> Regs(NumVecs);
5896 std::transform(FirstSrcRegIt, FirstSrcRegIt + NumVecs, Regs.begin(),
5897 [](auto MO) { return MO.getReg(); });
5898
// Lane instructions operate on 128-bit registers, so widen 64-bit inputs.
5899 if (Narrow) {
5900 transform(Regs, Regs.begin(), [this](Register Reg) {
5901 return emitScalarToVector(64, &AArch64::FPR128RegClass, Reg, MIB)
5902 ->getOperand(0)
5903 .getReg();
5904 });
5905 Ty = Ty.multiplyElements(2);
5906 }
5907
5908 Register Tuple = createQTuple(Regs, MIB);
// The lane number must be a compile-time constant.
5909 auto LaneNo = getIConstantVRegVal((FirstSrcRegIt + NumVecs)->getReg(), MRI);
5910 if (!LaneNo)
5911 return false;
5912
5913 Register Ptr = (FirstSrcRegIt + NumVecs + 1)->getReg();
5914 auto Load = MIB.buildInstr(Opc, {Ty}, {})
5915 .addReg(Tuple)
5916 .addImm(LaneNo->getZExtValue())
5917 .addReg(Ptr);
5918 Load.cloneMemRefs(I);
// NOTE(review): original line 5919 is missing from this extraction;
// presumably a constrain call on the new load -- confirm against upstream.
5920 Register SelectedLoadDst = Load->getOperand(0).getReg();
5921 unsigned SubReg = AArch64::qsub0;
// Copy each result slice out of the tuple; for the narrow case, copy into a
// temporary 128-bit register and then extract the low 64 bits.
5922 for (unsigned Idx = 0; Idx < NumVecs; ++Idx) {
5923 auto Vec = MIB.buildInstr(TargetOpcode::COPY,
5924 {Narrow ? DstOp(&AArch64::FPR128RegClass)
5925 : DstOp(I.getOperand(Idx).getReg())},
5926 {})
5927 .addReg(SelectedLoadDst, 0, SubReg + Idx);
5928 Register WideReg = Vec.getReg(0);
5929 // Emit the subreg copies and immediately select them.
5930 selectCopy(*Vec, TII, MRI, TRI, RBI);
5931 if (Narrow &&
5932 !emitNarrowVector(I.getOperand(Idx).getReg(), WideReg, MIB, MRI))
5933 return false;
5934 }
5935 return true;
5936}
5937
// Select a multi-vector NEON store intrinsic (st1x2/st2/...): pack the
// NumVecs source vectors into a D- or Q-register tuple and emit the given
// store opcode.
5938void AArch64InstructionSelector::selectVectorStoreIntrinsic(MachineInstr &I,
5939 unsigned NumVecs,
5940 unsigned Opc) {
5941 MachineRegisterInfo &MRI = I.getParent()->getParent()->getRegInfo();
5942 LLT Ty = MRI.getType(I.getOperand(1).getReg());
// Operand layout: [intrinsic-id, vec0..vecN-1, ptr].
5943 Register Ptr = I.getOperand(1 + NumVecs).getReg();
5944
5945 SmallVector<Register, 2> Regs(NumVecs);
5946 std::transform(I.operands_begin() + 1, I.operands_begin() + 1 + NumVecs,
5947 Regs.begin(), [](auto MO) { return MO.getReg(); });
5948
// 128-bit vectors go in a Q tuple, 64-bit vectors in a D tuple.
5949 Register Tuple = Ty.getSizeInBits() == 128 ? createQTuple(Regs, MIB)
5950 : createDTuple(Regs, MIB);
5951 auto Store = MIB.buildInstr(Opc, {}, {Tuple, Ptr});
5952 Store.cloneMemRefs(I);
// NOTE(review): original line 5953 is missing from this extraction;
// presumably a constrain call on the new store -- confirm against upstream.
5954}
5955
// Select a NEON store-lane intrinsic (st2lane/st3lane/st4lane): widen 64-bit
// source vectors to 128 bits, pack them into a Q tuple, and emit the lane
// store with a constant lane index.
5956bool AArch64InstructionSelector::selectVectorStoreLaneIntrinsic(
5957 MachineInstr &I, unsigned NumVecs, unsigned Opc) {
5958 MachineRegisterInfo &MRI = I.getParent()->getParent()->getRegInfo();
5959 LLT Ty = MRI.getType(I.getOperand(1).getReg());
5960 bool Narrow = Ty.getSizeInBits() == 64;
5961
5962 SmallVector<Register, 2> Regs(NumVecs);
5963 std::transform(I.operands_begin() + 1, I.operands_begin() + 1 + NumVecs,
5964 Regs.begin(), [](auto MO) { return MO.getReg(); });
5965
// Lane stores operate on 128-bit registers.
5966 if (Narrow)
5967 transform(Regs, Regs.begin(), [this](Register Reg) {
5968 return emitScalarToVector(64, &AArch64::FPR128RegClass, Reg, MIB)
5969 ->getOperand(0)
5970 .getReg();
5971 });
5972
5973 Register Tuple = createQTuple(Regs, MIB);
5974
// The lane number must be a compile-time constant.
5975 auto LaneNo = getIConstantVRegVal(I.getOperand(1 + NumVecs).getReg(), MRI);
5976 if (!LaneNo)
5977 return false;
5978 Register Ptr = I.getOperand(1 + NumVecs + 1).getReg();
5979 auto Store = MIB.buildInstr(Opc, {}, {})
5980 .addReg(Tuple)
5981 .addImm(LaneNo->getZExtValue())
5982 .addReg(Ptr);
5983 Store.cloneMemRefs(I);
// NOTE(review): original line 5984 is missing from this extraction;
// presumably a constrain call on the new store -- confirm against upstream.
5985 return true;
5986}
5987
5988bool AArch64InstructionSelector::selectIntrinsicWithSideEffects(
5990 // Find the intrinsic ID.
5991 unsigned IntrinID = cast<GIntrinsic>(I).getIntrinsicID();
5992
5993 const LLT S8 = LLT::scalar(8);
5994 const LLT S16 = LLT::scalar(16);
5995 const LLT S32 = LLT::scalar(32);
5996 const LLT S64 = LLT::scalar(64);
5997 const LLT P0 = LLT::pointer(0, 64);
5998 // Select the instruction.
5999 switch (IntrinID) {
6000 default:
6001 return false;
6002 case Intrinsic::aarch64_ldxp:
6003 case Intrinsic::aarch64_ldaxp: {
6004 auto NewI = MIB.buildInstr(
6005 IntrinID == Intrinsic::aarch64_ldxp ? AArch64::LDXPX : AArch64::LDAXPX,
6006 {I.getOperand(0).getReg(), I.getOperand(1).getReg()},
6007 {I.getOperand(3)});
6008 NewI.cloneMemRefs(I);
6010 break;
6011 }
6012 case Intrinsic::aarch64_neon_ld1x2: {
6013 LLT Ty = MRI.getType(I.getOperand(0).getReg());
6014 unsigned Opc = 0;
6015 if (Ty == LLT::fixed_vector(8, S8))
6016 Opc = AArch64::LD1Twov8b;
6017 else if (Ty == LLT::fixed_vector(16, S8))
6018 Opc = AArch64::LD1Twov16b;
6019 else if (Ty == LLT::fixed_vector(4, S16))
6020 Opc = AArch64::LD1Twov4h;
6021 else if (Ty == LLT::fixed_vector(8, S16))
6022 Opc = AArch64::LD1Twov8h;
6023 else if (Ty == LLT::fixed_vector(2, S32))
6024 Opc = AArch64::LD1Twov2s;
6025 else if (Ty == LLT::fixed_vector(4, S32))
6026 Opc = AArch64::LD1Twov4s;
6027 else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
6028 Opc = AArch64::LD1Twov2d;
6029 else if (Ty == S64 || Ty == P0)
6030 Opc = AArch64::LD1Twov1d;
6031 else
6032 llvm_unreachable("Unexpected type for ld1x2!");
6033 selectVectorLoadIntrinsic(Opc, 2, I);
6034 break;
6035 }
6036 case Intrinsic::aarch64_neon_ld1x3: {
6037 LLT Ty = MRI.getType(I.getOperand(0).getReg());
6038 unsigned Opc = 0;
6039 if (Ty == LLT::fixed_vector(8, S8))
6040 Opc = AArch64::LD1Threev8b;
6041 else if (Ty == LLT::fixed_vector(16, S8))
6042 Opc = AArch64::LD1Threev16b;
6043 else if (Ty == LLT::fixed_vector(4, S16))
6044 Opc = AArch64::LD1Threev4h;
6045 else if (Ty == LLT::fixed_vector(8, S16))
6046 Opc = AArch64::LD1Threev8h;
6047 else if (Ty == LLT::fixed_vector(2, S32))
6048 Opc = AArch64::LD1Threev2s;
6049 else if (Ty == LLT::fixed_vector(4, S32))
6050 Opc = AArch64::LD1Threev4s;
6051 else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
6052 Opc = AArch64::LD1Threev2d;
6053 else if (Ty == S64 || Ty == P0)
6054 Opc = AArch64::LD1Threev1d;
6055 else
6056 llvm_unreachable("Unexpected type for ld1x3!");
6057 selectVectorLoadIntrinsic(Opc, 3, I);
6058 break;
6059 }
6060 case Intrinsic::aarch64_neon_ld1x4: {
6061 LLT Ty = MRI.getType(I.getOperand(0).getReg());
6062 unsigned Opc = 0;
6063 if (Ty == LLT::fixed_vector(8, S8))
6064 Opc = AArch64::LD1Fourv8b;
6065 else if (Ty == LLT::fixed_vector(16, S8))
6066 Opc = AArch64::LD1Fourv16b;
6067 else if (Ty == LLT::fixed_vector(4, S16))
6068 Opc = AArch64::LD1Fourv4h;
6069 else if (Ty == LLT::fixed_vector(8, S16))
6070 Opc = AArch64::LD1Fourv8h;
6071 else if (Ty == LLT::fixed_vector(2, S32))
6072 Opc = AArch64::LD1Fourv2s;
6073 else if (Ty == LLT::fixed_vector(4, S32))
6074 Opc = AArch64::LD1Fourv4s;
6075 else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
6076 Opc = AArch64::LD1Fourv2d;
6077 else if (Ty == S64 || Ty == P0)
6078 Opc = AArch64::LD1Fourv1d;
6079 else
6080 llvm_unreachable("Unexpected type for ld1x4!");
6081 selectVectorLoadIntrinsic(Opc, 4, I);
6082 break;
6083 }
6084 case Intrinsic::aarch64_neon_ld2: {
6085 LLT Ty = MRI.getType(I.getOperand(0).getReg());
6086 unsigned Opc = 0;
6087 if (Ty == LLT::fixed_vector(8, S8))
6088 Opc = AArch64::LD2Twov8b;
6089 else if (Ty == LLT::fixed_vector(16, S8))
6090 Opc = AArch64::LD2Twov16b;
6091 else if (Ty == LLT::fixed_vector(4, S16))
6092 Opc = AArch64::LD2Twov4h;
6093 else if (Ty == LLT::fixed_vector(8, S16))
6094 Opc = AArch64::LD2Twov8h;
6095 else if (Ty == LLT::fixed_vector(2, S32))
6096 Opc = AArch64::LD2Twov2s;
6097 else if (Ty == LLT::fixed_vector(4, S32))
6098 Opc = AArch64::LD2Twov4s;
6099 else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
6100 Opc = AArch64::LD2Twov2d;
6101 else if (Ty == S64 || Ty == P0)
6102 Opc = AArch64::LD1Twov1d;
6103 else
6104 llvm_unreachable("Unexpected type for ld2!");
6105 selectVectorLoadIntrinsic(Opc, 2, I);
6106 break;
6107 }
6108 case Intrinsic::aarch64_neon_ld2lane: {
6109 LLT Ty = MRI.getType(I.getOperand(0).getReg());
6110 unsigned Opc;
6111 if (Ty == LLT::fixed_vector(8, S8) || Ty == LLT::fixed_vector(16, S8))
6112 Opc = AArch64::LD2i8;
6113 else if (Ty == LLT::fixed_vector(4, S16) || Ty == LLT::fixed_vector(8, S16))
6114 Opc = AArch64::LD2i16;
6115 else if (Ty == LLT::fixed_vector(2, S32) || Ty == LLT::fixed_vector(4, S32))
6116 Opc = AArch64::LD2i32;
6117 else if (Ty == LLT::fixed_vector(2, S64) ||
6118 Ty == LLT::fixed_vector(2, P0) || Ty == S64 || Ty == P0)
6119 Opc = AArch64::LD2i64;
6120 else
6121 llvm_unreachable("Unexpected type for st2lane!");
6122 if (!selectVectorLoadLaneIntrinsic(Opc, 2, I))
6123 return false;
6124 break;
6125 }
6126 case Intrinsic::aarch64_neon_ld2r: {
6127 LLT Ty = MRI.getType(I.getOperand(0).getReg());
6128 unsigned Opc = 0;
6129 if (Ty == LLT::fixed_vector(8, S8))
6130 Opc = AArch64::LD2Rv8b;
6131 else if (Ty == LLT::fixed_vector(16, S8))
6132 Opc = AArch64::LD2Rv16b;
6133 else if (Ty == LLT::fixed_vector(4, S16))
6134 Opc = AArch64::LD2Rv4h;
6135 else if (Ty == LLT::fixed_vector(8, S16))
6136 Opc = AArch64::LD2Rv8h;
6137 else if (Ty == LLT::fixed_vector(2, S32))
6138 Opc = AArch64::LD2Rv2s;
6139 else if (Ty == LLT::fixed_vector(4, S32))
6140 Opc = AArch64::LD2Rv4s;
6141 else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
6142 Opc = AArch64::LD2Rv2d;
6143 else if (Ty == S64 || Ty == P0)
6144 Opc = AArch64::LD2Rv1d;
6145 else
6146 llvm_unreachable("Unexpected type for ld2r!");
6147 selectVectorLoadIntrinsic(Opc, 2, I);
6148 break;
6149 }
6150 case Intrinsic::aarch64_neon_ld3: {
6151 LLT Ty = MRI.getType(I.getOperand(0).getReg());
6152 unsigned Opc = 0;
6153 if (Ty == LLT::fixed_vector(8, S8))
6154 Opc = AArch64::LD3Threev8b;
6155 else if (Ty == LLT::fixed_vector(16, S8))
6156 Opc = AArch64::LD3Threev16b;
6157 else if (Ty == LLT::fixed_vector(4, S16))
6158 Opc = AArch64::LD3Threev4h;
6159 else if (Ty == LLT::fixed_vector(8, S16))
6160 Opc = AArch64::LD3Threev8h;
6161 else if (Ty == LLT::fixed_vector(2, S32))
6162 Opc = AArch64::LD3Threev2s;
6163 else if (Ty == LLT::fixed_vector(4, S32))
6164 Opc = AArch64::LD3Threev4s;
6165 else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
6166 Opc = AArch64::LD3Threev2d;
6167 else if (Ty == S64 || Ty == P0)
6168 Opc = AArch64::LD1Threev1d;
6169 else
6170 llvm_unreachable("Unexpected type for ld3!");
6171 selectVectorLoadIntrinsic(Opc, 3, I);
6172 break;
6173 }
6174 case Intrinsic::aarch64_neon_ld3lane: {
6175 LLT Ty = MRI.getType(I.getOperand(0).getReg());
6176 unsigned Opc;
6177 if (Ty == LLT::fixed_vector(8, S8) || Ty == LLT::fixed_vector(16, S8))
6178 Opc = AArch64::LD3i8;
6179 else if (Ty == LLT::fixed_vector(4, S16) || Ty == LLT::fixed_vector(8, S16))
6180 Opc = AArch64::LD3i16;
6181 else if (Ty == LLT::fixed_vector(2, S32) || Ty == LLT::fixed_vector(4, S32))
6182 Opc = AArch64::LD3i32;
6183 else if (Ty == LLT::fixed_vector(2, S64) ||
6184 Ty == LLT::fixed_vector(2, P0) || Ty == S64 || Ty == P0)
6185 Opc = AArch64::LD3i64;
6186 else
6187 llvm_unreachable("Unexpected type for st3lane!");
6188 if (!selectVectorLoadLaneIntrinsic(Opc, 3, I))
6189 return false;
6190 break;
6191 }
6192 case Intrinsic::aarch64_neon_ld3r: {
6193 LLT Ty = MRI.getType(I.getOperand(0).getReg());
6194 unsigned Opc = 0;
6195 if (Ty == LLT::fixed_vector(8, S8))
6196 Opc = AArch64::LD3Rv8b;
6197 else if (Ty == LLT::fixed_vector(16, S8))
6198 Opc = AArch64::LD3Rv16b;
6199 else if (Ty == LLT::fixed_vector(4, S16))
6200 Opc = AArch64::LD3Rv4h;
6201 else if (Ty == LLT::fixed_vector(8, S16))
6202 Opc = AArch64::LD3Rv8h;
6203 else if (Ty == LLT::fixed_vector(2, S32))
6204 Opc = AArch64::LD3Rv2s;
6205 else if (Ty == LLT::fixed_vector(4, S32))
6206 Opc = AArch64::LD3Rv4s;
6207 else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
6208 Opc = AArch64::LD3Rv2d;
6209 else if (Ty == S64 || Ty == P0)
6210 Opc = AArch64::LD3Rv1d;
6211 else
6212 llvm_unreachable("Unexpected type for ld3r!");
6213 selectVectorLoadIntrinsic(Opc, 3, I);
6214 break;
6215 }
6216 case Intrinsic::aarch64_neon_ld4: {
6217 LLT Ty = MRI.getType(I.getOperand(0).getReg());
6218 unsigned Opc = 0;
6219 if (Ty == LLT::fixed_vector(8, S8))
6220 Opc = AArch64::LD4Fourv8b;
6221 else if (Ty == LLT::fixed_vector(16, S8))
6222 Opc = AArch64::LD4Fourv16b;
6223 else if (Ty == LLT::fixed_vector(4, S16))
6224 Opc = AArch64::LD4Fourv4h;
6225 else if (Ty == LLT::fixed_vector(8, S16))
6226 Opc = AArch64::LD4Fourv8h;
6227 else if (Ty == LLT::fixed_vector(2, S32))
6228 Opc = AArch64::LD4Fourv2s;
6229 else if (Ty == LLT::fixed_vector(4, S32))
6230 Opc = AArch64::LD4Fourv4s;
6231 else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
6232 Opc = AArch64::LD4Fourv2d;
6233 else if (Ty == S64 || Ty == P0)
6234 Opc = AArch64::LD1Fourv1d;
6235 else
6236 llvm_unreachable("Unexpected type for ld4!");
6237 selectVectorLoadIntrinsic(Opc, 4, I);
6238 break;
6239 }
6240 case Intrinsic::aarch64_neon_ld4lane: {
6241 LLT Ty = MRI.getType(I.getOperand(0).getReg());
6242 unsigned Opc;
6243 if (Ty == LLT::fixed_vector(8, S8) || Ty == LLT::fixed_vector(16, S8))
6244 Opc = AArch64::LD4i8;
6245 else if (Ty == LLT::fixed_vector(4, S16) || Ty == LLT::fixed_vector(8, S16))
6246 Opc = AArch64::LD4i16;
6247 else if (Ty == LLT::fixed_vector(2, S32) || Ty == LLT::fixed_vector(4, S32))
6248 Opc = AArch64::LD4i32;
6249 else if (Ty == LLT::fixed_vector(2, S64) ||
6250 Ty == LLT::fixed_vector(2, P0) || Ty == S64 || Ty == P0)
6251 Opc = AArch64::LD4i64;
6252 else
6253 llvm_unreachable("Unexpected type for st4lane!");
6254 if (!selectVectorLoadLaneIntrinsic(Opc, 4, I))
6255 return false;
6256 break;
6257 }
6258 case Intrinsic::aarch64_neon_ld4r: {
6259 LLT Ty = MRI.getType(I.getOperand(0).getReg());
6260 unsigned Opc = 0;
6261 if (Ty == LLT::fixed_vector(8, S8))
6262 Opc = AArch64::LD4Rv8b;
6263 else if (Ty == LLT::fixed_vector(16, S8))
6264 Opc = AArch64::LD4Rv16b;
6265 else if (Ty == LLT::fixed_vector(4, S16))
6266 Opc = AArch64::LD4Rv4h;
6267 else if (Ty == LLT::fixed_vector(8, S16))
6268 Opc = AArch64::LD4Rv8h;
6269 else if (Ty == LLT::fixed_vector(2, S32))
6270 Opc = AArch64::LD4Rv2s;
6271 else if (Ty == LLT::fixed_vector(4, S32))
6272 Opc = AArch64::LD4Rv4s;
6273 else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
6274 Opc = AArch64::LD4Rv2d;
6275 else if (Ty == S64 || Ty == P0)
6276 Opc = AArch64::LD4Rv1d;
6277 else
6278 llvm_unreachable("Unexpected type for ld4r!");
6279 selectVectorLoadIntrinsic(Opc, 4, I);
6280 break;
6281 }
6282 case Intrinsic::aarch64_neon_st1x2: {
6283 LLT Ty = MRI.getType(I.getOperand(1).getReg());
6284 unsigned Opc;
6285 if (Ty == LLT::fixed_vector(8, S8))
6286 Opc = AArch64::ST1Twov8b;
6287 else if (Ty == LLT::fixed_vector(16, S8))
6288 Opc = AArch64::ST1Twov16b;
6289 else if (Ty == LLT::fixed_vector(4, S16))
6290 Opc = AArch64::ST1Twov4h;
6291 else if (Ty == LLT::fixed_vector(8, S16))
6292 Opc = AArch64::ST1Twov8h;
6293 else if (Ty == LLT::fixed_vector(2, S32))
6294 Opc = AArch64::ST1Twov2s;
6295 else if (Ty == LLT::fixed_vector(4, S32))
6296 Opc = AArch64::ST1Twov4s;
6297 else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
6298 Opc = AArch64::ST1Twov2d;
6299 else if (Ty == S64 || Ty == P0)
6300 Opc = AArch64::ST1Twov1d;
6301 else
6302 llvm_unreachable("Unexpected type for st1x2!");
6303 selectVectorStoreIntrinsic(I, 2, Opc);
6304 break;
6305 }
6306 case Intrinsic::aarch64_neon_st1x3: {
6307 LLT Ty = MRI.getType(I.getOperand(1).getReg());
6308 unsigned Opc;
6309 if (Ty == LLT::fixed_vector(8, S8))
6310 Opc = AArch64::ST1Threev8b;
6311 else if (Ty == LLT::fixed_vector(16, S8))
6312 Opc = AArch64::ST1Threev16b;
6313 else if (Ty == LLT::fixed_vector(4, S16))
6314 Opc = AArch64::ST1Threev4h;
6315 else if (Ty == LLT::fixed_vector(8, S16))
6316 Opc = AArch64::ST1Threev8h;
6317 else if (Ty == LLT::fixed_vector(2, S32))
6318 Opc = AArch64::ST1Threev2s;
6319 else if (Ty == LLT::fixed_vector(4, S32))
6320 Opc = AArch64::ST1Threev4s;
6321 else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
6322 Opc = AArch64::ST1Threev2d;
6323 else if (Ty == S64 || Ty == P0)
6324 Opc = AArch64::ST1Threev1d;
6325 else
6326 llvm_unreachable("Unexpected type for st1x3!");
6327 selectVectorStoreIntrinsic(I, 3, Opc);
6328 break;
6329 }
6330 case Intrinsic::aarch64_neon_st1x4: {
6331 LLT Ty = MRI.getType(I.getOperand(1).getReg());
6332 unsigned Opc;
6333 if (Ty == LLT::fixed_vector(8, S8))
6334 Opc = AArch64::ST1Fourv8b;
6335 else if (Ty == LLT::fixed_vector(16, S8))
6336 Opc = AArch64::ST1Fourv16b;
6337 else if (Ty == LLT::fixed_vector(4, S16))
6338 Opc = AArch64::ST1Fourv4h;
6339 else if (Ty == LLT::fixed_vector(8, S16))
6340 Opc = AArch64::ST1Fourv8h;
6341 else if (Ty == LLT::fixed_vector(2, S32))
6342 Opc = AArch64::ST1Fourv2s;
6343 else if (Ty == LLT::fixed_vector(4, S32))
6344 Opc = AArch64::ST1Fourv4s;
6345 else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
6346 Opc = AArch64::ST1Fourv2d;
6347 else if (Ty == S64 || Ty == P0)
6348 Opc = AArch64::ST1Fourv1d;
6349 else
6350 llvm_unreachable("Unexpected type for st1x4!");
6351 selectVectorStoreIntrinsic(I, 4, Opc);
6352 break;
6353 }
6354 case Intrinsic::aarch64_neon_st2: {
6355 LLT Ty = MRI.getType(I.getOperand(1).getReg());
6356 unsigned Opc;
6357 if (Ty == LLT::fixed_vector(8, S8))
6358 Opc = AArch64::ST2Twov8b;
6359 else if (Ty == LLT::fixed_vector(16, S8))
6360 Opc = AArch64::ST2Twov16b;
6361 else if (Ty == LLT::fixed_vector(4, S16))
6362 Opc = AArch64::ST2Twov4h;
6363 else if (Ty == LLT::fixed_vector(8, S16))
6364 Opc = AArch64::ST2Twov8h;
6365 else if (Ty == LLT::fixed_vector(2, S32))
6366 Opc = AArch64::ST2Twov2s;
6367 else if (Ty == LLT::fixed_vector(4, S32))
6368 Opc = AArch64::ST2Twov4s;
6369 else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
6370 Opc = AArch64::ST2Twov2d;
6371 else if (Ty == S64 || Ty == P0)
6372 Opc = AArch64::ST1Twov1d;
6373 else
6374 llvm_unreachable("Unexpected type for st2!");
6375 selectVectorStoreIntrinsic(I, 2, Opc);
6376 break;
6377 }
6378 case Intrinsic::aarch64_neon_st3: {
6379 LLT Ty = MRI.getType(I.getOperand(1).getReg());
6380 unsigned Opc;
6381 if (Ty == LLT::fixed_vector(8, S8))
6382 Opc = AArch64::ST3Threev8b;
6383 else if (Ty == LLT::fixed_vector(16, S8))
6384 Opc = AArch64::ST3Threev16b;
6385 else if (Ty == LLT::fixed_vector(4, S16))
6386 Opc = AArch64::ST3Threev4h;
6387 else if (Ty == LLT::fixed_vector(8, S16))
6388 Opc = AArch64::ST3Threev8h;
6389 else if (Ty == LLT::fixed_vector(2, S32))
6390 Opc = AArch64::ST3Threev2s;
6391 else if (Ty == LLT::fixed_vector(4, S32))
6392 Opc = AArch64::ST3Threev4s;
6393 else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
6394 Opc = AArch64::ST3Threev2d;
6395 else if (Ty == S64 || Ty == P0)
6396 Opc = AArch64::ST1Threev1d;
6397 else
6398 llvm_unreachable("Unexpected type for st3!");
6399 selectVectorStoreIntrinsic(I, 3, Opc);
6400 break;
6401 }
6402 case Intrinsic::aarch64_neon_st4: {
6403 LLT Ty = MRI.getType(I.getOperand(1).getReg());
6404 unsigned Opc;
6405 if (Ty == LLT::fixed_vector(8, S8))
6406 Opc = AArch64::ST4Fourv8b;
6407 else if (Ty == LLT::fixed_vector(16, S8))
6408 Opc = AArch64::ST4Fourv16b;
6409 else if (Ty == LLT::fixed_vector(4, S16))
6410 Opc = AArch64::ST4Fourv4h;
6411 else if (Ty == LLT::fixed_vector(8, S16))
6412 Opc = AArch64::ST4Fourv8h;
6413 else if (Ty == LLT::fixed_vector(2, S32))
6414 Opc = AArch64::ST4Fourv2s;
6415 else if (Ty == LLT::fixed_vector(4, S32))
6416 Opc = AArch64::ST4Fourv4s;
6417 else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
6418 Opc = AArch64::ST4Fourv2d;
6419 else if (Ty == S64 || Ty == P0)
6420 Opc = AArch64::ST1Fourv1d;
6421 else
6422 llvm_unreachable("Unexpected type for st4!");
6423 selectVectorStoreIntrinsic(I, 4, Opc);
6424 break;
6425 }
6426 case Intrinsic::aarch64_neon_st2lane: {
6427 LLT Ty = MRI.getType(I.getOperand(1).getReg());
6428 unsigned Opc;
6429 if (Ty == LLT::fixed_vector(8, S8) || Ty == LLT::fixed_vector(16, S8))
6430 Opc = AArch64::ST2i8;
6431 else if (Ty == LLT::fixed_vector(4, S16) || Ty == LLT::fixed_vector(8, S16))
6432 Opc = AArch64::ST2i16;
6433 else if (Ty == LLT::fixed_vector(2, S32) || Ty == LLT::fixed_vector(4, S32))
6434 Opc = AArch64::ST2i32;
6435 else if (Ty == LLT::fixed_vector(2, S64) ||
6436 Ty == LLT::fixed_vector(2, P0) || Ty == S64 || Ty == P0)
6437 Opc = AArch64::ST2i64;
6438 else
6439 llvm_unreachable("Unexpected type for st2lane!");
6440 if (!selectVectorStoreLaneIntrinsic(I, 2, Opc))
6441 return false;
6442 break;
6443 }
6444 case Intrinsic::aarch64_neon_st3lane: {
6445 LLT Ty = MRI.getType(I.getOperand(1).getReg());
6446 unsigned Opc;
6447 if (Ty == LLT::fixed_vector(8, S8) || Ty == LLT::fixed_vector(16, S8))
6448 Opc = AArch64::ST3i8;
6449 else if (Ty == LLT::fixed_vector(4, S16) || Ty == LLT::fixed_vector(8, S16))
6450 Opc = AArch64::ST3i16;
6451 else if (Ty == LLT::fixed_vector(2, S32) || Ty == LLT::fixed_vector(4, S32))
6452 Opc = AArch64::ST3i32;
6453 else if (Ty == LLT::fixed_vector(2, S64) ||
6454 Ty == LLT::fixed_vector(2, P0) || Ty == S64 || Ty == P0)
6455 Opc = AArch64::ST3i64;
6456 else
6457 llvm_unreachable("Unexpected type for st3lane!");
6458 if (!selectVectorStoreLaneIntrinsic(I, 3, Opc))
6459 return false;
6460 break;
6461 }
6462 case Intrinsic::aarch64_neon_st4lane: {
6463 LLT Ty = MRI.getType(I.getOperand(1).getReg());
6464 unsigned Opc;
6465 if (Ty == LLT::fixed_vector(8, S8) || Ty == LLT::fixed_vector(16, S8))
6466 Opc = AArch64::ST4i8;
6467 else if (Ty == LLT::fixed_vector(4, S16) || Ty == LLT::fixed_vector(8, S16))
6468 Opc = AArch64::ST4i16;
6469 else if (Ty == LLT::fixed_vector(2, S32) || Ty == LLT::fixed_vector(4, S32))
6470 Opc = AArch64::ST4i32;
6471 else if (Ty == LLT::fixed_vector(2, S64) ||
6472 Ty == LLT::fixed_vector(2, P0) || Ty == S64 || Ty == P0)
6473 Opc = AArch64::ST4i64;
6474 else
6475 llvm_unreachable("Unexpected type for st4lane!");
6476 if (!selectVectorStoreLaneIntrinsic(I, 4, Opc))
6477 return false;
6478 break;
6479 }
6480 case Intrinsic::aarch64_mops_memset_tag: {
6481 // Transform
6482 // %dst:gpr(p0) = \
6483 // G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aarch64.mops.memset.tag),
6484 // \ %dst:gpr(p0), %val:gpr(s64), %n:gpr(s64)
6485 // where %dst is updated, into
6486 // %Rd:GPR64common, %Rn:GPR64) = \
6487 // MOPSMemorySetTaggingPseudo \
6488 // %Rd:GPR64common, %Rn:GPR64, %Rm:GPR64
6489 // where Rd and Rn are tied.
6490 // It is expected that %val has been extended to s64 in legalization.
6491 // Note that the order of the size/value operands are swapped.
6492
6493 Register DstDef = I.getOperand(0).getReg();
6494 // I.getOperand(1) is the intrinsic function
6495 Register DstUse = I.getOperand(2).getReg();
6496 Register ValUse = I.getOperand(3).getReg();
6497 Register SizeUse = I.getOperand(4).getReg();
6498
6499 // MOPSMemorySetTaggingPseudo has two defs; the intrinsic call has only one.
6500 // Therefore an additional virtual register is requried for the updated size
6501 // operand. This value is not accessible via the semantics of the intrinsic.
6502 Register SizeDef = MRI.createGenericVirtualRegister(LLT::scalar(64));
6503
6504 auto Memset = MIB.buildInstr(AArch64::MOPSMemorySetTaggingPseudo,
6505 {DstDef, SizeDef}, {DstUse, SizeUse, ValUse});
6506 Memset.cloneMemRefs(I);
6508 break;
6509 }
6510 }
6511
6512 I.eraseFromParent();
6513 return true;
6514}
6515
/// Select a side-effect-free target intrinsic (G_INTRINSIC) for AArch64.
/// Returns true (and erases I) when this hand-written selector handled the
/// intrinsic; returns false to defer to the TableGen'd patterns.
///
/// NOTE(review): this extract is doxygen-rendered source with original line
/// numbers embedded; several original lines were stripped by link removal and
/// are flagged below — confirm each against upstream LLVM.
6516bool AArch64InstructionSelector::selectIntrinsic(MachineInstr &I,
// NOTE(review): elided original line 6517 — presumably the second parameter
// "MachineRegisterInfo &MRI) {"; confirm upstream.
6518 unsigned IntrinID = cast<GIntrinsic>(I).getIntrinsicID();
6519
6520 switch (IntrinID) {
6521 default:
6522 break;
  // SHA1H: the instruction only exists on FPRs, so GPR-bank values are
  // shuttled through fresh FPR32 vregs on both sides.
6523 case Intrinsic::aarch64_crypto_sha1h: {
6524 Register DstReg = I.getOperand(0).getReg();
6525 Register SrcReg = I.getOperand(2).getReg();
6526
6527 // FIXME: Should this be an assert?
6528 if (MRI.getType(DstReg).getSizeInBits() != 32 ||
6529 MRI.getType(SrcReg).getSizeInBits() != 32)
6530 return false;
6531
6532 // The operation has to happen on FPRs. Set up some new FPR registers for
6533 // the source and destination if they are on GPRs.
6534 if (RBI.getRegBank(SrcReg, MRI, TRI)->getID() != AArch64::FPRRegBankID) {
6535 SrcReg = MRI.createVirtualRegister(&AArch64::FPR32RegClass);
6536 MIB.buildCopy({SrcReg}, {I.getOperand(2)});
6537
6538 // Make sure the copy ends up getting constrained properly.
6539 RBI.constrainGenericRegister(I.getOperand(2).getReg(),
6540 AArch64::GPR32RegClass, MRI);
6541 }
6542
6543 if (RBI.getRegBank(DstReg, MRI, TRI)->getID() != AArch64::FPRRegBankID)
6544 DstReg = MRI.createVirtualRegister(&AArch64::FPR32RegClass);
6545
6546 // Actually insert the instruction.
6547 auto SHA1Inst = MIB.buildInstr(AArch64::SHA1Hrr, {DstReg}, {SrcReg});
6548 constrainSelectedInstRegOperands(*SHA1Inst, TII, TRI, RBI);
6549
6550 // Did we create a new register for the destination?
6551 if (DstReg != I.getOperand(0).getReg()) {
6552 // Yep. Copy the result of the instruction back into the original
6553 // destination.
6554 MIB.buildCopy({I.getOperand(0)}, {DstReg});
6555 RBI.constrainGenericRegister(I.getOperand(0).getReg(),
6556 AArch64::GPR32RegClass, MRI);
6557 }
6558
6559 I.eraseFromParent();
6560 return true;
6561 }
  // ptrauth.resign: auth with one key/discriminator, re-sign with another.
  // Lowered via the AUTPAC pseudo, which pins the value in X16 and clobbers
  // X17 (hence the IMPLICIT_DEF).
6562 case Intrinsic::ptrauth_resign: {
6563 Register DstReg = I.getOperand(0).getReg();
6564 Register ValReg = I.getOperand(2).getReg();
6565 uint64_t AUTKey = I.getOperand(3).getImm();
6566 Register AUTDisc = I.getOperand(4).getReg();
6567 uint64_t PACKey = I.getOperand(5).getImm();
6568 Register PACDisc = I.getOperand(6).getReg();
6569
6570 Register AUTAddrDisc = AUTDisc;
6571 uint16_t AUTConstDiscC = 0;
6572 std::tie(AUTConstDiscC, AUTAddrDisc) =
// NOTE(review): elided original line 6573/6574 — presumably
// "extractPtrauthBlendDiscriminators(AUTDisc, MRI);" splitting the blended
// discriminator into (constant, address) parts; confirm upstream.
6574
6575 Register PACAddrDisc = PACDisc;
6576 uint16_t PACConstDiscC = 0;
6577 std::tie(PACConstDiscC, PACAddrDisc) =
// NOTE(review): elided original line 6578 — presumably
// "extractPtrauthBlendDiscriminators(PACDisc, MRI);"; confirm upstream.
6579
6580 MIB.buildCopy({AArch64::X16}, {ValReg});
6581 MIB.buildInstr(TargetOpcode::IMPLICIT_DEF, {AArch64::X17}, {});
6582 MIB.buildInstr(AArch64::AUTPAC)
6583 .addImm(AUTKey)
6584 .addImm(AUTConstDiscC)
6585 .addUse(AUTAddrDisc)
6586 .addImm(PACKey)
6587 .addImm(PACConstDiscC)
6588 .addUse(PACAddrDisc)
6589 .constrainAllUses(TII, TRI, RBI);
6590 MIB.buildCopy({DstReg}, Register(AArch64::X16));
6591
6592 RBI.constrainGenericRegister(DstReg, AArch64::GPR64RegClass, MRI);
6593 I.eraseFromParent();
6594 return true;
6595 }
  // ptrauth.auth: authenticate only. Same X16/X17 convention as AUTPAC.
6596 case Intrinsic::ptrauth_auth: {
6597 Register DstReg = I.getOperand(0).getReg();
6598 Register ValReg = I.getOperand(2).getReg();
6599 uint64_t AUTKey = I.getOperand(3).getImm();
6600 Register AUTDisc = I.getOperand(4).getReg();
6601
6602 Register AUTAddrDisc = AUTDisc;
6603 uint16_t AUTConstDiscC = 0;
6604 std::tie(AUTConstDiscC, AUTAddrDisc) =
// NOTE(review): elided original line 6605 — presumably
// "extractPtrauthBlendDiscriminators(AUTDisc, MRI);"; confirm upstream.
6606
6607 MIB.buildCopy({AArch64::X16}, {ValReg});
6608 MIB.buildInstr(TargetOpcode::IMPLICIT_DEF, {AArch64::X17}, {});
6609 MIB.buildInstr(AArch64::AUT)
6610 .addImm(AUTKey)
6611 .addImm(AUTConstDiscC)
6612 .addUse(AUTAddrDisc)
6613 .constrainAllUses(TII, TRI, RBI);
6614 MIB.buildCopy({DstReg}, Register(AArch64::X16));
6615
6616 RBI.constrainGenericRegister(DstReg, AArch64::GPR64RegClass, MRI);
6617 I.eraseFromParent();
6618 return true;
6619 }
  // frameaddress/returnaddress: walk Depth frames up the FP chain; for
  // returnaddress, strip pointer-auth bits (XPACI with PAuth, else XPACLRI
  // which operates on LR).
6620 case Intrinsic::frameaddress:
6621 case Intrinsic::returnaddress: {
6622 MachineFunction &MF = *I.getParent()->getParent();
6623 MachineFrameInfo &MFI = MF.getFrameInfo();
6624
6625 unsigned Depth = I.getOperand(2).getImm();
6626 Register DstReg = I.getOperand(0).getReg();
6627 RBI.constrainGenericRegister(DstReg, AArch64::GPR64RegClass, MRI);
6628
6629 if (Depth == 0 && IntrinID == Intrinsic::returnaddress) {
6630 if (!MFReturnAddr) {
6631 // Insert the copy from LR/X30 into the entry block, before it can be
6632 // clobbered by anything.
6633 MFI.setReturnAddressIsTaken(true);
6634 MFReturnAddr = getFunctionLiveInPhysReg(
6635 MF, TII, AArch64::LR, AArch64::GPR64RegClass, I.getDebugLoc());
6636 }
6637
6638 if (STI.hasPAuth()) {
6639 MIB.buildInstr(AArch64::XPACI, {DstReg}, {MFReturnAddr});
6640 } else {
6641 MIB.buildCopy({Register(AArch64::LR)}, {MFReturnAddr});
6642 MIB.buildInstr(AArch64::XPACLRI);
6643 MIB.buildCopy({DstReg}, {Register(AArch64::LR)});
6644 }
6645
6646 I.eraseFromParent();
6647 return true;
6648 }
6649
6650 MFI.setFrameAddressIsTaken(true);
6651 Register FrameAddr(AArch64::FP);
6652 while (Depth--) {
6653 Register NextFrame = MRI.createVirtualRegister(&AArch64::GPR64spRegClass);
6654 auto Ldr =
6655 MIB.buildInstr(AArch64::LDRXui, {NextFrame}, {FrameAddr}).addImm(0);
// NOTE(review): elided original line 6656 — presumably
// "constrainSelectedInstRegOperands(*Ldr, TII, TRI, RBI);"; confirm upstream.
6657 FrameAddr = NextFrame;
6658 }
6659
6660 if (IntrinID == Intrinsic::frameaddress)
6661 MIB.buildCopy({DstReg}, {FrameAddr});
6662 else {
6663 MFI.setReturnAddressIsTaken(true);
6664
6665 if (STI.hasPAuth()) {
6666 Register TmpReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
6667 MIB.buildInstr(AArch64::LDRXui, {TmpReg}, {FrameAddr}).addImm(1);
6668 MIB.buildInstr(AArch64::XPACI, {DstReg}, {TmpReg});
6669 } else {
6670 MIB.buildInstr(AArch64::LDRXui, {Register(AArch64::LR)}, {FrameAddr})
6671 .addImm(1);
6672 MIB.buildInstr(AArch64::XPACLRI);
6673 MIB.buildCopy({DstReg}, {Register(AArch64::LR)});
6674 }
6675 }
6676
6677 I.eraseFromParent();
6678 return true;
6679 }
  // NEON table lookups: opcode pairs are (64-bit dest, 128-bit dest); the
  // final bool distinguishes TBL (false) from TBX (true).
6680 case Intrinsic::aarch64_neon_tbl2:
6681 SelectTable(I, MRI, 2, AArch64::TBLv8i8Two, AArch64::TBLv16i8Two, false);
6682 return true;
6683 case Intrinsic::aarch64_neon_tbl3:
6684 SelectTable(I, MRI, 3, AArch64::TBLv8i8Three, AArch64::TBLv16i8Three,
6685 false);
6686 return true;
6687 case Intrinsic::aarch64_neon_tbl4:
6688 SelectTable(I, MRI, 4, AArch64::TBLv8i8Four, AArch64::TBLv16i8Four, false);
6689 return true;
6690 case Intrinsic::aarch64_neon_tbx2:
6691 SelectTable(I, MRI, 2, AArch64::TBXv8i8Two, AArch64::TBXv16i8Two, true);
6692 return true;
6693 case Intrinsic::aarch64_neon_tbx3:
6694 SelectTable(I, MRI, 3, AArch64::TBXv8i8Three, AArch64::TBXv16i8Three, true);
6695 return true;
6696 case Intrinsic::aarch64_neon_tbx4:
6697 SelectTable(I, MRI, 4, AArch64::TBXv8i8Four, AArch64::TBXv16i8Four, true);
6698 return true;
  // swift.async.context.addr: the async context slot lives at FP - 8.
6699 case Intrinsic::swift_async_context_addr:
6700 auto Sub = MIB.buildInstr(AArch64::SUBXri, {I.getOperand(0).getReg()},
6701 {Register(AArch64::FP)})
6702 .addImm(8)
6703 .addImm(0);
// NOTE(review): elided original lines 6704/6706 — presumably
// "constrainSelectedInstRegOperands(*Sub, TII, TRI, RBI);" and the
// declaration of MF (e.g. "MachineFunction *MF = I.getParent()->getParent();")
// used on the next line; confirm upstream.
6705
6707 MF->getInfo<AArch64FunctionInfo>()->setHasSwiftAsyncContext(true);
6708 I.eraseFromParent();
6709 return true;
6710 }
6711 return false;
6712}
6713
6714// G_PTRAUTH_GLOBAL_VALUE lowering
6715//
6716// We have 3 lowering alternatives to choose from:
6717// - MOVaddrPAC: similar to MOVaddr, with added PAC.
6718// If the GV doesn't need a GOT load (i.e., is locally defined)
6719// materialize the pointer using adrp+add+pac. See LowerMOVaddrPAC.
6720//
6721// - LOADgotPAC: similar to LOADgot, with added PAC.
6722// If the GV needs a GOT load, materialize the pointer using the usual
6723// GOT adrp+ldr, +pac. Pointers in GOT are assumed to be not signed, the GOT
6724// section is assumed to be read-only (for example, via relro mechanism). See
6725// LowerMOVaddrPAC.
6726//
6727// - LOADauthptrstatic: similar to LOADgot, but use a
6728// special stub slot instead of a GOT slot.
6729// Load a signed pointer for symbol 'sym' from a stub slot named
6730// 'sym$auth_ptr$key$disc' filled by dynamic linker during relocation
6731// resolving. This usually lowers to adrp+ldr, but also emits an entry into
6732// .data with an
6733// @AUTH relocation. See LowerLOADauthptrstatic.
6734//
6735// All 3 are pseudos that are expand late to longer sequences: this lets us
6736// provide integrity guarantees on the to-be-signed intermediate values.
6737//
6738// LOADauthptrstatic is undesirable because it requires a large section filled
6739// with often similarly-signed pointers, making it a good harvesting target.
6740// Thus, it's only used for ptrauth references to extern_weak to avoid null
6741// checks.
6742
// NOTE(review): several original lines of this function were stripped from
// this extract (doxygen link removal) and are flagged below — confirm each
// against upstream LLVM.
6743bool AArch64InstructionSelector::selectPtrAuthGlobalValue(
// NOTE(review): elided original line 6744 — presumably
// "MachineInstr &I, MachineRegisterInfo &MRI) const {"; confirm upstream.
6745 Register DefReg = I.getOperand(0).getReg();
6746 Register Addr = I.getOperand(1).getReg();
6747 uint64_t Key = I.getOperand(2).getImm();
6748 Register AddrDisc = I.getOperand(3).getReg();
6749 uint64_t Disc = I.getOperand(4).getImm();
6750 int64_t Offset = 0;
6751
6752 if (Key > AArch64PACKey::LAST)
6753 report_fatal_error("key in ptrauth global out of range [0, " +
6754 Twine((int)AArch64PACKey::LAST) + "]");
6755
6756 // Blend only works if the integer discriminator is 16-bit wide.
6757 if (!isUInt<16>(Disc))
// NOTE(review): elided original line 6758 — presumably "report_fatal_error(";
// the string below is its argument; confirm upstream.
6759 "constant discriminator in ptrauth global out of range [0, 0xffff]");
6760
6761 // Choosing between 3 lowering alternatives is target-specific.
6762 if (!STI.isTargetELF() && !STI.isTargetMachO())
6763 report_fatal_error("ptrauth global lowering only supported on MachO/ELF");
6764
6765 if (!MRI.hasOneDef(Addr))
6766 return false;
6767
6768 // First match any offset we take from the real global.
6769 const MachineInstr *DefMI = &*MRI.def_instr_begin(Addr);
6770 if (DefMI->getOpcode() == TargetOpcode::G_PTR_ADD) {
6771 Register OffsetReg = DefMI->getOperand(2).getReg();
6772 if (!MRI.hasOneDef(OffsetReg))
6773 return false;
6774 const MachineInstr &OffsetMI = *MRI.def_instr_begin(OffsetReg);
6775 if (OffsetMI.getOpcode() != TargetOpcode::G_CONSTANT)
6776 return false;
6777
6778 Addr = DefMI->getOperand(1).getReg();
6779 if (!MRI.hasOneDef(Addr))
6780 return false;
6781
6782 DefMI = &*MRI.def_instr_begin(Addr);
6783 Offset = OffsetMI.getOperand(1).getCImm()->getSExtValue();
6784 }
6785
6786 // We should be left with a genuine unauthenticated GlobalValue.
6787 const GlobalValue *GV;
6788 if (DefMI->getOpcode() == TargetOpcode::G_GLOBAL_VALUE) {
6789 GV = DefMI->getOperand(1).getGlobal();
// NOTE(review): elided original line 6790 — presumably
// "Offset += DefMI->getOperand(1).getOffset();"; confirm upstream.
6791 } else if (DefMI->getOpcode() == AArch64::G_ADD_LOW) {
6792 GV = DefMI->getOperand(2).getGlobal();
// NOTE(review): elided original line 6793 — presumably
// "Offset += DefMI->getOperand(2).getOffset();"; confirm upstream.
6794 } else {
6795 return false;
6796 }
6797
6798 MachineIRBuilder MIB(I);
6799
6800 // Classify the reference to determine whether it needs a GOT load.
6801 unsigned OpFlags = STI.ClassifyGlobalReference(GV, TM);
6802 const bool NeedsGOTLoad = ((OpFlags & AArch64II::MO_GOT) != 0);
6803 assert(((OpFlags & (~AArch64II::MO_GOT)) == 0) &&
6804 "unsupported non-GOT op flags on ptrauth global reference");
6805 assert((!GV->hasExternalWeakLinkage() || NeedsGOTLoad) &&
6806 "unsupported non-GOT reference to weak ptrauth global");
6807
  // A missing or zero constant address-discriminator means "no addr disc".
6808 std::optional<APInt> AddrDiscVal = getIConstantVRegVal(AddrDisc, MRI);
6809 bool HasAddrDisc = !AddrDiscVal || *AddrDiscVal != 0;
6810
6811 // Non-extern_weak:
6812 // - No GOT load needed -> MOVaddrPAC
6813 // - GOT load for non-extern_weak -> LOADgotPAC
6814 // Note that we disallow extern_weak refs to avoid null checks later.
6815 if (!GV->hasExternalWeakLinkage()) {
6816 MIB.buildInstr(TargetOpcode::IMPLICIT_DEF, {AArch64::X16}, {});
6817 MIB.buildInstr(TargetOpcode::IMPLICIT_DEF, {AArch64::X17}, {});
6818 MIB.buildInstr(NeedsGOTLoad ? AArch64::LOADgotPAC : AArch64::MOVaddrPAC)
// NOTE(review): elided original line 6819 — presumably
// ".addGlobalAddress(GV, Offset)" (mirrors the weak path below); confirm.
6820 .addImm(Key)
6821 .addReg(HasAddrDisc ? AddrDisc : AArch64::XZR)
6822 .addImm(Disc)
6823 .constrainAllUses(TII, TRI, RBI);
6824 MIB.buildCopy(DefReg, Register(AArch64::X16));
6825 RBI.constrainGenericRegister(DefReg, AArch64::GPR64RegClass, MRI);
6826 I.eraseFromParent();
6827 return true;
6828 }
6829
6830 // extern_weak -> LOADauthptrstatic
6831
6832 // Offsets and extern_weak don't mix well: ptrauth aside, you'd get the
6833 // offset alone as a pointer if the symbol wasn't available, which would
6834 // probably break null checks in users. Ptrauth complicates things further:
6835 // error out.
6836 if (Offset != 0)
// NOTE(review): elided original line 6837 — presumably "report_fatal_error(";
// the string below is its argument; confirm upstream.
6838 "unsupported non-zero offset in weak ptrauth global reference");
6839
6840 if (HasAddrDisc)
6841 report_fatal_error("unsupported weak addr-div ptrauth global");
6842
6843 MIB.buildInstr(AArch64::LOADauthptrstatic, {DefReg}, {})
6844 .addGlobalAddress(GV, Offset)
6845 .addImm(Key)
6846 .addImm(Disc);
6847 RBI.constrainGenericRegister(DefReg, AArch64::GPR64RegClass, MRI);
6848
6849 I.eraseFromParent();
6850 return true;
6851}
6852
/// Select a NEON TBL/TBX table-lookup intrinsic. \p NumVec is the number of
/// table vectors; \p Opc1 is used when the destination is a 64-bit (8 x s8)
/// vector and \p Opc2 when it is 128-bit; \p isExt selects the TBX form,
/// which carries an extra leading source operand (the pass-through vector).
///
/// NOTE(review): a few original lines were stripped from this extract
/// (flagged below) — confirm against upstream LLVM.
6853void AArch64InstructionSelector::SelectTable(MachineInstr &I,
// NOTE(review): elided original line 6854 — presumably the parameter
// "MachineRegisterInfo &MRI,"; confirm upstream.
6855 unsigned NumVec, unsigned Opc1,
6856 unsigned Opc2, bool isExt) {
6857 Register DstReg = I.getOperand(0).getReg();
  // Pick the 64-bit or 128-bit opcode from the destination vector type.
6858 unsigned Opc = MRI.getType(DstReg) == LLT::fixed_vector(8, 8) ? Opc1 : Opc2;
6859
6860 // Create the REG_SEQUENCE
// NOTE(review): elided original line 6861 — presumably the declaration of
// the register list (e.g. "SmallVector<Register, 2> Regs;"); confirm.
6862 for (unsigned i = 0; i < NumVec; i++)
6863 Regs.push_back(I.getOperand(i + 2 + isExt).getReg());
6864 Register RegSeq = createQTuple(Regs, MIB);
6865
6866 Register IdxReg = I.getOperand(2 + NumVec + isExt).getReg();
// NOTE(review): elided original line 6867 — presumably the declaration of
// "Instr" (a MachineInstrBuilder); confirm upstream.
6868 if (isExt) {
  // TBX additionally reads the pass-through vector in operand 2.
6869 Register Reg = I.getOperand(2).getReg();
6870 Instr = MIB.buildInstr(Opc, {DstReg}, {Reg, RegSeq, IdxReg});
6871 } else
6872 Instr = MIB.buildInstr(Opc, {DstReg}, {RegSeq, IdxReg});
// NOTE(review): elided original line 6873 — presumably
// "constrainSelectedInstRegOperands(*Instr, TII, TRI, RBI);"; confirm.
6874 I.eraseFromParent();
6875}
6876
6878AArch64InstructionSelector::selectShiftA_32(const MachineOperand &Root) const {
6879 auto MaybeImmed = getImmedFromMO(Root);
6880 if (MaybeImmed == std::nullopt || *MaybeImmed > 31)
6881 return std::nullopt;
6882 uint64_t Enc = (32 - *MaybeImmed) & 0x1f;
6883 return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Enc); }}};
6884}
6885
6887AArch64InstructionSelector::selectShiftB_32(const MachineOperand &Root) const {
6888 auto MaybeImmed = getImmedFromMO(Root);
6889 if (MaybeImmed == std::nullopt || *MaybeImmed > 31)
6890 return std::nullopt;
6891 uint64_t Enc = 31 - *MaybeImmed;
6892 return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Enc); }}};
6893}
6894
6896AArch64InstructionSelector::selectShiftA_64(const MachineOperand &Root) const {
6897 auto MaybeImmed = getImmedFromMO(Root);
6898 if (MaybeImmed == std::nullopt || *MaybeImmed > 63)
6899 return std::nullopt;
6900 uint64_t Enc = (64 - *MaybeImmed) & 0x3f;
6901 return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Enc); }}};
6902}
6903
6905AArch64InstructionSelector::selectShiftB_64(const MachineOperand &Root) const {
6906 auto MaybeImmed = getImmedFromMO(Root);
6907 if (MaybeImmed == std::nullopt || *MaybeImmed > 63)
6908 return std::nullopt;
6909 uint64_t Enc = 63 - *MaybeImmed;
6910 return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Enc); }}};
6911}
6912
6913/// Helper to select an immediate value that can be represented as a 12-bit
6914/// value shifted left by either 0 or 12. If it is possible to do so, return
6915/// the immediate and shift value. If not, return std::nullopt.
6916///
6917/// Used by selectArithImmed and selectNegArithImmed.
6919AArch64InstructionSelector::select12BitValueWithLeftShift(
6920 uint64_t Immed) const {
6921 unsigned ShiftAmt;
6922 if (Immed >> 12 == 0) {
6923 ShiftAmt = 0;
6924 } else if ((Immed & 0xfff) == 0 && Immed >> 24 == 0) {
6925 ShiftAmt = 12;
6926 Immed = Immed >> 12;
6927 } else
6928 return std::nullopt;
6929
6930 unsigned ShVal = AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftAmt);
6931 return {{
6932 [=](MachineInstrBuilder &MIB) { MIB.addImm(Immed); },
6933 [=](MachineInstrBuilder &MIB) { MIB.addImm(ShVal); },
6934 }};
6935}
6936
6937/// SelectArithImmed - Select an immediate value that can be represented as
6938/// a 12-bit value shifted left by either 0 or 12. If so, return true with
6939/// Val set to the 12-bit value and Shift set to the shifter operand.
6941AArch64InstructionSelector::selectArithImmed(MachineOperand &Root) const {
6942 // This function is called from the addsub_shifted_imm ComplexPattern,
6943 // which lists [imm] as the list of opcode it's interested in, however
6944 // we still need to check whether the operand is actually an immediate
6945 // here because the ComplexPattern opcode list is only used in
6946 // root-level opcode matching.
6947 auto MaybeImmed = getImmedFromMO(Root);
6948 if (MaybeImmed == std::nullopt)
6949 return std::nullopt;
6950 return select12BitValueWithLeftShift(*MaybeImmed);
6951}
6952
6953/// SelectNegArithImmed - As above, but negates the value before trying to
6954/// select it.
6956AArch64InstructionSelector::selectNegArithImmed(MachineOperand &Root) const {
6957 // We need a register here, because we need to know if we have a 64 or 32
6958 // bit immediate.
6959 if (!Root.isReg())
6960 return std::nullopt;
6961 auto MaybeImmed = getImmedFromMO(Root);
6962 if (MaybeImmed == std::nullopt)
6963 return std::nullopt;
6964 uint64_t Immed = *MaybeImmed;
6965
6966 // This negation is almost always valid, but "cmp wN, #0" and "cmn wN, #0"
6967 // have the opposite effect on the C flag, so this pattern mustn't match under
6968 // those circumstances.
6969 if (Immed == 0)
6970 return std::nullopt;
6971
6972 // Check if we're dealing with a 32-bit type on the root or a 64-bit type on
6973 // the root.
6975 if (MRI.getType(Root.getReg()).getSizeInBits() == 32)
6976 Immed = ~((uint32_t)Immed) + 1;
6977 else
6978 Immed = ~Immed + 1ULL;
6979
6980 if (Immed & 0xFFFFFFFFFF000000ULL)
6981 return std::nullopt;
6982
6983 Immed &= 0xFFFFFFULL;
6984 return select12BitValueWithLeftShift(Immed);
6985}
6986
6987/// Checks if we are sure that folding MI into load/store addressing mode is
6988/// beneficial or not.
6989///
6990/// Returns:
6991/// - true if folding MI would be beneficial.
6992/// - false if folding MI would be bad.
6993/// - std::nullopt if it is not sure whether folding MI is beneficial.
6994///
6995/// \p MI can be the offset operand of G_PTR_ADD, e.g. G_SHL in the example:
6996///
6997/// %13:gpr(s64) = G_CONSTANT i64 1
6998/// %8:gpr(s64) = G_SHL %6, %13(s64)
6999/// %9:gpr(p0) = G_PTR_ADD %0, %8(s64)
7000/// %12:gpr(s32) = G_LOAD %9(p0) :: (load (s16))
7001std::optional<bool> AArch64InstructionSelector::isWorthFoldingIntoAddrMode(
7002 MachineInstr &MI, const MachineRegisterInfo &MRI) const {
7003 if (MI.getOpcode() == AArch64::G_SHL) {
7004 // Address operands with shifts are free, except for running on subtargets
7005 // with AddrLSLSlow14.
7006 if (const auto ValAndVeg = getIConstantVRegValWithLookThrough(
7007 MI.getOperand(2).getReg(), MRI)) {
7008 const APInt ShiftVal = ValAndVeg->Value;
7009
7010 // Don't fold if we know this will be slow.
7011 return !(STI.hasAddrLSLSlow14() && (ShiftVal == 1 || ShiftVal == 4));
7012 }
7013 }
7014 return std::nullopt;
7015}
7016
7017/// Return true if it is worth folding MI into an extended register. That is,
7018/// if it's safe to pull it into the addressing mode of a load or store as a
7019/// shift.
7020/// \p IsAddrOperand whether the def of MI is used as an address operand
7021/// (e.g. feeding into an LDR/STR).
7022bool AArch64InstructionSelector::isWorthFoldingIntoExtendedReg(
7024 bool IsAddrOperand) const {
7025
7026 // Always fold if there is one use, or if we're optimizing for size.
7027 Register DefReg = MI.getOperand(0).getReg();
7028 if (MRI.hasOneNonDBGUse(DefReg) ||
7029 MI.getParent()->getParent()->getFunction().hasOptSize())
7030 return true;
7031
7032 if (IsAddrOperand) {
7033 // If we are already sure that folding MI is good or bad, return the result.
7034 if (const auto Worth = isWorthFoldingIntoAddrMode(MI, MRI))
7035 return *Worth;
7036
7037 // Fold G_PTR_ADD if its offset operand can be folded
7038 if (MI.getOpcode() == AArch64::G_PTR_ADD) {
7039 MachineInstr *OffsetInst =
7040 getDefIgnoringCopies(MI.getOperand(2).getReg(), MRI);
7041
7042 // Note, we already know G_PTR_ADD is used by at least two instructions.
7043 // If we are also sure about whether folding is beneficial or not,
7044 // return the result.
7045 if (const auto Worth = isWorthFoldingIntoAddrMode(*OffsetInst, MRI))
7046 return *Worth;
7047 }
7048 }
7049
7050 // FIXME: Consider checking HasALULSLFast as appropriate.
7051
7052 // We have a fastpath, so folding a shift in and potentially computing it
7053 // many times may be beneficial. Check if this is only used in memory ops.
7054 // If it is, then we should fold.
7055 return all_of(MRI.use_nodbg_instructions(DefReg),
7056 [](MachineInstr &Use) { return Use.mayLoadOrStore(); });
7057}
7058
7060 switch (Type) {
7061 case AArch64_AM::SXTB:
7062 case AArch64_AM::SXTH:
7063 case AArch64_AM::SXTW:
7064 return true;
7065 default:
7066 return false;
7067 }
7068}
7069
/// Try to fold a shifted (and possibly extended) offset computation into the
/// register-offset addressing mode of a load/store, producing renderers for
/// (base, offset, sign-extend flag + shift flag).
///
/// NOTE(review): this extract elides the return-type line (7070), the first
/// parameter line (7072 — presumably "MachineOperand &Root, MachineOperand
/// &Base, MachineOperand &Offset,"), the MRI declaration (7077), and one
/// condition line (7149) — confirm each against upstream LLVM.
7071AArch64InstructionSelector::selectExtendedSHL(
7073 unsigned SizeInBytes, bool WantsExt) const {
7074 assert(Base.isReg() && "Expected base to be a register operand");
7075 assert(Offset.isReg() && "Expected offset to be a register operand");
7076
7078 MachineInstr *OffsetInst = MRI.getVRegDef(Offset.getReg());
7079
7080 unsigned OffsetOpc = OffsetInst->getOpcode();
7081 bool LookedThroughZExt = false;
7082 if (OffsetOpc != TargetOpcode::G_SHL && OffsetOpc != TargetOpcode::G_MUL) {
7083 // Try to look through a ZEXT.
7084 if (OffsetOpc != TargetOpcode::G_ZEXT || !WantsExt)
7085 return std::nullopt;
7086
7087 OffsetInst = MRI.getVRegDef(OffsetInst->getOperand(1).getReg());
7088 OffsetOpc = OffsetInst->getOpcode();
7089 LookedThroughZExt = true;
7090
7091 if (OffsetOpc != TargetOpcode::G_SHL && OffsetOpc != TargetOpcode::G_MUL)
7092 return std::nullopt;
7093 }
7094 // Make sure that the memory op is a valid size.
7095 int64_t LegalShiftVal = Log2_32(SizeInBytes);
7096 if (LegalShiftVal == 0)
7097 return std::nullopt;
7098 if (!isWorthFoldingIntoExtendedReg(*OffsetInst, MRI, true))
7099 return std::nullopt;
7100
7101 // Now, try to find the specific G_CONSTANT. Start by assuming that the
7102 // register we will offset is the LHS, and the register containing the
7103 // constant is the RHS.
7104 Register OffsetReg = OffsetInst->getOperand(1).getReg();
7105 Register ConstantReg = OffsetInst->getOperand(2).getReg();
7106 auto ValAndVReg = getIConstantVRegValWithLookThrough(ConstantReg, MRI);
7107 if (!ValAndVReg) {
7108 // We didn't get a constant on the RHS. If the opcode is a shift, then
7109 // we're done.
7110 if (OffsetOpc == TargetOpcode::G_SHL)
7111 return std::nullopt;
7112
7113 // If we have a G_MUL, we can use either register. Try looking at the RHS.
7114 std::swap(OffsetReg, ConstantReg);
7115 ValAndVReg = getIConstantVRegValWithLookThrough(ConstantReg, MRI);
7116 if (!ValAndVReg)
7117 return std::nullopt;
7118 }
7119
7120 // The value must fit into 3 bits, and must be positive. Make sure that is
7121 // true.
7122 int64_t ImmVal = ValAndVReg->Value.getSExtValue();
7123
7124 // Since we're going to pull this into a shift, the constant value must be
7125 // a power of 2. If we got a multiply, then we need to check this.
7126 if (OffsetOpc == TargetOpcode::G_MUL) {
7127 if (!llvm::has_single_bit<uint32_t>(ImmVal))
7128 return std::nullopt;
7129
7130 // Got a power of 2. So, the amount we'll shift is the log base-2 of that.
7131 ImmVal = Log2_32(ImmVal);
7132 }
7133
7134 if ((ImmVal & 0x7) != ImmVal)
7135 return std::nullopt;
7136
7137 // We are only allowed to shift by LegalShiftVal. This shift value is built
7138 // into the instruction, so we can't just use whatever we want.
7139 if (ImmVal != LegalShiftVal)
7140 return std::nullopt;
7141
7142 unsigned SignExtend = 0;
7143 if (WantsExt) {
7144 // Check if the offset is defined by an extend, unless we looked through a
7145 // G_ZEXT earlier.
7146 if (!LookedThroughZExt) {
7147 MachineInstr *ExtInst = getDefIgnoringCopies(OffsetReg, MRI);
7148 auto Ext = getExtendTypeForInst(*ExtInst, MRI, true);
// NOTE(review): elided original line 7149 — presumably the condition
// "if (Ext == AArch64_AM::InvalidShiftExtend)" guarding the bail-out below;
// confirm upstream.
7150 return std::nullopt;
7151
7152 SignExtend = isSignExtendShiftType(Ext) ? 1 : 0;
7153 // We only support SXTW for signed extension here.
7154 if (SignExtend && Ext != AArch64_AM::SXTW)
7155 return std::nullopt;
7156 OffsetReg = ExtInst->getOperand(1).getReg();
7157 }
7158
7159 // Need a 32-bit wide register here.
7160 MachineIRBuilder MIB(*MRI.getVRegDef(Root.getReg()));
7161 OffsetReg = moveScalarRegClass(OffsetReg, AArch64::GPR32RegClass, MIB);
7162 }
7163
7164 // We can use the LHS of the GEP as the base, and the LHS of the shift as an
7165 // offset. Signify that we are shifting by setting the shift flag to 1.
7166 return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(Base.getReg()); },
7167 [=](MachineInstrBuilder &MIB) { MIB.addUse(OffsetReg); },
7168 [=](MachineInstrBuilder &MIB) {
7169 // Need to add both immediates here to make sure that they are both
7170 // added to the instruction.
7171 MIB.addImm(SignExtend);
7172 MIB.addImm(1);
7173 }}};
7174}
7175
7176/// This is used for computing addresses like this:
7177///
7178/// ldr x1, [x2, x3, lsl #3]
7179///
7180/// Where x2 is the base register, and x3 is an offset register. The shift-left
7181/// is a constant value specific to this load instruction. That is, we'll never
7182/// see anything other than a 3 here (which corresponds to the size of the
7183/// element being loaded.)
/// NOTE(review): the extraction dropped line 7184 here — presumably the
/// declared return type (InstructionSelector::ComplexRendererFns). Confirm
/// against the upstream file before compiling.
7185AArch64InstructionSelector::selectAddrModeShiftedExtendXReg(
7186    MachineOperand &Root, unsigned SizeInBytes) const {
7187  if (!Root.isReg())
7188    return std::nullopt;
// NOTE(review): line 7189 is missing from this extraction — likely the local
// MachineRegisterInfo &MRI binding used by the lookups below. Verify upstream.
7190
7191  // We want to find something like this:
7192  //
7193  // val = G_CONSTANT LegalShiftVal
7194  // shift = G_SHL off_reg val
7195  // ptr = G_PTR_ADD base_reg shift
7196  // x = G_LOAD ptr
7197  //
7198  // And fold it into this addressing mode:
7199  //
7200  // ldr x, [base_reg, off_reg, lsl #LegalShiftVal]
7201
7202  // Check if we can find the G_PTR_ADD.
7203  MachineInstr *PtrAdd =
7204      getOpcodeDef(TargetOpcode::G_PTR_ADD, Root.getReg(), MRI);
7205  if (!PtrAdd || !isWorthFoldingIntoExtendedReg(*PtrAdd, MRI, true))
7206    return std::nullopt;
7207
7208  // Now, try to match an opcode which will match our specific offset.
7209  // We want a G_SHL or a G_MUL.
7210  MachineInstr *OffsetInst =
// NOTE(review): line 7211 (the initializer for OffsetInst, presumably looking
// through copies at the G_PTR_ADD's RHS) is missing — verify upstream.
7212  return selectExtendedSHL(Root, PtrAdd->getOperand(1),
7213                           OffsetInst->getOperand(0), SizeInBytes,
7214                           /*WantsExt=*/false);
7215}
7216
7217/// This is used for computing addresses like this:
7218///
7219/// ldr x1, [x2, x3]
7220///
7221/// Where x2 is the base register, and x3 is an offset register.
7222///
7223/// When possible (or profitable) to fold a G_PTR_ADD into the address
7224/// calculation, this will do so. Otherwise, it will return std::nullopt.
/// NOTE(review): line 7225 (the declared return type, presumably
/// InstructionSelector::ComplexRendererFns) was dropped by the extraction.
7226AArch64InstructionSelector::selectAddrModeRegisterOffset(
7227    MachineOperand &Root) const {
// NOTE(review): line 7228 is missing — likely the local MachineRegisterInfo
// binding used below. Verify upstream.
7229
7230  // We need a GEP.
7231  MachineInstr *Gep = MRI.getVRegDef(Root.getReg());
7232  if (Gep->getOpcode() != TargetOpcode::G_PTR_ADD)
7233    return std::nullopt;
7234
7235  // If this is used more than once, let's not bother folding.
7236  // TODO: Check if they are memory ops. If they are, then we can still fold
7237  // without having to recompute anything.
7238  if (!MRI.hasOneNonDBGUse(Gep->getOperand(0).getReg()))
7239    return std::nullopt;
7240
7241  // Base is the GEP's LHS, offset is its RHS.
7242  return {{[=](MachineInstrBuilder &MIB) {
7243             MIB.addUse(Gep->getOperand(1).getReg());
7244           },
7245           [=](MachineInstrBuilder &MIB) {
7246             MIB.addUse(Gep->getOperand(2).getReg());
7247           },
7248           [=](MachineInstrBuilder &MIB) {
7249             // Need to add both immediates here to make sure that they are both
7250             // added to the instruction.
7251             MIB.addImm(0);
7252             MIB.addImm(0);
7253           }}};
7254}
7255
7256/// This is intended to be equivalent to selectAddrModeXRO in
7257/// AArch64ISelDAGtoDAG. It's used for selecting X register offset loads.
/// NOTE(review): line 7258 (the declared return type) was dropped by the
/// extraction — confirm upstream.
7259AArch64InstructionSelector::selectAddrModeXRO(MachineOperand &Root,
7260                                              unsigned SizeInBytes) const {
// NOTE(review): line 7261 is missing — likely the local MachineRegisterInfo
// binding. Verify upstream.
7262  if (!Root.isReg())
7263    return std::nullopt;
7264  MachineInstr *PtrAdd =
7265      getOpcodeDef(TargetOpcode::G_PTR_ADD, Root.getReg(), MRI);
7266  if (!PtrAdd)
7267    return std::nullopt;
7268
7269  // Check for an immediates which cannot be encoded in the [base + imm]
7270  // addressing mode, and can't be encoded in an add/sub. If this happens, we'll
7271  // end up with code like:
7272  //
7273  // mov x0, wide
7274  // add x1 base, x0
7275  // ldr x2, [x1, x0]
7276  //
7277  // In this situation, we can use the [base, xreg] addressing mode to save an
7278  // add/sub:
7279  //
7280  // mov x0, wide
7281  // ldr x2, [base, x0]
7282  auto ValAndVReg =
// NOTE(review): line 7283 (the initializer — presumably a constant
// look-through of the G_PTR_ADD's RHS) is missing. Verify upstream.
7284  if (ValAndVReg) {
7285    unsigned Scale = Log2_32(SizeInBytes);
7286    int64_t ImmOff = ValAndVReg->Value.getSExtValue();
7287
7288    // Skip immediates that can be selected in the load/store addresing
7289    // mode.
7290    if (ImmOff % SizeInBytes == 0 && ImmOff >= 0 &&
7291        ImmOff < (0x1000 << Scale))
7292      return std::nullopt;
7293
7294    // Helper lambda to decide whether or not it is preferable to emit an add.
7295    auto isPreferredADD = [](int64_t ImmOff) {
7296      // Constants in [0x0, 0xfff] can be encoded in an add.
7297      if ((ImmOff & 0xfffffffffffff000LL) == 0x0LL)
7298        return true;
7299
7300      // Can it be encoded in an add lsl #12?
7301      if ((ImmOff & 0xffffffffff000fffLL) != 0x0LL)
7302        return false;
7303
7304      // It can be encoded in an add lsl #12, but we may not want to. If it is
7305      // possible to select this as a single movz, then prefer that. A single
7306      // movz is faster than an add with a shift.
7307      return (ImmOff & 0xffffffffff00ffffLL) != 0x0LL &&
7308             (ImmOff & 0xffffffffffff0fffLL) != 0x0LL;
7309    };
7310
7311    // If the immediate can be encoded in a single add/sub, then bail out.
7312    if (isPreferredADD(ImmOff) || isPreferredADD(-ImmOff))
7313      return std::nullopt;
7314  }
7315
7316  // Try to fold shifts into the addressing mode.
7317  auto AddrModeFns = selectAddrModeShiftedExtendXReg(Root, SizeInBytes);
7318  if (AddrModeFns)
7319    return AddrModeFns;
7320
7321  // If that doesn't work, see if it's possible to fold in registers from
7322  // a GEP.
7323  return selectAddrModeRegisterOffset(Root);
7324}
7325
7326/// This is used for computing addresses like this:
7327///
7328/// ldr x0, [xBase, wOffset, sxtw #LegalShiftVal]
7329///
7330/// Where we have a 64-bit base register, a 32-bit offset register, and an
7331/// extend (which may or may not be signed).
/// NOTE(review): line 7332 (the declared return type) was dropped by the
/// extraction — confirm upstream.
7333AArch64InstructionSelector::selectAddrModeWRO(MachineOperand &Root,
7334                                              unsigned SizeInBytes) const {
// NOTE(review): line 7335 is missing — likely the local MachineRegisterInfo
// binding. Verify upstream.
7336
7337  MachineInstr *PtrAdd =
7338      getOpcodeDef(TargetOpcode::G_PTR_ADD, Root.getReg(), MRI);
7339  if (!PtrAdd || !isWorthFoldingIntoExtendedReg(*PtrAdd, MRI, true))
7340    return std::nullopt;
7341
7342  MachineOperand &LHS = PtrAdd->getOperand(1);
7343  MachineOperand &RHS = PtrAdd->getOperand(2);
// NOTE(review): line 7344 is missing — it must have defined OffsetInst (used
// below), presumably by looking through copies at RHS. Verify upstream.
7345
7346  // The first case is the same as selectAddrModeXRO, except we need an extend.
7347  // In this case, we try to find a shift and extend, and fold them into the
7348  // addressing mode.
7349  //
7350  // E.g.
7351  //
7352  // off_reg = G_Z/S/ANYEXT ext_reg
7353  // val = G_CONSTANT LegalShiftVal
7354  // shift = G_SHL off_reg val
7355  // ptr = G_PTR_ADD base_reg shift
7356  // x = G_LOAD ptr
7357  //
7358  // In this case we can get a load like this:
7359  //
7360  // ldr x0, [base_reg, ext_reg, sxtw #LegalShiftVal]
7361  auto ExtendedShl = selectExtendedSHL(Root, LHS, OffsetInst->getOperand(0),
7362                                       SizeInBytes, /*WantsExt=*/true);
7363  if (ExtendedShl)
7364    return ExtendedShl;
7365
7366  // There was no shift. We can try and fold a G_Z/S/ANYEXT in alone though.
7367  //
7368  // e.g.
7369  // ldr something, [base_reg, ext_reg, sxtw]
7370  if (!isWorthFoldingIntoExtendedReg(*OffsetInst, MRI, true))
7371    return std::nullopt;
7372
7373  // Check if this is an extend. We'll get an extend type if it is.
// NOTE(review): line 7374 (the declaration of Ext receiving this call) is
// missing — verify upstream.
7375      getExtendTypeForInst(*OffsetInst, MRI, /*IsLoadStore=*/true);
// NOTE(review): line 7376 (the guard checking for an invalid extend type
// before this bail-out) is missing — verify upstream.
7377    return std::nullopt;
7378
7379  // Need a 32-bit wide register.
7380  MachineIRBuilder MIB(*PtrAdd);
7381  Register ExtReg = moveScalarRegClass(OffsetInst->getOperand(1).getReg(),
7382                                       AArch64::GPR32RegClass, MIB);
7383  unsigned SignExtend = Ext == AArch64_AM::SXTW;
7384
7385  // Base is LHS, offset is ExtReg.
7386  return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(LHS.getReg()); },
7387           [=](MachineInstrBuilder &MIB) { MIB.addUse(ExtReg); },
7388           [=](MachineInstrBuilder &MIB) {
7389             MIB.addImm(SignExtend);
7390             MIB.addImm(0);
7391           }}};
7392}
7393
7394/// Select a "register plus unscaled signed 9-bit immediate" address. This
7395/// should only match when there is an offset that is not valid for a scaled
7396/// immediate addressing mode. The "Size" argument is the size in bytes of the
7397/// memory reference, which is needed here to know what is valid for a scaled
7398/// immediate.
/// NOTE(review): line 7399 (the declared return type) was dropped by the
/// extraction — confirm upstream.
7400AArch64InstructionSelector::selectAddrModeUnscaled(MachineOperand &Root,
7401                                                   unsigned Size) const {
// NOTE(review): line 7402 is missing — it began the MachineRegisterInfo
// binding that the next line completes. Verify upstream.
7403      Root.getParent()->getParent()->getParent()->getRegInfo();
7404
7405  if (!Root.isReg())
7406    return std::nullopt;
7407
7408  if (!isBaseWithConstantOffset(Root, MRI))
7409    return std::nullopt;
7410
7411  MachineInstr *RootDef = MRI.getVRegDef(Root.getReg());
7412
7413  MachineOperand &OffImm = RootDef->getOperand(2);
7414  if (!OffImm.isReg())
7415    return std::nullopt;
7416  MachineInstr *RHS = MRI.getVRegDef(OffImm.getReg());
7417  if (RHS->getOpcode() != TargetOpcode::G_CONSTANT)
7418    return std::nullopt;
7419  int64_t RHSC;
7420  MachineOperand &RHSOp1 = RHS->getOperand(1);
7421  if (!RHSOp1.isCImm() || RHSOp1.getCImm()->getBitWidth() > 64)
7422    return std::nullopt;
7423  RHSC = RHSOp1.getCImm()->getSExtValue();
7424
  // The unscaled addressing form takes a signed 9-bit immediate: [-256, 256).
7425  if (RHSC >= -256 && RHSC < 256) {
7426    MachineOperand &Base = RootDef->getOperand(1);
7427    return {{
7428        [=](MachineInstrBuilder &MIB) { MIB.add(Base); },
7429        [=](MachineInstrBuilder &MIB) { MIB.addImm(RHSC); },
7430    }};
7431  }
7432  return std::nullopt;
7433}
7434
/// Try to fold a G_ADD_LOW fed by an ADRP into a page + pageoff global
/// address operand pair, so the load/store can address the global directly.
/// NOTE(review): line 7435 (the declared return type) was dropped by the
/// extraction — confirm upstream.
7436AArch64InstructionSelector::tryFoldAddLowIntoImm(MachineInstr &RootDef,
7437                                                 unsigned Size,
7438                                                 MachineRegisterInfo &MRI) const {
7439  if (RootDef.getOpcode() != AArch64::G_ADD_LOW)
7440    return std::nullopt;
7441  MachineInstr &Adrp = *MRI.getVRegDef(RootDef.getOperand(1).getReg());
7442  if (Adrp.getOpcode() != AArch64::ADRP)
7443    return std::nullopt;
7444
7445  // TODO: add heuristics like isWorthFoldingADDlow() from SelectionDAG.
7446  auto Offset = Adrp.getOperand(1).getOffset();
7447  if (Offset % Size != 0)
7448    return std::nullopt;
7449
7450  auto GV = Adrp.getOperand(1).getGlobal();
7451  if (GV->isThreadLocal())
7452    return std::nullopt;
7453
  // The folded form scales the offset by the access size, so the global must
  // be at least Size-aligned.
7454  auto &MF = *RootDef.getParent()->getParent();
7455  if (GV->getPointerAlignment(MF.getDataLayout()) < Size)
7456    return std::nullopt;
7457
7458  unsigned OpFlags = STI.ClassifyGlobalReference(GV, MF.getTarget());
7459  MachineIRBuilder MIRBuilder(RootDef);
7460  Register AdrpReg = Adrp.getOperand(0).getReg();
7461  return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(AdrpReg); },
7462           [=](MachineInstrBuilder &MIB) {
7463             MIB.addGlobalAddress(GV, Offset,
7464                                  OpFlags | AArch64II::MO_PAGEOFF |
// NOTE(review): line 7465 (the final flag in this OR chain, before the closing
// parenthesis) is missing from the extraction — verify upstream.
7466           }}};
7467}
7468
7469/// Select a "register plus scaled unsigned 12-bit immediate" address. The
7470/// "Size" argument is the size in bytes of the memory reference, which
7471/// determines the scale.
/// NOTE(review): line 7472 (the declared return type) was dropped by the
/// extraction — confirm upstream.
7473AArch64InstructionSelector::selectAddrModeIndexed(MachineOperand &Root,
7474                                                  unsigned Size) const {
7475  MachineFunction &MF = *Root.getParent()->getParent()->getParent();
// NOTE(review): line 7476 is missing — likely the local MachineRegisterInfo
// binding. Verify upstream.
7477
7478  if (!Root.isReg())
7479    return std::nullopt;
7480
  // A frame index can be used directly with a zero offset.
7481  MachineInstr *RootDef = MRI.getVRegDef(Root.getReg());
7482  if (RootDef->getOpcode() == TargetOpcode::G_FRAME_INDEX) {
7483    return {{
7484        [=](MachineInstrBuilder &MIB) { MIB.add(RootDef->getOperand(1)); },
7485        [=](MachineInstrBuilder &MIB) { MIB.addImm(0); },
7486    }};
7487  }
7488
// NOTE(review): line 7489 is missing — it must have defined CM (the code
// model queried just below). Verify upstream.
7490  // Check if we can fold in the ADD of small code model ADRP + ADD address.
7491  if (CM == CodeModel::Small) {
7492    auto OpFns = tryFoldAddLowIntoImm(*RootDef, Size, MRI);
7493    if (OpFns)
7494      return OpFns;
7495  }
7496
7497  if (isBaseWithConstantOffset(Root, MRI)) {
7498    MachineOperand &LHS = RootDef->getOperand(1);
7499    MachineOperand &RHS = RootDef->getOperand(2);
7500    MachineInstr *LHSDef = MRI.getVRegDef(LHS.getReg());
7501    MachineInstr *RHSDef = MRI.getVRegDef(RHS.getReg());
7502
    // The scaled form requires a non-negative, Size-aligned offset that fits
    // in 12 bits after scaling.
7503    int64_t RHSC = (int64_t)RHSDef->getOperand(1).getCImm()->getZExtValue();
7504    unsigned Scale = Log2_32(Size);
7505    if ((RHSC & (Size - 1)) == 0 && RHSC >= 0 && RHSC < (0x1000 << Scale)) {
7506      if (LHSDef->getOpcode() == TargetOpcode::G_FRAME_INDEX)
7507        return {{
7508            [=](MachineInstrBuilder &MIB) { MIB.add(LHSDef->getOperand(1)); },
7509            [=](MachineInstrBuilder &MIB) { MIB.addImm(RHSC >> Scale); },
7510        }};
7511
7512    return {{
7513        [=](MachineInstrBuilder &MIB) { MIB.add(LHS); },
7514        [=](MachineInstrBuilder &MIB) { MIB.addImm(RHSC >> Scale); },
7515    }};
7516    }
7517  }
7518
7519  // Before falling back to our general case, check if the unscaled
7520  // instructions can handle this. If so, that's preferable.
7521  if (selectAddrModeUnscaled(Root, Size))
7522    return std::nullopt;
7523
7524  return {{
7525      [=](MachineInstrBuilder &MIB) { MIB.add(Root); },
7526      [=](MachineInstrBuilder &MIB) { MIB.addImm(0); },
7527  }};
7528}
7529
7530/// Given a shift instruction, return the correct shift type for that
7531/// instruction.
/// NOTE(review): line 7532 (the signature) was dropped by the extraction; the
/// index at the end of this dump declares it as
/// `static AArch64_AM::ShiftExtendType getShiftTypeForInst(MachineInstr &MI)`.
7533  switch (MI.getOpcode()) {
7534  default:
// NOTE(review): line 7535 (the default return — presumably
// AArch64_AM::InvalidShiftExtend) is missing. Verify upstream.
7536  case TargetOpcode::G_SHL:
7537    return AArch64_AM::LSL;
7538  case TargetOpcode::G_LSHR:
7539    return AArch64_AM::LSR;
7540  case TargetOpcode::G_ASHR:
7541    return AArch64_AM::ASR;
7542  case TargetOpcode::G_ROTR:
7543    return AArch64_AM::ROR;
7544  }
7545}
7546
7547/// Select a "shifted register" operand. If the value is not shifted, set the
7548/// shift operand to a default value of "lsl 0".
/// NOTE(review): line 7549 (the declared return type) was dropped by the
/// extraction — confirm upstream.
7550AArch64InstructionSelector::selectShiftedRegister(MachineOperand &Root,
7551                                                  bool AllowROR) const {
7552  if (!Root.isReg())
7553    return std::nullopt;
// NOTE(review): line 7554 is missing — it began the MachineRegisterInfo
// binding that the next line completes. Verify upstream.
7555      Root.getParent()->getParent()->getParent()->getRegInfo();
7556
7557  // Check if the operand is defined by an instruction which corresponds to
7558  // a ShiftExtendType. E.g. a G_SHL, G_LSHR, etc.
7559  MachineInstr *ShiftInst = MRI.getVRegDef(Root.getReg());
// NOTE(review): line 7560 is missing — it must have defined ShType (used
// below), presumably via getShiftTypeForInst(*ShiftInst). Verify upstream.
7561  if (ShType == AArch64_AM::InvalidShiftExtend)
7562    return std::nullopt;
7563  if (ShType == AArch64_AM::ROR && !AllowROR)
7564    return std::nullopt;
7565  if (!isWorthFoldingIntoExtendedReg(*ShiftInst, MRI, false))
7566    return std::nullopt;
7567
7568  // Need an immediate on the RHS.
7569  MachineOperand &ShiftRHS = ShiftInst->getOperand(2);
7570  auto Immed = getImmedFromMO(ShiftRHS);
7571  if (!Immed)
7572    return std::nullopt;
7573
7574  // We have something that we can fold. Fold in the shift's LHS and RHS into
7575  // the instruction.
7576  MachineOperand &ShiftLHS = ShiftInst->getOperand(1);
7577  Register ShiftReg = ShiftLHS.getReg();
7578
  // Mask the shift amount to the register width, then encode type + amount.
7579  unsigned NumBits = MRI.getType(ShiftReg).getSizeInBits();
7580  unsigned Val = *Immed & (NumBits - 1);
7581  unsigned ShiftVal = AArch64_AM::getShifterImm(ShType, Val);
7582
7583  return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(ShiftReg); },
7584           [=](MachineInstrBuilder &MIB) { MIB.addImm(ShiftVal); }}};
7585}
7586
/// Classify \p MI as an AArch64 extend (SXTB/SXTH/SXTW/UXTB/UXTH/UXTW) when
/// it is an explicit extend instruction or a G_AND with a mask that acts as
/// a zero-extend. When \p IsLoadStore is set, the 8/16-bit forms are rejected.
/// NOTE(review): the extraction dropped lines 7607, 7622, 7629, 7633 and 7637
/// — each is presumably a `return AArch64_AM::InvalidShiftExtend;` fallback
/// after the preceding check. Verify against the upstream file.
7587AArch64_AM::ShiftExtendType AArch64InstructionSelector::getExtendTypeForInst(
7588    MachineInstr &MI, MachineRegisterInfo &MRI, bool IsLoadStore) const {
7589  unsigned Opc = MI.getOpcode();
7590
7591  // Handle explicit extend instructions first.
7592  if (Opc == TargetOpcode::G_SEXT || Opc == TargetOpcode::G_SEXT_INREG) {
7593    unsigned Size;
7594    if (Opc == TargetOpcode::G_SEXT)
7595      Size = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
7596    else
7597      Size = MI.getOperand(2).getImm();
7598    assert(Size != 64 && "Extend from 64 bits?");
7599    switch (Size) {
7600    case 8:
7601      return IsLoadStore ? AArch64_AM::InvalidShiftExtend : AArch64_AM::SXTB;
7602    case 16:
7603      return IsLoadStore ? AArch64_AM::InvalidShiftExtend : AArch64_AM::SXTH;
7604    case 32:
7605      return AArch64_AM::SXTW;
7606    default:
7608    }
7609  }
7610
7611  if (Opc == TargetOpcode::G_ZEXT || Opc == TargetOpcode::G_ANYEXT) {
7612    unsigned Size = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
7613    assert(Size != 64 && "Extend from 64 bits?");
7614    switch (Size) {
7615    case 8:
7616      return IsLoadStore ? AArch64_AM::InvalidShiftExtend : AArch64_AM::UXTB;
7617    case 16:
7618      return IsLoadStore ? AArch64_AM::InvalidShiftExtend : AArch64_AM::UXTH;
7619    case 32:
7620      return AArch64_AM::UXTW;
7621    default:
7623    }
7624  }
7625
7626  // Don't have an explicit extend. Try to handle a G_AND with a constant mask
7627  // on the RHS.
7628  if (Opc != TargetOpcode::G_AND)
7630
7631  std::optional<uint64_t> MaybeAndMask = getImmedFromMO(MI.getOperand(2));
7632  if (!MaybeAndMask)
7634  uint64_t AndMask = *MaybeAndMask;
  // Only byte/halfword/word masks correspond to a hardware extend.
7635  switch (AndMask) {
7636  default:
7638  case 0xFF:
7639    return !IsLoadStore ? AArch64_AM::UXTB : AArch64_AM::InvalidShiftExtend;
7640  case 0xFFFF:
7641    return !IsLoadStore ? AArch64_AM::UXTH : AArch64_AM::InvalidShiftExtend;
7642  case 0xFFFFFFFF:
7643    return AArch64_AM::UXTW;
7644  }
7645}
7646
7647Register AArch64InstructionSelector::moveScalarRegClass(
7648 Register Reg, const TargetRegisterClass &RC, MachineIRBuilder &MIB) const {
7649 MachineRegisterInfo &MRI = *MIB.getMRI();
7650 auto Ty = MRI.getType(Reg);
7651 assert(!Ty.isVector() && "Expected scalars only!");
7652 if (Ty.getSizeInBits() == TRI.getRegSizeInBits(RC))
7653 return Reg;
7654
7655 // Create a copy and immediately select it.
7656 // FIXME: We should have an emitCopy function?
7657 auto Copy = MIB.buildCopy({&RC}, {Reg});
7658 selectCopy(*Copy, TII, MRI, TRI, RBI);
7659 return Copy.getReg(0);
7660}
7661
7662/// Select an "extended register" operand. This operand folds in an extend
7663/// followed by an optional left shift.
/// NOTE(review): line 7664 (the declared return type) was dropped by the
/// extraction — confirm upstream.
7665AArch64InstructionSelector::selectArithExtendedRegister(
7666    MachineOperand &Root) const {
7667  if (!Root.isReg())
7668    return std::nullopt;
// NOTE(review): line 7669 is missing — it began the MachineRegisterInfo
// binding that the next line completes. Verify upstream.
7670      Root.getParent()->getParent()->getParent()->getRegInfo();
7671
7672  uint64_t ShiftVal = 0;
7673  Register ExtReg;
// NOTE(review): line 7674 is missing — it must have declared Ext (assigned
// below), presumably an AArch64_AM::ShiftExtendType. Verify upstream.
7675  MachineInstr *RootDef = getDefIgnoringCopies(Root.getReg(), MRI);
7676  if (!RootDef)
7677    return std::nullopt;
7678
7679  if (!isWorthFoldingIntoExtendedReg(*RootDef, MRI, false))
7680    return std::nullopt;
7681
7682  // Check if we can fold a shift and an extend.
7683  if (RootDef->getOpcode() == TargetOpcode::G_SHL) {
7684    // Look for a constant on the RHS of the shift.
7685    MachineOperand &RHS = RootDef->getOperand(2);
7686    std::optional<uint64_t> MaybeShiftVal = getImmedFromMO(RHS);
7687    if (!MaybeShiftVal)
7688      return std::nullopt;
7689    ShiftVal = *MaybeShiftVal;
    // Arith extended-register operands only encode shifts of 0..4.
7690    if (ShiftVal > 4)
7691      return std::nullopt;
7692    // Look for a valid extend instruction on the LHS of the shift.
7693    MachineOperand &LHS = RootDef->getOperand(1);
7694    MachineInstr *ExtDef = getDefIgnoringCopies(LHS.getReg(), MRI);
7695    if (!ExtDef)
7696      return std::nullopt;
7697    Ext = getExtendTypeForInst(*ExtDef, MRI);
// NOTE(review): line 7698 (the check that Ext is valid before this bail-out)
// is missing — verify upstream.
7699      return std::nullopt;
7700    ExtReg = ExtDef->getOperand(1).getReg();
7701  } else {
7702    // Didn't get a shift. Try just folding an extend.
7703    Ext = getExtendTypeForInst(*RootDef, MRI);
// NOTE(review): line 7704 (the check that Ext is valid before this bail-out)
// is missing — verify upstream.
7705      return std::nullopt;
7706    ExtReg = RootDef->getOperand(1).getReg();
7707
7708    // If we have a 32 bit instruction which zeroes out the high half of a
7709    // register, we get an implicit zero extend for free. Check if we have one.
7710    // FIXME: We actually emit the extend right now even though we don't have
7711    // to.
7712    if (Ext == AArch64_AM::UXTW && MRI.getType(ExtReg).getSizeInBits() == 32) {
7713      MachineInstr *ExtInst = MRI.getVRegDef(ExtReg);
7714      if (isDef32(*ExtInst))
7715        return std::nullopt;
7716    }
7717  }
7718
7719  // We require a GPR32 here. Narrow the ExtReg if needed using a subregister
7720  // copy.
7721  MachineIRBuilder MIB(*RootDef);
7722  ExtReg = moveScalarRegClass(ExtReg, AArch64::GPR32RegClass, MIB);
7723
7724  return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(ExtReg); },
7725           [=](MachineInstrBuilder &MIB) {
7726             MIB.addImm(getArithExtendImm(Ext, ShiftVal));
7727           }}};
7728}
7729
/// Select the source of the high 64-bit half of a 128-bit vector: matches a
/// G_UNMERGE_VALUES whose second result is used, or a G_EXTRACT_VECTOR_ELT of
/// lane 1 of a <2 x s64>, looking through little-endian bitcasts and copies.
/// NOTE(review): line 7730 (the declared return type) was dropped by the
/// extraction — confirm upstream.
7731AArch64InstructionSelector::selectExtractHigh(MachineOperand &Root) const {
7732  if (!Root.isReg())
7733    return std::nullopt;
// NOTE(review): line 7734 is missing — it began the MachineRegisterInfo
// binding that the next line completes. Verify upstream.
7735      Root.getParent()->getParent()->getParent()->getRegInfo();
7736
7737  auto Extract = getDefSrcRegIgnoringCopies(Root.getReg(), MRI);
7738  while (Extract && Extract->MI->getOpcode() == TargetOpcode::G_BITCAST &&
7739         STI.isLittleEndian())
7740    Extract =
7741        getDefSrcRegIgnoringCopies(Extract->MI->getOperand(1).getReg(), MRI);
7742  if (!Extract)
7743    return std::nullopt;
7744
  // Unmerge case: if Root is the second (high) result, feed the source in.
7745  if (Extract->MI->getOpcode() == TargetOpcode::G_UNMERGE_VALUES) {
7746    if (Extract->Reg == Extract->MI->getOperand(1).getReg()) {
7747      Register ExtReg = Extract->MI->getOperand(2).getReg();
7748      return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(ExtReg); }}};
7749    }
7750  }
7751  if (Extract->MI->getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT) {
7752    LLT SrcTy = MRI.getType(Extract->MI->getOperand(1).getReg());
// NOTE(review): line 7753 is missing — it must have defined LaneIdx (used
// below), presumably via a constant look-through of the index operand.
7754        Extract->MI->getOperand(2).getReg(), MRI);
7755    if (LaneIdx && SrcTy == LLT::fixed_vector(2, 64) &&
7756        LaneIdx->Value.getSExtValue() == 1) {
7757      Register ExtReg = Extract->MI->getOperand(1).getReg();
7758      return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(ExtReg); }}};
7759    }
7760  }
7761
7762  return std::nullopt;
7763}
7764
7765void AArch64InstructionSelector::renderTruncImm(MachineInstrBuilder &MIB,
7766 const MachineInstr &MI,
7767 int OpIdx) const {
7768 const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
7769 assert(MI.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 &&
7770 "Expected G_CONSTANT");
7771 std::optional<int64_t> CstVal =
7772 getIConstantVRegSExtVal(MI.getOperand(0).getReg(), MRI);
7773 assert(CstVal && "Expected constant value");
7774 MIB.addImm(*CstVal);
7775}
7776
/// Render a G_CONSTANT as a 32-bit logical-immediate encoding.
7777void AArch64InstructionSelector::renderLogicalImm32(
7778    MachineInstrBuilder &MIB, const MachineInstr &I, int OpIdx) const {
7779  assert(I.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 &&
7780         "Expected G_CONSTANT");
7781  uint64_t CstVal = I.getOperand(1).getCImm()->getZExtValue();
// NOTE(review): line 7782 is missing — it must have computed Enc from CstVal
// (presumably the AArch64 logical-immediate encoding for a 32-bit value).
// Verify upstream.
7783  MIB.addImm(Enc);
7784}
7785
/// Render a G_CONSTANT as a 64-bit logical-immediate encoding.
7786void AArch64InstructionSelector::renderLogicalImm64(
7787    MachineInstrBuilder &MIB, const MachineInstr &I, int OpIdx) const {
7788  assert(I.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 &&
7789         "Expected G_CONSTANT");
7790  uint64_t CstVal = I.getOperand(1).getCImm()->getZExtValue();
// NOTE(review): line 7791 is missing — it must have computed Enc from CstVal
// (presumably the AArch64 logical-immediate encoding for a 64-bit value).
// Verify upstream.
7792  MIB.addImm(Enc);
7793}
7794
7795void AArch64InstructionSelector::renderUbsanTrap(MachineInstrBuilder &MIB,
7796 const MachineInstr &MI,
7797 int OpIdx) const {
7798 assert(MI.getOpcode() == TargetOpcode::G_UBSANTRAP && OpIdx == 0 &&
7799 "Expected G_UBSANTRAP");
7800 MIB.addImm(MI.getOperand(0).getImm() | ('U' << 8));
7801}
7802
7803void AArch64InstructionSelector::renderFPImm16(MachineInstrBuilder &MIB,
7804 const MachineInstr &MI,
7805 int OpIdx) const {
7806 assert(MI.getOpcode() == TargetOpcode::G_FCONSTANT && OpIdx == -1 &&
7807 "Expected G_FCONSTANT");
7808 MIB.addImm(
7809 AArch64_AM::getFP16Imm(MI.getOperand(1).getFPImm()->getValueAPF()));
7810}
7811
7812void AArch64InstructionSelector::renderFPImm32(MachineInstrBuilder &MIB,
7813 const MachineInstr &MI,
7814 int OpIdx) const {
7815 assert(MI.getOpcode() == TargetOpcode::G_FCONSTANT && OpIdx == -1 &&
7816 "Expected G_FCONSTANT");
7817 MIB.addImm(
7818 AArch64_AM::getFP32Imm(MI.getOperand(1).getFPImm()->getValueAPF()));
7819}
7820
7821void AArch64InstructionSelector::renderFPImm64(MachineInstrBuilder &MIB,
7822 const MachineInstr &MI,
7823 int OpIdx) const {
7824 assert(MI.getOpcode() == TargetOpcode::G_FCONSTANT && OpIdx == -1 &&
7825 "Expected G_FCONSTANT");
7826 MIB.addImm(
7827 AArch64_AM::getFP64Imm(MI.getOperand(1).getFPImm()->getValueAPF()));
7828}
7829
/// Render a G_FCONSTANT's raw bit pattern through the AdvSIMD modified
/// immediate (type 4) encoder.
7830void AArch64InstructionSelector::renderFPImm32SIMDModImmType4(
7831    MachineInstrBuilder &MIB, const MachineInstr &MI, int OpIdx) const {
7832  assert(MI.getOpcode() == TargetOpcode::G_FCONSTANT && OpIdx == -1 &&
7833         "Expected G_FCONSTANT");
// NOTE(review): line 7834 is missing — it began the MIB.addImm(...) call that
// the chained expression below completes (presumably wrapping
// MI.getOperand(1) in an encodeAdvSIMDModImmType4 call). Verify upstream.
7835                 .getFPImm()
7836                 ->getValueAPF()
7837                 .bitcastToAPInt()
7838                 .getZExtValue()));
7839}
7840
7841bool AArch64InstructionSelector::isLoadStoreOfNumBytes(
7842 const MachineInstr &MI, unsigned NumBytes) const {
7843 if (!MI.mayLoadOrStore())
7844 return false;
7845 assert(MI.hasOneMemOperand() &&
7846 "Expected load/store to have only one mem op!");
7847 return (*MI.memoperands_begin())->getSize() == NumBytes;
7848}
7849
7850bool AArch64InstructionSelector::isDef32(const MachineInstr &MI) const {
7851 const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
7852 if (MRI.getType(MI.getOperand(0).getReg()).getSizeInBits() != 32)
7853 return false;
7854
7855 // Only return true if we know the operation will zero-out the high half of
7856 // the 64-bit register. Truncates can be subregister copies, which don't
7857 // zero out the high bits. Copies and other copy-like instructions can be
7858 // fed by truncates, or could be lowered as subregister copies.
7859 switch (MI.getOpcode()) {
7860 default:
7861 return true;
7862 case TargetOpcode::COPY:
7863 case TargetOpcode::G_BITCAST:
7864 case TargetOpcode::G_TRUNC:
7865 case TargetOpcode::G_PHI:
7866 return false;
7867 }
7868}
7869
7870
7870// Perform fixups on the given PHI instruction's operands to force them all
7871// to be the same as the destination regbank.
// NOTE(review): line 7873 (the first line of the signature) was dropped; the
// index at the end of this dump declares it as
// `static void fixupPHIOpBanks(MachineInstr &MI, MachineRegisterInfo &MRI,`.
7874                            const AArch64RegisterBankInfo &RBI) {
7875  assert(MI.getOpcode() == TargetOpcode::G_PHI && "Expected a G_PHI");
7876  Register DstReg = MI.getOperand(0).getReg();
7877  const RegisterBank *DstRB = MRI.getRegBankOrNull(DstReg);
7878  assert(DstRB && "Expected PHI dst to have regbank assigned");
7879  MachineIRBuilder MIB(MI);
7880
7881  // Go through each operand and ensure it has the same regbank.
7882  for (MachineOperand &MO : llvm::drop_begin(MI.operands())) {
7883    if (!MO.isReg())
7884      continue;
7885    Register OpReg = MO.getReg();
7886    const RegisterBank *RB = MRI.getRegBankOrNull(OpReg);
7887    if (RB != DstRB) {
7888      // Insert a cross-bank copy.
7889      auto *OpDef = MRI.getVRegDef(OpReg);
7890      const LLT &Ty = MRI.getType(OpReg);
7891      MachineBasicBlock &OpDefBB = *OpDef->getParent();
7892
7893      // Any instruction we insert must appear after all PHIs in the block
7894      // for the block to be valid MIR.
7895      MachineBasicBlock::iterator InsertPt = std::next(OpDef->getIterator());
7896      if (InsertPt != OpDefBB.end() && InsertPt->isPHI())
7897        InsertPt = OpDefBB.getFirstNonPHI();
7898      MIB.setInsertPt(*OpDef->getParent(), InsertPt);
7899      auto Copy = MIB.buildCopy(Ty, OpReg);
7900      MRI.setRegBank(Copy.getReg(0), *DstRB);
7901      MO.setReg(Copy.getReg(0));
7902    }
7903  }
7904}
7905
/// Scan the whole function for G_PHIs whose small (<32-bit) scalar operands
/// are split across the GPR and FPR banks, and homogenize them by inserting
/// cross-bank copies (see fixupPHIOpBanks).
7906void AArch64InstructionSelector::processPHIs(MachineFunction &MF) {
7907  // We're looking for PHIs, build a list so we don't invalidate iterators.
// NOTE(review): lines 7908-7909 are missing — they must have declared the
// Phis container populated below (presumably a SmallVector of MachineInstr*).
// Verify upstream.
7910  for (auto &BB : MF) {
7911    for (auto &MI : BB) {
7912      if (MI.getOpcode() == TargetOpcode::G_PHI)
7913        Phis.emplace_back(&MI);
7914    }
7915  }
7916
7917  for (auto *MI : Phis) {
7918    // We need to do some work here if the operand types are < 16 bit and they
7919    // are split across fpr/gpr banks. Since all types <32b on gpr
7920    // end up being assigned gpr32 regclasses, we can end up with PHIs here
7921    // which try to select between a gpr32 and an fpr16. Ideally RBS shouldn't
7922    // be selecting heterogenous regbanks for operands if possible, but we
7923    // still need to be able to deal with it here.
7924    //
7925    // To fix this, if we have a gpr-bank operand < 32b in size and at least
7926    // one other operand is on the fpr bank, then we add cross-bank copies
7927    // to homogenize the operand banks. For simplicity the bank that we choose
7928    // to settle on is whatever bank the def operand has. For example:
7929    //
7930    // %endbb:
7931    //   %dst:gpr(s16) = G_PHI %in1:gpr(s16), %bb1, %in2:fpr(s16), %bb2
7932    // =>
7933    // %bb2:
7934    //   ...
7935    //   %in2_copy:gpr(s16) = COPY %in2:fpr(s16)
7936    //   ...
7937    // %endbb:
7938    //   %dst:gpr(s16) = G_PHI %in1:gpr(s16), %bb1, %in2_copy:gpr(s16), %bb2
7939    bool HasGPROp = false, HasFPROp = false;
7940    for (const MachineOperand &MO : llvm::drop_begin(MI->operands())) {
7941      if (!MO.isReg())
7942        continue;
7943      const LLT &Ty = MRI.getType(MO.getReg());
7944      if (!Ty.isValid() || !Ty.isScalar())
7945        break;
      // Only sub-32-bit scalars can hit the gpr32/fpr16 mismatch described
      // above; anything wider is left alone.
7946      if (Ty.getSizeInBits() >= 32)
7947        break;
7948      const RegisterBank *RB = MRI.getRegBankOrNull(MO.getReg());
7949      // If for some reason we don't have a regbank yet. Don't try anything.
7950      if (!RB)
7951        break;
7952
7953      if (RB->getID() == AArch64::GPRRegBankID)
7954        HasGPROp = true;
7955      else
7956        HasFPROp = true;
7957    }
7958    // We have heterogenous regbanks, need to fixup.
7959    if (HasGPROp && HasFPROp)
7960      fixupPHIOpBanks(*MI, MRI, RBI);
7961  }
7962}
7963
7964namespace llvm {
// NOTE(review): lines 7965-7966 (the factory's return type and first
// parameters — presumably `InstructionSelector *createAArch64InstructionSelector(
// const AArch64TargetMachine &TM,`) were dropped by the extraction. Verify
// upstream.
7967                                 const AArch64Subtarget &Subtarget,
7968                                 const AArch64RegisterBankInfo &RBI) {
  // Factory entry point: callers own the returned selector.
7969  return new AArch64InstructionSelector(TM, Subtarget, RBI);
7970}
7971}
unsigned SubReg
unsigned const MachineRegisterInfo * MRI
#define Success
MachineInstrBuilder MachineInstrBuilder & DefMI
static std::tuple< SDValue, SDValue > extractPtrauthBlendDiscriminators(SDValue Disc, SelectionDAG *DAG)
static bool isPreferredADD(int64_t ImmOff)
static SDValue emitConditionalComparison(SDValue LHS, SDValue RHS, ISD::CondCode CC, SDValue CCOp, AArch64CC::CondCode Predicate, AArch64CC::CondCode OutCC, const SDLoc &DL, SelectionDAG &DAG)
can be transformed to: not (and (not (and (setCC (cmp C)) (setCD (cmp D)))) (and (not (setCA (cmp A))...
static SDValue tryAdvSIMDModImm16(unsigned NewOp, SDValue Op, SelectionDAG &DAG, const APInt &Bits, const SDValue *LHS=nullptr)
static SDValue tryAdvSIMDModImmFP(unsigned NewOp, SDValue Op, SelectionDAG &DAG, const APInt &Bits)
static SDValue tryAdvSIMDModImm64(unsigned NewOp, SDValue Op, SelectionDAG &DAG, const APInt &Bits)
static bool isCMN(SDValue Op, ISD::CondCode CC, SelectionDAG &DAG)
static SDValue tryAdvSIMDModImm8(unsigned NewOp, SDValue Op, SelectionDAG &DAG, const APInt &Bits)
static SDValue emitConjunctionRec(SelectionDAG &DAG, SDValue Val, AArch64CC::CondCode &OutCC, bool Negate, SDValue CCOp, AArch64CC::CondCode Predicate)
Emit conjunction or disjunction tree with the CMP/FCMP followed by a chain of CCMP/CFCMP ops.
static SDValue tryAdvSIMDModImm321s(unsigned NewOp, SDValue Op, SelectionDAG &DAG, const APInt &Bits)
static SDValue tryAdvSIMDModImm32(unsigned NewOp, SDValue Op, SelectionDAG &DAG, const APInt &Bits, const SDValue *LHS=nullptr)
static SDValue emitConjunction(SelectionDAG &DAG, SDValue Val, AArch64CC::CondCode &OutCC)
Emit expression as a conjunction (a series of CCMP/CFCMP ops).
static unsigned selectFPConvOpc(unsigned GenericOpc, LLT DstTy, LLT SrcTy)
#define GET_GLOBALISEL_PREDICATES_INIT
static std::pair< const TargetRegisterClass *, const TargetRegisterClass * > getRegClassesForCopy(MachineInstr &I, const TargetInstrInfo &TII, MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI, const RegisterBankInfo &RBI)
Helper function to get the source and destination register classes for a copy.
#define GET_GLOBALISEL_TEMPORARIES_INIT
static void changeFPCCToANDAArch64CC(CmpInst::Predicate CC, AArch64CC::CondCode &CondCode, AArch64CC::CondCode &CondCode2)
Convert an IR fp condition code to an AArch64 CC.
static Register getTestBitReg(Register Reg, uint64_t &Bit, bool &Invert, MachineRegisterInfo &MRI)
Return a register which can be used as a bit to test in a TB(N)Z.
static unsigned getMinSizeForRegBank(const RegisterBank &RB)
Returns the minimum size the given register bank can hold.
static std::optional< int64_t > getVectorShiftImm(Register Reg, MachineRegisterInfo &MRI)
Returns the element immediate value of a vector shift operand if found.
static unsigned selectLoadStoreUIOp(unsigned GenericOpc, unsigned RegBankID, unsigned OpSize)
Select the AArch64 opcode for the G_LOAD or G_STORE operation GenericOpc, appropriate for the (value)...
static const TargetRegisterClass * getMinClassForRegBank(const RegisterBank &RB, TypeSize SizeInBits, bool GetAllRegSet=false)
Given a register bank, and size in bits, return the smallest register class that can represent that c...
static AArch64CC::CondCode changeICMPPredToAArch64CC(CmpInst::Predicate P)
static unsigned selectBinaryOp(unsigned GenericOpc, unsigned RegBankID, unsigned OpSize)
Select the AArch64 opcode for the basic binary operation GenericOpc (such as G_OR or G_SDIV),...
static bool getSubRegForClass(const TargetRegisterClass *RC, const TargetRegisterInfo &TRI, unsigned &SubReg)
Returns the correct subregister to use for a given register class.
static bool selectCopy(MachineInstr &I, const TargetInstrInfo &TII, MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI, const RegisterBankInfo &RBI)
static bool copySubReg(MachineInstr &I, MachineRegisterInfo &MRI, const RegisterBankInfo &RBI, Register SrcReg, const TargetRegisterClass *To, unsigned SubReg)
Helper function for selectCopy.
static Register createDTuple(ArrayRef< Register > Regs, MachineIRBuilder &MIB)
Create a tuple of D-registers using the registers in Regs.
static void fixupPHIOpBanks(MachineInstr &MI, MachineRegisterInfo &MRI, const AArch64RegisterBankInfo &RBI)
static bool selectDebugInstr(MachineInstr &I, MachineRegisterInfo &MRI, const RegisterBankInfo &RBI)
static bool isSignExtendShiftType(AArch64_AM::ShiftExtendType Type)
static AArch64_AM::ShiftExtendType getShiftTypeForInst(MachineInstr &MI)
Given a shift instruction, return the correct shift type for that instruction.
static bool unsupportedBinOp(const MachineInstr &I, const AArch64RegisterBankInfo &RBI, const MachineRegisterInfo &MRI, const AArch64RegisterInfo &TRI)
Check whether I is a currently unsupported binary operation:
static bool canEmitConjunction(Register Val, bool &CanNegate, bool &MustBeFirst, bool WillNegate, MachineRegisterInfo &MRI, unsigned Depth=0)
Returns true if Val is a tree of AND/OR/CMP operations that can be expressed as a conjunction.
static bool getLaneCopyOpcode(unsigned &CopyOpc, unsigned &ExtractSubReg, const unsigned EltSize)
static Register createQTuple(ArrayRef< Register > Regs, MachineIRBuilder &MIB)
Create a tuple of Q-registers using the registers in Regs.
static std::optional< uint64_t > getImmedFromMO(const MachineOperand &Root)
static std::pair< unsigned, unsigned > getInsertVecEltOpInfo(const RegisterBank &RB, unsigned EltSize)
Return an <Opcode, SubregIndex> pair to do an vector elt insert of a given size and RB.
static Register createTuple(ArrayRef< Register > Regs, const unsigned RegClassIDs[], const unsigned SubRegs[], MachineIRBuilder &MIB)
Create a REG_SEQUENCE instruction using the registers in Regs.
static std::optional< int64_t > getVectorSHLImm(LLT SrcTy, Register Reg, MachineRegisterInfo &MRI)
Matches and returns the shift immediate value for a SHL instruction given a shift operand.
static void changeFPCCToORAArch64CC(CmpInst::Predicate CC, AArch64CC::CondCode &CondCode, AArch64CC::CondCode &CondCode2)
changeFPCCToORAArch64CC - Convert an IR fp condition code to an AArch64 CC.
unsigned RegSize
This file declares the targeting of the RegisterBankInfo class for AArch64.
static const LLT S64
static const LLT S32
static const LLT S16
static const LLT S8
static bool isStore(int Opcode)
static bool selectMergeValues(MachineInstrBuilder &MIB, const ARMBaseInstrInfo &TII, MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI, const RegisterBankInfo &RBI)
static bool selectUnmergeValues(MachineInstrBuilder &MIB, const ARMBaseInstrInfo &TII, MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI, const RegisterBankInfo &RBI)
MachineBasicBlock & MBB
This file contains the declarations for the subclasses of Constant, which represent the different fla...
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
#define LLVM_DEBUG(X)
Definition: Debug.h:101
This file contains constants used for implementing Dwarf debug support.
uint64_t Addr
uint64_t Size
#define DEBUG_TYPE
Declares convenience wrapper classes for interpreting MachineInstr instances as specific generic oper...
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
static void emitLoadFromConstantPool(Register DstReg, const Constant *ConstVal, MachineIRBuilder &MIRBuilder)
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
Contains matchers for matching SSA Machine Instructions.
This file declares the MachineConstantPool class which is an abstract constant pool to keep track of ...
This file declares the MachineIRBuilder class.
unsigned const TargetRegisterInfo * TRI
unsigned Reg
static unsigned getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
#define P(N)
static StringRef getName(Value *V)
const MachineOperand & RHS
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
static constexpr int Concat[]
Value * LHS
AArch64FunctionInfo - This class is derived from MachineFunctionInfo and contains private AArch64-spe...
This class provides the information for the target register banks.
bool isCallingConvWin64(CallingConv::ID CC, bool IsVarArg) const
APInt bitcastToAPInt() const
Definition: APFloat.h:1262
Class for arbitrary precision integers.
Definition: APInt.h:77
APInt zext(unsigned width) const
Zero extend to a new width.
Definition: APInt.cpp:981
uint64_t getZExtValue() const
Get zero extended value.
Definition: APInt.h:1497
APInt trunc(unsigned width) const
Truncate to new width.
Definition: APInt.cpp:906
static APInt getSplat(unsigned NewLen, const APInt &V)
Return a value containing V broadcasted over NewLen bits.
Definition: APInt.cpp:620
static APInt getHighBitsSet(unsigned numBits, unsigned hiBitsSet)
Constructs an APInt value that has the top hiBitsSet bits set.
Definition: APInt.h:273
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:165
BlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate IR basic block frequen...
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition: InstrTypes.h:757
@ FCMP_OEQ
0 0 0 1 True if ordered and equal
Definition: InstrTypes.h:760
@ ICMP_SLT
signed less than
Definition: InstrTypes.h:786
@ ICMP_SLE
signed less or equal
Definition: InstrTypes.h:787
@ FCMP_OLT
0 1 0 0 True if ordered and less than
Definition: InstrTypes.h:763
@ FCMP_ULE
1 1 0 1 True if unordered, less than, or equal
Definition: InstrTypes.h:772
@ FCMP_OGT
0 0 1 0 True if ordered and greater than
Definition: InstrTypes.h:761
@ FCMP_OGE
0 0 1 1 True if ordered and greater than or equal
Definition: InstrTypes.h:762
@ ICMP_UGE
unsigned greater or equal
Definition: InstrTypes.h:781
@ ICMP_UGT
unsigned greater than
Definition: InstrTypes.h:780
@ ICMP_SGT
signed greater than
Definition: InstrTypes.h:784
@ FCMP_ULT
1 1 0 0 True if unordered or less than
Definition: InstrTypes.h:771
@ FCMP_ONE
0 1 1 0 True if ordered and operands are unequal
Definition: InstrTypes.h:765
@ FCMP_UEQ
1 0 0 1 True if unordered or equal
Definition: InstrTypes.h:768
@ ICMP_ULT
unsigned less than
Definition: InstrTypes.h:782
@ FCMP_UGT
1 0 1 0 True if unordered or greater than
Definition: InstrTypes.h:769
@ FCMP_OLE
0 1 0 1 True if ordered and less than or equal
Definition: InstrTypes.h:764
@ FCMP_ORD
0 1 1 1 True if ordered (no nans)
Definition: InstrTypes.h:766
@ ICMP_EQ
equal
Definition: InstrTypes.h:778
@ ICMP_NE
not equal
Definition: InstrTypes.h:779
@ ICMP_SGE
signed greater or equal
Definition: InstrTypes.h:785
@ FCMP_UNE
1 1 1 0 True if unordered or not equal
Definition: InstrTypes.h:773
@ ICMP_ULE
unsigned less or equal
Definition: InstrTypes.h:783
@ FCMP_UGE
1 0 1 1 True if unordered, greater than, or equal
Definition: InstrTypes.h:770
@ FCMP_UNO
1 0 0 0 True if unordered: isnan(X) | isnan(Y)
Definition: InstrTypes.h:767
Predicate getInversePredicate() const
For example, EQ -> NE, UGT -> ULE, SLT -> SGE, OEQ -> UNE, UGT -> OLE, OLT -> UGE,...
Definition: InstrTypes.h:871
bool isIntPredicate() const
Definition: InstrTypes.h:865
bool isUnsigned() const
Definition: InstrTypes.h:1013
static Constant * getSplat(unsigned NumElts, Constant *Elt)
Return a ConstantVector with the specified constant in each element.
Definition: Constants.cpp:3027
ConstantFP - Floating Point Values [float, double].
Definition: Constants.h:269
const APFloat & getValueAPF() const
Definition: Constants.h:312
bool isNegative() const
Return true if the sign bit is set.
Definition: Constants.h:319
bool isZero() const
Return true if the value is positive or negative zero.
Definition: Constants.h:316
This is the shared class of boolean and integer constants.
Definition: Constants.h:81
int64_t getSExtValue() const
Return the constant as a 64-bit integer value after it has been sign extended as appropriate for the ...
Definition: Constants.h:161
unsigned getBitWidth() const
getBitWidth - Return the scalar bitwidth of this constant.
Definition: Constants.h:149
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
Definition: Constants.h:155
static Constant * get(ArrayRef< Constant * > V)
Definition: Constants.cpp:1399
This is an important base class in LLVM.
Definition: Constant.h:42
Constant * getSplatValue(bool AllowPoison=false) const
If all elements of the vector constant have the same value, return that value.
Definition: Constants.cpp:1686
const APInt & getUniqueInteger() const
If C is a constant integer then return its value, otherwise C must be a vector of constant integers,...
Definition: Constants.cpp:1745
bool isNullValue() const
Return true if this is the value that would be returned by getNullValue.
Definition: Constants.cpp:90
This class represents an Operation in the Expression.
TypeSize getTypeStoreSize(Type *Ty) const
Returns the maximum number of bytes that may be overwritten by storing the specified type.
Definition: DataLayout.h:429
Align getPrefTypeAlign(Type *Ty) const
Returns the preferred stack/global alignment for the specified type.
Definition: DataLayout.cpp:842
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this functio...
Definition: Function.h:281
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
Definition: Function.cpp:380
bool isVarArg() const
isVarArg - Return true if this function takes a variable number of arguments.
Definition: Function.h:232
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition: Function.cpp:743
std::optional< SmallVector< std::function< void(MachineInstrBuilder &)>, 4 > > ComplexRendererFns
Represents indexed stores.
Represents any type of generic load or store.
Register getPointerReg() const
Get the source register of the pointer value.
MachineMemOperand & getMMO() const
Get the MachineMemOperand on this instruction.
LocationSize getMemSize() const
Returns the size in bytes of the memory access.
LocationSize getMemSizeInBits() const
Returns the size in bits of the memory access.
Represents a G_SELECT.
Register getCondReg() const
Register getFalseReg() const
Register getTrueReg() const
Register getReg(unsigned Idx) const
Access the Idx'th operand as a register and return it.
bool isThreadLocal() const
If the value is "Thread Local", its value isn't shared by the threads.
Definition: GlobalValue.h:263
bool hasExternalWeakLinkage() const
Definition: GlobalValue.h:529
bool isEquality() const
Return true if this predicate is either EQ or NE.
constexpr unsigned getScalarSizeInBits() const
Definition: LowLevelType.h:267
constexpr bool isScalar() const
Definition: LowLevelType.h:146
constexpr LLT changeElementType(LLT NewEltTy) const
If this type is a vector, return a vector with the same number of elements but the new element type.
Definition: LowLevelType.h:214
constexpr LLT multiplyElements(int Factor) const
Produce a vector type that is Factor times bigger, preserving the element type.
Definition: LowLevelType.h:254
constexpr bool isPointerVector() const
Definition: LowLevelType.h:152
static constexpr LLT scalar(unsigned SizeInBits)
Get a low-level scalar or aggregate "bag of bits".
Definition: LowLevelType.h:42
constexpr bool isValid() const
Definition: LowLevelType.h:145
constexpr bool isVector() const
Definition: LowLevelType.h:148
static constexpr LLT pointer(unsigned AddressSpace, unsigned SizeInBits)
Get a low-level pointer in the given address space.
Definition: LowLevelType.h:57
constexpr TypeSize getSizeInBits() const
Returns the total size of the type. Must only be called on sized types.
Definition: LowLevelType.h:193
constexpr bool isPointer() const
Definition: LowLevelType.h:149
constexpr LLT getElementType() const
Returns the vector's element type. Only valid for vector types.
Definition: LowLevelType.h:290
constexpr unsigned getAddressSpace() const
Definition: LowLevelType.h:280
static constexpr LLT fixed_vector(unsigned NumElements, unsigned ScalarSizeInBits)
Get a low-level fixed-width vector of some number of elements and element width.
Definition: LowLevelType.h:100
constexpr TypeSize getSizeInBytes() const
Returns the total size of the type in bytes, i.e.
Definition: LowLevelType.h:203
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:67
TypeSize getValue() const
Set of metadata that should be preserved when using BuildMI().
iterator getFirstNonPHI()
Returns a pointer to the first instruction in this block that is not a PHINode instruction.
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
The MachineConstantPool class keeps track of constants referenced by a function which must be spilled...
unsigned getConstantPoolIndex(const Constant *C, Align Alignment)
getConstantPoolIndex - Create a new entry in the constant pool or return an existing one.
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
void setFrameAddressIsTaken(bool T)
void setReturnAddressIsTaken(bool s)
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
const DataLayout & getDataLayout() const
Return the DataLayout attached to the Module associated to this MF.
Function & getFunction()
Return the LLVM function that this machine code represents.
const LLVMTargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
MachineConstantPool * getConstantPool()
getConstantPool - Return the constant pool object for the current function.
Helper class to build MachineInstr.
void setInsertPt(MachineBasicBlock &MBB, MachineBasicBlock::iterator II)
Set the insertion point before the specified position.
void setInstr(MachineInstr &MI)
Set the insertion point to before MI.
MachineInstrBuilder buildInstr(unsigned Opcode)
Build and insert <empty> = Opcode <empty>.
MachineFunction & getMF()
Getter for the function we currently build.
void setInstrAndDebugLoc(MachineInstr &MI)
Set the insertion point to before MI, and set the debug loc to MI's loc.
const MachineBasicBlock & getMBB() const
Getter for the basic block we currently build.
MachineRegisterInfo * getMRI()
Getter for MRI.
MachineIRBuilderState & getState()
Getter for the State.
MachineInstrBuilder buildCopy(const DstOp &Res, const SrcOp &Op)
Build and insert Res = COPY Op.
const DataLayout & getDataLayout() const
void setState(const MachineIRBuilderState &NewState)
Setter for the State.
MachineInstrBuilder buildPtrToInt(const DstOp &Dst, const SrcOp &Src)
Build and insert a G_PTRTOINT instruction.
Register getReg(unsigned Idx) const
Get the register for the operand index.
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & addBlockAddress(const BlockAddress *BA, int64_t Offset=0, unsigned TargetFlags=0) const
const MachineInstrBuilder & addFrameIndex(int Idx) const
const MachineInstrBuilder & addRegMask(const uint32_t *Mask) const
const MachineInstrBuilder & addGlobalAddress(const GlobalValue *GV, int64_t Offset=0, unsigned TargetFlags=0) const
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
bool constrainAllUses(const TargetInstrInfo &TII, const TargetRegisterInfo &TRI, const RegisterBankInfo &RBI) const
const MachineInstrBuilder & addJumpTableIndex(unsigned Idx, unsigned TargetFlags=0) const
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
const MachineInstrBuilder & cloneMemRefs(const MachineInstr &OtherMI) const
const MachineInstrBuilder & addUse(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register use operand.
const MachineInstrBuilder & setMIFlags(unsigned Flags) const
const MachineInstrBuilder & addMemOperand(MachineMemOperand *MMO) const
const MachineInstrBuilder & addDef(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register definition operand.
Representation of each machine instruction.
Definition: MachineInstr.h:69
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
Definition: MachineInstr.h:569
const MachineBasicBlock * getParent() const
Definition: MachineInstr.h:346
void addOperand(MachineFunction &MF, const MachineOperand &Op)
Add the specified operand to the instruction.
const MachineFunction * getMF() const
Return the function that contains the basic block that this instruction belongs to.
void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:579
void addMemOperand(MachineFunction &MF, MachineMemOperand *MO)
Add a MachineMemOperand to the machine instruction.
A description of a memory reference used in the backend.
LLT getMemoryType() const
Return the memory type of the memory reference.
@ MOLoad
The memory access reads data.
AtomicOrdering getSuccessOrdering() const
Return the atomic ordering requirements for this memory operation.
MachineOperand class - Representation of each machine instruction operand.
const GlobalValue * getGlobal() const
const ConstantInt * getCImm() const
bool isCImm() const
isCImm - Test if this is a MO_CImmediate operand.
int64_t getImm() const
bool isReg() const
isReg - Tests if this is a MO_Register operand.
void setReg(Register Reg)
Change the register this operand corresponds to.
bool isImm() const
isImm - Tests if this is a MO_Immediate operand.
void ChangeToImmediate(int64_t ImmVal, unsigned TargetFlags=0)
ChangeToImmediate - Replace this operand with a new immediate operand of the specified value.
MachineInstr * getParent()
getParent - Return the instruction that this operand belongs to.
static MachineOperand CreateImm(int64_t Val)
Register getReg() const
getReg - Returns the register number.
static MachineOperand CreateGA(const GlobalValue *GV, int64_t Offset, unsigned TargetFlags=0)
static MachineOperand CreateBA(const BlockAddress *BA, int64_t Offset, unsigned TargetFlags=0)
const ConstantFP * getFPImm() const
unsigned getPredicate() const
int64_t getOffset() const
Return the offset from the symbol in this operand.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
T get() const
Returns the value of the specified pointer type.
Definition: PointerUnion.h:155
T dyn_cast() const
Returns the current pointer if it is of the specified pointer type, otherwise returns null.
Definition: PointerUnion.h:162
Analysis providing profile information.
Holds all the information related to register banks.
static const TargetRegisterClass * constrainGenericRegister(Register Reg, const TargetRegisterClass &RC, MachineRegisterInfo &MRI)
Constrain the (possibly generic) virtual register Reg to RC.
const RegisterBank & getRegBank(unsigned ID)
Get the register bank identified by ID.
TypeSize getSizeInBits(Register Reg, const MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI) const
Get the size in bits of Reg.
This class implements the register bank concept.
Definition: RegisterBank.h:28
unsigned getID() const
Get the identifier of this register bank.
Definition: RegisterBank.h:45
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
constexpr bool isValid() const
Definition: Register.h:116
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
Definition: Register.h:91
constexpr bool isPhysical() const
Return true if the specified register number is in the physical register namespace.
Definition: Register.h:95
This class represents the LLVM 'select' instruction.
reference emplace_back(ArgTypes &&... Args)
Definition: SmallVector.h:950
void push_back(const T &Elt)
Definition: SmallVector.h:426
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1209
Register getReg() const
TargetInstrInfo - Interface to description of machine instruction set.
CodeModel::Model getCodeModel() const
Returns the code model.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
virtual const TargetRegisterInfo * getRegisterInfo() const
getRegisterInfo - If register information is available, return it.
virtual const TargetLowering * getTargetLowering() const
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:81
static constexpr TypeSize getFixed(ScalarTy ExactSize)
Definition: TypeSize.h:345
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
static IntegerType * getIntNTy(LLVMContext &C, unsigned N)
static IntegerType * getInt8Ty(LLVMContext &C)
A Use represents the edge between a Value definition and its users.
Definition: Use.h:43
LLVM Value Representation.
Definition: Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
Align getPointerAlignment(const DataLayout &DL) const
Returns an alignment of the pointer value.
Definition: Value.cpp:927
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
Definition: TypeSize.h:171
self_iterator getIterator()
Definition: ilist_node.h:132
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
static CondCode getInvertedCondCode(CondCode Code)
static unsigned getNZCVToSatisfyCondCode(CondCode Code)
Given a condition code, return NZCV flags that would satisfy that condition.
void changeFCMPPredToAArch64CC(const CmpInst::Predicate P, AArch64CC::CondCode &CondCode, AArch64CC::CondCode &CondCode2)
Find the AArch64 condition codes necessary to represent P for a scalar floating point comparison.
std::optional< int64_t > getAArch64VectorSplatScalar(const MachineInstr &MI, const MachineRegisterInfo &MRI)
@ MO_NC
MO_NC - Indicates whether the linker is expected to check the symbol reference for overflow.
@ MO_G1
MO_G1 - A symbol operand with this flag (granule 1) represents the bits 16-31 of a 64-bit address,...
@ MO_PAGEOFF
MO_PAGEOFF - A symbol operand with this flag represents the offset of that symbol within a 4K page.
@ MO_GOT
MO_GOT - This flag indicates that a symbol operand represents the address of the GOT entry for the sy...
@ MO_G0
MO_G0 - A symbol operand with this flag (granule 0) represents the bits 0-15 of a 64-bit address,...
@ MO_PAGE
MO_PAGE - A symbol operand with this flag represents the pc-relative offset of the 4K page containing...
@ MO_TLS
MO_TLS - Indicates that the operand being accessed is some kind of thread-local symbol.
@ MO_G2
MO_G2 - A symbol operand with this flag (granule 2) represents the bits 32-47 of a 64-bit address,...
@ MO_G3
MO_G3 - A symbol operand with this flag (granule 3) represents the high 16-bits of a 64-bit address,...
static bool isLogicalImmediate(uint64_t imm, unsigned regSize)
isLogicalImmediate - Return true if the immediate is valid for a logical immediate instruction of the...
static uint8_t encodeAdvSIMDModImmType2(uint64_t Imm)
static bool isAdvSIMDModImmType9(uint64_t Imm)
static bool isAdvSIMDModImmType4(uint64_t Imm)
static bool isAdvSIMDModImmType5(uint64_t Imm)
static int getFP32Imm(const APInt &Imm)
getFP32Imm - Return an 8-bit floating-point version of the 32-bit floating-point value.
static uint8_t encodeAdvSIMDModImmType7(uint64_t Imm)
static uint8_t encodeAdvSIMDModImmType12(uint64_t Imm)
static uint8_t encodeAdvSIMDModImmType10(uint64_t Imm)
static uint8_t encodeAdvSIMDModImmType9(uint64_t Imm)
static uint64_t encodeLogicalImmediate(uint64_t imm, unsigned regSize)
encodeLogicalImmediate - Return the encoded immediate value for a logical immediate instruction of th...
static bool isAdvSIMDModImmType7(uint64_t Imm)
static uint8_t encodeAdvSIMDModImmType5(uint64_t Imm)
static int getFP64Imm(const APInt &Imm)
getFP64Imm - Return an 8-bit floating-point version of the 64-bit floating-point value.
static bool isAdvSIMDModImmType10(uint64_t Imm)
static int getFP16Imm(const APInt &Imm)
getFP16Imm - Return an 8-bit floating-point version of the 16-bit floating-point value.
static uint8_t encodeAdvSIMDModImmType8(uint64_t Imm)
static bool isAdvSIMDModImmType12(uint64_t Imm)
static uint8_t encodeAdvSIMDModImmType11(uint64_t Imm)
static bool isAdvSIMDModImmType11(uint64_t Imm)
static uint8_t encodeAdvSIMDModImmType6(uint64_t Imm)
static bool isAdvSIMDModImmType8(uint64_t Imm)
static uint8_t encodeAdvSIMDModImmType4(uint64_t Imm)
static unsigned getShifterImm(AArch64_AM::ShiftExtendType ST, unsigned Imm)
getShifterImm - Encode the shift type and amount: imm: 6-bit shift amount shifter: 000 ==> lsl 001 ==...
static bool isAdvSIMDModImmType6(uint64_t Imm)
static uint8_t encodeAdvSIMDModImmType1(uint64_t Imm)
static uint8_t encodeAdvSIMDModImmType3(uint64_t Imm)
static bool isAdvSIMDModImmType2(uint64_t Imm)
static bool isAdvSIMDModImmType3(uint64_t Imm)
static bool isAdvSIMDModImmType1(uint64_t Imm)
Key
PAL metadata keys.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:121
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out,...
Definition: ISDOpcodes.h:1603
operand_type_match m_Reg()
SpecificConstantMatch m_SpecificICst(int64_t RequestedValue)
Matches a constant equal to RequestedValue.
UnaryOp_match< SrcTy, TargetOpcode::G_ZEXT > m_GZExt(const SrcTy &Src)
ConstantMatch< APInt > m_ICst(APInt &Cst)
BinaryOp_match< LHS, RHS, TargetOpcode::G_ADD, true > m_GAdd(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, TargetOpcode::G_OR, true > m_GOr(const LHS &L, const RHS &R)
BinaryOp_match< SpecificConstantMatch, SrcTy, TargetOpcode::G_SUB > m_Neg(const SrcTy &&Src)
Matches a register negated by a G_SUB.
OneNonDBGUse_match< SubPat > m_OneNonDBGUse(const SubPat &SP)
BinaryOp_match< SrcTy, SpecificConstantMatch, TargetOpcode::G_XOR, true > m_Not(const SrcTy &&Src)
Matches a register not-ed by a G_XOR.
bool mi_match(Reg R, const MachineRegisterInfo &MRI, Pattern &&P)
BinaryOp_match< LHS, RHS, TargetOpcode::G_PTR_ADD, false > m_GPtrAdd(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, TargetOpcode::G_SHL, false > m_GShl(const LHS &L, const RHS &R)
Or< Preds... > m_any_of(Preds &&... preds)
BinaryOp_match< LHS, RHS, TargetOpcode::G_AND, true > m_GAnd(const LHS &L, const RHS &R)
Predicate
Predicate - These are "(BI << 5) | BO" for various predicates.
Definition: PPCPredicates.h:26
@ Implicit
Not emitted register (e.g. carry, or temporary result).
@ Undef
Value of the register doesn't matter.
Reg
All possible values of the reg field in the ModR/M byte.
constexpr double e
Definition: MathExtras.h:47
NodeAddr< InstrNode * > Instr
Definition: RDFGraph.h:389
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
Register getFunctionLiveInPhysReg(MachineFunction &MF, const TargetInstrInfo &TII, MCRegister PhysReg, const TargetRegisterClass &RC, const DebugLoc &DL, LLT RegTy=LLT())
Return a virtual register corresponding to the incoming argument register PhysReg.
Definition: Utils.cpp:910
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition: STLExtras.h:329
@ Offset
Definition: DWP.cpp:480
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1722
Register constrainOperandRegClass(const MachineFunction &MF, const TargetRegisterInfo &TRI, MachineRegisterInfo &MRI, const TargetInstrInfo &TII, const RegisterBankInfo &RBI, MachineInstr &InsertPt, const TargetRegisterClass &RegClass, MachineOperand &RegMO)
Constrain the Register operand OpIdx, so that it is now constrained to the TargetRegisterClass passed...
Definition: Utils.cpp:56
MachineInstr * getOpcodeDef(unsigned Opcode, Register Reg, const MachineRegisterInfo &MRI)
See if Reg is defined by an single def instruction that is Opcode.
Definition: Utils.cpp:646
const ConstantFP * getConstantFPVRegVal(Register VReg, const MachineRegisterInfo &MRI)
Definition: Utils.cpp:459
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
std::optional< APInt > getIConstantVRegVal(Register VReg, const MachineRegisterInfo &MRI)
If VReg is defined by a G_CONSTANT, return the corresponding value.
Definition: Utils.cpp:295
bool constrainSelectedInstRegOperands(MachineInstr &I, const TargetInstrInfo &TII, const TargetRegisterInfo &TRI, const RegisterBankInfo &RBI)
Mutate the newly-selected instruction I to constrain its (possibly generic) virtual register operands...
Definition: Utils.cpp:155
bool isPreISelGenericOpcode(unsigned Opcode)
Check whether the given Opcode is a generic opcode that is not supposed to appear after ISel.
Definition: TargetOpcodes.h:30
unsigned getBLRCallOpcode(const MachineFunction &MF)
Return opcode to be used for indirect calls.
MachineInstr * getDefIgnoringCopies(Register Reg, const MachineRegisterInfo &MRI)
Find the def instruction for Reg, folding away any trivial copies.
Definition: Utils.cpp:486
std::optional< int64_t > getIConstantVRegSExtVal(Register VReg, const MachineRegisterInfo &MRI)
If VReg is defined by a G_CONSTANT fits in int64_t returns it.
Definition: Utils.cpp:314
constexpr bool isShiftedMask_64(uint64_t Value)
Return true if the argument contains a non-empty sequence of ones with the remainder zero (64 bit ver...
Definition: MathExtras.h:285
bool shouldOptForSize(const MachineBasicBlock &MBB, ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI)
Returns true if the given block should be optimized for size.
Definition: Utils.cpp:1637
InstructionSelector * createAArch64InstructionSelector(const AArch64TargetMachine &, const AArch64Subtarget &, const AArch64RegisterBankInfo &)
OutputIt transform(R &&Range, OutputIt d_first, UnaryFunction F)
Wrapper function around std::transform to apply a function to a range and store the result elsewhere.
Definition: STLExtras.h:1935
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1729
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition: MathExtras.h:340
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:167
std::optional< ValueAndVReg > getAnyConstantVRegValWithLookThrough(Register VReg, const MachineRegisterInfo &MRI, bool LookThroughInstrs=true, bool LookThroughAnyExt=false)
If VReg is defined by a statically evaluable chain of instructions rooted on a G_CONSTANT or G_FCONSTANT, returns its value and the virtual register it was defined on.
Definition: Utils.cpp:439
AtomicOrdering
Atomic ordering for LLVM's memory model.
DWARFExpression::Operation Op
std::optional< ValueAndVReg > getIConstantVRegValWithLookThrough(Register VReg, const MachineRegisterInfo &MRI, bool LookThroughInstrs=true)
If VReg is defined by a statically evaluable chain of instructions rooted on a G_CONSTANT, returns its value and the virtual register it was defined on.
Definition: Utils.cpp:433
std::optional< DefinitionAndSourceRegister > getDefSrcRegIgnoringCopies(Register Reg, const MachineRegisterInfo &MRI)
Find the def instruction for Reg, and underlying value Register folding away any copies.
Definition: Utils.cpp:467
Register getSrcRegIgnoringCopies(Register Reg, const MachineRegisterInfo &MRI)
Find the source register for Reg, folding away any trivial copies.
Definition: Utils.cpp:493
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:860
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
static EVT getFloatingPointVT(unsigned BitWidth)
Returns the EVT that represents a floating-point type with the given number of bits.
Definition: ValueTypes.h:59
Class which stores all the state required in a MachineIRBuilder.
This class contains a discriminated union of information about pointers in memory operands,...
static MachinePointerInfo getConstantPool(MachineFunction &MF)
Return a MachinePointerInfo record that refers to the constant pool.