AArch64InstructionSelector.cpp (LLVM 19.0.0git)
1//===- AArch64InstructionSelector.cpp ----------------------------*- C++ -*-==//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8/// \file
9/// This file implements the targeting of the InstructionSelector class for
10/// AArch64.
11/// \todo This should be generated by TableGen.
12//===----------------------------------------------------------------------===//
13
15#include "AArch64InstrInfo.h"
18#include "AArch64RegisterInfo.h"
19#include "AArch64Subtarget.h"
41#include "llvm/IR/Constants.h"
44#include "llvm/IR/IntrinsicsAArch64.h"
46#include "llvm/IR/Type.h"
47#include "llvm/Pass.h"
48#include "llvm/Support/Debug.h"
50#include <optional>
51
52#define DEBUG_TYPE "aarch64-isel"
53
54using namespace llvm;
55using namespace MIPatternMatch;
56using namespace AArch64GISelUtils;
57
58namespace llvm {
59 class BlockFrequencyInfo;
60 class ProfileSummaryInfo;
61}
62
63namespace {
64
65#define GET_GLOBALISEL_PREDICATE_BITSET
66#include "AArch64GenGlobalISel.inc"
67#undef GET_GLOBALISEL_PREDICATE_BITSET
68
69
70class AArch64InstructionSelector : public InstructionSelector {
71public:
72 AArch64InstructionSelector(const AArch64TargetMachine &TM,
73 const AArch64Subtarget &STI,
74 const AArch64RegisterBankInfo &RBI);
75
76 bool select(MachineInstr &I) override;
77 static const char *getName() { return DEBUG_TYPE; }
78
79 void setupMF(MachineFunction &MF, GISelKnownBits *KB,
80 CodeGenCoverage *CoverageInfo, ProfileSummaryInfo *PSI,
81 BlockFrequencyInfo *BFI) override {
82 InstructionSelector::setupMF(MF, KB, CoverageInfo, PSI, BFI);
83 MIB.setMF(MF);
84
85 // hasFnAttribute() is expensive to call on every BRCOND selection, so
86 // cache it here for each run of the selector.
87 ProduceNonFlagSettingCondBr =
88 !MF.getFunction().hasFnAttribute(Attribute::SpeculativeLoadHardening);
89 MFReturnAddr = Register();
90
91 processPHIs(MF);
92 }
93
94private:
95 /// tblgen-erated 'select' implementation, used as the initial selector for
96 /// the patterns that don't require complex C++.
97 bool selectImpl(MachineInstr &I, CodeGenCoverage &CoverageInfo) const;
98
99 // A lowering phase that runs before any selection attempts.
100 // Returns true if the instruction was modified.
101 bool preISelLower(MachineInstr &I);
102
103 // An early selection function that runs before the selectImpl() call.
104 bool earlySelect(MachineInstr &I);
105
106 /// Save state that is shared between select calls, call select on \p I and
107 /// then restore the saved state. This can be used to recursively call select
108 /// within a select call.
109 bool selectAndRestoreState(MachineInstr &I);
110
111 // Do some preprocessing of G_PHIs before we begin selection.
112 void processPHIs(MachineFunction &MF);
113
114 bool earlySelectSHL(MachineInstr &I, MachineRegisterInfo &MRI);
115
116 /// Eliminate same-sized cross-bank copies into stores before selectImpl().
117 bool contractCrossBankCopyIntoStore(MachineInstr &I,
119
120 bool convertPtrAddToAdd(MachineInstr &I, MachineRegisterInfo &MRI);
121
122 bool selectVaStartAAPCS(MachineInstr &I, MachineFunction &MF,
123 MachineRegisterInfo &MRI) const;
124 bool selectVaStartDarwin(MachineInstr &I, MachineFunction &MF,
125 MachineRegisterInfo &MRI) const;
126
127 ///@{
128 /// Helper functions for selectCompareBranch.
129 bool selectCompareBranchFedByFCmp(MachineInstr &I, MachineInstr &FCmp,
130 MachineIRBuilder &MIB) const;
131 bool selectCompareBranchFedByICmp(MachineInstr &I, MachineInstr &ICmp,
132 MachineIRBuilder &MIB) const;
133 bool tryOptCompareBranchFedByICmp(MachineInstr &I, MachineInstr &ICmp,
134 MachineIRBuilder &MIB) const;
135 bool tryOptAndIntoCompareBranch(MachineInstr &AndInst, bool Invert,
136 MachineBasicBlock *DstMBB,
137 MachineIRBuilder &MIB) const;
138 ///@}
139
140 bool selectCompareBranch(MachineInstr &I, MachineFunction &MF,
141 MachineRegisterInfo &MRI);
142
143 bool selectVectorAshrLshr(MachineInstr &I, MachineRegisterInfo &MRI);
144 bool selectVectorSHL(MachineInstr &I, MachineRegisterInfo &MRI);
145
146 // Helper to generate an equivalent of scalar_to_vector into a new register,
147 // returned via 'Dst'.
148 MachineInstr *emitScalarToVector(unsigned EltSize,
149 const TargetRegisterClass *DstRC,
150 Register Scalar,
151 MachineIRBuilder &MIRBuilder) const;
152 /// Helper to narrow vector that was widened by emitScalarToVector.
153 /// Copy lowest part of 128-bit or 64-bit vector to 64-bit or 32-bit
154 /// vector, correspondingly.
155 MachineInstr *emitNarrowVector(Register DstReg, Register SrcReg,
156 MachineIRBuilder &MIRBuilder,
157 MachineRegisterInfo &MRI) const;
158
159 /// Emit a lane insert into \p DstReg, or a new vector register if
160 /// std::nullopt is provided.
161 ///
162 /// The lane inserted into is defined by \p LaneIdx. The vector source
163 /// register is given by \p SrcReg. The register containing the element is
164 /// given by \p EltReg.
165 MachineInstr *emitLaneInsert(std::optional<Register> DstReg, Register SrcReg,
166 Register EltReg, unsigned LaneIdx,
167 const RegisterBank &RB,
168 MachineIRBuilder &MIRBuilder) const;
169
170 /// Emit a sequence of instructions representing a constant \p CV for a
171 /// vector register \p Dst. (E.g. a MOV, or a load from a constant pool.)
172 ///
173 /// \returns the last instruction in the sequence on success, and nullptr
174 /// otherwise.
175 MachineInstr *emitConstantVector(Register Dst, Constant *CV,
176 MachineIRBuilder &MIRBuilder,
177 MachineRegisterInfo &MRI);
178
179 MachineInstr *tryAdvSIMDModImm8(Register Dst, unsigned DstSize, APInt Bits,
180 MachineIRBuilder &MIRBuilder);
181
182 MachineInstr *tryAdvSIMDModImm16(Register Dst, unsigned DstSize, APInt Bits,
183 MachineIRBuilder &MIRBuilder, bool Inv);
184
185 MachineInstr *tryAdvSIMDModImm32(Register Dst, unsigned DstSize, APInt Bits,
186 MachineIRBuilder &MIRBuilder, bool Inv);
187 MachineInstr *tryAdvSIMDModImm64(Register Dst, unsigned DstSize, APInt Bits,
188 MachineIRBuilder &MIRBuilder);
189 MachineInstr *tryAdvSIMDModImm321s(Register Dst, unsigned DstSize, APInt Bits,
190 MachineIRBuilder &MIRBuilder, bool Inv);
191 MachineInstr *tryAdvSIMDModImmFP(Register Dst, unsigned DstSize, APInt Bits,
192 MachineIRBuilder &MIRBuilder);
193
194 bool tryOptConstantBuildVec(MachineInstr &MI, LLT DstTy,
195 MachineRegisterInfo &MRI);
196 /// \returns true if a G_BUILD_VECTOR instruction \p MI can be selected as a
197 /// SUBREG_TO_REG.
198 bool tryOptBuildVecToSubregToReg(MachineInstr &MI, MachineRegisterInfo &MRI);
199 bool selectBuildVector(MachineInstr &I, MachineRegisterInfo &MRI);
200 bool selectMergeValues(MachineInstr &I, MachineRegisterInfo &MRI);
201 bool selectUnmergeValues(MachineInstr &I, MachineRegisterInfo &MRI);
202
203 bool selectShuffleVector(MachineInstr &I, MachineRegisterInfo &MRI);
204 bool selectExtractElt(MachineInstr &I, MachineRegisterInfo &MRI);
205 bool selectConcatVectors(MachineInstr &I, MachineRegisterInfo &MRI);
206 bool selectSplitVectorUnmerge(MachineInstr &I, MachineRegisterInfo &MRI);
207
208 /// Helper function to select vector load intrinsics like
209 /// @llvm.aarch64.neon.ld2.*, @llvm.aarch64.neon.ld4.*, etc.
210 /// \p Opc is the opcode that the selected instruction should use.
211 /// \p NumVecs is the number of vector destinations for the instruction.
212 /// \p I is the original G_INTRINSIC_W_SIDE_EFFECTS instruction.
213 bool selectVectorLoadIntrinsic(unsigned Opc, unsigned NumVecs,
214 MachineInstr &I);
215 bool selectVectorLoadLaneIntrinsic(unsigned Opc, unsigned NumVecs,
216 MachineInstr &I);
217 void selectVectorStoreIntrinsic(MachineInstr &I, unsigned NumVecs,
218 unsigned Opc);
219 bool selectVectorStoreLaneIntrinsic(MachineInstr &I, unsigned NumVecs,
220 unsigned Opc);
221 bool selectIntrinsicWithSideEffects(MachineInstr &I,
222 MachineRegisterInfo &MRI);
223 bool selectIntrinsic(MachineInstr &I, MachineRegisterInfo &MRI);
224 bool selectJumpTable(MachineInstr &I, MachineRegisterInfo &MRI);
225 bool selectBrJT(MachineInstr &I, MachineRegisterInfo &MRI);
226 bool selectTLSGlobalValue(MachineInstr &I, MachineRegisterInfo &MRI);
227 bool selectPtrAuthGlobalValue(MachineInstr &I,
228 MachineRegisterInfo &MRI) const;
229 bool selectReduction(MachineInstr &I, MachineRegisterInfo &MRI);
230 bool selectMOPS(MachineInstr &I, MachineRegisterInfo &MRI);
231 bool selectUSMovFromExtend(MachineInstr &I, MachineRegisterInfo &MRI);
232 void SelectTable(MachineInstr &I, MachineRegisterInfo &MRI, unsigned NumVecs,
233 unsigned Opc1, unsigned Opc2, bool isExt);
234
235 bool selectIndexedExtLoad(MachineInstr &I, MachineRegisterInfo &MRI);
236 bool selectIndexedLoad(MachineInstr &I, MachineRegisterInfo &MRI);
237 bool selectIndexedStore(GIndexedStore &I, MachineRegisterInfo &MRI);
238
239 unsigned emitConstantPoolEntry(const Constant *CPVal,
240 MachineFunction &MF) const;
241 MachineInstr *emitLoadFromConstantPool(const Constant *CPVal,
242 MachineIRBuilder &MIRBuilder) const;
243
244 // Emit a vector concat operation.
245 MachineInstr *emitVectorConcat(std::optional<Register> Dst, Register Op1,
246 Register Op2,
247 MachineIRBuilder &MIRBuilder) const;
248
249 // Emit an integer compare between LHS and RHS, which checks for Predicate.
250 MachineInstr *emitIntegerCompare(MachineOperand &LHS, MachineOperand &RHS,
251 MachineOperand &Predicate,
252 MachineIRBuilder &MIRBuilder) const;
253
254 /// Emit a floating point comparison between \p LHS and \p RHS.
255 /// \p Pred if given is the intended predicate to use.
256 MachineInstr *
257 emitFPCompare(Register LHS, Register RHS, MachineIRBuilder &MIRBuilder,
258 std::optional<CmpInst::Predicate> = std::nullopt) const;
259
260 MachineInstr *
261 emitInstr(unsigned Opcode, std::initializer_list<llvm::DstOp> DstOps,
262 std::initializer_list<llvm::SrcOp> SrcOps,
263 MachineIRBuilder &MIRBuilder,
264 const ComplexRendererFns &RenderFns = std::nullopt) const;
265 /// Helper function to emit an add or sub instruction.
266 ///
267 /// \p AddrModeAndSizeToOpcode must contain each of the opcode variants above
268 /// in a specific order.
269 ///
270 /// Below is an example of the expected input to \p AddrModeAndSizeToOpcode.
271 ///
272 /// \code
273 /// const std::array<std::array<unsigned, 2>, 4> Table {
274 /// {{AArch64::ADDXri, AArch64::ADDWri},
275 /// {AArch64::ADDXrs, AArch64::ADDWrs},
276 /// {AArch64::ADDXrr, AArch64::ADDWrr},
277 /// {AArch64::SUBXri, AArch64::SUBWri},
278 /// {AArch64::ADDXrx, AArch64::ADDWrx}}};
279 /// \endcode
280 ///
281 /// Each row in the table corresponds to a different addressing mode. Each
282 /// column corresponds to a different register size.
283 ///
284 /// \attention Rows must be structured as follows:
285 /// - Row 0: The ri opcode variants
286 /// - Row 1: The rs opcode variants
287 /// - Row 2: The rr opcode variants
288 /// - Row 3: The ri opcode variants for negative immediates
289 /// - Row 4: The rx opcode variants
290 ///
291 /// \attention Columns must be structured as follows:
292 /// - Column 0: The 64-bit opcode variants
293 /// - Column 1: The 32-bit opcode variants
294 ///
295 /// \p Dst is the destination register of the binop to emit.
296 /// \p LHS is the left-hand operand of the binop to emit.
297 /// \p RHS is the right-hand operand of the binop to emit.
298 MachineInstr *emitAddSub(
299 const std::array<std::array<unsigned, 2>, 5> &AddrModeAndSizeToOpcode,
300 Register Dst, MachineOperand &LHS, MachineOperand &RHS,
301 MachineIRBuilder &MIRBuilder) const;
302 MachineInstr *emitADD(Register DefReg, MachineOperand &LHS,
303 MachineOperand &RHS,
304 MachineIRBuilder &MIRBuilder) const;
305 MachineInstr *emitADDS(Register Dst, MachineOperand &LHS, MachineOperand &RHS,
306 MachineIRBuilder &MIRBuilder) const;
307 MachineInstr *emitSUBS(Register Dst, MachineOperand &LHS, MachineOperand &RHS,
308 MachineIRBuilder &MIRBuilder) const;
309 MachineInstr *emitADCS(Register Dst, MachineOperand &LHS, MachineOperand &RHS,
310 MachineIRBuilder &MIRBuilder) const;
311 MachineInstr *emitSBCS(Register Dst, MachineOperand &LHS, MachineOperand &RHS,
312 MachineIRBuilder &MIRBuilder) const;
313 MachineInstr *emitCMN(MachineOperand &LHS, MachineOperand &RHS,
314 MachineIRBuilder &MIRBuilder) const;
315 MachineInstr *emitTST(MachineOperand &LHS, MachineOperand &RHS,
316 MachineIRBuilder &MIRBuilder) const;
317 MachineInstr *emitSelect(Register Dst, Register LHS, Register RHS,
318 AArch64CC::CondCode CC,
319 MachineIRBuilder &MIRBuilder) const;
320 MachineInstr *emitExtractVectorElt(std::optional<Register> DstReg,
321 const RegisterBank &DstRB, LLT ScalarTy,
322 Register VecReg, unsigned LaneIdx,
323 MachineIRBuilder &MIRBuilder) const;
324 MachineInstr *emitCSINC(Register Dst, Register Src1, Register Src2,
325 AArch64CC::CondCode Pred,
326 MachineIRBuilder &MIRBuilder) const;
327 /// Emit a CSet for a FP compare.
328 ///
329 /// \p Dst is expected to be a 32-bit scalar register.
330 MachineInstr *emitCSetForFCmp(Register Dst, CmpInst::Predicate Pred,
331 MachineIRBuilder &MIRBuilder) const;
332
333 /// Emit an instruction that sets NZCV to the carry-in expected by \p I.
334 /// Might elide the instruction if the previous instruction already sets NZCV
335 /// correctly.
336 MachineInstr *emitCarryIn(MachineInstr &I, Register CarryReg);
337
338 /// Emit the overflow op for \p Opcode.
339 ///
340 /// \p Opcode is expected to be an overflow op's opcode, e.g. G_UADDO,
341 /// G_USUBO, etc.
342 std::pair<MachineInstr *, AArch64CC::CondCode>
343 emitOverflowOp(unsigned Opcode, Register Dst, MachineOperand &LHS,
344 MachineOperand &RHS, MachineIRBuilder &MIRBuilder) const;
345
346 bool selectOverflowOp(MachineInstr &I, MachineRegisterInfo &MRI);
347
348 /// Emit expression as a conjunction (a series of CCMP/CFCMP ops).
349 /// In some cases this is even possible with OR operations in the expression.
350 MachineInstr *emitConjunction(Register Val, AArch64CC::CondCode &OutCC,
351 MachineIRBuilder &MIB) const;
352 MachineInstr *emitConditionalComparison(Register LHS, Register RHS,
353 CmpInst::Predicate CC,
354 AArch64CC::CondCode Predicate,
355 AArch64CC::CondCode OutCC,
356 MachineIRBuilder &MIB) const;
357 MachineInstr *emitConjunctionRec(Register Val, AArch64CC::CondCode &OutCC,
358 bool Negate, Register CCOp,
359 AArch64CC::CondCode Predicate,
360 MachineIRBuilder &MIB) const;
361
362 /// Emit a TB(N)Z instruction which tests \p Bit in \p TestReg.
363 /// \p IsNegative is true if the test should be "not zero".
364 /// This will also optimize the test bit instruction when possible.
365 MachineInstr *emitTestBit(Register TestReg, uint64_t Bit, bool IsNegative,
366 MachineBasicBlock *DstMBB,
367 MachineIRBuilder &MIB) const;
368
369 /// Emit a CB(N)Z instruction which branches to \p DestMBB.
370 MachineInstr *emitCBZ(Register CompareReg, bool IsNegative,
371 MachineBasicBlock *DestMBB,
372 MachineIRBuilder &MIB) const;
373
374 // Equivalent to the i32shift_a and friends from AArch64InstrInfo.td.
375 // We use these manually instead of using the importer since it doesn't
376 // support SDNodeXForm.
377 ComplexRendererFns selectShiftA_32(const MachineOperand &Root) const;
378 ComplexRendererFns selectShiftB_32(const MachineOperand &Root) const;
379 ComplexRendererFns selectShiftA_64(const MachineOperand &Root) const;
380 ComplexRendererFns selectShiftB_64(const MachineOperand &Root) const;
381
382 ComplexRendererFns select12BitValueWithLeftShift(uint64_t Immed) const;
383 ComplexRendererFns selectArithImmed(MachineOperand &Root) const;
384 ComplexRendererFns selectNegArithImmed(MachineOperand &Root) const;
385
386 ComplexRendererFns selectAddrModeUnscaled(MachineOperand &Root,
387 unsigned Size) const;
388
389 ComplexRendererFns selectAddrModeUnscaled8(MachineOperand &Root) const {
390 return selectAddrModeUnscaled(Root, 1);
391 }
392 ComplexRendererFns selectAddrModeUnscaled16(MachineOperand &Root) const {
393 return selectAddrModeUnscaled(Root, 2);
394 }
395 ComplexRendererFns selectAddrModeUnscaled32(MachineOperand &Root) const {
396 return selectAddrModeUnscaled(Root, 4);
397 }
398 ComplexRendererFns selectAddrModeUnscaled64(MachineOperand &Root) const {
399 return selectAddrModeUnscaled(Root, 8);
400 }
401 ComplexRendererFns selectAddrModeUnscaled128(MachineOperand &Root) const {
402 return selectAddrModeUnscaled(Root, 16);
403 }
404
405 /// Helper to try to fold in a GISEL_ADD_LOW into an immediate, to be used
406 /// from complex pattern matchers like selectAddrModeIndexed().
407 ComplexRendererFns tryFoldAddLowIntoImm(MachineInstr &RootDef, unsigned Size,
408 MachineRegisterInfo &MRI) const;
409
410 ComplexRendererFns selectAddrModeIndexed(MachineOperand &Root,
411 unsigned Size) const;
412 template <int Width>
413 ComplexRendererFns selectAddrModeIndexed(MachineOperand &Root) const {
414 return selectAddrModeIndexed(Root, Width / 8);
415 }
416
417 std::optional<bool>
418 isWorthFoldingIntoAddrMode(MachineInstr &MI,
419 const MachineRegisterInfo &MRI) const;
420
421 bool isWorthFoldingIntoExtendedReg(MachineInstr &MI,
422 const MachineRegisterInfo &MRI,
423 bool IsAddrOperand) const;
424 ComplexRendererFns
425 selectAddrModeShiftedExtendXReg(MachineOperand &Root,
426 unsigned SizeInBytes) const;
427
428 /// Returns a \p ComplexRendererFns which contains a base, offset, and whether
429 /// or not a shift + extend should be folded into an addressing mode. Returns
430 /// None when this is not profitable or possible.
431 ComplexRendererFns
432 selectExtendedSHL(MachineOperand &Root, MachineOperand &Base,
433 MachineOperand &Offset, unsigned SizeInBytes,
434 bool WantsExt) const;
435 ComplexRendererFns selectAddrModeRegisterOffset(MachineOperand &Root) const;
436 ComplexRendererFns selectAddrModeXRO(MachineOperand &Root,
437 unsigned SizeInBytes) const;
438 template <int Width>
439 ComplexRendererFns selectAddrModeXRO(MachineOperand &Root) const {
440 return selectAddrModeXRO(Root, Width / 8);
441 }
442
443 ComplexRendererFns selectAddrModeWRO(MachineOperand &Root,
444 unsigned SizeInBytes) const;
445 template <int Width>
446 ComplexRendererFns selectAddrModeWRO(MachineOperand &Root) const {
447 return selectAddrModeWRO(Root, Width / 8);
448 }
449
450 ComplexRendererFns selectShiftedRegister(MachineOperand &Root,
451 bool AllowROR = false) const;
452
453 ComplexRendererFns selectArithShiftedRegister(MachineOperand &Root) const {
454 return selectShiftedRegister(Root);
455 }
456
457 ComplexRendererFns selectLogicalShiftedRegister(MachineOperand &Root) const {
458 return selectShiftedRegister(Root, true);
459 }
460
461 /// Given an extend instruction, determine the correct shift-extend type for
462 /// that instruction.
463 ///
464 /// If the instruction is going to be used in a load or store, pass
465 /// \p IsLoadStore = true.
466 AArch64_AM::ShiftExtendType
467 getExtendTypeForInst(MachineInstr &MI, MachineRegisterInfo &MRI,
468 bool IsLoadStore = false) const;
469
470 /// Move \p Reg to \p RC if \p Reg is not already on \p RC.
471 ///
472 /// \returns Either \p Reg if no change was necessary, or the new register
473 /// created by moving \p Reg.
474 ///
475 /// Note: This uses emitCopy right now.
476 Register moveScalarRegClass(Register Reg, const TargetRegisterClass &RC,
477 MachineIRBuilder &MIB) const;
478
479 ComplexRendererFns selectArithExtendedRegister(MachineOperand &Root) const;
480
481 ComplexRendererFns selectExtractHigh(MachineOperand &Root) const;
482
483 void renderTruncImm(MachineInstrBuilder &MIB, const MachineInstr &MI,
484 int OpIdx = -1) const;
485 void renderLogicalImm32(MachineInstrBuilder &MIB, const MachineInstr &I,
486 int OpIdx = -1) const;
487 void renderLogicalImm64(MachineInstrBuilder &MIB, const MachineInstr &I,
488 int OpIdx = -1) const;
489 void renderUbsanTrap(MachineInstrBuilder &MIB, const MachineInstr &MI,
490 int OpIdx) const;
491 void renderFPImm16(MachineInstrBuilder &MIB, const MachineInstr &MI,
492 int OpIdx = -1) const;
493 void renderFPImm32(MachineInstrBuilder &MIB, const MachineInstr &MI,
494 int OpIdx = -1) const;
495 void renderFPImm64(MachineInstrBuilder &MIB, const MachineInstr &MI,
496 int OpIdx = -1) const;
497 void renderFPImm32SIMDModImmType4(MachineInstrBuilder &MIB,
498 const MachineInstr &MI,
499 int OpIdx = -1) const;
500
501 // Materialize a GlobalValue or BlockAddress using a movz+movk sequence.
502 void materializeLargeCMVal(MachineInstr &I, const Value *V, unsigned OpFlags);
503
504 // Optimization methods.
505 bool tryOptSelect(GSelect &Sel);
506 bool tryOptSelectConjunction(GSelect &Sel, MachineInstr &CondMI);
507 MachineInstr *tryFoldIntegerCompare(MachineOperand &LHS, MachineOperand &RHS,
508 MachineOperand &Predicate,
509 MachineIRBuilder &MIRBuilder) const;
510
511 /// Return true if \p MI is a load or store of \p NumBytes bytes.
512 bool isLoadStoreOfNumBytes(const MachineInstr &MI, unsigned NumBytes) const;
513
514 /// Returns true if \p MI is guaranteed to have the high-half of a 64-bit
515 /// register zeroed out. In other words, the result of MI has been explicitly
516 /// zero extended.
517 bool isDef32(const MachineInstr &MI) const;
518
519 const AArch64TargetMachine &TM;
520 const AArch64Subtarget &STI;
521 const AArch64InstrInfo &TII;
522 const AArch64RegisterInfo &TRI;
523 const AArch64RegisterBankInfo &RBI;
524
525 bool ProduceNonFlagSettingCondBr = false;
526
527 // Some cached values used during selection.
528 // We use LR as a live-in register, and we keep track of it here as it can be
529 // clobbered by calls.
530 Register MFReturnAddr;
531
532 MachineIRBuilder MIB;
533
534#define GET_GLOBALISEL_PREDICATES_DECL
535#include "AArch64GenGlobalISel.inc"
536#undef GET_GLOBALISEL_PREDICATES_DECL
537
538// We declare the temporaries used by selectImpl() in the class to minimize the
539// cost of constructing placeholder values.
540#define GET_GLOBALISEL_TEMPORARIES_DECL
541#include "AArch64GenGlobalISel.inc"
542#undef GET_GLOBALISEL_TEMPORARIES_DECL
543};
544
545} // end anonymous namespace
546
547#define GET_GLOBALISEL_IMPL
548#include "AArch64GenGlobalISel.inc"
549#undef GET_GLOBALISEL_IMPL
550
551AArch64InstructionSelector::AArch64InstructionSelector(
552 const AArch64TargetMachine &TM, const AArch64Subtarget &STI,
553 const AArch64RegisterBankInfo &RBI)
554 : TM(TM), STI(STI), TII(*STI.getInstrInfo()), TRI(*STI.getRegisterInfo()),
555 RBI(RBI),
556#define GET_GLOBALISEL_PREDICATES_INIT
557#include "AArch64GenGlobalISel.inc"
558#undef GET_GLOBALISEL_PREDICATES_INIT
559#define GET_GLOBALISEL_TEMPORARIES_INIT
560#include "AArch64GenGlobalISel.inc"
561#undef GET_GLOBALISEL_TEMPORARIES_INIT
562{
563}
564
565// FIXME: This should be target-independent, inferred from the types declared
566// for each class in the bank.
567//
568/// Given a register bank, and a type, return the smallest register class that
569/// can represent that combination.
570static const TargetRegisterClass *
571getRegClassForTypeOnBank(LLT Ty, const RegisterBank &RB,
572 bool GetAllRegSet = false) {
573 if (RB.getID() == AArch64::GPRRegBankID) {
574 if (Ty.getSizeInBits() <= 32)
575 return GetAllRegSet ? &AArch64::GPR32allRegClass
576 : &AArch64::GPR32RegClass;
577 if (Ty.getSizeInBits() == 64)
578 return GetAllRegSet ? &AArch64::GPR64allRegClass
579 : &AArch64::GPR64RegClass;
580 if (Ty.getSizeInBits() == 128)
581 return &AArch64::XSeqPairsClassRegClass;
582 return nullptr;
583 }
584
585 if (RB.getID() == AArch64::FPRRegBankID) {
586 switch (Ty.getSizeInBits()) {
587 case 8:
588 return &AArch64::FPR8RegClass;
589 case 16:
590 return &AArch64::FPR16RegClass;
591 case 32:
592 return &AArch64::FPR32RegClass;
593 case 64:
594 return &AArch64::FPR64RegClass;
595 case 128:
596 return &AArch64::FPR128RegClass;
597 }
598 return nullptr;
599 }
600
601 return nullptr;
602}
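// For illustration (editor's sketch, not in the upstream file): a 64-bit
// scalar on the GPR bank resolves to GPR64 (or GPR64all when GetAllRegSet is
// true), while any 64-bit type on the FPR bank, e.g. a <2 x s32> vector,
// resolves to FPR64, since only the bit width of the type is consulted.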
603
604/// Given a register bank, and size in bits, return the smallest register class
605/// that can represent that combination.
606static const TargetRegisterClass *
607 getMinClassForRegBank(const RegisterBank &RB, TypeSize SizeInBits,
608 bool GetAllRegSet = false) {
609 if (SizeInBits.isScalable()) {
610 assert(RB.getID() == AArch64::FPRRegBankID &&
611 "Expected FPR regbank for scalable type size");
612 return &AArch64::ZPRRegClass;
613 }
614
615 unsigned RegBankID = RB.getID();
616
617 if (RegBankID == AArch64::GPRRegBankID) {
618 if (SizeInBits <= 32)
619 return GetAllRegSet ? &AArch64::GPR32allRegClass
620 : &AArch64::GPR32RegClass;
621 if (SizeInBits == 64)
622 return GetAllRegSet ? &AArch64::GPR64allRegClass
623 : &AArch64::GPR64RegClass;
624 if (SizeInBits == 128)
625 return &AArch64::XSeqPairsClassRegClass;
626 }
627
628 if (RegBankID == AArch64::FPRRegBankID) {
629 switch (SizeInBits) {
630 default:
631 return nullptr;
632 case 8:
633 return &AArch64::FPR8RegClass;
634 case 16:
635 return &AArch64::FPR16RegClass;
636 case 32:
637 return &AArch64::FPR32RegClass;
638 case 64:
639 return &AArch64::FPR64RegClass;
640 case 128:
641 return &AArch64::FPR128RegClass;
642 }
643 }
644
645 return nullptr;
646}
647
648/// Returns the correct subregister to use for a given register class.
649 static bool getSubRegForClass(const TargetRegisterClass *RC,
650 const TargetRegisterInfo &TRI, unsigned &SubReg) {
651 switch (TRI.getRegSizeInBits(*RC)) {
652 case 8:
653 SubReg = AArch64::bsub;
654 break;
655 case 16:
656 SubReg = AArch64::hsub;
657 break;
658 case 32:
659 if (RC != &AArch64::FPR32RegClass)
660 SubReg = AArch64::sub_32;
661 else
662 SubReg = AArch64::ssub;
663 break;
664 case 64:
665 SubReg = AArch64::dsub;
666 break;
667 default:
668 LLVM_DEBUG(
669 dbgs() << "Couldn't find appropriate subregister for register class.");
670 return false;
671 }
672
673 return true;
674}
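// For illustration (editor's sketch): for a 32-bit class the subregister
// depends on the bank, e.g. GPR32 maps to AArch64::sub_32 while FPR32 maps to
// AArch64::ssub; the 8/16/64-bit FPR classes map to bsub/hsub/dsub as above.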
675
676/// Returns the minimum size the given register bank can hold.
677static unsigned getMinSizeForRegBank(const RegisterBank &RB) {
678 switch (RB.getID()) {
679 case AArch64::GPRRegBankID:
680 return 32;
681 case AArch64::FPRRegBankID:
682 return 8;
683 default:
684 llvm_unreachable("Tried to get minimum size for unknown register bank.");
685 }
686}
687
688/// Create a REG_SEQUENCE instruction using the registers in \p Regs.
689/// Helper function for functions like createDTuple and createQTuple.
690///
691/// \p RegClassIDs - The list of register class IDs available for some tuple of
692/// a scalar class. E.g. QQRegClassID, QQQRegClassID, QQQQRegClassID. This is
693/// expected to contain between 2 and 4 tuple classes.
694///
695/// \p SubRegs - The list of subregister classes associated with each register
696/// class ID in \p RegClassIDs. E.g., QQRegClassID should use the qsub0
697/// subregister class. The index of each subregister class is expected to
698/// correspond with the index of each register class.
699///
700/// \returns Either the destination register of REG_SEQUENCE instruction that
701/// was created, or the 0th element of \p Regs if \p Regs contains a single
702/// element.
703 static Register createTuple(ArrayRef<Register> Regs,
704 const unsigned RegClassIDs[],
705 const unsigned SubRegs[], MachineIRBuilder &MIB) {
706 unsigned NumRegs = Regs.size();
707 if (NumRegs == 1)
708 return Regs[0];
709 assert(NumRegs >= 2 && NumRegs <= 4 &&
710 "Only support between two and 4 registers in a tuple!");
711 const TargetRegisterInfo *TRI = MIB.getMF().getSubtarget().getRegisterInfo();
712 auto *DesiredClass = TRI->getRegClass(RegClassIDs[NumRegs - 2]);
713 auto RegSequence =
714 MIB.buildInstr(TargetOpcode::REG_SEQUENCE, {DesiredClass}, {});
715 for (unsigned I = 0, E = Regs.size(); I < E; ++I) {
716 RegSequence.addUse(Regs[I]);
717 RegSequence.addImm(SubRegs[I]);
718 }
719 return RegSequence.getReg(0);
720}
721
722/// Create a tuple of D-registers using the registers in \p Regs.
723 static Register createDTuple(ArrayRef<Register> Regs, MachineIRBuilder &MIB) {
724 static const unsigned RegClassIDs[] = {
725 AArch64::DDRegClassID, AArch64::DDDRegClassID, AArch64::DDDDRegClassID};
726 static const unsigned SubRegs[] = {AArch64::dsub0, AArch64::dsub1,
727 AArch64::dsub2, AArch64::dsub3};
728 return createTuple(Regs, RegClassIDs, SubRegs, MIB);
729}
730
731/// Create a tuple of Q-registers using the registers in \p Regs.
732 static Register createQTuple(ArrayRef<Register> Regs, MachineIRBuilder &MIB) {
733 static const unsigned RegClassIDs[] = {
734 AArch64::QQRegClassID, AArch64::QQQRegClassID, AArch64::QQQQRegClassID};
735 static const unsigned SubRegs[] = {AArch64::qsub0, AArch64::qsub1,
736 AArch64::qsub2, AArch64::qsub3};
737 return createTuple(Regs, RegClassIDs, SubRegs, MIB);
738}
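// For illustration (editor's sketch; the exact printed MIR may differ): for
// two Q registers createQTuple builds a QQ REG_SEQUENCE along the lines of
//   %tuple:qq = REG_SEQUENCE %q0, %subreg.qsub0, %q1, %subreg.qsub1
// and returns %tuple; a single-element Regs list is returned unchanged.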
739
740static std::optional<uint64_t> getImmedFromMO(const MachineOperand &Root) {
741 auto &MI = *Root.getParent();
742 auto &MBB = *MI.getParent();
743 auto &MF = *MBB.getParent();
744 auto &MRI = MF.getRegInfo();
745 uint64_t Immed;
746 if (Root.isImm())
747 Immed = Root.getImm();
748 else if (Root.isCImm())
749 Immed = Root.getCImm()->getZExtValue();
750 else if (Root.isReg()) {
751 auto ValAndVReg =
752 getIConstantVRegValWithLookThrough(Root.getReg(), MRI);
753 if (!ValAndVReg)
754 return std::nullopt;
755 Immed = ValAndVReg->Value.getSExtValue();
756 } else
757 return std::nullopt;
758 return Immed;
759}
760
761/// Check whether \p I is a currently unsupported binary operation:
762/// - it has an unsized type
763/// - an operand is not a vreg
764/// - all operands are not in the same bank
765/// These are checks that should someday live in the verifier, but right now,
766/// these are mostly limitations of the aarch64 selector.
767static bool unsupportedBinOp(const MachineInstr &I,
768 const AArch64RegisterBankInfo &RBI,
769 const MachineRegisterInfo &MRI,
770 const AArch64RegisterInfo &TRI) {
771 LLT Ty = MRI.getType(I.getOperand(0).getReg());
772 if (!Ty.isValid()) {
773 LLVM_DEBUG(dbgs() << "Generic binop register should be typed\n");
774 return true;
775 }
776
777 const RegisterBank *PrevOpBank = nullptr;
778 for (auto &MO : I.operands()) {
779 // FIXME: Support non-register operands.
780 if (!MO.isReg()) {
781 LLVM_DEBUG(dbgs() << "Generic inst non-reg operands are unsupported\n");
782 return true;
783 }
784
785 // FIXME: Can generic operations have physical registers operands? If
786 // so, this will need to be taught about that, and we'll need to get the
787 // bank out of the minimal class for the register.
788 // Either way, this needs to be documented (and possibly verified).
789 if (!MO.getReg().isVirtual()) {
790 LLVM_DEBUG(dbgs() << "Generic inst has physical register operand\n");
791 return true;
792 }
793
794 const RegisterBank *OpBank = RBI.getRegBank(MO.getReg(), MRI, TRI);
795 if (!OpBank) {
796 LLVM_DEBUG(dbgs() << "Generic register has no bank or class\n");
797 return true;
798 }
799
800 if (PrevOpBank && OpBank != PrevOpBank) {
801 LLVM_DEBUG(dbgs() << "Generic inst operands have different banks\n");
802 return true;
803 }
804 PrevOpBank = OpBank;
805 }
806 return false;
807}
808
809/// Select the AArch64 opcode for the basic binary operation \p GenericOpc
810/// (such as G_OR or G_SDIV), appropriate for the register bank \p RegBankID
811/// and of size \p OpSize.
812/// \returns \p GenericOpc if the combination is unsupported.
813static unsigned selectBinaryOp(unsigned GenericOpc, unsigned RegBankID,
814 unsigned OpSize) {
815 switch (RegBankID) {
816 case AArch64::GPRRegBankID:
817 if (OpSize == 32) {
818 switch (GenericOpc) {
819 case TargetOpcode::G_SHL:
820 return AArch64::LSLVWr;
821 case TargetOpcode::G_LSHR:
822 return AArch64::LSRVWr;
823 case TargetOpcode::G_ASHR:
824 return AArch64::ASRVWr;
825 default:
826 return GenericOpc;
827 }
828 } else if (OpSize == 64) {
829 switch (GenericOpc) {
830 case TargetOpcode::G_PTR_ADD:
831 return AArch64::ADDXrr;
832 case TargetOpcode::G_SHL:
833 return AArch64::LSLVXr;
834 case TargetOpcode::G_LSHR:
835 return AArch64::LSRVXr;
836 case TargetOpcode::G_ASHR:
837 return AArch64::ASRVXr;
838 default:
839 return GenericOpc;
840 }
841 }
842 break;
843 case AArch64::FPRRegBankID:
844 switch (OpSize) {
845 case 32:
846 switch (GenericOpc) {
847 case TargetOpcode::G_FADD:
848 return AArch64::FADDSrr;
849 case TargetOpcode::G_FSUB:
850 return AArch64::FSUBSrr;
851 case TargetOpcode::G_FMUL:
852 return AArch64::FMULSrr;
853 case TargetOpcode::G_FDIV:
854 return AArch64::FDIVSrr;
855 default:
856 return GenericOpc;
857 }
858 case 64:
859 switch (GenericOpc) {
860 case TargetOpcode::G_FADD:
861 return AArch64::FADDDrr;
862 case TargetOpcode::G_FSUB:
863 return AArch64::FSUBDrr;
864 case TargetOpcode::G_FMUL:
865 return AArch64::FMULDrr;
866 case TargetOpcode::G_FDIV:
867 return AArch64::FDIVDrr;
868 case TargetOpcode::G_OR:
869 return AArch64::ORRv8i8;
870 default:
871 return GenericOpc;
872 }
873 }
874 break;
875 }
876 return GenericOpc;
877}
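// For illustration (editor's sketch): e.g. selectBinaryOp(G_SHL, GPRRegBankID,
// 64) yields AArch64::LSLVXr and selectBinaryOp(G_FADD, FPRRegBankID, 32)
// yields AArch64::FADDSrr; unsupported combinations fall through and return
// GenericOpc so the caller can tell that nothing was selected.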
878
879/// Select the AArch64 opcode for the G_LOAD or G_STORE operation \p GenericOpc,
880/// appropriate for the (value) register bank \p RegBankID and of memory access
881/// size \p OpSize. This returns the variant with the base+unsigned-immediate
882/// addressing mode (e.g., LDRXui).
883/// \returns \p GenericOpc if the combination is unsupported.
884static unsigned selectLoadStoreUIOp(unsigned GenericOpc, unsigned RegBankID,
885 unsigned OpSize) {
886 const bool isStore = GenericOpc == TargetOpcode::G_STORE;
887 switch (RegBankID) {
888 case AArch64::GPRRegBankID:
889 switch (OpSize) {
890 case 8:
891 return isStore ? AArch64::STRBBui : AArch64::LDRBBui;
892 case 16:
893 return isStore ? AArch64::STRHHui : AArch64::LDRHHui;
894 case 32:
895 return isStore ? AArch64::STRWui : AArch64::LDRWui;
896 case 64:
897 return isStore ? AArch64::STRXui : AArch64::LDRXui;
898 }
899 break;
900 case AArch64::FPRRegBankID:
901 switch (OpSize) {
902 case 8:
903 return isStore ? AArch64::STRBui : AArch64::LDRBui;
904 case 16:
905 return isStore ? AArch64::STRHui : AArch64::LDRHui;
906 case 32:
907 return isStore ? AArch64::STRSui : AArch64::LDRSui;
908 case 64:
909 return isStore ? AArch64::STRDui : AArch64::LDRDui;
910 case 128:
911 return isStore ? AArch64::STRQui : AArch64::LDRQui;
912 }
913 break;
914 }
915 return GenericOpc;
916}
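// For illustration (editor's sketch): e.g. a 64-bit GPR G_LOAD maps to
// AArch64::LDRXui and a 128-bit FPR G_STORE maps to AArch64::STRQui, i.e. the
// base + scaled unsigned-immediate forms; other addressing modes are handled
// by the complex pattern selectors declared above.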
917
918/// Helper function for selectCopy. Inserts a subregister copy from \p SrcReg
919/// to \p *To.
920///
921/// E.g "To = COPY SrcReg:SubReg"
922 static bool copySubReg(MachineInstr &I, MachineRegisterInfo &MRI,
923 const RegisterBankInfo &RBI, Register SrcReg,
924 const TargetRegisterClass *To, unsigned SubReg) {
925 assert(SrcReg.isValid() && "Expected a valid source register?");
926 assert(To && "Destination register class cannot be null");
927 assert(SubReg && "Expected a valid subregister");
928
929 MachineIRBuilder MIB(I);
930 auto SubRegCopy =
931 MIB.buildInstr(TargetOpcode::COPY, {To}, {}).addReg(SrcReg, 0, SubReg);
932 MachineOperand &RegOp = I.getOperand(1);
933 RegOp.setReg(SubRegCopy.getReg(0));
934
935 // It's possible that the destination register won't be constrained. Make
936 // sure that happens.
937 if (!I.getOperand(0).getReg().isPhysical())
938 RBI.constrainGenericRegister(I.getOperand(0).getReg(), *To, MRI);
939
940 return true;
941}
942
943/// Helper function to get the source and destination register classes for a
944/// copy. Returns a std::pair containing the source register class for the
945/// copy, and the destination register class for the copy. If a register class
946/// cannot be determined, then it will be nullptr.
947static std::pair<const TargetRegisterClass *, const TargetRegisterClass *>
948 getRegClassesForCopy(MachineInstr &I, const TargetInstrInfo &TII,
949 MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI,
950 const RegisterBankInfo &RBI) {
951 Register DstReg = I.getOperand(0).getReg();
952 Register SrcReg = I.getOperand(1).getReg();
953 const RegisterBank &DstRegBank = *RBI.getRegBank(DstReg, MRI, TRI);
954 const RegisterBank &SrcRegBank = *RBI.getRegBank(SrcReg, MRI, TRI);
955
956 TypeSize DstSize = RBI.getSizeInBits(DstReg, MRI, TRI);
957 TypeSize SrcSize = RBI.getSizeInBits(SrcReg, MRI, TRI);
958
959 // Special casing for cross-bank copies of s1s. We can technically represent
960 // a 1-bit value with any size of register. The minimum size for a GPR is 32
961 // bits. So, we need to put the FPR on 32 bits as well.
962 //
963 // FIXME: I'm not sure if this case holds true outside of copies. If it does,
964 // then we can pull it into the helpers that get the appropriate class for a
965 // register bank. Or make a new helper that carries along some constraint
966 // information.
967 if (SrcRegBank != DstRegBank && (DstSize == 1 && SrcSize == 1))
968 SrcSize = DstSize = TypeSize::getFixed(32);
969
970 return {getMinClassForRegBank(SrcRegBank, SrcSize, true),
971 getMinClassForRegBank(DstRegBank, DstSize, true)};
972}
973
974// FIXME: We need some sort of API in RBI/TRI to allow generic code to
975// constrain operands of simple instructions given a TargetRegisterClass
976// and LLT
977 static bool selectDebugInstr(MachineInstr &I, MachineRegisterInfo &MRI,
978 const RegisterBankInfo &RBI) {
979 for (MachineOperand &MO : I.operands()) {
980 if (!MO.isReg())
981 continue;
982 Register Reg = MO.getReg();
983 if (!Reg)
984 continue;
985 if (Reg.isPhysical())
986 continue;
987 LLT Ty = MRI.getType(Reg);
988 const RegClassOrRegBank &RegClassOrBank = MRI.getRegClassOrRegBank(Reg);
989 const TargetRegisterClass *RC =
990 RegClassOrBank.dyn_cast<const TargetRegisterClass *>();
991 if (!RC) {
992 const RegisterBank &RB = *RegClassOrBank.get<const RegisterBank *>();
993 RC = getRegClassForTypeOnBank(Ty, RB);
994 if (!RC) {
995 LLVM_DEBUG(
996 dbgs() << "Warning: DBG_VALUE operand has unexpected size/bank\n");
997 break;
998 }
999 }
1000 RBI.constrainGenericRegister(Reg, *RC, MRI);
1001 }
1002
1003 return true;
1004}
1005
1006 static bool selectCopy(MachineInstr &I, const TargetInstrInfo &TII,
1007 MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI,
1008 const RegisterBankInfo &RBI) {
1009 Register DstReg = I.getOperand(0).getReg();
1010 Register SrcReg = I.getOperand(1).getReg();
1011 const RegisterBank &DstRegBank = *RBI.getRegBank(DstReg, MRI, TRI);
1012 const RegisterBank &SrcRegBank = *RBI.getRegBank(SrcReg, MRI, TRI);
1013
1014 // Find the correct register classes for the source and destination registers.
1015 const TargetRegisterClass *SrcRC;
1016 const TargetRegisterClass *DstRC;
1017 std::tie(SrcRC, DstRC) = getRegClassesForCopy(I, TII, MRI, TRI, RBI);
1018
1019 if (!DstRC) {
1020 LLVM_DEBUG(dbgs() << "Unexpected dest size "
1021 << RBI.getSizeInBits(DstReg, MRI, TRI) << '\n');
1022 return false;
1023 }
1024
1025 // Is this a copy? If so, then we may need to insert a subregister copy.
1026 if (I.isCopy()) {
1027 // Yes. Check if there's anything to fix up.
1028 if (!SrcRC) {
1029 LLVM_DEBUG(dbgs() << "Couldn't determine source register class\n");
1030 return false;
1031 }
1032
1033 const TypeSize SrcSize = TRI.getRegSizeInBits(*SrcRC);
1034 const TypeSize DstSize = TRI.getRegSizeInBits(*DstRC);
1035 unsigned SubReg;
1036
1037 // If the source bank doesn't support a subregister copy small enough,
1038 // then we first need to copy to the destination bank.
1039 if (getMinSizeForRegBank(SrcRegBank) > DstSize) {
1040 const TargetRegisterClass *DstTempRC =
1041 getMinClassForRegBank(DstRegBank, SrcSize, /* GetAllRegSet */ true);
1042 getSubRegForClass(DstRC, TRI, SubReg);
1043
1044 MachineIRBuilder MIB(I);
1045 auto Copy = MIB.buildCopy({DstTempRC}, {SrcReg});
1046 copySubReg(I, MRI, RBI, Copy.getReg(0), DstRC, SubReg);
1047 } else if (SrcSize > DstSize) {
1048 // If the source register is bigger than the destination we need to
1049 // perform a subregister copy.
1050 const TargetRegisterClass *SubRegRC =
1051 getMinClassForRegBank(SrcRegBank, DstSize, /* GetAllRegSet */ true);
1052 getSubRegForClass(SubRegRC, TRI, SubReg);
1053 copySubReg(I, MRI, RBI, SrcReg, DstRC, SubReg);
1054 } else if (DstSize > SrcSize) {
1055 // If the destination register is bigger than the source we need to do
1056 // a promotion using SUBREG_TO_REG.
1057 const TargetRegisterClass *PromotionRC =
1058 getMinClassForRegBank(SrcRegBank, DstSize, /* GetAllRegSet */ true);
1059 getSubRegForClass(SrcRC, TRI, SubReg);
1060
1061 Register PromoteReg = MRI.createVirtualRegister(PromotionRC);
1062 BuildMI(*I.getParent(), I, I.getDebugLoc(),
1063 TII.get(AArch64::SUBREG_TO_REG), PromoteReg)
1064 .addImm(0)
1065 .addUse(SrcReg)
1066 .addImm(SubReg);
1067 MachineOperand &RegOp = I.getOperand(1);
1068 RegOp.setReg(PromoteReg);
1069 }
1070
1071 // If the destination is a physical register, then there's nothing to
1072 // change, so we're done.
1073 if (DstReg.isPhysical())
1074 return true;
1075 }
1076
1077 // No need to constrain SrcReg. It will get constrained when we hit another
1078 // of its use or its defs. Copies do not have constraints.
1079 if (!RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) {
1080 LLVM_DEBUG(dbgs() << "Failed to constrain " << TII.getName(I.getOpcode())
1081 << " operand\n");
1082 return false;
1083 }
1084
1085 // If this is a GPR ZEXT that we want to just reduce down into a copy.
1086 // The sizes will be mismatched with the source < 32b but that's ok.
1087 if (I.getOpcode() == TargetOpcode::G_ZEXT) {
1088 I.setDesc(TII.get(AArch64::COPY));
1089 assert(SrcRegBank.getID() == AArch64::GPRRegBankID);
1090 return selectCopy(I, TII, MRI, TRI, RBI);
1091 }
1092
1093 I.setDesc(TII.get(AArch64::COPY));
1094 return true;
1095}
1096
1097static unsigned selectFPConvOpc(unsigned GenericOpc, LLT DstTy, LLT SrcTy) {
1098 if (!DstTy.isScalar() || !SrcTy.isScalar())
1099 return GenericOpc;
1100
1101 const unsigned DstSize = DstTy.getSizeInBits();
1102 const unsigned SrcSize = SrcTy.getSizeInBits();
1103
1104 switch (DstSize) {
1105 case 32:
1106 switch (SrcSize) {
1107 case 32:
1108 switch (GenericOpc) {
1109 case TargetOpcode::G_SITOFP:
1110 return AArch64::SCVTFUWSri;
1111 case TargetOpcode::G_UITOFP:
1112 return AArch64::UCVTFUWSri;
1113 case TargetOpcode::G_FPTOSI:
1114 return AArch64::FCVTZSUWSr;
1115 case TargetOpcode::G_FPTOUI:
1116 return AArch64::FCVTZUUWSr;
1117 default:
1118 return GenericOpc;
1119 }
1120 case 64:
1121 switch (GenericOpc) {
1122 case TargetOpcode::G_SITOFP:
1123 return AArch64::SCVTFUXSri;
1124 case TargetOpcode::G_UITOFP:
1125 return AArch64::UCVTFUXSri;
1126 case TargetOpcode::G_FPTOSI:
1127 return AArch64::FCVTZSUWDr;
1128 case TargetOpcode::G_FPTOUI:
1129 return AArch64::FCVTZUUWDr;
1130 default:
1131 return GenericOpc;
1132 }
1133 default:
1134 return GenericOpc;
1135 }
1136 case 64:
1137 switch (SrcSize) {
1138 case 32:
1139 switch (GenericOpc) {
1140 case TargetOpcode::G_SITOFP:
1141 return AArch64::SCVTFUWDri;
1142 case TargetOpcode::G_UITOFP:
1143 return AArch64::UCVTFUWDri;
1144 case TargetOpcode::G_FPTOSI:
1145 return AArch64::FCVTZSUXSr;
1146 case TargetOpcode::G_FPTOUI:
1147 return AArch64::FCVTZUUXSr;
1148 default:
1149 return GenericOpc;
1150 }
1151 case 64:
1152 switch (GenericOpc) {
1153 case TargetOpcode::G_SITOFP:
1154 return AArch64::SCVTFUXDri;
1155 case TargetOpcode::G_UITOFP:
1156 return AArch64::UCVTFUXDri;
1157 case TargetOpcode::G_FPTOSI:
1158 return AArch64::FCVTZSUXDr;
1159 case TargetOpcode::G_FPTOUI:
1160 return AArch64::FCVTZUUXDr;
1161 default:
1162 return GenericOpc;
1163 }
1164 default:
1165 return GenericOpc;
1166 }
1167 default:
1168 return GenericOpc;
1169 };
1170 return GenericOpc;
1171}
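// For illustration (editor's sketch): e.g. G_SITOFP from s32 to s64 selects
// AArch64::SCVTFUWDri and G_FPTOUI from s64 to s32 selects
// AArch64::FCVTZUUWDr; vector or otherwise unsupported combinations fall back
// to returning GenericOpc.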
1172
1173MachineInstr *
1174AArch64InstructionSelector::emitSelect(Register Dst, Register True,
1175 Register False, AArch64CC::CondCode CC,
1176 MachineIRBuilder &MIB) const {
1177 MachineRegisterInfo &MRI = *MIB.getMRI();
1178 assert(RBI.getRegBank(False, MRI, TRI)->getID() ==
1179 RBI.getRegBank(True, MRI, TRI)->getID() &&
1180 "Expected both select operands to have the same regbank?");
1181 LLT Ty = MRI.getType(True);
1182 if (Ty.isVector())
1183 return nullptr;
1184 const unsigned Size = Ty.getSizeInBits();
1185 assert((Size == 32 || Size == 64) &&
1186 "Expected 32 bit or 64 bit select only?");
1187 const bool Is32Bit = Size == 32;
1188 if (RBI.getRegBank(True, MRI, TRI)->getID() != AArch64::GPRRegBankID) {
1189 unsigned Opc = Is32Bit ? AArch64::FCSELSrrr : AArch64::FCSELDrrr;
1190 auto FCSel = MIB.buildInstr(Opc, {Dst}, {True, False}).addImm(CC);
1191 constrainSelectedInstRegOperands(*FCSel, TII, TRI, RBI);
1192 return &*FCSel;
1193 }
1194
1195 // By default, we'll try and emit a CSEL.
1196 unsigned Opc = Is32Bit ? AArch64::CSELWr : AArch64::CSELXr;
1197 bool Optimized = false;
1198 auto TryFoldBinOpIntoSelect = [&Opc, Is32Bit, &CC, &MRI,
1199 &Optimized](Register &Reg, Register &OtherReg,
1200 bool Invert) {
1201 if (Optimized)
1202 return false;
1203
1204 // Attempt to fold:
1205 //
1206 // %sub = G_SUB 0, %x
1207 // %select = G_SELECT cc, %reg, %sub
1208 //
1209 // Into:
1210 // %select = CSNEG %reg, %x, cc
1211 Register MatchReg;
1212 if (mi_match(Reg, MRI, m_Neg(m_Reg(MatchReg)))) {
1213 Opc = Is32Bit ? AArch64::CSNEGWr : AArch64::CSNEGXr;
1214 Reg = MatchReg;
1215 if (Invert) {
1216 CC = AArch64CC::getInvertedCondCode(CC);
1217 std::swap(Reg, OtherReg);
1218 }
1219 return true;
1220 }
1221
1222 // Attempt to fold:
1223 //
1224 // %xor = G_XOR %x, -1
1225 // %select = G_SELECT cc, %reg, %xor
1226 //
1227 // Into:
1228 // %select = CSINV %reg, %x, cc
1229 if (mi_match(Reg, MRI, m_Not(m_Reg(MatchReg)))) {
1230 Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;
1231 Reg = MatchReg;
1232 if (Invert) {
1233 CC = AArch64CC::getInvertedCondCode(CC);
1234 std::swap(Reg, OtherReg);
1235 }
1236 return true;
1237 }
1238
1239 // Attempt to fold:
1240 //
1241 // %add = G_ADD %x, 1
1242 // %select = G_SELECT cc, %reg, %add
1243 //
1244 // Into:
1245 // %select = CSINC %reg, %x, cc
1246 if (mi_match(Reg, MRI,
1247 m_any_of(m_GAdd(m_Reg(MatchReg), m_SpecificICst(1)),
1248 m_GPtrAdd(m_Reg(MatchReg), m_SpecificICst(1))))) {
1249 Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;
1250 Reg = MatchReg;
1251 if (Invert) {
1252 CC = AArch64CC::getInvertedCondCode(CC);
1253 std::swap(Reg, OtherReg);
1254 }
1255 return true;
1256 }
1257
1258 return false;
1259 };
1260
1261 // Helper lambda which tries to use CSINC/CSINV for the instruction when its
1262 // true/false values are constants.
1263 // FIXME: All of these patterns already exist in tablegen. We should be
1264 // able to import these.
1265 auto TryOptSelectCst = [&Opc, &True, &False, &CC, Is32Bit, &MRI,
1266 &Optimized]() {
1267 if (Optimized)
1268 return false;
1269 auto TrueCst = getIConstantVRegValWithLookThrough(True, MRI);
1270 auto FalseCst = getIConstantVRegValWithLookThrough(False, MRI);
1271 if (!TrueCst && !FalseCst)
1272 return false;
1273
1274 Register ZReg = Is32Bit ? AArch64::WZR : AArch64::XZR;
1275 if (TrueCst && FalseCst) {
1276 int64_t T = TrueCst->Value.getSExtValue();
1277 int64_t F = FalseCst->Value.getSExtValue();
1278
1279 if (T == 0 && F == 1) {
1280 // G_SELECT cc, 0, 1 -> CSINC zreg, zreg, cc
1281 Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;
1282 True = ZReg;
1283 False = ZReg;
1284 return true;
1285 }
1286
1287 if (T == 0 && F == -1) {
1288 // G_SELECT cc, 0, -1 -> CSINV zreg, zreg, cc
1289 Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;
1290 True = ZReg;
1291 False = ZReg;
1292 return true;
1293 }
1294 }
1295
1296 if (TrueCst) {
1297 int64_t T = TrueCst->Value.getSExtValue();
1298 if (T == 1) {
1299 // G_SELECT cc, 1, f -> CSINC f, zreg, inv_cc
1300 Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;
1301 True = False;
1302 False = ZReg;
1303 CC = AArch64CC::getInvertedCondCode(CC);
1304 return true;
1305 }
1306
1307 if (T == -1) {
1308 // G_SELECT cc, -1, f -> CSINV f, zreg, inv_cc
1309 Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;
1310 True = False;
1311 False = ZReg;
1312 CC = AArch64CC::getInvertedCondCode(CC);
1313 return true;
1314 }
1315 }
1316
1317 if (FalseCst) {
1318 int64_t F = FalseCst->Value.getSExtValue();
1319 if (F == 1) {
1320 // G_SELECT cc, t, 1 -> CSINC t, zreg, cc
1321 Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;
1322 False = ZReg;
1323 return true;
1324 }
1325
1326 if (F == -1) {
1327 // G_SELECT cc, t, -1 -> CSINC t, zreg, cc
1328 Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;
1329 False = ZReg;
1330 return true;
1331 }
1332 }
1333 return false;
1334 };
1335
1336 Optimized |= TryFoldBinOpIntoSelect(False, True, /*Invert = */ false);
1337 Optimized |= TryFoldBinOpIntoSelect(True, False, /*Invert = */ true);
1338 Optimized |= TryOptSelectCst();
1339 auto SelectInst = MIB.buildInstr(Opc, {Dst}, {True, False}).addImm(CC);
1340 constrainSelectedInstRegOperands(*SelectInst, TII, TRI, RBI);
1341 return &*SelectInst;
1342}
1343
1344 static AArch64CC::CondCode changeICMPPredToAArch64CC(CmpInst::Predicate P) {
1345 switch (P) {
1346 default:
1347 llvm_unreachable("Unknown condition code!");
1348 case CmpInst::ICMP_NE:
1349 return AArch64CC::NE;
1350 case CmpInst::ICMP_EQ:
1351 return AArch64CC::EQ;
1352 case CmpInst::ICMP_SGT:
1353 return AArch64CC::GT;
1354 case CmpInst::ICMP_SGE:
1355 return AArch64CC::GE;
1356 case CmpInst::ICMP_SLT:
1357 return AArch64CC::LT;
1358 case CmpInst::ICMP_SLE:
1359 return AArch64CC::LE;
1360 case CmpInst::ICMP_UGT:
1361 return AArch64CC::HI;
1362 case CmpInst::ICMP_UGE:
1363 return AArch64CC::HS;
1364 case CmpInst::ICMP_ULT:
1365 return AArch64CC::LO;
1366 case CmpInst::ICMP_ULE:
1367 return AArch64CC::LS;
1368 }
1369}
1370
1371/// changeFPCCToORAArch64CC - Convert an IR fp condition code to an AArch64 CC.
1372 static void changeFPCCToORAArch64CC(CmpInst::Predicate CC,
1373 AArch64CC::CondCode &CondCode,
1374 AArch64CC::CondCode &CondCode2) {
1375 CondCode2 = AArch64CC::AL;
1376 switch (CC) {
1377 default:
1378 llvm_unreachable("Unknown FP condition!");
1379 case CmpInst::FCMP_OEQ:
1380 CondCode = AArch64CC::EQ;
1381 break;
1382 case CmpInst::FCMP_OGT:
1383 CondCode = AArch64CC::GT;
1384 break;
1385 case CmpInst::FCMP_OGE:
1386 CondCode = AArch64CC::GE;
1387 break;
1388 case CmpInst::FCMP_OLT:
1389 CondCode = AArch64CC::MI;
1390 break;
1391 case CmpInst::FCMP_OLE:
1392 CondCode = AArch64CC::LS;
1393 break;
1394 case CmpInst::FCMP_ONE:
1395 CondCode = AArch64CC::MI;
1396 CondCode2 = AArch64CC::GT;
1397 break;
1398 case CmpInst::FCMP_ORD:
1399 CondCode = AArch64CC::VC;
1400 break;
1401 case CmpInst::FCMP_UNO:
1402 CondCode = AArch64CC::VS;
1403 break;
1404 case CmpInst::FCMP_UEQ:
1405 CondCode = AArch64CC::EQ;
1406 CondCode2 = AArch64CC::VS;
1407 break;
1408 case CmpInst::FCMP_UGT:
1409 CondCode = AArch64CC::HI;
1410 break;
1411 case CmpInst::FCMP_UGE:
1412 CondCode = AArch64CC::PL;
1413 break;
1414 case CmpInst::FCMP_ULT:
1415 CondCode = AArch64CC::LT;
1416 break;
1417 case CmpInst::FCMP_ULE:
1418 CondCode = AArch64CC::LE;
1419 break;
1420 case CmpInst::FCMP_UNE:
1421 CondCode = AArch64CC::NE;
1422 break;
1423 }
1424}
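// For illustration (editor's sketch): FCMP_ONE and FCMP_UEQ are the cases that
// need two codes here, e.g. "a one b" becomes (a olt b) || (a ogt b), i.e. MI
// or GT, so callers such as selectCompareBranchFedByFCmp emit a second Bcc
// whenever CondCode2 != AL.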
1425
1426/// Convert an IR fp condition code to an AArch64 CC.
1427/// This differs from changeFPCCToAArch64CC in that it returns cond codes that
1428/// should be AND'ed instead of OR'ed.
1429 static void changeFPCCToANDAArch64CC(CmpInst::Predicate CC,
1430 AArch64CC::CondCode &CondCode,
1431 AArch64CC::CondCode &CondCode2) {
1432 CondCode2 = AArch64CC::AL;
1433 switch (CC) {
1434 default:
1435 changeFPCCToORAArch64CC(CC, CondCode, CondCode2);
1436 assert(CondCode2 == AArch64CC::AL);
1437 break;
1438 case CmpInst::FCMP_ONE:
1439 // (a one b)
1440 // == ((a olt b) || (a ogt b))
1441 // == ((a ord b) && (a une b))
1442 CondCode = AArch64CC::VC;
1443 CondCode2 = AArch64CC::NE;
1444 break;
1445 case CmpInst::FCMP_UEQ:
1446 // (a ueq b)
1447 // == ((a uno b) || (a oeq b))
1448 // == ((a ule b) && (a uge b))
1449 CondCode = AArch64CC::PL;
1450 CondCode2 = AArch64CC::LE;
1451 break;
1452 }
1453}
1454
1455/// Return a register which can be used as a bit to test in a TB(N)Z.
1456static Register getTestBitReg(Register Reg, uint64_t &Bit, bool &Invert,
1458 assert(Reg.isValid() && "Expected valid register!");
1459 bool HasZext = false;
1460 while (MachineInstr *MI = getDefIgnoringCopies(Reg, MRI)) {
1461 unsigned Opc = MI->getOpcode();
1462
1463 if (!MI->getOperand(0).isReg() ||
1464 !MRI.hasOneNonDBGUse(MI->getOperand(0).getReg()))
1465 break;
1466
1467 // (tbz (any_ext x), b) -> (tbz x, b) if we don't use the extended bits.
1468 //
1469 // (tbz (trunc x), b) -> (tbz x, b) is always safe, because the bit number
1470 // on the truncated x is the same as the bit number on x.
1471 if (Opc == TargetOpcode::G_ANYEXT || Opc == TargetOpcode::G_ZEXT ||
1472 Opc == TargetOpcode::G_TRUNC) {
1473 if (Opc == TargetOpcode::G_ZEXT)
1474 HasZext = true;
1475
1476 Register NextReg = MI->getOperand(1).getReg();
1477 // Did we find something worth folding?
1478 if (!NextReg.isValid() || !MRI.hasOneNonDBGUse(NextReg))
1479 break;
1480
1481 // NextReg is worth folding. Keep looking.
1482 Reg = NextReg;
1483 continue;
1484 }
1485
1486 // Attempt to find a suitable operation with a constant on one side.
1487 std::optional<uint64_t> C;
1488 Register TestReg;
1489 switch (Opc) {
1490 default:
1491 break;
1492 case TargetOpcode::G_AND:
1493 case TargetOpcode::G_XOR: {
1494 TestReg = MI->getOperand(1).getReg();
1495 Register ConstantReg = MI->getOperand(2).getReg();
1496 auto VRegAndVal = getIConstantVRegValWithLookThrough(ConstantReg, MRI);
1497 if (!VRegAndVal) {
1498 // AND commutes, check the other side for a constant.
1499 // FIXME: Can we canonicalize the constant so that it's always on the
1500 // same side at some point earlier?
1501 std::swap(ConstantReg, TestReg);
1502 VRegAndVal = getIConstantVRegValWithLookThrough(ConstantReg, MRI);
1503 }
1504 if (VRegAndVal) {
1505 if (HasZext)
1506 C = VRegAndVal->Value.getZExtValue();
1507 else
1508 C = VRegAndVal->Value.getSExtValue();
1509 }
1510 break;
1511 }
1512 case TargetOpcode::G_ASHR:
1513 case TargetOpcode::G_LSHR:
1514 case TargetOpcode::G_SHL: {
1515 TestReg = MI->getOperand(1).getReg();
1516 auto VRegAndVal =
1517 getIConstantVRegValWithLookThrough(MI->getOperand(2).getReg(), MRI);
1518 if (VRegAndVal)
1519 C = VRegAndVal->Value.getSExtValue();
1520 break;
1521 }
1522 }
1523
1524 // Didn't find a constant or viable register. Bail out of the loop.
1525 if (!C || !TestReg.isValid())
1526 break;
1527
1528 // We found a suitable instruction with a constant. Check to see if we can
1529 // walk through the instruction.
1530 Register NextReg;
1531 unsigned TestRegSize = MRI.getType(TestReg).getSizeInBits();
1532 switch (Opc) {
1533 default:
1534 break;
1535 case TargetOpcode::G_AND:
1536 // (tbz (and x, m), b) -> (tbz x, b) when the b-th bit of m is set.
1537 if ((*C >> Bit) & 1)
1538 NextReg = TestReg;
1539 break;
1540 case TargetOpcode::G_SHL:
1541 // (tbz (shl x, c), b) -> (tbz x, b-c) when b-c is positive and fits in
1542 // the type of the register.
1543 if (*C <= Bit && (Bit - *C) < TestRegSize) {
1544 NextReg = TestReg;
1545 Bit = Bit - *C;
1546 }
1547 break;
1548 case TargetOpcode::G_ASHR:
1549 // (tbz (ashr x, c), b) -> (tbz x, b+c) or (tbz x, msb) if b+c is > # bits
1550 // in x
1551 NextReg = TestReg;
1552 Bit = Bit + *C;
1553 if (Bit >= TestRegSize)
1554 Bit = TestRegSize - 1;
1555 break;
1556 case TargetOpcode::G_LSHR:
1557 // (tbz (lshr x, c), b) -> (tbz x, b+c) when b + c is < # bits in x
1558 if ((Bit + *C) < TestRegSize) {
1559 NextReg = TestReg;
1560 Bit = Bit + *C;
1561 }
1562 break;
1563 case TargetOpcode::G_XOR:
1564 // We can walk through a G_XOR by inverting whether we use tbz/tbnz when
1565 // appropriate.
1566 //
1567 // e.g. If x' = xor x, c, and the b-th bit is set in c then
1568 //
1569 // tbz x', b -> tbnz x, b
1570 //
1571 // Because x' only has the b-th bit set if x does not.
1572 if ((*C >> Bit) & 1)
1573 Invert = !Invert;
1574 NextReg = TestReg;
1575 break;
1576 }
1577
1578 // Check if we found anything worth folding.
1579 if (!NextReg.isValid())
1580 return Reg;
1581 Reg = NextReg;
1582 }
1583
1584 return Reg;
1585}
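// For illustration (editor's sketch; assumed MIR, not from the file): starting
// from a test of bit 3 of %y where %y:gpr(s32) = G_SHL %x, 2, the walk above
// rewrites it to a test of bit 1 of %x, since bit 3 of (x << 2) is bit 1 of x;
// a G_XOR with that bit set instead flips Invert, turning TBZ into TBNZ.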
1586
1587MachineInstr *AArch64InstructionSelector::emitTestBit(
1588 Register TestReg, uint64_t Bit, bool IsNegative, MachineBasicBlock *DstMBB,
1589 MachineIRBuilder &MIB) const {
1590 assert(TestReg.isValid());
1591 assert(ProduceNonFlagSettingCondBr &&
1592 "Cannot emit TB(N)Z with speculation tracking!");
1593 MachineRegisterInfo &MRI = *MIB.getMRI();
1594
1595 // Attempt to optimize the test bit by walking over instructions.
1596 TestReg = getTestBitReg(TestReg, Bit, IsNegative, MRI);
1597 LLT Ty = MRI.getType(TestReg);
1598 unsigned Size = Ty.getSizeInBits();
1599 assert(!Ty.isVector() && "Expected a scalar!");
1600 assert(Bit < 64 && "Bit is too large!");
1601
1602 // When the test register is a 64-bit register, we have to narrow to make
1603 // TBNZW work.
1604 bool UseWReg = Bit < 32;
1605 unsigned NecessarySize = UseWReg ? 32 : 64;
1606 if (Size != NecessarySize)
1607 TestReg = moveScalarRegClass(
1608 TestReg, UseWReg ? AArch64::GPR32RegClass : AArch64::GPR64RegClass,
1609 MIB);
1610
1611 static const unsigned OpcTable[2][2] = {{AArch64::TBZX, AArch64::TBNZX},
1612 {AArch64::TBZW, AArch64::TBNZW}};
1613 unsigned Opc = OpcTable[UseWReg][IsNegative];
1614 auto TestBitMI =
1615 MIB.buildInstr(Opc).addReg(TestReg).addImm(Bit).addMBB(DstMBB);
1616 constrainSelectedInstRegOperands(*TestBitMI, TII, TRI, RBI);
1617 return &*TestBitMI;
1618}
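// For illustration (editor's sketch): for Bit >= 32 the X-register forms are
// used and the test register is moved to GPR64 if needed, producing e.g.
// "TBNZX %reg, 40, %bb.3"; for Bit < 32 the register is narrowed to GPR32 and
// TBZW/TBNZW are emitted instead.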
1619
1620bool AArch64InstructionSelector::tryOptAndIntoCompareBranch(
1621 MachineInstr &AndInst, bool Invert, MachineBasicBlock *DstMBB,
1622 MachineIRBuilder &MIB) const {
1623 assert(AndInst.getOpcode() == TargetOpcode::G_AND && "Expected G_AND only?");
1624 // Given something like this:
1625 //
1626 // %x = ...Something...
1627 // %one = G_CONSTANT i64 1
1628 // %zero = G_CONSTANT i64 0
1629 // %and = G_AND %x, %one
1630 // %cmp = G_ICMP intpred(ne), %and, %zero
1631 // %cmp_trunc = G_TRUNC %cmp
1632 // G_BRCOND %cmp_trunc, %bb.3
1633 //
1634 // We want to try and fold the AND into the G_BRCOND and produce either a
1635 // TBNZ (when we have intpred(ne)) or a TBZ (when we have intpred(eq)).
1636 //
1637 // In this case, we'd get
1638 //
1639 // TBNZ %x %bb.3
1640 //
1641
1642 // Check if the AND has a constant on its RHS which we can use as a mask.
1643 // If it's a power of 2, then it's the same as checking a specific bit.
1644 // (e.g, ANDing with 8 == ANDing with 000...100 == testing if bit 3 is set)
1645 auto MaybeBit = getIConstantVRegValWithLookThrough(
1646 AndInst.getOperand(2).getReg(), *MIB.getMRI());
1647 if (!MaybeBit)
1648 return false;
1649
1650 int32_t Bit = MaybeBit->Value.exactLogBase2();
1651 if (Bit < 0)
1652 return false;
1653
1654 Register TestReg = AndInst.getOperand(1).getReg();
1655
1656 // Emit a TB(N)Z.
1657 emitTestBit(TestReg, Bit, Invert, DstMBB, MIB);
1658 return true;
1659}
1660
1661MachineInstr *AArch64InstructionSelector::emitCBZ(Register CompareReg,
1662 bool IsNegative,
1663 MachineBasicBlock *DestMBB,
1664 MachineIRBuilder &MIB) const {
1665 assert(ProduceNonFlagSettingCondBr && "CBZ does not set flags!");
1666 MachineRegisterInfo &MRI = *MIB.getMRI();
1667 assert(RBI.getRegBank(CompareReg, MRI, TRI)->getID() ==
1668 AArch64::GPRRegBankID &&
1669 "Expected GPRs only?");
1670 auto Ty = MRI.getType(CompareReg);
1671 unsigned Width = Ty.getSizeInBits();
1672 assert(!Ty.isVector() && "Expected scalar only?");
1673 assert(Width <= 64 && "Expected width to be at most 64?");
1674 static const unsigned OpcTable[2][2] = {{AArch64::CBZW, AArch64::CBZX},
1675 {AArch64::CBNZW, AArch64::CBNZX}};
1676 unsigned Opc = OpcTable[IsNegative][Width == 64];
1677 auto BranchMI = MIB.buildInstr(Opc, {}, {CompareReg}).addMBB(DestMBB);
1678 constrainSelectedInstRegOperands(*BranchMI, TII, TRI, RBI);
1679 return &*BranchMI;
1680}
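// For illustration (editor's sketch): for a 64-bit GPR with IsNegative set this
// builds "CBNZX %reg, %bb.N", while a 32-bit register without IsNegative gets
// "CBZW %reg, %bb.N"; the W/X form is picked purely from the register width.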
1681
1682bool AArch64InstructionSelector::selectCompareBranchFedByFCmp(
1683 MachineInstr &I, MachineInstr &FCmp, MachineIRBuilder &MIB) const {
1684 assert(FCmp.getOpcode() == TargetOpcode::G_FCMP);
1685 assert(I.getOpcode() == TargetOpcode::G_BRCOND);
1686 // Unfortunately, the mapping of LLVM FP CC's onto AArch64 CC's isn't
1687 // totally clean. Some of them require two branches to implement.
1688 auto Pred = (CmpInst::Predicate)FCmp.getOperand(1).getPredicate();
1689 emitFPCompare(FCmp.getOperand(2).getReg(), FCmp.getOperand(3).getReg(), MIB,
1690 Pred);
1691 AArch64CC::CondCode CC1, CC2;
1692 changeFCMPPredToAArch64CC(static_cast<CmpInst::Predicate>(Pred), CC1, CC2);
1693 MachineBasicBlock *DestMBB = I.getOperand(1).getMBB();
1694 MIB.buildInstr(AArch64::Bcc, {}, {}).addImm(CC1).addMBB(DestMBB);
1695 if (CC2 != AArch64CC::AL)
1696 MIB.buildInstr(AArch64::Bcc, {}, {}).addImm(CC2).addMBB(DestMBB);
1697 I.eraseFromParent();
1698 return true;
1699}
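// Illustrative sketch (not from the source file): most FP predicates map onto
// a single AArch64 condition code (CC2 == AL above), so one Bcc suffices,
// e.g. an ordered-equal compare becomes an FCMP followed by B.EQ. Predicates
// such as "one" or "ueq" need the second condition code returned by
// changeFCMPPredToAArch64CC, which is why a second Bcc to the same destination
// block may be emitted.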
1700
1701bool AArch64InstructionSelector::tryOptCompareBranchFedByICmp(
1702 MachineInstr &I, MachineInstr &ICmp, MachineIRBuilder &MIB) const {
1703 assert(ICmp.getOpcode() == TargetOpcode::G_ICMP);
1704 assert(I.getOpcode() == TargetOpcode::G_BRCOND);
1705 // Attempt to optimize the G_BRCOND + G_ICMP into a TB(N)Z/CB(N)Z.
1706 //
1707 // Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z
1708 // instructions will not be produced, as they are conditional branch
1709 // instructions that do not set flags.
1710 if (!ProduceNonFlagSettingCondBr)
1711 return false;
1712
1713 MachineRegisterInfo &MRI = *MIB.getMRI();
1714 MachineBasicBlock *DestMBB = I.getOperand(1).getMBB();
1715 auto Pred =
1716 static_cast<CmpInst::Predicate>(ICmp.getOperand(1).getPredicate());
1717 Register LHS = ICmp.getOperand(2).getReg();
1718 Register RHS = ICmp.getOperand(3).getReg();
1719
1720 // We're allowed to emit a TB(N)Z/CB(N)Z. Try to do that.
1721 auto VRegAndVal = getIConstantVRegValWithLookThrough(RHS, MRI);
1722 MachineInstr *AndInst = getOpcodeDef(TargetOpcode::G_AND, LHS, MRI);
1723
1724 // When we can emit a TB(N)Z, prefer that.
1725 //
1726 // Handle non-commutative condition codes first.
1727 // Note that we don't want to do this when we have a G_AND because it can
1728 // become a tst. The tst will make the test bit in the TB(N)Z redundant.
1729 if (VRegAndVal && !AndInst) {
1730 int64_t C = VRegAndVal->Value.getSExtValue();
1731
1732 // When we have a greater-than comparison, we can just test if the msb is
1733 // zero.
1734 if (C == -1 && Pred == CmpInst::ICMP_SGT) {
1735 uint64_t Bit = MRI.getType(LHS).getSizeInBits() - 1;
1736 emitTestBit(LHS, Bit, /*IsNegative = */ false, DestMBB, MIB);
1737 I.eraseFromParent();
1738 return true;
1739 }
1740
1741 // When we have a less than comparison, we can just test if the msb is not
1742 // zero.
1743 if (C == 0 && Pred == CmpInst::ICMP_SLT) {
1744 uint64_t Bit = MRI.getType(LHS).getSizeInBits() - 1;
1745 emitTestBit(LHS, Bit, /*IsNegative = */ true, DestMBB, MIB);
1746 I.eraseFromParent();
1747 return true;
1748 }
1749
1750 // Inversely, if we have a signed greater-than-or-equal comparison to zero,
1751 // we can test if the msb is zero.
1752 if (C == 0 && Pred == CmpInst::ICMP_SGE) {
1753 uint64_t Bit = MRI.getType(LHS).getSizeInBits() - 1;
1754 emitTestBit(LHS, Bit, /*IsNegative = */ false, DestMBB, MIB);
1755 I.eraseFromParent();
1756 return true;
1757 }
1758 }
1759
1760 // Attempt to handle commutative condition codes. Right now, that's only
1761 // eq/ne.
1762 if (ICmpInst::isEquality(Pred)) {
1763 if (!VRegAndVal) {
1764 std::swap(RHS, LHS);
1765 VRegAndVal = getIConstantVRegValWithLookThrough(RHS, MRI);
1766 AndInst = getOpcodeDef(TargetOpcode::G_AND, LHS, MRI);
1767 }
1768
1769 if (VRegAndVal && VRegAndVal->Value == 0) {
1770 // If there's a G_AND feeding into this branch, try to fold it away by
1771 // emitting a TB(N)Z instead.
1772 //
1773 // Note: If we have LT, then it *is* possible to fold, but it wouldn't be
1774 // beneficial. When we have an AND and LT, we need a TST/ANDS, so folding
1775 // would be redundant.
1776 if (AndInst &&
1777 tryOptAndIntoCompareBranch(
1778 *AndInst, /*Invert = */ Pred == CmpInst::ICMP_NE, DestMBB, MIB)) {
1779 I.eraseFromParent();
1780 return true;
1781 }
1782
1783 // Otherwise, try to emit a CB(N)Z instead.
1784 auto LHSTy = MRI.getType(LHS);
1785 if (!LHSTy.isVector() && LHSTy.getSizeInBits() <= 64) {
1786 emitCBZ(LHS, /*IsNegative = */ Pred == CmpInst::ICMP_NE, DestMBB, MIB);
1787 I.eraseFromParent();
1788 return true;
1789 }
1790 }
1791 }
1792
1793 return false;
1794}
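// Illustrative sketch (not from the source file): the sign-bit special cases
// above turn compares against 0 / -1 into a test of the most significant bit,
// e.g. for a 64-bit %x (Bit = 63):
//
//   G_ICMP intpred(slt), %x, 0  + G_BRCOND  ->  TBNZ %x, #63, %bb
//   G_ICMP intpred(sgt), %x, -1 + G_BRCOND  ->  TBZ  %x, #63, %bb
//
// while an equality compare against 0 with no foldable G_AND becomes a
// CBZ/CBNZ on %x directly.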
1795
1796bool AArch64InstructionSelector::selectCompareBranchFedByICmp(
1797 MachineInstr &I, MachineInstr &ICmp, MachineIRBuilder &MIB) const {
1798 assert(ICmp.getOpcode() == TargetOpcode::G_ICMP);
1799 assert(I.getOpcode() == TargetOpcode::G_BRCOND);
1800 if (tryOptCompareBranchFedByICmp(I, ICmp, MIB))
1801 return true;
1802
1803 // Couldn't optimize. Emit a compare + a Bcc.
1804 MachineBasicBlock *DestMBB = I.getOperand(1).getMBB();
1805 auto PredOp = ICmp.getOperand(1);
1806 emitIntegerCompare(ICmp.getOperand(2), ICmp.getOperand(3), PredOp, MIB);
1807 const AArch64CC::CondCode CC = changeICMPPredToAArch64CC(
1808 static_cast<CmpInst::Predicate>(PredOp.getPredicate()));
1809 MIB.buildInstr(AArch64::Bcc, {}, {}).addImm(CC).addMBB(DestMBB);
1810 I.eraseFromParent();
1811 return true;
1812}
1813
1814bool AArch64InstructionSelector::selectCompareBranch(
1815 MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) {
1816 Register CondReg = I.getOperand(0).getReg();
1817 MachineInstr *CCMI = MRI.getVRegDef(CondReg);
1818 // Try to select the G_BRCOND using whatever is feeding the condition if
1819 // possible.
1820 unsigned CCMIOpc = CCMI->getOpcode();
1821 if (CCMIOpc == TargetOpcode::G_FCMP)
1822 return selectCompareBranchFedByFCmp(I, *CCMI, MIB);
1823 if (CCMIOpc == TargetOpcode::G_ICMP)
1824 return selectCompareBranchFedByICmp(I, *CCMI, MIB);
1825
1826 // Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z
1827 // instructions will not be produced, as they are conditional branch
1828 // instructions that do not set flags.
1829 if (ProduceNonFlagSettingCondBr) {
1830 emitTestBit(CondReg, /*Bit = */ 0, /*IsNegative = */ true,
1831 I.getOperand(1).getMBB(), MIB);
1832 I.eraseFromParent();
1833 return true;
1834 }
1835
1836 // Can't emit TB(N)Z/CB(N)Z. Emit a tst + bcc instead.
1837 auto TstMI =
1838 MIB.buildInstr(AArch64::ANDSWri, {LLT::scalar(32)}, {CondReg}).addImm(1);
1839 constrainSelectedInstRegOperands(*TstMI, TII, TRI, RBI);
1840 auto Bcc = MIB.buildInstr(AArch64::Bcc)
1841 .addImm(AArch64CC::NE)
1842 .addMBB(I.getOperand(1).getMBB());
1843 I.eraseFromParent();
1844 return constrainSelectedInstRegOperands(*Bcc, TII, TRI, RBI);
1845}
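// Illustrative sketch (not from the source file): when speculative load
// hardening rules out TB(N)Z/CB(N)Z, the fallback above materializes roughly
//
//   tst  w<cond>, #0x1
//   b.ne %bb
//
// i.e. a flag-setting AND of bit 0 followed by a conditional branch.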
1846
1847/// Returns the element immediate value of a vector shift operand if found.
1848/// This needs to detect a splat-like operation, e.g. a G_BUILD_VECTOR.
1849static std::optional<int64_t> getVectorShiftImm(Register Reg,
1850 MachineRegisterInfo &MRI) {
1851 assert(MRI.getType(Reg).isVector() && "Expected a *vector* shift operand");
1852 MachineInstr *OpMI = MRI.getVRegDef(Reg);
1853 return getAArch64VectorSplatScalar(*OpMI, MRI);
1854}
1855
1856/// Matches and returns the shift immediate value for a SHL instruction given
1857/// a shift operand.
1858static std::optional<int64_t> getVectorSHLImm(LLT SrcTy, Register Reg,
1859 MachineRegisterInfo &MRI) {
1860 std::optional<int64_t> ShiftImm = getVectorShiftImm(Reg, MRI);
1861 if (!ShiftImm)
1862 return std::nullopt;
1863 // Check the immediate is in range for a SHL.
1864 int64_t Imm = *ShiftImm;
1865 if (Imm < 0)
1866 return std::nullopt;
1867 switch (SrcTy.getElementType().getSizeInBits()) {
1868 default:
1869 LLVM_DEBUG(dbgs() << "Unhandled element type for vector shift");
1870 return std::nullopt;
1871 case 8:
1872 if (Imm > 7)
1873 return std::nullopt;
1874 break;
1875 case 16:
1876 if (Imm > 15)
1877 return std::nullopt;
1878 break;
1879 case 32:
1880 if (Imm > 31)
1881 return std::nullopt;
1882 break;
1883 case 64:
1884 if (Imm > 63)
1885 return std::nullopt;
1886 break;
1887 }
1888 return Imm;
1889}
1890
1891bool AArch64InstructionSelector::selectVectorSHL(MachineInstr &I,
1892 MachineRegisterInfo &MRI) {
1893 assert(I.getOpcode() == TargetOpcode::G_SHL);
1894 Register DstReg = I.getOperand(0).getReg();
1895 const LLT Ty = MRI.getType(DstReg);
1896 Register Src1Reg = I.getOperand(1).getReg();
1897 Register Src2Reg = I.getOperand(2).getReg();
1898
1899 if (!Ty.isVector())
1900 return false;
1901
1902 // Check if we have a vector of constants on RHS that we can select as the
1903 // immediate form.
1904 std::optional<int64_t> ImmVal = getVectorSHLImm(Ty, Src2Reg, MRI);
1905
1906 unsigned Opc = 0;
1907 if (Ty == LLT::fixed_vector(2, 64)) {
1908 Opc = ImmVal ? AArch64::SHLv2i64_shift : AArch64::USHLv2i64;
1909 } else if (Ty == LLT::fixed_vector(4, 32)) {
1910 Opc = ImmVal ? AArch64::SHLv4i32_shift : AArch64::USHLv4i32;
1911 } else if (Ty == LLT::fixed_vector(2, 32)) {
1912 Opc = ImmVal ? AArch64::SHLv2i32_shift : AArch64::USHLv2i32;
1913 } else if (Ty == LLT::fixed_vector(4, 16)) {
1914 Opc = ImmVal ? AArch64::SHLv4i16_shift : AArch64::USHLv4i16;
1915 } else if (Ty == LLT::fixed_vector(8, 16)) {
1916 Opc = ImmVal ? AArch64::SHLv8i16_shift : AArch64::USHLv8i16;
1917 } else if (Ty == LLT::fixed_vector(16, 8)) {
1918 Opc = ImmVal ? AArch64::SHLv16i8_shift : AArch64::USHLv16i8;
1919 } else if (Ty == LLT::fixed_vector(8, 8)) {
1920 Opc = ImmVal ? AArch64::SHLv8i8_shift : AArch64::USHLv8i8;
1921 } else {
1922 LLVM_DEBUG(dbgs() << "Unhandled G_SHL type");
1923 return false;
1924 }
1925
1926 auto Shl = MIB.buildInstr(Opc, {DstReg}, {Src1Reg});
1927 if (ImmVal)
1928 Shl.addImm(*ImmVal);
1929 else
1930 Shl.addUse(Src2Reg);
1931 constrainSelectedInstRegOperands(*Shl, TII, TRI, RBI);
1932 I.eraseFromParent();
1933 return true;
1934}
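// Illustrative sketch (not from the source file): the two forms chosen above,
// for a <4 x s32> destination:
//
//   %amt = G_BUILD_VECTOR 5, 5, 5, 5
//   %dst = G_SHL %src, %amt             ->  SHLv4i32_shift %src, 5
//
//   %dst = G_SHL %src, %amt (variable)  ->  USHLv4i32 %src, %amt
//
// i.e. a splat constant shift selects the immediate-shift instruction, while
// anything else uses the register form.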
1935
1936bool AArch64InstructionSelector::selectVectorAshrLshr(
1937 MachineInstr &I, MachineRegisterInfo &MRI) {
1938 assert(I.getOpcode() == TargetOpcode::G_ASHR ||
1939 I.getOpcode() == TargetOpcode::G_LSHR);
1940 Register DstReg = I.getOperand(0).getReg();
1941 const LLT Ty = MRI.getType(DstReg);
1942 Register Src1Reg = I.getOperand(1).getReg();
1943 Register Src2Reg = I.getOperand(2).getReg();
1944
1945 if (!Ty.isVector())
1946 return false;
1947
1948 bool IsASHR = I.getOpcode() == TargetOpcode::G_ASHR;
1949
1950 // We expect the immediate case to be lowered in the PostLegalCombiner to
1951 // AArch64ISD::VASHR or AArch64ISD::VLSHR equivalents.
1952
1953 // There is no right-shift-by-register instruction, but the left-shift-by-
1954 // register instruction takes a signed shift amount, where a negative amount
1955 // specifies a right shift.
1956
1957 unsigned Opc = 0;
1958 unsigned NegOpc = 0;
1959 const TargetRegisterClass *RC =
1960 getRegClassForTypeOnBank(Ty, RBI.getRegBank(AArch64::FPRRegBankID));
1961 if (Ty == LLT::fixed_vector(2, 64)) {
1962 Opc = IsASHR ? AArch64::SSHLv2i64 : AArch64::USHLv2i64;
1963 NegOpc = AArch64::NEGv2i64;
1964 } else if (Ty == LLT::fixed_vector(4, 32)) {
1965 Opc = IsASHR ? AArch64::SSHLv4i32 : AArch64::USHLv4i32;
1966 NegOpc = AArch64::NEGv4i32;
1967 } else if (Ty == LLT::fixed_vector(2, 32)) {
1968 Opc = IsASHR ? AArch64::SSHLv2i32 : AArch64::USHLv2i32;
1969 NegOpc = AArch64::NEGv2i32;
1970 } else if (Ty == LLT::fixed_vector(4, 16)) {
1971 Opc = IsASHR ? AArch64::SSHLv4i16 : AArch64::USHLv4i16;
1972 NegOpc = AArch64::NEGv4i16;
1973 } else if (Ty == LLT::fixed_vector(8, 16)) {
1974 Opc = IsASHR ? AArch64::SSHLv8i16 : AArch64::USHLv8i16;
1975 NegOpc = AArch64::NEGv8i16;
1976 } else if (Ty == LLT::fixed_vector(16, 8)) {
1977 Opc = IsASHR ? AArch64::SSHLv16i8 : AArch64::USHLv16i8;
1978 NegOpc = AArch64::NEGv16i8;
1979 } else if (Ty == LLT::fixed_vector(8, 8)) {
1980 Opc = IsASHR ? AArch64::SSHLv8i8 : AArch64::USHLv8i8;
1981 NegOpc = AArch64::NEGv8i8;
1982 } else {
1983 LLVM_DEBUG(dbgs() << "Unhandled G_ASHR type");
1984 return false;
1985 }
1986
1987 auto Neg = MIB.buildInstr(NegOpc, {RC}, {Src2Reg});
1988 constrainSelectedInstRegOperands(*Neg, TII, TRI, RBI);
1989 auto SShl = MIB.buildInstr(Opc, {DstReg}, {Src1Reg, Neg});
1990 constrainSelectedInstRegOperands(*SShl, TII, TRI, RBI);
1991 I.eraseFromParent();
1992 return true;
1993}
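// Illustrative sketch (not from the source file): since there is no
// right-shift-by-register NEON instruction, a <4 x s32> G_ASHR by a variable
// amount is selected above roughly as:
//
//   NEGv4i32  %neg, %amt          ; negate the per-lane shift amounts
//   SSHLv4i32 %dst, %src, %neg    ; signed shift left by a negative amount
//                                 ; == arithmetic shift right
//
// with USHLv4i32 used instead for G_LSHR.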
1994
1995bool AArch64InstructionSelector::selectVaStartAAPCS(
1996 MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const {
1997 return false;
1998}
1999
2000bool AArch64InstructionSelector::selectVaStartDarwin(
2001 MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const {
2002 AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
2003 Register ListReg = I.getOperand(0).getReg();
2004
2005 Register ArgsAddrReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
2006
2007 int FrameIdx = FuncInfo->getVarArgsStackIndex();
2008 if (MF.getSubtarget<AArch64Subtarget>().isCallingConvWin64(
2009 MF.getFunction().getCallingConv())) {
2010 FrameIdx = FuncInfo->getVarArgsGPRSize() > 0
2011 ? FuncInfo->getVarArgsGPRIndex()
2012 : FuncInfo->getVarArgsStackIndex();
2013 }
2014
2015 auto MIB =
2016 BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::ADDXri))
2017 .addDef(ArgsAddrReg)
2018 .addFrameIndex(FrameIdx)
2019 .addImm(0)
2020 .addImm(0);
2021
2022 constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
2023
2024 MIB = BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::STRXui))
2025 .addUse(ArgsAddrReg)
2026 .addUse(ListReg)
2027 .addImm(0)
2028 .addMemOperand(*I.memoperands_begin());
2029
2030 constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
2031 I.eraseFromParent();
2032 return true;
2033}
2034
2035void AArch64InstructionSelector::materializeLargeCMVal(
2036 MachineInstr &I, const Value *V, unsigned OpFlags) {
2037 MachineBasicBlock &MBB = *I.getParent();
2038 MachineFunction &MF = *MBB.getParent();
2039 MachineRegisterInfo &MRI = MF.getRegInfo();
2040
2041 auto MovZ = MIB.buildInstr(AArch64::MOVZXi, {&AArch64::GPR64RegClass}, {});
2042 MovZ->addOperand(MF, I.getOperand(1));
2043 MovZ->getOperand(1).setTargetFlags(OpFlags | AArch64II::MO_G0 |
2044 AArch64II::MO_NC);
2045 MovZ->addOperand(MF, MachineOperand::CreateImm(0));
2046 constrainSelectedInstRegOperands(*MovZ, TII, TRI, RBI);
2047
2048 auto BuildMovK = [&](Register SrcReg, unsigned char Flags, unsigned Offset,
2049 Register ForceDstReg) {
2050 Register DstReg = ForceDstReg
2051 ? ForceDstReg
2052 : MRI.createVirtualRegister(&AArch64::GPR64RegClass);
2053 auto MovI = MIB.buildInstr(AArch64::MOVKXi).addDef(DstReg).addUse(SrcReg);
2054 if (auto *GV = dyn_cast<GlobalValue>(V)) {
2055 MovI->addOperand(MF, MachineOperand::CreateGA(
2056 GV, MovZ->getOperand(1).getOffset(), Flags));
2057 } else {
2058 MovI->addOperand(
2059 MF, MachineOperand::CreateBA(cast<BlockAddress>(V),
2060 MovZ->getOperand(1).getOffset(), Flags));
2061 }
2062 MovI->addOperand(MF, MachineOperand::CreateImm(Offset));
2063 constrainSelectedInstRegOperands(*MovI, TII, TRI, RBI);
2064 return DstReg;
2065 };
2066 Register DstReg = BuildMovK(MovZ.getReg(0),
2067 AArch64II::MO_G1 | AArch64II::MO_NC, 16, 0);
2068 DstReg = BuildMovK(DstReg, AArch64II::MO_G2 | AArch64II::MO_NC, 32, 0);
2069 BuildMovK(DstReg, AArch64II::MO_G3, 48, I.getOperand(0).getReg());
2070}
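// Illustrative sketch (not from the source file): for a global @g under the
// large code model, the sequence built above is roughly:
//
//   movz x0, #:abs_g0_nc:g            ; bits [15:0]
//   movk x0, #:abs_g1_nc:g, lsl #16
//   movk x0, #:abs_g2_nc:g, lsl #32
//   movk x0, #:abs_g3:g,    lsl #48
//
// i.e. one MOVZXi followed by three MOVKXi, each carrying a 16-bit slice of
// the absolute address via the MO_G0..MO_G3 target flags.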
2071
2072bool AArch64InstructionSelector::preISelLower(MachineInstr &I) {
2073 MachineBasicBlock &MBB = *I.getParent();
2074 MachineFunction &MF = *MBB.getParent();
2075 MachineRegisterInfo &MRI = MF.getRegInfo();
2076
2077 switch (I.getOpcode()) {
2078 case TargetOpcode::G_STORE: {
2079 bool Changed = contractCrossBankCopyIntoStore(I, MRI);
2080 MachineOperand &SrcOp = I.getOperand(0);
2081 if (MRI.getType(SrcOp.getReg()).isPointer()) {
2082 // Allow matching with imported patterns for stores of pointers. Unlike
2083 // G_LOAD/G_PTR_ADD, we may not have selected all users. So, emit a copy
2084 // and constrain.
2085 auto Copy = MIB.buildCopy(LLT::scalar(64), SrcOp);
2086 Register NewSrc = Copy.getReg(0);
2087 SrcOp.setReg(NewSrc);
2088 RBI.constrainGenericRegister(NewSrc, AArch64::GPR64RegClass, MRI);
2089 Changed = true;
2090 }
2091 return Changed;
2092 }
2093 case TargetOpcode::G_PTR_ADD:
2094 return convertPtrAddToAdd(I, MRI);
2095 case TargetOpcode::G_LOAD: {
2096 // For scalar loads of pointers, we try to convert the dest type from p0
2097 // to s64 so that our imported patterns can match. Like with the G_PTR_ADD
2098 // conversion, this should be ok because all users should have been
2099 // selected already, so the type doesn't matter for them.
2100 Register DstReg = I.getOperand(0).getReg();
2101 const LLT DstTy = MRI.getType(DstReg);
2102 if (!DstTy.isPointer())
2103 return false;
2104 MRI.setType(DstReg, LLT::scalar(64));
2105 return true;
2106 }
2107 case AArch64::G_DUP: {
2108 // Convert the type from p0 to s64 to help selection.
2109 LLT DstTy = MRI.getType(I.getOperand(0).getReg());
2110 if (!DstTy.isPointerVector())
2111 return false;
2112 auto NewSrc = MIB.buildCopy(LLT::scalar(64), I.getOperand(1).getReg());
2113 MRI.setType(I.getOperand(0).getReg(),
2114 DstTy.changeElementType(LLT::scalar(64)));
2115 MRI.setRegClass(NewSrc.getReg(0), &AArch64::GPR64RegClass);
2116 I.getOperand(1).setReg(NewSrc.getReg(0));
2117 return true;
2118 }
2119 case TargetOpcode::G_UITOFP:
2120 case TargetOpcode::G_SITOFP: {
2121 // If both source and destination regbanks are FPR, then convert the opcode
2122 // to G_SITOF so that the importer can select it to an fpr variant.
2123 // Otherwise, it ends up matching an fpr/gpr variant and adding a cross-bank
2124 // copy.
2125 Register SrcReg = I.getOperand(1).getReg();
2126 LLT SrcTy = MRI.getType(SrcReg);
2127 LLT DstTy = MRI.getType(I.getOperand(0).getReg());
2128 if (SrcTy.isVector() || SrcTy.getSizeInBits() != DstTy.getSizeInBits())
2129 return false;
2130
2131 if (RBI.getRegBank(SrcReg, MRI, TRI)->getID() == AArch64::FPRRegBankID) {
2132 if (I.getOpcode() == TargetOpcode::G_SITOFP)
2133 I.setDesc(TII.get(AArch64::G_SITOF));
2134 else
2135 I.setDesc(TII.get(AArch64::G_UITOF));
2136 return true;
2137 }
2138 return false;
2139 }
2140 default:
2141 return false;
2142 }
2143}
2144
2145/// This lowering tries to look for G_PTR_ADD instructions and then converts
2146/// them to a standard G_ADD with a COPY on the source.
2147///
2148/// The motivation behind this is to expose the add semantics to the imported
2149/// tablegen patterns. We shouldn't need to check for uses being loads/stores,
2150/// because the selector works bottom up, uses before defs. By the time we
2151/// end up trying to select a G_PTR_ADD, we should have already attempted to
2152/// fold this into addressing modes and were therefore unsuccessful.
2153bool AArch64InstructionSelector::convertPtrAddToAdd(
2154 MachineInstr &I, MachineRegisterInfo &MRI) {
2155 assert(I.getOpcode() == TargetOpcode::G_PTR_ADD && "Expected G_PTR_ADD");
2156 Register DstReg = I.getOperand(0).getReg();
2157 Register AddOp1Reg = I.getOperand(1).getReg();
2158 const LLT PtrTy = MRI.getType(DstReg);
2159 if (PtrTy.getAddressSpace() != 0)
2160 return false;
2161
2162 const LLT CastPtrTy =
2163 PtrTy.isVector() ? LLT::fixed_vector(2, 64) : LLT::scalar(64);
2164 auto PtrToInt = MIB.buildPtrToInt(CastPtrTy, AddOp1Reg);
2165 // Set regbanks on the registers.
2166 if (PtrTy.isVector())
2167 MRI.setRegBank(PtrToInt.getReg(0), RBI.getRegBank(AArch64::FPRRegBankID));
2168 else
2169 MRI.setRegBank(PtrToInt.getReg(0), RBI.getRegBank(AArch64::GPRRegBankID));
2170
2171 // Now turn the %dst(p0) = G_PTR_ADD %base, off into:
2172 // %dst(intty) = G_ADD %intbase, off
2173 I.setDesc(TII.get(TargetOpcode::G_ADD));
2174 MRI.setType(DstReg, CastPtrTy);
2175 I.getOperand(1).setReg(PtrToInt.getReg(0));
2176 if (!select(*PtrToInt)) {
2177 LLVM_DEBUG(dbgs() << "Failed to select G_PTRTOINT in convertPtrAddToAdd");
2178 return false;
2179 }
2180
2181 // Also take the opportunity here to try to do some optimization.
2182 // Try to convert this into a G_SUB if the offset is a 0-x negate idiom.
2183 Register NegatedReg;
2184 if (!mi_match(I.getOperand(2).getReg(), MRI, m_Neg(m_Reg(NegatedReg))))
2185 return true;
2186 I.getOperand(2).setReg(NegatedReg);
2187 I.setDesc(TII.get(TargetOpcode::G_SUB));
2188 return true;
2189}
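// Illustrative sketch (not from the source file): the rewrite above turns
//
//   %dst:_(p0) = G_PTR_ADD %base(p0), %off(s64)
//
// into
//
//   %cast:_(s64) = G_PTRTOINT %base(p0)
//   %dst:_(s64)  = G_ADD %cast, %off
//
// and, when %off matches the 0 - %x negate idiom, into G_SUB %cast, %x
// instead, so the imported add/sub patterns can match.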
2190
2191bool AArch64InstructionSelector::earlySelectSHL(MachineInstr &I,
2192 MachineRegisterInfo &MRI) {
2193 // We try to match the immediate variant of LSL, which is actually an alias
2194 // for a special case of UBFM. Otherwise, we fall back to the imported
2195 // selector which will match the register variant.
2196 assert(I.getOpcode() == TargetOpcode::G_SHL && "unexpected op");
2197 const auto &MO = I.getOperand(2);
2198 auto VRegAndVal = getIConstantVRegVal(MO.getReg(), MRI);
2199 if (!VRegAndVal)
2200 return false;
2201
2202 const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
2203 if (DstTy.isVector())
2204 return false;
2205 bool Is64Bit = DstTy.getSizeInBits() == 64;
2206 auto Imm1Fn = Is64Bit ? selectShiftA_64(MO) : selectShiftA_32(MO);
2207 auto Imm2Fn = Is64Bit ? selectShiftB_64(MO) : selectShiftB_32(MO);
2208
2209 if (!Imm1Fn || !Imm2Fn)
2210 return false;
2211
2212 auto NewI =
2213 MIB.buildInstr(Is64Bit ? AArch64::UBFMXri : AArch64::UBFMWri,
2214 {I.getOperand(0).getReg()}, {I.getOperand(1).getReg()});
2215
2216 for (auto &RenderFn : *Imm1Fn)
2217 RenderFn(NewI);
2218 for (auto &RenderFn : *Imm2Fn)
2219 RenderFn(NewI);
2220
2221 I.eraseFromParent();
2222 return constrainSelectedInstRegOperands(*NewI, TII, TRI, RBI);
2223}
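// Illustrative sketch (not from the source file): LSL-by-immediate is an
// alias of UBFM, which is what the selection above emits. For a 32-bit left
// shift by 3:
//
//   lsl w0, w1, #3  ==  ubfm w0, w1, #29, #28
//
// i.e. immr = (32 - 3) % 32 = 29 and imms = 31 - 3 = 28; the selectShiftA_32 /
// selectShiftB_32 renderers produce exactly these two immediates.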
2224
2225bool AArch64InstructionSelector::contractCrossBankCopyIntoStore(
2226 MachineInstr &I, MachineRegisterInfo &MRI) {
2227 assert(I.getOpcode() == TargetOpcode::G_STORE && "Expected G_STORE");
2228 // If we're storing a scalar, it doesn't matter what register bank that
2229 // scalar is on. All that matters is the size.
2230 //
2231 // So, if we see something like this (with a 32-bit scalar as an example):
2232 //
2233 // %x:gpr(s32) = ... something ...
2234 // %y:fpr(s32) = COPY %x:gpr(s32)
2235 // G_STORE %y:fpr(s32)
2236 //
2237 // We can fix this up into something like this:
2238 //
2239 // G_STORE %x:gpr(s32)
2240 //
2241 // And then continue the selection process normally.
2242 Register DefDstReg = getSrcRegIgnoringCopies(I.getOperand(0).getReg(), MRI);
2243 if (!DefDstReg.isValid())
2244 return false;
2245 LLT DefDstTy = MRI.getType(DefDstReg);
2246 Register StoreSrcReg = I.getOperand(0).getReg();
2247 LLT StoreSrcTy = MRI.getType(StoreSrcReg);
2248
2249 // If we get something strange like a physical register, then we shouldn't
2250 // go any further.
2251 if (!DefDstTy.isValid())
2252 return false;
2253
2254 // Are the source and dst types the same size?
2255 if (DefDstTy.getSizeInBits() != StoreSrcTy.getSizeInBits())
2256 return false;
2257
2258 if (RBI.getRegBank(StoreSrcReg, MRI, TRI) ==
2259 RBI.getRegBank(DefDstReg, MRI, TRI))
2260 return false;
2261
2262 // We have a cross-bank copy, which is entering a store. Let's fold it.
2263 I.getOperand(0).setReg(DefDstReg);
2264 return true;
2265}
2266
2267bool AArch64InstructionSelector::earlySelect(MachineInstr &I) {
2268 assert(I.getParent() && "Instruction should be in a basic block!");
2269 assert(I.getParent()->getParent() && "Instruction should be in a function!");
2270
2271 MachineBasicBlock &MBB = *I.getParent();
2272 MachineFunction &MF = *MBB.getParent();
2273 MachineRegisterInfo &MRI = MF.getRegInfo();
2274
2275 switch (I.getOpcode()) {
2276 case AArch64::G_DUP: {
2277 // Before selecting a DUP instruction, check if it is better selected as a
2278 // MOV or load from a constant pool.
2279 Register Src = I.getOperand(1).getReg();
2280 auto ValAndVReg = getAnyConstantVRegValWithLookThrough(Src, MRI);
2281 if (!ValAndVReg)
2282 return false;
2283 LLVMContext &Ctx = MF.getFunction().getContext();
2284 Register Dst = I.getOperand(0).getReg();
2285 auto *CV = ConstantDataVector::getSplat(
2286 MRI.getType(Dst).getNumElements(),
2287 ConstantInt::get(Type::getIntNTy(Ctx, MRI.getType(Src).getSizeInBits()),
2288 ValAndVReg->Value));
2289 if (!emitConstantVector(Dst, CV, MIB, MRI))
2290 return false;
2291 I.eraseFromParent();
2292 return true;
2293 }
2294 case TargetOpcode::G_SEXT:
2295 // Check for i64 sext(i32 vector_extract) prior to tablegen to select SMOV
2296 // over a normal extend.
2297 if (selectUSMovFromExtend(I, MRI))
2298 return true;
2299 return false;
2300 case TargetOpcode::G_BR:
2301 return false;
2302 case TargetOpcode::G_SHL:
2303 return earlySelectSHL(I, MRI);
2304 case TargetOpcode::G_CONSTANT: {
2305 bool IsZero = false;
2306 if (I.getOperand(1).isCImm())
2307 IsZero = I.getOperand(1).getCImm()->isZero();
2308 else if (I.getOperand(1).isImm())
2309 IsZero = I.getOperand(1).getImm() == 0;
2310
2311 if (!IsZero)
2312 return false;
2313
2314 Register DefReg = I.getOperand(0).getReg();
2315 LLT Ty = MRI.getType(DefReg);
2316 if (Ty.getSizeInBits() == 64) {
2317 I.getOperand(1).ChangeToRegister(AArch64::XZR, false);
2318 RBI.constrainGenericRegister(DefReg, AArch64::GPR64RegClass, MRI);
2319 } else if (Ty.getSizeInBits() == 32) {
2320 I.getOperand(1).ChangeToRegister(AArch64::WZR, false);
2321 RBI.constrainGenericRegister(DefReg, AArch64::GPR32RegClass, MRI);
2322 } else
2323 return false;
2324
2325 I.setDesc(TII.get(TargetOpcode::COPY));
2326 return true;
2327 }
2328
2329 case TargetOpcode::G_ADD: {
2330 // Check if this is being fed by a G_ICMP on either side.
2331 //
2332 // (cmp pred, x, y) + z
2333 //
2334 // In the above case, when the cmp is true, we increment z by 1. So, we can
2335 // fold the add into the cset for the cmp by using cinc.
2336 //
2337 // FIXME: This would probably be a lot nicer in PostLegalizerLowering.
2338 Register AddDst = I.getOperand(0).getReg();
2339 Register AddLHS = I.getOperand(1).getReg();
2340 Register AddRHS = I.getOperand(2).getReg();
2341 // Only handle scalars.
2342 LLT Ty = MRI.getType(AddLHS);
2343 if (Ty.isVector())
2344 return false;
2345 // Since G_ICMP is modeled as ADDS/SUBS/ANDS, we can handle 32 bits or 64
2346 // bits.
2347 unsigned Size = Ty.getSizeInBits();
2348 if (Size != 32 && Size != 64)
2349 return false;
2350 auto MatchCmp = [&](Register Reg) -> MachineInstr * {
2351 if (!MRI.hasOneNonDBGUse(Reg))
2352 return nullptr;
2353 // If the LHS of the add is 32 bits, then we want to fold a 32-bit
2354 // compare.
2355 if (Size == 32)
2356 return getOpcodeDef(TargetOpcode::G_ICMP, Reg, MRI);
2357 // We model scalar compares using 32-bit destinations right now.
2358 // If it's a 64-bit compare, it'll have 64-bit sources.
2359 Register ZExt;
2360 if (!mi_match(Reg, MRI,
2361 m_OneNonDBGUse(m_GZExt(m_OneNonDBGUse(m_Reg(ZExt))))))
2362 return nullptr;
2363 auto *Cmp = getOpcodeDef(TargetOpcode::G_ICMP, ZExt, MRI);
2364 if (!Cmp ||
2365 MRI.getType(Cmp->getOperand(2).getReg()).getSizeInBits() != 64)
2366 return nullptr;
2367 return Cmp;
2368 };
2369 // Try to match
2370 // z + (cmp pred, x, y)
2371 MachineInstr *Cmp = MatchCmp(AddRHS);
2372 if (!Cmp) {
2373 // (cmp pred, x, y) + z
2374 std::swap(AddLHS, AddRHS);
2375 Cmp = MatchCmp(AddRHS);
2376 if (!Cmp)
2377 return false;
2378 }
2379 auto &PredOp = Cmp->getOperand(1);
2380 auto Pred = static_cast<CmpInst::Predicate>(PredOp.getPredicate());
2381 const AArch64CC::CondCode InvCC =
2382 changeICMPPredToAArch64CC(CmpInst::getInversePredicate(Pred));
2384 emitIntegerCompare(/*LHS=*/Cmp->getOperand(2),
2385 /*RHS=*/Cmp->getOperand(3), PredOp, MIB);
2386 emitCSINC(/*Dst=*/AddDst, /*Src =*/AddLHS, /*Src2=*/AddLHS, InvCC, MIB);
2387 I.eraseFromParent();
2388 return true;
2389 }
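// Illustrative sketch (not from the source file): the add-of-compare fold
// above turns, e.g.,
//
//   %c = G_ICMP intpred(eq), %x, %y
//   %a = G_ADD %z, %c
//
// into a flag-setting compare plus CSINC %a, %z, %z, ne (the inverse
// condition), which is the same as cinc %a, %z, eq: %a = %z + 1 when the
// compare is true, %z otherwise.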
2390 case TargetOpcode::G_OR: {
2391 // Look for operations that take the lower `Width=Size-ShiftImm` bits of
2392 // `ShiftSrc` and insert them into the upper `Width` bits of `MaskSrc` via
2393 // shifting and masking that we can replace with a BFI (encoded as a BFM).
2394 Register Dst = I.getOperand(0).getReg();
2395 LLT Ty = MRI.getType(Dst);
2396
2397 if (!Ty.isScalar())
2398 return false;
2399
2400 unsigned Size = Ty.getSizeInBits();
2401 if (Size != 32 && Size != 64)
2402 return false;
2403
2404 Register ShiftSrc;
2405 int64_t ShiftImm;
2406 Register MaskSrc;
2407 int64_t MaskImm;
2408 if (!mi_match(
2409 Dst, MRI,
2410 m_GOr(m_OneNonDBGUse(m_GShl(m_Reg(ShiftSrc), m_ICst(ShiftImm))),
2411 m_OneNonDBGUse(m_GAnd(m_Reg(MaskSrc), m_ICst(MaskImm))))))
2412 return false;
2413
2414 if (ShiftImm > Size || ((1ULL << ShiftImm) - 1ULL) != uint64_t(MaskImm))
2415 return false;
2416
2417 int64_t Immr = Size - ShiftImm;
2418 int64_t Imms = Size - ShiftImm - 1;
2419 unsigned Opc = Size == 32 ? AArch64::BFMWri : AArch64::BFMXri;
2420 emitInstr(Opc, {Dst}, {MaskSrc, ShiftSrc, Immr, Imms}, MIB);
2421 I.eraseFromParent();
2422 return true;
2423 }
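// Illustrative sketch (not from the source file): the shift+mask pattern
// above maps onto a bitfield insert. For Size = 32, ShiftImm = 8 and
// MaskImm = 0xff:
//
//   %dst = G_OR (G_SHL %hi, 8), (G_AND %lo, 0xff)
//
// becomes BFMWri %lo, %hi, 24, 23 (Immr = 32 - 8, Imms = 32 - 8 - 1), i.e. a
// bfi of the low 24 bits of %hi into bits [31:8] of %lo.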
2424 case TargetOpcode::G_FENCE: {
2425 if (I.getOperand(1).getImm() == 0)
2426 BuildMI(MBB, I, MIMetadata(I), TII.get(TargetOpcode::MEMBARRIER));
2427 else
2428 BuildMI(MBB, I, MIMetadata(I), TII.get(AArch64::DMB))
2429 .addImm(I.getOperand(0).getImm() == 4 ? 0x9 : 0xb);
2430 I.eraseFromParent();
2431 return true;
2432 }
2433 default:
2434 return false;
2435 }
2436}
2437
2438bool AArch64InstructionSelector::select(MachineInstr &I) {
2439 assert(I.getParent() && "Instruction should be in a basic block!");
2440 assert(I.getParent()->getParent() && "Instruction should be in a function!");
2441
2442 MachineBasicBlock &MBB = *I.getParent();
2443 MachineFunction &MF = *MBB.getParent();
2445
2446 const AArch64Subtarget *Subtarget = &MF.getSubtarget<AArch64Subtarget>();
2447 if (Subtarget->requiresStrictAlign()) {
2448 // We don't support this feature yet.
2449 LLVM_DEBUG(dbgs() << "AArch64 GISel does not support strict-align yet\n");
2450 return false;
2451 }
2452
2453 MIB.setInstrAndDebugLoc(I);
2454
2455 unsigned Opcode = I.getOpcode();
2456 // G_PHI requires same handling as PHI
2457 if (!I.isPreISelOpcode() || Opcode == TargetOpcode::G_PHI) {
2458 // Certain non-generic instructions also need some special handling.
2459
2460 if (Opcode == TargetOpcode::LOAD_STACK_GUARD)
2461 return selectImpl(I, *CoverageInfo);
2462
2463 if (Opcode == TargetOpcode::PHI || Opcode == TargetOpcode::G_PHI) {
2464 const Register DefReg = I.getOperand(0).getReg();
2465 const LLT DefTy = MRI.getType(DefReg);
2466
2467 const RegClassOrRegBank &RegClassOrBank =
2468 MRI.getRegClassOrRegBank(DefReg);
2469
2470 const TargetRegisterClass *DefRC
2471 = RegClassOrBank.dyn_cast<const TargetRegisterClass *>();
2472 if (!DefRC) {
2473 if (!DefTy.isValid()) {
2474 LLVM_DEBUG(dbgs() << "PHI operand has no type, not a gvreg?\n");
2475 return false;
2476 }
2477 const RegisterBank &RB = *RegClassOrBank.get<const RegisterBank *>();
2478 DefRC = getRegClassForTypeOnBank(DefTy, RB);
2479 if (!DefRC) {
2480 LLVM_DEBUG(dbgs() << "PHI operand has unexpected size/bank\n");
2481 return false;
2482 }
2483 }
2484
2485 I.setDesc(TII.get(TargetOpcode::PHI));
2486
2487 return RBI.constrainGenericRegister(DefReg, *DefRC, MRI);
2488 }
2489
2490 if (I.isCopy())
2491 return selectCopy(I, TII, MRI, TRI, RBI);
2492
2493 if (I.isDebugInstr())
2494 return selectDebugInstr(I, MRI, RBI);
2495
2496 return true;
2497 }
2498
2499
2500 if (I.getNumOperands() != I.getNumExplicitOperands()) {
2501 LLVM_DEBUG(
2502 dbgs() << "Generic instruction has unexpected implicit operands\n");
2503 return false;
2504 }
2505
2506 // Try to do some lowering before we start instruction selecting. These
2507 // lowerings are purely transformations on the input G_MIR and so selection
2508 // must continue after any modification of the instruction.
2509 if (preISelLower(I)) {
2510 Opcode = I.getOpcode(); // The opcode may have been modified, refresh it.
2511 }
2512
2513 // There may be patterns where the importer can't deal with them optimally,
2514 // but does select it to a suboptimal sequence so our custom C++ selection
2515 // code later never has a chance to work on it. Therefore, we have an early
2516 // selection attempt here to give priority to certain selection routines
2517 // over the imported ones.
2518 if (earlySelect(I))
2519 return true;
2520
2521 if (selectImpl(I, *CoverageInfo))
2522 return true;
2523
2524 LLT Ty =
2525 I.getOperand(0).isReg() ? MRI.getType(I.getOperand(0).getReg()) : LLT{};
2526
2527 switch (Opcode) {
2528 case TargetOpcode::G_SBFX:
2529 case TargetOpcode::G_UBFX: {
2530 static const unsigned OpcTable[2][2] = {
2531 {AArch64::UBFMWri, AArch64::UBFMXri},
2532 {AArch64::SBFMWri, AArch64::SBFMXri}};
2533 bool IsSigned = Opcode == TargetOpcode::G_SBFX;
2534 unsigned Size = Ty.getSizeInBits();
2535 unsigned Opc = OpcTable[IsSigned][Size == 64];
2536 auto Cst1 =
2537 getIConstantVRegValWithLookThrough(I.getOperand(2).getReg(), MRI);
2538 assert(Cst1 && "Should have gotten a constant for src 1?");
2539 auto Cst2 =
2540 getIConstantVRegValWithLookThrough(I.getOperand(3).getReg(), MRI);
2541 assert(Cst2 && "Should have gotten a constant for src 2?");
2542 auto LSB = Cst1->Value.getZExtValue();
2543 auto Width = Cst2->Value.getZExtValue();
2544 auto BitfieldInst =
2545 MIB.buildInstr(Opc, {I.getOperand(0)}, {I.getOperand(1)})
2546 .addImm(LSB)
2547 .addImm(LSB + Width - 1);
2548 I.eraseFromParent();
2549 return constrainSelectedInstRegOperands(*BitfieldInst, TII, TRI, RBI);
2550 }
2551 case TargetOpcode::G_BRCOND:
2552 return selectCompareBranch(I, MF, MRI);
2553
2554 case TargetOpcode::G_BRINDIRECT: {
2555 I.setDesc(TII.get(AArch64::BR));
2556 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2557 }
2558
2559 case TargetOpcode::G_BRJT:
2560 return selectBrJT(I, MRI);
2561
2562 case AArch64::G_ADD_LOW: {
2563 // This op may have been separated from its ADRP companion by the localizer
2564 // or some other code motion pass. Given that many CPUs will try to
2565 // macro fuse these operations anyway, select this into a MOVaddr pseudo
2566 // which will later be expanded into an ADRP+ADD pair after scheduling.
2567 MachineInstr *BaseMI = MRI.getVRegDef(I.getOperand(1).getReg());
2568 if (BaseMI->getOpcode() != AArch64::ADRP) {
2569 I.setDesc(TII.get(AArch64::ADDXri));
2570 I.addOperand(MachineOperand::CreateImm(0));
2571 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2572 }
2573 assert(TM.getCodeModel() == CodeModel::Small &&
2574 "Expected small code model");
2575 auto Op1 = BaseMI->getOperand(1);
2576 auto Op2 = I.getOperand(2);
2577 auto MovAddr = MIB.buildInstr(AArch64::MOVaddr, {I.getOperand(0)}, {})
2578 .addGlobalAddress(Op1.getGlobal(), Op1.getOffset(),
2579 Op1.getTargetFlags())
2580 .addGlobalAddress(Op2.getGlobal(), Op2.getOffset(),
2581 Op2.getTargetFlags());
2582 I.eraseFromParent();
2583 return constrainSelectedInstRegOperands(*MovAddr, TII, TRI, RBI);
2584 }
2585
2586 case TargetOpcode::G_FCONSTANT:
2587 case TargetOpcode::G_CONSTANT: {
2588 const bool isFP = Opcode == TargetOpcode::G_FCONSTANT;
2589
2590 const LLT s8 = LLT::scalar(8);
2591 const LLT s16 = LLT::scalar(16);
2592 const LLT s32 = LLT::scalar(32);
2593 const LLT s64 = LLT::scalar(64);
2594 const LLT s128 = LLT::scalar(128);
2595 const LLT p0 = LLT::pointer(0, 64);
2596
2597 const Register DefReg = I.getOperand(0).getReg();
2598 const LLT DefTy = MRI.getType(DefReg);
2599 const unsigned DefSize = DefTy.getSizeInBits();
2600 const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI);
2601
2602 // FIXME: Redundant check, but even less readable when factored out.
2603 if (isFP) {
2604 if (Ty != s16 && Ty != s32 && Ty != s64 && Ty != s128) {
2605 LLVM_DEBUG(dbgs() << "Unable to materialize FP " << Ty
2606 << " constant, expected: " << s16 << " or " << s32
2607 << " or " << s64 << " or " << s128 << '\n');
2608 return false;
2609 }
2610
2611 if (RB.getID() != AArch64::FPRRegBankID) {
2612 LLVM_DEBUG(dbgs() << "Unable to materialize FP " << Ty
2613 << " constant on bank: " << RB
2614 << ", expected: FPR\n");
2615 return false;
2616 }
2617
2618 // The case when we have 0.0 is covered by tablegen. Reject it here so we
2619 // can be sure tablegen works correctly and isn't rescued by this code.
2620 // 0.0 is not covered by tablegen for FP128. So we will handle this
2621 // scenario in the code here.
2622 if (DefSize != 128 && I.getOperand(1).getFPImm()->isExactlyValue(0.0))
2623 return false;
2624 } else {
2625 // s32 and s64 are covered by tablegen.
2626 if (Ty != p0 && Ty != s8 && Ty != s16) {
2627 LLVM_DEBUG(dbgs() << "Unable to materialize integer " << Ty
2628 << " constant, expected: " << s32 << ", " << s64
2629 << ", or " << p0 << '\n');
2630 return false;
2631 }
2632
2633 if (RB.getID() != AArch64::GPRRegBankID) {
2634 LLVM_DEBUG(dbgs() << "Unable to materialize integer " << Ty
2635 << " constant on bank: " << RB
2636 << ", expected: GPR\n");
2637 return false;
2638 }
2639 }
2640
2641 if (isFP) {
2642 const TargetRegisterClass &FPRRC = *getRegClassForTypeOnBank(DefTy, RB);
2643 // For 16, 64, and 128b values, emit a constant pool load.
2644 switch (DefSize) {
2645 default:
2646 llvm_unreachable("Unexpected destination size for G_FCONSTANT?");
2647 case 32:
2648 case 64: {
2649 bool OptForSize = shouldOptForSize(&MF);
2650 const auto &TLI = MF.getSubtarget().getTargetLowering();
2651 // If TLI says that this fpimm is illegal, then we'll expand to a
2652 // constant pool load.
2653 if (TLI->isFPImmLegal(I.getOperand(1).getFPImm()->getValueAPF(),
2654 EVT::getFloatingPointVT(DefSize), OptForSize))
2655 break;
2656 [[fallthrough]];
2657 }
2658 case 16:
2659 case 128: {
2660 auto *FPImm = I.getOperand(1).getFPImm();
2661 auto *LoadMI = emitLoadFromConstantPool(FPImm, MIB);
2662 if (!LoadMI) {
2663 LLVM_DEBUG(dbgs() << "Failed to load double constant pool entry\n");
2664 return false;
2665 }
2666 MIB.buildCopy({DefReg}, {LoadMI->getOperand(0).getReg()});
2667 I.eraseFromParent();
2668 return RBI.constrainGenericRegister(DefReg, FPRRC, MRI);
2669 }
2670 }
2671
2672 assert((DefSize == 32 || DefSize == 64) && "Unexpected const def size");
2673 // Either emit a FMOV, or emit a copy to emit a normal mov.
2674 const Register DefGPRReg = MRI.createVirtualRegister(
2675 DefSize == 32 ? &AArch64::GPR32RegClass : &AArch64::GPR64RegClass);
2676 MachineOperand &RegOp = I.getOperand(0);
2677 RegOp.setReg(DefGPRReg);
2678 MIB.setInsertPt(MIB.getMBB(), std::next(I.getIterator()));
2679 MIB.buildCopy({DefReg}, {DefGPRReg});
2680
2681 if (!RBI.constrainGenericRegister(DefReg, FPRRC, MRI)) {
2682 LLVM_DEBUG(dbgs() << "Failed to constrain G_FCONSTANT def operand\n");
2683 return false;
2684 }
2685
2686 MachineOperand &ImmOp = I.getOperand(1);
2687 // FIXME: Is going through int64_t always correct?
2688 ImmOp.ChangeToImmediate(
2689 ImmOp.getFPImm()->getValueAPF().bitcastToAPInt().getZExtValue());
2690 } else if (I.getOperand(1).isCImm()) {
2691 uint64_t Val = I.getOperand(1).getCImm()->getZExtValue();
2692 I.getOperand(1).ChangeToImmediate(Val);
2693 } else if (I.getOperand(1).isImm()) {
2694 uint64_t Val = I.getOperand(1).getImm();
2695 I.getOperand(1).ChangeToImmediate(Val);
2696 }
2697
2698 const unsigned MovOpc =
2699 DefSize == 64 ? AArch64::MOVi64imm : AArch64::MOVi32imm;
2700 I.setDesc(TII.get(MovOpc));
2701 constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2702 return true;
2703 }
2704 case TargetOpcode::G_EXTRACT: {
2705 Register DstReg = I.getOperand(0).getReg();
2706 Register SrcReg = I.getOperand(1).getReg();
2707 LLT SrcTy = MRI.getType(SrcReg);
2708 LLT DstTy = MRI.getType(DstReg);
2709 (void)DstTy;
2710 unsigned SrcSize = SrcTy.getSizeInBits();
2711
2712 if (SrcTy.getSizeInBits() > 64) {
2713 // This should be an extract of an s128, which is like a vector extract.
2714 if (SrcTy.getSizeInBits() != 128)
2715 return false;
2716 // Only support extracting 64 bits from an s128 at the moment.
2717 if (DstTy.getSizeInBits() != 64)
2718 return false;
2719
2720 unsigned Offset = I.getOperand(2).getImm();
2721 if (Offset % 64 != 0)
2722 return false;
2723
2724 // Check we have the right regbank always.
2725 const RegisterBank &SrcRB = *RBI.getRegBank(SrcReg, MRI, TRI);
2726 const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
2727 assert(SrcRB.getID() == DstRB.getID() && "Wrong extract regbank!");
2728
2729 if (SrcRB.getID() == AArch64::GPRRegBankID) {
2730 auto NewI =
2731 MIB.buildInstr(TargetOpcode::COPY, {DstReg}, {})
2732 .addUse(SrcReg, 0,
2733 Offset == 0 ? AArch64::sube64 : AArch64::subo64);
2734 constrainOperandRegClass(MF, TRI, MRI, TII, RBI, *NewI,
2735 AArch64::GPR64RegClass, NewI->getOperand(0));
2736 I.eraseFromParent();
2737 return true;
2738 }
2739
2740 // Emit the same code as a vector extract.
2741 // Offset must be a multiple of 64.
2742 unsigned LaneIdx = Offset / 64;
2743 MachineInstr *Extract = emitExtractVectorElt(
2744 DstReg, DstRB, LLT::scalar(64), SrcReg, LaneIdx, MIB);
2745 if (!Extract)
2746 return false;
2747 I.eraseFromParent();
2748 return true;
2749 }
2750
2751 I.setDesc(TII.get(SrcSize == 64 ? AArch64::UBFMXri : AArch64::UBFMWri));
2752 MachineInstrBuilder(MF, I).addImm(I.getOperand(2).getImm() +
2753 Ty.getSizeInBits() - 1);
2754
2755 if (SrcSize < 64) {
2756 assert(SrcSize == 32 && DstTy.getSizeInBits() == 16 &&
2757 "unexpected G_EXTRACT types");
2758 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2759 }
2760
2761 DstReg = MRI.createGenericVirtualRegister(LLT::scalar(64));
2762 MIB.setInsertPt(MIB.getMBB(), std::next(I.getIterator()));
2763 MIB.buildInstr(TargetOpcode::COPY, {I.getOperand(0).getReg()}, {})
2764 .addReg(DstReg, 0, AArch64::sub_32);
2765 RBI.constrainGenericRegister(I.getOperand(0).getReg(),
2766 AArch64::GPR32RegClass, MRI);
2767 I.getOperand(0).setReg(DstReg);
2768
2769 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2770 }
2771
2772 case TargetOpcode::G_INSERT: {
2773 LLT SrcTy = MRI.getType(I.getOperand(2).getReg());
2774 LLT DstTy = MRI.getType(I.getOperand(0).getReg());
2775 unsigned DstSize = DstTy.getSizeInBits();
2776 // Larger inserts are vectors, same-size ones should be something else by
2777 // now (split up or turned into COPYs).
2778 if (Ty.getSizeInBits() > 64 || SrcTy.getSizeInBits() > 32)
2779 return false;
2780
2781 I.setDesc(TII.get(DstSize == 64 ? AArch64::BFMXri : AArch64::BFMWri));
2782 unsigned LSB = I.getOperand(3).getImm();
2783 unsigned Width = MRI.getType(I.getOperand(2).getReg()).getSizeInBits();
2784 I.getOperand(3).setImm((DstSize - LSB) % DstSize);
2785 MachineInstrBuilder(MF, I).addImm(Width - 1);
2786
2787 if (DstSize < 64) {
2788 assert(DstSize == 32 && SrcTy.getSizeInBits() == 16 &&
2789 "unexpected G_INSERT types");
2790 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2791 }
2792
2793 Register SrcReg = MRI.createGenericVirtualRegister(LLT::scalar(64));
2794 BuildMI(MBB, I.getIterator(), I.getDebugLoc(),
2795 TII.get(AArch64::SUBREG_TO_REG))
2796 .addDef(SrcReg)
2797 .addImm(0)
2798 .addUse(I.getOperand(2).getReg())
2799 .addImm(AArch64::sub_32);
2800 RBI.constrainGenericRegister(I.getOperand(2).getReg(),
2801 AArch64::GPR32RegClass, MRI);
2802 I.getOperand(2).setReg(SrcReg);
2803
2804 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2805 }
2806 case TargetOpcode::G_FRAME_INDEX: {
2807 // allocas and G_FRAME_INDEX are only supported in addrspace(0).
2808 if (Ty != LLT::pointer(0, 64)) {
2809 LLVM_DEBUG(dbgs() << "G_FRAME_INDEX pointer has type: " << Ty
2810 << ", expected: " << LLT::pointer(0, 64) << '\n');
2811 return false;
2812 }
2813 I.setDesc(TII.get(AArch64::ADDXri));
2814
2815 // MOs for a #0 shifted immediate.
2816 I.addOperand(MachineOperand::CreateImm(0));
2817 I.addOperand(MachineOperand::CreateImm(0));
2818
2819 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2820 }
2821
2822 case TargetOpcode::G_GLOBAL_VALUE: {
2823 const GlobalValue *GV = nullptr;
2824 unsigned OpFlags;
2825 if (I.getOperand(1).isSymbol()) {
2826 OpFlags = I.getOperand(1).getTargetFlags();
2827 // Currently only used by "RtLibUseGOT".
2828 assert(OpFlags == AArch64II::MO_GOT);
2829 } else {
2830 GV = I.getOperand(1).getGlobal();
2831 if (GV->isThreadLocal())
2832 return selectTLSGlobalValue(I, MRI);
2833 OpFlags = STI.ClassifyGlobalReference(GV, TM);
2834 }
2835
2836 if (OpFlags & AArch64II::MO_GOT) {
2837 I.setDesc(TII.get(AArch64::LOADgot));
2838 I.getOperand(1).setTargetFlags(OpFlags);
2839 } else if (TM.getCodeModel() == CodeModel::Large &&
2840 !TM.isPositionIndependent()) {
2841 // Materialize the global using movz/movk instructions.
2842 materializeLargeCMVal(I, GV, OpFlags);
2843 I.eraseFromParent();
2844 return true;
2845 } else if (TM.getCodeModel() == CodeModel::Tiny) {
2846 I.setDesc(TII.get(AArch64::ADR));
2847 I.getOperand(1).setTargetFlags(OpFlags);
2848 } else {
2849 I.setDesc(TII.get(AArch64::MOVaddr));
2850 I.getOperand(1).setTargetFlags(OpFlags | AArch64II::MO_PAGE);
2851 MachineInstrBuilder MIB(MF, I);
2852 MIB.addGlobalAddress(GV, I.getOperand(1).getOffset(),
2853 OpFlags | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
2854 }
2855 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2856 }
2857
2858 case TargetOpcode::G_PTRAUTH_GLOBAL_VALUE:
2859 return selectPtrAuthGlobalValue(I, MRI);
2860
2861 case TargetOpcode::G_ZEXTLOAD:
2862 case TargetOpcode::G_LOAD:
2863 case TargetOpcode::G_STORE: {
2864 GLoadStore &LdSt = cast<GLoadStore>(I);
2865 bool IsZExtLoad = I.getOpcode() == TargetOpcode::G_ZEXTLOAD;
2866 LLT PtrTy = MRI.getType(LdSt.getPointerReg());
2867
2868 if (PtrTy != LLT::pointer(0, 64)) {
2869 LLVM_DEBUG(dbgs() << "Load/Store pointer has type: " << PtrTy
2870 << ", expected: " << LLT::pointer(0, 64) << '\n');
2871 return false;
2872 }
2873
2874 uint64_t MemSizeInBytes = LdSt.getMemSize().getValue();
2875 unsigned MemSizeInBits = LdSt.getMemSizeInBits().getValue();
2876 AtomicOrdering Order = LdSt.getMMO().getSuccessOrdering();
2877
2878 // Need special instructions for atomics that affect ordering.
2879 if (Order != AtomicOrdering::NotAtomic &&
2880 Order != AtomicOrdering::Unordered &&
2881 Order != AtomicOrdering::Monotonic) {
2882 assert(!isa<GZExtLoad>(LdSt));
2883 assert(MemSizeInBytes <= 8 &&
2884 "128-bit atomics should already be custom-legalized");
2885
2886 if (isa<GLoad>(LdSt)) {
2887 static constexpr unsigned LDAPROpcodes[] = {
2888 AArch64::LDAPRB, AArch64::LDAPRH, AArch64::LDAPRW, AArch64::LDAPRX};
2889 static constexpr unsigned LDAROpcodes[] = {
2890 AArch64::LDARB, AArch64::LDARH, AArch64::LDARW, AArch64::LDARX};
2891 ArrayRef<unsigned> Opcodes =
2892 STI.hasRCPC() && Order != AtomicOrdering::SequentiallyConsistent
2893 ? LDAPROpcodes
2894 : LDAROpcodes;
2895 I.setDesc(TII.get(Opcodes[Log2_32(MemSizeInBytes)]));
2896 } else {
2897 static constexpr unsigned Opcodes[] = {AArch64::STLRB, AArch64::STLRH,
2898 AArch64::STLRW, AArch64::STLRX};
2899 Register ValReg = LdSt.getReg(0);
2900 if (MRI.getType(ValReg).getSizeInBits() == 64 && MemSizeInBits != 64) {
2901 // Emit a subreg copy of 32 bits.
2902 Register NewVal = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
2903 MIB.buildInstr(TargetOpcode::COPY, {NewVal}, {})
2904 .addReg(I.getOperand(0).getReg(), 0, AArch64::sub_32);
2905 I.getOperand(0).setReg(NewVal);
2906 }
2907 I.setDesc(TII.get(Opcodes[Log2_32(MemSizeInBytes)]));
2908 }
2909 constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2910 return true;
2911 }
2912
2913#ifndef NDEBUG
2914 const Register PtrReg = LdSt.getPointerReg();
2915 const RegisterBank &PtrRB = *RBI.getRegBank(PtrReg, MRI, TRI);
2916 // Check that the pointer register is valid.
2917 assert(PtrRB.getID() == AArch64::GPRRegBankID &&
2918 "Load/Store pointer operand isn't a GPR");
2919 assert(MRI.getType(PtrReg).isPointer() &&
2920 "Load/Store pointer operand isn't a pointer");
2921#endif
2922
2923 const Register ValReg = LdSt.getReg(0);
2924 const LLT ValTy = MRI.getType(ValReg);
2925 const RegisterBank &RB = *RBI.getRegBank(ValReg, MRI, TRI);
2926
2927 // The code below doesn't support truncating stores, so we need to split it
2928 // again.
2929 if (isa<GStore>(LdSt) && ValTy.getSizeInBits() > MemSizeInBits) {
2930 unsigned SubReg;
2931 LLT MemTy = LdSt.getMMO().getMemoryType();
2932 auto *RC = getRegClassForTypeOnBank(MemTy, RB);
2933 if (!getSubRegForClass(RC, TRI, SubReg))
2934 return false;
2935
2936 // Generate a subreg copy.
2937 auto Copy = MIB.buildInstr(TargetOpcode::COPY, {MemTy}, {})
2938 .addReg(ValReg, 0, SubReg)
2939 .getReg(0);
2940 RBI.constrainGenericRegister(Copy, *RC, MRI);
2941 LdSt.getOperand(0).setReg(Copy);
2942 } else if (isa<GLoad>(LdSt) && ValTy.getSizeInBits() > MemSizeInBits) {
2943 // If this is an any-extending load from the FPR bank, split it into a regular
2944 // load + extend.
2945 if (RB.getID() == AArch64::FPRRegBankID) {
2946 unsigned SubReg;
2947 LLT MemTy = LdSt.getMMO().getMemoryType();
2948 auto *RC = getRegClassForTypeOnBank(MemTy, RB);
2949 if (!getSubRegForClass(RC, TRI, SubReg))
2950 return false;
2951 Register OldDst = LdSt.getReg(0);
2952 Register NewDst =
2953 MRI.createGenericVirtualRegister(LdSt.getMMO().getMemoryType());
2954 LdSt.getOperand(0).setReg(NewDst);
2955 MRI.setRegBank(NewDst, RB);
2956 // Generate a SUBREG_TO_REG to extend it.
2957 MIB.setInsertPt(MIB.getMBB(), std::next(LdSt.getIterator()));
2958 MIB.buildInstr(AArch64::SUBREG_TO_REG, {OldDst}, {})
2959 .addImm(0)
2960 .addUse(NewDst)
2961 .addImm(SubReg);
2962 auto SubRegRC = getRegClassForTypeOnBank(MRI.getType(OldDst), RB);
2963 RBI.constrainGenericRegister(OldDst, *SubRegRC, MRI);
2964 MIB.setInstr(LdSt);
2965 }
2966 }
2967
2968 // Helper lambda for partially selecting I. Either returns the original
2969 // instruction with an updated opcode, or a new instruction.
2970 auto SelectLoadStoreAddressingMode = [&]() -> MachineInstr * {
2971 bool IsStore = isa<GStore>(I);
2972 const unsigned NewOpc =
2973 selectLoadStoreUIOp(I.getOpcode(), RB.getID(), MemSizeInBits);
2974 if (NewOpc == I.getOpcode())
2975 return nullptr;
2976 // Check if we can fold anything into the addressing mode.
2977 auto AddrModeFns =
2978 selectAddrModeIndexed(I.getOperand(1), MemSizeInBytes);
2979 if (!AddrModeFns) {
2980 // Can't fold anything. Use the original instruction.
2981 I.setDesc(TII.get(NewOpc));
2982 I.addOperand(MachineOperand::CreateImm(0));
2983 return &I;
2984 }
2985
2986 // Folded something. Create a new instruction and return it.
2987 auto NewInst = MIB.buildInstr(NewOpc, {}, {}, I.getFlags());
2988 Register CurValReg = I.getOperand(0).getReg();
2989 IsStore ? NewInst.addUse(CurValReg) : NewInst.addDef(CurValReg);
2990 NewInst.cloneMemRefs(I);
2991 for (auto &Fn : *AddrModeFns)
2992 Fn(NewInst);
2993 I.eraseFromParent();
2994 return &*NewInst;
2995 };
2996
2997 MachineInstr *LoadStore = SelectLoadStoreAddressingMode();
2998 if (!LoadStore)
2999 return false;
3000
3001 // If we're storing a 0, use WZR/XZR.
3002 if (Opcode == TargetOpcode::G_STORE) {
3003 auto CVal = getIConstantVRegValWithLookThrough(
3004 LoadStore->getOperand(0).getReg(), MRI);
3005 if (CVal && CVal->Value == 0) {
3006 switch (LoadStore->getOpcode()) {
3007 case AArch64::STRWui:
3008 case AArch64::STRHHui:
3009 case AArch64::STRBBui:
3010 LoadStore->getOperand(0).setReg(AArch64::WZR);
3011 break;
3012 case AArch64::STRXui:
3013 LoadStore->getOperand(0).setReg(AArch64::XZR);
3014 break;
3015 }
3016 }
3017 }
3018
3019 if (IsZExtLoad || (Opcode == TargetOpcode::G_LOAD &&
3020 ValTy == LLT::scalar(64) && MemSizeInBits == 32)) {
3021 // The any/zextload from a smaller type to i32 should be handled by the
3022 // importer.
3023 if (MRI.getType(LoadStore->getOperand(0).getReg()).getSizeInBits() != 64)
3024 return false;
3025 // If we have an extending load then change the load's type to be a
3026 // narrower reg and zero_extend with SUBREG_TO_REG.
3027 Register LdReg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
3028 Register DstReg = LoadStore->getOperand(0).getReg();
3029 LoadStore->getOperand(0).setReg(LdReg);
3030
3031 MIB.setInsertPt(MIB.getMBB(), std::next(LoadStore->getIterator()));
3032 MIB.buildInstr(AArch64::SUBREG_TO_REG, {DstReg}, {})
3033 .addImm(0)
3034 .addUse(LdReg)
3035 .addImm(AArch64::sub_32);
3036 constrainSelectedInstRegOperands(*LoadStore, TII, TRI, RBI);
3037 return RBI.constrainGenericRegister(DstReg, AArch64::GPR64allRegClass,
3038 MRI);
3039 }
3040 return constrainSelectedInstRegOperands(*LoadStore, TII, TRI, RBI);
3041 }
3042
3043 case TargetOpcode::G_INDEXED_ZEXTLOAD:
3044 case TargetOpcode::G_INDEXED_SEXTLOAD:
3045 return selectIndexedExtLoad(I, MRI);
3046 case TargetOpcode::G_INDEXED_LOAD:
3047 return selectIndexedLoad(I, MRI);
3048 case TargetOpcode::G_INDEXED_STORE:
3049 return selectIndexedStore(cast<GIndexedStore>(I), MRI);
3050
3051 case TargetOpcode::G_LSHR:
3052 case TargetOpcode::G_ASHR:
3053 if (MRI.getType(I.getOperand(0).getReg()).isVector())
3054 return selectVectorAshrLshr(I, MRI);
3055 [[fallthrough]];
3056 case TargetOpcode::G_SHL:
3057 if (Opcode == TargetOpcode::G_SHL &&
3058 MRI.getType(I.getOperand(0).getReg()).isVector())
3059 return selectVectorSHL(I, MRI);
3060
3061 // These shifts were legalized to have 64 bit shift amounts because we
3062 // want to take advantage of the selection patterns that assume the
3063 // immediates are s64s; however, selectBinaryOp will assume both operands
3064 // have the same bit size.
3065 {
3066 Register SrcReg = I.getOperand(1).getReg();
3067 Register ShiftReg = I.getOperand(2).getReg();
3068 const LLT ShiftTy = MRI.getType(ShiftReg);
3069 const LLT SrcTy = MRI.getType(SrcReg);
3070 if (!SrcTy.isVector() && SrcTy.getSizeInBits() == 32 &&
3071 ShiftTy.getSizeInBits() == 64) {
3072 assert(!ShiftTy.isVector() && "unexpected vector shift ty");
3073 // Insert a subregister copy to implement a 64->32 trunc
3074 auto Trunc = MIB.buildInstr(TargetOpcode::COPY, {SrcTy}, {})
3075 .addReg(ShiftReg, 0, AArch64::sub_32);
3076 MRI.setRegBank(Trunc.getReg(0), RBI.getRegBank(AArch64::GPRRegBankID));
3077 I.getOperand(2).setReg(Trunc.getReg(0));
3078 }
3079 }
3080 [[fallthrough]];
3081 case TargetOpcode::G_OR: {
3082 // Reject the various things we don't support yet.
3083 if (unsupportedBinOp(I, RBI, MRI, TRI))
3084 return false;
3085
3086 const unsigned OpSize = Ty.getSizeInBits();
3087
3088 const Register DefReg = I.getOperand(0).getReg();
3089 const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI);
3090
3091 const unsigned NewOpc = selectBinaryOp(I.getOpcode(), RB.getID(), OpSize);
3092 if (NewOpc == I.getOpcode())
3093 return false;
3094
3095 I.setDesc(TII.get(NewOpc));
3096 // FIXME: Should the type be always reset in setDesc?
3097
3098 // Now that we selected an opcode, we need to constrain the register
3099 // operands to use appropriate classes.
3100 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3101 }
3102
3103 case TargetOpcode::G_PTR_ADD: {
3104 emitADD(I.getOperand(0).getReg(), I.getOperand(1), I.getOperand(2), MIB);
3105 I.eraseFromParent();
3106 return true;
3107 }
3108
3109 case TargetOpcode::G_SADDE:
3110 case TargetOpcode::G_UADDE:
3111 case TargetOpcode::G_SSUBE:
3112 case TargetOpcode::G_USUBE:
3113 case TargetOpcode::G_SADDO:
3114 case TargetOpcode::G_UADDO:
3115 case TargetOpcode::G_SSUBO:
3116 case TargetOpcode::G_USUBO:
3117 return selectOverflowOp(I, MRI);
3118
3119 case TargetOpcode::G_PTRMASK: {
3120 Register MaskReg = I.getOperand(2).getReg();
3121 std::optional<int64_t> MaskVal = getIConstantVRegSExtVal(MaskReg, MRI);
3122 // TODO: Implement arbitrary cases
3123 if (!MaskVal || !isShiftedMask_64(*MaskVal))
3124 return false;
3125
3126 uint64_t Mask = *MaskVal;
3127 I.setDesc(TII.get(AArch64::ANDXri));
3128 I.getOperand(2).ChangeToImmediate(
3129 AArch64_AM::encodeLogicalImmediate(Mask, 64));
3130
3131 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3132 }
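// Illustrative sketch (not from the source file): only masks forming a single
// contiguous run of ones are handled by the G_PTRMASK case above. For
// example, masking with 0xFFFFFFFFFFFFFFF0 (align down to 16 bytes) passes
// isShiftedMask_64 and becomes an ANDXri with the mask encoded as a logical
// immediate; other mask shapes are rejected here (see the TODO above).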
3133 case TargetOpcode::G_PTRTOINT:
3134 case TargetOpcode::G_TRUNC: {
3135 const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
3136 const LLT SrcTy = MRI.getType(I.getOperand(1).getReg());
3137
3138 const Register DstReg = I.getOperand(0).getReg();
3139 const Register SrcReg = I.getOperand(1).getReg();
3140
3141 const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
3142 const RegisterBank &SrcRB = *RBI.getRegBank(SrcReg, MRI, TRI);
3143
3144 if (DstRB.getID() != SrcRB.getID()) {
3145 LLVM_DEBUG(
3146 dbgs() << "G_TRUNC/G_PTRTOINT input/output on different banks\n");
3147 return false;
3148 }
3149
3150 if (DstRB.getID() == AArch64::GPRRegBankID) {
3151 const TargetRegisterClass *DstRC = getRegClassForTypeOnBank(DstTy, DstRB);
3152 if (!DstRC)
3153 return false;
3154
3155 const TargetRegisterClass *SrcRC = getRegClassForTypeOnBank(SrcTy, SrcRB);
3156 if (!SrcRC)
3157 return false;
3158
3159 if (!RBI.constrainGenericRegister(SrcReg, *SrcRC, MRI) ||
3160 !RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) {
3161 LLVM_DEBUG(dbgs() << "Failed to constrain G_TRUNC/G_PTRTOINT\n");
3162 return false;
3163 }
3164
3165 if (DstRC == SrcRC) {
3166 // Nothing to be done
3167 } else if (Opcode == TargetOpcode::G_TRUNC && DstTy == LLT::scalar(32) &&
3168 SrcTy == LLT::scalar(64)) {
3169 llvm_unreachable("TableGen can import this case");
3170 return false;
3171 } else if (DstRC == &AArch64::GPR32RegClass &&
3172 SrcRC == &AArch64::GPR64RegClass) {
3173 I.getOperand(1).setSubReg(AArch64::sub_32);
3174 } else {
3175 LLVM_DEBUG(
3176 dbgs() << "Unhandled mismatched classes in G_TRUNC/G_PTRTOINT\n");
3177 return false;
3178 }
3179
3180 I.setDesc(TII.get(TargetOpcode::COPY));
3181 return true;
3182 } else if (DstRB.getID() == AArch64::FPRRegBankID) {
3183 if (DstTy == LLT::fixed_vector(4, 16) &&
3184 SrcTy == LLT::fixed_vector(4, 32)) {
3185 I.setDesc(TII.get(AArch64::XTNv4i16));
3186 constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3187 return true;
3188 }
3189
3190 if (!SrcTy.isVector() && SrcTy.getSizeInBits() == 128) {
3191 MachineInstr *Extract = emitExtractVectorElt(
3192 DstReg, DstRB, LLT::scalar(DstTy.getSizeInBits()), SrcReg, 0, MIB);
3193 if (!Extract)
3194 return false;
3195 I.eraseFromParent();
3196 return true;
3197 }
3198
3199 // We might have a vector G_PTRTOINT, in which case just emit a COPY.
3200 if (Opcode == TargetOpcode::G_PTRTOINT) {
3201 assert(DstTy.isVector() && "Expected an FPR ptrtoint to be a vector");
3202 I.setDesc(TII.get(TargetOpcode::COPY));
3203 return selectCopy(I, TII, MRI, TRI, RBI);
3204 }
3205 }
3206
3207 return false;
3208 }
3209
3210 case TargetOpcode::G_ANYEXT: {
3211 if (selectUSMovFromExtend(I, MRI))
3212 return true;
3213
3214 const Register DstReg = I.getOperand(0).getReg();
3215 const Register SrcReg = I.getOperand(1).getReg();
3216
3217 const RegisterBank &RBDst = *RBI.getRegBank(DstReg, MRI, TRI);
3218 if (RBDst.getID() != AArch64::GPRRegBankID) {
3219 LLVM_DEBUG(dbgs() << "G_ANYEXT on bank: " << RBDst
3220 << ", expected: GPR\n");
3221 return false;
3222 }
3223
3224 const RegisterBank &RBSrc = *RBI.getRegBank(SrcReg, MRI, TRI);
3225 if (RBSrc.getID() != AArch64::GPRRegBankID) {
3226 LLVM_DEBUG(dbgs() << "G_ANYEXT on bank: " << RBSrc
3227 << ", expected: GPR\n");
3228 return false;
3229 }
3230
3231 const unsigned DstSize = MRI.getType(DstReg).getSizeInBits();
3232
3233 if (DstSize == 0) {
3234 LLVM_DEBUG(dbgs() << "G_ANYEXT operand has no size, not a gvreg?\n");
3235 return false;
3236 }
3237
3238 if (DstSize != 64 && DstSize > 32) {
3239 LLVM_DEBUG(dbgs() << "G_ANYEXT to size: " << DstSize
3240 << ", expected: 32 or 64\n");
3241 return false;
3242 }
3243 // At this point G_ANYEXT is just like a plain COPY, but we need
3244 // to explicitly form the 64-bit value if any.
3245 if (DstSize > 32) {
3246 Register ExtSrc = MRI.createVirtualRegister(&AArch64::GPR64allRegClass);
3247 BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::SUBREG_TO_REG))
3248 .addDef(ExtSrc)
3249 .addImm(0)
3250 .addUse(SrcReg)
3251 .addImm(AArch64::sub_32);
3252 I.getOperand(1).setReg(ExtSrc);
3253 }
3254 return selectCopy(I, TII, MRI, TRI, RBI);
3255 }
3256
3257 case TargetOpcode::G_ZEXT:
3258 case TargetOpcode::G_SEXT_INREG:
3259 case TargetOpcode::G_SEXT: {
3260 if (selectUSMovFromExtend(I, MRI))
3261 return true;
3262
3263 unsigned Opcode = I.getOpcode();
3264 const bool IsSigned = Opcode != TargetOpcode::G_ZEXT;
3265 const Register DefReg = I.getOperand(0).getReg();
3266 Register SrcReg = I.getOperand(1).getReg();
3267 const LLT DstTy = MRI.getType(DefReg);
3268 const LLT SrcTy = MRI.getType(SrcReg);
3269 unsigned DstSize = DstTy.getSizeInBits();
3270 unsigned SrcSize = SrcTy.getSizeInBits();
3271
3272 // SEXT_INREG has the same src reg size as dst; the size of the value to be
3273 // extended is encoded in the imm.
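// For example (vreg names are placeholders):
//   %dst:_(s64) = G_SEXT_INREG %src:_(s64), 8
// sets SrcSize to 8, so the code below emits roughly
//   %dst = SBFMXri %src, 0, 7
// i.e. a sign-extension of the low 8 bits.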
3274 if (Opcode == TargetOpcode::G_SEXT_INREG)
3275 SrcSize = I.getOperand(2).getImm();
3276
3277 if (DstTy.isVector())
3278 return false; // Should be handled by imported patterns.
3279
3280 assert((*RBI.getRegBank(DefReg, MRI, TRI)).getID() ==
3281 AArch64::GPRRegBankID &&
3282 "Unexpected ext regbank");
3283
3284 MachineInstr *ExtI;
3285
3286 // Check whether we're extending the result of a load whose dest type is
3287 // smaller than 32 bits; if so, this zext is redundant. GPR32 is the smallest
3288 // GPR register class on AArch64, and all smaller loads automatically
3289 // zero-extend the upper bits. E.g.
3290 // %v(s8) = G_LOAD %p, :: (load 1)
3291 // %v2(s32) = G_ZEXT %v(s8)
3292 if (!IsSigned) {
3293 auto *LoadMI = getOpcodeDef(TargetOpcode::G_LOAD, SrcReg, MRI);
3294 bool IsGPR =
3295 RBI.getRegBank(SrcReg, MRI, TRI)->getID() == AArch64::GPRRegBankID;
3296 if (LoadMI && IsGPR) {
3297 const MachineMemOperand *MemOp = *LoadMI->memoperands_begin();
3298 unsigned BytesLoaded = MemOp->getSize().getValue();
3299 if (BytesLoaded < 4 && SrcTy.getSizeInBytes() == BytesLoaded)
3300 return selectCopy(I, TII, MRI, TRI, RBI);
3301 }
3302
3303 // For the 32-bit -> 64-bit case, we can emit a mov (ORRWrs)
3304 // + SUBREG_TO_REG.
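// For illustration (vreg names are placeholders), a GPR G_ZEXT from s32 to
// s64 is selected roughly as:
//   %tmp:gpr32 = ORRWrs $wzr, %src, 0
//   %dst:gpr64 = SUBREG_TO_REG 0, %tmp, %subreg.sub_32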
3305 if (IsGPR && SrcSize == 32 && DstSize == 64) {
3306 Register SubregToRegSrc =
3307 MRI.createVirtualRegister(&AArch64::GPR32RegClass);
3308 const Register ZReg = AArch64::WZR;
3309 MIB.buildInstr(AArch64::ORRWrs, {SubregToRegSrc}, {ZReg, SrcReg})
3310 .addImm(0);
3311
3312 MIB.buildInstr(AArch64::SUBREG_TO_REG, {DefReg}, {})
3313 .addImm(0)
3314 .addUse(SubregToRegSrc)
3315 .addImm(AArch64::sub_32);
3316
3317 if (!RBI.constrainGenericRegister(DefReg, AArch64::GPR64RegClass,
3318 MRI)) {
3319 LLVM_DEBUG(dbgs() << "Failed to constrain G_ZEXT destination\n");
3320 return false;
3321 }
3322
3323 if (!RBI.constrainGenericRegister(SrcReg, AArch64::GPR32RegClass,
3324 MRI)) {
3325 LLVM_DEBUG(dbgs() << "Failed to constrain G_ZEXT source\n");
3326 return false;
3327 }
3328
3329 I.eraseFromParent();
3330 return true;
3331 }
3332 }
3333
3334 if (DstSize == 64) {
3335 if (Opcode != TargetOpcode::G_SEXT_INREG) {
3336 // FIXME: Can we avoid manually doing this?
3337 if (!RBI.constrainGenericRegister(SrcReg, AArch64::GPR32RegClass,
3338 MRI)) {
3339 LLVM_DEBUG(dbgs() << "Failed to constrain " << TII.getName(Opcode)
3340 << " operand\n");
3341 return false;
3342 }
3343 SrcReg = MIB.buildInstr(AArch64::SUBREG_TO_REG,
3344 {&AArch64::GPR64RegClass}, {})
3345 .addImm(0)
3346 .addUse(SrcReg)
3347 .addImm(AArch64::sub_32)
3348 .getReg(0);
3349 }
3350
3351 ExtI = MIB.buildInstr(IsSigned ? AArch64::SBFMXri : AArch64::UBFMXri,
3352 {DefReg}, {SrcReg})
3353 .addImm(0)
3354 .addImm(SrcSize - 1);
3355 } else if (DstSize <= 32) {
3356 ExtI = MIB.buildInstr(IsSigned ? AArch64::SBFMWri : AArch64::UBFMWri,
3357 {DefReg}, {SrcReg})
3358 .addImm(0)
3359 .addImm(SrcSize - 1);
3360 } else {
3361 return false;
3362 }
3363
3364 constrainSelectedInstRegOperands(*ExtI, TII, TRI, RBI);
3365 I.eraseFromParent();
3366 return true;
3367 }
3368
3369 case TargetOpcode::G_SITOFP:
3370 case TargetOpcode::G_UITOFP:
3371 case TargetOpcode::G_FPTOSI:
3372 case TargetOpcode::G_FPTOUI: {
3373 const LLT DstTy = MRI.getType(I.getOperand(0).getReg()),
3374 SrcTy = MRI.getType(I.getOperand(1).getReg());
3375 const unsigned NewOpc = selectFPConvOpc(Opcode, DstTy, SrcTy);
3376 if (NewOpc == Opcode)
3377 return false;
3378
3379 I.setDesc(TII.get(NewOpc));
3380 constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3381 I.setFlags(MachineInstr::NoFPExcept);
3382
3383 return true;
3384 }
3385
3386 case TargetOpcode::G_FREEZE:
3387 return selectCopy(I, TII, MRI, TRI, RBI);
3388
3389 case TargetOpcode::G_INTTOPTR:
3390 // The importer is currently unable to import pointer types since they
3391 // didn't exist in SelectionDAG.
3392 return selectCopy(I, TII, MRI, TRI, RBI);
3393
3394 case TargetOpcode::G_BITCAST:
3395 // Imported SelectionDAG rules can handle every bitcast except those that
3396 // bitcast from a type to the same type. Ideally, these shouldn't occur
3397 // but we might not run an optimizer that deletes them. The other exception
3398 // is bitcasts involving pointer types, as SelectionDAG has no knowledge
3399 // of them.
3400 return selectCopy(I, TII, MRI, TRI, RBI);
3401
3402 case TargetOpcode::G_SELECT: {
3403 auto &Sel = cast<GSelect>(I);
3404 const Register CondReg = Sel.getCondReg();
3405 const Register TReg = Sel.getTrueReg();
3406 const Register FReg = Sel.getFalseReg();
3407
3408 if (tryOptSelect(Sel))
3409 return true;
3410
3411 // Make sure to use an unused vreg instead of wzr, so that the peephole
3412 // optimizations will be able to optimize these.
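// Roughly, the code below emits (vreg names are placeholders):
//   %dead:gpr32 = ANDSWri %cond, <logical imm 0x1>   // test bit 0, set NZCV
// followed by a CSEL-style select of the true/false values on NE, via
// emitSelect().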
3413 Register DeadVReg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
3414 auto TstMI = MIB.buildInstr(AArch64::ANDSWri, {DeadVReg}, {CondReg})
3415 .addImm(AArch64_AM::encodeLogicalImmediate(1, 32));
3416 constrainSelectedInstRegOperands(*TstMI, TII, TRI, RBI);
3417 if (!emitSelect(Sel.getReg(0), TReg, FReg, AArch64CC::NE, MIB))
3418 return false;
3419 Sel.eraseFromParent();
3420 return true;
3421 }
3422 case TargetOpcode::G_ICMP: {
3423 if (Ty.isVector())
3424 return false;
3425
3426 if (Ty != LLT::scalar(32)) {
3427 LLVM_DEBUG(dbgs() << "G_ICMP result has type: " << Ty
3428 << ", expected: " << LLT::scalar(32) << '\n');
3429 return false;
3430 }
3431
3432 auto Pred = static_cast<CmpInst::Predicate>(I.getOperand(1).getPredicate());
3433 const AArch64CC::CondCode InvCC =
3434 changeICMPPredToAArch64CC(CmpInst::getInversePredicate(Pred));
3435 emitIntegerCompare(I.getOperand(2), I.getOperand(3), I.getOperand(1), MIB);
3436 emitCSINC(/*Dst=*/I.getOperand(0).getReg(), /*Src1=*/AArch64::WZR,
3437 /*Src2=*/AArch64::WZR, InvCC, MIB);
3438 I.eraseFromParent();
3439 return true;
3440 }
3441
3442 case TargetOpcode::G_FCMP: {
3443 CmpInst::Predicate Pred =
3444 static_cast<CmpInst::Predicate>(I.getOperand(1).getPredicate());
3445 if (!emitFPCompare(I.getOperand(2).getReg(), I.getOperand(3).getReg(), MIB,
3446 Pred) ||
3447 !emitCSetForFCmp(I.getOperand(0).getReg(), Pred, MIB))
3448 return false;
3449 I.eraseFromParent();
3450 return true;
3451 }
3452 case TargetOpcode::G_VASTART:
3453 return STI.isTargetDarwin() ? selectVaStartDarwin(I, MF, MRI)
3454 : selectVaStartAAPCS(I, MF, MRI);
3455 case TargetOpcode::G_INTRINSIC:
3456 return selectIntrinsic(I, MRI);
3457 case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS:
3458 return selectIntrinsicWithSideEffects(I, MRI);
3459 case TargetOpcode::G_IMPLICIT_DEF: {
3460 I.setDesc(TII.get(TargetOpcode::IMPLICIT_DEF));
3461 const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
3462 const Register DstReg = I.getOperand(0).getReg();
3463 const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
3464 const TargetRegisterClass *DstRC = getRegClassForTypeOnBank(DstTy, DstRB);
3465 RBI.constrainGenericRegister(DstReg, *DstRC, MRI);
3466 return true;
3467 }
3468 case TargetOpcode::G_BLOCK_ADDR: {
3469 if (TM.getCodeModel() == CodeModel::Large && !TM.isPositionIndependent()) {
3470 materializeLargeCMVal(I, I.getOperand(1).getBlockAddress(), 0);
3471 I.eraseFromParent();
3472 return true;
3473 } else {
3474 I.setDesc(TII.get(AArch64::MOVaddrBA));
3475 auto MovMI = BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::MOVaddrBA),
3476 I.getOperand(0).getReg())
3477 .addBlockAddress(I.getOperand(1).getBlockAddress(),
3478 /* Offset */ 0, AArch64II::MO_PAGE)
3479 .addBlockAddress(
3480 I.getOperand(1).getBlockAddress(), /* Offset */ 0,
3481 AArch64II::MO_NC | AArch64II::MO_PAGEOFF);
3482 I.eraseFromParent();
3483 return constrainSelectedInstRegOperands(*MovMI, TII, TRI, RBI);
3484 }
3485 }
3486 case AArch64::G_DUP: {
3487 // When the scalar of G_DUP is an s8/s16 gpr, it can't be selected by the
3488 // imported patterns, so do it manually here. Avoiding generating an s16 gpr
3489 // is difficult because at RegBankSelect we may end up pessimizing the fpr
3490 // case if we decided to add an anyextend to fix this. Manual selection is
3491 // the most robust solution for now.
3492 if (RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI)->getID() !=
3493 AArch64::GPRRegBankID)
3494 return false; // We expect the fpr regbank case to be imported.
3495 LLT VecTy = MRI.getType(I.getOperand(0).getReg());
3496 if (VecTy == LLT::fixed_vector(8, 8))
3497 I.setDesc(TII.get(AArch64::DUPv8i8gpr));
3498 else if (VecTy == LLT::fixed_vector(16, 8))
3499 I.setDesc(TII.get(AArch64::DUPv16i8gpr));
3500 else if (VecTy == LLT::fixed_vector(4, 16))
3501 I.setDesc(TII.get(AArch64::DUPv4i16gpr));
3502 else if (VecTy == LLT::fixed_vector(8, 16))
3503 I.setDesc(TII.get(AArch64::DUPv8i16gpr));
3504 else
3505 return false;
3506 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3507 }
3508 case TargetOpcode::G_BUILD_VECTOR:
3509 return selectBuildVector(I, MRI);
3510 case TargetOpcode::G_MERGE_VALUES:
3511 return selectMergeValues(I, MRI);
3512 case TargetOpcode::G_UNMERGE_VALUES:
3513 return selectUnmergeValues(I, MRI);
3514 case TargetOpcode::G_SHUFFLE_VECTOR:
3515 return selectShuffleVector(I, MRI);
3516 case TargetOpcode::G_EXTRACT_VECTOR_ELT:
3517 return selectExtractElt(I, MRI);
3518 case TargetOpcode::G_CONCAT_VECTORS:
3519 return selectConcatVectors(I, MRI);
3520 case TargetOpcode::G_JUMP_TABLE:
3521 return selectJumpTable(I, MRI);
3522 case TargetOpcode::G_MEMCPY:
3523 case TargetOpcode::G_MEMCPY_INLINE:
3524 case TargetOpcode::G_MEMMOVE:
3525 case TargetOpcode::G_MEMSET:
3526 assert(STI.hasMOPS() && "Shouldn't get here without +mops feature");
3527 return selectMOPS(I, MRI);
3528 }
3529
3530 return false;
3531}
3532
3533bool AArch64InstructionSelector::selectAndRestoreState(MachineInstr &I) {
3534 MachineIRBuilderState OldMIBState = MIB.getState();
3535 bool Success = select(I);
3536 MIB.setState(OldMIBState);
3537 return Success;
3538}
3539
3540bool AArch64InstructionSelector::selectMOPS(MachineInstr &GI,
3541 MachineRegisterInfo &MRI) {
3542 unsigned Mopcode;
3543 switch (GI.getOpcode()) {
3544 case TargetOpcode::G_MEMCPY:
3545 case TargetOpcode::G_MEMCPY_INLINE:
3546 Mopcode = AArch64::MOPSMemoryCopyPseudo;
3547 break;
3548 case TargetOpcode::G_MEMMOVE:
3549 Mopcode = AArch64::MOPSMemoryMovePseudo;
3550 break;
3551 case TargetOpcode::G_MEMSET:
3552 // For tagged memset see llvm.aarch64.mops.memset.tag
3553 Mopcode = AArch64::MOPSMemorySetPseudo;
3554 break;
3555 }
3556
3557 auto &DstPtr = GI.getOperand(0);
3558 auto &SrcOrVal = GI.getOperand(1);
3559 auto &Size = GI.getOperand(2);
3560
3561 // Create copies of the registers that can be clobbered.
3562 const Register DstPtrCopy = MRI.cloneVirtualRegister(DstPtr.getReg());
3563 const Register SrcValCopy = MRI.cloneVirtualRegister(SrcOrVal.getReg());
3564 const Register SizeCopy = MRI.cloneVirtualRegister(Size.getReg());
3565
3566 const bool IsSet = Mopcode == AArch64::MOPSMemorySetPseudo;
3567 const auto &SrcValRegClass =
3568 IsSet ? AArch64::GPR64RegClass : AArch64::GPR64commonRegClass;
3569
3570 // Constrain to specific registers
3571 RBI.constrainGenericRegister(DstPtrCopy, AArch64::GPR64commonRegClass, MRI);
3572 RBI.constrainGenericRegister(SrcValCopy, SrcValRegClass, MRI);
3573 RBI.constrainGenericRegister(SizeCopy, AArch64::GPR64RegClass, MRI);
3574
3575 MIB.buildCopy(DstPtrCopy, DstPtr);
3576 MIB.buildCopy(SrcValCopy, SrcOrVal);
3577 MIB.buildCopy(SizeCopy, Size);
3578
3579 // New instruction uses the copied registers because it must update them.
3580 // The defs are not used since they don't exist in G_MEM*. They are still
3581 // tied.
3582 // Note: order of operands is different from G_MEMSET, G_MEMCPY, G_MEMMOVE
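// E.g. (vreg names are placeholders) a G_MEMSET becomes roughly
//   %NewDst, %NewSize = MOPSMemorySetPseudo %DstCopy, %SizeCopy, %ValCopy
// while G_MEMCPY/G_MEMMOVE keep the (dst, src, size) order:
//   %NewDst, %NewSrc, %NewSize = MOPSMemoryCopyPseudo %DstCopy, %SrcCopy, %SizeCopy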
3583 Register DefDstPtr = MRI.createVirtualRegister(&AArch64::GPR64commonRegClass);
3584 Register DefSize = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
3585 if (IsSet) {
3586 MIB.buildInstr(Mopcode, {DefDstPtr, DefSize},
3587 {DstPtrCopy, SizeCopy, SrcValCopy});
3588 } else {
3589 Register DefSrcPtr = MRI.createVirtualRegister(&SrcValRegClass);
3590 MIB.buildInstr(Mopcode, {DefDstPtr, DefSrcPtr, DefSize},
3591 {DstPtrCopy, SrcValCopy, SizeCopy});
3592 }
3593
3594 GI.eraseFromParent();
3595 return true;
3596}
3597
3598bool AArch64InstructionSelector::selectBrJT(MachineInstr &I,
3599 MachineRegisterInfo &MRI) {
3600 assert(I.getOpcode() == TargetOpcode::G_BRJT && "Expected G_BRJT");
3601 Register JTAddr = I.getOperand(0).getReg();
3602 unsigned JTI = I.getOperand(1).getIndex();
3603 Register Index = I.getOperand(2).getReg();
3604
3605 Register TargetReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
3606 Register ScratchReg = MRI.createVirtualRegister(&AArch64::GPR64spRegClass);
3607
3608 MF->getInfo<AArch64FunctionInfo>()->setJumpTableEntryInfo(JTI, 4, nullptr);
3609 auto JumpTableInst = MIB.buildInstr(AArch64::JumpTableDest32,
3610 {TargetReg, ScratchReg}, {JTAddr, Index})
3611 .addJumpTableIndex(JTI);
3612 // Save the jump table info.
3613 MIB.buildInstr(TargetOpcode::JUMP_TABLE_DEBUG_INFO, {},
3614 {static_cast<int64_t>(JTI)});
3615 // Build the indirect branch.
3616 MIB.buildInstr(AArch64::BR, {}, {TargetReg});
3617 I.eraseFromParent();
3618 return constrainSelectedInstRegOperands(*JumpTableInst, TII, TRI, RBI);
3619}
3620
3621bool AArch64InstructionSelector::selectJumpTable(MachineInstr &I,
3622 MachineRegisterInfo &MRI) {
3623 assert(I.getOpcode() == TargetOpcode::G_JUMP_TABLE && "Expected jump table");
3624 assert(I.getOperand(1).isJTI() && "Jump table op should have a JTI!");
3625
3626 Register DstReg = I.getOperand(0).getReg();
3627 unsigned JTI = I.getOperand(1).getIndex();
3628 // We generate a MOVaddrJT which will get expanded to an ADRP + ADD later.
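// i.e. the MOVaddrJT pseudo later expands to roughly (label name is
// illustrative):
//   adrp xN, .LJTI0_0
//   add  xN, xN, :lo12:.LJTI0_0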
3629 auto MovMI =
3630 MIB.buildInstr(AArch64::MOVaddrJT, {DstReg}, {})
3631 .addJumpTableIndex(JTI, AArch64II::MO_PAGE)
3632 .addJumpTableIndex(JTI, AArch64II::MO_NC | AArch64II::MO_PAGEOFF);
3633 I.eraseFromParent();
3634 return constrainSelectedInstRegOperands(*MovMI, TII, TRI, RBI);
3635}
3636
3637bool AArch64InstructionSelector::selectTLSGlobalValue(
3638 MachineInstr &I, MachineRegisterInfo &MRI) {
3639 if (!STI.isTargetMachO())
3640 return false;
3641 MachineFunction &MF = *I.getParent()->getParent();
3642 MF.getFrameInfo().setAdjustsStack(true);
3643
3644 const auto &GlobalOp = I.getOperand(1);
3645 assert(GlobalOp.getOffset() == 0 &&
3646 "Shouldn't have an offset on TLS globals!");
3647 const GlobalValue &GV = *GlobalOp.getGlobal();
3648
3649 auto LoadGOT =
3650 MIB.buildInstr(AArch64::LOADgot, {&AArch64::GPR64commonRegClass}, {})
3651 .addGlobalAddress(&GV, 0, AArch64II::MO_TLS);
3652
3653 auto Load = MIB.buildInstr(AArch64::LDRXui, {&AArch64::GPR64commonRegClass},
3654 {LoadGOT.getReg(0)})
3655 .addImm(0);
3656
3657 MIB.buildCopy(Register(AArch64::X0), LoadGOT.getReg(0));
3658 // TLS calls preserve all registers except those that absolutely must be
3659 // trashed: X0 (it takes an argument), LR (it's a call) and NZCV (let's not be
3660 // silly).
3661 unsigned Opcode = getBLRCallOpcode(MF);
3662
3663 // With ptrauth-calls, the tlv access thunk pointer is authenticated (IA, 0).
3664 if (MF.getFunction().hasFnAttribute("ptrauth-calls")) {
3665 assert(Opcode == AArch64::BLR);
3666 Opcode = AArch64::BLRAAZ;
3667 }
3668
3669 MIB.buildInstr(Opcode, {}, {Load})
3670 .addUse(AArch64::X0, RegState::Implicit)
3671 .addDef(AArch64::X0, RegState::Implicit)
3672 .addRegMask(TRI.getTLSCallPreservedMask());
3673
3674 MIB.buildCopy(I.getOperand(0).getReg(), Register(AArch64::X0));
3675 RBI.constrainGenericRegister(I.getOperand(0).getReg(), AArch64::GPR64RegClass,
3676 MRI);
3677 I.eraseFromParent();
3678 return true;
3679}
3680
3681MachineInstr *AArch64InstructionSelector::emitScalarToVector(
3682 unsigned EltSize, const TargetRegisterClass *DstRC, Register Scalar,
3683 MachineIRBuilder &MIRBuilder) const {
3684 auto Undef = MIRBuilder.buildInstr(TargetOpcode::IMPLICIT_DEF, {DstRC}, {});
3685
3686 auto BuildFn = [&](unsigned SubregIndex) {
3687 auto Ins =
3688 MIRBuilder
3689 .buildInstr(TargetOpcode::INSERT_SUBREG, {DstRC}, {Undef, Scalar})
3690 .addImm(SubregIndex);
3691 constrainSelectedInstRegOperands(*Undef, TII, TRI, RBI);
3692 constrainSelectedInstRegOperands(*Ins, TII, TRI, RBI);
3693 return &*Ins;
3694 };
3695
3696 switch (EltSize) {
3697 case 8:
3698 return BuildFn(AArch64::bsub);
3699 case 16:
3700 return BuildFn(AArch64::hsub);
3701 case 32:
3702 return BuildFn(AArch64::ssub);
3703 case 64:
3704 return BuildFn(AArch64::dsub);
3705 default:
3706 return nullptr;
3707 }
3708}
3709
3710MachineInstr *
3711AArch64InstructionSelector::emitNarrowVector(Register DstReg, Register SrcReg,
3712 MachineIRBuilder &MIB,
3713 MachineRegisterInfo &MRI) const {
3714 LLT DstTy = MRI.getType(DstReg);
3715 const TargetRegisterClass *RC =
3716 getRegClassForTypeOnBank(DstTy, *RBI.getRegBank(SrcReg, MRI, TRI));
3717 if (RC != &AArch64::FPR32RegClass && RC != &AArch64::FPR64RegClass) {
3718 LLVM_DEBUG(dbgs() << "Unsupported register class!\n");
3719 return nullptr;
3720 }
3721 unsigned SubReg = 0;
3722 if (!getSubRegForClass(RC, TRI, SubReg))
3723 return nullptr;
3724 if (SubReg != AArch64::ssub && SubReg != AArch64::dsub) {
3725 LLVM_DEBUG(dbgs() << "Unsupported destination size! ("
3726 << DstTy.getSizeInBits() << ")\n");
3727 return nullptr;
3728 }
3729 auto Copy = MIB.buildInstr(TargetOpcode::COPY, {DstReg}, {})
3730 .addReg(SrcReg, 0, SubReg);
3731 RBI.constrainGenericRegister(DstReg, *RC, MRI);
3732 return Copy;
3733}
3734
3735bool AArch64InstructionSelector::selectMergeValues(
3736 MachineInstr &I, MachineRegisterInfo &MRI) {
3737 assert(I.getOpcode() == TargetOpcode::G_MERGE_VALUES && "unexpected opcode");
3738 const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
3739 const LLT SrcTy = MRI.getType(I.getOperand(1).getReg());
3740 assert(!DstTy.isVector() && !SrcTy.isVector() && "invalid merge operation");
3741 const RegisterBank &RB = *RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI);
3742
3743 if (I.getNumOperands() != 3)
3744 return false;
3745
3746 // Merging 2 s64s into an s128.
3747 if (DstTy == LLT::scalar(128)) {
3748 if (SrcTy.getSizeInBits() != 64)
3749 return false;
3750 Register DstReg = I.getOperand(0).getReg();
3751 Register Src1Reg = I.getOperand(1).getReg();
3752 Register Src2Reg = I.getOperand(2).getReg();
3753 auto Tmp = MIB.buildInstr(TargetOpcode::IMPLICIT_DEF, {DstTy}, {});
3754 MachineInstr *InsMI = emitLaneInsert(std::nullopt, Tmp.getReg(0), Src1Reg,
3755 /* LaneIdx */ 0, RB, MIB);
3756 if (!InsMI)
3757 return false;
3758 MachineInstr *Ins2MI = emitLaneInsert(DstReg, InsMI->getOperand(0).getReg(),
3759 Src2Reg, /* LaneIdx */ 1, RB, MIB);
3760 if (!Ins2MI)
3761 return false;
3762 constrainSelectedInstRegOperands(*InsMI, TII, TRI, RBI);
3763 constrainSelectedInstRegOperands(*Ins2MI, TII, TRI, RBI);
3764 I.eraseFromParent();
3765 return true;
3766 }
3767
3768 if (RB.getID() != AArch64::GPRRegBankID)
3769 return false;
3770
3771 if (DstTy.getSizeInBits() != 64 || SrcTy.getSizeInBits() != 32)
3772 return false;
3773
3774 auto *DstRC = &AArch64::GPR64RegClass;
3775 Register SubToRegDef = MRI.createVirtualRegister(DstRC);
3776 MachineInstr &SubRegMI = *BuildMI(*I.getParent(), I, I.getDebugLoc(),
3777 TII.get(TargetOpcode::SUBREG_TO_REG))
3778 .addDef(SubToRegDef)
3779 .addImm(0)
3780 .addUse(I.getOperand(1).getReg())
3781 .addImm(AArch64::sub_32);
3782 Register SubToRegDef2 = MRI.createVirtualRegister(DstRC);
3783 // Need to anyext the second scalar before we can use bfm
3784 MachineInstr &SubRegMI2 = *BuildMI(*I.getParent(), I, I.getDebugLoc(),
3785 TII.get(TargetOpcode::SUBREG_TO_REG))
3786 .addDef(SubToRegDef2)
3787 .addImm(0)
3788 .addUse(I.getOperand(2).getReg())
3789 .addImm(AArch64::sub_32);
3790 MachineInstr &BFM =
3791 *BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::BFMXri))
3792 .addDef(I.getOperand(0).getReg())
3793 .addUse(SubToRegDef)
3794 .addUse(SubToRegDef2)
3795 .addImm(32)
3796 .addImm(31);
3797 constrainSelectedInstRegOperands(SubRegMI, TII, TRI, RBI);
3798 constrainSelectedInstRegOperands(SubRegMI2, TII, TRI, RBI);
3799 constrainSelectedInstRegOperands(BFM, TII, TRI, RBI);
3800 I.eraseFromParent();
3801 return true;
3802}
3803
3804static bool getLaneCopyOpcode(unsigned &CopyOpc, unsigned &ExtractSubReg,
3805 const unsigned EltSize) {
3806 // Choose a lane copy opcode and subregister based off of the size of the
3807 // vector's elements.
3808 switch (EltSize) {
3809 case 8:
3810 CopyOpc = AArch64::DUPi8;
3811 ExtractSubReg = AArch64::bsub;
3812 break;
3813 case 16:
3814 CopyOpc = AArch64::DUPi16;
3815 ExtractSubReg = AArch64::hsub;
3816 break;
3817 case 32:
3818 CopyOpc = AArch64::DUPi32;
3819 ExtractSubReg = AArch64::ssub;
3820 break;
3821 case 64:
3822 CopyOpc = AArch64::DUPi64;
3823 ExtractSubReg = AArch64::dsub;
3824 break;
3825 default:
3826 // Unknown size, bail out.
3827 LLVM_DEBUG(dbgs() << "Elt size '" << EltSize << "' unsupported.\n");
3828 return false;
3829 }
3830 return true;
3831}
3832
3833MachineInstr *AArch64InstructionSelector::emitExtractVectorElt(
3834 std::optional<Register> DstReg, const RegisterBank &DstRB, LLT ScalarTy,
3835 Register VecReg, unsigned LaneIdx, MachineIRBuilder &MIRBuilder) const {
3836 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
3837 unsigned CopyOpc = 0;
3838 unsigned ExtractSubReg = 0;
3839 if (!getLaneCopyOpcode(CopyOpc, ExtractSubReg, ScalarTy.getSizeInBits())) {
3840 LLVM_DEBUG(
3841 dbgs() << "Couldn't determine lane copy opcode for instruction.\n");
3842 return nullptr;
3843 }
3844
3845 const TargetRegisterClass *DstRC =
3846 getRegClassForTypeOnBank(ScalarTy, DstRB, true);
3847 if (!DstRC) {
3848 LLVM_DEBUG(dbgs() << "Could not determine destination register class.\n");
3849 return nullptr;
3850 }
3851
3852 const RegisterBank &VecRB = *RBI.getRegBank(VecReg, MRI, TRI);
3853 const LLT &VecTy = MRI.getType(VecReg);
3854 const TargetRegisterClass *VecRC =
3855 getRegClassForTypeOnBank(VecTy, VecRB, true);
3856 if (!VecRC) {
3857 LLVM_DEBUG(dbgs() << "Could not determine source register class.\n");
3858 return nullptr;
3859 }
3860
3861 // The register we'll do the lane copy from (widened to 128 bits below if needed).
3862 Register InsertReg = VecReg;
3863 if (!DstReg)
3864 DstReg = MRI.createVirtualRegister(DstRC);
3865 // If the lane index is 0, we just use a subregister COPY.
3866 if (LaneIdx == 0) {
3867 auto Copy = MIRBuilder.buildInstr(TargetOpcode::COPY, {*DstReg}, {})
3868 .addReg(VecReg, 0, ExtractSubReg);
3869 RBI.constrainGenericRegister(*DstReg, *DstRC, MRI);
3870 return &*Copy;
3871 }
3872
3873 // Lane copies require 128-bit wide registers. If we're dealing with an
3874 // unpacked vector, then we need to move up to that width. Insert an implicit
3875 // def and a subregister insert to get us there.
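// For illustration (vreg names are placeholders), widening a 64-bit source
// before a lane copy looks roughly like:
//   %undef:fpr128 = IMPLICIT_DEF
//   %wide:fpr128 = INSERT_SUBREG %undef, %vec, %subreg.dsub
//   %dst = DUPi32 %wide, <lane>      // opcode depends on the element size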
3876 if (VecTy.getSizeInBits() != 128) {
3877 MachineInstr *ScalarToVector = emitScalarToVector(
3878 VecTy.getSizeInBits(), &AArch64::FPR128RegClass, VecReg, MIRBuilder);
3879 if (!ScalarToVector)
3880 return nullptr;
3881 InsertReg = ScalarToVector->getOperand(0).getReg();
3882 }
3883
3884 MachineInstr *LaneCopyMI =
3885 MIRBuilder.buildInstr(CopyOpc, {*DstReg}, {InsertReg}).addImm(LaneIdx);
3886 constrainSelectedInstRegOperands(*LaneCopyMI, TII, TRI, RBI);
3887
3888 // Make sure that we actually constrain the initial copy.
3889 RBI.constrainGenericRegister(*DstReg, *DstRC, MRI);
3890 return LaneCopyMI;
3891}
3892
3893bool AArch64InstructionSelector::selectExtractElt(
3894 MachineInstr &I, MachineRegisterInfo &MRI) {
3895 assert(I.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT &&
3896 "unexpected opcode!");
3897 Register DstReg = I.getOperand(0).getReg();
3898 const LLT NarrowTy = MRI.getType(DstReg);
3899 const Register SrcReg = I.getOperand(1).getReg();
3900 const LLT WideTy = MRI.getType(SrcReg);
3901 (void)WideTy;
3902 assert(WideTy.getSizeInBits() >= NarrowTy.getSizeInBits() &&
3903 "source register size too small!");
3904 assert(!NarrowTy.isVector() && "cannot extract vector into vector!");
3905
3906 // Need the lane index to determine the correct copy opcode.
3907 MachineOperand &LaneIdxOp = I.getOperand(2);
3908 assert(LaneIdxOp.isReg() && "Lane index operand was not a register?");
3909
3910 if (RBI.getRegBank(DstReg, MRI, TRI)->getID() != AArch64::FPRRegBankID) {
3911 LLVM_DEBUG(dbgs() << "Cannot extract into GPR.\n");
3912 return false;
3913 }
3914
3915 // Find the index to extract from.
3916 auto VRegAndVal = getIConstantVRegValWithLookThrough(LaneIdxOp.getReg(), MRI);
3917 if (!VRegAndVal)
3918 return false;
3919 unsigned LaneIdx = VRegAndVal->Value.getSExtValue();
3920
3921
3922 const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
3923 MachineInstr *Extract = emitExtractVectorElt(DstReg, DstRB, NarrowTy, SrcReg,
3924 LaneIdx, MIB);
3925 if (!Extract)
3926 return false;
3927
3928 I.eraseFromParent();
3929 return true;
3930}
3931
3932bool AArch64InstructionSelector::selectSplitVectorUnmerge(
3933 MachineInstr &I, MachineRegisterInfo &MRI) {
3934 unsigned NumElts = I.getNumOperands() - 1;
3935 Register SrcReg = I.getOperand(NumElts).getReg();
3936 const LLT NarrowTy = MRI.getType(I.getOperand(0).getReg());
3937 const LLT SrcTy = MRI.getType(SrcReg);
3938
3939 assert(NarrowTy.isVector() && "Expected an unmerge into vectors");
3940 if (SrcTy.getSizeInBits() > 128) {
3941 LLVM_DEBUG(dbgs() << "Unexpected vector type for vec split unmerge");
3942 return false;
3943 }
3944
3945 // We implement a split vector operation by treating the sub-vectors as
3946 // scalars and extracting them.
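// E.g. (vreg names are placeholders) unmerging %src:_(<4 x s32>) into two
// <2 x s32> halves treats each half as a 64-bit element, so half 0 becomes a
// dsub subregister copy and half 1 a 64-bit lane copy from %src.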
3947 const RegisterBank &DstRB =
3948 *RBI.getRegBank(I.getOperand(0).getReg(), MRI, TRI);
3949 for (unsigned OpIdx = 0; OpIdx < NumElts; ++OpIdx) {
3950 Register Dst = I.getOperand(OpIdx).getReg();
3951 MachineInstr *Extract =
3952 emitExtractVectorElt(Dst, DstRB, NarrowTy, SrcReg, OpIdx, MIB);
3953 if (!Extract)
3954 return false;
3955 }
3956 I.eraseFromParent();
3957 return true;
3958}
3959
3960bool AArch64InstructionSelector::selectUnmergeValues(MachineInstr &I,
3961 MachineRegisterInfo &MRI) {
3962 assert(I.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
3963 "unexpected opcode");
3964
3965 // TODO: Handle unmerging into GPRs and from scalars to scalars.
3966 if (RBI.getRegBank(I.getOperand(0).getReg(), MRI, TRI)->getID() !=
3967 AArch64::FPRRegBankID ||
3968 RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI)->getID() !=
3969 AArch64::FPRRegBankID) {
3970 LLVM_DEBUG(dbgs() << "Unmerging vector-to-gpr and scalar-to-scalar "
3971 "currently unsupported.\n");
3972 return false;
3973 }
3974
3975 // The last operand is the vector source register, and every other operand is
3976 // a register to unpack into.
3977 unsigned NumElts = I.getNumOperands() - 1;
3978 Register SrcReg = I.getOperand(NumElts).getReg();
3979 const LLT NarrowTy = MRI.getType(I.getOperand(0).getReg());
3980 const LLT WideTy = MRI.getType(SrcReg);
3981 (void)WideTy;
3982 assert((WideTy.isVector() || WideTy.getSizeInBits() == 128) &&
3983 "can only unmerge from vector or s128 types!");
3984 assert(WideTy.getSizeInBits() > NarrowTy.getSizeInBits() &&
3985 "source register size too small!");
3986
3987 if (!NarrowTy.isScalar())
3988 return selectSplitVectorUnmerge(I, MRI);
3989
3990 // Choose a lane copy opcode and subregister based off of the size of the
3991 // vector's elements.
3992 unsigned CopyOpc = 0;
3993 unsigned ExtractSubReg = 0;
3994 if (!getLaneCopyOpcode(CopyOpc, ExtractSubReg, NarrowTy.getSizeInBits()))
3995 return false;
3996
3997 // Set up for the lane copies.
3998 MachineBasicBlock &MBB = *I.getParent();
3999
4000 // Stores the registers we'll be copying from.
4001 SmallVector<Register, 4> InsertRegs;
4002
4003 // We'll use the first register twice, so we only need NumElts-1 registers.
4004 unsigned NumInsertRegs = NumElts - 1;
4005
4006 // If our elements fit into exactly 128 bits, then we can copy from the source
4007 // directly. Otherwise, we need to do a bit of setup with some subregister
4008 // inserts.
4009 if (NarrowTy.getSizeInBits() * NumElts == 128) {
4010 InsertRegs = SmallVector<Register, 4>(NumInsertRegs, SrcReg);
4011 } else {
4012 // No. We have to perform subregister inserts. For each insert, create an
4013 // implicit def and a subregister insert, and save the register we create.
4014 const TargetRegisterClass *RC = getRegClassForTypeOnBank(
4015 LLT::fixed_vector(NumElts, WideTy.getScalarSizeInBits()),
4016 *RBI.getRegBank(SrcReg, MRI, TRI));
4017 unsigned SubReg = 0;
4018 bool Found = getSubRegForClass(RC, TRI, SubReg);
4019 (void)Found;
4020 assert(Found && "expected to find last operand's subreg idx");
4021 for (unsigned Idx = 0; Idx < NumInsertRegs; ++Idx) {
4022 Register ImpDefReg = MRI.createVirtualRegister(&AArch64::FPR128RegClass);
4023 MachineInstr &ImpDefMI =
4024 *BuildMI(MBB, I, I.getDebugLoc(), TII.get(TargetOpcode::IMPLICIT_DEF),
4025 ImpDefReg);
4026
4027 // Now, create the subregister insert from SrcReg.
4028 Register InsertReg = MRI.createVirtualRegister(&AArch64::FPR128RegClass);
4029 MachineInstr &InsMI =
4030 *BuildMI(MBB, I, I.getDebugLoc(),
4031 TII.get(TargetOpcode::INSERT_SUBREG), InsertReg)
4032 .addUse(ImpDefReg)
4033 .addUse(SrcReg)
4034 .addImm(SubReg);
4035
4036 constrainSelectedInstRegOperands(ImpDefMI, TII, TRI, RBI);
4037 constrainSelectedInstRegOperands(InsMI, TII, TRI, RBI);
4038
4039 // Save the register so that we can copy from it after.
4040 InsertRegs.push_back(InsertReg);
4041 }
4042 }
4043
4044 // Now that we've created any necessary subregister inserts, we can
4045 // create the copies.
4046 //
4047 // Perform the first copy separately as a subregister copy.
4048 Register CopyTo = I.getOperand(0).getReg();
4049 auto FirstCopy = MIB.buildInstr(TargetOpcode::COPY, {CopyTo}, {})
4050 .addReg(InsertRegs[0], 0, ExtractSubReg);
4051 constrainSelectedInstRegOperands(*FirstCopy, TII, TRI, RBI);
4052
4053 // Now, perform the remaining copies as vector lane copies.
4054 unsigned LaneIdx = 1;
4055 for (Register InsReg : InsertRegs) {
4056 Register CopyTo = I.getOperand(LaneIdx).getReg();
4057 MachineInstr &CopyInst =
4058 *BuildMI(MBB, I, I.getDebugLoc(), TII.get(CopyOpc), CopyTo)
4059 .addUse(InsReg)
4060 .addImm(LaneIdx);
4061 constrainSelectedInstRegOperands(CopyInst, TII, TRI, RBI);
4062 ++LaneIdx;
4063 }
4064
4065 // Separately constrain the first copy's destination. Because of the
4066 // limitation in constrainOperandRegClass, we can't guarantee that this will
4067 // actually be constrained. So, do it ourselves using the second operand.
4068 const TargetRegisterClass *RC =
4069 MRI.getRegClassOrNull(I.getOperand(1).getReg());
4070 if (!RC) {
4071 LLVM_DEBUG(dbgs() << "Couldn't constrain copy destination.\n");
4072 return false;
4073 }
4074
4075 RBI.constrainGenericRegister(CopyTo, *RC, MRI);
4076 I.eraseFromParent();
4077 return true;
4078}
4079
4080bool AArch64InstructionSelector::selectConcatVectors(
4081 MachineInstr &I, MachineRegisterInfo &MRI) {
4082 assert(I.getOpcode() == TargetOpcode::G_CONCAT_VECTORS &&
4083 "Unexpected opcode");
4084 Register Dst = I.getOperand(0).getReg();
4085 Register Op1 = I.getOperand(1).getReg();
4086 Register Op2 = I.getOperand(2).getReg();
4087 MachineInstr *ConcatMI = emitVectorConcat(Dst, Op1, Op2, MIB);
4088 if (!ConcatMI)
4089 return false;
4090 I.eraseFromParent();
4091 return true;
4092}
4093
4094unsigned
4095AArch64InstructionSelector::emitConstantPoolEntry(const Constant *CPVal,
4096 MachineFunction &MF) const {
4097 Type *CPTy = CPVal->getType();
4098 Align Alignment = MF.getDataLayout().getPrefTypeAlign(CPTy);
4099
4100 MachineConstantPool *MCP = MF.getConstantPool();
4101 return MCP->getConstantPoolIndex(CPVal, Alignment);
4102}
4103
4104MachineInstr *AArch64InstructionSelector::emitLoadFromConstantPool(
4105 const Constant *CPVal, MachineIRBuilder &MIRBuilder) const {
4106 const TargetRegisterClass *RC;
4107 unsigned Opc;
4108 bool IsTiny = TM.getCodeModel() == CodeModel::Tiny;
4109 unsigned Size = MIRBuilder.getDataLayout().getTypeStoreSize(CPVal->getType());
4110 switch (Size) {
4111 case 16:
4112 RC = &AArch64::FPR128RegClass;
4113 Opc = IsTiny ? AArch64::LDRQl : AArch64::LDRQui;
4114 break;
4115 case 8:
4116 RC = &AArch64::FPR64RegClass;
4117 Opc = IsTiny ? AArch64::LDRDl : AArch64::LDRDui;
4118 break;
4119 case 4:
4120 RC = &AArch64::FPR32RegClass;
4121 Opc = IsTiny ? AArch64::LDRSl : AArch64::LDRSui;
4122 break;
4123 case 2:
4124 RC = &AArch64::FPR16RegClass;
4125 Opc = AArch64::LDRHui;
4126 break;
4127 default:
4128 LLVM_DEBUG(dbgs() << "Could not load from constant pool of type "
4129 << *CPVal->getType());
4130 return nullptr;
4131 }
4132
4133 MachineInstr *LoadMI = nullptr;
4134 auto &MF = MIRBuilder.getMF();
4135 unsigned CPIdx = emitConstantPoolEntry(CPVal, MF);
4136 if (IsTiny && (Size == 16 || Size == 8 || Size == 4)) {
4137 // Use load(literal) for tiny code model.
4138 LoadMI = &*MIRBuilder.buildInstr(Opc, {RC}, {}).addConstantPoolIndex(CPIdx);
4139 } else {
4140 auto Adrp =
4141 MIRBuilder.buildInstr(AArch64::ADRP, {&AArch64::GPR64RegClass}, {})
4142 .addConstantPoolIndex(CPIdx, 0, AArch64II::MO_PAGE);
4143
4144 LoadMI = &*MIRBuilder.buildInstr(Opc, {RC}, {Adrp})
4145 .addConstantPoolIndex(
4146 CPIdx, 0, AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
4147
4148 constrainSelectedInstRegOperands(*Adrp, TII, TRI, RBI);
4149 }
4150
4151 MachinePointerInfo PtrInfo = MachinePointerInfo::getConstantPool(MF);
4152 LoadMI->addMemOperand(MF, MF.getMachineMemOperand(PtrInfo,
4153 MachineMemOperand::MOLoad,
4154 Size, Align(Size)));
4155 constrainSelectedInstRegOperands(*LoadMI, TII, TRI, RBI);
4156 return LoadMI;
4157}
4158
4159/// Return an <Opcode, SubregIndex> pair to do a vector elt insert of a given
4160/// size and RB.
4161static std::pair<unsigned, unsigned>
4162getInsertVecEltOpInfo(const RegisterBank &RB, unsigned EltSize) {
4163 unsigned Opc, SubregIdx;
4164 if (RB.getID() == AArch64::GPRRegBankID) {
4165 if (EltSize == 8) {
4166 Opc = AArch64::INSvi8gpr;
4167 SubregIdx = AArch64::bsub;
4168 } else if (EltSize == 16) {
4169 Opc = AArch64::INSvi16gpr;
4170 SubregIdx = AArch64::ssub;
4171 } else if (EltSize == 32) {
4172 Opc = AArch64::INSvi32gpr;
4173 SubregIdx = AArch64::ssub;
4174 } else if (EltSize == 64) {
4175 Opc = AArch64::INSvi64gpr;
4176 SubregIdx = AArch64::dsub;
4177 } else {
4178 llvm_unreachable("invalid elt size!");
4179 }
4180 } else {
4181 if (EltSize == 8) {
4182 Opc = AArch64::INSvi8lane;
4183 SubregIdx = AArch64::bsub;
4184 } else if (EltSize == 16) {
4185 Opc = AArch64::INSvi16lane;
4186 SubregIdx = AArch64::hsub;
4187 } else if (EltSize == 32) {
4188 Opc = AArch64::INSvi32lane;
4189 SubregIdx = AArch64::ssub;
4190 } else if (EltSize == 64) {
4191 Opc = AArch64::INSvi64lane;
4192 SubregIdx = AArch64::dsub;
4193 } else {
4194 llvm_unreachable("invalid elt size!");
4195 }
4196 }
4197 return std::make_pair(Opc, SubregIdx);
4198}
4199
4200MachineInstr *AArch64InstructionSelector::emitInstr(
4201 unsigned Opcode, std::initializer_list<llvm::DstOp> DstOps,
4202 std::initializer_list<llvm::SrcOp> SrcOps, MachineIRBuilder &MIRBuilder,
4203 const ComplexRendererFns &RenderFns) const {
4204 assert(Opcode && "Expected an opcode?");
4205 assert(!isPreISelGenericOpcode(Opcode) &&
4206 "Function should only be used to produce selected instructions!");
4207 auto MI = MIRBuilder.buildInstr(Opcode, DstOps, SrcOps);
4208 if (RenderFns)
4209 for (auto &Fn : *RenderFns)
4210 Fn(MI);
4211 constrainSelectedInstRegOperands(*MI, TII, TRI, RBI);
4212 return &*MI;
4213}
4214
4215MachineInstr *AArch64InstructionSelector::emitAddSub(
4216 const std::array<std::array<unsigned, 2>, 5> &AddrModeAndSizeToOpcode,
4217 Register Dst, MachineOperand &LHS, MachineOperand &RHS,
4218 MachineIRBuilder &MIRBuilder) const {
4219 MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
4220 assert(LHS.isReg() && RHS.isReg() && "Expected register operands?");
4221 auto Ty = MRI.getType(LHS.getReg());
4222 assert(!Ty.isVector() && "Expected a scalar or pointer?");
4223 unsigned Size = Ty.getSizeInBits();
4224 assert((Size == 32 || Size == 64) && "Expected a 32-bit or 64-bit type only");
4225 bool Is32Bit = Size == 32;
4226
4227 // INSTRri form with positive arithmetic immediate.
4228 if (auto Fns = selectArithImmed(RHS))
4229 return emitInstr(AddrModeAndSizeToOpcode[0][Is32Bit], {Dst}, {LHS},
4230 MIRBuilder, Fns);
4231
4232 // INSTRri form with negative arithmetic immediate.
4233 if (auto Fns = selectNegArithImmed(RHS))
4234 return emitInstr(AddrModeAndSizeToOpcode[3][Is32Bit], {Dst}, {LHS},
4235 MIRBuilder, Fns);
4236
4237 // INSTRrx form.
4238 if (auto Fns = selectArithExtendedRegister(RHS))
4239 return emitInstr(AddrModeAndSizeToOpcode[4][Is32Bit], {Dst}, {LHS},
4240 MIRBuilder, Fns);
4241
4242 // INSTRrs form.
4243 if (auto Fns = selectShiftedRegister(RHS))
4244 return emitInstr(AddrModeAndSizeToOpcode[1][Is32Bit], {Dst}, {LHS},
4245 MIRBuilder, Fns);
4246 return emitInstr(AddrModeAndSizeToOpcode[2][Is32Bit], {Dst}, {LHS, RHS},
4247 MIRBuilder);
4248}
4249
4250MachineInstr *
4251AArch64InstructionSelector::emitADD(Register DefReg, MachineOperand &LHS,
4252 MachineOperand &RHS,
4253 MachineIRBuilder &MIRBuilder) const {
4254 const std::array<std::array<unsigned, 2>, 5> OpcTable{
4255 {{AArch64::ADDXri, AArch64::ADDWri},
4256 {AArch64::ADDXrs, AArch64::ADDWrs},
4257 {AArch64::ADDXrr, AArch64::ADDWrr},
4258 {AArch64::SUBXri, AArch64::SUBWri},
4259 {AArch64::ADDXrx, AArch64::ADDWrx}}};
4260 return emitAddSub(OpcTable, DefReg, LHS, RHS, MIRBuilder);
4261}
4262
4263MachineInstr *
4264AArch64InstructionSelector::emitADDS(Register Dst, MachineOperand &LHS,
4265 MachineOperand &RHS,
4266 MachineIRBuilder &MIRBuilder) const {
4267 const std::array<std::array<unsigned, 2>, 5> OpcTable{
4268 {{AArch64::ADDSXri, AArch64::ADDSWri},
4269 {AArch64::ADDSXrs, AArch64::ADDSWrs},
4270 {AArch64::ADDSXrr, AArch64::ADDSWrr},
4271 {AArch64::SUBSXri, AArch64::SUBSWri},
4272 {AArch64::ADDSXrx, AArch64::ADDSWrx}}};
4273 return emitAddSub(OpcTable, Dst, LHS, RHS, MIRBuilder);
4274}
4275
4276MachineInstr *
4277AArch64InstructionSelector::emitSUBS(Register Dst, MachineOperand &LHS,
4278 MachineOperand &RHS,
4279 MachineIRBuilder &MIRBuilder) const {
4280 const std::array<std::array<unsigned, 2>, 5> OpcTable{
4281 {{AArch64::SUBSXri, AArch64::SUBSWri},
4282 {AArch64::SUBSXrs, AArch64::SUBSWrs},
4283 {AArch64::SUBSXrr, AArch64::SUBSWrr},
4284 {AArch64::ADDSXri, AArch64::ADDSWri},
4285 {AArch64::SUBSXrx, AArch64::SUBSWrx}}};
4286 return emitAddSub(OpcTable, Dst, LHS, RHS, MIRBuilder);
4287}
4288
4289MachineInstr *
4290AArch64InstructionSelector::emitADCS(Register Dst, MachineOperand &LHS,
4291 MachineOperand &RHS,
4292 MachineIRBuilder &MIRBuilder) const {
4293 assert(LHS.isReg() && RHS.isReg() && "Expected register operands?");
4294 MachineRegisterInfo *MRI = MIRBuilder.getMRI();
4295 bool Is32Bit = (MRI->getType(LHS.getReg()).getSizeInBits() == 32);
4296 static const unsigned OpcTable[2] = {AArch64::ADCSXr, AArch64::ADCSWr};
4297 return emitInstr(OpcTable[Is32Bit], {Dst}, {LHS, RHS}, MIRBuilder);
4298}
4299
4300MachineInstr *
4301AArch64InstructionSelector::emitSBCS(Register Dst, MachineOperand &LHS,
4302 MachineOperand &RHS,
4303 MachineIRBuilder &MIRBuilder) const {
4304 assert(LHS.isReg() && RHS.isReg() && "Expected register operands?");
4305 MachineRegisterInfo *MRI = MIRBuilder.getMRI();
4306 bool Is32Bit = (MRI->getType(LHS.getReg()).getSizeInBits() == 32);
4307 static const unsigned OpcTable[2] = {AArch64::SBCSXr, AArch64::SBCSWr};
4308 return emitInstr(OpcTable[Is32Bit], {Dst}, {LHS, RHS}, MIRBuilder);
4309}
4310
4311MachineInstr *
4312AArch64InstructionSelector::emitCMN(MachineOperand &LHS, MachineOperand &RHS,
4313 MachineIRBuilder &MIRBuilder) const {
4314 MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
4315 bool Is32Bit = (MRI.getType(LHS.getReg()).getSizeInBits() == 32);
4316 auto RC = Is32Bit ? &AArch64::GPR32RegClass : &AArch64::GPR64RegClass;
4317 return emitADDS(MRI.createVirtualRegister(RC), LHS, RHS, MIRBuilder);
4318}
4319
4320MachineInstr *
4321AArch64InstructionSelector::emitTST(MachineOperand &LHS, MachineOperand &RHS,
4322 MachineIRBuilder &MIRBuilder) const {
4323 assert(LHS.isReg() && RHS.isReg() && "Expected register operands?");
4324 MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
4325 LLT Ty = MRI.getType(LHS.getReg());
4326 unsigned RegSize = Ty.getSizeInBits();
4327 bool Is32Bit = (RegSize == 32);
4328 const unsigned OpcTable[3][2] = {{AArch64::ANDSXri, AArch64::ANDSWri},
4329 {AArch64::ANDSXrs, AArch64::ANDSWrs},
4330 {AArch64::ANDSXrr, AArch64::ANDSWrr}};
4331 // ANDS needs a logical immediate for its immediate form. Check if we can
4332 // fold one in.
4333 if (auto ValAndVReg = getIConstantVRegValWithLookThrough(RHS.getReg(), MRI)) {
4334 int64_t Imm = ValAndVReg->Value.getSExtValue();
4335
4336 if (AArch64_AM::isLogicalImmediate(Imm, RegSize)) {
4337 auto TstMI = MIRBuilder.buildInstr(OpcTable[0][Is32Bit], {Ty}, {LHS});
4338 TstMI.addImm(AArch64_AM::encodeLogicalImmediate(Imm, RegSize));
4339 constrainSelectedInstRegOperands(*TstMI, TII, TRI, RBI);
4340 return &*TstMI;
4341 }
4342 }
4343
4344 if (auto Fns = selectLogicalShiftedRegister(RHS))
4345 return emitInstr(OpcTable[1][Is32Bit], {Ty}, {LHS}, MIRBuilder, Fns);
4346 return emitInstr(OpcTable[2][Is32Bit], {Ty}, {LHS, RHS}, MIRBuilder);
4347}
4348
4349MachineInstr *AArch64InstructionSelector::emitIntegerCompare(
4350 MachineOperand &LHS, MachineOperand &RHS, MachineOperand &Predicate,
4351 MachineIRBuilder &MIRBuilder) const {
4352 assert(LHS.isReg() && RHS.isReg() && "Expected LHS and RHS to be registers!");
4353 assert(Predicate.isPredicate() && "Expected predicate?");
4354 MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
4355 LLT CmpTy = MRI.getType(LHS.getReg());
4356 assert(!CmpTy.isVector() && "Expected scalar or pointer");
4357 unsigned Size = CmpTy.getSizeInBits();
4358 (void)Size;
4359 assert((Size == 32 || Size == 64) && "Expected a 32-bit or 64-bit LHS/RHS?");
4360 // Fold the compare into a cmn or tst if possible.
4361 if (auto FoldCmp = tryFoldIntegerCompare(LHS, RHS, Predicate, MIRBuilder))
4362 return FoldCmp;
4363 auto Dst = MRI.cloneVirtualRegister(LHS.getReg());
4364 return emitSUBS(Dst, LHS, RHS, MIRBuilder);
4365}
4366
4367MachineInstr *AArch64InstructionSelector::emitCSetForFCmp(
4368 Register Dst, CmpInst::Predicate Pred, MachineIRBuilder &MIRBuilder) const {
4369 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
4370#ifndef NDEBUG
4371 LLT Ty = MRI.getType(Dst);
4372 assert(!Ty.isVector() && Ty.getSizeInBits() == 32 &&
4373 "Expected a 32-bit scalar register?");
4374#endif
4375 const Register ZReg = AArch64::WZR;
4376 AArch64CC::CondCode CC1, CC2;
4377 changeFCMPPredToAArch64CC(Pred, CC1, CC2);
4378 auto InvCC1 = AArch64CC::getInvertedCondCode(CC1);
4379 if (CC2 == AArch64CC::AL)
4380 return emitCSINC(/*Dst=*/Dst, /*Src1=*/ZReg, /*Src2=*/ZReg, InvCC1,
4381 MIRBuilder);
4382 const TargetRegisterClass *RC = &AArch64::GPR32RegClass;
4383 Register Def1Reg = MRI.createVirtualRegister(RC);
4384 Register Def2Reg = MRI.createVirtualRegister(RC);
4385 auto InvCC2 = AArch64CC::getInvertedCondCode(CC2);
4386 emitCSINC(/*Dst=*/Def1Reg, /*Src1=*/ZReg, /*Src2=*/ZReg, InvCC1, MIRBuilder);
4387 emitCSINC(/*Dst=*/Def2Reg, /*Src1=*/ZReg, /*Src2=*/ZReg, InvCC2, MIRBuilder);
4388 auto OrMI = MIRBuilder.buildInstr(AArch64::ORRWrr, {Dst}, {Def1Reg, Def2Reg});
4389 constrainSelectedInstRegOperands(*OrMI, TII, TRI, RBI);
4390 return &*OrMI;
4391}
4392
4393MachineInstr *AArch64InstructionSelector::emitFPCompare(
4394 Register LHS, Register RHS, MachineIRBuilder &MIRBuilder,
4395 std::optional<CmpInst::Predicate> Pred) const {
4396 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
4397 LLT Ty = MRI.getType(LHS);
4398 if (Ty.isVector())
4399 return nullptr;
4400 unsigned OpSize = Ty.getSizeInBits();
4401 assert(OpSize == 16 || OpSize == 32 || OpSize == 64);
4402
4403 // If this is a compare against +0.0, then we don't have
4404 // to explicitly materialize a constant.
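// E.g. a 32-bit compare of %x against +0.0 can use FCMPSri %x (compare with
// zero) instead of materializing 0.0 into an FPR and using FCMPSrr.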
4405 const ConstantFP *FPImm = getConstantFPVRegVal(RHS, MRI);
4406 bool ShouldUseImm = FPImm && (FPImm->isZero() && !FPImm->isNegative());
4407
4408 auto IsEqualityPred = [](CmpInst::Predicate P) {
4409 return P == CmpInst::FCMP_OEQ || P == CmpInst::FCMP_ONE ||
4410 P == CmpInst::FCMP_UEQ || P == CmpInst::FCMP_UNE;
4411 };
4412 if (!ShouldUseImm && Pred && IsEqualityPred(*Pred)) {
4413 // Try commutating the operands.
4414 const ConstantFP *LHSImm = getConstantFPVRegVal(LHS, MRI);
4415 if (LHSImm && (LHSImm->isZero() && !LHSImm->isNegative())) {
4416 ShouldUseImm = true;
4417 std::swap(LHS, RHS);
4418 }
4419 }
4420 unsigned CmpOpcTbl[2][3] = {
4421 {AArch64::FCMPHrr, AArch64::FCMPSrr, AArch64::FCMPDrr},
4422 {AArch64::FCMPHri, AArch64::FCMPSri, AArch64::FCMPDri}};
4423 unsigned CmpOpc =
4424 CmpOpcTbl[ShouldUseImm][OpSize == 16 ? 0 : (OpSize == 32 ? 1 : 2)];
4425
4426 // Partially build the compare. Decide if we need to add a use for the
4427 // third operand based off whether or not we're comparing against 0.0.
4428 auto CmpMI = MIRBuilder.buildInstr(CmpOpc).addUse(LHS);
4429 CmpMI.setMIFlags(MachineInstr::NoFPExcept);
4430 if (!ShouldUseImm)
4431 CmpMI.addUse(RHS);
4432 constrainSelectedInstRegOperands(*CmpMI, TII, TRI, RBI);
4433 return &*CmpMI;
4434}
4435
4436MachineInstr *AArch64InstructionSelector::emitVectorConcat(
4437 std::optional<Register> Dst, Register Op1, Register Op2,
4438 MachineIRBuilder &MIRBuilder) const {
4439 // We implement a vector concat by:
4440 // 1. Use scalar_to_vector to insert the lower vector into the larger dest
4441 // 2. Insert the upper vector into the destination's upper element
4442 // TODO: some of this code is common with G_BUILD_VECTOR handling.
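// For illustration (vreg names are placeholders), concatenating two <2 x s32>
// operands into a <4 x s32> result emits roughly:
//   %w1:fpr128 = INSERT_SUBREG (IMPLICIT_DEF), %op1, %subreg.dsub
//   %w2:fpr128 = INSERT_SUBREG (IMPLICIT_DEF), %op2, %subreg.dsub
//   %dst = INSvi64lane %w1, 1, %w2, 0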
4443 MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
4444
4445 const LLT Op1Ty = MRI.getType(Op1);
4446 const LLT Op2Ty = MRI.getType(Op2);
4447
4448 if (Op1Ty != Op2Ty) {
4449 LLVM_DEBUG(dbgs() << "Could not do vector concat of differing vector tys");
4450 return nullptr;
4451 }
4452 assert(Op1Ty.isVector() && "Expected a vector for vector concat");
4453
4454 if (Op1Ty.getSizeInBits() >= 128) {
4455 LLVM_DEBUG(dbgs() << "Vector concat not supported for full size vectors");
4456 return nullptr;
4457 }
4458
4459 // At the moment we just support 64 bit vector concats.
4460 if (Op1Ty.getSizeInBits() != 64) {
4461 LLVM_DEBUG(dbgs() << "Vector concat only supported for 64b vectors");
4462 return nullptr;
4463 }
4464
4465 const LLT ScalarTy = LLT::scalar(Op1Ty.getSizeInBits());
4466 const RegisterBank &FPRBank = *RBI.getRegBank(Op1, MRI, TRI);
4467 const TargetRegisterClass *DstRC =
4468 getRegClassForTypeOnBank(Op1Ty.multiplyElements(2), FPRBank);
4469
4470 MachineInstr *WidenedOp1 =
4471 emitScalarToVector(ScalarTy.getSizeInBits(), DstRC, Op1, MIRBuilder);
4472 MachineInstr *WidenedOp2 =
4473 emitScalarToVector(ScalarTy.getSizeInBits(), DstRC, Op2, MIRBuilder);
4474 if (!WidenedOp1 || !WidenedOp2) {
4475 LLVM_DEBUG(dbgs() << "Could not emit a vector from scalar value");
4476 return nullptr;
4477 }
4478
4479 // Now do the insert of the upper element.
4480 unsigned InsertOpc, InsSubRegIdx;
4481 std::tie(InsertOpc, InsSubRegIdx) =
4482 getInsertVecEltOpInfo(FPRBank, ScalarTy.getSizeInBits());
4483
4484 if (!Dst)
4485 Dst = MRI.createVirtualRegister(DstRC);
4486 auto InsElt =
4487 MIRBuilder
4488 .buildInstr(InsertOpc, {*Dst}, {WidenedOp1->getOperand(0).getReg()})
4489 .addImm(1) /* Lane index */
4490 .addUse(WidenedOp2->getOperand(0).getReg())
4491 .addImm(0);
4492 constrainSelectedInstRegOperands(*InsElt, TII, TRI, RBI);
4493 return &*InsElt;
4494}
4495
4496MachineInstr *
4497AArch64InstructionSelector::emitCSINC(Register Dst, Register Src1,
4498 Register Src2, AArch64CC::CondCode Pred,
4499 MachineIRBuilder &MIRBuilder) const {
4500 auto &MRI = *MIRBuilder.getMRI();
4501 const RegClassOrRegBank &RegClassOrBank = MRI.getRegClassOrRegBank(Dst);
4502 // If we used a register class, then this won't necessarily have an LLT.
4503 // Compute the size based off whether or not we have a class or bank.
4504 unsigned Size;
4505 if (const auto *RC = RegClassOrBank.dyn_cast<const TargetRegisterClass *>())
4506 Size = TRI.getRegSizeInBits(*RC);
4507 else
4508 Size = MRI.getType(Dst).getSizeInBits();
4509 // Some opcodes use s1.
4510 assert(Size <= 64 && "Expected 64 bits or less only!");
4511 static const unsigned OpcTable[2] = {AArch64::CSINCWr, AArch64::CSINCXr};
4512 unsigned Opc = OpcTable[Size == 64];
4513 auto CSINC = MIRBuilder.buildInstr(Opc, {Dst}, {Src1, Src2}).addImm(Pred);
4514 constrainSelectedInstRegOperands(*CSINC, TII, TRI, RBI);
4515 return &*CSINC;
4516}
4517
4518MachineInstr *AArch64InstructionSelector::emitCarryIn(MachineInstr &I,
4519 Register CarryReg) {
4520 MachineRegisterInfo *MRI = MIB.getMRI();
4521 unsigned Opcode = I.getOpcode();
4522
4523 // If the instruction is a SUB, we need to negate the carry,
4524 // because borrowing is indicated by carry-flag == 0.
4525 bool NeedsNegatedCarry =
4526 (Opcode == TargetOpcode::G_USUBE || Opcode == TargetOpcode::G_SSUBE);
4527
4528 // If the previous instruction will already produce the correct carry, do not
4529 // emit a carry generating instruction. E.g. for G_UADDE/G_USUBE sequences
4530 // generated during legalization of wide add/sub. This optimization depends on
4531 // these sequences not being interrupted by other instructions.
4532 // We have to select the previous instruction before the carry-using
4533 // instruction is deleted by the calling function, otherwise the previous
4534 // instruction might become dead and would get deleted.
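// E.g. in a legalized wide addition (vreg names are placeholders):
//   %lo, %carry = G_UADDO %a_lo, %b_lo
//   %hi, %c2    = G_UADDE %a_hi, %b_hi, %carry
// the ADDS selected for the G_UADDO already leaves the needed carry in NZCV,
// so no extra carry-setting instruction is emitted here.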
4535 MachineInstr *SrcMI = MRI->getVRegDef(CarryReg);
4536 if (SrcMI == I.getPrevNode()) {
4537 if (auto *CarrySrcMI = dyn_cast<GAddSubCarryOut>(SrcMI)) {
4538 bool ProducesNegatedCarry = CarrySrcMI->isSub();
4539 if (NeedsNegatedCarry == ProducesNegatedCarry &&
4540 CarrySrcMI->isUnsigned() &&
4541 CarrySrcMI->getCarryOutReg() == CarryReg &&
4542 selectAndRestoreState(*SrcMI))
4543 return nullptr;
4544 }
4545 }
4546
4547 Register DeadReg = MRI->createVirtualRegister(&AArch64::GPR32RegClass);
4548
4549 if (NeedsNegatedCarry) {
4550 // (0 - Carry) sets !C in NZCV when Carry == 1
4551 Register ZReg = AArch64::WZR;
4552 return emitInstr(AArch64::SUBSWrr, {DeadReg}, {ZReg, CarryReg}, MIB);
4553 }
4554
4555 // (Carry - 1) sets !C in NZCV when Carry == 0
4556 auto Fns = select12BitValueWithLeftShift(1);
4557 return emitInstr(AArch64::SUBSWri, {DeadReg}, {CarryReg}, MIB, Fns);
4558}
4559
4560bool AArch64InstructionSelector::selectOverflowOp(MachineInstr &I,
4561 MachineRegisterInfo &MRI) {
4562 auto &CarryMI = cast<GAddSubCarryOut>(I);
4563
4564 if (auto *CarryInMI = dyn_cast<GAddSubCarryInOut>(&I)) {
4565 // Set NZCV carry according to carry-in VReg
4566 emitCarryIn(I, CarryInMI->getCarryInReg());
4567 }
4568
4569 // Emit the operation and get the correct condition code.
4570 auto OpAndCC = emitOverflowOp(I.getOpcode(), CarryMI.getDstReg(),
4571 CarryMI.getLHS(), CarryMI.getRHS(), MIB);
4572
4573 Register CarryOutReg = CarryMI.getCarryOutReg();
4574
4575 // Don't convert carry-out to VReg if it is never used
4576 if (!MRI.use_nodbg_empty(CarryOutReg)) {
4577 // Now, put the overflow result in the register given by the first operand
4578 // to the overflow op. CSINC increments the result when the predicate is
4579 // false, so to get the increment when it's true, we need to use the
4580 // inverse. In this case, we want to increment when carry is set.
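// E.g. for G_UADDO the overflow condition is HS, so the carry-out is
// materialized roughly as:
//   %carry_out:gpr32 = CSINCWr $wzr, $wzr, LO   // 1 when HS holds, else 0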
4581 Register ZReg = AArch64::WZR;
4582 emitCSINC(/*Dst=*/CarryOutReg, /*Src1=*/ZReg, /*Src2=*/ZReg,
4583 getInvertedCondCode(OpAndCC.second), MIB);
4584 }
4585
4586 I.eraseFromParent();
4587 return true;
4588}
4589
4590std::pair<MachineInstr *, AArch64CC::CondCode>
4591AArch64InstructionSelector::emitOverflowOp(unsigned Opcode, Register Dst,
4592 MachineOperand &LHS,
4593 MachineOperand &RHS,
4594 MachineIRBuilder &MIRBuilder) const {
4595 switch (Opcode) {
4596 default:
4597 llvm_unreachable("Unexpected opcode!");
4598 case TargetOpcode::G_SADDO:
4599 return std::make_pair(emitADDS(Dst, LHS, RHS, MIRBuilder), AArch64CC::VS);
4600 case TargetOpcode::G_UADDO:
4601 return std::make_pair(emitADDS(Dst, LHS, RHS, MIRBuilder), AArch64CC::HS);
4602 case TargetOpcode::G_SSUBO:
4603 return std::make_pair(emitSUBS(Dst, LHS, RHS, MIRBuilder), AArch64CC::VS);
4604 case TargetOpcode::G_USUBO:
4605 return std::make_pair(emitSUBS(Dst, LHS, RHS, MIRBuilder), AArch64CC::LO);
4606 case TargetOpcode::G_SADDE:
4607 return std::make_pair(emitADCS(Dst, LHS, RHS, MIRBuilder), AArch64CC::VS);
4608 case TargetOpcode::G_UADDE:
4609 return std::make_pair(emitADCS(Dst, LHS, RHS, MIRBuilder), AArch64CC::HS);
4610 case TargetOpcode::G_SSUBE:
4611 return std::make_pair(emitSBCS(Dst, LHS, RHS, MIRBuilder), AArch64CC::VS);
4612 case TargetOpcode::G_USUBE:
4613 return std::make_pair(emitSBCS(Dst, LHS, RHS, MIRBuilder), AArch64CC::LO);
4614 }
4615}
4616
4617/// Returns true if @p Val is a tree of AND/OR/CMP operations that can be
4618/// expressed as a conjunction.
4619/// \param CanNegate Set to true if we can negate the whole sub-tree just by
4620/// changing the conditions on the CMP tests.
4621/// (this means we can call emitConjunctionRec() with
4622/// Negate==true on this sub-tree)
4623/// \param MustBeFirst Set to true if this subtree needs to be negated and we
4624/// cannot do the negation naturally. We are required to
4625/// emit the subtree first in this case.
4626/// \param WillNegate Is true if we are called when the result of this
4627/// subexpression must be negated. This happens when the
4628/// outer expression is an OR. We can use this fact to know
4629/// that we have a double negation (or (or ...) ...) that
4630/// can be implemented for free.
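/// For illustration, a tree such as (icmp(a) && icmp(b)) || icmp(c) can be
/// lowered to a single compare followed by a chain of conditional compares
/// (CCMP/FCCMP), with the final predicate read out of NZCV by the caller.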
4631static bool canEmitConjunction(Register Val, bool &CanNegate, bool &MustBeFirst,
4632 bool WillNegate, MachineRegisterInfo &MRI,
4633 unsigned Depth = 0) {
4634 if (!MRI.hasOneNonDBGUse(Val))
4635 return false;
4636 MachineInstr *ValDef = MRI.getVRegDef(Val);
4637 unsigned Opcode = ValDef->getOpcode();
4638 if (isa<GAnyCmp>(ValDef)) {
4639 CanNegate = true;
4640 MustBeFirst = false;
4641 return true;
4642 }
4643 // Protect against exponential runtime and stack overflow.
4644 if (Depth > 6)
4645 return false;
4646 if (Opcode == TargetOpcode::G_AND || Opcode == TargetOpcode::G_OR) {
4647 bool IsOR = Opcode == TargetOpcode::G_OR;
4648 Register O0 = ValDef->getOperand(1).getReg();
4649 Register O1 = ValDef->getOperand(2).getReg();
4650 bool CanNegateL;
4651 bool MustBeFirstL;
4652 if (!canEmitConjunction(O0, CanNegateL, MustBeFirstL, IsOR, MRI, Depth + 1))
4653 return false;
4654 bool CanNegateR;
4655 bool MustBeFirstR;
4656 if (!canEmitConjunction(O1, CanNegateR, MustBeFirstR, IsOR, MRI, Depth + 1))
4657 return false;
4658
4659 if (MustBeFirstL && MustBeFirstR)
4660 return false;
4661
4662 if (IsOR) {
4663 // For an OR expression we need to be able to naturally negate at least
4664 // one side or we cannot do the transformation at all.
4665 if (!CanNegateL && !CanNegateR)
4666 return false;
4667 // If the result of the OR will be negated and we can naturally negate
4668 // the leaves, then this sub-tree as a whole negates naturally.
4669 CanNegate = WillNegate && CanNegateL && CanNegateR;
4670 // If we cannot naturally negate the whole sub-tree, then this must be
4671 // emitted first.
4672 MustBeFirst = !CanNegate;
4673 } else {
4674 assert(Opcode == TargetOpcode::G_AND && "Must be G_AND");
4675 // We cannot naturally negate an AND operation.
4676 CanNegate = false;
4677 MustBeFirst = MustBeFirstL || MustBeFirstR;
4678 }
4679 return true;
4680 }
4681 return false;
4682}
4683
4684MachineInstr *AArch64InstructionSelector::emitConditionalComparison(
4685 Register LHS, Register RHS, CmpInst::Predicate CC,
4686 AArch64CC::CondCode Predicate, AArch64CC::CondCode OutCC,
4687 MachineIRBuilder &MIB) const {
4688 auto &MRI = *MIB.getMRI();
4689 LLT OpTy = MRI.getType(LHS);
4690 unsigned CCmpOpc;
4691 std::optional<ValueAndVReg> C;
4692 if (CmpInst::isIntPredicate(CC)) {
4693 assert(OpTy.getSizeInBits() == 32 || OpTy.getSizeInBits() == 64);
4694 C = getIConstantVRegValWithLookThrough(RHS, MRI);
4695 if (!C || C->Value.sgt(31) || C->Value.slt(-31))
4696 CCmpOpc = OpTy.getSizeInBits() == 32 ? AArch64::CCMPWr : AArch64::CCMPXr;
4697 else if (C->Value.ule(31))
4698 CCmpOpc = OpTy.getSizeInBits() == 32 ? AArch64::CCMPWi : AArch64::CCMPXi;
4699 else
4700 CCmpOpc = OpTy.getSizeInBits() == 32 ? AArch64::CCMNWi : AArch64::CCMNXi;
4701 } else {
4702 assert(OpTy.getSizeInBits() == 16 || OpTy.getSizeInBits() == 32 ||
4703 OpTy.getSizeInBits() == 64);
4704 switch (OpTy.getSizeInBits()) {
4705 case 16:
4706 assert(STI.hasFullFP16() && "Expected Full FP16 for fp16 comparisons");
4707 CCmpOpc = AArch64::FCCMPHrr;
4708 break;
4709 case 32:
4710 CCmpOpc = AArch64::FCCMPSrr;
4711 break;
4712 case 64:
4713 CCmpOpc = AArch64::FCCMPDrr;
4714 break;
4715 default:
4716 return nullptr;
4717 }
4718 }
4719 AArch64CC::CondCode InvOutCC = AArch64CC::getInvertedCondCode(OutCC);
4720 unsigned NZCV = AArch64CC::getNZCVToSatisfyCondCode(InvOutCC);
4721 auto CCmp =
4722 MIB.buildInstr(CCmpOpc, {}, {LHS});
4723 if (CCmpOpc == AArch64::CCMPWi || CCmpOpc == AArch64::CCMPXi)
4724 CCmp.addImm(C->Value.getZExtValue());
4725 else if (CCmpOpc == AArch64::CCMNWi || CCmpOpc == AArch64::CCMNXi)
4726 CCmp.addImm(C->Value.abs().getZExtValue());
4727 else
4728 CCmp.addReg(RHS);
4729 CCmp.addImm(NZCV).addImm(Predicate);
4730 constrainSelectedInstRegOperands(*CCmp, TII, TRI, RBI);
4731 return &*CCmp;
4732}
4733
4734MachineInstr *AArch64InstructionSelector::emitConjunctionRec(
4735 Register Val, AArch64CC::CondCode &OutCC, bool Negate, Register CCOp,
4736 AArch64CC::CondCode Predicate, MachineIRBuilder &MIB) const {
4737 // We're at a tree leaf, produce a conditional comparison operation.
4738 auto &MRI = *MIB.getMRI();
4739 MachineInstr *ValDef = MRI.getVRegDef(Val);
4740 unsigned Opcode = ValDef->getOpcode();
4741 if (auto *Cmp = dyn_cast<GAnyCmp>(ValDef)) {
4742 Register LHS = Cmp->getLHSReg();
4743 Register RHS = Cmp->getRHSReg();
4744 CmpInst::Predicate CC = Cmp->getCond();
4745 if (Negate)
4746 CC = CmpInst::getInversePredicate(CC);
4747 if (isa<GICmp>(Cmp)) {
4748 OutCC = changeICMPPredToAArch64CC(CC);
4749 } else {
4750 // Handle special FP cases.
4751 AArch64CC::CondCode ExtraCC;
4752 changeFPCCToANDAArch64CC(CC, OutCC, ExtraCC);
4753 // Some floating point conditions can't be tested with a single condition
4754 // code. Construct an additional comparison in this case.
4755 if (ExtraCC != AArch64CC::AL) {
4756 MachineInstr *ExtraCmp;
4757 if (!CCOp)
4758 ExtraCmp = emitFPCompare(LHS, RHS, MIB, CC);
4759 else
4760 ExtraCmp =
4761 emitConditionalComparison(LHS, RHS, CC, Predicate, ExtraCC, MIB);
4762 CCOp = ExtraCmp->getOperand(0).getReg();
4763 Predicate = ExtraCC;
4764 }
4765 }
4766
4767 // Produce a normal comparison if we are first in the chain
4768 if (!CCOp) {
4769 auto Dst = MRI.cloneVirtualRegister(LHS);
4770 if (isa<GICmp>(Cmp))
4771 return emitSUBS(Dst, Cmp->getOperand(2), Cmp->getOperand(3), MIB);
4772 return emitFPCompare(Cmp->getOperand(2).getReg(),
4773 Cmp->getOperand(3).getReg(), MIB);
4774 }
4775 // Otherwise produce a ccmp.
4776 return emitConditionalComparison(LHS, RHS, CC, Predicate, OutCC, MIB);
4777 }
4778 assert(MRI.hasOneNonDBGUse(Val) && "Valid conjunction/disjunction tree");
4779
4780 bool IsOR = Opcode == TargetOpcode::G_OR;
4781
4782 Register LHS = ValDef->getOperand(1).getReg();
4783 bool CanNegateL;
4784 bool MustBeFirstL;
4785 bool ValidL = canEmitConjunction(LHS, CanNegateL, MustBeFirstL, IsOR, MRI);
4786 assert(ValidL && "Valid conjunction/disjunction tree");
4787 (void)ValidL;
4788
4789 Register RHS = ValDef->getOperand(2).getReg();
4790 bool CanNegateR;
4791 bool MustBeFirstR;
4792 bool ValidR = canEmitConjunction(RHS, CanNegateR, MustBeFirstR, IsOR, MRI);
4793 assert(ValidR && "Valid conjunction/disjunction tree");
4794 (void)ValidR;
4795
4796 // Swap sub-tree that must come first to the right side.
4797 if (MustBeFirstL) {
4798 assert(!MustBeFirstR && "Valid conjunction/disjunction tree");
4799 std::swap(LHS, RHS);
4800 std::swap(CanNegateL, CanNegateR);
4801 std::swap(MustBeFirstL, MustBeFirstR);
4802 }
4803
4804 bool NegateR;
4805 bool NegateAfterR;
4806 bool NegateL;
4807 bool NegateAfterAll;
4808 if (Opcode == TargetOpcode::G_OR) {
4809 // Swap the sub-tree that we can negate naturally to the left.
4810 if (!CanNegateL) {
4811 assert(CanNegateR && "at least one side must be negatable");
4812 assert(!MustBeFirstR && "invalid conjunction/disjunction tree");
4813 assert(!Negate);
4814 std::swap(LHS, RHS);
4815 NegateR = false;
4816 NegateAfterR = true;
4817 } else {
4818 // Negate the left sub-tree if possible, otherwise negate the result.
4819 NegateR = CanNegateR;
4820 NegateAfterR = !CanNegateR;
4821 }
4822 NegateL = true;
4823 NegateAfterAll = !Negate;
4824 } else {
4825 assert(Opcode == TargetOpcode::G_AND &&
4826 "Valid conjunction/disjunction tree");
4827 assert(!Negate && "Valid conjunction/disjunction tree");
4828
4829 NegateL = false;
4830 NegateR = false;
4831 NegateAfterR = false;
4832 NegateAfterAll = false;
4833 }
4834
4835 // Emit sub-trees.
4836 AArch64CC::CondCode RHSCC;
4837 MachineInstr *CmpR =
4838 emitConjunctionRec(RHS, RHSCC, NegateR, CCOp, Predicate, MIB);
4839 if (NegateAfterR)
4840 RHSCC = AArch64CC::getInvertedCondCode(RHSCC);
4841 MachineInstr *CmpL = emitConjunctionRec(
4842 LHS, OutCC, NegateL, CmpR->getOperand(0).getReg(), RHSCC, MIB);
4843 if (NegateAfterAll)
4844 OutCC = AArch64CC::getInvertedCondCode(OutCC);
4845 return CmpL;
4846}
4847
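// Example (illustrative): given
//   %c = G_AND (G_ICMP eq, %a, %b), (G_ICMP slt, %x, %y)
// emitConjunction emits a flag-setting compare (SUBS/FCMP) for one leaf and a
// conditional compare (CCMP/CCMN/FCCMP) for the other, and returns the final
// condition code in OutCC; tryOptSelectConjunction below then feeds OutCC
// straight into a CSEL-based select.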
4848MachineInstr *AArch64InstructionSelector::emitConjunction(
4849 Register Val, AArch64CC::CondCode &OutCC, MachineIRBuilder &MIB) const {
4850 bool DummyCanNegate;
4851 bool DummyMustBeFirst;
4852 if (!canEmitConjunction(Val, DummyCanNegate, DummyMustBeFirst, false,
4853 *MIB.getMRI()))
4854 return nullptr;
4855 return emitConjunctionRec(Val, OutCC, false, Register(), AArch64CC::AL, MIB);
4856}
4857
4858bool AArch64InstructionSelector::tryOptSelectConjunction(GSelect &SelI,
4859 MachineInstr &CondMI) {
4860 AArch64CC::CondCode AArch64CC;
4861 MachineInstr *ConjMI = emitConjunction(SelI.getCondReg(), AArch64CC, MIB);
4862 if (!ConjMI)
4863 return false;
4864
4865 emitSelect(SelI.getReg(0), SelI.getTrueReg(), SelI.getFalseReg(), AArch64CC, MIB);
4866 SelI.eraseFromParent();
4867 return true;
4868}
4869
4870bool AArch64InstructionSelector::tryOptSelect(GSelect &I) {
4871 MachineRegisterInfo &MRI = *MIB.getMRI();
4872 // We want to recognize this pattern:
4873 //
4874 // $z = G_FCMP pred, $x, $y
4875 // ...
4876 // $w = G_SELECT $z, $a, $b
4877 //
4878 // Where the value of $z is *only* ever used by the G_SELECT (possibly with
4879 // some copies/truncs in between.)
4880 //
4881 // If we see this, then we can emit something like this:
4882 //
4883 // fcmp $x, $y
4884 // fcsel $w, $a, $b, pred
4885 //
4886 // This is shorter than emitting both of the long sequences that the
4887 // standard G_FCMP/G_SELECT select methods would produce.
4888
4889 // First, check if the condition is defined by a compare.
4890 MachineInstr *CondDef = MRI.getVRegDef(I.getOperand(1).getReg());
4891
4892 // We can only fold if all of the defs have one use.
4893 Register CondDefReg = CondDef->getOperand(0).getReg();
4894 if (!MRI.hasOneNonDBGUse(CondDefReg)) {
4895 // Unless it's another select.
4896 for (const MachineInstr &UI : MRI.use_nodbg_instructions(CondDefReg)) {
4897 if (CondDef == &UI)
4898 continue;
4899 if (UI.getOpcode() != TargetOpcode::G_SELECT)
4900 return false;
4901 }
4902 }
4903
4904 // Is the condition defined by a compare?
4905 unsigned CondOpc = CondDef->getOpcode();
4906 if (CondOpc != TargetOpcode::G_ICMP && CondOpc != TargetOpcode::G_FCMP) {
4907 if (tryOptSelectConjunction(I, *CondDef))
4908 return true;
4909 return false;
4910 }
4911
4912 AArch64CC::CondCode CondCode;
4913 if (CondOpc == TargetOpcode::G_ICMP) {
4914 auto Pred =
4915 static_cast<CmpInst::Predicate>(CondDef->getOperand(1).getPredicate());
4916 CondCode = changeICMPPredToAArch64CC(Pred);
4917 emitIntegerCompare(CondDef->getOperand(2), CondDef->getOperand(3),
4918 CondDef->getOperand(1), MIB);
4919 } else {
4920 // Get the condition code for the select.
4921 auto Pred =
4922 static_cast<CmpInst::Predicate>(CondDef->getOperand(1).getPredicate());
4923 AArch64CC::CondCode CondCode2;
4924 changeFCMPPredToAArch64CC(Pred, CondCode, CondCode2);
4925
4926 // changeFCMPPredToAArch64CC sets CondCode2 to AL when we require two
4927 // instructions to emit the comparison.
4928 // TODO: Handle FCMP_UEQ and FCMP_ONE. After that, this check will be
4929 // unnecessary.
4930 if (CondCode2 != AArch64CC::AL)
4931 return false;
4932
4933 if (!emitFPCompare(CondDef->getOperand(2).getReg(),
4934 CondDef->getOperand(3).getReg(), MIB)) {
4935 LLVM_DEBUG(dbgs() << "Couldn't emit compare for select!\n");
4936 return false;
4937 }
4938 }
4939
4940 // Emit the select.
4941 emitSelect(I.getOperand(0).getReg(), I.getOperand(2).getReg(),
4942 I.getOperand(3).getReg(), CondCode, MIB);
4943 I.eraseFromParent();
4944 return true;
4945}
4946
4947MachineInstr *AArch64InstructionSelector::tryFoldIntegerCompare(
4948 MachineOperand &LHS, MachineOperand &RHS, MachineOperand &Predicate,
4949 MachineIRBuilder &MIRBuilder) const {
4950 assert(LHS.isReg() && RHS.isReg() && Predicate.isPredicate() &&
4951 "Unexpected MachineOperand");
4952 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
4953 // We want to find this sort of thing:
4954 // x = G_SUB 0, y
4955 // G_ICMP z, x
4956 //
4957 // In this case, we can fold the G_SUB into the G_ICMP using a CMN instead.
4958 // e.g:
4959 //
4960 // cmn z, y
4961
4962 // Check if the RHS or LHS of the G_ICMP is defined by a SUB
4963 MachineInstr *LHSDef = getDefIgnoringCopies(LHS.getReg(), MRI);
4964 MachineInstr *RHSDef = getDefIgnoringCopies(RHS.getReg(), MRI);
4965 auto P = static_cast<CmpInst::Predicate>(Predicate.getPredicate());
4966 // Given this:
4967 //
4968 // x = G_SUB 0, y
4969 // G_ICMP x, z
4970 //
4971 // Produce this:
4972 //
4973 // cmn y, z
4974 if (isCMN(LHSDef, P, MRI))
4975 return emitCMN(LHSDef->getOperand(2), RHS, MIRBuilder);
4976
4977 // Same idea here, but with the RHS of the compare instead:
4978 //
4979 // Given this:
4980 //
4981 // x = G_SUB 0, y
4982 // G_ICMP z, x
4983 //
4984 // Produce this:
4985 //
4986 // cmn z, y
4987 if (isCMN(RHSDef, P, MRI))
4988 return emitCMN(LHS, RHSDef->getOperand(2), MIRBuilder);
4989
4990 // Given this:
4991 //
4992 // z = G_AND x, y
4993 // G_ICMP z, 0
4994 //
4995 // Produce this if the compare is signed:
4996 //
4997 // tst x, y
4998 if (!CmpInst::isUnsigned(P) && LHSDef &&
4999 LHSDef->getOpcode() == TargetOpcode::G_AND) {
5000 // Make sure that the RHS is 0.
5001 auto ValAndVReg = getIConstantVRegValWithLookThrough(RHS.getReg(), MRI);
5002 if (!ValAndVReg || ValAndVReg->Value != 0)
5003 return nullptr;
5004
5005 return emitTST(LHSDef->getOperand(1),
5006 LHSDef->getOperand(2), MIRBuilder);
5007 }
5008
5009 return nullptr;
5010}
5011
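// Example (illustrative): a G_SHUFFLE_VECTOR of two <4 x s32> sources with
// mask <1,0,3,2> has BytesPerElt = 4, so the constant-pool index vector built
// below is <4,5,6,7, 0,1,2,3, 12,13,14,15, 8,9,10,11>, and the shuffle is
// selected as a single TBL over the concatenated (or tupled) source registers.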
5012bool AArch64InstructionSelector::selectShuffleVector(
5013 MachineInstr &I, MachineRegisterInfo &MRI) {
5014 const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
5015 Register Src1Reg = I.getOperand(1).getReg();
5016 const LLT Src1Ty = MRI.getType(Src1Reg);
5017 Register Src2Reg = I.getOperand(2).getReg();
5018 const LLT Src2Ty = MRI.getType(Src2Reg);
5019 ArrayRef<int> Mask = I.getOperand(3).getShuffleMask();
5020
5021 MachineBasicBlock &MBB = *I.getParent();
5022 MachineFunction &MF = *MBB.getParent();
5023 LLVMContext &Ctx = MF.getFunction().getContext();
5024
5025 // G_SHUFFLE_VECTOR is weird in that the source operands can be scalars, if
5026 // it's originated from a <1 x T> type. Those should have been lowered into
5027 // G_BUILD_VECTOR earlier.
5028 if (!Src1Ty.isVector() || !Src2Ty.isVector()) {
5029 LLVM_DEBUG(dbgs() << "Could not select a \"scalar\" G_SHUFFLE_VECTOR\n");
5030 return false;
5031 }
5032
5033 unsigned BytesPerElt = DstTy.getElementType().getSizeInBits() / 8;
5034
5035 SmallVector<Constant *, 16> CstIdxs;
5036 for (int Val : Mask) {
5037 // For now, we'll just treat any undef indexes as 0. This should be
5038 // optimized in the future, e.g. to select DUP etc.
5039 Val = Val < 0 ? 0 : Val;
5040 for (unsigned Byte = 0; Byte < BytesPerElt; ++Byte) {
5041 unsigned Offset = Byte + Val * BytesPerElt;
5042 CstIdxs.emplace_back(ConstantInt::get(Type::getInt8Ty(Ctx), Offset));
5043 }
5044 }
5045
5046 // Use a constant pool to load the index vector for TBL.
5047 Constant *CPVal = ConstantVector::get(CstIdxs);
5048 MachineInstr *IndexLoad = emitLoadFromConstantPool(CPVal, MIB);
5049 if (!IndexLoad) {
5050 LLVM_DEBUG(dbgs() << "Could not load from a constant pool");
5051 return false;
5052 }
5053
5054 if (DstTy.getSizeInBits() != 128) {
5055 assert(DstTy.getSizeInBits() == 64 && "Unexpected shuffle result ty");
5056 // This case can be done with TBL1.
5057 MachineInstr *Concat =
5058 emitVectorConcat(std::nullopt, Src1Reg, Src2Reg, MIB);
5059 if (!Concat) {
5060 LLVM_DEBUG(dbgs() << "Could not do vector concat for tbl1");
5061 return false;
5062 }
5063
5064 // The constant pool load will be 64 bits, so we need to convert to an FPR128 reg.
5065 IndexLoad = emitScalarToVector(64, &AArch64::FPR128RegClass,
5066 IndexLoad->getOperand(0).getReg(), MIB);
5067
5068 auto TBL1 = MIB.buildInstr(
5069 AArch64::TBLv16i8One, {&AArch64::FPR128RegClass},
5070 {Concat->getOperand(0).getReg(), IndexLoad->getOperand(0).getReg()});
5071 constrainSelectedInstRegOperands(*TBL1, TII, TRI, RBI);
5072
5073 auto Copy =
5074 MIB.buildInstr(TargetOpcode::COPY, {I.getOperand(0).getReg()}, {})
5075 .addReg(TBL1.getReg(0), 0, AArch64::dsub);
5076 RBI.constrainGenericRegister(Copy.getReg(0), AArch64::FPR64RegClass, MRI);
5077 I.eraseFromParent();
5078 return true;
5079 }
5080
5081 // For TBL2 we need to emit a REG_SEQUENCE to tie together two consecutive
5082 // Q registers for regalloc.
5083 SmallVector<Register, 2> Regs = {Src1Reg, Src2Reg};
5084 auto RegSeq = createQTuple(Regs, MIB);
5085 auto TBL2 = MIB.buildInstr(AArch64::TBLv16i8Two, {I.getOperand(0)},
5086 {RegSeq, IndexLoad->getOperand(0)});
5087 constrainSelectedInstRegOperands(*TBL2, TII, TRI, RBI);
5088 I.eraseFromParent();
5089 return true;
5090}
5091
5092MachineInstr *AArch64InstructionSelector::emitLaneInsert(
5093 std::optional<Register> DstReg, Register SrcReg, Register EltReg,
5094 unsigned LaneIdx, const RegisterBank &RB,
5095 MachineIRBuilder &MIRBuilder) const {
5096 MachineInstr *InsElt = nullptr;
5097 const TargetRegisterClass *DstRC = &AArch64::FPR128RegClass;
5098 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
5099
5100 // Create a register to define with the insert if one wasn't passed in.
5101 if (!DstReg)
5102 DstReg = MRI.createVirtualRegister(DstRC);
5103
5104 unsigned EltSize = MRI.getType(EltReg).getSizeInBits();
5105 unsigned Opc = getInsertVecEltOpInfo(RB, EltSize).first;
5106
5107 if (RB.getID() == AArch64::FPRRegBankID) {
5108 auto InsSub = emitScalarToVector(EltSize, DstRC, EltReg, MIRBuilder);
5109 InsElt = MIRBuilder.buildInstr(Opc, {*DstReg}, {SrcReg})
5110 .addImm(LaneIdx)
5111 .addUse(InsSub->getOperand(0).getReg())
5112 .addImm(0);
5113 } else {
5114 InsElt = MIRBuilder.buildInstr(Opc, {*DstReg}, {SrcReg})
5115 .addImm(LaneIdx)
5116 .addUse(EltReg);
5117 }
5118
5119 constrainSelectedInstRegOperands(*InsElt, TII, TRI, RBI);
5120 return InsElt;
5121}
5122
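// Example (illustrative; register numbers are arbitrary):
//   %d:gpr(s32) = G_SEXT (G_EXTRACT_VECTOR_ELT %v(<8 x s16>), 1)
// selects to "smov w0, v0.h[1]"; the zero/any-extending forms use UMOV
// instead, and an unsigned 64-bit result is formed by a UMOV into a W register
// followed by SUBREG_TO_REG into the X register.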
5123bool AArch64InstructionSelector::selectUSMovFromExtend(
5124 MachineInstr &MI, MachineRegisterInfo &MRI) {
5125 if (MI.getOpcode() != TargetOpcode::G_SEXT &&
5126 MI.getOpcode() != TargetOpcode::G_ZEXT &&
5127 MI.getOpcode() != TargetOpcode::G_ANYEXT)
5128 return false;
5129 bool IsSigned = MI.getOpcode() == TargetOpcode::G_SEXT;
5130 const Register DefReg = MI.getOperand(0).getReg();
5131 const LLT DstTy = MRI.getType(DefReg);
5132 unsigned DstSize = DstTy.getSizeInBits();
5133
5134 if (DstSize != 32 && DstSize != 64)
5135 return false;
5136
5137 MachineInstr *Extract = getOpcodeDef(TargetOpcode::G_EXTRACT_VECTOR_ELT,
5138 MI.getOperand(1).getReg(), MRI);
5139 int64_t Lane;
5140 if (!Extract || !mi_match(Extract->getOperand(2).getReg(), MRI, m_ICst(Lane)))
5141 return false;
5142 Register Src0 = Extract->getOperand(1).getReg();
5143
5144 const LLT &VecTy = MRI.getType(Src0);
5145
5146 if (VecTy.getSizeInBits() != 128) {
5147 const MachineInstr *ScalarToVector = emitScalarToVector(
5148 VecTy.getSizeInBits(), &AArch64::FPR128RegClass, Src0, MIB);
5149 assert(ScalarToVector && "Didn't expect emitScalarToVector to fail!");
5150 Src0 = ScalarToVector->getOperand(0).getReg();
5151 }
5152
5153 unsigned Opcode;
5154 if (DstSize == 64 && VecTy.getScalarSizeInBits() == 32)
5155 Opcode = IsSigned ? AArch64::SMOVvi32to64 : AArch64::UMOVvi32;
5156 else if (DstSize == 64 && VecTy.getScalarSizeInBits() == 16)
5157 Opcode = IsSigned ? AArch64::SMOVvi16to64 : AArch64::UMOVvi16;
5158 else if (DstSize == 64 && VecTy.getScalarSizeInBits() == 8)
5159 Opcode = IsSigned ? AArch64::SMOVvi8to64 : AArch64::UMOVvi8;
5160 else if (DstSize == 32 && VecTy.getScalarSizeInBits() == 16)
5161 Opcode = IsSigned ? AArch64::SMOVvi16to32 : AArch64::UMOVvi16;
5162 else if (DstSize == 32 && VecTy.getScalarSizeInBits() == 8)
5163 Opcode = IsSigned ? AArch64::SMOVvi8to32 : AArch64::UMOVvi8;
5164 else
5165 llvm_unreachable("Unexpected type combo for S/UMov!");
5166
5167 // We may need to generate one of these, depending on the type and sign of the
5168 // input:
5169 // DstReg = SMOV Src0, Lane;
5170 // NewReg = UMOV Src0, Lane; DstReg = SUBREG_TO_REG NewReg, sub_32;
5171 MachineInstr *ExtI = nullptr;
5172 if (DstSize == 64 && !IsSigned) {
5173 Register NewReg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
5174 MIB.buildInstr(Opcode, {NewReg}, {Src0}).addImm(Lane);
5175 ExtI = MIB.buildInstr(AArch64::SUBREG_TO_REG, {DefReg}, {})
5176 .addImm(0)
5177 .addUse(NewReg)
5178 .addImm(AArch64::sub_32);
5179 RBI.constrainGenericRegister(DefReg, AArch64::GPR64RegClass, MRI);
5180 } else
5181 ExtI = MIB.buildInstr(Opcode, {DefReg}, {Src0}).addImm(Lane);
5182
5184 MI.eraseFromParent();
5185 return true;
5186}
5187
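// Example (illustrative): a 128-bit splat of the byte 0x2a is materialized
// here as "movi v0.16b, #42" (MOVIv16b_ns); 64-bit vectors use the .8b form.
// The helper bails out if the two 64-bit halves of the requested bit pattern
// differ.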
5188MachineInstr *AArch64InstructionSelector::tryAdvSIMDModImm8(
5189 Register Dst, unsigned DstSize, APInt Bits, MachineIRBuilder &Builder) {
5190 unsigned int Op;
5191 if (DstSize == 128) {
5192 if (Bits.getHiBits(64) != Bits.getLoBits(64))
5193 return nullptr;
5194 Op = AArch64::MOVIv16b_ns;
5195 } else {
5196 Op = AArch64::MOVIv8b_ns;
5197 }
5198
5199 uint64_t Val = Bits.zextOrTrunc(64).getZExtValue();
5200
5201 if (AArch64_AM::isAdvSIMDModImmType9(Val)) {
5202 Val = AArch64_AM::encodeAdvSIMDModImmType9(Val);
5203 auto Mov = Builder.buildInstr(Op, {Dst}, {}).addImm(Val);
5204 constrainSelectedInstRegOperands(*Mov, TII, TRI, RBI);
5205 return &*Mov;
5206 }
5207 return nullptr;
5208}
5209
5210MachineInstr *AArch64InstructionSelector::tryAdvSIMDModImm16(
5211 Register Dst, unsigned DstSize, APInt Bits, MachineIRBuilder &Builder,
5212 bool Inv) {
5213
5214 unsigned int Op;
5215 if (DstSize == 128) {
5216 if (Bits.getHiBits(64) != Bits.getLoBits(64))
5217 return nullptr;
5218 Op = Inv ? AArch64::MVNIv8i16 : AArch64::MOVIv8i16;
5219 } else {
5220 Op = Inv ? AArch64::MVNIv4i16 : AArch64::MOVIv4i16;
5221 }
5222
5223 uint64_t Val = Bits.zextOrTrunc(64).getZExtValue();
5224 uint64_t Shift;
5225
5226 if (AArch64_AM::isAdvSIMDModImmType5(Val)) {
5227 Val = AArch64_AM::encodeAdvSIMDModImmType5(Val);
5228 Shift = 0;
5229 } else if (AArch64_AM::isAdvSIMDModImmType6(Val)) {
5230 Val = AArch64_AM::encodeAdvSIMDModImmType6(Val);
5231 Shift = 8;
5232 } else
5233 return nullptr;
5234
5235 auto Mov = Builder.buildInstr(Op, {Dst}, {}).addImm(Val).addImm(Shift);
5236 constrainSelectedInstRegOperands(*Mov, TII, TRI, RBI);
5237 return &*Mov;
5238}
5239
5240MachineInstr *AArch64InstructionSelector::tryAdvSIMDModImm32(
5241 Register Dst, unsigned DstSize, APInt Bits, MachineIRBuilder &Builder,
5242 bool Inv) {
5243
5244 unsigned int Op;
5245 if (DstSize == 128) {
5246 if (Bits.getHiBits(64) != Bits.getLoBits(64))
5247 return nullptr;
5248 Op = Inv ? AArch64::MVNIv4i32 : AArch64::MOVIv4i32;
5249 } else {
5250 Op = Inv ? AArch64::MVNIv2i32 : AArch64::MOVIv2i32;
5251 }
5252
5253 uint64_t Val = Bits.zextOrTrunc(64).getZExtValue();
5254 uint64_t Shift;
5255
5256 if ((AArch64_AM::isAdvSIMDModImmType1(Val))) {
5257 Val = AArch64_AM::encodeAdvSIMDModImmType1(Val);
5258 Shift = 0;
5259 } else if ((AArch64_AM::isAdvSIMDModImmType2(Val))) {
5260 Val = AArch64_AM::encodeAdvSIMDModImmType2(Val);
5261 Shift = 8;
5262 } else if ((AArch64_AM::isAdvSIMDModImmType3(Val))) {
5263 Val = AArch64_AM::encodeAdvSIMDModImmType3(Val);
5264 Shift = 16;
5265 } else if ((AArch64_AM::isAdvSIMDModImmType4(Val))) {
5266 Val = AArch64_AM::encodeAdvSIMDModImmType4(Val);
5267 Shift = 24;
5268 } else
5269 return nullptr;
5270
5271 auto Mov = Builder.buildInstr(Op, {Dst}, {}).addImm(Val).addImm(Shift);
5272 constrainSelectedInstRegOperands(*Mov, TII, TRI, RBI);
5273 return &*Mov;
5274}
5275
5276MachineInstr *AArch64InstructionSelector::tryAdvSIMDModImm64(
5277 Register Dst, unsigned DstSize, APInt Bits, MachineIRBuilder &Builder) {
5278
5279 unsigned int Op;
5280 if (DstSize == 128) {
5281 if (Bits.getHiBits(64) != Bits.getLoBits(64))
5282 return nullptr;
5283 Op = AArch64::MOVIv2d_ns;
5284 } else {
5285 Op = AArch64::MOVID;
5286 }
5287
5288 uint64_t Val = Bits.zextOrTrunc(64).getZExtValue();
5289 if (AArch64_AM::isAdvSIMDModImmType10(Val)) {
5290 Val = AArch64_AM::encodeAdvSIMDModImmType10(Val);
5291 auto Mov = Builder.buildInstr(Op, {Dst}, {}).addImm(Val);
5292 constrainSelectedInstRegOperands(*Mov, TII, TRI, RBI);
5293 return &*Mov;
5294 }
5295 return nullptr;
5296}
5297
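// Note: the "321s" helper below produces the shifting-ones (MSL) MOVI/MVNI
// forms; the shift immediates 264 and 272 appear to correspond to the
// "MSL #8" and "MSL #16" encodings respectively, mirroring the equivalent
// SelectionDAG lowering.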
5298MachineInstr *AArch64InstructionSelector::tryAdvSIMDModImm321s(
5299 Register Dst, unsigned DstSize, APInt Bits, MachineIRBuilder &Builder,
5300 bool Inv) {
5301
5302 unsigned int Op;
5303 if (DstSize == 128) {
5304 if (Bits.getHiBits(64) != Bits.getLoBits(64))
5305 return nullptr;
5306 Op = Inv ? AArch64::MVNIv4s_msl : AArch64::MOVIv4s_msl;
5307 } else {
5308 Op = Inv ? AArch64::MVNIv2s_msl : AArch64::MOVIv2s_msl;
5309 }
5310
5311 uint64_t Val = Bits.zextOrTrunc(64).getZExtValue();
5312 uint64_t Shift;
5313
5314 if (AArch64_AM::isAdvSIMDModImmType7(Val)) {
5315 Val = AArch64_AM::encodeAdvSIMDModImmType7(Val);
5316 Shift = 264;
5317 } else if (AArch64_AM::isAdvSIMDModImmType8(Val)) {
5318 Val = AArch64_AM::encodeAdvSIMDModImmType8(Val);
5319 Shift = 272;
5320 } else
5321 return nullptr;
5322
5323 auto Mov = Builder.buildInstr(Op, {Dst}, {}).addImm(Val).addImm(Shift);
5324 constrainSelectedInstRegOperands(*Mov, TII, TRI, RBI);
5325 return &*Mov;
5326}
5327
5328MachineInstr *AArch64InstructionSelector::tryAdvSIMDModImmFP(
5329 Register Dst, unsigned DstSize, APInt Bits, MachineIRBuilder &Builder) {
5330
5331 unsigned int Op;
5332 bool IsWide = false;
5333 if (DstSize == 128) {
5334 if (Bits.getHiBits(64) != Bits.getLoBits(64))
5335 return nullptr;
5336 Op = AArch64::FMOVv4f32_ns;
5337 IsWide = true;
5338 } else {
5339 Op = AArch64::FMOVv2f32_ns;
5340 }
5341
5342 uint64_t Val = Bits.zextOrTrunc(64).getZExtValue();
5343
5344 if (AArch64_AM::isAdvSIMDModImmType11(Val)) {
5345 Val = AArch64_AM::encodeAdvSIMDModImmType11(Val);
5346 } else if (IsWide && AArch64_AM::isAdvSIMDModImmType12(Val)) {
5347 Val = AArch64_AM::encodeAdvSIMDModImmType12(Val);
5348 Op = AArch64::FMOVv2f64_ns;
5349 } else
5350 return nullptr;
5351
5352 auto Mov = Builder.buildInstr(Op, {Dst}, {}).addImm(Val);
5353 constrainSelectedInstRegOperands(*Mov, TII, TRI, RBI);
5354 return &*Mov;
5355}
5356
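// Example (illustrative; register numbers are arbitrary): a pre-indexed
// sign-extending 16-bit load into a 64-bit GPR selects LDRSHXpre, roughly
// "ldrsh x0, [x1, #2]!", with the updated base (write-back) register as the
// instruction's first def.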
5357bool AArch64InstructionSelector::selectIndexedExtLoad(
5358 MachineInstr &MI, MachineRegisterInfo &MRI) {
5359 auto &ExtLd = cast<GIndexedAnyExtLoad>(MI);
5360 Register Dst = ExtLd.getDstReg();
5361 Register WriteBack = ExtLd.getWritebackReg();
5362 Register Base = ExtLd.getBaseReg();
5363 Register Offset = ExtLd.getOffsetReg();
5364 LLT Ty = MRI.getType(Dst);
5365 assert(Ty.getSizeInBits() <= 64); // Only for scalar GPRs.
5366 unsigned MemSizeBits = ExtLd.getMMO().getMemoryType().getSizeInBits();
5367 bool IsPre = ExtLd.isPre();
5368 bool IsSExt = isa<GIndexedSExtLoad>(ExtLd);
5369 bool InsertIntoXReg = false;
5370 bool IsDst64 = Ty.getSizeInBits() == 64;
5371
5372 unsigned Opc = 0;
5373 LLT NewLdDstTy;
5374 LLT s32 = LLT::scalar(32);
5375 LLT s64 = LLT::scalar(64);
5376
5377 if (MemSizeBits == 8) {
5378 if (IsSExt) {
5379 if (IsDst64)
5380 Opc = IsPre ? AArch64::LDRSBXpre : AArch64::LDRSBXpost;
5381 else
5382 Opc = IsPre ? AArch64::LDRSBWpre : AArch64::LDRSBWpost;
5383 NewLdDstTy = IsDst64 ? s64 : s32;
5384 } else {
5385 Opc = IsPre ? AArch64::LDRBBpre : AArch64::LDRBBpost;
5386 InsertIntoXReg = IsDst64;
5387 NewLdDstTy = s32;
5388 }
5389 } else if (MemSizeBits == 16) {
5390 if (IsSExt) {
5391 if (IsDst64)
5392 Opc = IsPre ? AArch64::LDRSHXpre : AArch64::LDRSHXpost;
5393 else
5394 Opc = IsPre ? AArch64::LDRSHWpre : AArch64::LDRSHWpost;
5395 NewLdDstTy = IsDst64 ? s64 : s32;
5396 } else {
5397 Opc = IsPre ? AArch64::LDRHHpre : AArch64::LDRHHpost;
5398 InsertIntoXReg = IsDst64;
5399 NewLdDstTy = s32;
5400 }
5401 } else if (MemSizeBits == 32) {
5402 if (IsSExt) {
5403 Opc = IsPre ? AArch64::LDRSWpre : AArch64::LDRSWpost;
5404 NewLdDstTy = s64;
5405 } else {
5406 Opc = IsPre ? AArch64::LDRWpre : AArch64::LDRWpost;
5407 InsertIntoXReg = IsDst64;
5408 NewLdDstTy = s32;
5409 }
5410 } else {
5411 llvm_unreachable("Unexpected size for indexed load");
5412 }
5413
5414 if (RBI.getRegBank(Dst, MRI, TRI)->getID() == AArch64::FPRRegBankID)
5415 return false; // We should be on gpr.
5416
5417 auto Cst = getIConstantVRegVal(Offset, MRI);
5418 if (!Cst)
5419 return false; // Shouldn't happen, but just in case.
5420
5421 auto LdMI = MIB.buildInstr(Opc, {WriteBack, NewLdDstTy}, {Base})
5422 .addImm(Cst->getSExtValue());
5423 LdMI.cloneMemRefs(ExtLd);
5424 constrainSelectedInstRegOperands(*LdMI, TII, TRI, RBI);
5425 // Make sure to select the load with the MemTy as the dest type, and then
5426 // insert into X reg if needed.
5427 if (InsertIntoXReg) {
5428 // Generate a SUBREG_TO_REG.
5429 auto SubToReg = MIB.buildInstr(TargetOpcode::SUBREG_TO_REG, {Dst}, {})
5430 .addImm(0)
5431 .addUse(LdMI.getReg(1))
5432 .addImm(AArch64::sub_32);
5433 RBI.constrainGenericRegister(SubToReg.getReg(0), AArch64::GPR64RegClass,
5434 MRI);
5435 } else {
5436 auto Copy = MIB.buildCopy(Dst, LdMI.getReg(1));
5437 selectCopy(*Copy, TII, MRI, TRI, RBI);
5438 }
5439 MI.eraseFromParent();
5440
5441 return true;
5442}
5443
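// Example (illustrative): a post-indexed 64-bit GPR load selects LDRXpost,
// roughly "ldr x0, [x1], #8"; loads narrower than the destination are handed
// off to selectIndexedExtLoad above.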
5444bool AArch64InstructionSelector::selectIndexedLoad(MachineInstr &MI,
5445 MachineRegisterInfo &MRI) {
5446 auto &Ld = cast<GIndexedLoad>(MI);
5447 Register Dst = Ld.getDstReg();
5448 Register WriteBack = Ld.getWritebackReg();
5449 Register Base = Ld.getBaseReg();
5450 Register Offset = Ld.getOffsetReg();
5451 assert(MRI.getType(Dst).getSizeInBits() <= 128 &&
5452 "Unexpected type for indexed load");
5453 unsigned MemSize = Ld.getMMO().getMemoryType().getSizeInBytes();
5454
5455 if (MemSize < MRI.getType(Dst).getSizeInBytes())
5456 return selectIndexedExtLoad(MI, MRI);
5457
5458 unsigned Opc = 0;
5459 if (Ld.isPre()) {
5460 static constexpr unsigned GPROpcodes[] = {
5461 AArch64::LDRBBpre, AArch64::LDRHHpre, AArch64::LDRWpre,
5462 AArch64::LDRXpre};
5463 static constexpr unsigned FPROpcodes[] = {
5464 AArch64::LDRBpre, AArch64::LDRHpre, AArch64::LDRSpre, AArch64::LDRDpre,
5465 AArch64::LDRQpre};
5466 if (RBI.getRegBank(Dst, MRI, TRI)->getID() == AArch64::FPRRegBankID)
5467 Opc = FPROpcodes[Log2_32(MemSize)];
5468 else
5469 Opc = GPROpcodes[Log2_32(MemSize)];
5470 } else {
5471 static constexpr unsigned GPROpcodes[] = {
5472 AArch64::LDRBBpost, AArch64::LDRHHpost, AArch64::LDRWpost,
5473 AArch64::LDRXpost};
5474 static constexpr unsigned FPROpcodes[] = {
5475 AArch64::LDRBpost, AArch64::LDRHpost, AArch64::LDRSpost,
5476 AArch64::LDRDpost, AArch64::LDRQpost};
5477 if (RBI.getRegBank(Dst, MRI, TRI)->getID() == AArch64::FPRRegBankID)
5478 Opc = FPROpcodes[Log2_32(MemSize)];
5479 else
5480 Opc = GPROpcodes[Log2_32(MemSize)];
5481 }
5482 auto Cst = getIConstantVRegVal(Offset, MRI);
5483 if (!Cst)
5484 return false; // Shouldn't happen, but just in case.
5485 auto LdMI =
5486 MIB.buildInstr(Opc, {WriteBack, Dst}, {Base}).addImm(Cst->getSExtValue());
5487 LdMI.cloneMemRefs(Ld);
5488 constrainSelectedInstRegOperands(*LdMI, TII, TRI, RBI);
5489 MI.eraseFromParent();
5490 return true;
5491}
5492
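// Example (illustrative): a pre-indexed 32-bit GPR store selects STRWpre,
// roughly "str w0, [x1, #4]!"; as with the indexed loads, the single def of
// the selected instruction is the updated base register.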
5493bool AArch64InstructionSelector::selectIndexedStore(GIndexedStore &I,
5494 MachineRegisterInfo &MRI) {
5495 Register Dst = I.getWritebackReg();
5496 Register Val = I.getValueReg();
5497 Register Base = I.getBaseReg();
5498 Register Offset = I.getOffsetReg();
5499 LLT ValTy = MRI.getType(Val);
5500 assert(ValTy.getSizeInBits() <= 128 && "Unexpected type for indexed store");
5501
5502 unsigned Opc = 0;
5503 if (I.isPre()) {
5504 static constexpr unsigned GPROpcodes[] = {
5505 AArch64::STRBBpre, AArch64::STRHHpre, AArch64::STRWpre,
5506 AArch64::STRXpre};
5507 static constexpr unsigned FPROpcodes[] = {
5508 AArch64::STRBpre, AArch64::STRHpre, AArch64::STRSpre, AArch64::STRDpre,
5509 AArch64::STRQpre};
5510
5511 if (RBI.getRegBank(Val, MRI, TRI)->getID() == AArch64::FPRRegBankID)
5512 Opc = FPROpcodes[Log2_32(ValTy.getSizeInBytes())];
5513 else
5514 Opc = GPROpcodes[Log2_32(ValTy.getSizeInBytes())];
5515 } else {
5516 static constexpr unsigned GPROpcodes[] = {
5517 AArch64::STRBBpost, AArch64::STRHHpost, AArch64::STRWpost,
5518 AArch64::STRXpost};
5519 static constexpr unsigned FPROpcodes[] = {
5520 AArch64::STRBpost, AArch64::STRHpost, AArch64::STRSpost,
5521 AArch64::STRDpost, AArch64::STRQpost};
5522
5523 if (RBI.getRegBank(Val, MRI, TRI)->getID() == AArch64::FPRRegBankID)
5524 Opc = FPROpcodes[Log2_32(ValTy.getSizeInBytes())];
5525 else
5526 Opc = GPROpcodes[Log2_32(ValTy.getSizeInBytes())];
5527 }
5528
5529 auto Cst = getIConstantVRegVal(Offset, MRI);
5530 if (!Cst)
5531 return false; // Shouldn't happen, but just in case.
5532 auto Str =
5533 MIB.buildInstr(Opc, {Dst}, {Val, Base}).addImm(Cst->getSExtValue());
5534 Str.cloneMemRefs(I);
5535 constrainSelectedInstRegOperands(*Str, TII, TRI, RBI);
5536 I.eraseFromParent();
5537 return true;
5538}
5539
5540MachineInstr *
5541AArch64InstructionSelector::emitConstantVector(Register Dst, Constant *CV,
5542 MachineIRBuilder &MIRBuilder,
5543 MachineRegisterInfo &MRI) {
5544 LLT DstTy = MRI.getType(Dst);
5545 unsigned DstSize = DstTy.getSizeInBits();
5546 if (CV->isNullValue()) {
5547 if (DstSize == 128) {
5548 auto Mov =
5549 MIRBuilder.buildInstr(AArch64::MOVIv2d_ns, {Dst}, {}).addImm(0);
5550 constrainSelectedInstRegOperands(*Mov, TII, TRI, RBI);
5551 return &*Mov;
5552 }
5553
5554 if (DstSize == 64) {
5555 auto Mov =
5556 MIRBuilder
5557 .buildInstr(AArch64::MOVIv2d_ns, {&AArch64::FPR128RegClass}, {})
5558 .addImm(0);
5559 auto Copy = MIRBuilder.buildInstr(TargetOpcode::COPY, {Dst}, {})
5560 .addReg(Mov.getReg(0), 0, AArch64::dsub);
5561 RBI.constrainGenericRegister(Dst, AArch64::FPR64RegClass, MRI);
5562 return &*Copy;
5563 }
5564 }
5565
5566 if (CV->getSplatValue()) {
5567 APInt DefBits = APInt::getSplat(DstSize, CV->getUniqueInteger());
5568 auto TryMOVIWithBits = [&](APInt DefBits) -> MachineInstr * {
5569 MachineInstr *NewOp;
5570 bool Inv = false;
5571 if ((NewOp = tryAdvSIMDModImm64(Dst, DstSize, DefBits, MIRBuilder)) ||
5572 (NewOp =
5573 tryAdvSIMDModImm32(Dst, DstSize, DefBits, MIRBuilder, Inv)) ||
5574 (NewOp =
5575 tryAdvSIMDModImm321s(Dst, DstSize, DefBits, MIRBuilder, Inv)) ||
5576 (NewOp =
5577 tryAdvSIMDModImm16(Dst, DstSize, DefBits, MIRBuilder, Inv)) ||
5578 (NewOp = tryAdvSIMDModImm8(Dst, DstSize, DefBits, MIRBuilder)) ||
5579 (NewOp = tryAdvSIMDModImmFP(Dst, DstSize, DefBits, MIRBuilder)))
5580 return NewOp;
5581
5582 DefBits = ~DefBits;
5583 Inv = true;
5584 if ((NewOp =
5585 tryAdvSIMDModImm32(Dst, DstSize, DefBits, MIRBuilder, Inv)) ||
5586 (NewOp =
5587 tryAdvSIMDModImm321s(Dst, DstSize, DefBits, MIRBuilder, Inv)) ||
5588 (NewOp = tryAdvSIMDModImm16(Dst, DstSize, DefBits, MIRBuilder, Inv)))
5589 return NewOp;
5590 return nullptr;
5591 };
5592
5593 if (auto *NewOp = TryMOVIWithBits(DefBits))
5594 return NewOp;
5595
5596 // See if a fneg of the constant can be materialized with a MOVI, etc
5597 auto TryWithFNeg = [&](APInt DefBits, int NumBits,
5598 unsigned NegOpc) -> MachineInstr * {
5599 // FNegate each sub-element of the constant
5600 APInt Neg = APInt::getHighBitsSet(NumBits, 1).zext(DstSize);
5601 APInt NegBits(DstSize, 0);
5602 unsigned NumElts = DstSize / NumBits;
5603 for (unsigned i = 0; i < NumElts; i++)
5604 NegBits |= Neg << (NumBits * i);
5605 NegBits = DefBits ^ NegBits;
5606
5607 // Try to create the new constants with MOVI, and if so generate a fneg
5608 // for it.
5609 if (auto *NewOp = TryMOVIWithBits(NegBits)) {
5610 Register NewDst = MRI.createVirtualRegister(&AArch64::FPR128RegClass);
5611 NewOp->getOperand(0).setReg(NewDst);
5612 return MIRBuilder.buildInstr(NegOpc, {Dst}, {NewDst});
5613 }
5614 return nullptr;
5615 };
5616 MachineInstr *R;
5617 if ((R = TryWithFNeg(DefBits, 32, AArch64::FNEGv4f32)) ||
5618 (R = TryWithFNeg(DefBits, 64, AArch64::FNEGv2f64)) ||
5619 (STI.hasFullFP16() &&
5620 (R = TryWithFNeg(DefBits, 16, AArch64::FNEGv8f16))))
5621 return R;
5622 }
5623
5624 auto *CPLoad = emitLoadFromConstantPool(CV, MIRBuilder);
5625 if (!CPLoad) {
5626 LLVM_DEBUG(dbgs() << "Could not generate cp load for constant vector!");
5627 return nullptr;
5628 }
5629
5630 auto Copy = MIRBuilder.buildCopy(Dst, CPLoad->getOperand(0));
5631 RBI.constrainGenericRegister(
5632 Dst, *MRI.getRegClass(CPLoad->getOperand(0).getReg()), MRI);
5633 return &*Copy;
5634}
5635
5636bool AArch64InstructionSelector::tryOptConstantBuildVec(
5637 MachineInstr &I, LLT DstTy, MachineRegisterInfo &MRI) {
5638 assert(I.getOpcode() == TargetOpcode::G_BUILD_VECTOR);
5639 unsigned DstSize = DstTy.getSizeInBits();
5640 assert(DstSize <= 128 && "Unexpected build_vec type!");
5641 if (DstSize < 32)
5642 return false;
5643 // Check if we're building a constant vector, in which case we want to
5644 // generate a constant pool load instead of a vector insert sequence.
5645 SmallVector<Constant *, 16> Csts;
5646 for (unsigned Idx = 1; Idx < I.getNumOperands(); ++Idx) {
5647 // Try to find G_CONSTANT or G_FCONSTANT
5648 auto *OpMI =
5649 getOpcodeDef(TargetOpcode::G_CONSTANT, I.getOperand(Idx).getReg(), MRI);
5650 if (OpMI)
5651 Csts.emplace_back(
5652 const_cast<ConstantInt *>(OpMI->getOperand(1).getCImm()));
5653 else if ((OpMI = getOpcodeDef(TargetOpcode::G_FCONSTANT,
5654 I.getOperand(Idx).getReg(), MRI)))
5655 Csts.emplace_back(
5656 const_cast<ConstantFP *>(OpMI->getOperand(1).getFPImm()));
5657 else
5658 return false;
5659 }
5660 Constant *CV = ConstantVector::get(Csts);
5661 if (!emitConstantVector(I.getOperand(0).getReg(), CV, MIB, MRI))
5662 return false;
5663 I.eraseFromParent();
5664 return true;
5665}
5666
5667bool AArch64InstructionSelector::tryOptBuildVecToSubregToReg(
5668 MachineInstr &I, MachineRegisterInfo &MRI) {
5669 // Given:
5670 // %vec = G_BUILD_VECTOR %elt, %undef, %undef, ... %undef
5671 //
5672 // Select the G_BUILD_VECTOR as a SUBREG_TO_REG from %elt.
5673 Register Dst = I.getOperand(0).getReg();
5674 Register EltReg = I.getOperand(1).getReg();
5675 LLT EltTy = MRI.getType(EltReg);
5676 // If the index isn't on the same bank as its elements, then this can't be a
5677 // SUBREG_TO_REG.
5678 const RegisterBank &EltRB = *RBI.getRegBank(EltReg, MRI, TRI);
5679 const RegisterBank &DstRB = *RBI.getRegBank(Dst, MRI, TRI);
5680 if (EltRB != DstRB)
5681 return false;
5682 if (any_of(drop_begin(I.operands(), 2), [&MRI](const MachineOperand &Op) {
5683 return !getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, Op.getReg(), MRI);
5684 }))
5685 return false;
5686 unsigned SubReg;
5687 const TargetRegisterClass *EltRC = getRegClassForTypeOnBank(EltTy, EltRB);
5688 if (!EltRC)
5689 return false;
5690 const TargetRegisterClass *DstRC =
5691 getRegClassForTypeOnBank(MRI.getType(Dst), DstRB);
5692 if (!DstRC)
5693 return false;
5694 if (!getSubRegForClass(EltRC, TRI, SubReg))
5695 return false;
5696 auto SubregToReg = MIB.buildInstr(AArch64::SUBREG_TO_REG, {Dst}, {})
5697 .addImm(0)
5698 .addUse(EltReg)
5699 .addImm(SubReg);
5700 I.eraseFromParent();
5701 constrainSelectedInstRegOperands(*SubregToReg, TII, TRI, RBI);
5702 return RBI.constrainGenericRegister(Dst, *DstRC, MRI);
5703}
5704
5705bool AArch64InstructionSelector::selectBuildVector(MachineInstr &I,
5706 MachineRegisterInfo &MRI) {
5707 assert(I.getOpcode() == TargetOpcode::G_BUILD_VECTOR);
5708 // Until we port more of the optimized selections, for now just use a vector
5709 // insert sequence.
5710 const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
5711 const LLT EltTy = MRI.getType(I.getOperand(1).getReg());
5712 unsigned EltSize = EltTy.getSizeInBits();
5713
5714 if (tryOptConstantBuildVec(I, DstTy, MRI))
5715 return true;
5716 if (tryOptBuildVecToSubregToReg(I, MRI))
5717 return true;
5718
5719 if (EltSize != 8 && EltSize != 16 && EltSize != 32 && EltSize != 64)
5720 return false; // Don't support all element types yet.
5721 const RegisterBank &RB = *RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI);
5722
5723 const TargetRegisterClass *DstRC = &AArch64::FPR128RegClass;
5724 MachineInstr *ScalarToVec =
5725 emitScalarToVector(DstTy.getElementType().getSizeInBits(), DstRC,
5726 I.getOperand(1).getReg(), MIB);
5727 if (!ScalarToVec)
5728 return false;
5729
5730 Register DstVec = ScalarToVec->getOperand(0).getReg();
5731 unsigned DstSize = DstTy.getSizeInBits();
5732
5733 // Keep track of the last MI we inserted. Later on, we might be able to save
5734 // a copy using it.
5735 MachineInstr *PrevMI = ScalarToVec;
5736 for (unsigned i = 2, e = DstSize / EltSize + 1; i < e; ++i) {
5737 // Note that if we don't do a subregister copy, we can end up making an
5738 // extra register.
5739 Register OpReg = I.getOperand(i).getReg();
5740 // Do not emit inserts for undefs
5741 if (!getOpcodeDef<GImplicitDef>(OpReg, MRI)) {
5742 PrevMI = &*emitLaneInsert(std::nullopt, DstVec, OpReg, i - 1, RB, MIB);
5743 DstVec = PrevMI->getOperand(0).getReg();
5744 }
5745 }
5746
5747 // If DstTy's size in bits is less than 128, then emit a subregister copy
5748 // from DstVec to the last register we've defined.
5749 if (DstSize < 128) {
5750 // Force this to be FPR using the destination vector.
5751 const TargetRegisterClass *RC =
5752 getRegClassForTypeOnBank(DstTy, *RBI.getRegBank(DstVec, MRI, TRI));
5753 if (!RC)
5754 return false;
5755 if (RC != &AArch64::FPR32RegClass && RC != &AArch64::FPR64RegClass) {
5756 LLVM_DEBUG(dbgs() << "Unsupported register class!\n");
5757 return false;
5758 }
5759
5760 unsigned SubReg = 0;
5761 if (!getSubRegForClass(RC, TRI, SubReg))
5762 return false;
5763 if (SubReg != AArch64::ssub && SubReg != AArch64::dsub) {
5764 LLVM_DEBUG(dbgs() << "Unsupported destination size! (" << DstSize
5765 << ")\n");
5766 return false;
5767 }
5768
5769 Register Reg = MRI.createVirtualRegister(RC);
5770 Register DstReg = I.getOperand(0).getReg();
5771
5772 MIB.buildInstr(TargetOpcode::COPY, {DstReg}, {}).addReg(DstVec, 0, SubReg);
5773 MachineOperand &RegOp = I.getOperand(1);
5774 RegOp.setReg(Reg);
5775 RBI.constrainGenericRegister(DstReg, *RC, MRI);
5776 } else {
5777 // We either have a vector with all elements (except the first one) undef or
5778 // at least one non-undef non-first element. In the first case, we need to
5779 // constrain the output register ourselves as we may have generated an
5780 // INSERT_SUBREG operation which is a generic operation for which the
5781 // output regclass cannot be automatically chosen.
5782 //
5783 // In the second case, there is no need to do this as it may generate an
5784 // instruction like INSvi32gpr where the regclass can be automatically
5785 // chosen.
5786 //
5787 // Also, we save a copy by re-using the destination register on the final
5788 // insert.
5789 PrevMI->getOperand(0).setReg(I.getOperand(0).getReg());
5790 constrainSelectedInstRegOperands(*PrevMI, TII, TRI, RBI);
5791
5791
5792 Register DstReg = PrevMI->getOperand(0).getReg();
5793 if (PrevMI == ScalarToVec && DstReg.isVirtual()) {
5794 const TargetRegisterClass *RC =
5795 getRegClassForTypeOnBank(DstTy, *RBI.getRegBank(DstVec, MRI, TRI));
5796 RBI.constrainGenericRegister(DstReg, *RC, MRI);
5797 }
5798 }
5799
5800 I.eraseFromParent();
5801 return true;
5802}
5803
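// Example (illustrative): @llvm.aarch64.neon.ld2 on <4 x s32> selects
// LD2Twov4s, whose single def is a consecutive Q-register tuple; the two
// results are then extracted with COPYs from the qsub0 and qsub1
// sub-registers and selected immediately.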
5804bool AArch64InstructionSelector::selectVectorLoadIntrinsic(unsigned Opc,
5805 unsigned NumVecs,
5806 MachineInstr &I) {
5807 assert(I.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS);
5808 assert(Opc && "Expected an opcode?");
5809 assert(NumVecs > 1 && NumVecs < 5 && "Only support 2, 3, or 4 vectors");
5810 auto &MRI = *MIB.getMRI();
5811 LLT Ty = MRI.getType(I.getOperand(0).getReg());
5812 unsigned Size = Ty.getSizeInBits();
5813 assert((Size == 64 || Size == 128) &&
5814 "Destination must be 64 bits or 128 bits?");
5815 unsigned SubReg = Size == 64 ? AArch64::dsub0 : AArch64::qsub0;
5816 auto Ptr = I.getOperand(I.getNumOperands() - 1).getReg();
5817 assert(MRI.getType(Ptr).isPointer() && "Expected a pointer type?");
5818 auto Load = MIB.buildInstr(Opc, {Ty}, {Ptr});
5819 Load.cloneMemRefs(I);
5820 constrainSelectedInstRegOperands(*Load, TII, TRI, RBI);
5821 Register SelectedLoadDst = Load->getOperand(0).getReg();
5822 for (unsigned Idx = 0; Idx < NumVecs; ++Idx) {
5823 auto Vec = MIB.buildInstr(TargetOpcode::COPY, {I.getOperand(Idx)}, {})
5824 .addReg(SelectedLoadDst, 0, SubReg + Idx);
5825 // Emit the subreg copies and immediately select them.
5826 // FIXME: We should refactor our copy code into an emitCopy helper and
5827 // clean up uses of this pattern elsewhere in the selector.
5828 selectCopy(*Vec, TII, MRI, TRI, RBI);
5829 }
5830 return true;
5831}
5832
5833bool AArch64InstructionSelector::selectVectorLoadLaneIntrinsic(
5834 unsigned Opc, unsigned NumVecs, MachineInstr &I) {
5835 assert(I.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS);
5836 assert(Opc && "Expected an opcode?");
5837 assert(NumVecs > 1 && NumVecs < 5 && "Only support 2, 3, or 4 vectors");
5838 auto &MRI = *MIB.getMRI();
5839 LLT Ty = MRI.getType(I.getOperand(0).getReg());
5840 bool Narrow = Ty.getSizeInBits() == 64;
5841
5842 auto FirstSrcRegIt = I.operands_begin() + NumVecs + 1;
5843 SmallVector<Register, 4> Regs(NumVecs);
5844 std::transform(FirstSrcRegIt, FirstSrcRegIt + NumVecs, Regs.begin(),
5845 [](auto MO) { return MO.getReg(); });
5846
5847 if (Narrow) {
5848 transform(Regs, Regs.begin(), [this](Register Reg) {
5849 return emitScalarToVector(64, &AArch64::FPR128RegClass, Reg, MIB)
5850 ->getOperand(0)
5851 .getReg();
5852 });
5853 Ty = Ty.multiplyElements(2);
5854 }
5855
5856 Register Tuple = createQTuple(Regs, MIB);
5857 auto LaneNo = getIConstantVRegVal((FirstSrcRegIt + NumVecs)->getReg(), MRI);
5858 if (!LaneNo)
5859 return false;
5860
5861 Register Ptr = (FirstSrcRegIt + NumVecs + 1)->getReg();
5862 auto Load = MIB.buildInstr(Opc, {Ty}, {})
5863 .addReg(Tuple)
5864 .addImm(LaneNo->getZExtValue())
5865 .addReg(Ptr);
5866 Load.cloneMemRefs(I);
5867 constrainSelectedInstRegOperands(*Load, TII, TRI, RBI);
5868 Register SelectedLoadDst = Load->getOperand(0).getReg();
5869 unsigned SubReg = AArch64::qsub0;
5870 for (unsigned Idx = 0; Idx < NumVecs; ++Idx) {
5871 auto Vec = MIB.buildInstr(TargetOpcode::COPY,
5872 {Narrow ? DstOp(&AArch64::FPR128RegClass)
5873 : DstOp(I.getOperand(Idx).getReg())},
5874 {})
5875 .addReg(SelectedLoadDst, 0, SubReg + Idx);
5876 Register WideReg = Vec.getReg(0);
5877 // Emit the subreg copies and immediately select them.
5878 selectCopy(*Vec, TII, MRI, TRI, RBI);
5879 if (Narrow &&
5880 !emitNarrowVector(I.getOperand(Idx).getReg(), WideReg, MIB, MRI))
5881 return false;
5882 }
5883 return true;
5884}
5885
5886void AArch64InstructionSelector::selectVectorStoreIntrinsic(MachineInstr &I,
5887 unsigned NumVecs,
5888 unsigned Opc) {
5889 MachineRegisterInfo &MRI = I.getParent()->getParent()->getRegInfo();
5890 LLT Ty = MRI.getType(I.getOperand(1).getReg());
5891 Register Ptr = I.getOperand(1 + NumVecs).getReg();
5892
5893 SmallVector<Register, 2> Regs(NumVecs);
5894 std::transform(I.operands_begin() + 1, I.operands_begin() + 1 + NumVecs,
5895 Regs.begin(), [](auto MO) { return MO.getReg(); });
5896
5897 Register Tuple = Ty.getSizeInBits() == 128 ? createQTuple(Regs, MIB)
5898 : createDTuple(Regs, MIB);
5899 auto Store = MIB.buildInstr(Opc, {}, {Tuple, Ptr});
5900 Store.cloneMemRefs(I);
5901 constrainSelectedInstRegOperands(*Store, TII, TRI, RBI);
5902}
5903
5904bool AArch64InstructionSelector::selectVectorStoreLaneIntrinsic(
5905 MachineInstr &I, unsigned NumVecs, unsigned Opc) {
5906 MachineRegisterInfo &MRI = I.getParent()->getParent()->getRegInfo();
5907 LLT Ty = MRI.getType(I.getOperand(1).getReg());
5908 bool Narrow = Ty.getSizeInBits() == 64;
5909
5910 SmallVector<Register, 2> Regs(NumVecs);
5911 std::transform(I.operands_begin() + 1, I.operands_begin() + 1 + NumVecs,
5912 Regs.begin(), [](auto MO) { return MO.getReg(); });
5913
5914 if (Narrow)
5915 transform(Regs, Regs.begin(), [this](Register Reg) {
5916 return emitScalarToVector(64, &AArch64::FPR128RegClass, Reg, MIB)
5917 ->getOperand(0)
5918 .getReg();
5919 });
5920
5921 Register Tuple = createQTuple(Regs, MIB);
5922
5923 auto LaneNo = getIConstantVRegVal(I.getOperand(1 + NumVecs).getReg(), MRI);
5924 if (!LaneNo)
5925 return false;
5926 Register Ptr = I.getOperand(1 + NumVecs + 1).getReg();
5927 auto Store = MIB.buildInstr(Opc, {}, {})
5928 .addReg(Tuple)
5929 .addImm(LaneNo->getZExtValue())
5930 .addReg(Ptr);
5931 Store.cloneMemRefs(I);
5932 constrainSelectedInstRegOperands(*Store, TII, TRI, RBI);
5933 return true;
5934}
5935
5936bool AArch64InstructionSelector::selectIntrinsicWithSideEffects(
5937 MachineInstr &I, MachineRegisterInfo &MRI) {
5938 // Find the intrinsic ID.
5939 unsigned IntrinID = cast<GIntrinsic>(I).getIntrinsicID();
5940
5941 const LLT S8 = LLT::scalar(8);
5942 const LLT S16 = LLT::scalar(16);
5943 const LLT S32 = LLT::scalar(32);
5944 const LLT S64 = LLT::scalar(64);
5945 const LLT P0 = LLT::pointer(0, 64);
5946 // Select the instruction.
5947 switch (IntrinID) {
5948 default:
5949 return false;
5950 case Intrinsic::aarch64_ldxp:
5951 case Intrinsic::aarch64_ldaxp: {
5952 auto NewI = MIB.buildInstr(
5953 IntrinID == Intrinsic::aarch64_ldxp ? AArch64::LDXPX : AArch64::LDAXPX,
5954 {I.getOperand(0).getReg(), I.getOperand(1).getReg()},
5955 {I.getOperand(3)});
5956 NewI.cloneMemRefs(I);
5957 constrainSelectedInstRegOperands(*NewI, TII, TRI, RBI);
5958 break;
5959 }
5960 case Intrinsic::aarch64_neon_ld1x2: {
5961 LLT Ty = MRI.getType(I.getOperand(0).getReg());
5962 unsigned Opc = 0;
5963 if (Ty == LLT::fixed_vector(8, S8))
5964 Opc = AArch64::LD1Twov8b;
5965 else if (Ty == LLT::fixed_vector(16, S8))
5966 Opc = AArch64::LD1Twov16b;
5967 else if (Ty == LLT::fixed_vector(4, S16))
5968 Opc = AArch64::LD1Twov4h;
5969 else if (Ty == LLT::fixed_vector(8, S16))
5970 Opc = AArch64::LD1Twov8h;
5971 else if (Ty == LLT::fixed_vector(2, S32))
5972 Opc = AArch64::LD1Twov2s;
5973 else if (Ty == LLT::fixed_vector(4, S32))
5974 Opc = AArch64::LD1Twov4s;
5975 else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
5976 Opc = AArch64::LD1Twov2d;
5977 else if (Ty == S64 || Ty == P0)
5978 Opc = AArch64::LD1Twov1d;
5979 else
5980 llvm_unreachable("Unexpected type for ld1x2!");
5981 selectVectorLoadIntrinsic(Opc, 2, I);
5982 break;
5983 }
5984 case Intrinsic::aarch64_neon_ld1x3: {
5985 LLT Ty = MRI.getType(I.getOperand(0).getReg());
5986 unsigned Opc = 0;
5987 if (Ty == LLT::fixed_vector(8, S8))
5988 Opc = AArch64::LD1Threev8b;
5989 else if (Ty == LLT::fixed_vector(16, S8))
5990 Opc = AArch64::LD1Threev16b;
5991 else if (Ty == LLT::fixed_vector(4, S16))
5992 Opc = AArch64::LD1Threev4h;
5993 else if (Ty == LLT::fixed_vector(8, S16))
5994 Opc = AArch64::LD1Threev8h;
5995 else if (Ty == LLT::fixed_vector(2, S32))
5996 Opc = AArch64::LD1Threev2s;
5997 else if (Ty == LLT::fixed_vector(4, S32))
5998 Opc = AArch64::LD1Threev4s;
5999 else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
6000 Opc = AArch64::LD1Threev2d;
6001 else if (Ty == S64 || Ty == P0)
6002 Opc = AArch64::LD1Threev1d;
6003 else
6004 llvm_unreachable("Unexpected type for ld1x3!");
6005 selectVectorLoadIntrinsic(Opc, 3, I);
6006 break;
6007 }
6008 case Intrinsic::aarch64_neon_ld1x4: {
6009 LLT Ty = MRI.getType(I.getOperand(0).getReg());
6010 unsigned Opc = 0;
6011 if (Ty == LLT::fixed_vector(8, S8))
6012 Opc = AArch64::LD1Fourv8b;
6013 else if (Ty == LLT::fixed_vector(16, S8))
6014 Opc = AArch64::LD1Fourv16b;
6015 else if (Ty == LLT::fixed_vector(4, S16))
6016 Opc = AArch64::LD1Fourv4h;
6017 else if (Ty == LLT::fixed_vector(8, S16))
6018 Opc = AArch64::LD1Fourv8h;
6019 else if (Ty == LLT::fixed_vector(2, S32))
6020 Opc = AArch64::LD1Fourv2s;
6021 else if (Ty == LLT::fixed_vector(4, S32))
6022 Opc = AArch64::LD1Fourv4s;
6023 else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
6024 Opc = AArch64::LD1Fourv2d;
6025 else if (Ty == S64 || Ty == P0)
6026 Opc = AArch64::LD1Fourv1d;
6027 else
6028 llvm_unreachable("Unexpected type for ld1x4!");
6029 selectVectorLoadIntrinsic(Opc, 4, I);
6030 break;
6031 }
6032 case Intrinsic::aarch64_neon_ld2: {
6033 LLT Ty = MRI.getType(I.getOperand(0).getReg());
6034 unsigned Opc = 0;
6035 if (Ty == LLT::fixed_vector(8, S8))
6036 Opc = AArch64::LD2Twov8b;
6037 else if (Ty == LLT::fixed_vector(16, S8))
6038 Opc = AArch64::LD2Twov16b;
6039 else if (Ty == LLT::fixed_vector(4, S16))
6040 Opc = AArch64::LD2Twov4h;
6041 else if (Ty == LLT::fixed_vector(8, S16))
6042 Opc = AArch64::LD2Twov8h;
6043 else if (Ty == LLT::fixed_vector(2, S32))
6044 Opc = AArch64::LD2Twov2s;
6045 else if (Ty == LLT::fixed_vector(4, S32))
6046 Opc = AArch64::LD2Twov4s;
6047 else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
6048 Opc = AArch64::LD2Twov2d;
6049 else if (Ty == S64 || Ty == P0)
6050 Opc = AArch64::LD1Twov1d;
6051 else
6052 llvm_unreachable("Unexpected type for ld2!");
6053 selectVectorLoadIntrinsic(Opc, 2, I);
6054 break;
6055 }
6056 case Intrinsic::aarch64_neon_ld2lane: {
6057 LLT Ty = MRI.getType(I.getOperand(0).getReg());
6058 unsigned Opc;
6059 if (Ty == LLT::fixed_vector(8, S8) || Ty == LLT::fixed_vector(16, S8))
6060 Opc = AArch64::LD2i8;
6061 else if (Ty == LLT::fixed_vector(4, S16) || Ty == LLT::fixed_vector(8, S16))
6062 Opc = AArch64::LD2i16;
6063 else if (Ty == LLT::fixed_vector(2, S32) || Ty == LLT::fixed_vector(4, S32))
6064 Opc = AArch64::LD2i32;
6065 else if (Ty == LLT::fixed_vector(2, S64) ||
6066 Ty == LLT::fixed_vector(2, P0) || Ty == S64 || Ty == P0)
6067 Opc = AArch64::LD2i64;
6068 else
6069 llvm_unreachable("Unexpected type for ld2lane!");
6070 if (!selectVectorLoadLaneIntrinsic(Opc, 2, I))
6071 return false;
6072 break;
6073 }
6074 case Intrinsic::aarch64_neon_ld2r: {
6075 LLT Ty = MRI.getType(I.getOperand(0).getReg());
6076 unsigned Opc = 0;
6077 if (Ty == LLT::fixed_vector(8, S8))
6078 Opc = AArch64::LD2Rv8b;
6079 else if (Ty == LLT::fixed_vector(16, S8))
6080 Opc = AArch64::LD2Rv16b;
6081 else if (Ty == LLT::fixed_vector(4, S16))
6082 Opc = AArch64::LD2Rv4h;
6083 else if (Ty == LLT::fixed_vector(8, S16))
6084 Opc = AArch64::LD2Rv8h;
6085 else if (Ty == LLT::fixed_vector(2, S32))
6086 Opc = AArch64::LD2Rv2s;
6087 else if (Ty == LLT::fixed_vector(4, S32))
6088 Opc = AArch64::LD2Rv4s;
6089 else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
6090 Opc = AArch64::LD2Rv2d;
6091 else if (Ty == S64 || Ty == P0)
6092 Opc = AArch64::LD2Rv1d;
6093 else
6094 llvm_unreachable("Unexpected type for ld2r!");
6095 selectVectorLoadIntrinsic(Opc, 2, I);
6096 break;
6097 }
6098 case Intrinsic::aarch64_neon_ld3: {
6099 LLT Ty = MRI.getType(I.getOperand(0).getReg());
6100 unsigned Opc = 0;
6101 if (Ty == LLT::fixed_vector(8, S8))
6102 Opc = AArch64::LD3Threev8b;
6103 else if (Ty == LLT::fixed_vector(16, S8))
6104 Opc = AArch64::LD3Threev16b;
6105 else if (Ty == LLT::fixed_vector(4, S16))
6106 Opc = AArch64::LD3Threev4h;
6107 else if (Ty == LLT::fixed_vector(8, S16))
6108 Opc = AArch64::LD3Threev8h;
6109 else if (Ty == LLT::fixed_vector(2, S32))
6110 Opc = AArch64::LD3Threev2s;
6111 else if (Ty == LLT::fixed_vector(4, S32))
6112 Opc = AArch64::LD3Threev4s;
6113 else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
6114 Opc = AArch64::LD3Threev2d;
6115 else if (Ty == S64 || Ty == P0)
6116 Opc = AArch64::LD1Threev1d;
6117 else
6118 llvm_unreachable("Unexpected type for ld3!");
6119 selectVectorLoadIntrinsic(Opc, 3, I);
6120 break;
6121 }
6122 case Intrinsic::aarch64_neon_ld3lane: {
6123 LLT Ty = MRI.getType(I.getOperand(0).getReg());
6124 unsigned Opc;
6125 if (Ty == LLT::fixed_vector(8, S8) || Ty == LLT::fixed_vector(16, S8))
6126 Opc = AArch64::LD3i8;
6127 else if (Ty == LLT::fixed_vector(4, S16) || Ty == LLT::fixed_vector(8, S16))
6128 Opc = AArch64::LD3i16;
6129 else if (Ty == LLT::fixed_vector(2, S32) || Ty == LLT::fixed_vector(4, S32))
6130 Opc = AArch64::LD3i32;
6131 else if (Ty == LLT::fixed_vector(2, S64) ||
6132 Ty == LLT::fixed_vector(2, P0) || Ty == S64 || Ty == P0)
6133 Opc = AArch64::LD3i64;
6134 else
6135 llvm_unreachable("Unexpected type for ld3lane!");
6136 if (!selectVectorLoadLaneIntrinsic(Opc, 3, I))
6137 return false;
6138 break;
6139 }
6140 case Intrinsic::aarch64_neon_ld3r: {
6141 LLT Ty = MRI.getType(I.getOperand(0).getReg());
6142 unsigned Opc = 0;
6143 if (Ty == LLT::fixed_vector(8, S8))
6144 Opc = AArch64::LD3Rv8b;
6145 else if (Ty == LLT::fixed_vector(16, S8))
6146 Opc = AArch64::LD3Rv16b;
6147 else if (Ty == LLT::fixed_vector(4, S16))
6148 Opc = AArch64::LD3Rv4h;
6149 else if (Ty == LLT::fixed_vector(8, S16))
6150 Opc = AArch64::LD3Rv8h;
6151 else if (Ty == LLT::fixed_vector(2, S32))
6152 Opc = AArch64::LD3Rv2s;
6153 else if (Ty == LLT::fixed_vector(4, S32))
6154 Opc = AArch64::LD3Rv4s;
6155 else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
6156 Opc = AArch64::LD3Rv2d;
6157 else if (Ty == S64 || Ty == P0)
6158 Opc = AArch64::LD3Rv1d;
6159 else
6160 llvm_unreachable("Unexpected type for ld3r!");
6161 selectVectorLoadIntrinsic(Opc, 3, I);
6162 break;
6163 }
6164 case Intrinsic::aarch64_neon_ld4: {
6165 LLT Ty = MRI.getType(I.getOperand(0).getReg());
6166 unsigned Opc = 0;
6167 if (Ty == LLT::fixed_vector(8, S8))
6168 Opc = AArch64::LD4Fourv8b;
6169 else if (Ty == LLT::fixed_vector(16, S8))
6170 Opc = AArch64::LD4Fourv16b;
6171 else if (Ty == LLT::fixed_vector(4, S16))
6172 Opc = AArch64::LD4Fourv4h;
6173 else if (Ty == LLT::fixed_vector(8, S16))
6174 Opc = AArch64::LD4Fourv8h;
6175 else if (Ty == LLT::fixed_vector(2, S32))
6176 Opc = AArch64::LD4Fourv2s;
6177 else if (Ty == LLT::fixed_vector(4, S32))
6178 Opc = AArch64::LD4Fourv4s;
6179 else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
6180 Opc = AArch64::LD4Fourv2d;
6181 else if (Ty == S64 || Ty == P0)
6182 Opc = AArch64::LD1Fourv1d;
6183 else
6184 llvm_unreachable("Unexpected type for ld4!");
6185 selectVectorLoadIntrinsic(Opc, 4, I);
6186 break;
6187 }
6188 case Intrinsic::aarch64_neon_ld4lane: {
6189 LLT Ty = MRI.getType(I.getOperand(0).getReg());
6190 unsigned Opc;
6191 if (Ty == LLT::fixed_vector(8, S8) || Ty == LLT::fixed_vector(16, S8))
6192 Opc = AArch64::LD4i8;
6193 else if (Ty == LLT::fixed_vector(4, S16) || Ty == LLT::fixed_vector(8, S16))
6194 Opc = AArch64::LD4i16;
6195 else if (Ty == LLT::fixed_vector(2, S32) || Ty == LLT::fixed_vector(4, S32))
6196 Opc = AArch64::LD4i32;
6197 else if (Ty == LLT::fixed_vector(2, S64) ||
6198 Ty == LLT::fixed_vector(2, P0) || Ty == S64 || Ty == P0)
6199 Opc = AArch64::LD4i64;
6200 else
6201 llvm_unreachable("Unexpected type for ld4lane!");
6202 if (!selectVectorLoadLaneIntrinsic(Opc, 4, I))
6203 return false;
6204 break;
6205 }
6206 case Intrinsic::aarch64_neon_ld4r: {
6207 LLT Ty = MRI.getType(I.getOperand(0).getReg());
6208 unsigned Opc = 0;
6209 if (Ty == LLT::fixed_vector(8, S8))
6210 Opc = AArch64::LD4Rv8b;
6211 else if (Ty == LLT::fixed_vector(16, S8))
6212 Opc = AArch64::LD4Rv16b;
6213 else if (Ty == LLT::fixed_vector(4, S16))
6214 Opc = AArch64::LD4Rv4h;
6215 else if (Ty == LLT::fixed_vector(8, S16))
6216 Opc = AArch64::LD4Rv8h;
6217 else if (Ty == LLT::fixed_vector(2, S32))
6218 Opc = AArch64::LD4Rv2s;
6219 else if (Ty == LLT::fixed_vector(4, S32))
6220 Opc = AArch64::LD4Rv4s;
6221 else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
6222 Opc = AArch64::LD4Rv2d;
6223 else if (Ty == S64 || Ty == P0)
6224 Opc = AArch64::LD4Rv1d;
6225 else
6226 llvm_unreachable("Unexpected type for ld4r!");
6227 selectVectorLoadIntrinsic(Opc, 4, I);
6228 break;
6229 }
6230 case Intrinsic::aarch64_neon_st1x2: {
6231 LLT Ty = MRI.getType(I.getOperand(1).getReg());
6232 unsigned Opc;
6233 if (Ty == LLT::fixed_vector(8, S8))
6234 Opc = AArch64::ST1Twov8b;
6235 else if (Ty == LLT::fixed_vector(16, S8))
6236 Opc = AArch64::ST1Twov16b;
6237 else if (Ty == LLT::fixed_vector(4, S16))
6238 Opc = AArch64::ST1Twov4h;
6239 else if (Ty == LLT::fixed_vector(8, S16))
6240 Opc = AArch64::ST1Twov8h;
6241 else if (Ty == LLT::fixed_vector(2, S32))
6242 Opc = AArch64::ST1Twov2s;
6243 else if (Ty == LLT::fixed_vector(4, S32))
6244 Opc = AArch64::ST1Twov4s;
6245 else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
6246 Opc = AArch64::ST1Twov2d;
6247 else if (Ty == S64 || Ty == P0)
6248 Opc = AArch64::ST1Twov1d;
6249 else
6250 llvm_unreachable("Unexpected type for st1x2!");
6251 selectVectorStoreIntrinsic(I, 2, Opc);
6252 break;
6253 }
6254 case Intrinsic::aarch64_neon_st1x3: {
6255 LLT Ty = MRI.getType(I.getOperand(1).getReg());
6256 unsigned Opc;
6257 if (Ty == LLT::fixed_vector(8, S8))
6258 Opc = AArch64::ST1Threev8b;
6259 else if (Ty == LLT::fixed_vector(16, S8))
6260 Opc = AArch64::ST1Threev16b;
6261 else if (Ty == LLT::fixed_vector(4, S16))
6262 Opc = AArch64::ST1Threev4h;
6263 else if (Ty == LLT::fixed_vector(8, S16))
6264 Opc = AArch64::ST1Threev8h;
6265 else if (Ty == LLT::fixed_vector(2, S32))
6266 Opc = AArch64::ST1Threev2s;
6267 else if (Ty == LLT::fixed_vector(4, S32))
6268 Opc = AArch64::ST1Threev4s;
6269 else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
6270 Opc = AArch64::ST1Threev2d;
6271 else if (Ty == S64 || Ty == P0)
6272 Opc = AArch64::ST1Threev1d;
6273 else
6274 llvm_unreachable("Unexpected type for st1x3!");
6275 selectVectorStoreIntrinsic(I, 3, Opc);
6276 break;
6277 }
6278 case Intrinsic::aarch64_neon_st1x4: {
6279 LLT Ty = MRI.getType(I.getOperand(1).getReg());
6280 unsigned Opc;
6281 if (Ty == LLT::fixed_vector(8, S8))
6282 Opc = AArch64::ST1Fourv8b;
6283 else if (Ty == LLT::fixed_vector(16, S8))
6284 Opc = AArch64::ST1Fourv16b;
6285 else if (Ty == LLT::fixed_vector(4, S16))
6286 Opc = AArch64::ST1Fourv4h;
6287 else if (Ty == LLT::fixed_vector(8, S16))
6288 Opc = AArch64::ST1Fourv8h;
6289 else if (Ty == LLT::fixed_vector(2, S32))
6290 Opc = AArch64::ST1Fourv2s;
6291 else if (Ty == LLT::fixed_vector(4, S32))
6292 Opc = AArch64::ST1Fourv4s;
6293 else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
6294 Opc = AArch64::ST1Fourv2d;
6295 else if (Ty == S64 || Ty == P0)
6296 Opc = AArch64::ST1Fourv1d;
6297 else
6298 llvm_unreachable("Unexpected type for st1x4!");
6299 selectVectorStoreIntrinsic(I, 4, Opc);
6300 break;
6301 }
6302 case Intrinsic::aarch64_neon_st2: {
6303 LLT Ty = MRI.getType(I.getOperand(1).getReg());
6304 unsigned Opc;
6305 if (Ty == LLT::fixed_vector(8, S8))
6306 Opc = AArch64::ST2Twov8b;
6307 else if (Ty == LLT::fixed_vector(16, S8))
6308 Opc = AArch64::ST2Twov16b;
6309 else if (Ty == LLT::fixed_vector(4, S16))
6310 Opc = AArch64::ST2Twov4h;
6311 else if (Ty == LLT::fixed_vector(8, S16))
6312 Opc = AArch64::ST2Twov8h;
6313 else if (Ty == LLT::fixed_vector(2, S32))
6314 Opc = AArch64::ST2Twov2s;
6315 else if (Ty == LLT::fixed_vector(4, S32))
6316 Opc = AArch64::ST2Twov4s;
6317 else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
6318 Opc = AArch64::ST2Twov2d;
6319 else if (Ty == S64 || Ty == P0)
6320 Opc = AArch64::ST1Twov1d;
6321 else
6322 llvm_unreachable("Unexpected type for st2!");
6323 selectVectorStoreIntrinsic(I, 2, Opc);
6324 break;
6325 }
6326 case Intrinsic::aarch64_neon_st3: {
6327 LLT Ty = MRI.getType(I.getOperand(1).getReg());
6328 unsigned Opc;
6329 if (Ty == LLT::fixed_vector(8, S8))
6330 Opc = AArch64::ST3Threev8b;
6331 else if (Ty == LLT::fixed_vector(16, S8))
6332 Opc = AArch64::ST3Threev16b;
6333 else if (Ty == LLT::fixed_vector(4, S16))
6334 Opc = AArch64::ST3Threev4h;
6335 else if (Ty == LLT::fixed_vector(8, S16))
6336 Opc = AArch64::ST3Threev8h;
6337 else if (Ty == LLT::fixed_vector(2, S32))
6338 Opc = AArch64::ST3Threev2s;
6339 else if (Ty == LLT::fixed_vector(4, S32))
6340 Opc = AArch64::ST3Threev4s;
6341 else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
6342 Opc = AArch64::ST3Threev2d;
6343 else if (Ty == S64 || Ty == P0)
6344 Opc = AArch64::ST1Threev1d;
6345 else
6346 llvm_unreachable("Unexpected type for st3!");
6347 selectVectorStoreIntrinsic(I, 3, Opc);
6348 break;
6349 }
6350 case Intrinsic::aarch64_neon_st4: {
6351 LLT Ty = MRI.getType(I.getOperand(1).getReg());
6352 unsigned Opc;
6353 if (Ty == LLT::fixed_vector(8, S8))
6354 Opc = AArch64::ST4Fourv8b;
6355 else if (Ty == LLT::fixed_vector(16, S8))
6356 Opc = AArch64::ST4Fourv16b;
6357 else if (Ty == LLT::fixed_vector(4, S16))
6358 Opc = AArch64::ST4Fourv4h;
6359 else if (Ty == LLT::fixed_vector(8, S16))
6360 Opc = AArch64::ST4Fourv8h;
6361 else if (Ty == LLT::fixed_vector(2, S32))
6362 Opc = AArch64::ST4Fourv2s;
6363 else if (Ty == LLT::fixed_vector(4, S32))
6364 Opc = AArch64::ST4Fourv4s;
6365 else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
6366 Opc = AArch64::ST4Fourv2d;
6367 else if (Ty == S64 || Ty == P0)
6368 Opc = AArch64::ST1Fourv1d;
6369 else
6370 llvm_unreachable("Unexpected type for st4!");
6371 selectVectorStoreIntrinsic(I, 4, Opc);
6372 break;
6373 }
6374 case Intrinsic::aarch64_neon_st2lane: {
6375 LLT Ty = MRI.getType(I.getOperand(1).getReg());
6376 unsigned Opc;
6377 if (Ty == LLT::fixed_vector(8, S8) || Ty == LLT::fixed_vector(16, S8))
6378 Opc = AArch64::ST2i8;
6379 else if (Ty == LLT::fixed_vector(4, S16) || Ty == LLT::fixed_vector(8, S16))
6380 Opc = AArch64::ST2i16;
6381 else if (Ty == LLT::fixed_vector(2, S32) || Ty == LLT::fixed_vector(4, S32))
6382 Opc = AArch64::ST2i32;
6383 else if (Ty == LLT::fixed_vector(2, S64) ||
6384 Ty == LLT::fixed_vector(2, P0) || Ty == S64 || Ty == P0)
6385 Opc = AArch64::ST2i64;
6386 else
6387 llvm_unreachable("Unexpected type for st2lane!");
6388 if (!selectVectorStoreLaneIntrinsic(I, 2, Opc))
6389 return false;
6390 break;
6391 }
6392 case Intrinsic::aarch64_neon_st3lane: {
6393 LLT Ty = MRI.getType(I.getOperand(1).getReg());
6394 unsigned Opc;
6395 if (Ty == LLT::fixed_vector(8, S8) || Ty == LLT::fixed_vector(16, S8))
6396 Opc = AArch64::ST3i8;
6397 else if (Ty == LLT::fixed_vector(4, S16) || Ty == LLT::fixed_vector(8, S16))
6398 Opc = AArch64::ST3i16;
6399 else if (Ty == LLT::fixed_vector(2, S32) || Ty == LLT::fixed_vector(4, S32))
6400 Opc = AArch64::ST3i32;
6401 else if (Ty == LLT::fixed_vector(2, S64) ||
6402 Ty == LLT::fixed_vector(2, P0) || Ty == S64 || Ty == P0)
6403 Opc = AArch64::ST3i64;
6404 else
6405 llvm_unreachable("Unexpected type for st3lane!");
6406 if (!selectVectorStoreLaneIntrinsic(I, 3, Opc))
6407 return false;
6408 break;
6409 }
6410 case Intrinsic::aarch64_neon_st4lane: {
6411 LLT Ty = MRI.getType(I.getOperand(1).getReg());
6412 unsigned Opc;
6413 if (Ty == LLT::fixed_vector(8, S8) || Ty == LLT::fixed_vector(16, S8))
6414 Opc = AArch64::ST4i8;
6415 else if (Ty == LLT::fixed_vector(4, S16) || Ty == LLT::fixed_vector(8, S16))
6416 Opc = AArch64::ST4i16;
6417 else if (Ty == LLT::fixed_vector(2, S32) || Ty == LLT::fixed_vector(4, S32))
6418 Opc = AArch64::ST4i32;
6419 else if (Ty == LLT::fixed_vector(2, S64) ||
6420 Ty == LLT::fixed_vector(2, P0) || Ty == S64 || Ty == P0)
6421 Opc = AArch64::ST4i64;
6422 else
6423 llvm_unreachable("Unexpected type for st4lane!");
6424 if (!selectVectorStoreLaneIntrinsic(I, 4, Opc))
6425 return false;
6426 break;
6427 }
6428 case Intrinsic::aarch64_mops_memset_tag: {
6429 // Transform
6430 // %dst:gpr(p0) = \
6431 // G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aarch64.mops.memset.tag),
6432 // \ %dst:gpr(p0), %val:gpr(s64), %n:gpr(s64)
6433 // where %dst is updated, into
6434 // %Rd:GPR64common, %Rn:GPR64) = \
6435 // MOPSMemorySetTaggingPseudo \
6436 // %Rd:GPR64common, %Rn:GPR64, %Rm:GPR64
6437 // where Rd and Rn are tied.
6438 // It is expected that %val has been extended to s64 in legalization.
6439 // Note that the order of the size/value operands is swapped.
6440
6441 Register DstDef = I.getOperand(0).getReg();
6442 // I.getOperand(1) is the intrinsic function
6443 Register DstUse = I.getOperand(2).getReg();
6444 Register ValUse = I.getOperand(3).getReg();
6445 Register SizeUse = I.getOperand(4).getReg();
6446
6447 // MOPSMemorySetTaggingPseudo has two defs; the intrinsic call has only one.
6448 // Therefore an additional virtual register is required for the updated size
6449 // operand. This value is not accessible via the semantics of the intrinsic.
6450 Register SizeDef = MRI.createGenericVirtualRegister(LLT::scalar(64));
6451
6452 auto Memset = MIB.buildInstr(AArch64::MOPSMemorySetTaggingPseudo,
6453 {DstDef, SizeDef}, {DstUse, SizeUse, ValUse});
6454 Memset.cloneMemRefs(I);
6456 break;
6457 }
6458 }
6459
6460 I.eraseFromParent();
6461 return true;
6462}
6463
6464bool AArch64InstructionSelector::selectIntrinsic(MachineInstr &I,
6466 unsigned IntrinID = cast<GIntrinsic>(I).getIntrinsicID();
6467
6468 switch (IntrinID) {
6469 default:
6470 break;
6471 case Intrinsic::aarch64_crypto_sha1h: {
6472 Register DstReg = I.getOperand(0).getReg();
6473 Register SrcReg = I.getOperand(2).getReg();
6474
6475 // FIXME: Should this be an assert?
6476 if (MRI.getType(DstReg).getSizeInBits() != 32 ||
6477 MRI.getType(SrcReg).getSizeInBits() != 32)
6478 return false;
6479
6480 // The operation has to happen on FPRs. Set up some new FPR registers for
6481 // the source and destination if they are on GPRs.
6482 if (RBI.getRegBank(SrcReg, MRI, TRI)->getID() != AArch64::FPRRegBankID) {
6483 SrcReg = MRI.createVirtualRegister(&AArch64::FPR32RegClass);
6484 MIB.buildCopy({SrcReg}, {I.getOperand(2)});
6485
6486 // Make sure the copy ends up getting constrained properly.
6487 RBI.constrainGenericRegister(I.getOperand(2).getReg(),
6488 AArch64::GPR32RegClass, MRI);
6489 }
6490
6491 if (RBI.getRegBank(DstReg, MRI, TRI)->getID() != AArch64::FPRRegBankID)
6492 DstReg = MRI.createVirtualRegister(&AArch64::FPR32RegClass);
6493
6494 // Actually insert the instruction.
6495 auto SHA1Inst = MIB.buildInstr(AArch64::SHA1Hrr, {DstReg}, {SrcReg});
6496 constrainSelectedInstRegOperands(*SHA1Inst, TII, TRI, RBI);
6497
6498 // Did we create a new register for the destination?
6499 if (DstReg != I.getOperand(0).getReg()) {
6500 // Yep. Copy the result of the instruction back into the original
6501 // destination.
6502 MIB.buildCopy({I.getOperand(0)}, {DstReg});
6503 RBI.constrainGenericRegister(I.getOperand(0).getReg(),
6504 AArch64::GPR32RegClass, MRI);
6505 }
6506
6507 I.eraseFromParent();
6508 return true;
6509 }
6510 case Intrinsic::frameaddress:
6511 case Intrinsic::returnaddress: {
6512 MachineFunction &MF = *I.getParent()->getParent();
6513 MachineFrameInfo &MFI = MF.getFrameInfo();
6514
6515 unsigned Depth = I.getOperand(2).getImm();
6516 Register DstReg = I.getOperand(0).getReg();
6517 RBI.constrainGenericRegister(DstReg, AArch64::GPR64RegClass, MRI);
6518
6519 if (Depth == 0 && IntrinID == Intrinsic::returnaddress) {
6520 if (!MFReturnAddr) {
6521 // Insert the copy from LR/X30 into the entry block, before it can be
6522 // clobbered by anything.
6523 MFI.setReturnAddressIsTaken(true);
6524 MFReturnAddr = getFunctionLiveInPhysReg(
6525 MF, TII, AArch64::LR, AArch64::GPR64RegClass, I.getDebugLoc());
6526 }
6527
6528 if (STI.hasPAuth()) {
6529 MIB.buildInstr(AArch64::XPACI, {DstReg}, {MFReturnAddr});
6530 } else {
6531 MIB.buildCopy({Register(AArch64::LR)}, {MFReturnAddr});
6532 MIB.buildInstr(AArch64::XPACLRI);
6533 MIB.buildCopy({DstReg}, {Register(AArch64::LR)});
6534 }
6535
6536 I.eraseFromParent();
6537 return true;
6538 }
6539
6540 MFI.setFrameAddressIsTaken(true);
6541 Register FrameAddr(AArch64::FP);
6542 while (Depth--) {
6543 Register NextFrame = MRI.createVirtualRegister(&AArch64::GPR64spRegClass);
6544 auto Ldr =
6545 MIB.buildInstr(AArch64::LDRXui, {NextFrame}, {FrameAddr}).addImm(0);
6547 FrameAddr = NextFrame;
6548 }
6549
6550 if (IntrinID == Intrinsic::frameaddress)
6551 MIB.buildCopy({DstReg}, {FrameAddr});
6552 else {
6553 MFI.setReturnAddressIsTaken(true);
6554
6555 if (STI.hasPAuth()) {
6556 Register TmpReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
6557 MIB.buildInstr(AArch64::LDRXui, {TmpReg}, {FrameAddr}).addImm(1);
6558 MIB.buildInstr(AArch64::XPACI, {DstReg}, {TmpReg});
6559 } else {
6560 MIB.buildInstr(AArch64::LDRXui, {Register(AArch64::LR)}, {FrameAddr})
6561 .addImm(1);
6562 MIB.buildInstr(AArch64::XPACLRI);
6563 MIB.buildCopy({DstReg}, {Register(AArch64::LR)});
6564 }
6565 }
6566
6567 I.eraseFromParent();
6568 return true;
6569 }
6570 case Intrinsic::aarch64_neon_tbl2:
6571 SelectTable(I, MRI, 2, AArch64::TBLv8i8Two, AArch64::TBLv16i8Two, false);
6572 return true;
6573 case Intrinsic::aarch64_neon_tbl3:
6574 SelectTable(I, MRI, 3, AArch64::TBLv8i8Three, AArch64::TBLv16i8Three,
6575 false);
6576 return true;
6577 case Intrinsic::aarch64_neon_tbl4:
6578 SelectTable(I, MRI, 4, AArch64::TBLv8i8Four, AArch64::TBLv16i8Four, false);
6579 return true;
6580 case Intrinsic::aarch64_neon_tbx2:
6581 SelectTable(I, MRI, 2, AArch64::TBXv8i8Two, AArch64::TBXv16i8Two, true);
6582 return true;
6583 case Intrinsic::aarch64_neon_tbx3:
6584 SelectTable(I, MRI, 3, AArch64::TBXv8i8Three, AArch64::TBXv16i8Three, true);
6585 return true;
6586 case Intrinsic::aarch64_neon_tbx4:
6587 SelectTable(I, MRI, 4, AArch64::TBXv8i8Four, AArch64::TBXv16i8Four, true);
6588 return true;
6589 case Intrinsic::swift_async_context_addr:
6590 auto Sub = MIB.buildInstr(AArch64::SUBXri, {I.getOperand(0).getReg()},
6591 {Register(AArch64::FP)})
6592 .addImm(8)
6593 .addImm(0);
6595
6597 MF->getInfo<AArch64FunctionInfo>()->setHasSwiftAsyncContext(true);
6598 I.eraseFromParent();
6599 return true;
6600 }
6601 return false;
6602}
6603
6604// G_PTRAUTH_GLOBAL_VALUE lowering
6605//
6606// We have 3 lowering alternatives to choose from:
6607// - MOVaddrPAC: similar to MOVaddr, with added PAC.
6608// If the GV doesn't need a GOT load (i.e., is locally defined)
6609// materialize the pointer using adrp+add+pac. See LowerMOVaddrPAC.
6610//
6611// - LOADgotPAC: similar to LOADgot, with added PAC.
6612// If the GV needs a GOT load, materialize the pointer using the usual
6613 // GOT adrp+ldr, +pac. Pointers in the GOT are assumed to be not signed; the GOT
6614 // section is assumed to be read-only (for example, via the relro mechanism). See
6615// LowerMOVaddrPAC.
6616//
6617// - LOADauthptrstatic: similar to LOADgot, but use a
6618// special stub slot instead of a GOT slot.
6619// Load a signed pointer for symbol 'sym' from a stub slot named
6620 // 'sym$auth_ptr$key$disc', filled in by the dynamic linker during relocation
6621 // resolution. This usually lowers to adrp+ldr, but also emits an entry into
6622// .data with an
6623// @AUTH relocation. See LowerLOADauthptrstatic.
6624//
6625 // All 3 are pseudos that are expanded late to longer sequences: this lets us
6626// provide integrity guarantees on the to-be-signed intermediate values.
6627//
6628// LOADauthptrstatic is undesirable because it requires a large section filled
6629// with often similarly-signed pointers, making it a good harvesting target.
6630// Thus, it's only used for ptrauth references to extern_weak to avoid null
6631// checks.
6632
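// Illustrative sketch of the common locally-defined case:
//   %g:gpr(p0) = G_GLOBAL_VALUE @var
//   %signed:gpr(p0) = G_PTRAUTH_GLOBAL_VALUE %g(p0), key, %addrdisc(s64), disc
// selects to a MOVaddrPAC pseudo, with the signed pointer then copied out of
// X16 into %signed.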
6633bool AArch64InstructionSelector::selectPtrAuthGlobalValue(
6635 Register DefReg = I.getOperand(0).getReg();
6636 Register Addr = I.getOperand(1).getReg();
6637 uint64_t Key = I.getOperand(2).getImm();
6638 Register AddrDisc = I.getOperand(3).getReg();
6639 uint64_t Disc = I.getOperand(4).getImm();
6640 int64_t Offset = 0;
6641
6642 if (Key > AArch64PACKey::LAST)
6643 report_fatal_error("key in ptrauth global out of range [0, " +
6644 Twine((int)AArch64PACKey::LAST) + "]");
6645
6646 // Blend only works if the integer discriminator is 16 bits wide.
6647 if (!isUInt<16>(Disc))
6648 report_fatal_error(
6649 "constant discriminator in ptrauth global out of range [0, 0xffff]");
6650
6651 // Choosing between 3 lowering alternatives is target-specific.
6652 if (!STI.isTargetELF() && !STI.isTargetMachO())
6653 report_fatal_error("ptrauth global lowering only supported on MachO/ELF");
6654
6655 if (!MRI.hasOneDef(Addr))
6656 return false;
6657
6658 // First match any offset we take from the real global.
6659 const MachineInstr *DefMI = &*MRI.def_instr_begin(Addr);
6660 if (DefMI->getOpcode() == TargetOpcode::G_PTR_ADD) {
6661 Register OffsetReg = DefMI->getOperand(2).getReg();
6662 if (!MRI.hasOneDef(OffsetReg))
6663 return false;
6664 const MachineInstr &OffsetMI = *MRI.def_instr_begin(OffsetReg);
6665 if (OffsetMI.getOpcode() != TargetOpcode::G_CONSTANT)
6666 return false;
6667
6668 Addr = DefMI->getOperand(1).getReg();
6669 if (!MRI.hasOneDef(Addr))
6670 return false;
6671
6672 DefMI = &*MRI.def_instr_begin(Addr);
6673 Offset = OffsetMI.getOperand(1).getCImm()->getSExtValue();
6674 }
6675
6676 // We should be left with a genuine unauthenticated GlobalValue.
6677 const GlobalValue *GV;
6678 if (DefMI->getOpcode() == TargetOpcode::G_GLOBAL_VALUE) {
6679 GV = DefMI->getOperand(1).getGlobal();
6681 } else if (DefMI->getOpcode() == AArch64::G_ADD_LOW) {
6682 GV = DefMI->getOperand(2).getGlobal();
6684 } else {
6685 return false;
6686 }
6687
6688 MachineIRBuilder MIB(I);
6689
6690 // Classify the reference to determine whether it needs a GOT load.
6691 unsigned OpFlags = STI.ClassifyGlobalReference(GV, TM);
6692 const bool NeedsGOTLoad = ((OpFlags & AArch64II::MO_GOT) != 0);
6693 assert(((OpFlags & (~AArch64II::MO_GOT)) == 0) &&
6694 "unsupported non-GOT op flags on ptrauth global reference");
6695 assert((!GV->hasExternalWeakLinkage() || NeedsGOTLoad) &&
6696 "unsupported non-GOT reference to weak ptrauth global");
6697
6698 std::optional<APInt> AddrDiscVal = getIConstantVRegVal(AddrDisc, MRI);
6699 bool HasAddrDisc = !AddrDiscVal || *AddrDiscVal != 0;
6700
6701 // Non-extern_weak:
6702 // - No GOT load needed -> MOVaddrPAC
6703 // - GOT load for non-extern_weak -> LOADgotPAC
6704 // Note that we disallow extern_weak refs to avoid null checks later.
6705 if (!GV->hasExternalWeakLinkage()) {
6706 MIB.buildInstr(TargetOpcode::IMPLICIT_DEF, {AArch64::X16}, {});
6707 MIB.buildInstr(TargetOpcode::IMPLICIT_DEF, {AArch64::X17}, {});
6708 MIB.buildInstr(NeedsGOTLoad ? AArch64::LOADgotPAC : AArch64::MOVaddrPAC)
6710 .addImm(Key)
6711 .addReg(HasAddrDisc ? AddrDisc : AArch64::XZR)
6712 .addImm(Disc)
6713 .constrainAllUses(TII, TRI, RBI);
6714 MIB.buildCopy(DefReg, Register(AArch64::X16));
6715 RBI.constrainGenericRegister(DefReg, AArch64::GPR64RegClass, MRI);
6716 I.eraseFromParent();
6717 return true;
6718 }
6719
6720 // extern_weak -> LOADauthptrstatic
6721
6722 // Offsets and extern_weak don't mix well: ptrauth aside, you'd get the
6723 // offset alone as a pointer if the symbol wasn't available, which would
6724 // probably break null checks in users. Ptrauth complicates things further:
6725 // error out.
6726 if (Offset != 0)
6727 report_fatal_error(
6728 "unsupported non-zero offset in weak ptrauth global reference");
6729
6730 if (HasAddrDisc)
6731 report_fatal_error("unsupported weak addr-div ptrauth global");
6732
6733 MIB.buildInstr(AArch64::LOADauthptrstatic, {DefReg}, {})
6734 .addGlobalAddress(GV, Offset)
6735 .addImm(Key)
6736 .addImm(Disc);
6737 RBI.constrainGenericRegister(DefReg, AArch64::GPR64RegClass, MRI);
6738
6739 I.eraseFromParent();
6740 return true;
6741}
6742
6743void AArch64InstructionSelector::SelectTable(MachineInstr &I,
6745 unsigned NumVec, unsigned Opc1,
6746 unsigned Opc2, bool isExt) {
6747 Register DstReg = I.getOperand(0).getReg();
6748 unsigned Opc = MRI.getType(DstReg) == LLT::fixed_vector(8, 8) ? Opc1 : Opc2;
6749
6750 // Create the REG_SEQUENCE
6752 for (unsigned i = 0; i < NumVec; i++)
6753 Regs.push_back(I.getOperand(i + 2 + isExt).getReg());
6754 Register RegSeq = createQTuple(Regs, MIB);
6755
6756 Register IdxReg = I.getOperand(2 + NumVec + isExt).getReg();
6757 MachineInstrBuilder Instr;
6758 if (isExt) {
6759 Register Reg = I.getOperand(2).getReg();
6760 Instr = MIB.buildInstr(Opc, {DstReg}, {Reg, RegSeq, IdxReg});
6761 } else
6762 Instr = MIB.buildInstr(Opc, {DstReg}, {RegSeq, IdxReg});
6764 I.eraseFromParent();
6765}
6766
6768AArch64InstructionSelector::selectShiftA_32(const MachineOperand &Root) const {
6769 auto MaybeImmed = getImmedFromMO(Root);
6770 if (MaybeImmed == std::nullopt || *MaybeImmed > 31)
6771 return std::nullopt;
6772 uint64_t Enc = (32 - *MaybeImmed) & 0x1f;
6773 return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Enc); }}};
6774}
6775
6777AArch64InstructionSelector::selectShiftB_32(const MachineOperand &Root) const {
6778 auto MaybeImmed = getImmedFromMO(Root);
6779 if (MaybeImmed == std::nullopt || *MaybeImmed > 31)
6780 return std::nullopt;
6781 uint64_t Enc = 31 - *MaybeImmed;
6782 return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Enc); }}};
6783}
6784
6786AArch64InstructionSelector::selectShiftA_64(const MachineOperand &Root) const {
6787 auto MaybeImmed = getImmedFromMO(Root);
6788 if (MaybeImmed == std::nullopt || *MaybeImmed > 63)
6789 return std::nullopt;
6790 uint64_t Enc = (64 - *MaybeImmed) & 0x3f;
6791 return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Enc); }}};
6792}
6793
6795AArch64InstructionSelector::selectShiftB_64(const MachineOperand &Root) const {
6796 auto MaybeImmed = getImmedFromMO(Root);
6797 if (MaybeImmed == std::nullopt || *MaybeImmed > 63)
6798 return std::nullopt;
6799 uint64_t Enc = 63 - *MaybeImmed;
6800 return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Enc); }}};
6801}
6802
6803/// Helper to select an immediate value that can be represented as a 12-bit
6804/// value shifted left by either 0 or 12. If it is possible to do so, return
6805/// the immediate and shift value. If not, return std::nullopt.
6806///
6807/// Used by selectArithImmed and selectNegArithImmed.
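///
/// Illustrative examples: 0xabc yields (#0xabc, lsl #0), 0xabc000 yields
/// (#0xabc, lsl #12), and 0xabc001 fits neither form, so std::nullopt is
/// returned.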
6809AArch64InstructionSelector::select12BitValueWithLeftShift(
6810 uint64_t Immed) const {
6811 unsigned ShiftAmt;
6812 if (Immed >> 12 == 0) {
6813 ShiftAmt = 0;
6814 } else if ((Immed & 0xfff) == 0 && Immed >> 24 == 0) {
6815 ShiftAmt = 12;
6816 Immed = Immed >> 12;
6817 } else
6818 return std::nullopt;
6819
6820 unsigned ShVal = AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftAmt);
6821 return {{
6822 [=](MachineInstrBuilder &MIB) { MIB.addImm(Immed); },
6823 [=](MachineInstrBuilder &MIB) { MIB.addImm(ShVal); },
6824 }};
6825}
6826
6827/// SelectArithImmed - Select an immediate value that can be represented as
6828/// a 12-bit value shifted left by either 0 or 12. If so, return true with
6829/// Val set to the 12-bit value and Shift set to the shifter operand.
6831AArch64InstructionSelector::selectArithImmed(MachineOperand &Root) const {
6832 // This function is called from the addsub_shifted_imm ComplexPattern,
6833 // which lists [imm] as the list of opcodes it's interested in. However,
6834 // we still need to check whether the operand is actually an immediate
6835 // here, because the ComplexPattern opcode list is only used in
6836 // root-level opcode matching.
6837 auto MaybeImmed = getImmedFromMO(Root);
6838 if (MaybeImmed == std::nullopt)
6839 return std::nullopt;
6840 return select12BitValueWithLeftShift(*MaybeImmed);
6841}
6842
6843/// SelectNegArithImmed - As above, but negates the value before trying to
6844/// select it.
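///
/// For example, a 64-bit immediate of -32 negates to 32, which fits the
/// 12-bit form; this allows a compare against -32 to be selected as a
/// CMN-style operation (illustrative).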
6846AArch64InstructionSelector::selectNegArithImmed(MachineOperand &Root) const {
6847 // We need a register here, because we need to know if we have a 64 or 32
6848 // bit immediate.
6849 if (!Root.isReg())
6850 return std::nullopt;
6851 auto MaybeImmed = getImmedFromMO(Root);
6852 if (MaybeImmed == std::nullopt)
6853 return std::nullopt;
6854 uint64_t Immed = *MaybeImmed;
6855
6856 // This negation is almost always valid, but "cmp wN, #0" and "cmn wN, #0"
6857 // have the opposite effect on the C flag, so this pattern mustn't match under
6858 // those circumstances.
6859 if (Immed == 0)
6860 return std::nullopt;
6861
6862 // Check if we're dealing with a 32-bit type on the root or a 64-bit type on
6863 // the root.
6865 if (MRI.getType(Root.getReg()).getSizeInBits() == 32)
6866 Immed = ~((uint32_t)Immed) + 1;
6867 else
6868 Immed = ~Immed + 1ULL;
6869
6870 if (Immed & 0xFFFFFFFFFF000000ULL)
6871 return std::nullopt;
6872
6873 Immed &= 0xFFFFFFULL;
6874 return select12BitValueWithLeftShift(Immed);
6875}
6876
6877/// Checks if we are sure that folding MI into load/store addressing mode is
6878/// beneficial or not.
6879///
6880/// Returns:
6881/// - true if folding MI would be beneficial.
6882/// - false if folding MI would be bad.
6883/// - std::nullopt if it is not sure whether folding MI is beneficial.
6884///
6885/// \p MI can be the offset operand of G_PTR_ADD, e.g. G_SHL in the example:
6886///
6887/// %13:gpr(s64) = G_CONSTANT i64 1
6888/// %8:gpr(s64) = G_SHL %6, %13(s64)
6889/// %9:gpr(p0) = G_PTR_ADD %0, %8(s64)
6890/// %12:gpr(s32) = G_LOAD %9(p0) :: (load (s16))
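///
/// For example, on a subtarget with AddrLSLSlow14, a G_SHL by 1 or 4 feeding
/// the address is reported as not worth folding, while other constant shift
/// amounts are treated as free (illustrative).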
6891std::optional<bool> AArch64InstructionSelector::isWorthFoldingIntoAddrMode(
6892 MachineInstr &MI, const MachineRegisterInfo &MRI) const {
6893 if (MI.getOpcode() == AArch64::G_SHL) {
6894 // Address operands with shifts are free, except for running on subtargets
6895 // with AddrLSLSlow14.
6896 if (const auto ValAndVReg = getIConstantVRegValWithLookThrough(
6897 MI.getOperand(2).getReg(), MRI)) {
6898 const APInt ShiftVal = ValAndVReg->Value;
6899
6900 // Don't fold if we know this will be slow.
6901 return !(STI.hasAddrLSLSlow14() && (ShiftVal == 1 || ShiftVal == 4));
6902 }
6903 }
6904 return std::nullopt;
6905}
6906
6907/// Return true if it is worth folding MI into an extended register. That is,
6908/// if it's safe to pull it into the addressing mode of a load or store as a
6909/// shift.
6910/// \p IsAddrOperand whether the def of MI is used as an address operand
6911/// (e.g. feeding into an LDR/STR).
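///
/// For example, a G_SHL whose only non-debug users are two G_LOADs is still
/// folded into both addressing modes: recomputing the shift inside each
/// memory op is expected to be cheap (illustrative).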
6912bool AArch64InstructionSelector::isWorthFoldingIntoExtendedReg(
6914 bool IsAddrOperand) const {
6915
6916 // Always fold if there is one use, or if we're optimizing for size.
6917 Register DefReg = MI.getOperand(0).getReg();
6918 if (MRI.hasOneNonDBGUse(DefReg) ||
6919 MI.getParent()->getParent()->getFunction().hasOptSize())
6920 return true;
6921
6922 if (IsAddrOperand) {
6923 // If we are already sure that folding MI is good or bad, return the result.
6924 if (const auto Worth = isWorthFoldingIntoAddrMode(MI, MRI))
6925 return *Worth;
6926
6927 // Fold G_PTR_ADD if its offset operand can be folded
6928 if (MI.getOpcode() == AArch64::G_PTR_ADD) {
6929 MachineInstr *OffsetInst =
6930 getDefIgnoringCopies(MI.getOperand(2).getReg(), MRI);
6931
6932 // Note, we already know G_PTR_ADD is used by at least two instructions.
6933 // If we are also sure about whether folding is beneficial or not,
6934 // return the result.
6935 if (const auto Worth = isWorthFoldingIntoAddrMode(*OffsetInst, MRI))
6936 return *Worth;
6937 }
6938 }
6939
6940 // FIXME: Consider checking HasALULSLFast as appropriate.
6941
6942 // We have a fastpath, so folding a shift in and potentially computing it
6943 // many times may be beneficial. Check if this is only used in memory ops.
6944 // If it is, then we should fold.
6945 return all_of(MRI.use_nodbg_instructions(DefReg),
6946 [](MachineInstr &Use) { return Use.mayLoadOrStore(); });
6947}
6948
6950 switch (Type) {
6951 case AArch64_AM::SXTB:
6952 case AArch64_AM::SXTH:
6953 case AArch64_AM::SXTW:
6954 return true;
6955 default:
6956 return false;
6957 }
6958}
6959
6961AArch64InstructionSelector::selectExtendedSHL(
6963 unsigned SizeInBytes, bool WantsExt) const {
6964 assert(Base.isReg() && "Expected base to be a register operand");
6965 assert(Offset.isReg() && "Expected offset to be a register operand");
6966
6968 MachineInstr *OffsetInst = MRI.getVRegDef(Offset.getReg());
6969
6970 unsigned OffsetOpc = OffsetInst->getOpcode();
6971 bool LookedThroughZExt = false;
6972 if (OffsetOpc != TargetOpcode::G_SHL && OffsetOpc != TargetOpcode::G_MUL) {
6973 // Try to look through a ZEXT.
6974 if (OffsetOpc != TargetOpcode::G_ZEXT || !WantsExt)
6975 return std::nullopt;
6976
6977 OffsetInst = MRI.getVRegDef(OffsetInst->getOperand(1).getReg());
6978 OffsetOpc = OffsetInst->getOpcode();
6979 LookedThroughZExt = true;
6980
6981 if (OffsetOpc != TargetOpcode::G_SHL && OffsetOpc != TargetOpcode::G_MUL)
6982 return std::nullopt;
6983 }
6984 // Make sure that the memory op is a valid size.
6985 int64_t LegalShiftVal = Log2_32(SizeInBytes);
6986 if (LegalShiftVal == 0)
6987 return std::nullopt;
6988 if (!isWorthFoldingIntoExtendedReg(*OffsetInst, MRI, true))
6989 return std::nullopt;
6990
6991 // Now, try to find the specific G_CONSTANT. Start by assuming that the
6992 // register we will offset is the LHS, and the register containing the
6993 // constant is the RHS.
6994 Register OffsetReg = OffsetInst->getOperand(1).getReg();
6995 Register ConstantReg = OffsetInst->getOperand(2).getReg();
6996 auto ValAndVReg = getIConstantVRegValWithLookThrough(ConstantReg, MRI);
6997 if (!ValAndVReg) {
6998 // We didn't get a constant on the RHS. If the opcode is a shift, then
6999 // we're done.
7000 if (OffsetOpc == TargetOpcode::G_SHL)
7001 return std::nullopt;
7002
7003 // If we have a G_MUL, we can use either register. Try looking at the RHS.
7004 std::swap(OffsetReg, ConstantReg);
7005 ValAndVReg = getIConstantVRegValWithLookThrough(ConstantReg, MRI);
7006 if (!ValAndVReg)
7007 return std::nullopt;
7008 }
7009
7010 // The value must fit into 3 bits, and must be positive. Make sure that is
7011 // true.
7012 int64_t ImmVal = ValAndVReg->Value.getSExtValue();
7013
7014 // Since we're going to pull this into a shift, the constant value must be
7015 // a power of 2. If we got a multiply, then we need to check this.
7016 if (OffsetOpc == TargetOpcode::G_MUL) {
7017 if (!llvm::has_single_bit<uint32_t>(ImmVal))
7018 return std::nullopt;
7019
7020 // Got a power of 2. So, the amount we'll shift is the log base-2 of that.
7021 ImmVal = Log2_32(ImmVal);
7022 }
7023
7024 if ((ImmVal & 0x7) != ImmVal)
7025 return std::nullopt;
7026
7027 // We are only allowed to shift by LegalShiftVal. This shift value is built
7028 // into the instruction, so we can't just use whatever we want.
7029 if (ImmVal != LegalShiftVal)
7030 return std::nullopt;
7031
7032 unsigned SignExtend = 0;
7033 if (WantsExt) {
7034 // Check if the offset is defined by an extend, unless we looked through a
7035 // G_ZEXT earlier.
7036 if (!LookedThroughZExt) {
7037 MachineInstr *ExtInst = getDefIgnoringCopies(OffsetReg, MRI);
7038 auto Ext = getExtendTypeForInst(*ExtInst, MRI, true);
7039 if (Ext == AArch64_AM::InvalidShiftExtend)
7040 return std::nullopt;
7041
7042 SignExtend = isSignExtendShiftType(Ext) ? 1 : 0;
7043 // We only support SXTW for signed extension here.
7044 if (SignExtend && Ext != AArch64_AM::SXTW)
7045 return std::nullopt;
7046 OffsetReg = ExtInst->getOperand(1).getReg();
7047 }
7048
7049 // Need a 32-bit wide register here.
7050 MachineIRBuilder MIB(*MRI.getVRegDef(Root.getReg()));
7051 OffsetReg = moveScalarRegClass(OffsetReg, AArch64::GPR32RegClass, MIB);
7052 }
7053
7054 // We can use the LHS of the GEP as the base, and the LHS of the shift as an
7055 // offset. Signify that we are shifting by setting the shift flag to 1.
7056 return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(Base.getReg()); },
7057 [=](MachineInstrBuilder &MIB) { MIB.addUse(OffsetReg); },
7058 [=](MachineInstrBuilder &MIB) {
7059 // Need to add both immediates here to make sure that they are both
7060 // added to the instruction.
7061 MIB.addImm(SignExtend);
7062 MIB.addImm(1);
7063 }}};
7064}
7065
7066/// This is used for computing addresses like this:
7067///
7068/// ldr x1, [x2, x3, lsl #3]
7069///
7070/// Where x2 is the base register, and x3 is an offset register. The shift-left
7071/// is a constant value specific to this load instruction. That is, we'll never
7072/// see anything other than a 3 here (which corresponds to the size of the
7073/// element being loaded.)
7075AArch64InstructionSelector::selectAddrModeShiftedExtendXReg(
7076 MachineOperand &Root, unsigned SizeInBytes) const {
7077 if (!Root.isReg())
7078 return std::nullopt;
7080
7081 // We want to find something like this:
7082 //
7083 // val = G_CONSTANT LegalShiftVal
7084 // shift = G_SHL off_reg val
7085 // ptr = G_PTR_ADD base_reg shift
7086 // x = G_LOAD ptr
7087 //
7088 // And fold it into this addressing mode:
7089 //
7090 // ldr x, [base_reg, off_reg, lsl #LegalShiftVal]
7091
7092 // Check if we can find the G_PTR_ADD.
7093 MachineInstr *PtrAdd =
7094 getOpcodeDef(TargetOpcode::G_PTR_ADD, Root.getReg(), MRI);
7095 if (!PtrAdd || !isWorthFoldingIntoExtendedReg(*PtrAdd, MRI, true))
7096 return std::nullopt;
7097
7098 // Now, try to match an opcode which will match our specific offset.
7099 // We want a G_SHL or a G_MUL.
7100 MachineInstr *OffsetInst =
7101 getDefIgnoringCopies(PtrAdd->getOperand(2).getReg(), MRI);
7102 return selectExtendedSHL(Root, PtrAdd->getOperand(1),
7103 OffsetInst->getOperand(0), SizeInBytes,
7104 /*WantsExt=*/false);
7105}
7106
7107/// This is used for computing addresses like this:
7108///
7109/// ldr x1, [x2, x3]
7110///
7111/// Where x2 is the base register, and x3 is an offset register.
7112///
7113/// When possible (or profitable) to fold a G_PTR_ADD into the address
7114/// calculation, this will do so. Otherwise, it will return std::nullopt.
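///
/// Illustrative MIR:
///   %p:gpr(p0) = G_PTR_ADD %base, %off
///   %v:gpr(s64) = G_LOAD %p(p0)
/// renders {%base, %off, 0, 0}, i.e. a plain register-register address with
/// no extend or shift, provided the G_PTR_ADD result has a single non-debug
/// use.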
7116AArch64InstructionSelector::selectAddrModeRegisterOffset(
7117 MachineOperand &Root) const {
7119
7120 // We need a GEP.
7121 MachineInstr *Gep = MRI.getVRegDef(Root.getReg());
7122 if (Gep->getOpcode() != TargetOpcode::G_PTR_ADD)
7123 return std::nullopt;
7124
7125 // If this is used more than once, let's not bother folding.
7126 // TODO: Check if they are memory ops. If they are, then we can still fold
7127 // without having to recompute anything.
7128 if (!MRI.hasOneNonDBGUse(Gep->getOperand(0).getReg()))
7129 return std::nullopt;
7130
7131 // Base is the GEP's LHS, offset is its RHS.
7132 return {{[=](MachineInstrBuilder &MIB) {
7133 MIB.addUse(Gep->getOperand(1).getReg());
7134 },
7135 [=](MachineInstrBuilder &MIB) {
7136 MIB.addUse(Gep->getOperand(2).getReg());
7137 },
7138 [=](MachineInstrBuilder &MIB) {
7139 // Need to add both immediates here to make sure that they are both
7140 // added to the instruction.
7141 MIB.addImm(0);
7142 MIB.addImm(0);
7143 }}};
7144}
7145
7146/// This is intended to be equivalent to selectAddrModeXRO in
7147/// AArch64ISelDAGtoDAG. It's used for selecting X register offset loads.
7149AArch64InstructionSelector::selectAddrModeXRO(MachineOperand &Root,
7150 unsigned SizeInBytes) const {
7152 if (!Root.isReg())
7153 return std::nullopt;
7154 MachineInstr *PtrAdd =
7155 getOpcodeDef(TargetOpcode::G_PTR_ADD, Root.getReg(), MRI);
7156 if (!PtrAdd)
7157 return std::nullopt;
7158
7159 // Check for an immediate which cannot be encoded in the [base + imm]
7160 // addressing mode, and can't be encoded in an add/sub. If this happens, we'll
7161 // end up with code like:
7162 //
7163 // mov x0, wide
7164 // add x1 base, x0
7165 // ldr x2, [x1, x0]
7166 //
7167 // In this situation, we can use the [base, xreg] addressing mode to save an
7168 // add/sub:
7169 //
7170 // mov x0, wide
7171 // ldr x2, [base, x0]
7172 auto ValAndVReg =
7173 getIConstantVRegValWithLookThrough(PtrAdd->getOperand(2).getReg(), MRI);
7174 if (ValAndVReg) {
7175 unsigned Scale = Log2_32(SizeInBytes);
7176 int64_t ImmOff = ValAndVReg->Value.getSExtValue();
7177
7178 // Skip immediates that can be selected in the load/store addressing
7179 // mode.
7180 if (ImmOff % SizeInBytes == 0 && ImmOff >= 0 &&
7181 ImmOff < (0x1000 << Scale))
7182 return std::nullopt;
7183
7184 // Helper lambda to decide whether or not it is preferable to emit an add.
7185 auto isPreferredADD = [](int64_t ImmOff) {
7186 // Constants in [0x0, 0xfff] can be encoded in an add.
7187 if ((ImmOff & 0xfffffffffffff000LL) == 0x0LL)
7188 return true;
7189
7190 // Can it be encoded in an add lsl #12?
7191 if ((ImmOff & 0xffffffffff000fffLL) != 0x0LL)
7192 return false;
7193
7194 // It can be encoded in an add lsl #12, but we may not want to. If it is
7195 // possible to select this as a single movz, then prefer that. A single
7196 // movz is faster than an add with a shift.
7197 return (ImmOff & 0xffffffffff00ffffLL) != 0x0LL &&
7198 (ImmOff & 0xffffffffffff0fffLL) != 0x0LL;
7199 };
7200
7201 // If the immediate can be encoded in a single add/sub, then bail out.
7202 if (isPreferredADD(ImmOff) || isPreferredADD(-ImmOff))
7203 return std::nullopt;
7204 }
7205
7206 // Try to fold shifts into the addressing mode.
7207 auto AddrModeFns = selectAddrModeShiftedExtendXReg(Root, SizeInBytes);
7208 if (AddrModeFns)
7209 return AddrModeFns;
7210
7211 // If that doesn't work, see if it's possible to fold in registers from
7212 // a GEP.
7213 return selectAddrModeRegisterOffset(Root);
7214}
7215
7216/// This is used for computing addresses like this:
7217///
7218/// ldr x0, [xBase, wOffset, sxtw #LegalShiftVal]
7219///
7220/// Where we have a 64-bit base register, a 32-bit offset register, and an
7221/// extend (which may or may not be signed).
7223AArch64InstructionSelector::selectAddrModeWRO(MachineOperand &Root,
7224 unsigned SizeInBytes) const {
7226
7227 MachineInstr *PtrAdd =
7228 getOpcodeDef(TargetOpcode::G_PTR_ADD, Root.getReg(), MRI);
7229 if (!PtrAdd || !isWorthFoldingIntoExtendedReg(*PtrAdd, MRI, true))
7230 return std::nullopt;
7231
7232 MachineOperand &LHS = PtrAdd->getOperand(1);
7233 MachineOperand &RHS = PtrAdd->getOperand(2);
7234 MachineInstr *OffsetInst = getDefIgnoringCopies(RHS.getReg(), MRI);
7235
7236 // The first case is the same as selectAddrModeXRO, except we need an extend.
7237 // In this case, we try to find a shift and extend, and fold them into the
7238 // addressing mode.
7239 //
7240 // E.g.
7241 //
7242 // off_reg = G_Z/S/ANYEXT ext_reg
7243 // val = G_CONSTANT LegalShiftVal
7244 // shift = G_SHL off_reg val
7245 // ptr = G_PTR_ADD base_reg shift
7246 // x = G_LOAD ptr
7247 //
7248 // In this case we can get a load like this:
7249 //
7250 // ldr x0, [base_reg, ext_reg, sxtw #LegalShiftVal]
7251 auto ExtendedShl = selectExtendedSHL(Root, LHS, OffsetInst->getOperand(0),
7252 SizeInBytes, /*WantsExt=*/true);
7253 if (ExtendedShl)
7254 return ExtendedShl;
7255
7256 // There was no shift. We can try and fold a G_Z/S/ANYEXT in alone though.
7257 //
7258 // e.g.
7259 // ldr something, [base_reg, ext_reg, sxtw]
7260 if (!isWorthFoldingIntoExtendedReg(*OffsetInst, MRI, true))
7261 return std::nullopt;
7262
7263 // Check if this is an extend. We'll get an extend type if it is.
7264 AArch64_AM::ShiftExtendType Ext =
7265 getExtendTypeForInst(*OffsetInst, MRI, /*IsLoadStore=*/true);
7266 if (Ext == AArch64_AM::InvalidShiftExtend)
7267 return std::nullopt;
7268
7269 // Need a 32-bit wide register.
7270 MachineIRBuilder MIB(*PtrAdd);
7271 Register ExtReg = moveScalarRegClass(OffsetInst->getOperand(1).getReg(),
7272 AArch64::GPR32RegClass, MIB);
7273 unsigned SignExtend = Ext == AArch64_AM::SXTW;
7274
7275 // Base is LHS, offset is ExtReg.
7276 return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(LHS.getReg()); },
7277 [=](MachineInstrBuilder &MIB) { MIB.addUse(ExtReg); },
7278 [=](MachineInstrBuilder &MIB) {
7279 MIB.addImm(SignExtend);
7280 MIB.addImm(0);
7281 }}};
7282}
7283
7284/// Select a "register plus unscaled signed 9-bit immediate" address. This
7285/// should only match when there is an offset that is not valid for a scaled
7286/// immediate addressing mode. The "Size" argument is the size in bytes of the
7287/// memory reference, which is needed here to know what is valid for a scaled
7288/// immediate.
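///
/// For example, a 4-byte access at offset -8 cannot use the scaled unsigned
/// form, but -8 lies within [-256, 256), so this matches and an unscaled
/// (LDUR-style) instruction can be used instead (illustrative).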
7290AArch64InstructionSelector::selectAddrModeUnscaled(MachineOperand &Root,
7291 unsigned Size) const {
7293 Root.getParent()->getParent()->getParent()->getRegInfo();
7294
7295 if (!Root.isReg())
7296 return std::nullopt;
7297
7298 if (!isBaseWithConstantOffset(Root, MRI))
7299 return std::nullopt;
7300
7301 MachineInstr *RootDef = MRI.getVRegDef(Root.getReg());
7302
7303 MachineOperand &OffImm = RootDef->getOperand(2);
7304 if (!OffImm.isReg())
7305 return std::nullopt;
7306 MachineInstr *RHS = MRI.getVRegDef(OffImm.getReg());
7307 if (RHS->getOpcode() != TargetOpcode::G_CONSTANT)
7308 return std::nullopt;
7309 int64_t RHSC;
7310 MachineOperand &RHSOp1 = RHS->getOperand(1);
7311 if (!RHSOp1.isCImm() || RHSOp1.getCImm()->getBitWidth() > 64)
7312 return std::nullopt;
7313 RHSC = RHSOp1.getCImm()->getSExtValue();
7314
7315 if (RHSC >= -256 && RHSC < 256) {
7316 MachineOperand &Base = RootDef->getOperand(1);
7317 return {{
7318 [=](MachineInstrBuilder &MIB) { MIB.add(Base); },
7319 [=](MachineInstrBuilder &MIB) { MIB.addImm(RHSC); },
7320 }};
7321 }
7322 return std::nullopt;
7323}
7324
7326AArch64InstructionSelector::tryFoldAddLowIntoImm(MachineInstr &RootDef,
7327 unsigned Size,
7328 MachineRegisterInfo &MRI) const {
7329 if (RootDef.getOpcode() != AArch64::G_ADD_LOW)
7330 return std::nullopt;
7331 MachineInstr &Adrp = *MRI.getVRegDef(RootDef.getOperand(1).getReg());
7332 if (Adrp.getOpcode() != AArch64::ADRP)
7333 return std::nullopt;
7334
7335 // TODO: add heuristics like isWorthFoldingADDlow() from SelectionDAG.
7336 auto Offset = Adrp.getOperand(1).getOffset();
7337 if (Offset % Size != 0)
7338 return std::nullopt;
7339
7340 auto GV = Adrp.getOperand(1).getGlobal();
7341 if (GV->isThreadLocal())
7342 return std::nullopt;
7343
7344 auto &MF = *RootDef.getParent()->getParent();
7345 if (GV->getPointerAlignment(MF.getDataLayout()) < Size)
7346 return std::nullopt;
7347
7348 unsigned OpFlags = STI.ClassifyGlobalReference(GV, MF.getTarget());
7349 MachineIRBuilder MIRBuilder(RootDef);
7350 Register AdrpReg = Adrp.getOperand(0).getReg();
7351 return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(AdrpReg); },
7352 [=](MachineInstrBuilder &MIB) {
7353 MIB.addGlobalAddress(GV, Offset,
7354 OpFlags | AArch64II::MO_PAGEOFF |
7355 AArch64II::MO_NC);
7356 }}};
7357}
7358
7359/// Select a "register plus scaled unsigned 12-bit immediate" address. The
7360/// "Size" argument is the size in bytes of the memory reference, which
7361/// determines the scale.
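///
/// For example, with Size == 4 a constant offset of 16 is accepted and
/// rendered as immediate 4 (16 >> 2), matching "ldr w0, [x1, #16]"
/// (illustrative).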
7363AArch64InstructionSelector::selectAddrModeIndexed(MachineOperand &Root,
7364 unsigned Size) const {
7365 MachineFunction &MF = *Root.getParent()->getParent()->getParent();
7367
7368 if (!Root.isReg())
7369 return std::nullopt;
7370
7371 MachineInstr *RootDef = MRI.getVRegDef(Root.getReg());
7372 if (RootDef->getOpcode() == TargetOpcode::G_FRAME_INDEX) {
7373 return {{
7374 [=](MachineInstrBuilder &MIB) { MIB.add(RootDef->getOperand(1)); },
7375 [=](MachineInstrBuilder &MIB) { MIB.addImm(0); },
7376 }};
7377 }
7378
7380 // Check if we can fold in the ADD of small code model ADRP + ADD address.
7381 if (CM == CodeModel::Small) {
7382 auto OpFns = tryFoldAddLowIntoImm(*RootDef, Size, MRI);
7383 if (OpFns)
7384 return OpFns;
7385 }
7386
7387 if (isBaseWithConstantOffset(Root, MRI)) {
7388 MachineOperand &LHS = RootDef->getOperand(1);
7389 MachineOperand &RHS = RootDef->getOperand(2);
7390 MachineInstr *LHSDef = MRI.getVRegDef(LHS.getReg());
7391 MachineInstr *RHSDef = MRI.getVRegDef(RHS.getReg());
7392
7393 int64_t RHSC = (int64_t)RHSDef->getOperand(1).getCImm()->getZExtValue();
7394 unsigned Scale = Log2_32(Size);
7395 if ((RHSC & (Size - 1)) == 0 && RHSC >= 0 && RHSC < (0x1000 << Scale)) {
7396 if (LHSDef->getOpcode() == TargetOpcode::G_FRAME_INDEX)
7397 return {{
7398 [=](MachineInstrBuilder &MIB) { MIB.add(LHSDef->getOperand(1)); },
7399 [=](MachineInstrBuilder &MIB) { MIB.addImm(RHSC >> Scale); },
7400 }};
7401
7402 return {{
7403 [=](MachineInstrBuilder &MIB) { MIB.add(LHS); },
7404 [=](MachineInstrBuilder &MIB) { MIB.addImm(RHSC >> Scale); },
7405 }};
7406 }
7407 }
7408
7409 // Before falling back to our general case, check if the unscaled
7410 // instructions can handle this. If so, that's preferable.
7411 if (selectAddrModeUnscaled(Root, Size))
7412 return std::nullopt;
7413
7414 return {{
7415 [=](MachineInstrBuilder &MIB) { MIB.add(Root); },
7416 [=](MachineInstrBuilder &MIB) { MIB.addImm(0); },
7417 }};
7418}
7419
7420/// Given a shift instruction, return the correct shift type for that
7421/// instruction.
7423 switch (MI.getOpcode()) {
7424 default:
7425 return AArch64_AM::InvalidShiftExtend;
7426 case TargetOpcode::G_SHL:
7427 return AArch64_AM::LSL;
7428 case TargetOpcode::G_LSHR:
7429 return AArch64_AM::LSR;
7430 case TargetOpcode::G_ASHR:
7431 return AArch64_AM::ASR;
7432 case TargetOpcode::G_ROTR:
7433 return AArch64_AM::ROR;
7434 }
7435}
7436
7437/// Select a "shifted register" operand. If the value is not shifted, set the
7438/// shift operand to a default value of "lsl 0".
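///
/// For example, a G_SHL of %x by a constant 3 feeding a 64-bit add can fold
/// into the shifted-register form "add x0, x1, x2, lsl #3", provided the
/// shift is considered worth folding (illustrative).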
7440AArch64InstructionSelector::selectShiftedRegister(MachineOperand &Root,
7441 bool AllowROR) const {
7442 if (!Root.isReg())
7443 return std::nullopt;
7445 Root.getParent()->getParent()->getParent()->getRegInfo();
7446
7447 // Check if the operand is defined by an instruction which corresponds to
7448 // a ShiftExtendType. E.g. a G_SHL, G_LSHR, etc.
7449 MachineInstr *ShiftInst = MRI.getVRegDef(Root.getReg());
7450 AArch64_AM::ShiftExtendType ShType = getShiftTypeForInst(*ShiftInst);
7451 if (ShType == AArch64_AM::InvalidShiftExtend)
7452 return std::nullopt;
7453 if (ShType == AArch64_AM::ROR && !AllowROR)
7454 return std::nullopt;
7455 if (!isWorthFoldingIntoExtendedReg(*ShiftInst, MRI, false))
7456 return std::nullopt;
7457
7458 // Need an immediate on the RHS.
7459 MachineOperand &ShiftRHS = ShiftInst->getOperand(2);
7460 auto Immed = getImmedFromMO(ShiftRHS);
7461 if (!Immed)
7462 return std::nullopt;
7463
7464 // We have something that we can fold. Fold in the shift's LHS and RHS into
7465 // the instruction.
7466 MachineOperand &ShiftLHS = ShiftInst->getOperand(1);
7467 Register ShiftReg = ShiftLHS.getReg();
7468
7469 unsigned NumBits = MRI.getType(ShiftReg).getSizeInBits();
7470 unsigned Val = *Immed & (NumBits - 1);
7471 unsigned ShiftVal = AArch64_AM::getShifterImm(ShType, Val);
7472
7473 return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(ShiftReg); },
7474 [=](MachineInstrBuilder &MIB) { MIB.addImm(ShiftVal); }}};
7475}
7476
7477AArch64_AM::ShiftExtendType AArch64InstructionSelector::getExtendTypeForInst(
7478 MachineInstr &MI, MachineRegisterInfo &MRI, bool IsLoadStore) const {
7479 unsigned Opc = MI.getOpcode();
7480
7481 // Handle explicit extend instructions first.
7482 if (Opc == TargetOpcode::G_SEXT || Opc == TargetOpcode::G_SEXT_INREG) {
7483 unsigned Size;
7484 if (Opc == TargetOpcode::G_SEXT)
7485 Size = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
7486 else
7487 Size = MI.getOperand(2).getImm();
7488 assert(Size != 64 && "Extend from 64 bits?");
7489 switch (Size) {
7490 case 8:
7491 return IsLoadStore ? AArch64_AM::InvalidShiftExtend : AArch64_AM::SXTB;
7492 case 16:
7493 return IsLoadStore ? AArch64_AM::InvalidShiftExtend : AArch64_AM::SXTH;
7494 case 32:
7495 return AArch64_AM::SXTW;
7496 default:
7497 return AArch64_AM::InvalidShiftExtend;
7498 }
7499 }
7500
7501 if (Opc == TargetOpcode::G_ZEXT || Opc == TargetOpcode::G_ANYEXT) {
7502 unsigned Size = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
7503 assert(Size != 64 && "Extend from 64 bits?");
7504 switch (Size) {
7505 case 8:
7506 return IsLoadStore ? AArch64_AM::InvalidShiftExtend : AArch64_AM::UXTB;
7507 case 16:
7508 return IsLoadStore ? AArch64_AM::InvalidShiftExtend : AArch64_AM::UXTH;
7509 case 32:
7510 return AArch64_AM::UXTW;
7511 default:
7512 return AArch64_AM::InvalidShiftExtend;
7513 }
7514 }
7515
7516 // Don't have an explicit extend. Try to handle a G_AND with a constant mask
7517 // on the RHS.
7518 if (Opc != TargetOpcode::G_AND)
7519 return AArch64_AM::InvalidShiftExtend;
7520
7521 std::optional<uint64_t> MaybeAndMask = getImmedFromMO(MI.getOperand(2));
7522 if (!MaybeAndMask)
7523 return AArch64_AM::InvalidShiftExtend;
7524 uint64_t AndMask = *MaybeAndMask;
7525 switch (AndMask) {
7526 default:
7527 return AArch64_AM::InvalidShiftExtend;
7528 case 0xFF:
7529 return !IsLoadStore ? AArch64_AM::UXTB : AArch64_AM::InvalidShiftExtend;
7530 case 0xFFFF:
7531 return !IsLoadStore ? AArch64_AM::UXTH : AArch64_AM::InvalidShiftExtend;
7532 case 0xFFFFFFFF:
7533 return AArch64_AM::UXTW;
7534 }
7535}
7536
7537Register AArch64InstructionSelector::moveScalarRegClass(
7538 Register Reg, const TargetRegisterClass &RC, MachineIRBuilder &MIB) const {
7539 MachineRegisterInfo &MRI = *MIB.getMRI();
7540 auto Ty = MRI.getType(Reg);
7541 assert(!Ty.isVector() && "Expected scalars only!");
7542 if (Ty.getSizeInBits() == TRI.getRegSizeInBits(RC))
7543 return Reg;
7544
7545 // Create a copy and immediately select it.
7546 // FIXME: We should have an emitCopy function?
7547 auto Copy = MIB.buildCopy({&RC}, {Reg});
7548 selectCopy(*Copy, TII, MRI, TRI, RBI);
7549 return Copy.getReg(0);
7550}
7551
7552/// Select an "extended register" operand. This operand folds in an extend
7553/// followed by an optional left shift.
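///
/// For example, a G_SEXT from s32 followed by a G_SHL by 2 can fold into
/// "add x0, x1, w2, sxtw #2"; shift amounts greater than 4 are rejected
/// (illustrative).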
7555AArch64InstructionSelector::selectArithExtendedRegister(
7556 MachineOperand &Root) const {
7557 if (!Root.isReg())
7558 return std::nullopt;
7560 Root.getParent()->getParent()->getParent()->getRegInfo();
7561
7562 uint64_t ShiftVal = 0;
7563 Register ExtReg;
7565 MachineInstr *RootDef = getDefIgnoringCopies(Root.getReg(), MRI);
7566 if (!RootDef)
7567 return std::nullopt;
7568
7569 if (!isWorthFoldingIntoExtendedReg(*RootDef, MRI, false))
7570 return std::nullopt;
7571
7572 // Check if we can fold a shift and an extend.
7573 if (RootDef->getOpcode() == TargetOpcode::G_SHL) {
7574 // Look for a constant on the RHS of the shift.
7575 MachineOperand &RHS = RootDef->getOperand(2);
7576 std::optional<uint64_t> MaybeShiftVal = getImmedFromMO(RHS);
7577 if (!MaybeShiftVal)
7578 return std::nullopt;
7579 ShiftVal = *MaybeShiftVal;
7580 if (ShiftVal > 4)
7581 return std::nullopt;
7582 // Look for a valid extend instruction on the LHS of the shift.
7583 MachineOperand &LHS = RootDef->getOperand(1);
7584 MachineInstr *ExtDef = getDefIgnoringCopies(LHS.getReg(), MRI);
7585 if (!ExtDef)
7586 return std::nullopt;
7587 Ext = getExtendTypeForInst(*ExtDef, MRI);
7588 if (Ext == AArch64_AM::InvalidShiftExtend)
7589 return std::nullopt;
7590 ExtReg = ExtDef->getOperand(1).getReg();
7591 } else {
7592 // Didn't get a shift. Try just folding an extend.
7593 Ext = getExtendTypeForInst(*RootDef, MRI);
7594 if (Ext == AArch64_AM::InvalidShiftExtend)
7595 return std::nullopt;
7596 ExtReg = RootDef->getOperand(1).getReg();
7597
7598 // If we have a 32 bit instruction which zeroes out the high half of a
7599 // register, we get an implicit zero extend for free. Check if we have one.
7600 // FIXME: We actually emit the extend right now even though we don't have
7601 // to.
7602 if (Ext == AArch64_AM::UXTW && MRI.getType(ExtReg).getSizeInBits() == 32) {
7603 MachineInstr *ExtInst = MRI.getVRegDef(ExtReg);
7604 if (isDef32(*ExtInst))
7605 return std::nullopt;
7606 }
7607 }
7608
7609 // We require a GPR32 here. Narrow the ExtReg if needed using a subregister
7610 // copy.
7611 MachineIRBuilder MIB(*RootDef);
7612 ExtReg = moveScalarRegClass(ExtReg, AArch64::GPR32RegClass, MIB);
7613
7614 return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(ExtReg); },
7615 [=](MachineInstrBuilder &MIB) {
7616 MIB.addImm(getArithExtendImm(Ext, ShiftVal));
7617 }}};
7618}
7619
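/// Match an operand that is the high half of a vector value, e.g. the second
/// result of a G_UNMERGE_VALUES or lane 1 of a <2 x s64>
/// G_EXTRACT_VECTOR_ELT, looking through G_BITCASTs on little-endian targets
/// (illustrative summary).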
7621AArch64InstructionSelector::selectExtractHigh(MachineOperand &Root) const {
7622 if (!Root.isReg())
7623 return std::nullopt;
7625 Root.getParent()->getParent()->getParent()->getRegInfo();
7626
7627 auto Extract = getDefSrcRegIgnoringCopies(Root.getReg(), MRI);
7628 while (Extract && Extract->MI->getOpcode() == TargetOpcode::G_BITCAST &&
7629 STI.isLittleEndian())
7630 Extract =
7631 getDefSrcRegIgnoringCopies(Extract->MI->getOperand(1).getReg(), MRI);
7632 if (!Extract)
7633 return std::nullopt;
7634
7635 if (Extract->MI->getOpcode() == TargetOpcode::G_UNMERGE_VALUES) {
7636 if (Extract->Reg == Extract->MI->getOperand(1).getReg()) {
7637 Register ExtReg = Extract->MI->getOperand(2).getReg();
7638 return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(ExtReg); }}};
7639 }
7640 }
7641 if (Extract->MI->getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT) {
7642 LLT SrcTy = MRI.getType(Extract->MI->getOperand(1).getReg());
7643 auto LaneIdx = getIConstantVRegValWithLookThrough(
7644 Extract->MI->getOperand(2).getReg(), MRI);
7645 if (LaneIdx && SrcTy == LLT::fixed_vector(2, 64) &&
7646 LaneIdx->Value.getSExtValue() == 1) {
7647 Register ExtReg = Extract->MI->getOperand(1).getReg();
7648 return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(ExtReg); }}};
7649 }
7650 }
7651
7652 return std::nullopt;
7653}
7654
7655void AArch64InstructionSelector::renderTruncImm(MachineInstrBuilder &MIB,
7656 const MachineInstr &MI,
7657 int OpIdx) const {
7658 const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
7659 assert(MI.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 &&
7660 "Expected G_CONSTANT");
7661 std::optional<int64_t> CstVal =
7662 getIConstantVRegSExtVal(MI.getOperand(0).getReg(), MRI);
7663 assert(CstVal && "Expected constant value");
7664 MIB.addImm(*CstVal);
7665}
7666
7667void AArch64InstructionSelector::renderLogicalImm32(
7668 MachineInstrBuilder &MIB, const MachineInstr &I, int OpIdx) const {
7669 assert(I.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 &&
7670 "Expected G_CONSTANT");
7671 uint64_t CstVal = I.getOperand(1).getCImm()->getZExtValue();
7672 uint64_t Enc = AArch64_AM::encodeLogicalImmediate(CstVal, 32);
7673 MIB.addImm(Enc);
7674}
7675
7676void AArch64InstructionSelector::renderLogicalImm64(
7677 MachineInstrBuilder &MIB, const MachineInstr &I, int OpIdx) const {
7678 assert(I.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 &&
7679 "Expected G_CONSTANT");
7680 uint64_t CstVal = I.getOperand(1).getCImm()->getZExtValue();
7681 uint64_t Enc = AArch64_AM::encodeLogicalImmediate(CstVal, 64);
7682 MIB.addImm(Enc);
7683}
7684
7685void AArch64InstructionSelector::renderUbsanTrap(MachineInstrBuilder &MIB,
7686 const MachineInstr &MI,
7687 int OpIdx) const {
7688 assert(MI.getOpcode() == TargetOpcode::G_UBSANTRAP && OpIdx == 0 &&
7689 "Expected G_UBSANTRAP");
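// The UBSAN check kind sits in the low byte of the resulting immediate, and
// the high byte is tagged with 'U' (0x55); e.g. kind 0 renders as 0x5500
// (illustrative).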
7690 MIB.addImm(MI.getOperand(0).getImm() | ('U' << 8));
7691}
7692
7693void AArch64InstructionSelector::renderFPImm16(MachineInstrBuilder &MIB,
7694 const MachineInstr &MI,
7695 int OpIdx) const {
7696 assert(MI.getOpcode() == TargetOpcode::G_FCONSTANT && OpIdx == -1 &&
7697 "Expected G_FCONSTANT");
7698 MIB.addImm(
7699 AArch64_AM::getFP16Imm(MI.getOperand(1).getFPImm()->getValueAPF()));
7700}
7701
7702void AArch64InstructionSelector::renderFPImm32(MachineInstrBuilder &MIB,
7703 const MachineInstr &MI,
7704 int OpIdx) const {
7705 assert(MI.getOpcode() == TargetOpcode::G_FCONSTANT && OpIdx == -1 &&
7706 "Expected G_FCONSTANT");
7707 MIB.addImm(
7708 AArch64_AM::getFP32Imm(MI.getOperand(1).getFPImm()->getValueAPF()));
7709}
7710
7711void AArch64InstructionSelector::renderFPImm64(MachineInstrBuilder &MIB,
7712 const MachineInstr &MI,
7713 int OpIdx) const {
7714 assert(MI.getOpcode() == TargetOpcode::G_FCONSTANT && OpIdx == -1 &&
7715 "Expected G_FCONSTANT");
7716 MIB.addImm(
7717 AArch64_AM::getFP64Imm(MI.getOperand(1).getFPImm()->getValueAPF()));
7718}
7719
7720void AArch64InstructionSelector::renderFPImm32SIMDModImmType4(
7721 MachineInstrBuilder &MIB, const MachineInstr &MI, int OpIdx) const {
7722 assert(MI.getOpcode() == TargetOpcode::G_FCONSTANT && OpIdx == -1 &&
7723 "Expected G_FCONSTANT");
7724 MIB.addImm(AArch64_AM::encodeAdvSIMDModImmType4(MI.getOperand(1)
7725 .getFPImm()
7726 ->getValueAPF()
7727 .bitcastToAPInt()
7728 .getZExtValue()));
7729}
7730
7731bool AArch64InstructionSelector::isLoadStoreOfNumBytes(
7732 const MachineInstr &MI, unsigned NumBytes) const {
7733 if (!MI.mayLoadOrStore())
7734 return false;
7735 assert(MI.hasOneMemOperand() &&
7736 "Expected load/store to have only one mem op!");
7737 return (*MI.memoperands_begin())->getSize() == NumBytes;
7738}
7739
7740bool AArch64InstructionSelector::isDef32(const MachineInstr &MI) const {
7741 const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
7742 if (MRI.getType(MI.getOperand(0).getReg()).getSizeInBits() != 32)
7743 return false;
7744
7745 // Only return true if we know the operation will zero-out the high half of
7746 // the 64-bit register. Truncates can be subregister copies, which don't
7747 // zero out the high bits. Copies and other copy-like instructions can be
7748 // fed by truncates, or could be lowered as subregister copies.
7749 switch (MI.getOpcode()) {
7750 default:
7751 return true;
7752 case TargetOpcode::COPY:
7753 case TargetOpcode::G_BITCAST:
7754 case TargetOpcode::G_TRUNC:
7755 case TargetOpcode::G_PHI:
7756 return false;
7757 }
7758}
7759
7760
7761// Perform fixups on the given PHI instruction's operands to force them all
7762// to be the same as the destination regbank.
7764 const AArch64RegisterBankInfo &RBI) {
7765 assert(MI.getOpcode() == TargetOpcode::G_PHI && "Expected a G_PHI");
7766 Register DstReg = MI.getOperand(0).getReg();
7767 const RegisterBank *DstRB = MRI.getRegBankOrNull(DstReg);
7768 assert(DstRB && "Expected PHI dst to have regbank assigned");
7769 MachineIRBuilder MIB(MI);
7770
7771 // Go through each operand and ensure it has the same regbank.
7772 for (MachineOperand &MO : llvm::drop_begin(MI.operands())) {
7773 if (!MO.isReg())
7774 continue;
7775 Register OpReg = MO.getReg();
7776 const RegisterBank *RB = MRI.getRegBankOrNull(OpReg);
7777 if (RB != DstRB) {
7778 // Insert a cross-bank copy.
7779 auto *OpDef = MRI.getVRegDef(OpReg);
7780 const LLT &Ty = MRI.getType(OpReg);
7781 MachineBasicBlock &OpDefBB = *OpDef->getParent();
7782
7783 // Any instruction we insert must appear after all PHIs in the block
7784 // for the block to be valid MIR.
7785 MachineBasicBlock::iterator InsertPt = std::next(OpDef->getIterator());
7786 if (InsertPt != OpDefBB.end() && InsertPt->isPHI())
7787 InsertPt = OpDefBB.getFirstNonPHI();
7788 MIB.setInsertPt(*OpDef->getParent(), InsertPt);
7789 auto Copy = MIB.buildCopy(Ty, OpReg);
7790 MRI.setRegBank(Copy.getReg(0), *DstRB);
7791 MO.setReg(Copy.getReg(0));
7792 }
7793 }
7794}
7795
7796void AArch64InstructionSelector::processPHIs(MachineFunction &MF) {
7797 // We're looking for PHIs; build a list so we don't invalidate iterators.
7800 for (auto &BB : MF) {
7801 for (auto &MI : BB) {
7802 if (MI.getOpcode() == TargetOpcode::G_PHI)
7803 Phis.emplace_back(&MI);
7804 }
7805 }
7806
7807 for (auto *MI : Phis) {
7808 // We need to do some work here if the operand types are < 16 bit and they
7809 // are split across fpr/gpr banks. Since all types <32b on gpr
7810 // end up being assigned gpr32 regclasses, we can end up with PHIs here
7811 // which try to select between a gpr32 and an fpr16. Ideally RBS shouldn't
7812 // be selecting heterogeneous regbanks for operands if possible, but we
7813 // still need to be able to deal with it here.
7814 //
7815 // To fix this, if we have a gpr-bank operand < 32b in size and at least
7816 // one other operand is on the fpr bank, then we add cross-bank copies
7817 // to homogenize the operand banks. For simplicity the bank that we choose
7818 // to settle on is whatever bank the def operand has. For example:
7819 //
7820 // %endbb:
7821 // %dst:gpr(s16) = G_PHI %in1:gpr(s16), %bb1, %in2:fpr(s16), %bb2
7822 // =>
7823 // %bb2:
7824 // ...
7825 // %in2_copy:gpr(s16) = COPY %in2:fpr(s16)
7826 // ...
7827 // %endbb:
7828 // %dst:gpr(s16) = G_PHI %in1:gpr(s16), %bb1, %in2_copy:gpr(s16), %bb2
7829 bool HasGPROp = false, HasFPROp = false;
7830 for (const MachineOperand &MO : llvm::drop_begin(MI->operands())) {
7831 if (!MO.isReg())
7832 continue;
7833 const LLT &Ty = MRI.getType(MO.getReg());
7834 if (!Ty.isValid() || !Ty.isScalar())
7835 break;
7836 if (Ty.getSizeInBits() >= 32)
7837 break;
7838 const RegisterBank *RB = MRI.getRegBankOrNull(MO.getReg());
7839 // If for some reason we don't have a regbank yet, don't try anything.
7840 if (!RB)
7841 break;
7842
7843 if (RB->getID() == AArch64::GPRRegBankID)
7844 HasGPROp = true;
7845 else
7846 HasFPROp = true;
7847 }
7848 // We have heterogeneous regbanks; we need to fix them up.
7849 if (HasGPROp && HasFPROp)
7850 fixupPHIOpBanks(*MI, MRI, RBI);
7851 }
7852}
7853
7854namespace llvm {
7857 const AArch64Subtarget &Subtarget,
7858 const AArch64RegisterBankInfo &RBI) {
7859 return new AArch64InstructionSelector(TM, Subtarget, RBI);
7860}
7861}
unsigned SubReg
unsigned const MachineRegisterInfo * MRI
#define Success
MachineInstrBuilder MachineInstrBuilder & DefMI
static bool isPreferredADD(int64_t ImmOff)
static SDValue emitConditionalComparison(SDValue LHS, SDValue RHS, ISD::CondCode CC, SDValue CCOp, AArch64CC::CondCode Predicate, AArch64CC::CondCode OutCC, const SDLoc &DL, SelectionDAG &DAG)
can be transformed to: not (and (not (and (setCC (cmp C)) (setCD (cmp D)))) (and (not (setCA (cmp A))...
static SDValue tryAdvSIMDModImm16(unsigned NewOp, SDValue Op, SelectionDAG &DAG, const APInt &Bits, const SDValue *LHS=nullptr)
static SDValue tryAdvSIMDModImmFP(unsigned NewOp, SDValue Op, SelectionDAG &DAG, const APInt &Bits)
static SDValue tryAdvSIMDModImm64(unsigned NewOp, SDValue Op, SelectionDAG &DAG, const APInt &Bits)
static SDValue tryAdvSIMDModImm8(unsigned NewOp, SDValue Op, SelectionDAG &DAG, const APInt &Bits)
static SDValue emitConjunctionRec(SelectionDAG &DAG, SDValue Val, AArch64CC::CondCode &OutCC, bool Negate, SDValue CCOp, AArch64CC::CondCode Predicate)
Emit conjunction or disjunction tree with the CMP/FCMP followed by a chain of CCMP/CFCMP ops.
static SDValue tryAdvSIMDModImm321s(unsigned NewOp, SDValue Op, SelectionDAG &DAG, const APInt &Bits)
static bool isCMN(SDValue Op, ISD::CondCode CC)
static SDValue tryAdvSIMDModImm32(unsigned NewOp, SDValue Op, SelectionDAG &DAG, const APInt &Bits, const SDValue *LHS=nullptr)
static SDValue emitConjunction(SelectionDAG &DAG, SDValue Val, AArch64CC::CondCode &OutCC)
Emit expression as a conjunction (a series of CCMP/CFCMP ops).
static unsigned selectFPConvOpc(unsigned GenericOpc, LLT DstTy, LLT SrcTy)
#define GET_GLOBALISEL_PREDICATES_INIT
static std::pair< const TargetRegisterClass *, const TargetRegisterClass * > getRegClassesForCopy(MachineInstr &I, const TargetInstrInfo &TII, MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI, const RegisterBankInfo &RBI)
Helper function to get the source and destination register classes for a copy.
#define GET_GLOBALISEL_TEMPORARIES_INIT
static void changeFPCCToANDAArch64CC(CmpInst::Predicate CC, AArch64CC::CondCode &CondCode, AArch64CC::CondCode &CondCode2)
Convert an IR fp condition code to an AArch64 CC.
static Register getTestBitReg(Register Reg, uint64_t &Bit, bool &Invert, MachineRegisterInfo &MRI)
Return a register which can be used as a bit to test in a TB(N)Z.
static unsigned getMinSizeForRegBank(const RegisterBank &RB)
Returns the minimum size the given register bank can hold.
static std::optional< int64_t > getVectorShiftImm(Register Reg, MachineRegisterInfo &MRI)
Returns the element immediate value of a vector shift operand if found.
static unsigned selectLoadStoreUIOp(unsigned GenericOpc, unsigned RegBankID, unsigned OpSize)
Select the AArch64 opcode for the G_LOAD or G_STORE operation GenericOpc, appropriate for the (value)...
static const TargetRegisterClass * getMinClassForRegBank(const RegisterBank &RB, TypeSize SizeInBits, bool GetAllRegSet=false)
Given a register bank, and size in bits, return the smallest register class that can represent that c...
static AArch64CC::CondCode changeICMPPredToAArch64CC(CmpInst::Predicate P)
static unsigned selectBinaryOp(unsigned GenericOpc, unsigned RegBankID, unsigned OpSize)
Select the AArch64 opcode for the basic binary operation GenericOpc (such as G_OR or G_SDIV),...
static bool getSubRegForClass(const TargetRegisterClass *RC, const TargetRegisterInfo &TRI, unsigned &SubReg)
Returns the correct subregister to use for a given register class.
static bool selectCopy(MachineInstr &I, const TargetInstrInfo &TII, MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI, const RegisterBankInfo &RBI)
static bool copySubReg(MachineInstr &I, MachineRegisterInfo &MRI, const RegisterBankInfo &RBI, Register SrcReg, const TargetRegisterClass *To, unsigned SubReg)
Helper function for selectCopy.
static Register createDTuple(ArrayRef< Register > Regs, MachineIRBuilder &MIB)
Create a tuple of D-registers using the registers in Regs.
static void fixupPHIOpBanks(MachineInstr &MI, MachineRegisterInfo &MRI, const AArch64RegisterBankInfo &RBI)
static bool selectDebugInstr(MachineInstr &I, MachineRegisterInfo &MRI, const RegisterBankInfo &RBI)
static bool isSignExtendShiftType(AArch64_AM::ShiftExtendType Type)
static AArch64_AM::ShiftExtendType getShiftTypeForInst(MachineInstr &MI)
Given a shift instruction, return the correct shift type for that instruction.
static bool unsupportedBinOp(const MachineInstr &I, const AArch64RegisterBankInfo &RBI, const MachineRegisterInfo &MRI, const AArch64RegisterInfo &TRI)
Check whether I is a currently unsupported binary operation:
static bool canEmitConjunction(Register Val, bool &CanNegate, bool &MustBeFirst, bool WillNegate, MachineRegisterInfo &MRI, unsigned Depth=0)
Returns true if Val is a tree of AND/OR/CMP operations that can be expressed as a conjunction.
static bool getLaneCopyOpcode(unsigned &CopyOpc, unsigned &ExtractSubReg, const unsigned EltSize)
static Register createQTuple(ArrayRef< Register > Regs, MachineIRBuilder &MIB)
Create a tuple of Q-registers using the registers in Regs.
static std::optional< uint64_t > getImmedFromMO(const MachineOperand &Root)
static std::pair< unsigned, unsigned > getInsertVecEltOpInfo(const RegisterBank &RB, unsigned EltSize)
Return an <Opcode, SubregIndex> pair to do a vector elt insert of a given size and RB.

static Register createTuple(ArrayRef< Register > Regs, const unsigned RegClassIDs[], const unsigned SubRegs[], MachineIRBuilder &MIB)
Create a REG_SEQUENCE instruction using the registers in Regs.
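For illustration, a REG_SEQUENCE built this way for a pair of D registers looks roughly like the sketch below; the helper name is made up, and DDRegClass with dsub0/dsub1 are the usual AArch64 tuple pieces, so treat the exact shape as an assumption rather than this file's code.
    static Register buildDPairSketch(Register Lo, Register Hi, MachineIRBuilder &MIB) {
      MachineRegisterInfo &MRI = *MIB.getMRI();
      Register Tuple = MRI.createVirtualRegister(&AArch64::DDRegClass);
      MIB.buildInstr(TargetOpcode::REG_SEQUENCE)
          .addDef(Tuple)
          .addUse(Lo).addImm(AArch64::dsub0)   // Lo fills the first D slot
          .addUse(Hi).addImm(AArch64::dsub1);  // Hi fills the second D slot
      return Tuple;
    }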
static std::optional< int64_t > getVectorSHLImm(LLT SrcTy, Register Reg, MachineRegisterInfo &MRI)
Matches and returns the shift immediate value for a SHL instruction given a shift operand.
static void changeFPCCToORAArch64CC(CmpInst::Predicate CC, AArch64CC::CondCode &CondCode, AArch64CC::CondCode &CondCode2)
changeFPCCToORAArch64CC - Convert an IR fp condition code to an AArch64 CC.
unsigned RegSize
This file declares the targeting of the RegisterBankInfo class for AArch64.
static const LLT S64
static const LLT S32
static const LLT S16
static const LLT S8
static bool isStore(int Opcode)
static bool selectMergeValues(MachineInstrBuilder &MIB, const ARMBaseInstrInfo &TII, MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI, const RegisterBankInfo &RBI)
static bool selectUnmergeValues(MachineInstrBuilder &MIB, const ARMBaseInstrInfo &TII, MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI, const RegisterBankInfo &RBI)
MachineBasicBlock & MBB
This file contains the declarations for the subclasses of Constant, which represent the different fla...
Returns the sub type a function will return at a given Idx. Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx.
#define LLVM_DEBUG(X)
Definition: Debug.h:101
This file contains constants used for implementing Dwarf debug support.
uint64_t Addr
uint64_t Size
#define DEBUG_TYPE
Declares convenience wrapper classes for interpreting MachineInstr instances as specific generic oper...
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
static void emitLoadFromConstantPool(Register DstReg, const Constant *ConstVal, MachineIRBuilder &MIRBuilder)
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
Contains matchers for matching SSA Machine Instructions.
This file declares the MachineConstantPool class which is an abstract constant pool to keep track of ...
This file declares the MachineIRBuilder class.
unsigned const TargetRegisterInfo * TRI
unsigned Reg
static unsigned getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
#define P(N)
const char LLVMTargetMachineRef TM
static StringRef getName(Value *V)
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
static constexpr int Concat[]
Value * RHS
Value * LHS
AArch64FunctionInfo - This class is derived from MachineFunctionInfo and contains private AArch64-spe...
This class provides the information for the target register banks.
bool isCallingConvWin64(CallingConv::ID CC) const
APInt bitcastToAPInt() const
Definition: APFloat.h:1260
Class for arbitrary precision integers.
Definition: APInt.h:78
APInt zext(unsigned width) const
Zero extend to a new width.
Definition: APInt.cpp:981
uint64_t getZExtValue() const
Get zero extended value.
Definition: APInt.h:1500
static APInt getSplat(unsigned NewLen, const APInt &V)
Return a value containing V broadcasted over NewLen bits.
Definition: APInt.cpp:620
static APInt getHighBitsSet(unsigned numBits, unsigned hiBitsSet)
Constructs an APInt value that has the top hiBitsSet bits set.
Definition: APInt.h:276
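Quick examples of the two APInt helpers above:
    APInt Byte(8, 0xAB);
    APInt Splat = APInt::getSplat(64, Byte);       // 0xABABABABABABABAB
    APInt Top16 = APInt::getHighBitsSet(64, 16);   // 0xFFFF000000000000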
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:165
BlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate IR basic block frequen...
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition: InstrTypes.h:757
@ FCMP_OEQ
0 0 0 1 True if ordered and equal
Definition: InstrTypes.h:760
@ ICMP_SLT
signed less than
Definition: InstrTypes.h:786
@ ICMP_SLE
signed less or equal
Definition: InstrTypes.h:787
@ FCMP_OLT
0 1 0 0 True if ordered and less than
Definition: InstrTypes.h:763
@ FCMP_ULE
1 1 0 1 True if unordered, less than, or equal
Definition: InstrTypes.h:772
@ FCMP_OGT
0 0 1 0 True if ordered and greater than
Definition: InstrTypes.h:761
@ FCMP_OGE
0 0 1 1 True if ordered and greater than or equal
Definition: InstrTypes.h:762
@ ICMP_UGE
unsigned greater or equal
Definition: InstrTypes.h:781
@ ICMP_UGT
unsigned greater than
Definition: InstrTypes.h:780
@ ICMP_SGT
signed greater than
Definition: InstrTypes.h:784
@ FCMP_ULT
1 1 0 0 True if unordered or less than
Definition: InstrTypes.h:771
@ FCMP_ONE
0 1 1 0 True if ordered and operands are unequal
Definition: InstrTypes.h:765
@ FCMP_UEQ
1 0 0 1 True if unordered or equal
Definition: InstrTypes.h:768
@ ICMP_ULT
unsigned less than
Definition: InstrTypes.h:782
@ FCMP_UGT
1 0 1 0 True if unordered or greater than
Definition: InstrTypes.h:769
@ FCMP_OLE
0 1 0 1 True if ordered and less than or equal
Definition: InstrTypes.h:764
@ FCMP_ORD
0 1 1 1 True if ordered (no nans)
Definition: InstrTypes.h:766
@ ICMP_EQ
equal
Definition: InstrTypes.h:778
@ ICMP_NE
not equal
Definition: InstrTypes.h:779
@ ICMP_SGE
signed greater or equal
Definition: InstrTypes.h:785
@ FCMP_UNE
1 1 1 0 True if unordered or not equal
Definition: InstrTypes.h:773
@ ICMP_ULE
unsigned less or equal
Definition: InstrTypes.h:783
@ FCMP_UGE
1 0 1 1 True if unordered, greater than, or equal
Definition: InstrTypes.h:770
@ FCMP_UNO
1 0 0 0 True if unordered: isnan(X) | isnan(Y)
Definition: InstrTypes.h:767
Predicate getInversePredicate() const
For example, EQ -> NE, UGT -> ULE, SLT -> SGE, OEQ -> UNE, UGT -> OLE, OLT -> UGE,...
Definition: InstrTypes.h:871
bool isIntPredicate() const
Definition: InstrTypes.h:865
bool isUnsigned() const
Definition: InstrTypes.h:1013
static Constant * getSplat(unsigned NumElts, Constant *Elt)
Return a ConstantVector with the specified constant in each element.
Definition: Constants.cpp:3015
ConstantFP - Floating Point Values [float, double].
Definition: Constants.h:269
const APFloat & getValueAPF() const
Definition: Constants.h:312
bool isNegative() const
Return true if the sign bit is set.
Definition: Constants.h:319
bool isZero() const
Return true if the value is positive or negative zero.
Definition: Constants.h:316
This is the shared class of boolean and integer constants.
Definition: Constants.h:81
int64_t getSExtValue() const
Return the constant as a 64-bit integer value after it has been sign extended as appropriate for the ...
Definition: Constants.h:161
unsigned getBitWidth() const
getBitWidth - Return the scalar bitwidth of this constant.
Definition: Constants.h:149
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
Definition: Constants.h:155
static Constant * get(ArrayRef< Constant * > V)
Definition: Constants.cpp:1399
This is an important base class in LLVM.
Definition: Constant.h:42
Constant * getSplatValue(bool AllowPoison=false) const
If all elements of the vector constant have the same value, return that value.
Definition: Constants.cpp:1686
const APInt & getUniqueInteger() const
If C is a constant integer then return its value, otherwise C must be a vector of constant integers,...
Definition: Constants.cpp:1745
bool isNullValue() const
Return true if this is the value that would be returned by getNullValue.
Definition: Constants.cpp:90
This class represents an Operation in the Expression.
TypeSize getTypeStoreSize(Type *Ty) const
Returns the maximum number of bytes that may be overwritten by storing the specified type.
Definition: DataLayout.h:472
Align getPrefTypeAlign(Type *Ty) const
Returns the preferred stack/global alignment for the specified type.
Definition: DataLayout.cpp:874
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this functio...
Definition: Function.h:274
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
Definition: Function.cpp:358
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition: Function.cpp:719
std::optional< SmallVector< std::function< void(MachineInstrBuilder &)>, 4 > > ComplexRendererFns
Represents indexed stores.
Represents any type of generic load or store.
Register getPointerReg() const
Get the source register of the pointer value.
MachineMemOperand & getMMO() const
Get the MachineMemOperand on this instruction.
LocationSize getMemSize() const
Returns the size in bytes of the memory access.
LocationSize getMemSizeInBits() const
Returns the size in bits of the memory access.
Represents a G_SELECT.
Register getCondReg() const
Register getFalseReg() const
Register getTrueReg() const
Register getReg(unsigned Idx) const
Access the Idx'th operand as a register and return it.
bool isThreadLocal() const
If the value is "Thread Local", its value isn't shared by the threads.
Definition: GlobalValue.h:263
bool hasExternalWeakLinkage() const
Definition: GlobalValue.h:529
bool isEquality() const
Return true if this predicate is either EQ or NE.
constexpr unsigned getScalarSizeInBits() const
Definition: LowLevelType.h:267
constexpr bool isScalar() const
Definition: LowLevelType.h:146
constexpr LLT changeElementType(LLT NewEltTy) const
If this type is a vector, return a vector with the same number of elements but the new element type.
Definition: LowLevelType.h:214
constexpr LLT multiplyElements(int Factor) const
Produce a vector type that is Factor times bigger, preserving the element type.
Definition: LowLevelType.h:254
constexpr bool isPointerVector() const
Definition: LowLevelType.h:152
static constexpr LLT scalar(unsigned SizeInBits)
Get a low-level scalar or aggregate "bag of bits".
Definition: LowLevelType.h:42
constexpr bool isValid() const
Definition: LowLevelType.h:145
constexpr bool isVector() const
Definition: LowLevelType.h:148
static constexpr LLT pointer(unsigned AddressSpace, unsigned SizeInBits)
Get a low-level pointer in the given address space.
Definition: LowLevelType.h:57
constexpr TypeSize getSizeInBits() const
Returns the total size of the type. Must only be called on sized types.
Definition: LowLevelType.h:193
constexpr bool isPointer() const
Definition: LowLevelType.h:149
constexpr LLT getElementType() const
Returns the vector's element type. Only valid for vector types.
Definition: LowLevelType.h:290
constexpr unsigned getAddressSpace() const
Definition: LowLevelType.h:280
static constexpr LLT fixed_vector(unsigned NumElements, unsigned ScalarSizeInBits)
Get a low-level fixed-width vector of some number of elements and element width.
Definition: LowLevelType.h:100
constexpr TypeSize getSizeInBytes() const
Returns the total size of the type in bytes, i.e.
Definition: LowLevelType.h:203
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:67
TypeSize getValue() const
Set of metadata that should be preserved when using BuildMI().
iterator getFirstNonPHI()
Returns a pointer to the first instruction in this block that is not a PHINode instruction.
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
The MachineConstantPool class keeps track of constants referenced by a function which must be spilled...
unsigned getConstantPoolIndex(const Constant *C, Align Alignment)
getConstantPoolIndex - Create a new entry in the constant pool or return an existing one.
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
void setFrameAddressIsTaken(bool T)
void setReturnAddressIsTaken(bool s)
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
const DataLayout & getDataLayout() const
Return the DataLayout attached to the Module associated to this MF.
Function & getFunction()
Return the LLVM function that this machine code represents.
const LLVMTargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
MachineConstantPool * getConstantPool()
getConstantPool - Return the constant pool object for the current function.
Helper class to build MachineInstr.
void setInsertPt(MachineBasicBlock &MBB, MachineBasicBlock::iterator II)
Set the insertion point before the specified position.
void setInstr(MachineInstr &MI)
Set the insertion point to before MI.
MachineInstrBuilder buildInstr(unsigned Opcode)
Build and insert <empty> = Opcode <empty>.
MachineFunction & getMF()
Getter for the function we currently build.
void setInstrAndDebugLoc(MachineInstr &MI)
Set the insertion point to before MI, and set the debug loc to MI's loc.
const MachineBasicBlock & getMBB() const
Getter for the basic block we currently build.
MachineRegisterInfo * getMRI()
Getter for MRI.
MachineIRBuilderState & getState()
Getter for the State.
MachineInstrBuilder buildCopy(const DstOp &Res, const SrcOp &Op)
Build and insert Res = COPY Op.
const DataLayout & getDataLayout() const
void setState(const MachineIRBuilderState &NewState)
Setter for the State.
MachineInstrBuilder buildPtrToInt(const DstOp &Dst, const SrcOp &Src)
Build and insert a G_PTRTOINT instruction.
Register getReg(unsigned Idx) const
Get the register for the operand index.
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & addBlockAddress(const BlockAddress *BA, int64_t Offset=0, unsigned TargetFlags=0) const
const MachineInstrBuilder & addFrameIndex(int Idx) const
const MachineInstrBuilder & addRegMask(const uint32_t *Mask) const
const MachineInstrBuilder & addGlobalAddress(const GlobalValue *GV, int64_t Offset=0, unsigned TargetFlags=0) const
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
bool constrainAllUses(const TargetInstrInfo &TII, const TargetRegisterInfo &TRI, const RegisterBankInfo &RBI) const
const MachineInstrBuilder & addJumpTableIndex(unsigned Idx, unsigned TargetFlags=0) const
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
const MachineInstrBuilder & cloneMemRefs(const MachineInstr &OtherMI) const
const MachineInstrBuilder & addUse(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register use operand.
const MachineInstrBuilder & setMIFlags(unsigned Flags) const
const MachineInstrBuilder & addMemOperand(MachineMemOperand *MMO) const
const MachineInstrBuilder & addDef(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register definition operand.
Representation of each machine instruction.
Definition: MachineInstr.h:69
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
Definition: MachineInstr.h:569
const MachineBasicBlock * getParent() const
Definition: MachineInstr.h:346
void addOperand(MachineFunction &MF, const MachineOperand &Op)
Add the specified operand to the instruction.
const MachineFunction * getMF() const
Return the function that contains the basic block that this instruction belongs to.
void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:579
void addMemOperand(MachineFunction &MF, MachineMemOperand *MO)
Add a MachineMemOperand to the machine instruction.
A description of a memory reference used in the backend.
LLT getMemoryType() const
Return the memory type of the memory reference.
@ MOLoad
The memory access reads data.
AtomicOrdering getSuccessOrdering() const
Return the atomic ordering requirements for this memory operation.
MachineOperand class - Representation of each machine instruction operand.
const GlobalValue * getGlobal() const
const ConstantInt * getCImm() const
bool isCImm() const
isCImm - Test if this is a MO_CImmediate operand.
int64_t getImm() const
bool isReg() const
isReg - Tests if this is a MO_Register operand.
void setReg(Register Reg)
Change the register this operand corresponds to.
bool isImm() const
isImm - Tests if this is a MO_Immediate operand.
void ChangeToImmediate(int64_t ImmVal, unsigned TargetFlags=0)
ChangeToImmediate - Replace this operand with a new immediate operand of the specified value.
MachineInstr * getParent()
getParent - Return the instruction that this operand belongs to.
static MachineOperand CreateImm(int64_t Val)
Register getReg() const
getReg - Returns the register number.
static MachineOperand CreateGA(const GlobalValue *GV, int64_t Offset, unsigned TargetFlags=0)
static MachineOperand CreateBA(const BlockAddress *BA, int64_t Offset, unsigned TargetFlags=0)
const ConstantFP * getFPImm() const
unsigned getPredicate() const
int64_t getOffset() const
Return the offset from the symbol in this operand.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
T get() const
Returns the value of the specified pointer type.
Definition: PointerUnion.h:155
T dyn_cast() const
Returns the current pointer if it is of the specified pointer type, otherwise returns null.
Definition: PointerUnion.h:162
Analysis providing profile information.
Holds all the information related to register banks.
static const TargetRegisterClass * constrainGenericRegister(Register Reg, const TargetRegisterClass &RC, MachineRegisterInfo &MRI)
Constrain the (possibly generic) virtual register Reg to RC.
const RegisterBank & getRegBank(unsigned ID)
Get the register bank identified by ID.
TypeSize getSizeInBits(Register Reg, const MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI) const
Get the size in bits of Reg.
This class implements the register bank concept.
Definition: RegisterBank.h:28
unsigned getID() const
Get the identifier of this register bank.
Definition: RegisterBank.h:45
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
constexpr bool isValid() const
Definition: Register.h:116
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
Definition: Register.h:91
constexpr bool isPhysical() const
Return true if the specified register number is in the physical register namespace.
Definition: Register.h:95
This class represents the LLVM 'select' instruction.
reference emplace_back(ArgTypes &&... Args)
Definition: SmallVector.h:950
void push_back(const T &Elt)
Definition: SmallVector.h:426
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1209
Register getReg() const
TargetInstrInfo - Interface to description of machine instruction set.
CodeModel::Model getCodeModel() const
Returns the code model.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
virtual const TargetRegisterInfo * getRegisterInfo() const
getRegisterInfo - If register information is available, return it.
virtual const TargetLowering * getTargetLowering() const
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:81
static constexpr TypeSize getFixed(ScalarTy ExactSize)
Definition: TypeSize.h:345
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
static IntegerType * getIntNTy(LLVMContext &C, unsigned N)
static IntegerType * getInt8Ty(LLVMContext &C)
A Use represents the edge between a Value definition and its users.
Definition: Use.h:43
LLVM Value Representation.
Definition: Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
Align getPointerAlignment(const DataLayout &DL) const
Returns an alignment of the pointer value.
Definition: Value.cpp:927
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
Definition: TypeSize.h:171
self_iterator getIterator()
Definition: ilist_node.h:132
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
static CondCode getInvertedCondCode(CondCode Code)
static unsigned getNZCVToSatisfyCondCode(CondCode Code)
Given a condition code, return NZCV flags that would satisfy that condition.
void changeFCMPPredToAArch64CC(const CmpInst::Predicate P, AArch64CC::CondCode &CondCode, AArch64CC::CondCode &CondCode2)
Find the AArch64 condition codes necessary to represent P for a scalar floating point comparison.
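Callers typically treat AArch64CC::AL in the second output as "no second condition needed"; predicates such as FCMP_ONE need two NZCV tests that are then OR'ed together. A hedged usage sketch reflecting that usual convention, not a quote of this file:
    AArch64CC::CondCode CC1, CC2;
    changeFCMPPredToAArch64CC(CmpInst::FCMP_ONE, CC1, CC2);
    if (CC2 != AArch64CC::AL) {
      // Two conditions: emit a second conditional instruction (or branch) and
      // combine it with the first, e.g. by OR-ing the two CSET results.
    }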
std::optional< int64_t > getAArch64VectorSplatScalar(const MachineInstr &MI, const MachineRegisterInfo &MRI)
@ MO_NC
MO_NC - Indicates whether the linker is expected to check the symbol reference for overflow.
@ MO_G1
MO_G1 - A symbol operand with this flag (granule 1) represents the bits 16-31 of a 64-bit address,...
@ MO_PAGEOFF
MO_PAGEOFF - A symbol operand with this flag represents the offset of that symbol within a 4K page.
@ MO_GOT
MO_GOT - This flag indicates that a symbol operand represents the address of the GOT entry for the sy...
@ MO_G0
MO_G0 - A symbol operand with this flag (granule 0) represents the bits 0-15 of a 64-bit address,...
@ MO_PAGE
MO_PAGE - A symbol operand with this flag represents the pc-relative offset of the 4K page containing...
@ MO_TLS
MO_TLS - Indicates that the operand being accessed is some kind of thread-local symbol.
@ MO_G2
MO_G2 - A symbol operand with this flag (granule 2) represents the bits 32-47 of a 64-bit address,...
@ MO_G3
MO_G3 - A symbol operand with this flag (granule 3) represents the high 16-bits of a 64-bit address,...
static bool isLogicalImmediate(uint64_t imm, unsigned regSize)
isLogicalImmediate - Return true if the immediate is valid for a logical immediate instruction of the...
static uint8_t encodeAdvSIMDModImmType2(uint64_t Imm)
static bool isAdvSIMDModImmType9(uint64_t Imm)
static bool isAdvSIMDModImmType4(uint64_t Imm)
static bool isAdvSIMDModImmType5(uint64_t Imm)
static int getFP32Imm(const APInt &Imm)
getFP32Imm - Return an 8-bit floating-point version of the 32-bit floating-point value.
static uint8_t encodeAdvSIMDModImmType7(uint64_t Imm)
static uint8_t encodeAdvSIMDModImmType12(uint64_t Imm)
static uint8_t encodeAdvSIMDModImmType10(uint64_t Imm)
static uint8_t encodeAdvSIMDModImmType9(uint64_t Imm)
static uint64_t encodeLogicalImmediate(uint64_t imm, unsigned regSize)
encodeLogicalImmediate - Return the encoded immediate value for a logical immediate instruction of th...
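For example, a repeating pattern like 0x00FF00FF00FF00FF (a 16-bit element with eight contiguous ones) is a valid AArch64 logical immediate, so AND/ORR/EOR can use it directly instead of materialising it in a register:
    uint64_t Imm = 0x00FF00FF00FF00FFULL;
    if (AArch64_AM::isLogicalImmediate(Imm, 64)) {
      uint64_t Enc = AArch64_AM::encodeLogicalImmediate(Imm, 64); // 13-bit N:immr:imms form
      // Enc is the immediate operand expected by e.g. ANDXri.
    }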
static bool isAdvSIMDModImmType7(uint64_t Imm)
static uint8_t encodeAdvSIMDModImmType5(uint64_t Imm)
static int getFP64Imm(const APInt &Imm)
getFP64Imm - Return an 8-bit floating-point version of the 64-bit floating-point value.
static bool isAdvSIMDModImmType10(uint64_t Imm)
static int getFP16Imm(const APInt &Imm)
getFP16Imm - Return an 8-bit floating-point version of the 16-bit floating-point value.
static uint8_t encodeAdvSIMDModImmType8(uint64_t Imm)
static bool isAdvSIMDModImmType12(uint64_t Imm)
static uint8_t encodeAdvSIMDModImmType11(uint64_t Imm)
static bool isAdvSIMDModImmType11(uint64_t Imm)
static uint8_t encodeAdvSIMDModImmType6(uint64_t Imm)
static bool isAdvSIMDModImmType8(uint64_t Imm)
static uint8_t encodeAdvSIMDModImmType4(uint64_t Imm)
static unsigned getShifterImm(AArch64_AM::ShiftExtendType ST, unsigned Imm)
getShifterImm - Encode the shift type and amount: imm: 6-bit shift amount shifter: 000 ==> lsl 001 ==...
static bool isAdvSIMDModImmType6(uint64_t Imm)
static uint8_t encodeAdvSIMDModImmType1(uint64_t Imm)
static uint8_t encodeAdvSIMDModImmType3(uint64_t Imm)
static bool isAdvSIMDModImmType2(uint64_t Imm)
static bool isAdvSIMDModImmType3(uint64_t Imm)
static bool isAdvSIMDModImmType1(uint64_t Imm)
Key
PAL metadata keys.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:121
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out,...
Definition: ISDOpcodes.h:1574
operand_type_match m_Reg()
SpecificConstantMatch m_SpecificICst(int64_t RequestedValue)
Matches a constant equal to RequestedValue.
UnaryOp_match< SrcTy, TargetOpcode::G_ZEXT > m_GZExt(const SrcTy &Src)
ConstantMatch< APInt > m_ICst(APInt &Cst)
BinaryOp_match< LHS, RHS, TargetOpcode::G_ADD, true > m_GAdd(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, TargetOpcode::G_OR, true > m_GOr(const LHS &L, const RHS &R)
BinaryOp_match< SpecificConstantMatch, SrcTy, TargetOpcode::G_SUB > m_Neg(const SrcTy &&Src)
Matches a register negated by a G_SUB.
OneNonDBGUse_match< SubPat > m_OneNonDBGUse(const SubPat &SP)
BinaryOp_match< SrcTy, SpecificConstantMatch, TargetOpcode::G_XOR, true > m_Not(const SrcTy &&Src)
Matches a register not-ed by a G_XOR.
bool mi_match(Reg R, const MachineRegisterInfo &MRI, Pattern &&P)
BinaryOp_match< LHS, RHS, TargetOpcode::G_PTR_ADD, false > m_GPtrAdd(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, TargetOpcode::G_SHL, false > m_GShl(const LHS &L, const RHS &R)
Or< Preds... > m_any_of(Preds &&... preds)
BinaryOp_match< LHS, RHS, TargetOpcode::G_AND, true > m_GAnd(const LHS &L, const RHS &R)
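A small usage sketch tying several of the matchers above together; it recognises an add of a one-use shifted value, the kind of shape that folds into an arithmetic-shifted-register operand (Root and MRI are assumed to be in scope):
    APInt ShiftAmt;
    Register Base, Shifted;
    if (mi_match(Root, MRI,
                 m_GAdd(m_OneNonDBGUse(m_GShl(m_Reg(Shifted), m_ICst(ShiftAmt))),
                        m_Reg(Base)))) {
      // Root = G_ADD (G_SHL Shifted, ShiftAmt), Base -- fold the shift into the add.
    }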
Predicate
Predicate - These are "(BI << 5) | BO" for various predicates.
Definition: PPCPredicates.h:26
@ Implicit
Not emitted register (e.g. carry, or temporary result).
@ Undef
Value of the register doesn't matter.
Reg
All possible values of the reg field in the ModR/M byte.
constexpr double e
Definition: MathExtras.h:47
NodeAddr< InstrNode * > Instr
Definition: RDFGraph.h:389
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
Register getFunctionLiveInPhysReg(MachineFunction &MF, const TargetInstrInfo &TII, MCRegister PhysReg, const TargetRegisterClass &RC, const DebugLoc &DL, LLT RegTy=LLT())
Return a virtual register corresponding to the incoming argument register PhysReg.
Definition: Utils.cpp:903
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition: STLExtras.h:329
@ Offset
Definition: DWP.cpp:480
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1722
Register constrainOperandRegClass(const MachineFunction &MF, const TargetRegisterInfo &TRI, MachineRegisterInfo &MRI, const TargetInstrInfo &TII, const RegisterBankInfo &RBI, MachineInstr &InsertPt, const TargetRegisterClass &RegClass, MachineOperand &RegMO)
Constrain the Register operand OpIdx, so that it is now constrained to the TargetRegisterClass passed...
Definition: Utils.cpp:56
MachineInstr * getOpcodeDef(unsigned Opcode, Register Reg, const MachineRegisterInfo &MRI)
See if Reg is defined by a single def instruction that is Opcode.
Definition: Utils.cpp:639
const ConstantFP * getConstantFPVRegVal(Register VReg, const MachineRegisterInfo &MRI)
Definition: Utils.cpp:452
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
std::optional< APInt > getIConstantVRegVal(Register VReg, const MachineRegisterInfo &MRI)
If VReg is defined by a G_CONSTANT, return the corresponding value.
Definition: Utils.cpp:295
bool constrainSelectedInstRegOperands(MachineInstr &I, const TargetInstrInfo &TII, const TargetRegisterInfo &TRI, const RegisterBankInfo &RBI)
Mutate the newly-selected instruction I to constrain its (possibly generic) virtual register operands...
Definition: Utils.cpp:155
bool isPreISelGenericOpcode(unsigned Opcode)
Check whether the given Opcode is a generic opcode that is not supposed to appear after ISel.
Definition: TargetOpcodes.h:30
unsigned getBLRCallOpcode(const MachineFunction &MF)
Return opcode to be used for indirect calls.
MachineInstr * getDefIgnoringCopies(Register Reg, const MachineRegisterInfo &MRI)
Find the def instruction for Reg, folding away any trivial copies.
Definition: Utils.cpp:479
std::optional< int64_t > getIConstantVRegSExtVal(Register VReg, const MachineRegisterInfo &MRI)
If VReg is defined by a G_CONSTANT that fits in int64_t, returns it.
Definition: Utils.cpp:307
constexpr bool isShiftedMask_64(uint64_t Value)
Return true if the argument contains a non-empty sequence of ones with the remainder zero (64 bit ver...
Definition: MathExtras.h:285
bool shouldOptForSize(const MachineBasicBlock &MBB, ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI)
Returns true if the given block should be optimized for size.
Definition: Utils.cpp:1630
InstructionSelector * createAArch64InstructionSelector(const AArch64TargetMachine &, const AArch64Subtarget &, const AArch64RegisterBankInfo &)
OutputIt transform(R &&Range, OutputIt d_first, UnaryFunction F)
Wrapper function around std::transform to apply a function to a range and store the result elsewhere.
Definition: STLExtras.h:1928
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1729
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition: MathExtras.h:340
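Together, helpers like these describe a contiguous bitmask as a (lsb, width) field, which is how masks become UBFX/UBFM-style extracts during selection. A small worked example; the use of llvm::countr_zero and llvm::Log2_64 here is illustrative, not a claim about this file's exact code:
    uint64_t Mask = 0x0FF0;                      // ones in bits 4..11
    if (isShiftedMask_64(Mask)) {
      unsigned Lsb = llvm::countr_zero(Mask);    // 4
      unsigned Msb = llvm::Log2_64(Mask);        // 11
      unsigned Width = Msb - Lsb + 1;            // 8 -> extract 8 bits starting at bit 4
    }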
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:167
std::optional< ValueAndVReg > getAnyConstantVRegValWithLookThrough(Register VReg, const MachineRegisterInfo &MRI, bool LookThroughInstrs=true, bool LookThroughAnyExt=false)
If VReg is defined by a statically evaluable chain of instructions rooted on a G_CONSTANT or G_FCONST...
Definition: Utils.cpp:432
AtomicOrdering
Atomic ordering for LLVM's memory model.
DWARFExpression::Operation Op
std::optional< ValueAndVReg > getIConstantVRegValWithLookThrough(Register VReg, const MachineRegisterInfo &MRI, bool LookThroughInstrs=true)
If VReg is defined by a statically evaluable chain of instructions rooted on a G_CONSTANT returns its...
Definition: Utils.cpp:426
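Typical selector usage, sketched: fold an operand whose value chain bottoms out in a G_CONSTANT, even through intervening copies (MI, MRI and the operand index are stand-ins for whatever is in scope):
    if (auto ValAndVReg =
            getIConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI)) {
      int64_t Imm = ValAndVReg->Value.getSExtValue();
      // Imm can now be range-checked and used to pick an immediate-form opcode.
    }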
std::optional< DefinitionAndSourceRegister > getDefSrcRegIgnoringCopies(Register Reg, const MachineRegisterInfo &MRI)
Find the def instruction for Reg and the underlying value Register, folding away any copies.
Definition: Utils.cpp:460
Register getSrcRegIgnoringCopies(Register Reg, const MachineRegisterInfo &MRI)
Find the source register for Reg, folding away any trivial copies.
Definition: Utils.cpp:486
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:860
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
static EVT getFloatingPointVT(unsigned BitWidth)
Returns the EVT that represents a floating-point type with the given number of bits.
Definition: ValueTypes.h:58
Class which stores all the state required in a MachineIRBuilder.
This class contains a discriminated union of information about pointers in memory operands,...
static MachinePointerInfo getConstantPool(MachineFunction &MF)
Return a MachinePointerInfo record that refers to the constant pool.