LLVM 23.0.0git
AArch64InstructionSelector.cpp
Go to the documentation of this file.
1//===- AArch64InstructionSelector.cpp ----------------------------*- C++ -*-==//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8/// \file
9/// This file implements the targeting of the InstructionSelector class for
10/// AArch64.
11/// \todo This should be generated by TableGen.
12//===----------------------------------------------------------------------===//
13
15#include "AArch64InstrInfo.h"
18#include "AArch64RegisterInfo.h"
19#include "AArch64Subtarget.h"
42#include "llvm/IR/Constants.h"
45#include "llvm/IR/IntrinsicsAArch64.h"
46#include "llvm/IR/Type.h"
47#include "llvm/Pass.h"
48#include "llvm/Support/Debug.h"
50#include <optional>
51
52#define DEBUG_TYPE "aarch64-isel"
53
54using namespace llvm;
55using namespace MIPatternMatch;
56using namespace AArch64GISelUtils;
57
58namespace llvm {
61}
62
63namespace {
64
65#define GET_GLOBALISEL_PREDICATE_BITSET
66#include "AArch64GenGlobalISel.inc"
67#undef GET_GLOBALISEL_PREDICATE_BITSET
68
69
/// AArch64 implementation of GlobalISel's InstructionSelector: lowers generic
/// machine IR (gMIR) into concrete AArch64 machine instructions via select().
/// NOTE(review): this listing drops occasional source lines (bare numbers such
/// as "119" below mark lost continuation lines) — confirm declarations against
/// upstream before editing them.
70class AArch64InstructionSelector : public InstructionSelector {
71public:
72 AArch64InstructionSelector(const AArch64TargetMachine &TM,
73 const AArch64Subtarget &STI,
74 const AArch64RegisterBankInfo &RBI);
75
76 bool select(MachineInstr &I) override;
77 static const char *getName() { return DEBUG_TYPE; }
78
79 void setupMF(MachineFunction &MF, GISelValueTracking *VT,
80 CodeGenCoverage *CoverageInfo, ProfileSummaryInfo *PSI,
81 BlockFrequencyInfo *BFI) override {
82 InstructionSelector::setupMF(MF, VT, CoverageInfo, PSI, BFI);
83 MIB.setMF(MF);
84
85 // hasFnAttribute() is expensive to call on every BRCOND selection, so
86 // cache it here for each run of the selector.
87 ProduceNonFlagSettingCondBr =
88 !MF.getFunction().hasFnAttribute(Attribute::SpeculativeLoadHardening);
89 MFReturnAddr = Register();
90
91 processPHIs(MF);
92 }
93
94private:
95 /// tblgen-erated 'select' implementation, used as the initial selector for
96 /// the patterns that don't require complex C++.
97 bool selectImpl(MachineInstr &I, CodeGenCoverage &CoverageInfo) const;
98
99 // A lowering phase that runs before any selection attempts.
100 // Returns true if the instruction was modified.
101 bool preISelLower(MachineInstr &I);
102
103 // An early selection function that runs before the selectImpl() call.
104 bool earlySelect(MachineInstr &I);
105
106 /// Save state that is shared between select calls, call select on \p I and
107 /// then restore the saved state. This can be used to recursively call select
108 /// within a select call.
109 bool selectAndRestoreState(MachineInstr &I);
110
111 // Do some preprocessing of G_PHIs before we begin selection.
112 void processPHIs(MachineFunction &MF);
113
114 bool earlySelectSHL(MachineInstr &I, MachineRegisterInfo &MRI);
115
116 /// Eliminate same-sized cross-bank copies into stores before selectImpl().
117 bool contractCrossBankCopyIntoStore(MachineInstr &I,
119
120 bool convertPtrAddToAdd(MachineInstr &I, MachineRegisterInfo &MRI);
121
122 bool selectVaStartAAPCS(MachineInstr &I, MachineFunction &MF,
123 MachineRegisterInfo &MRI) const;
124 bool selectVaStartDarwin(MachineInstr &I, MachineFunction &MF,
125 MachineRegisterInfo &MRI) const;
126
127 ///@{
128 /// Helper functions for selectCompareBranch.
129 bool selectCompareBranchFedByFCmp(MachineInstr &I, MachineInstr &FCmp,
130 MachineIRBuilder &MIB) const;
131 bool selectCompareBranchFedByICmp(MachineInstr &I, MachineInstr &ICmp,
132 MachineIRBuilder &MIB) const;
133 bool tryOptCompareBranchFedByICmp(MachineInstr &I, MachineInstr &ICmp,
134 MachineIRBuilder &MIB) const;
135 bool tryOptAndIntoCompareBranch(MachineInstr &AndInst, bool Invert,
136 MachineBasicBlock *DstMBB,
137 MachineIRBuilder &MIB) const;
138 ///@}
139
140 bool selectCompareBranch(MachineInstr &I, MachineFunction &MF,
142
143 bool selectVectorAshrLshr(MachineInstr &I, MachineRegisterInfo &MRI);
144 bool selectVectorSHL(MachineInstr &I, MachineRegisterInfo &MRI);
145
146 // Helper to generate an equivalent of scalar_to_vector into a new register,
147 // returned via 'Dst'.
148 MachineInstr *emitScalarToVector(unsigned EltSize,
149 const TargetRegisterClass *DstRC,
150 Register Scalar,
151 MachineIRBuilder &MIRBuilder) const;
152 /// Helper to narrow vector that was widened by emitScalarToVector.
153 /// Copy lowest part of 128-bit or 64-bit vector to 64-bit or 32-bit
154 /// vector, correspondingly.
155 MachineInstr *emitNarrowVector(Register DstReg, Register SrcReg,
156 MachineIRBuilder &MIRBuilder,
157 MachineRegisterInfo &MRI) const;
158
159 /// Emit a lane insert into \p DstReg, or a new vector register if
160 /// std::nullopt is provided.
161 ///
162 /// The lane inserted into is defined by \p LaneIdx. The vector source
163 /// register is given by \p SrcReg. The register containing the element is
164 /// given by \p EltReg.
165 MachineInstr *emitLaneInsert(std::optional<Register> DstReg, Register SrcReg,
166 Register EltReg, unsigned LaneIdx,
167 const RegisterBank &RB,
168 MachineIRBuilder &MIRBuilder) const;
169
170 /// Emit a sequence of instructions representing a constant \p CV for a
171 /// vector register \p Dst. (E.g. a MOV, or a load from a constant pool.)
172 ///
173 /// \returns the last instruction in the sequence on success, and nullptr
174 /// otherwise.
175 MachineInstr *emitConstantVector(Register Dst, Constant *CV,
176 MachineIRBuilder &MIRBuilder,
178
179 MachineInstr *tryAdvSIMDModImm8(Register Dst, unsigned DstSize, APInt Bits,
180 MachineIRBuilder &MIRBuilder);
181
182 MachineInstr *tryAdvSIMDModImm16(Register Dst, unsigned DstSize, APInt Bits,
183 MachineIRBuilder &MIRBuilder, bool Inv);
184
185 MachineInstr *tryAdvSIMDModImm32(Register Dst, unsigned DstSize, APInt Bits,
186 MachineIRBuilder &MIRBuilder, bool Inv);
187 MachineInstr *tryAdvSIMDModImm64(Register Dst, unsigned DstSize, APInt Bits,
188 MachineIRBuilder &MIRBuilder);
189 MachineInstr *tryAdvSIMDModImm321s(Register Dst, unsigned DstSize, APInt Bits,
190 MachineIRBuilder &MIRBuilder, bool Inv);
191 MachineInstr *tryAdvSIMDModImmFP(Register Dst, unsigned DstSize, APInt Bits,
192 MachineIRBuilder &MIRBuilder);
193
194 bool tryOptConstantBuildVec(MachineInstr &MI, LLT DstTy,
196 /// \returns true if a G_BUILD_VECTOR instruction \p MI can be selected as a
197 /// SUBREG_TO_REG.
198 bool tryOptBuildVecToSubregToReg(MachineInstr &MI, MachineRegisterInfo &MRI);
199 bool selectBuildVector(MachineInstr &I, MachineRegisterInfo &MRI);
202
203 bool selectShuffleVector(MachineInstr &I, MachineRegisterInfo &MRI);
204 bool selectExtractElt(MachineInstr &I, MachineRegisterInfo &MRI);
205 bool selectConcatVectors(MachineInstr &I, MachineRegisterInfo &MRI);
206 bool selectSplitVectorUnmerge(MachineInstr &I, MachineRegisterInfo &MRI);
207
208 /// Helper function to select vector load intrinsics like
209 /// @llvm.aarch64.neon.ld2.*, @llvm.aarch64.neon.ld4.*, etc.
210 /// \p Opc is the opcode that the selected instruction should use.
211 /// \p NumVecs is the number of vector destinations for the instruction.
212 /// \p I is the original G_INTRINSIC_W_SIDE_EFFECTS instruction.
213 bool selectVectorLoadIntrinsic(unsigned Opc, unsigned NumVecs,
214 MachineInstr &I);
215 bool selectVectorLoadLaneIntrinsic(unsigned Opc, unsigned NumVecs,
216 MachineInstr &I);
217 void selectVectorStoreIntrinsic(MachineInstr &I, unsigned NumVecs,
218 unsigned Opc);
219 bool selectVectorStoreLaneIntrinsic(MachineInstr &I, unsigned NumVecs,
220 unsigned Opc);
221 bool selectIntrinsicWithSideEffects(MachineInstr &I,
223 bool selectIntrinsic(MachineInstr &I, MachineRegisterInfo &MRI);
224 bool selectJumpTable(MachineInstr &I, MachineRegisterInfo &MRI);
225 bool selectBrJT(MachineInstr &I, MachineRegisterInfo &MRI);
226 bool selectTLSGlobalValue(MachineInstr &I, MachineRegisterInfo &MRI);
227 bool selectPtrAuthGlobalValue(MachineInstr &I,
228 MachineRegisterInfo &MRI) const;
229 bool selectReduction(MachineInstr &I, MachineRegisterInfo &MRI);
230 bool selectMOPS(MachineInstr &I, MachineRegisterInfo &MRI);
231 bool selectUSMovFromExtend(MachineInstr &I, MachineRegisterInfo &MRI);
232 void SelectTable(MachineInstr &I, MachineRegisterInfo &MRI, unsigned NumVecs,
233 unsigned Opc1, unsigned Opc2, bool isExt);
234
235 bool selectIndexedExtLoad(MachineInstr &I, MachineRegisterInfo &MRI);
236 bool selectIndexedLoad(MachineInstr &I, MachineRegisterInfo &MRI);
237 bool selectIndexedStore(GIndexedStore &I, MachineRegisterInfo &MRI);
238
239 unsigned emitConstantPoolEntry(const Constant *CPVal,
240 MachineFunction &MF) const;
242 MachineIRBuilder &MIRBuilder) const;
243
244 // Emit a vector concat operation.
245 MachineInstr *emitVectorConcat(std::optional<Register> Dst, Register Op1,
246 Register Op2,
247 MachineIRBuilder &MIRBuilder) const;
248
249 // Emit an integer compare between LHS and RHS, which checks for Predicate.
250 MachineInstr *emitIntegerCompare(MachineOperand &LHS, MachineOperand &RHS,
252 MachineIRBuilder &MIRBuilder) const;
253
254 /// Emit a floating point comparison between \p LHS and \p RHS.
255 /// \p Pred if given is the intended predicate to use.
257 emitFPCompare(Register LHS, Register RHS, MachineIRBuilder &MIRBuilder,
258 std::optional<CmpInst::Predicate> = std::nullopt) const;
259
261 emitInstr(unsigned Opcode, std::initializer_list<llvm::DstOp> DstOps,
262 std::initializer_list<llvm::SrcOp> SrcOps,
263 MachineIRBuilder &MIRBuilder,
264 const ComplexRendererFns &RenderFns = std::nullopt) const;
265 /// Helper function to emit an add or sub instruction.
266 ///
267 /// \p AddrModeAndSizeToOpcode must contain each of the opcode variants above
268 /// in a specific order.
269 ///
270 /// Below is an example of the expected input to \p AddrModeAndSizeToOpcode.
271 ///
272 /// \code
273 /// const std::array<std::array<unsigned, 2>, 4> Table {
274 /// {{AArch64::ADDXri, AArch64::ADDWri},
275 /// {AArch64::ADDXrs, AArch64::ADDWrs},
276 /// {AArch64::ADDXrr, AArch64::ADDWrr},
277 /// {AArch64::SUBXri, AArch64::SUBWri},
278 /// {AArch64::ADDXrx, AArch64::ADDWrx}}};
279 /// \endcode
280 ///
281 /// Each row in the table corresponds to a different addressing mode. Each
282 /// column corresponds to a different register size.
283 ///
284 /// \attention Rows must be structured as follows:
285 /// - Row 0: The ri opcode variants
286 /// - Row 1: The rs opcode variants
287 /// - Row 2: The rr opcode variants
288 /// - Row 3: The ri opcode variants for negative immediates
289 /// - Row 4: The rx opcode variants
290 ///
291 /// \attention Columns must be structured as follows:
292 /// - Column 0: The 64-bit opcode variants
293 /// - Column 1: The 32-bit opcode variants
294 ///
295 /// \p Dst is the destination register of the binop to emit.
296 /// \p LHS is the left-hand operand of the binop to emit.
297 /// \p RHS is the right-hand operand of the binop to emit.
298 MachineInstr *emitAddSub(
299 const std::array<std::array<unsigned, 2>, 5> &AddrModeAndSizeToOpcode,
301 MachineIRBuilder &MIRBuilder) const;
302 MachineInstr *emitADD(Register DefReg, MachineOperand &LHS,
304 MachineIRBuilder &MIRBuilder) const;
306 MachineIRBuilder &MIRBuilder) const;
308 MachineIRBuilder &MIRBuilder) const;
310 MachineIRBuilder &MIRBuilder) const;
312 MachineIRBuilder &MIRBuilder) const;
314 MachineIRBuilder &MIRBuilder) const;
316 MachineIRBuilder &MIRBuilder) const;
318 MachineIRBuilder &MIRBuilder) const;
319 MachineInstr *emitSelect(Register Dst, Register LHS, Register RHS,
321 MachineIRBuilder &MIRBuilder) const;
322 MachineInstr *emitExtractVectorElt(std::optional<Register> DstReg,
323 const RegisterBank &DstRB, LLT ScalarTy,
324 Register VecReg, unsigned LaneIdx,
325 MachineIRBuilder &MIRBuilder) const;
326 MachineInstr *emitCSINC(Register Dst, Register Src1, Register Src2,
328 MachineIRBuilder &MIRBuilder) const;
329 /// Emit a CSet for a FP compare.
330 ///
331 /// \p Dst is expected to be a 32-bit scalar register.
332 MachineInstr *emitCSetForFCmp(Register Dst, CmpInst::Predicate Pred,
333 MachineIRBuilder &MIRBuilder) const;
334
335 /// Emit an instruction that sets NZCV to the carry-in expected by \p I.
336 /// Might elide the instruction if the previous instruction already sets NZCV
337 /// correctly.
338 MachineInstr *emitCarryIn(MachineInstr &I, Register CarryReg);
339
340 /// Emit the overflow op for \p Opcode.
341 ///
342 /// \p Opcode is expected to be an overflow op's opcode, e.g. G_UADDO,
343 /// G_USUBO, etc.
344 std::pair<MachineInstr *, AArch64CC::CondCode>
345 emitOverflowOp(unsigned Opcode, Register Dst, MachineOperand &LHS,
346 MachineOperand &RHS, MachineIRBuilder &MIRBuilder) const;
347
348 bool selectOverflowOp(MachineInstr &I, MachineRegisterInfo &MRI);
349
350 /// Emit expression as a conjunction (a series of CCMP/CFCMP ops).
351 /// In some cases this is even possible with OR operations in the expression.
353 MachineIRBuilder &MIB) const;
358 MachineIRBuilder &MIB) const;
360 bool Negate, Register CCOp,
362 MachineIRBuilder &MIB) const;
363
364 /// Emit a TB(N)Z instruction which tests \p Bit in \p TestReg.
365 /// \p IsNegative is true if the test should be "not zero".
366 /// This will also optimize the test bit instruction when possible.
367 MachineInstr *emitTestBit(Register TestReg, uint64_t Bit, bool IsNegative,
368 MachineBasicBlock *DstMBB,
369 MachineIRBuilder &MIB) const;
370
371 /// Emit a CB(N)Z instruction which branches to \p DestMBB.
372 MachineInstr *emitCBZ(Register CompareReg, bool IsNegative,
373 MachineBasicBlock *DestMBB,
374 MachineIRBuilder &MIB) const;
375
376 // Equivalent to the i32shift_a and friends from AArch64InstrInfo.td.
377 // We use these manually instead of using the importer since it doesn't
378 // support SDNodeXForm.
379 ComplexRendererFns selectShiftA_32(const MachineOperand &Root) const;
380 ComplexRendererFns selectShiftB_32(const MachineOperand &Root) const;
381 ComplexRendererFns selectShiftA_64(const MachineOperand &Root) const;
382 ComplexRendererFns selectShiftB_64(const MachineOperand &Root) const;
383
384 ComplexRendererFns select12BitValueWithLeftShift(uint64_t Immed) const;
385 ComplexRendererFns selectArithImmed(MachineOperand &Root) const;
386 ComplexRendererFns selectNegArithImmed(MachineOperand &Root) const;
387
388 ComplexRendererFns selectAddrModeUnscaled(MachineOperand &Root,
389 unsigned Size) const;
390
391 ComplexRendererFns selectAddrModeUnscaled8(MachineOperand &Root) const {
392 return selectAddrModeUnscaled(Root, 1);
393 }
394 ComplexRendererFns selectAddrModeUnscaled16(MachineOperand &Root) const {
395 return selectAddrModeUnscaled(Root, 2);
396 }
397 ComplexRendererFns selectAddrModeUnscaled32(MachineOperand &Root) const {
398 return selectAddrModeUnscaled(Root, 4);
399 }
400 ComplexRendererFns selectAddrModeUnscaled64(MachineOperand &Root) const {
401 return selectAddrModeUnscaled(Root, 8);
402 }
403 ComplexRendererFns selectAddrModeUnscaled128(MachineOperand &Root) const {
404 return selectAddrModeUnscaled(Root, 16);
405 }
406
407 /// Helper to try to fold in a GISEL_ADD_LOW into an immediate, to be used
408 /// from complex pattern matchers like selectAddrModeIndexed().
409 ComplexRendererFns tryFoldAddLowIntoImm(MachineInstr &RootDef, unsigned Size,
410 MachineRegisterInfo &MRI) const;
411
412 ComplexRendererFns selectAddrModeIndexed(MachineOperand &Root,
413 unsigned Size) const;
414 template <int Width>
415 ComplexRendererFns selectAddrModeIndexed(MachineOperand &Root) const {
416 return selectAddrModeIndexed(Root, Width / 8);
417 }
418
419 std::optional<bool>
420 isWorthFoldingIntoAddrMode(const MachineInstr &MI,
421 const MachineRegisterInfo &MRI) const;
422
423 bool isWorthFoldingIntoExtendedReg(const MachineInstr &MI,
424 const MachineRegisterInfo &MRI,
425 bool IsAddrOperand) const;
426 ComplexRendererFns
427 selectAddrModeShiftedExtendXReg(MachineOperand &Root,
428 unsigned SizeInBytes) const;
429
430 /// Returns a \p ComplexRendererFns which contains a base, offset, and whether
431 /// or not a shift + extend should be folded into an addressing mode. Returns
432 /// None when this is not profitable or possible.
433 ComplexRendererFns
434 selectExtendedSHL(MachineOperand &Root, MachineOperand &Base,
435 MachineOperand &Offset, unsigned SizeInBytes,
436 bool WantsExt) const;
437 ComplexRendererFns selectAddrModeRegisterOffset(MachineOperand &Root) const;
438 ComplexRendererFns selectAddrModeXRO(MachineOperand &Root,
439 unsigned SizeInBytes) const;
440 template <int Width>
441 ComplexRendererFns selectAddrModeXRO(MachineOperand &Root) const {
442 return selectAddrModeXRO(Root, Width / 8);
443 }
444
445 ComplexRendererFns selectAddrModeWRO(MachineOperand &Root,
446 unsigned SizeInBytes) const;
447 template <int Width>
448 ComplexRendererFns selectAddrModeWRO(MachineOperand &Root) const {
449 return selectAddrModeWRO(Root, Width / 8);
450 }
451
452 ComplexRendererFns selectShiftedRegister(MachineOperand &Root,
453 bool AllowROR = false) const;
454
455 ComplexRendererFns selectArithShiftedRegister(MachineOperand &Root) const {
456 return selectShiftedRegister(Root);
457 }
458
459 ComplexRendererFns selectLogicalShiftedRegister(MachineOperand &Root) const {
460 return selectShiftedRegister(Root, true);
461 }
462
463 /// Given an extend instruction, determine the correct shift-extend type for
464 /// that instruction.
465 ///
466 /// If the instruction is going to be used in a load or store, pass
467 /// \p IsLoadStore = true.
469 getExtendTypeForInst(MachineInstr &MI, MachineRegisterInfo &MRI,
470 bool IsLoadStore = false) const;
471
472 /// Move \p Reg to \p RC if \p Reg is not already on \p RC.
473 ///
474 /// \returns Either \p Reg if no change was necessary, or the new register
475 /// created by moving \p Reg.
476 ///
477 /// Note: This uses emitCopy right now.
478 Register moveScalarRegClass(Register Reg, const TargetRegisterClass &RC,
479 MachineIRBuilder &MIB) const;
480
481 ComplexRendererFns selectArithExtendedRegister(MachineOperand &Root) const;
482
483 ComplexRendererFns selectExtractHigh(MachineOperand &Root) const;
484
485 ComplexRendererFns selectCVTFixedPointVec(MachineOperand &Root) const;
486 ComplexRendererFns
487 selectCVTFixedPointVecBase(const MachineOperand &Root) const;
488 void renderFixedPointXForm(MachineInstrBuilder &MIB, const MachineInstr &MI,
489 int OpIdx = -1) const;
490
491 void renderTruncImm(MachineInstrBuilder &MIB, const MachineInstr &MI,
492 int OpIdx = -1) const;
493 void renderLogicalImm32(MachineInstrBuilder &MIB, const MachineInstr &I,
494 int OpIdx = -1) const;
495 void renderLogicalImm64(MachineInstrBuilder &MIB, const MachineInstr &I,
496 int OpIdx = -1) const;
497 void renderUbsanTrap(MachineInstrBuilder &MIB, const MachineInstr &MI,
498 int OpIdx) const;
499 void renderFPImm16(MachineInstrBuilder &MIB, const MachineInstr &MI,
500 int OpIdx = -1) const;
501 void renderFPImm32(MachineInstrBuilder &MIB, const MachineInstr &MI,
502 int OpIdx = -1) const;
503 void renderFPImm64(MachineInstrBuilder &MIB, const MachineInstr &MI,
504 int OpIdx = -1) const;
505 void renderFPImm32SIMDModImmType4(MachineInstrBuilder &MIB,
506 const MachineInstr &MI,
507 int OpIdx = -1) const;
508
509 // Materialize a GlobalValue or BlockAddress using a movz+movk sequence.
510 void materializeLargeCMVal(MachineInstr &I, const Value *V, unsigned OpFlags);
511
512 // Optimization methods.
513 bool tryOptSelect(GSelect &Sel);
514 bool tryOptSelectConjunction(GSelect &Sel, MachineInstr &CondMI);
515 MachineInstr *tryFoldIntegerCompare(MachineOperand &LHS, MachineOperand &RHS,
517 MachineIRBuilder &MIRBuilder) const;
518
519 /// Return true if \p MI is a load or store of \p NumBytes bytes.
520 bool isLoadStoreOfNumBytes(const MachineInstr &MI, unsigned NumBytes) const;
521
522 /// Returns true if \p MI is guaranteed to have the high-half of a 64-bit
523 /// register zeroed out. In other words, the result of MI has been explicitly
524 /// zero extended.
525 bool isDef32(const MachineInstr &MI) const;
526
 // Immutable target state captured at construction (the constructor's init
 // list also sets up a TRI member whose declaration is missing from this
 // listing).
527 const AArch64TargetMachine &TM;
528 const AArch64Subtarget &STI;
529 const AArch64InstrInfo &TII;
531 const AArch64RegisterBankInfo &RBI;
532
 // Cached per-function in setupMF(): true unless the current function carries
 // the SpeculativeLoadHardening attribute; consulted during conditional-branch
 // selection (see the comment in setupMF above).
533 bool ProduceNonFlagSettingCondBr = false;
534
535 // Some cached values used during selection.
536 // We use LR as a live-in register, and we keep track of it here as it can be
537 // clobbered by calls.
538 Register MFReturnAddr;
539
 // NOTE(review): the MachineIRBuilder member 'MIB' used by setupMF() above is
 // declared around here (original line 540) but is missing from this listing.
541
542#define GET_GLOBALISEL_PREDICATES_DECL
543#include "AArch64GenGlobalISel.inc"
544#undef GET_GLOBALISEL_PREDICATES_DECL
545
546// We declare the temporaries used by selectImpl() in the class to minimize the
547// cost of constructing placeholder values.
548#define GET_GLOBALISEL_TEMPORARIES_DECL
549#include "AArch64GenGlobalISel.inc"
550#undef GET_GLOBALISEL_TEMPORARIES_DECL
551};
552
553} // end anonymous namespace
554
// Pull in the tblgen-erated selectImpl() implementation.
555#define GET_GLOBALISEL_IMPL
556#include "AArch64GenGlobalISel.inc"
557#undef GET_GLOBALISEL_IMPL
558
// Constructor: captures the target machine, subtarget, and register bank info,
// and derives TII/TRI from the subtarget. The .inc includes initialize the
// tblgen-generated predicate and temporary members declared in the class.
// NOTE(review): several initializer-list lines (originals 564, 566-567, 569)
// are missing from this listing — confirm against upstream before editing.
559AArch64InstructionSelector::AArch64InstructionSelector(
560 const AArch64TargetMachine &TM, const AArch64Subtarget &STI,
561 const AArch64RegisterBankInfo &RBI)
562 : TM(TM), STI(STI), TII(*STI.getInstrInfo()), TRI(*STI.getRegisterInfo()),
563 RBI(RBI),
565#include "AArch64GenGlobalISel.inc"
568#include "AArch64GenGlobalISel.inc"
570{
571}
572
573// FIXME: This should be target-independent, inferred from the types declared
574// for each class in the bank.
575//
576/// Given a register bank, and a type, return the smallest register class that
577/// can represent that combination.
578static const TargetRegisterClass *
579getRegClassForTypeOnBank(LLT Ty, const RegisterBank &RB,
580 bool GetAllRegSet = false) {
581 if (RB.getID() == AArch64::GPRRegBankID) {
582 if (Ty.getSizeInBits() <= 32)
583 return GetAllRegSet ? &AArch64::GPR32allRegClass
584 : &AArch64::GPR32RegClass;
585 if (Ty.getSizeInBits() == 64)
586 return GetAllRegSet ? &AArch64::GPR64allRegClass
587 : &AArch64::GPR64RegClass;
588 if (Ty.getSizeInBits() == 128)
589 return &AArch64::XSeqPairsClassRegClass;
590 return nullptr;
591 }
592
593 if (RB.getID() == AArch64::FPRRegBankID) {
594 switch (Ty.getSizeInBits()) {
595 case 8:
596 return &AArch64::FPR8RegClass;
597 case 16:
598 return &AArch64::FPR16RegClass;
599 case 32:
600 return &AArch64::FPR32RegClass;
601 case 64:
602 return &AArch64::FPR64RegClass;
603 case 128:
604 return &AArch64::FPR128RegClass;
605 }
606 return nullptr;
607 }
608
609 return nullptr;
610}
611
612/// Given a register bank, and size in bits, return the smallest register class
613/// that can represent that combination.
// NOTE(review): the declaration line naming this function and its leading
// parameters (original line 615) is missing from this listing; the body below
// operates on a TypeSize-like 'SizeInBits', a RegisterBank 'RB', and the
// 'GetAllRegSet' flag — confirm the exact signature against upstream.
614static const TargetRegisterClass *
616 bool GetAllRegSet = false) {
617 if (SizeInBits.isScalable()) {
618 assert(RB.getID() == AArch64::FPRRegBankID &&
619 "Expected FPR regbank for scalable type size");
620 return &AArch64::ZPRRegClass;
621 }
622
623 unsigned RegBankID = RB.getID();
624
625 if (RegBankID == AArch64::GPRRegBankID) {
626 assert(!SizeInBits.isScalable() && "Unexpected scalable register size");
627 if (SizeInBits <= 32)
628 return GetAllRegSet ? &AArch64::GPR32allRegClass
629 : &AArch64::GPR32RegClass;
630 if (SizeInBits == 64)
631 return GetAllRegSet ? &AArch64::GPR64allRegClass
632 : &AArch64::GPR64RegClass;
633 if (SizeInBits == 128)
634 return &AArch64::XSeqPairsClassRegClass;
635 }
636
637 if (RegBankID == AArch64::FPRRegBankID) {
 // Scalable sizes were already handled by the early return at the top of the
 // function, so this inner check cannot trigger here.
638 if (SizeInBits.isScalable()) {
639 assert(SizeInBits == TypeSize::getScalable(128) &&
640 "Unexpected scalable register size");
641 return &AArch64::ZPRRegClass;
642 }
643
644 switch (SizeInBits) {
645 default:
646 return nullptr;
647 case 8:
648 return &AArch64::FPR8RegClass;
649 case 16:
650 return &AArch64::FPR16RegClass;
651 case 32:
652 return &AArch64::FPR32RegClass;
653 case 64:
654 return &AArch64::FPR64RegClass;
655 case 128:
656 return &AArch64::FPR128RegClass;
657 }
658 }
659
660 return nullptr;
661}
662
663/// Returns the correct subregister to use for a given register class.
// NOTE(review): the signature line (original 664) naming this function and its
// first parameter — the register class 'RC' used below — is missing from this
// listing. It outputs the subregister index through 'SubReg' and returns false
// when the class's size has no matching subregister.
665 const TargetRegisterInfo &TRI, unsigned &SubReg) {
666 switch (TRI.getRegSizeInBits(*RC)) {
667 case 8:
668 SubReg = AArch64::bsub;
669 break;
670 case 16:
671 SubReg = AArch64::hsub;
672 break;
673 case 32:
 // 32-bit GPR classes use sub_32; only FPR32 itself uses the FP 'ssub'.
674 if (RC != &AArch64::FPR32RegClass)
675 SubReg = AArch64::sub_32;
676 else
677 SubReg = AArch64::ssub;
678 break;
679 case 64:
680 SubReg = AArch64::dsub;
681 break;
682 default:
 // NOTE(review): original line 683 (the start of an LLVM_DEBUG(...) call) is
 // missing here; only its continuation survives below.
684 dbgs() << "Couldn't find appropriate subregister for register class.");
685 return false;
686 }
687
688 return true;
689}
690
691/// Returns the minimum size the given register bank can hold.
692static unsigned getMinSizeForRegBank(const RegisterBank &RB) {
693 switch (RB.getID()) {
694 case AArch64::GPRRegBankID:
695 return 32;
696 case AArch64::FPRRegBankID:
697 return 8;
698 default:
699 llvm_unreachable("Tried to get minimum size for unknown register bank.");
700 }
701}
702
703/// Create a REG_SEQUENCE instruction using the registers in \p Regs.
704/// Helper function for functions like createDTuple and createQTuple.
705///
706/// \p RegClassIDs - The list of register class IDs available for some tuple of
707/// a scalar class. E.g. QQRegClassID, QQQRegClassID, QQQQRegClassID. This is
708/// expected to contain between 2 and 4 tuple classes.
709///
710/// \p SubRegs - The list of subregister classes associated with each register
711/// class ID in \p RegClassIDs. E.g., QQRegClassID should use the qsub0
712/// subregister class. The index of each subregister class is expected to
713/// correspond with the index of each register class.
714///
715/// \returns Either the destination register of REG_SEQUENCE instruction that
716/// was created, or the 0th element of \p Regs if \p Regs contains a single
717/// element.
// NOTE(review): the signature line (original 718) is missing from this
// listing; the body uses a register list 'Regs', the 'RegClassIDs'/'SubRegs'
// arrays, and a MachineIRBuilder 'MIB'. Original line 726, which obtains the
// 'TRI' pointer dereferenced below, is also missing — confirm upstream.
719 const unsigned RegClassIDs[],
720 const unsigned SubRegs[], MachineIRBuilder &MIB) {
721 unsigned NumRegs = Regs.size();
 // A 1-element "tuple" needs no REG_SEQUENCE; return the register unchanged.
722 if (NumRegs == 1)
723 return Regs[0];
724 assert(NumRegs >= 2 && NumRegs <= 4 &&
725 "Only support between two and 4 registers in a tuple!");
 // RegClassIDs[0] is the 2-register tuple class, hence the NumRegs - 2 index.
727 auto *DesiredClass = TRI->getRegClass(RegClassIDs[NumRegs - 2]);
728 auto RegSequence =
729 MIB.buildInstr(TargetOpcode::REG_SEQUENCE, {DesiredClass}, {});
730 for (unsigned I = 0, E = Regs.size(); I < E; ++I) {
731 RegSequence.addUse(Regs[I]);
732 RegSequence.addImm(SubRegs[I]);
733 }
734 return RegSequence.getReg(0);
735}
736
737/// Create a tuple of D-registers using the registers in \p Regs.
// NOTE(review): the signature line (original 738) is missing from this
// listing; per the body, this forwards 'Regs' and a MachineIRBuilder 'MIB' to
// createTuple with the DD/DDD/DDDD tuple classes and dsub0-dsub3 subregs.
739 static const unsigned RegClassIDs[] = {
740 AArch64::DDRegClassID, AArch64::DDDRegClassID, AArch64::DDDDRegClassID};
741 static const unsigned SubRegs[] = {AArch64::dsub0, AArch64::dsub1,
742 AArch64::dsub2, AArch64::dsub3};
743 return createTuple(Regs, RegClassIDs, SubRegs, MIB);
744}
745
746/// Create a tuple of Q-registers using the registers in \p Regs.
// NOTE(review): the signature line (original 747) is missing from this
// listing; per the body, this forwards 'Regs' and a MachineIRBuilder 'MIB' to
// createTuple with the QQ/QQQ/QQQQ tuple classes and qsub0-qsub3 subregs.
748 static const unsigned RegClassIDs[] = {
749 AArch64::QQRegClassID, AArch64::QQQRegClassID, AArch64::QQQQRegClassID};
750 static const unsigned SubRegs[] = {AArch64::qsub0, AArch64::qsub1,
751 AArch64::qsub2, AArch64::qsub3};
752 return createTuple(Regs, RegClassIDs, SubRegs, MIB);
753}
754
/// Extract a 64-bit immediate from operand \p Root: a literal immediate, a
/// ConstantInt operand, or a register whose constant value can be looked up.
/// Returns std::nullopt when no constant can be derived.
755static std::optional<uint64_t> getImmedFromMO(const MachineOperand &Root) {
756 auto &MI = *Root.getParent();
757 auto &MBB = *MI.getParent();
758 auto &MF = *MBB.getParent();
759 auto &MRI = MF.getRegInfo();
760 uint64_t Immed;
761 if (Root.isImm())
762 Immed = Root.getImm();
763 else if (Root.isCImm())
764 Immed = Root.getCImm()->getZExtValue();
765 else if (Root.isReg()) {
766 auto ValAndVReg =
 // NOTE(review): original line 767 — the call that produces 'ValAndVReg',
 // presumably a constant look-through helper taking Root.getReg() and MRI —
 // is missing from this listing; confirm against upstream.
768 if (!ValAndVReg)
769 return std::nullopt;
 // Note the asymmetry: the CImm path above reads the value zero-extended,
 // while this register path reads it sign-extended into the uint64_t result.
770 Immed = ValAndVReg->Value.getSExtValue();
771 } else
772 return std::nullopt;
773 return Immed;
774}
775
776/// Check whether \p I is a currently unsupported binary operation:
777/// - it has an unsized type
778/// - an operand is not a vreg
779/// - all operands are not in the same bank
780/// These are checks that should someday live in the verifier, but right now,
781/// these are mostly limitations of the aarch64 selector.
782static bool unsupportedBinOp(const MachineInstr &I,
783 const AArch64RegisterBankInfo &RBI,
784 const MachineRegisterInfo &MRI,
785 const AArch64RegisterInfo &TRI) {
786 LLT Ty = MRI.getType(I.getOperand(0).getReg());
787 if (!Ty.isValid()) {
788 LLVM_DEBUG(dbgs() << "Generic binop register should be typed\n");
789 return true;
790 }
791
792 const RegisterBank *PrevOpBank = nullptr;
793 for (auto &MO : I.operands()) {
794 // FIXME: Support non-register operands.
795 if (!MO.isReg()) {
796 LLVM_DEBUG(dbgs() << "Generic inst non-reg operands are unsupported\n");
797 return true;
798 }
799
800 // FIXME: Can generic operations have physical registers operands? If
801 // so, this will need to be taught about that, and we'll need to get the
802 // bank out of the minimal class for the register.
803 // Either way, this needs to be documented (and possibly verified).
804 if (!MO.getReg().isVirtual()) {
805 LLVM_DEBUG(dbgs() << "Generic inst has physical register operand\n");
806 return true;
807 }
808
809 const RegisterBank *OpBank = RBI.getRegBank(MO.getReg(), MRI, TRI);
810 if (!OpBank) {
811 LLVM_DEBUG(dbgs() << "Generic register has no bank or class\n");
812 return true;
813 }
814
815 if (PrevOpBank && OpBank != PrevOpBank) {
816 LLVM_DEBUG(dbgs() << "Generic inst operands have different banks\n");
817 return true;
818 }
819 PrevOpBank = OpBank;
820 }
821 return false;
822}
823
824/// Select the AArch64 opcode for the basic binary operation \p GenericOpc
825/// (such as G_OR or G_SDIV), appropriate for the register bank \p RegBankID
826/// and of size \p OpSize.
827/// \returns \p GenericOpc if the combination is unsupported.
828static unsigned selectBinaryOp(unsigned GenericOpc, unsigned RegBankID,
829 unsigned OpSize) {
830 switch (RegBankID) {
831 case AArch64::GPRRegBankID:
832 if (OpSize == 32) {
833 switch (GenericOpc) {
834 case TargetOpcode::G_SHL:
835 return AArch64::LSLVWr;
836 case TargetOpcode::G_LSHR:
837 return AArch64::LSRVWr;
838 case TargetOpcode::G_ASHR:
839 return AArch64::ASRVWr;
840 default:
841 return GenericOpc;
842 }
843 } else if (OpSize == 64) {
844 switch (GenericOpc) {
845 case TargetOpcode::G_PTR_ADD:
846 return AArch64::ADDXrr;
847 case TargetOpcode::G_SHL:
848 return AArch64::LSLVXr;
849 case TargetOpcode::G_LSHR:
850 return AArch64::LSRVXr;
851 case TargetOpcode::G_ASHR:
852 return AArch64::ASRVXr;
853 default:
854 return GenericOpc;
855 }
856 }
857 break;
858 case AArch64::FPRRegBankID:
859 switch (OpSize) {
860 case 32:
861 switch (GenericOpc) {
862 case TargetOpcode::G_FADD:
863 return AArch64::FADDSrr;
864 case TargetOpcode::G_FSUB:
865 return AArch64::FSUBSrr;
866 case TargetOpcode::G_FMUL:
867 return AArch64::FMULSrr;
868 case TargetOpcode::G_FDIV:
869 return AArch64::FDIVSrr;
870 default:
871 return GenericOpc;
872 }
873 case 64:
874 switch (GenericOpc) {
875 case TargetOpcode::G_FADD:
876 return AArch64::FADDDrr;
877 case TargetOpcode::G_FSUB:
878 return AArch64::FSUBDrr;
879 case TargetOpcode::G_FMUL:
880 return AArch64::FMULDrr;
881 case TargetOpcode::G_FDIV:
882 return AArch64::FDIVDrr;
883 case TargetOpcode::G_OR:
884 return AArch64::ORRv8i8;
885 default:
886 return GenericOpc;
887 }
888 }
889 break;
890 }
891 return GenericOpc;
892}
893
894/// Select the AArch64 opcode for the G_LOAD or G_STORE operation \p GenericOpc,
895/// appropriate for the (value) register bank \p RegBankID and of memory access
896/// size \p OpSize. This returns the variant with the base+unsigned-immediate
897/// addressing mode (e.g., LDRXui).
898/// \returns \p GenericOpc if the combination is unsupported.
899static unsigned selectLoadStoreUIOp(unsigned GenericOpc, unsigned RegBankID,
900 unsigned OpSize) {
901 const bool isStore = GenericOpc == TargetOpcode::G_STORE;
902 switch (RegBankID) {
903 case AArch64::GPRRegBankID:
904 switch (OpSize) {
905 case 8:
906 return isStore ? AArch64::STRBBui : AArch64::LDRBBui;
907 case 16:
908 return isStore ? AArch64::STRHHui : AArch64::LDRHHui;
909 case 32:
910 return isStore ? AArch64::STRWui : AArch64::LDRWui;
911 case 64:
912 return isStore ? AArch64::STRXui : AArch64::LDRXui;
913 }
914 break;
915 case AArch64::FPRRegBankID:
916 switch (OpSize) {
917 case 8:
918 return isStore ? AArch64::STRBui : AArch64::LDRBui;
919 case 16:
920 return isStore ? AArch64::STRHui : AArch64::LDRHui;
921 case 32:
922 return isStore ? AArch64::STRSui : AArch64::LDRSui;
923 case 64:
924 return isStore ? AArch64::STRDui : AArch64::LDRDui;
925 case 128:
926 return isStore ? AArch64::STRQui : AArch64::LDRQui;
927 }
928 break;
929 }
930 return GenericOpc;
931}
932
/// Helper function for selectCopy. Inserts a subregister copy from \p SrcReg
/// to \p *To.
///
/// E.g "To = COPY SrcReg:SubReg"
                       const RegisterBankInfo &RBI, Register SrcReg,
                       const TargetRegisterClass *To, unsigned SubReg) {
  assert(SrcReg.isValid() && "Expected a valid source register?");
  assert(To && "Destination register class cannot be null");
  assert(SubReg && "Expected a valid subregister");

  // Build "newreg = COPY SrcReg:SubReg" right before I, then rewrite I's
  // source operand (operand 1) to consume the narrowed copy instead.
  MachineIRBuilder MIB(I);
  auto SubRegCopy =
      MIB.buildInstr(TargetOpcode::COPY, {To}, {}).addReg(SrcReg, {}, SubReg);
  MachineOperand &RegOp = I.getOperand(1);
  RegOp.setReg(SubRegCopy.getReg(0));

  // It's possible that the destination register won't be constrained. Make
  // sure that happens.
  if (!I.getOperand(0).getReg().isPhysical())
    RBI.constrainGenericRegister(I.getOperand(0).getReg(), *To, MRI);

  return true;
}
957
/// Helper function to get the source and destination register classes for a
/// copy. Returns a std::pair containing the source register class for the
/// copy, and the destination register class for the copy. If a register class
/// cannot be determined, then it will be nullptr.
static std::pair<const TargetRegisterClass *, const TargetRegisterClass *>
                     const RegisterBankInfo &RBI) {
  Register DstReg = I.getOperand(0).getReg();
  Register SrcReg = I.getOperand(1).getReg();
  const RegisterBank &DstRegBank = *RBI.getRegBank(DstReg, MRI, TRI);
  const RegisterBank &SrcRegBank = *RBI.getRegBank(SrcReg, MRI, TRI);

  TypeSize DstSize = RBI.getSizeInBits(DstReg, MRI, TRI);
  TypeSize SrcSize = RBI.getSizeInBits(SrcReg, MRI, TRI);

  // Special casing for cross-bank copies of s1s. We can technically represent
  // a 1-bit value with any size of register. The minimum size for a GPR is 32
  // bits. So, we need to put the FPR on 32 bits as well.
  //
  // FIXME: I'm not sure if this case holds true outside of copies. If it does,
  // then we can pull it into the helpers that get the appropriate class for a
  // register bank. Or make a new helper that carries along some constraint
  // information.
  if (SrcRegBank != DstRegBank &&
      (DstSize == TypeSize::getFixed(1) && SrcSize == TypeSize::getFixed(1)))
    SrcSize = DstSize = TypeSize::getFixed(32);

  // /*GetAllRegSet=*/true: consider classes from the full register set when
  // picking the minimal class for each bank/size.
  return {getMinClassForRegBank(SrcRegBank, SrcSize, true),
          getMinClassForRegBank(DstRegBank, DstSize, true)};
}
989
// FIXME: We need some sort of API in RBI/TRI to allow generic code to
// constrain operands of simple instructions given a TargetRegisterClass
// and LLT
                      const RegisterBankInfo &RBI) {
  // Constrain every virtual register operand to a concrete register class:
  // either the class already assigned to it, or one derived from its register
  // bank and LLT.
  for (MachineOperand &MO : I.operands()) {
    if (!MO.isReg())
      continue;
    Register Reg = MO.getReg();
    // Skip operands with no register assigned.
    if (!Reg)
      continue;
    // Physical registers need no constraining.
    if (Reg.isPhysical())
      continue;
    LLT Ty = MRI.getType(Reg);
    const RegClassOrRegBank &RegClassOrBank = MRI.getRegClassOrRegBank(Reg);
    const TargetRegisterClass *RC =
    if (!RC) {
      // No class assigned yet — derive one from the register bank and type.
      const RegisterBank &RB = *cast<const RegisterBank *>(RegClassOrBank);
      RC = getRegClassForTypeOnBank(Ty, RB);
      if (!RC) {
        LLVM_DEBUG(
            dbgs() << "Warning: DBG_VALUE operand has unexpected size/bank\n");
        break;
      }
    }
    RBI.constrainGenericRegister(Reg, *RC, MRI);
  }

  return true;
}
1021
                      const RegisterBankInfo &RBI) {
  Register DstReg = I.getOperand(0).getReg();
  Register SrcReg = I.getOperand(1).getReg();
  const RegisterBank &DstRegBank = *RBI.getRegBank(DstReg, MRI, TRI);
  const RegisterBank &SrcRegBank = *RBI.getRegBank(SrcReg, MRI, TRI);

  // Find the correct register classes for the source and destination registers.
  const TargetRegisterClass *SrcRC;
  const TargetRegisterClass *DstRC;
  std::tie(SrcRC, DstRC) = getRegClassesForCopy(I, TII, MRI, TRI, RBI);

  if (!DstRC) {
    LLVM_DEBUG(dbgs() << "Unexpected dest size "
                      << RBI.getSizeInBits(DstReg, MRI, TRI) << '\n');
    return false;
  }

  // Is this a copy? If so, then we may need to insert a subregister copy.
  if (I.isCopy()) {
    // Yes. Check if there's anything to fix up.
    if (!SrcRC) {
      LLVM_DEBUG(dbgs() << "Couldn't determine source register class\n");
      return false;
    }

    const TypeSize SrcSize = TRI.getRegSizeInBits(*SrcRC);
    const TypeSize DstSize = TRI.getRegSizeInBits(*DstRC);
    unsigned SubReg;

    // If the source bank doesn't support a subregister copy small enough,
    // then we first need to copy to the destination bank.
    if (getMinSizeForRegBank(SrcRegBank) > DstSize) {
      const TargetRegisterClass *DstTempRC =
          getMinClassForRegBank(DstRegBank, SrcSize, /* GetAllRegSet */ true);
      getSubRegForClass(DstRC, TRI, SubReg);

      // Full-width copy into the destination bank, then narrow via subreg.
      MachineIRBuilder MIB(I);
      auto Copy = MIB.buildCopy({DstTempRC}, {SrcReg});
      copySubReg(I, MRI, RBI, Copy.getReg(0), DstRC, SubReg);
    } else if (SrcSize > DstSize) {
      // If the source register is bigger than the destination we need to
      // perform a subregister copy.
      const TargetRegisterClass *SubRegRC =
          getMinClassForRegBank(SrcRegBank, DstSize, /* GetAllRegSet */ true);
      getSubRegForClass(SubRegRC, TRI, SubReg);
      copySubReg(I, MRI, RBI, SrcReg, DstRC, SubReg);
    } else if (DstSize > SrcSize) {
      // If the destination register is bigger than the source we need to do
      // a promotion using SUBREG_TO_REG.
      const TargetRegisterClass *PromotionRC =
          getMinClassForRegBank(SrcRegBank, DstSize, /* GetAllRegSet */ true);
      getSubRegForClass(SrcRC, TRI, SubReg);

      Register PromoteReg = MRI.createVirtualRegister(PromotionRC);
      BuildMI(*I.getParent(), I, I.getDebugLoc(),
              TII.get(AArch64::SUBREG_TO_REG), PromoteReg)
          .addUse(SrcReg)
          .addImm(SubReg);
      // Rewrite the copy's source to read the promoted register.
      MachineOperand &RegOp = I.getOperand(1);
      RegOp.setReg(PromoteReg);
    }

    // If the destination is a physical register, then there's nothing to
    // change, so we're done.
    if (DstReg.isPhysical())
      return true;
  }

  // No need to constrain SrcReg. It will get constrained when we hit another
  // of its use or its defs. Copies do not have constraints.
  if (!RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) {
    LLVM_DEBUG(dbgs() << "Failed to constrain " << TII.getName(I.getOpcode())
                      << " operand\n");
    return false;
  }

  // If this a GPR ZEXT that we want to just reduce down into a copy.
  // The sizes will be mismatched with the source < 32b but that's ok.
  if (I.getOpcode() == TargetOpcode::G_ZEXT) {
    I.setDesc(TII.get(AArch64::COPY));
    assert(SrcRegBank.getID() == AArch64::GPRRegBankID);
    // Re-run selection now that the instruction is a plain COPY.
    return selectCopy(I, TII, MRI, TRI, RBI);
  }

  I.setDesc(TII.get(AArch64::COPY));
  return true;
}
1111
AArch64InstructionSelector::emitSelect(Register Dst, Register True,
                                       Register False, AArch64CC::CondCode CC,
                                       MachineIRBuilder &MIB) const {
  MachineRegisterInfo &MRI = *MIB.getMRI();
  assert(RBI.getRegBank(False, MRI, TRI)->getID() ==
             RBI.getRegBank(True, MRI, TRI)->getID() &&
         "Expected both select operands to have the same regbank?");
  LLT Ty = MRI.getType(True);
  // Scalar selects only.
  if (Ty.isVector())
    return nullptr;
  const unsigned Size = Ty.getSizeInBits();
  assert((Size == 32 || Size == 64) &&
         "Expected 32 bit or 64 bit select only?");
  const bool Is32Bit = Size == 32;
  // Non-GPR-bank operands are selected with FCSEL.
  if (RBI.getRegBank(True, MRI, TRI)->getID() != AArch64::GPRRegBankID) {
    unsigned Opc = Is32Bit ? AArch64::FCSELSrrr : AArch64::FCSELDrrr;
    auto FCSel = MIB.buildInstr(Opc, {Dst}, {True, False}).addImm(CC);
    return &*FCSel;
  }

  // By default, we'll try and emit a CSEL.
  unsigned Opc = Is32Bit ? AArch64::CSELWr : AArch64::CSELXr;
  bool Optimized = false;
  // Tries to fold a negate/not/increment feeding one operand into the select
  // opcode itself (CSNEG/CSINV/CSINC). Mutates Opc/CC/Reg/OtherReg via the
  // captures when a fold applies; returns true on success.
  auto TryFoldBinOpIntoSelect = [&Opc, Is32Bit, &CC, &MRI,
                                 &Optimized](Register &Reg, Register &OtherReg,
                                             bool Invert) {
    if (Optimized)
      return false;

    // Attempt to fold:
    //
    // %sub = G_SUB 0, %x
    // %select = G_SELECT cc, %reg, %sub
    //
    // Into:
    // %select = CSNEG %reg, %x, cc
    Register MatchReg;
    if (mi_match(Reg, MRI, m_Neg(m_Reg(MatchReg)))) {
      Opc = Is32Bit ? AArch64::CSNEGWr : AArch64::CSNEGXr;
      Reg = MatchReg;
      if (Invert) {
        std::swap(Reg, OtherReg);
      }
      return true;
    }

    // Attempt to fold:
    //
    // %xor = G_XOR %x, -1
    // %select = G_SELECT cc, %reg, %xor
    //
    // Into:
    // %select = CSINV %reg, %x, cc
    if (mi_match(Reg, MRI, m_Not(m_Reg(MatchReg)))) {
      Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;
      Reg = MatchReg;
      if (Invert) {
        std::swap(Reg, OtherReg);
      }
      return true;
    }

    // Attempt to fold:
    //
    // %add = G_ADD %x, 1
    // %select = G_SELECT cc, %reg, %add
    //
    // Into:
    // %select = CSINC %reg, %x, cc
    if (mi_match(Reg, MRI,
                 m_any_of(m_GAdd(m_Reg(MatchReg), m_SpecificICst(1)),
                          m_GPtrAdd(m_Reg(MatchReg), m_SpecificICst(1))))) {
      Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;
      Reg = MatchReg;
      if (Invert) {
        std::swap(Reg, OtherReg);
      }
      return true;
    }

    return false;
  };

  // Helper lambda which tries to use CSINC/CSINV for the instruction when its
  // true/false values are constants.
  // FIXME: All of these patterns already exist in tablegen. We should be
  // able to import these.
  auto TryOptSelectCst = [&Opc, &True, &False, &CC, Is32Bit, &MRI,
                          &Optimized]() {
    if (Optimized)
      return false;
    auto TrueCst = getIConstantVRegValWithLookThrough(True, MRI);
    auto FalseCst = getIConstantVRegValWithLookThrough(False, MRI);
    if (!TrueCst && !FalseCst)
      return false;

    Register ZReg = Is32Bit ? AArch64::WZR : AArch64::XZR;
    if (TrueCst && FalseCst) {
      int64_t T = TrueCst->Value.getSExtValue();
      int64_t F = FalseCst->Value.getSExtValue();

      if (T == 0 && F == 1) {
        // G_SELECT cc, 0, 1 -> CSINC zreg, zreg, cc
        Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;
        True = ZReg;
        False = ZReg;
        return true;
      }

      if (T == 0 && F == -1) {
        // G_SELECT cc 0, -1 -> CSINV zreg, zreg cc
        Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;
        True = ZReg;
        False = ZReg;
        return true;
      }
    }

    if (TrueCst) {
      int64_t T = TrueCst->Value.getSExtValue();
      if (T == 1) {
        // G_SELECT cc, 1, f -> CSINC f, zreg, inv_cc
        Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;
        True = False;
        False = ZReg;
        return true;
      }

      if (T == -1) {
        // G_SELECT cc, -1, f -> CSINV f, zreg, inv_cc
        Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;
        True = False;
        False = ZReg;
        return true;
      }
    }

    if (FalseCst) {
      int64_t F = FalseCst->Value.getSExtValue();
      if (F == 1) {
        // G_SELECT cc, t, 1 -> CSINC t, zreg, cc
        Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;
        False = ZReg;
        return true;
      }

      if (F == -1) {
        // G_SELECT cc, t, -1 -> CSINC t, zreg, cc
        Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;
        False = ZReg;
        return true;
      }
    }
    return false;
  };

  // Try each fold in turn; the Optimized flag ensures at most one fires.
  Optimized |= TryFoldBinOpIntoSelect(False, True, /*Invert = */ false);
  Optimized |= TryFoldBinOpIntoSelect(True, False, /*Invert = */ true);
  Optimized |= TryOptSelectCst();
  auto SelectInst = MIB.buildInstr(Opc, {Dst}, {True, False}).addImm(CC);
  constrainSelectedInstRegOperands(*SelectInst, TII, TRI, RBI);
  return &*SelectInst;
}
1282
                           MachineRegisterInfo *MRI = nullptr) {
  switch (P) {
  default:
    llvm_unreachable("Unknown condition code!");
  case CmpInst::ICMP_NE:
    return AArch64CC::NE;
  case CmpInst::ICMP_EQ:
    return AArch64CC::EQ;
  case CmpInst::ICMP_SGT:
    return AArch64CC::GT;
  case CmpInst::ICMP_SGE:
    // "x >= 0" can use PL (N flag clear) instead of GE when the RHS is known
    // to be the constant zero.
    if (RHS && MRI) {
      auto ValAndVReg = getIConstantVRegValWithLookThrough(RHS, *MRI);
      if (ValAndVReg && ValAndVReg->Value == 0)
        return AArch64CC::PL;
    }
    return AArch64CC::GE;
  case CmpInst::ICMP_SLT:
    // Likewise, "x < 0" can use MI (N flag set) instead of LT.
    if (RHS && MRI) {
      auto ValAndVReg = getIConstantVRegValWithLookThrough(RHS, *MRI);
      if (ValAndVReg && ValAndVReg->Value == 0)
        return AArch64CC::MI;
    }
    return AArch64CC::LT;
  case CmpInst::ICMP_SLE:
    return AArch64CC::LE;
  case CmpInst::ICMP_UGT:
    return AArch64CC::HI;
  case CmpInst::ICMP_UGE:
    return AArch64CC::HS;
  case CmpInst::ICMP_ULT:
    return AArch64CC::LO;
  case CmpInst::ICMP_ULE:
    return AArch64CC::LS;
  }
}
1321
/// changeFPCCToORAArch64CC - Convert an IR fp condition code to an AArch64 CC.
/// When two condition codes are needed they should be OR'ed (branch if either
/// holds); a \p CondCode2 of AL means only \p CondCode is required.
                                AArch64CC::CondCode &CondCode,
                                AArch64CC::CondCode &CondCode2) {
  CondCode2 = AArch64CC::AL;
  switch (CC) {
  default:
    llvm_unreachable("Unknown FP condition!");
  case CmpInst::FCMP_OEQ:
    CondCode = AArch64CC::EQ;
    break;
  case CmpInst::FCMP_OGT:
    CondCode = AArch64CC::GT;
    break;
  case CmpInst::FCMP_OGE:
    CondCode = AArch64CC::GE;
    break;
  case CmpInst::FCMP_OLT:
    CondCode = AArch64CC::MI;
    break;
  case CmpInst::FCMP_OLE:
    CondCode = AArch64CC::LS;
    break;
  case CmpInst::FCMP_ONE:
    // ONE has no single condition code: olt OR ogt.
    CondCode = AArch64CC::MI;
    CondCode2 = AArch64CC::GT;
    break;
  case CmpInst::FCMP_ORD:
    CondCode = AArch64CC::VC;
    break;
  case CmpInst::FCMP_UNO:
    CondCode = AArch64CC::VS;
    break;
  case CmpInst::FCMP_UEQ:
    // UEQ has no single condition code: oeq OR unordered.
    CondCode = AArch64CC::EQ;
    CondCode2 = AArch64CC::VS;
    break;
  case CmpInst::FCMP_UGT:
    CondCode = AArch64CC::HI;
    break;
  case CmpInst::FCMP_UGE:
    CondCode = AArch64CC::PL;
    break;
  case CmpInst::FCMP_ULT:
    CondCode = AArch64CC::LT;
    break;
  case CmpInst::FCMP_ULE:
    CondCode = AArch64CC::LE;
    break;
  case CmpInst::FCMP_UNE:
    CondCode = AArch64CC::NE;
    break;
  }
}
1376
/// Convert an IR fp condition code to an AArch64 CC.
/// This differs from changeFPCCToAArch64CC in that it returns cond codes that
/// should be AND'ed instead of OR'ed.
                                 AArch64CC::CondCode &CondCode,
                                 AArch64CC::CondCode &CondCode2) {
  CondCode2 = AArch64CC::AL;
  switch (CC) {
  default:
    // All other predicates map to a single condition code, so the OR-based
    // mapping is already correct (CondCode2 stays AL).
    changeFPCCToORAArch64CC(CC, CondCode, CondCode2);
    assert(CondCode2 == AArch64CC::AL);
    break;
  case CmpInst::FCMP_ONE:
    // (a one b)
    // == ((a olt b) || (a ogt b))
    // == ((a ord b) && (a une b))
    CondCode = AArch64CC::VC;
    CondCode2 = AArch64CC::NE;
    break;
  case CmpInst::FCMP_UEQ:
    // (a ueq b)
    // == ((a uno b) || (a oeq b))
    // == ((a ule b) && (a uge b))
    CondCode = AArch64CC::PL;
    CondCode2 = AArch64CC::LE;
    break;
  }
}
1405
/// Return a register which can be used as a bit to test in a TB(N)Z.
/// Walks backwards through single-use extends/truncs, shifts, ANDs and XORs
/// feeding \p Reg, updating \p Bit (the bit number to test) and \p Invert
/// (whether the TBZ/TBNZ polarity must be flipped) in place as it folds.
static Register getTestBitReg(Register Reg, uint64_t &Bit, bool &Invert,
                              MachineRegisterInfo &MRI) {
  assert(Reg.isValid() && "Expected valid register!");
  // Track whether we walked through a G_ZEXT: constants found later must then
  // be read zero-extended rather than sign-extended.
  bool HasZext = false;
  while (MachineInstr *MI = getDefIgnoringCopies(Reg, MRI)) {
    unsigned Opc = MI->getOpcode();

    // Only fold through single-use defs; otherwise the folded-away
    // instruction would still be needed elsewhere.
    if (!MI->getOperand(0).isReg() ||
        !MRI.hasOneNonDBGUse(MI->getOperand(0).getReg()))
      break;

    // (tbz (any_ext x), b) -> (tbz x, b) and
    // (tbz (zext x), b) -> (tbz x, b) if we don't use the extended bits.
    //
    // (tbz (trunc x), b) -> (tbz x, b) is always safe, because the bit number
    // on the truncated x is the same as the bit number on x.
    if (Opc == TargetOpcode::G_ANYEXT || Opc == TargetOpcode::G_ZEXT ||
        Opc == TargetOpcode::G_TRUNC) {
      if (Opc == TargetOpcode::G_ZEXT)
        HasZext = true;

      Register NextReg = MI->getOperand(1).getReg();
      // Did we find something worth folding?
      if (!NextReg.isValid() || !MRI.hasOneNonDBGUse(NextReg))
        break;
      TypeSize InSize = MRI.getType(NextReg).getSizeInBits();
      if (Bit >= InSize)
        break;

      // NextReg is worth folding. Keep looking.
      Reg = NextReg;
      continue;
    }

    // Attempt to find a suitable operation with a constant on one side.
    std::optional<uint64_t> C;
    Register TestReg;
    switch (Opc) {
    default:
      break;
    case TargetOpcode::G_AND:
    case TargetOpcode::G_XOR: {
      TestReg = MI->getOperand(1).getReg();
      Register ConstantReg = MI->getOperand(2).getReg();
      auto VRegAndVal = getIConstantVRegValWithLookThrough(ConstantReg, MRI);
      if (!VRegAndVal) {
        // AND commutes, check the other side for a constant.
        // FIXME: Can we canonicalize the constant so that it's always on the
        // same side at some point earlier?
        std::swap(ConstantReg, TestReg);
        VRegAndVal = getIConstantVRegValWithLookThrough(ConstantReg, MRI);
      }
      if (VRegAndVal) {
        if (HasZext)
          C = VRegAndVal->Value.getZExtValue();
        else
          C = VRegAndVal->Value.getSExtValue();
      }
      break;
    }
    case TargetOpcode::G_ASHR:
    case TargetOpcode::G_LSHR:
    case TargetOpcode::G_SHL: {
      TestReg = MI->getOperand(1).getReg();
      auto VRegAndVal =
          getIConstantVRegValWithLookThrough(MI->getOperand(2).getReg(), MRI);
      if (VRegAndVal)
        C = VRegAndVal->Value.getSExtValue();
      break;
    }
    }

    // Didn't find a constant or viable register. Bail out of the loop.
    if (!C || !TestReg.isValid())
      break;

    // We found a suitable instruction with a constant. Check to see if we can
    // walk through the instruction.
    Register NextReg;
    unsigned TestRegSize = MRI.getType(TestReg).getSizeInBits();
    switch (Opc) {
    default:
      break;
    case TargetOpcode::G_AND:
      // (tbz (and x, m), b) -> (tbz x, b) when the b-th bit of m is set.
      if ((*C >> Bit) & 1)
        NextReg = TestReg;
      break;
    case TargetOpcode::G_SHL:
      // (tbz (shl x, c), b) -> (tbz x, b-c) when b-c is positive and fits in
      // the type of the register.
      if (*C <= Bit && (Bit - *C) < TestRegSize) {
        NextReg = TestReg;
        Bit = Bit - *C;
      }
      break;
    case TargetOpcode::G_ASHR:
      // (tbz (ashr x, c), b) -> (tbz x, b+c) or (tbz x, msb) if b+c is > # bits
      // in x
      NextReg = TestReg;
      Bit = Bit + *C;
      if (Bit >= TestRegSize)
        Bit = TestRegSize - 1;
      break;
    case TargetOpcode::G_LSHR:
      // (tbz (lshr x, c), b) -> (tbz x, b+c) when b + c is < # bits in x
      if ((Bit + *C) < TestRegSize) {
        NextReg = TestReg;
        Bit = Bit + *C;
      }
      break;
    case TargetOpcode::G_XOR:
      // We can walk through a G_XOR by inverting whether we use tbz/tbnz when
      // appropriate.
      //
      // e.g. If x' = xor x, c, and the b-th bit is set in c then
      //
      // tbz x', b -> tbnz x, b
      //
      // Because x' only has the b-th bit set if x does not.
      if ((*C >> Bit) & 1)
        Invert = !Invert;
      NextReg = TestReg;
      break;
    }

    // Check if we found anything worth folding.
    if (!NextReg.isValid())
      return Reg;
    Reg = NextReg;
  }

  return Reg;
}
1541
1542MachineInstr *AArch64InstructionSelector::emitTestBit(
1543 Register TestReg, uint64_t Bit, bool IsNegative, MachineBasicBlock *DstMBB,
1544 MachineIRBuilder &MIB) const {
1545 assert(TestReg.isValid());
1546 assert(ProduceNonFlagSettingCondBr &&
1547 "Cannot emit TB(N)Z with speculation tracking!");
1548 MachineRegisterInfo &MRI = *MIB.getMRI();
1549
1550 // Attempt to optimize the test bit by walking over instructions.
1551 TestReg = getTestBitReg(TestReg, Bit, IsNegative, MRI);
1552 LLT Ty = MRI.getType(TestReg);
1553 unsigned Size = Ty.getSizeInBits();
1554 assert(!Ty.isVector() && "Expected a scalar!");
1555 assert(Bit < 64 && "Bit is too large!");
1556
1557 // When the test register is a 64-bit register, we have to narrow to make
1558 // TBNZW work.
1559 bool UseWReg = Bit < 32;
1560 unsigned NecessarySize = UseWReg ? 32 : 64;
1561 if (Size != NecessarySize)
1562 TestReg = moveScalarRegClass(
1563 TestReg, UseWReg ? AArch64::GPR32RegClass : AArch64::GPR64RegClass,
1564 MIB);
1565
1566 static const unsigned OpcTable[2][2] = {{AArch64::TBZX, AArch64::TBNZX},
1567 {AArch64::TBZW, AArch64::TBNZW}};
1568 unsigned Opc = OpcTable[UseWReg][IsNegative];
1569 auto TestBitMI =
1570 MIB.buildInstr(Opc).addReg(TestReg).addImm(Bit).addMBB(DstMBB);
1571 constrainSelectedInstRegOperands(*TestBitMI, TII, TRI, RBI);
1572 return &*TestBitMI;
1573}
1574
1575bool AArch64InstructionSelector::tryOptAndIntoCompareBranch(
1576 MachineInstr &AndInst, bool Invert, MachineBasicBlock *DstMBB,
1577 MachineIRBuilder &MIB) const {
1578 assert(AndInst.getOpcode() == TargetOpcode::G_AND && "Expected G_AND only?");
1579 // Given something like this:
1580 //
1581 // %x = ...Something...
1582 // %one = G_CONSTANT i64 1
1583 // %zero = G_CONSTANT i64 0
1584 // %and = G_AND %x, %one
1585 // %cmp = G_ICMP intpred(ne), %and, %zero
1586 // %cmp_trunc = G_TRUNC %cmp
1587 // G_BRCOND %cmp_trunc, %bb.3
1588 //
1589 // We want to try and fold the AND into the G_BRCOND and produce either a
1590 // TBNZ (when we have intpred(ne)) or a TBZ (when we have intpred(eq)).
1591 //
1592 // In this case, we'd get
1593 //
1594 // TBNZ %x %bb.3
1595 //
1596
1597 // Check if the AND has a constant on its RHS which we can use as a mask.
1598 // If it's a power of 2, then it's the same as checking a specific bit.
1599 // (e.g, ANDing with 8 == ANDing with 000...100 == testing if bit 3 is set)
1600 auto MaybeBit = getIConstantVRegValWithLookThrough(
1601 AndInst.getOperand(2).getReg(), *MIB.getMRI());
1602 if (!MaybeBit)
1603 return false;
1604
1605 int32_t Bit = MaybeBit->Value.exactLogBase2();
1606 if (Bit < 0)
1607 return false;
1608
1609 Register TestReg = AndInst.getOperand(1).getReg();
1610
1611 // Emit a TB(N)Z.
1612 emitTestBit(TestReg, Bit, Invert, DstMBB, MIB);
1613 return true;
1614}
1615
1616MachineInstr *AArch64InstructionSelector::emitCBZ(Register CompareReg,
1617 bool IsNegative,
1618 MachineBasicBlock *DestMBB,
1619 MachineIRBuilder &MIB) const {
1620 assert(ProduceNonFlagSettingCondBr && "CBZ does not set flags!");
1621 MachineRegisterInfo &MRI = *MIB.getMRI();
1622 assert(RBI.getRegBank(CompareReg, MRI, TRI)->getID() ==
1623 AArch64::GPRRegBankID &&
1624 "Expected GPRs only?");
1625 auto Ty = MRI.getType(CompareReg);
1626 unsigned Width = Ty.getSizeInBits();
1627 assert(!Ty.isVector() && "Expected scalar only?");
1628 assert(Width <= 64 && "Expected width to be at most 64?");
1629 static const unsigned OpcTable[2][2] = {{AArch64::CBZW, AArch64::CBZX},
1630 {AArch64::CBNZW, AArch64::CBNZX}};
1631 unsigned Opc = OpcTable[IsNegative][Width == 64];
1632 auto BranchMI = MIB.buildInstr(Opc, {}, {CompareReg}).addMBB(DestMBB);
1633 constrainSelectedInstRegOperands(*BranchMI, TII, TRI, RBI);
1634 return &*BranchMI;
1635}
1636
1637bool AArch64InstructionSelector::selectCompareBranchFedByFCmp(
1638 MachineInstr &I, MachineInstr &FCmp, MachineIRBuilder &MIB) const {
1639 assert(FCmp.getOpcode() == TargetOpcode::G_FCMP);
1640 assert(I.getOpcode() == TargetOpcode::G_BRCOND);
1641 // Unfortunately, the mapping of LLVM FP CC's onto AArch64 CC's isn't
1642 // totally clean. Some of them require two branches to implement.
1643 auto Pred = (CmpInst::Predicate)FCmp.getOperand(1).getPredicate();
1644 emitFPCompare(FCmp.getOperand(2).getReg(), FCmp.getOperand(3).getReg(), MIB,
1645 Pred);
1646 AArch64CC::CondCode CC1, CC2;
1647 changeFCMPPredToAArch64CC(Pred, CC1, CC2);
1648 MachineBasicBlock *DestMBB = I.getOperand(1).getMBB();
1649 MIB.buildInstr(AArch64::Bcc, {}, {}).addImm(CC1).addMBB(DestMBB);
1650 if (CC2 != AArch64CC::AL)
1651 MIB.buildInstr(AArch64::Bcc, {}, {}).addImm(CC2).addMBB(DestMBB);
1652 I.eraseFromParent();
1653 return true;
1654}
1655
/// Try to fold a G_BRCOND \p I fed by a G_ICMP \p ICmp into a single
/// TB(N)Z or CB(N)Z. Returns true (and erases \p I) on success; returns
/// false, leaving \p I untouched, when no such fold applies.
bool AArch64InstructionSelector::tryOptCompareBranchFedByICmp(
    MachineInstr &I, MachineInstr &ICmp, MachineIRBuilder &MIB) const {
  assert(ICmp.getOpcode() == TargetOpcode::G_ICMP);
  assert(I.getOpcode() == TargetOpcode::G_BRCOND);
  // Attempt to optimize the G_BRCOND + G_ICMP into a TB(N)Z/CB(N)Z.
  //
  // Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z
  // instructions will not be produced, as they are conditional branch
  // instructions that do not set flags.
  if (!ProduceNonFlagSettingCondBr)
    return false;

  MachineRegisterInfo &MRI = *MIB.getMRI();
  MachineBasicBlock *DestMBB = I.getOperand(1).getMBB();
  auto Pred =
      static_cast<CmpInst::Predicate>(ICmp.getOperand(1).getPredicate());
  Register LHS = ICmp.getOperand(2).getReg();
  Register RHS = ICmp.getOperand(3).getReg();

  // We're allowed to emit a TB(N)Z/CB(N)Z. Try to do that.
  auto VRegAndVal = getIConstantVRegValWithLookThrough(RHS, MRI);
  MachineInstr *AndInst = getOpcodeDef(TargetOpcode::G_AND, LHS, MRI);

  // When we can emit a TB(N)Z, prefer that.
  //
  // Handle non-commutative condition codes first.
  // Note that we don't want to do this when we have a G_AND because it can
  // become a tst. The tst will make the test bit in the TB(N)Z redundant.
  if (VRegAndVal && !AndInst) {
    int64_t C = VRegAndVal->Value.getSExtValue();

    // When we have a greater-than comparison, we can just test if the msb is
    // zero.
    if (C == -1 && Pred == CmpInst::ICMP_SGT) {
      uint64_t Bit = MRI.getType(LHS).getSizeInBits() - 1;
      emitTestBit(LHS, Bit, /*IsNegative = */ false, DestMBB, MIB);
      I.eraseFromParent();
      return true;
    }

    // When we have a less than comparison, we can just test if the msb is not
    // zero.
    if (C == 0 && Pred == CmpInst::ICMP_SLT) {
      uint64_t Bit = MRI.getType(LHS).getSizeInBits() - 1;
      emitTestBit(LHS, Bit, /*IsNegative = */ true, DestMBB, MIB);
      I.eraseFromParent();
      return true;
    }

    // Inversely, if we have a signed greater-than-or-equal comparison to zero,
    // we can test if the msb is zero.
    if (C == 0 && Pred == CmpInst::ICMP_SGE) {
      uint64_t Bit = MRI.getType(LHS).getSizeInBits() - 1;
      emitTestBit(LHS, Bit, /*IsNegative = */ false, DestMBB, MIB);
      I.eraseFromParent();
      return true;
    }
  }

  // Attempt to handle commutative condition codes. Right now, that's only
  // eq/ne.
  if (ICmpInst::isEquality(Pred)) {
    // If the constant wasn't on the RHS, swap the operands and look again.
    if (!VRegAndVal) {
      std::swap(RHS, LHS);
      VRegAndVal = getIConstantVRegValWithLookThrough(RHS, MRI);
      AndInst = getOpcodeDef(TargetOpcode::G_AND, LHS, MRI);
    }

    if (VRegAndVal && VRegAndVal->Value == 0) {
      // If there's a G_AND feeding into this branch, try to fold it away by
      // emitting a TB(N)Z instead.
      //
      // Note: If we have LT, then it *is* possible to fold, but it wouldn't be
      // beneficial. When we have an AND and LT, we need a TST/ANDS, so folding
      // would be redundant.
      if (AndInst &&
          tryOptAndIntoCompareBranch(
              *AndInst, /*Invert = */ Pred == CmpInst::ICMP_NE, DestMBB, MIB)) {
        I.eraseFromParent();
        return true;
      }

      // Otherwise, try to emit a CB(N)Z instead.
      auto LHSTy = MRI.getType(LHS);
      if (!LHSTy.isVector() && LHSTy.getSizeInBits() <= 64) {
        emitCBZ(LHS, /*IsNegative = */ Pred == CmpInst::ICMP_NE, DestMBB, MIB);
        I.eraseFromParent();
        return true;
      }
    }
  }

  return false;
}
1750
/// Lower a G_BRCOND \p I fed by a G_ICMP \p ICmp: try the TB(N)Z/CB(N)Z
/// folds first, otherwise emit an integer compare followed by a Bcc.
bool AArch64InstructionSelector::selectCompareBranchFedByICmp(
    MachineInstr &I, MachineInstr &ICmp, MachineIRBuilder &MIB) const {
  assert(ICmp.getOpcode() == TargetOpcode::G_ICMP);
  assert(I.getOpcode() == TargetOpcode::G_BRCOND);
  if (tryOptCompareBranchFedByICmp(I, ICmp, MIB))
    return true;

  // Couldn't optimize. Emit a compare + a Bcc.
  MachineBasicBlock *DestMBB = I.getOperand(1).getMBB();
  auto &PredOp = ICmp.getOperand(1);
  emitIntegerCompare(ICmp.getOperand(2), ICmp.getOperand(3), PredOp, MIB);
      static_cast<CmpInst::Predicate>(PredOp.getPredicate()),
      ICmp.getOperand(3).getReg(), MIB.getMRI());
  MIB.buildInstr(AArch64::Bcc, {}, {}).addImm(CC).addMBB(DestMBB);
  I.eraseFromParent();
  return true;
}
1769
/// Select a G_BRCOND \p I, dispatching on the instruction that defines the
/// condition register (G_FCMP, G_ICMP, or anything else).
bool AArch64InstructionSelector::selectCompareBranch(
    MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) {
  Register CondReg = I.getOperand(0).getReg();
  MachineInstr *CCMI = MRI.getVRegDef(CondReg);
  // Try to select the G_BRCOND using whatever is feeding the condition if
  // possible.
  unsigned CCMIOpc = CCMI->getOpcode();
  if (CCMIOpc == TargetOpcode::G_FCMP)
    return selectCompareBranchFedByFCmp(I, *CCMI, MIB);
  if (CCMIOpc == TargetOpcode::G_ICMP)
    return selectCompareBranchFedByICmp(I, *CCMI, MIB);

  // Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z
  // instructions will not be produced, as they are conditional branch
  // instructions that do not set flags.
  if (ProduceNonFlagSettingCondBr) {
    // Branch when bit 0 of the condition register is set (TBNZ).
    emitTestBit(CondReg, /*Bit = */ 0, /*IsNegative = */ true,
                I.getOperand(1).getMBB(), MIB);
    I.eraseFromParent();
    return true;
  }

  // Can't emit TB(N)Z/CB(N)Z. Emit a tst + bcc instead.
  auto TstMI =
      MIB.buildInstr(AArch64::ANDSWri, {LLT::scalar(32)}, {CondReg}).addImm(1);
  auto Bcc = MIB.buildInstr(AArch64::Bcc)
                 .addMBB(I.getOperand(1).getMBB());
  I.eraseFromParent();
  return true;
}
1803
1804/// Returns the element immediate value of a vector shift operand if found.
1805/// This needs to detect a splat-like operation, e.g. a G_BUILD_VECTOR.
1806static std::optional<int64_t> getVectorShiftImm(Register Reg,
1807 MachineRegisterInfo &MRI) {
1808 assert(MRI.getType(Reg).isVector() && "Expected a *vector* shift operand");
1809 MachineInstr *OpMI = MRI.getVRegDef(Reg);
1810 return getAArch64VectorSplatScalar(*OpMI, MRI);
1811}
1812
1813/// Matches and returns the shift immediate value for a SHL instruction given
1814/// a shift operand.
1815static std::optional<int64_t> getVectorSHLImm(LLT SrcTy, Register Reg,
1816 MachineRegisterInfo &MRI) {
1817 std::optional<int64_t> ShiftImm = getVectorShiftImm(Reg, MRI);
1818 if (!ShiftImm)
1819 return std::nullopt;
1820 // Check the immediate is in range for a SHL.
1821 int64_t Imm = *ShiftImm;
1822 if (Imm < 0)
1823 return std::nullopt;
1824 switch (SrcTy.getElementType().getSizeInBits()) {
1825 default:
1826 LLVM_DEBUG(dbgs() << "Unhandled element type for vector shift");
1827 return std::nullopt;
1828 case 8:
1829 if (Imm > 7)
1830 return std::nullopt;
1831 break;
1832 case 16:
1833 if (Imm > 15)
1834 return std::nullopt;
1835 break;
1836 case 32:
1837 if (Imm > 31)
1838 return std::nullopt;
1839 break;
1840 case 64:
1841 if (Imm > 63)
1842 return std::nullopt;
1843 break;
1844 }
1845 return Imm;
1846}
1847
/// Select a vector G_SHL \p I, choosing between the immediate-shift (SHL)
/// and register-shift (USHL) instruction for each supported vector type.
bool AArch64InstructionSelector::selectVectorSHL(MachineInstr &I,
                                                 MachineRegisterInfo &MRI) {
  assert(I.getOpcode() == TargetOpcode::G_SHL);
  Register DstReg = I.getOperand(0).getReg();
  const LLT Ty = MRI.getType(DstReg);
  Register Src1Reg = I.getOperand(1).getReg();
  Register Src2Reg = I.getOperand(2).getReg();

  if (!Ty.isVector())
    return false;

  // Check if we have a vector of constants on RHS that we can select as the
  // immediate form.
  std::optional<int64_t> ImmVal = getVectorSHLImm(Ty, Src2Reg, MRI);

  unsigned Opc = 0;
  if (Ty == LLT::fixed_vector(2, 64)) {
    Opc = ImmVal ? AArch64::SHLv2i64_shift : AArch64::USHLv2i64;
  } else if (Ty == LLT::fixed_vector(4, 32)) {
    Opc = ImmVal ? AArch64::SHLv4i32_shift : AArch64::USHLv4i32;
  } else if (Ty == LLT::fixed_vector(2, 32)) {
    Opc = ImmVal ? AArch64::SHLv2i32_shift : AArch64::USHLv2i32;
  } else if (Ty == LLT::fixed_vector(4, 16)) {
    Opc = ImmVal ? AArch64::SHLv4i16_shift : AArch64::USHLv4i16;
  } else if (Ty == LLT::fixed_vector(8, 16)) {
    Opc = ImmVal ? AArch64::SHLv8i16_shift : AArch64::USHLv8i16;
  } else if (Ty == LLT::fixed_vector(16, 8)) {
    Opc = ImmVal ? AArch64::SHLv16i8_shift : AArch64::USHLv16i8;
  } else if (Ty == LLT::fixed_vector(8, 8)) {
    Opc = ImmVal ? AArch64::SHLv8i8_shift : AArch64::USHLv8i8;
  } else {
    LLVM_DEBUG(dbgs() << "Unhandled G_SHL type");
    return false;
  }

  // Shift amount is either the immediate or the register operand.
  auto Shl = MIB.buildInstr(Opc, {DstReg}, {Src1Reg});
  if (ImmVal)
    Shl.addImm(*ImmVal);
  else
    Shl.addUse(Src2Reg);
  I.eraseFromParent();
  return true;
}
1892
/// Select a vector G_ASHR/G_LSHR. AArch64 has no right-shift-by-register
/// instruction, so this negates the shift amount and uses SSHL/USHL, which
/// interpret a negative amount as a right shift.
bool AArch64InstructionSelector::selectVectorAshrLshr(
    MachineInstr &I, MachineRegisterInfo &MRI) {
  assert(I.getOpcode() == TargetOpcode::G_ASHR ||
         I.getOpcode() == TargetOpcode::G_LSHR);
  Register DstReg = I.getOperand(0).getReg();
  const LLT Ty = MRI.getType(DstReg);
  Register Src1Reg = I.getOperand(1).getReg();
  Register Src2Reg = I.getOperand(2).getReg();

  // Scalar shifts are not handled here.
  if (!Ty.isVector())
    return false;

  // Arithmetic shift needs the sign-preserving SSHL; logical uses USHL.
  bool IsASHR = I.getOpcode() == TargetOpcode::G_ASHR;

  // We expect the immediate case to be lowered in the PostLegalCombiner to
  // AArch64ISD::VASHR or AArch64ISD::VLSHR equivalents.

  // There is not a shift right register instruction, but the shift left
  // register instruction takes a signed value, where negative numbers specify a
  // right shift.

  unsigned Opc = 0;
  unsigned NegOpc = 0;
  const TargetRegisterClass *RC =
      getRegClassForTypeOnBank(Ty, RBI.getRegBank(AArch64::FPRRegBankID));
  if (Ty == LLT::fixed_vector(2, 64)) {
    Opc = IsASHR ? AArch64::SSHLv2i64 : AArch64::USHLv2i64;
    NegOpc = AArch64::NEGv2i64;
  } else if (Ty == LLT::fixed_vector(4, 32)) {
    Opc = IsASHR ? AArch64::SSHLv4i32 : AArch64::USHLv4i32;
    NegOpc = AArch64::NEGv4i32;
  } else if (Ty == LLT::fixed_vector(2, 32)) {
    Opc = IsASHR ? AArch64::SSHLv2i32 : AArch64::USHLv2i32;
    NegOpc = AArch64::NEGv2i32;
  } else if (Ty == LLT::fixed_vector(4, 16)) {
    Opc = IsASHR ? AArch64::SSHLv4i16 : AArch64::USHLv4i16;
    NegOpc = AArch64::NEGv4i16;
  } else if (Ty == LLT::fixed_vector(8, 16)) {
    Opc = IsASHR ? AArch64::SSHLv8i16 : AArch64::USHLv8i16;
    NegOpc = AArch64::NEGv8i16;
  } else if (Ty == LLT::fixed_vector(16, 8)) {
    Opc = IsASHR ? AArch64::SSHLv16i8 : AArch64::USHLv16i8;
    NegOpc = AArch64::NEGv16i8;
  } else if (Ty == LLT::fixed_vector(8, 8)) {
    Opc = IsASHR ? AArch64::SSHLv8i8 : AArch64::USHLv8i8;
    NegOpc = AArch64::NEGv8i8;
  } else {
    LLVM_DEBUG(dbgs() << "Unhandled G_ASHR type");
    return false;
  }

  // Negate the shift amount, then shift left by the (now negative) amount.
  auto Neg = MIB.buildInstr(NegOpc, {RC}, {Src2Reg});
  auto SShl = MIB.buildInstr(Opc, {DstReg}, {Src1Reg, Neg});
  I.eraseFromParent();
  return true;
}
1951
/// Select G_VASTART for AAPCS targets: initialize all five fields of the
/// va_list struct (stack, gr_top, vr_top, gr_offs, vr_offs) with stores
/// relative to the pointer in operand 0. Field width depends on ILP32 vs LP64.
bool AArch64InstructionSelector::selectVaStartAAPCS(
    MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const {

      MF.getFunction().isVarArg()))
    return false;

  // The layout of the va_list struct is specified in the AArch64 Procedure Call
  // Standard, section 10.1.5.

  const AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
  // Pointer fields are 4 bytes on ILP32, 8 bytes on LP64; pick matching
  // register class and add/store opcodes.
  const unsigned PtrSize = STI.isTargetILP32() ? 4 : 8;
  const auto *PtrRegClass =
      STI.isTargetILP32() ? &AArch64::GPR32RegClass : &AArch64::GPR64RegClass;

  const MCInstrDesc &MCIDAddAddr =
      TII.get(STI.isTargetILP32() ? AArch64::ADDWri : AArch64::ADDXri);
  const MCInstrDesc &MCIDStoreAddr =
      TII.get(STI.isTargetILP32() ? AArch64::STRWui : AArch64::STRXui);

  /*
   * typedef struct va_list {
   *   void * stack; // next stack param
   *   void * gr_top; // end of GP arg reg save area
   *   void * vr_top; // end of FP/SIMD arg reg save area
   *   int gr_offs; // offset from gr_top to next GP register arg
   *   int vr_offs; // offset from vr_top to next FP/SIMD register arg
   * } va_list;
   */
  const auto VAList = I.getOperand(0).getReg();

  // Our current offset in bytes from the va_list struct (VAList).
  unsigned OffsetBytes = 0;

  // Helper function to store (FrameIndex + Imm) to VAList at offset OffsetBytes
  // and increment OffsetBytes by PtrSize.
  const auto PushAddress = [&](const int FrameIndex, const int64_t Imm) {
    // Materialize the address: Top = FrameIndex + Imm.
    const Register Top = MRI.createVirtualRegister(PtrRegClass);
    auto MIB = BuildMI(*I.getParent(), I, I.getDebugLoc(), MCIDAddAddr)
                   .addDef(Top)
                   .addFrameIndex(FrameIndex)
                   .addImm(Imm)
                   .addImm(0);

    // Store Top into the current va_list field; the scaled immediate is the
    // field index (OffsetBytes / PtrSize).
    const auto *MMO = *I.memoperands_begin();
    MIB = BuildMI(*I.getParent(), I, I.getDebugLoc(), MCIDStoreAddr)
              .addUse(Top)
              .addUse(VAList)
              .addImm(OffsetBytes / PtrSize)
                  MMO->getPointerInfo().getWithOffset(OffsetBytes),
                  MachineMemOperand::MOStore, PtrSize, MMO->getBaseAlign()));

    OffsetBytes += PtrSize;
  };

  // void* stack at offset 0
  PushAddress(FuncInfo->getVarArgsStackIndex(), 0);

  // void* gr_top at offset 8 (4 on ILP32)
  const unsigned GPRSize = FuncInfo->getVarArgsGPRSize();
  PushAddress(FuncInfo->getVarArgsGPRIndex(), GPRSize);

  // void* vr_top at offset 16 (8 on ILP32)
  const unsigned FPRSize = FuncInfo->getVarArgsFPRSize();
  PushAddress(FuncInfo->getVarArgsFPRIndex(), FPRSize);

  // Helper function to store a 4-byte integer constant to VAList at offset
  // OffsetBytes, and increment OffsetBytes by 4.
  const auto PushIntConstant = [&](const int32_t Value) {
    constexpr int IntSize = 4;
    // Materialize the 32-bit constant, then store it as the next field.
    const Register Temp = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
    auto MIB =
        BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::MOVi32imm))
            .addDef(Temp)
            .addImm(Value);

    const auto *MMO = *I.memoperands_begin();
    MIB = BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::STRWui))
              .addUse(Temp)
              .addUse(VAList)
              .addImm(OffsetBytes / IntSize)
                  MMO->getPointerInfo().getWithOffset(OffsetBytes),
                  MachineMemOperand::MOStore, IntSize, MMO->getBaseAlign()));
    OffsetBytes += IntSize;
  };

  // int gr_offs at offset 24 (12 on ILP32)
  // Offsets are negative: they count back from gr_top/vr_top.
  PushIntConstant(-static_cast<int32_t>(GPRSize));

  // int vr_offs at offset 28 (16 on ILP32)
  PushIntConstant(-static_cast<int32_t>(FPRSize));

  // Sanity-check that all five fields were written (20 bytes ILP32, 32 LP64).
  assert(OffsetBytes == (STI.isTargetILP32() ? 20 : 32) && "Unexpected offset");

  I.eraseFromParent();
  return true;
}
2055
/// Select G_VASTART for Darwin: the Darwin va_list is a single pointer, so
/// just compute the varargs frame address and store it through operand 0.
bool AArch64InstructionSelector::selectVaStartDarwin(
    MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const {
  AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
  Register ListReg = I.getOperand(0).getReg();

  Register ArgsAddrReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);

  int FrameIdx = FuncInfo->getVarArgsStackIndex();
  // For Win64 calling conventions, prefer the GPR save area when one exists.
  if (MF.getSubtarget<AArch64Subtarget>().isCallingConvWin64(
    FrameIdx = FuncInfo->getVarArgsGPRSize() > 0
                   ? FuncInfo->getVarArgsGPRIndex()
                   : FuncInfo->getVarArgsStackIndex();
  }

  // ArgsAddrReg = address of the chosen frame slot (ADDXri fi, 0).
  auto MIB =
      BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::ADDXri))
          .addDef(ArgsAddrReg)
          .addFrameIndex(FrameIdx)
          .addImm(0)
          .addImm(0);

  // Store that address into the va_list object, reusing G_VASTART's memop.
  MIB = BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::STRXui))
            .addUse(ArgsAddrReg)
            .addUse(ListReg)
            .addImm(0)
            .addMemOperand(*I.memoperands_begin());

  I.eraseFromParent();
  return true;
}
2090
/// Materialize a 64-bit symbolic address for the large code model as a
/// MOVZ of the low 16 bits (G0) followed by three MOVKs for the G1/G2/G3
/// 16-bit chunks, each carrying the appropriate relocation flags.
void AArch64InstructionSelector::materializeLargeCMVal(
    MachineInstr &I, const Value *V, unsigned OpFlags) {
  MachineBasicBlock &MBB = *I.getParent();
  MachineFunction &MF = *MBB.getParent();
  MachineRegisterInfo &MRI = MF.getRegInfo();

  // MOVZ seeds the low 16 bits; the symbol operand is copied from I and
  // retagged with MO_G0 plus the caller-provided flags.
  auto MovZ = MIB.buildInstr(AArch64::MOVZXi, {&AArch64::GPR64RegClass}, {});
  MovZ->addOperand(MF, I.getOperand(1));
  MovZ->getOperand(1).setTargetFlags(OpFlags | AArch64II::MO_G0 |
  MovZ->addOperand(MF, MachineOperand::CreateImm(0));

  // Emit one MOVK that inserts the 16-bit chunk selected by Flags at bit
  // position Offset; writes ForceDstReg if nonzero, else a fresh vreg.
  auto BuildMovK = [&](Register SrcReg, unsigned char Flags, unsigned Offset,
                       Register ForceDstReg) {
    Register DstReg = ForceDstReg
                          ? ForceDstReg
                          : MRI.createVirtualRegister(&AArch64::GPR64RegClass);
    auto MovI = MIB.buildInstr(AArch64::MOVKXi).addDef(DstReg).addUse(SrcReg);
    // Symbol operand: global address when V is a GlobalValue, otherwise the
    // other symbol kind copied from MovZ (same offset, new flags).
    if (auto *GV = dyn_cast<GlobalValue>(V)) {
      MovI->addOperand(MF, MachineOperand::CreateGA(
                               GV, MovZ->getOperand(1).getOffset(), Flags));
    } else {
      MovI->addOperand(
                           MovZ->getOperand(1).getOffset(), Flags));
    }
    return DstReg;
  };
  // Chain MOVKs for bits 16-31 (G1), 32-47 (G2), 48-63 (G3); the final MOVK
  // writes I's original destination register.
  Register DstReg = BuildMovK(MovZ.getReg(0),
  DstReg = BuildMovK(DstReg, AArch64II::MO_G2 | AArch64II::MO_NC, 32, 0);
  BuildMovK(DstReg, AArch64II::MO_G3, 48, I.getOperand(0).getReg());
}
2127
/// Pre-selection lowering: rewrite certain generic instructions in place
/// (mostly retyping pointer operands to integers) so the TableGen-imported
/// patterns can match them. Returns true if the instruction was modified.
bool AArch64InstructionSelector::preISelLower(MachineInstr &I) {
  MachineBasicBlock &MBB = *I.getParent();
  MachineFunction &MF = *MBB.getParent();
  MachineRegisterInfo &MRI = MF.getRegInfo();

  switch (I.getOpcode()) {
  case TargetOpcode::G_CONSTANT: {
    // Only pointer-typed constants of 32/64 bits need retyping.
    Register DefReg = I.getOperand(0).getReg();
    const LLT DefTy = MRI.getType(DefReg);
    if (!DefTy.isPointer())
      return false;
    const unsigned PtrSize = DefTy.getSizeInBits();
    if (PtrSize != 32 && PtrSize != 64)
      return false;
    // Convert pointer typed constants to integers so TableGen can select.
    MRI.setType(DefReg, LLT::integer(PtrSize));
    return true;
  }
  case TargetOpcode::G_STORE: {
    // First try to fold a cross-bank copy feeding the stored value.
    bool Changed = contractCrossBankCopyIntoStore(I, MRI);
    MachineOperand &SrcOp = I.getOperand(0);
    if (MRI.getType(SrcOp.getReg()).isPointer()) {
      // Allow matching with imported patterns for stores of pointers. Unlike
      // G_LOAD/G_PTR_ADD, we may not have selected all users. So, emit a copy
      // and constrain.
      auto Copy = MIB.buildCopy(LLT::scalar(64), SrcOp);
      Register NewSrc = Copy.getReg(0);
      SrcOp.setReg(NewSrc);
      RBI.constrainGenericRegister(NewSrc, AArch64::GPR64RegClass, MRI);
      Changed = true;
    }
    return Changed;
  }
  case TargetOpcode::G_PTR_ADD: {
    // If Checked Pointer Arithmetic (FEAT_CPA) is present, preserve the pointer
    // arithmetic semantics instead of falling back to regular arithmetic.
    const auto &TL = STI.getTargetLowering();
    if (TL->shouldPreservePtrArith(MF.getFunction(), EVT()))
      return false;
    return convertPtrAddToAdd(I, MRI);
  }
  case TargetOpcode::G_LOAD: {
    // For scalar loads of pointers, we try to convert the dest type from p0
    // to s64 so that our imported patterns can match. Like with the G_PTR_ADD
    // conversion, this should be ok because all users should have been
    // selected already, so the type doesn't matter for them.
    Register DstReg = I.getOperand(0).getReg();
    const LLT DstTy = MRI.getType(DstReg);
    if (!DstTy.isPointer())
      return false;
    MRI.setType(DstReg, LLT::scalar(64));
    return true;
  }
  case AArch64::G_DUP: {
    // Convert the type from p0 to s64 to help selection.
    LLT DstTy = MRI.getType(I.getOperand(0).getReg());
    if (!DstTy.isPointerVector())
      return false;
    // Copy the scalar source into an s64 GPR and retype the vector result.
    auto NewSrc = MIB.buildCopy(LLT::scalar(64), I.getOperand(1).getReg());
    MRI.setType(I.getOperand(0).getReg(),
                DstTy.changeElementType(LLT::scalar(64)));
    MRI.setRegClass(NewSrc.getReg(0), &AArch64::GPR64RegClass);
    I.getOperand(1).setReg(NewSrc.getReg(0));
    return true;
  }
  case AArch64::G_INSERT_VECTOR_ELT: {
    // Convert the type from p0 to s64 to help selection.
    LLT DstTy = MRI.getType(I.getOperand(0).getReg());
    LLT SrcVecTy = MRI.getType(I.getOperand(1).getReg());
    if (!SrcVecTy.isPointerVector())
      return false;
    // Copy the inserted element into an s64 GPR; retype both the source
    // vector operand and the result vector to s64 elements.
    auto NewSrc = MIB.buildCopy(LLT::scalar(64), I.getOperand(2).getReg());
    MRI.setType(I.getOperand(1).getReg(),
                DstTy.changeElementType(LLT::scalar(64)));
    MRI.setType(I.getOperand(0).getReg(),
                DstTy.changeElementType(LLT::scalar(64)));
    MRI.setRegClass(NewSrc.getReg(0), &AArch64::GPR64RegClass);
    I.getOperand(2).setReg(NewSrc.getReg(0));
    return true;
  }
  case TargetOpcode::G_UITOFP:
  case TargetOpcode::G_SITOFP: {
    // If both source and destination regbanks are FPR, then convert the opcode
    // to G_SITOF so that the importer can select it to an fpr variant.
    // Otherwise, it ends up matching an fpr/gpr variant and adding a cross-bank
    // copy.
    Register SrcReg = I.getOperand(1).getReg();
    LLT SrcTy = MRI.getType(SrcReg);
    LLT DstTy = MRI.getType(I.getOperand(0).getReg());
    if (SrcTy.isVector() || SrcTy.getSizeInBits() != DstTy.getSizeInBits())
      return false;

    if (RBI.getRegBank(SrcReg, MRI, TRI)->getID() == AArch64::FPRRegBankID) {
      // Need to add a copy to change the type so that the existing patterns can
      // match when there is an integer on an FPR bank.
      if (SrcTy.getScalarType().isInteger()) {
        auto Copy = MIB.buildCopy(DstTy, SrcReg);
        I.getOperand(1).setReg(Copy.getReg(0));
        MRI.setRegClass(Copy.getReg(0),
                        getRegClassForTypeOnBank(
                            SrcTy, RBI.getRegBank(AArch64::FPRRegBankID)));
      }
      if (I.getOpcode() == TargetOpcode::G_SITOFP)
        I.setDesc(TII.get(AArch64::G_SITOF));
      else
        I.setDesc(TII.get(AArch64::G_UITOF));
      return true;
    }
    return false;
  }
  default:
    return false;
  }
}
2242
/// This lowering tries to look for G_PTR_ADD instructions and then converts
/// them to a standard G_ADD with a COPY on the source.
///
/// The motivation behind this is to expose the add semantics to the imported
/// tablegen patterns. We shouldn't need to check for uses being loads/stores,
/// because the selector works bottom up, uses before defs. By the time we
/// end up trying to select a G_PTR_ADD, we should have already attempted to
/// fold this into addressing modes and were therefore unsuccessful.
///
/// \returns true if the instruction was rewritten (to G_ADD or G_SUB),
/// false if it was left untouched (non-zero address space, or the inserted
/// G_PTRTOINT failed to select).
bool AArch64InstructionSelector::convertPtrAddToAdd(
    MachineInstr &I, MachineRegisterInfo &MRI) {
  assert(I.getOpcode() == TargetOpcode::G_PTR_ADD && "Expected G_PTR_ADD");
  Register DstReg = I.getOperand(0).getReg();
  Register AddOp1Reg = I.getOperand(1).getReg();
  const LLT PtrTy = MRI.getType(DstReg);
  // Only address space 0 pointers are rewritten here.
  if (PtrTy.getAddressSpace() != 0)
    return false;

  const LLT CastPtrTy = PtrTy.isVector()
                            : LLT::integer(64);
  auto PtrToInt = MIB.buildPtrToInt(CastPtrTy, AddOp1Reg);
  // Set regbanks on the registers.
  if (PtrTy.isVector())
    MRI.setRegBank(PtrToInt.getReg(0), RBI.getRegBank(AArch64::FPRRegBankID));
  else
    MRI.setRegBank(PtrToInt.getReg(0), RBI.getRegBank(AArch64::GPRRegBankID));

  // Now turn the %dst(p0) = G_PTR_ADD %base, off into:
  // %dst(intty) = G_ADD %intbase, off
  I.setDesc(TII.get(TargetOpcode::G_ADD));
  MRI.setType(DstReg, CastPtrTy);
  I.getOperand(1).setReg(PtrToInt.getReg(0));
  // Select the freshly inserted G_PTRTOINT immediately; we won't revisit it.
  if (!select(*PtrToInt)) {
    LLVM_DEBUG(dbgs() << "Failed to select G_PTRTOINT in convertPtrAddToAdd");
    return false;
  }

  // Also take the opportunity here to try to do some optimization.
  // Try to convert this into a G_SUB if the offset is a 0-x negate idiom.
  Register NegatedReg;
  if (!mi_match(I.getOperand(2).getReg(), MRI, m_Neg(m_Reg(NegatedReg))))
    return true;
  I.getOperand(2).setReg(NegatedReg);
  I.setDesc(TII.get(TargetOpcode::G_SUB));
  return true;
}
2289
/// Early-select a scalar G_SHL with a constant shift amount as a UBFM
/// (the instruction underlying the LSL-immediate alias).
bool AArch64InstructionSelector::earlySelectSHL(MachineInstr &I,
                                                MachineRegisterInfo &MRI) {
  // We try to match the immediate variant of LSL, which is actually an alias
  // for a special case of UBFM. Otherwise, we fall back to the imported
  // selector which will match the register variant.
  assert(I.getOpcode() == TargetOpcode::G_SHL && "unexpected op");
  const auto &MO = I.getOperand(2);
  // Bail unless the shift amount is a known constant.
  auto VRegAndVal = getIConstantVRegVal(MO.getReg(), MRI);
  if (!VRegAndVal)
    return false;

  // Vector shifts are handled by selectVectorSHL instead.
  const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
  if (DstTy.isVector())
    return false;
  bool Is64Bit = DstTy.getSizeInBits() == 64;
  // The two UBFM immediates (immr/imms) come from the shift-A/shift-B
  // complex renderers; either may reject an out-of-range amount.
  auto Imm1Fn = Is64Bit ? selectShiftA_64(MO) : selectShiftA_32(MO);
  auto Imm2Fn = Is64Bit ? selectShiftB_64(MO) : selectShiftB_32(MO);

  if (!Imm1Fn || !Imm2Fn)
    return false;

  auto NewI =
      MIB.buildInstr(Is64Bit ? AArch64::UBFMXri : AArch64::UBFMWri,
                     {I.getOperand(0).getReg()}, {I.getOperand(1).getReg()});

  // Append the rendered immediate operands in order.
  for (auto &RenderFn : *Imm1Fn)
    RenderFn(NewI);
  for (auto &RenderFn : *Imm2Fn)
    RenderFn(NewI);

  I.eraseFromParent();
  return true;
}
2324
2325bool AArch64InstructionSelector::contractCrossBankCopyIntoStore(
2326 MachineInstr &I, MachineRegisterInfo &MRI) {
2327 assert(I.getOpcode() == TargetOpcode::G_STORE && "Expected G_STORE");
2328 // If we're storing a scalar, it doesn't matter what register bank that
2329 // scalar is on. All that matters is the size.
2330 //
2331 // So, if we see something like this (with a 32-bit scalar as an example):
2332 //
2333 // %x:gpr(s32) = ... something ...
2334 // %y:fpr(s32) = COPY %x:gpr(s32)
2335 // G_STORE %y:fpr(s32)
2336 //
2337 // We can fix this up into something like this:
2338 //
2339 // G_STORE %x:gpr(s32)
2340 //
2341 // And then continue the selection process normally.
2342 Register DefDstReg = getSrcRegIgnoringCopies(I.getOperand(0).getReg(), MRI);
2343 if (!DefDstReg.isValid())
2344 return false;
2345 LLT DefDstTy = MRI.getType(DefDstReg);
2346 Register StoreSrcReg = I.getOperand(0).getReg();
2347 LLT StoreSrcTy = MRI.getType(StoreSrcReg);
2348
2349 // If we get something strange like a physical register, then we shouldn't
2350 // go any further.
2351 if (!DefDstTy.isValid())
2352 return false;
2353
2354 // Are the source and dst types the same size?
2355 if (DefDstTy.getSizeInBits() != StoreSrcTy.getSizeInBits())
2356 return false;
2357
2358 if (RBI.getRegBank(StoreSrcReg, MRI, TRI) ==
2359 RBI.getRegBank(DefDstReg, MRI, TRI))
2360 return false;
2361
2362 // We have a cross-bank copy, which is entering a store. Let's fold it.
2363 I.getOperand(0).setReg(DefDstReg);
2364 return true;
2365}
2366
/// Custom selection routines that must run BEFORE the TableGen-imported
/// selector, because the imported patterns would otherwise match these
/// instructions suboptimally. Returns true if the instruction was selected.
bool AArch64InstructionSelector::earlySelect(MachineInstr &I) {
  assert(I.getParent() && "Instruction should be in a basic block!");
  assert(I.getParent()->getParent() && "Instruction should be in a function!");

  MachineBasicBlock &MBB = *I.getParent();
  MachineFunction &MF = *MBB.getParent();
  MachineRegisterInfo &MRI = MF.getRegInfo();

  switch (I.getOpcode()) {
  case AArch64::G_DUP: {
    // Before selecting a DUP instruction, check if it is better selected as a
    // MOV or load from a constant pool.
    Register Src = I.getOperand(1).getReg();
    auto ValAndVReg = getAnyConstantVRegValWithLookThrough(
        Src, MRI, /*LookThroughInstrs=*/true, /*LookThroughAnyExt=*/true);
    if (!ValAndVReg)
      return false;
    // Build a splat ConstantVector of the (truncated) scalar and emit it as
    // a constant vector materialization.
    LLVMContext &Ctx = MF.getFunction().getContext();
    Register Dst = I.getOperand(0).getReg();
        MRI.getType(Dst).getNumElements(),
        ConstantInt::get(
            Type::getIntNTy(Ctx, MRI.getType(Dst).getScalarSizeInBits()),
            ValAndVReg->Value.trunc(MRI.getType(Dst).getScalarSizeInBits())));
    if (!emitConstantVector(Dst, CV, MIB, MRI))
      return false;
    I.eraseFromParent();
    return true;
  }
  case TargetOpcode::G_SEXT:
    // Check for i64 sext(i32 vector_extract) prior to tablegen to select SMOV
    // over a normal extend.
    if (selectUSMovFromExtend(I, MRI))
      return true;
    return false;
  case TargetOpcode::G_BR:
    return false;
  case TargetOpcode::G_SHL:
    return earlySelectSHL(I, MRI);
  case TargetOpcode::G_CONSTANT: {
    // Zero constants are selected as a COPY from WZR/XZR instead of a MOV.
    bool IsZero = false;
    if (I.getOperand(1).isCImm())
      IsZero = I.getOperand(1).getCImm()->isZero();
    else if (I.getOperand(1).isImm())
      IsZero = I.getOperand(1).getImm() == 0;

    if (!IsZero)
      return false;

    Register DefReg = I.getOperand(0).getReg();
    LLT Ty = MRI.getType(DefReg);
    if (Ty.getSizeInBits() == 64) {
      I.getOperand(1).ChangeToRegister(AArch64::XZR, false);
      RBI.constrainGenericRegister(DefReg, AArch64::GPR64RegClass, MRI);
    } else if (Ty.getSizeInBits() <= 32) {
      I.getOperand(1).ChangeToRegister(AArch64::WZR, false);
      RBI.constrainGenericRegister(DefReg, AArch64::GPR32RegClass, MRI);
    } else
      return false;

    I.setDesc(TII.get(TargetOpcode::COPY));
    return true;
  }

  case TargetOpcode::G_ADD: {
    // Check if this is being fed by a G_ICMP on either side.
    //
    // (cmp pred, x, y) + z
    //
    // In the above case, when the cmp is true, we increment z by 1. So, we can
    // fold the add into the cset for the cmp by using cinc.
    //
    // FIXME: This would probably be a lot nicer in PostLegalizerLowering.
    Register AddDst = I.getOperand(0).getReg();
    Register AddLHS = I.getOperand(1).getReg();
    Register AddRHS = I.getOperand(2).getReg();
    // Only handle scalars.
    LLT Ty = MRI.getType(AddLHS);
    if (Ty.isVector())
      return false;
    // Since G_ICMP is modeled as ADDS/SUBS/ANDS, we can handle 32 bits or 64
    // bits.
    unsigned Size = Ty.getSizeInBits();
    if (Size != 32 && Size != 64)
      return false;
    // Return the single-use G_ICMP feeding Reg (possibly through a zext for
    // the 64-bit case), or nullptr if there isn't one.
    auto MatchCmp = [&](Register Reg) -> MachineInstr * {
      if (!MRI.hasOneNonDBGUse(Reg))
        return nullptr;
      // If the LHS of the add is 32 bits, then we want to fold a 32-bit
      // compare.
      if (Size == 32)
        return getOpcodeDef(TargetOpcode::G_ICMP, Reg, MRI);
      // We model scalar compares using 32-bit destinations right now.
      // If it's a 64-bit compare, it'll have 64-bit sources.
      Register ZExt;
      if (!mi_match(Reg, MRI,
        return nullptr;
      auto *Cmp = getOpcodeDef(TargetOpcode::G_ICMP, ZExt, MRI);
      if (!Cmp ||
          MRI.getType(Cmp->getOperand(2).getReg()).getSizeInBits() != 64)
        return nullptr;
      return Cmp;
    };
    // Try to match
    // z + (cmp pred, x, y)
    MachineInstr *Cmp = MatchCmp(AddRHS);
    if (!Cmp) {
      // (cmp pred, x, y) + z
      std::swap(AddLHS, AddRHS);
      Cmp = MatchCmp(AddRHS);
      if (!Cmp)
        return false;
    }
    // Emit the compare, then a CSINC that adds 1 to AddLHS when the inverted
    // condition is false (i.e. increments when the original compare is true).
    auto &PredOp = Cmp->getOperand(1);
    emitIntegerCompare(/*LHS=*/Cmp->getOperand(2),
                       /*RHS=*/Cmp->getOperand(3), PredOp, MIB);
    auto Pred = static_cast<CmpInst::Predicate>(PredOp.getPredicate());
        CmpInst::getInversePredicate(Pred), Cmp->getOperand(3).getReg(), &MRI);
    emitCSINC(/*Dst=*/AddDst, /*Src =*/AddLHS, /*Src2=*/AddLHS, InvCC, MIB);
    I.eraseFromParent();
    return true;
  }
  case TargetOpcode::G_OR: {
    // Look for operations that take the lower `Width=Size-ShiftImm` bits of
    // `ShiftSrc` and insert them into the upper `Width` bits of `MaskSrc` via
    // shifting and masking that we can replace with a BFI (encoded as a BFM).
    Register Dst = I.getOperand(0).getReg();
    LLT Ty = MRI.getType(Dst);

    if (!Ty.isScalar())
      return false;

    unsigned Size = Ty.getSizeInBits();
    if (Size != 32 && Size != 64)
      return false;

    // Match: or (shl ShiftSrc, ShiftImm), (and MaskSrc, MaskImm),
    // with each side having a single non-debug use.
    Register ShiftSrc;
    int64_t ShiftImm;
    Register MaskSrc;
    int64_t MaskImm;
    if (!mi_match(
            Dst, MRI,
            m_GOr(m_OneNonDBGUse(m_GShl(m_Reg(ShiftSrc), m_ICst(ShiftImm))),
                  m_OneNonDBGUse(m_GAnd(m_Reg(MaskSrc), m_ICst(MaskImm))))))
      return false;

    // The mask must cover exactly the bits below the shift amount.
    if (ShiftImm > Size || ((1ULL << ShiftImm) - 1ULL) != uint64_t(MaskImm))
      return false;

    int64_t Immr = Size - ShiftImm;
    int64_t Imms = Size - ShiftImm - 1;
    unsigned Opc = Size == 32 ? AArch64::BFMWri : AArch64::BFMXri;
    emitInstr(Opc, {Dst}, {MaskSrc, ShiftSrc, Immr, Imms}, MIB);
    I.eraseFromParent();
    return true;
  }
  case TargetOpcode::G_FENCE: {
    // Singlethread fences become a compiler-only MEMBARRIER; otherwise emit
    // a DMB with ISHLD for acquire-only orderings, ISH for the rest.
    if (I.getOperand(1).getImm() == 0)
      BuildMI(MBB, I, MIMetadata(I), TII.get(TargetOpcode::MEMBARRIER));
    else
      BuildMI(MBB, I, MIMetadata(I), TII.get(AArch64::DMB))
          .addImm(I.getOperand(0).getImm() == 4 ? 0x9 : 0xb);
    I.eraseFromParent();
    return true;
  }
  default:
    return false;
  }
}
2539
2540bool AArch64InstructionSelector::select(MachineInstr &I) {
2541 assert(I.getParent() && "Instruction should be in a basic block!");
2542 assert(I.getParent()->getParent() && "Instruction should be in a function!");
2543
2544 MachineBasicBlock &MBB = *I.getParent();
2545 MachineFunction &MF = *MBB.getParent();
2546 MachineRegisterInfo &MRI = MF.getRegInfo();
2547
2548 const AArch64Subtarget *Subtarget = &MF.getSubtarget<AArch64Subtarget>();
2549 if (Subtarget->requiresStrictAlign()) {
2550 // We don't support this feature yet.
2551 LLVM_DEBUG(dbgs() << "AArch64 GISel does not support strict-align yet\n");
2552 return false;
2553 }
2554
2556
2557 unsigned Opcode = I.getOpcode();
2558 // G_PHI requires same handling as PHI
2559 if (!I.isPreISelOpcode() || Opcode == TargetOpcode::G_PHI) {
2560 // Certain non-generic instructions also need some special handling.
2561
2562 if (Opcode == TargetOpcode::LOAD_STACK_GUARD) {
2564 return true;
2565 }
2566
2567 if (Opcode == TargetOpcode::PHI || Opcode == TargetOpcode::G_PHI) {
2568 const Register DefReg = I.getOperand(0).getReg();
2569 const LLT DefTy = MRI.getType(DefReg);
2570
2571 const RegClassOrRegBank &RegClassOrBank =
2572 MRI.getRegClassOrRegBank(DefReg);
2573
2574 const TargetRegisterClass *DefRC =
2576 if (!DefRC) {
2577 if (!DefTy.isValid()) {
2578 LLVM_DEBUG(dbgs() << "PHI operand has no type, not a gvreg?\n");
2579 return false;
2580 }
2581 const RegisterBank &RB = *cast<const RegisterBank *>(RegClassOrBank);
2582 DefRC = getRegClassForTypeOnBank(DefTy, RB);
2583 if (!DefRC) {
2584 LLVM_DEBUG(dbgs() << "PHI operand has unexpected size/bank\n");
2585 return false;
2586 }
2587 }
2588
2589 I.setDesc(TII.get(TargetOpcode::PHI));
2590
2591 return RBI.constrainGenericRegister(DefReg, *DefRC, MRI);
2592 }
2593
2594 if (I.isCopy())
2595 return selectCopy(I, TII, MRI, TRI, RBI);
2596
2597 if (I.isDebugInstr())
2598 return selectDebugInstr(I, MRI, RBI);
2599
2600 return true;
2601 }
2602
2603
2604 if (I.getNumOperands() != I.getNumExplicitOperands()) {
2605 LLVM_DEBUG(
2606 dbgs() << "Generic instruction has unexpected implicit operands\n");
2607 return false;
2608 }
2609
2610 // Try to do some lowering before we start instruction selecting. These
2611 // lowerings are purely transformations on the input G_MIR and so selection
2612 // must continue after any modification of the instruction.
2613 if (preISelLower(I)) {
2614 Opcode = I.getOpcode(); // The opcode may have been modified, refresh it.
2615 }
2616
2617 // There may be patterns where the importer can't deal with them optimally,
2618 // but does select it to a suboptimal sequence so our custom C++ selection
2619 // code later never has a chance to work on it. Therefore, we have an early
2620 // selection attempt here to give priority to certain selection routines
2621 // over the imported ones.
2622 if (earlySelect(I))
2623 return true;
2624
2625 if (selectImpl(I, *CoverageInfo))
2626 return true;
2627
2628 LLT Ty =
2629 I.getOperand(0).isReg() ? MRI.getType(I.getOperand(0).getReg()) : LLT{};
2630
2631 switch (Opcode) {
2632 case TargetOpcode::G_SBFX:
2633 case TargetOpcode::G_UBFX: {
2634 static const unsigned OpcTable[2][2] = {
2635 {AArch64::UBFMWri, AArch64::UBFMXri},
2636 {AArch64::SBFMWri, AArch64::SBFMXri}};
2637 bool IsSigned = Opcode == TargetOpcode::G_SBFX;
2638 unsigned Size = Ty.getSizeInBits();
2639 unsigned Opc = OpcTable[IsSigned][Size == 64];
2640 auto Cst1 =
2641 getIConstantVRegValWithLookThrough(I.getOperand(2).getReg(), MRI);
2642 assert(Cst1 && "Should have gotten a constant for src 1?");
2643 auto Cst2 =
2644 getIConstantVRegValWithLookThrough(I.getOperand(3).getReg(), MRI);
2645 assert(Cst2 && "Should have gotten a constant for src 2?");
2646 auto LSB = Cst1->Value.getZExtValue();
2647 auto Width = Cst2->Value.getZExtValue();
2648 auto BitfieldInst =
2649 MIB.buildInstr(Opc, {I.getOperand(0)}, {I.getOperand(1)})
2650 .addImm(LSB)
2651 .addImm(LSB + Width - 1);
2652 I.eraseFromParent();
2653 constrainSelectedInstRegOperands(*BitfieldInst, TII, TRI, RBI);
2654 return true;
2655 }
2656 case TargetOpcode::G_BRCOND:
2657 return selectCompareBranch(I, MF, MRI);
2658
2659 case TargetOpcode::G_BRINDIRECT: {
2660 const Function &Fn = MF.getFunction();
2661 if (std::optional<uint16_t> BADisc =
2663 auto MI = MIB.buildInstr(AArch64::BRA, {}, {I.getOperand(0).getReg()});
2664 MI.addImm(AArch64PACKey::IA);
2665 MI.addImm(*BADisc);
2666 MI.addReg(/*AddrDisc=*/AArch64::XZR);
2667 I.eraseFromParent();
2669 return true;
2670 }
2671 I.setDesc(TII.get(AArch64::BR));
2673 return true;
2674 }
2675
2676 case TargetOpcode::G_BRJT:
2677 return selectBrJT(I, MRI);
2678
2679 case AArch64::G_ADD_LOW: {
2680 // This op may have been separated from it's ADRP companion by the localizer
2681 // or some other code motion pass. Given that many CPUs will try to
2682 // macro fuse these operations anyway, select this into a MOVaddr pseudo
2683 // which will later be expanded into an ADRP+ADD pair after scheduling.
2684 MachineInstr *BaseMI = MRI.getVRegDef(I.getOperand(1).getReg());
2685 if (BaseMI->getOpcode() != AArch64::ADRP) {
2686 I.setDesc(TII.get(AArch64::ADDXri));
2687 I.addOperand(MachineOperand::CreateImm(0));
2689 return true;
2690 }
2692 "Expected small code model");
2693 auto Op1 = BaseMI->getOperand(1);
2694 auto Op2 = I.getOperand(2);
2695 auto MovAddr = MIB.buildInstr(AArch64::MOVaddr, {I.getOperand(0)}, {})
2696 .addGlobalAddress(Op1.getGlobal(), Op1.getOffset(),
2697 Op1.getTargetFlags())
2698 .addGlobalAddress(Op2.getGlobal(), Op2.getOffset(),
2699 Op2.getTargetFlags());
2700 I.eraseFromParent();
2701 constrainSelectedInstRegOperands(*MovAddr, TII, TRI, RBI);
2702 return true;
2703 }
2704
2705 case TargetOpcode::G_FCONSTANT: {
2706 const Register DefReg = I.getOperand(0).getReg();
2707 const LLT DefTy = MRI.getType(DefReg);
2708 const unsigned DefSize = DefTy.getSizeInBits();
2709 const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI);
2710
2711 const TargetRegisterClass &FPRRC = *getRegClassForTypeOnBank(DefTy, RB);
2712 // For 16, 64, and 128b values, emit a constant pool load.
2713 switch (DefSize) {
2714 default:
2715 llvm_unreachable("Unexpected destination size for G_FCONSTANT?");
2716 case 32:
2717 case 64: {
2718 bool OptForSize = shouldOptForSize(&MF);
2719 const auto &TLI = MF.getSubtarget().getTargetLowering();
2720 // If TLI says that this fpimm is illegal, then we'll expand to a
2721 // constant pool load.
2722 if (TLI->isFPImmLegal(I.getOperand(1).getFPImm()->getValueAPF(),
2723 EVT::getFloatingPointVT(DefSize), OptForSize))
2724 break;
2725 [[fallthrough]];
2726 }
2727 case 16:
2728 case 128: {
2729 auto *FPImm = I.getOperand(1).getFPImm();
2730 auto *LoadMI = emitLoadFromConstantPool(FPImm, MIB);
2731 if (!LoadMI) {
2732 LLVM_DEBUG(dbgs() << "Failed to load double constant pool entry\n");
2733 return false;
2734 }
2735 MIB.buildCopy({DefReg}, {LoadMI->getOperand(0).getReg()});
2736 I.eraseFromParent();
2737 return RBI.constrainGenericRegister(DefReg, FPRRC, MRI);
2738 }
2739 }
2740
2741 assert((DefSize == 32 || DefSize == 64) && "Unexpected const def size");
2742 // Either emit a FMOV, or emit a copy to emit a normal mov.
2743 const Register DefGPRReg = MRI.createVirtualRegister(
2744 DefSize == 32 ? &AArch64::GPR32RegClass : &AArch64::GPR64RegClass);
2745 MachineOperand &RegOp = I.getOperand(0);
2746 RegOp.setReg(DefGPRReg);
2747 MIB.setInsertPt(MIB.getMBB(), std::next(I.getIterator()));
2748 MIB.buildCopy({DefReg}, {DefGPRReg});
2749
2750 if (!RBI.constrainGenericRegister(DefReg, FPRRC, MRI)) {
2751 LLVM_DEBUG(dbgs() << "Failed to constrain G_FCONSTANT def operand\n");
2752 return false;
2753 }
2754
2755 MachineOperand &ImmOp = I.getOperand(1);
2756 ImmOp.ChangeToImmediate(
2758
2759 const unsigned MovOpc =
2760 DefSize == 64 ? AArch64::MOVi64imm : AArch64::MOVi32imm;
2761 I.setDesc(TII.get(MovOpc));
2763 return true;
2764 }
2765 case TargetOpcode::G_EXTRACT: {
2766 Register DstReg = I.getOperand(0).getReg();
2767 Register SrcReg = I.getOperand(1).getReg();
2768 LLT SrcTy = MRI.getType(SrcReg);
2769 LLT DstTy = MRI.getType(DstReg);
2770 (void)DstTy;
2771 unsigned SrcSize = SrcTy.getSizeInBits();
2772
2773 if (SrcTy.getSizeInBits() > 64) {
2774 // This should be an extract of an s128, which is like a vector extract.
2775 if (SrcTy.getSizeInBits() != 128)
2776 return false;
2777 // Only support extracting 64 bits from an s128 at the moment.
2778 if (DstTy.getSizeInBits() != 64)
2779 return false;
2780
2781 unsigned Offset = I.getOperand(2).getImm();
2782 if (Offset % 64 != 0)
2783 return false;
2784
2785 // Check we have the right regbank always.
2786 const RegisterBank &SrcRB = *RBI.getRegBank(SrcReg, MRI, TRI);
2787 const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
2788 assert(SrcRB.getID() == DstRB.getID() && "Wrong extract regbank!");
2789
2790 if (SrcRB.getID() == AArch64::GPRRegBankID) {
2791 auto NewI =
2792 MIB.buildInstr(TargetOpcode::COPY, {DstReg}, {})
2793 .addUse(SrcReg, {},
2794 Offset == 0 ? AArch64::sube64 : AArch64::subo64);
2795 constrainOperandRegClass(MF, TRI, MRI, TII, RBI, *NewI,
2796 AArch64::GPR64RegClass, NewI->getOperand(0));
2797 I.eraseFromParent();
2798 return true;
2799 }
2800
2801 // Emit the same code as a vector extract.
2802 // Offset must be a multiple of 64.
2803 unsigned LaneIdx = Offset / 64;
2804 MachineInstr *Extract = emitExtractVectorElt(
2805 DstReg, DstRB, LLT::scalar(64), SrcReg, LaneIdx, MIB);
2806 if (!Extract)
2807 return false;
2808 I.eraseFromParent();
2809 return true;
2810 }
2811
2812 I.setDesc(TII.get(SrcSize == 64 ? AArch64::UBFMXri : AArch64::UBFMWri));
2813 MachineInstrBuilder(MF, I).addImm(I.getOperand(2).getImm() +
2814 Ty.getSizeInBits() - 1);
2815
2816 if (SrcSize < 64) {
2817 assert(SrcSize == 32 && DstTy.getSizeInBits() == 16 &&
2818 "unexpected G_EXTRACT types");
2820 return true;
2821 }
2822
2823 DstReg = MRI.createGenericVirtualRegister(LLT::scalar(64));
2824 MIB.setInsertPt(MIB.getMBB(), std::next(I.getIterator()));
2825 MIB.buildInstr(TargetOpcode::COPY, {I.getOperand(0).getReg()}, {})
2826 .addReg(DstReg, {}, AArch64::sub_32);
2827 RBI.constrainGenericRegister(I.getOperand(0).getReg(),
2828 AArch64::GPR32RegClass, MRI);
2829 I.getOperand(0).setReg(DstReg);
2830
2832 return true;
2833 }
2834
2835 case TargetOpcode::G_INSERT: {
2836 LLT SrcTy = MRI.getType(I.getOperand(2).getReg());
2837 LLT DstTy = MRI.getType(I.getOperand(0).getReg());
2838 unsigned DstSize = DstTy.getSizeInBits();
2839 // Larger inserts are vectors, same-size ones should be something else by
2840 // now (split up or turned into COPYs).
2841 if (Ty.getSizeInBits() > 64 || SrcTy.getSizeInBits() > 32)
2842 return false;
2843
2844 I.setDesc(TII.get(DstSize == 64 ? AArch64::BFMXri : AArch64::BFMWri));
2845 unsigned LSB = I.getOperand(3).getImm();
2846 unsigned Width = MRI.getType(I.getOperand(2).getReg()).getSizeInBits();
2847 I.getOperand(3).setImm((DstSize - LSB) % DstSize);
2848 MachineInstrBuilder(MF, I).addImm(Width - 1);
2849
2850 if (DstSize < 64) {
2851 assert(DstSize == 32 && SrcTy.getSizeInBits() == 16 &&
2852 "unexpected G_INSERT types");
2854 return true;
2855 }
2856
2858 BuildMI(MBB, I.getIterator(), I.getDebugLoc(),
2859 TII.get(AArch64::SUBREG_TO_REG))
2860 .addDef(SrcReg)
2861 .addUse(I.getOperand(2).getReg())
2862 .addImm(AArch64::sub_32);
2863 RBI.constrainGenericRegister(I.getOperand(2).getReg(),
2864 AArch64::GPR32RegClass, MRI);
2865 I.getOperand(2).setReg(SrcReg);
2866
2868 return true;
2869 }
2870 case TargetOpcode::G_FRAME_INDEX: {
2871 // allocas and G_FRAME_INDEX are only supported in addrspace(0).
2872 if (Ty != LLT::pointer(0, 64)) {
2873 LLVM_DEBUG(dbgs() << "G_FRAME_INDEX pointer has type: " << Ty
2874 << ", expected: " << LLT::pointer(0, 64) << '\n');
2875 return false;
2876 }
2877 I.setDesc(TII.get(AArch64::ADDXri));
2878
2879 // MOs for a #0 shifted immediate.
2880 I.addOperand(MachineOperand::CreateImm(0));
2881 I.addOperand(MachineOperand::CreateImm(0));
2882
2884 return true;
2885 }
2886
2887 case TargetOpcode::G_GLOBAL_VALUE: {
2888 const GlobalValue *GV = nullptr;
2889 unsigned OpFlags;
2890 if (I.getOperand(1).isSymbol()) {
2891 OpFlags = I.getOperand(1).getTargetFlags();
2892 // Currently only used by "RtLibUseGOT".
2893 assert(OpFlags == AArch64II::MO_GOT);
2894 } else {
2895 GV = I.getOperand(1).getGlobal();
2896 if (GV->isThreadLocal()) {
2897 // We don't support instructions with emulated TLS variables yet
2898 if (TM.useEmulatedTLS())
2899 return false;
2900 return selectTLSGlobalValue(I, MRI);
2901 }
2902 OpFlags = STI.ClassifyGlobalReference(GV, TM);
2903 }
2904
2905 if (OpFlags & AArch64II::MO_GOT) {
2906 bool IsGOTSigned = MF.getInfo<AArch64FunctionInfo>()->hasELFSignedGOT();
2907 I.setDesc(TII.get(IsGOTSigned ? AArch64::LOADgotAUTH : AArch64::LOADgot));
2908 I.getOperand(1).setTargetFlags(OpFlags);
2909 I.addImplicitDefUseOperands(MF);
2910 } else if (TM.getCodeModel() == CodeModel::Large &&
2911 !TM.isPositionIndependent()) {
2912 // Materialize the global using movz/movk instructions.
2913 materializeLargeCMVal(I, GV, OpFlags);
2914 I.eraseFromParent();
2915 return true;
2916 } else if (TM.getCodeModel() == CodeModel::Tiny) {
2917 I.setDesc(TII.get(AArch64::ADR));
2918 I.getOperand(1).setTargetFlags(OpFlags);
2919 } else {
2920 I.setDesc(TII.get(AArch64::MOVaddr));
2921 I.getOperand(1).setTargetFlags(OpFlags | AArch64II::MO_PAGE);
2922 MachineInstrBuilder MIB(MF, I);
2923 MIB.addGlobalAddress(GV, I.getOperand(1).getOffset(),
2925 }
2927 return true;
2928 }
2929
2930 case TargetOpcode::G_PTRAUTH_GLOBAL_VALUE:
2931 return selectPtrAuthGlobalValue(I, MRI);
2932
2933 case TargetOpcode::G_ZEXTLOAD:
2934 case TargetOpcode::G_LOAD:
2935 case TargetOpcode::G_STORE: {
2936 GLoadStore &LdSt = cast<GLoadStore>(I);
2937 bool IsZExtLoad = I.getOpcode() == TargetOpcode::G_ZEXTLOAD;
2938 LLT PtrTy = MRI.getType(LdSt.getPointerReg());
2939
2940 // Can only handle AddressSpace 0, 64-bit pointers.
2941 if (PtrTy != LLT::pointer(0, 64)) {
2942 return false;
2943 }
2944
2945 uint64_t MemSizeInBytes = LdSt.getMemSize().getValue();
2946 unsigned MemSizeInBits = LdSt.getMemSizeInBits().getValue();
2947 AtomicOrdering Order = LdSt.getMMO().getSuccessOrdering();
2948
2949 // Need special instructions for atomics that affect ordering.
2950 if (isStrongerThanMonotonic(Order)) {
2951 assert(!isa<GZExtLoad>(LdSt));
2952 assert(MemSizeInBytes <= 8 &&
2953 "128-bit atomics should already be custom-legalized");
2954
2955 if (isa<GLoad>(LdSt)) {
2956 static constexpr unsigned LDAPROpcodes[] = {
2957 AArch64::LDAPRB, AArch64::LDAPRH, AArch64::LDAPRW, AArch64::LDAPRX};
2958 static constexpr unsigned LDAROpcodes[] = {
2959 AArch64::LDARB, AArch64::LDARH, AArch64::LDARW, AArch64::LDARX};
2960 ArrayRef<unsigned> Opcodes =
2961 STI.hasRCPC() && Order != AtomicOrdering::SequentiallyConsistent
2962 ? LDAPROpcodes
2963 : LDAROpcodes;
2964 I.setDesc(TII.get(Opcodes[Log2_32(MemSizeInBytes)]));
2965 } else {
2966 static constexpr unsigned Opcodes[] = {AArch64::STLRB, AArch64::STLRH,
2967 AArch64::STLRW, AArch64::STLRX};
2968 Register ValReg = LdSt.getReg(0);
2969 if (MRI.getType(ValReg).getSizeInBits() == 64 && MemSizeInBits != 64) {
2970 // Emit a subreg copy of 32 bits.
2971 Register NewVal = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
2972 MIB.buildInstr(TargetOpcode::COPY, {NewVal}, {})
2973 .addReg(I.getOperand(0).getReg(), {}, AArch64::sub_32);
2974 I.getOperand(0).setReg(NewVal);
2975 }
2976 I.setDesc(TII.get(Opcodes[Log2_32(MemSizeInBytes)]));
2977 }
2979 return true;
2980 }
2981
2982#ifndef NDEBUG
2983 const Register PtrReg = LdSt.getPointerReg();
2984 const RegisterBank &PtrRB = *RBI.getRegBank(PtrReg, MRI, TRI);
2985 // Check that the pointer register is valid.
2986 assert(PtrRB.getID() == AArch64::GPRRegBankID &&
2987 "Load/Store pointer operand isn't a GPR");
2988 assert(MRI.getType(PtrReg).isPointer() &&
2989 "Load/Store pointer operand isn't a pointer");
2990#endif
2991
2992 const Register ValReg = LdSt.getReg(0);
2993 const RegisterBank &RB = *RBI.getRegBank(ValReg, MRI, TRI);
2994 LLT ValTy = MRI.getType(ValReg);
2995
2996 // The code below doesn't support truncating stores, so we need to split it
2997 // again.
2998 if (isa<GStore>(LdSt) && ValTy.getSizeInBits() > MemSizeInBits) {
2999 unsigned SubReg;
3000 LLT MemTy = LdSt.getMMO().getMemoryType();
3001 auto *RC = getRegClassForTypeOnBank(MemTy, RB);
3002 if (!getSubRegForClass(RC, TRI, SubReg))
3003 return false;
3004
3005 // Generate a subreg copy.
3006 auto Copy = MIB.buildInstr(TargetOpcode::COPY, {MemTy}, {})
3007 .addReg(ValReg, {}, SubReg)
3008 .getReg(0);
3009 RBI.constrainGenericRegister(Copy, *RC, MRI);
3010 LdSt.getOperand(0).setReg(Copy);
3011 } else if (isa<GLoad>(LdSt) && ValTy.getSizeInBits() > MemSizeInBits) {
3012 // If this is an any-extending load from the FPR bank, split it into a regular
3013 // load + extend.
3014 if (RB.getID() == AArch64::FPRRegBankID) {
3015 unsigned SubReg;
3016 LLT MemTy = LdSt.getMMO().getMemoryType();
3017 auto *RC = getRegClassForTypeOnBank(MemTy, RB);
3018 if (!getSubRegForClass(RC, TRI, SubReg))
3019 return false;
3020 Register OldDst = LdSt.getReg(0);
3021 Register NewDst =
3023 LdSt.getOperand(0).setReg(NewDst);
3024 MRI.setRegBank(NewDst, RB);
3025 // Generate a SUBREG_TO_REG to extend it.
3026 MIB.setInsertPt(MIB.getMBB(), std::next(LdSt.getIterator()));
3027 MIB.buildInstr(AArch64::SUBREG_TO_REG, {OldDst}, {})
3028 .addUse(NewDst)
3029 .addImm(SubReg);
3030 auto SubRegRC = getRegClassForTypeOnBank(MRI.getType(OldDst), RB);
3031 RBI.constrainGenericRegister(OldDst, *SubRegRC, MRI);
3032 MIB.setInstr(LdSt);
3033 ValTy = MemTy; // This is no longer an extending load.
3034 }
3035 }
3036
3037 // Helper lambda for partially selecting I. Either returns the original
3038 // instruction with an updated opcode, or a new instruction.
3039 auto SelectLoadStoreAddressingMode = [&]() -> MachineInstr * {
3040 bool IsStore = isa<GStore>(I);
3041 const unsigned NewOpc =
3042 selectLoadStoreUIOp(I.getOpcode(), RB.getID(), MemSizeInBits);
3043 if (NewOpc == I.getOpcode())
3044 return nullptr;
3045 // Check if we can fold anything into the addressing mode.
3046 auto AddrModeFns =
3047 selectAddrModeIndexed(I.getOperand(1), MemSizeInBytes);
3048 if (!AddrModeFns) {
3049 // Can't fold anything. Use the original instruction.
3050 I.setDesc(TII.get(NewOpc));
3051 I.addOperand(MachineOperand::CreateImm(0));
3052 return &I;
3053 }
3054
3055 // Folded something. Create a new instruction and return it.
3056 auto NewInst = MIB.buildInstr(NewOpc, {}, {}, I.getFlags());
3057 Register CurValReg = I.getOperand(0).getReg();
3058 IsStore ? NewInst.addUse(CurValReg) : NewInst.addDef(CurValReg);
3059 NewInst.cloneMemRefs(I);
3060 for (auto &Fn : *AddrModeFns)
3061 Fn(NewInst);
3062 I.eraseFromParent();
3063 return &*NewInst;
3064 };
3065
3066 MachineInstr *LoadStore = SelectLoadStoreAddressingMode();
3067 if (!LoadStore)
3068 return false;
3069
3070 // If we're storing a 0, use WZR/XZR.
3071 if (Opcode == TargetOpcode::G_STORE) {
3073 LoadStore->getOperand(0).getReg(), MRI);
3074 if (CVal && CVal->Value == 0) {
3075 switch (LoadStore->getOpcode()) {
3076 case AArch64::STRWui:
3077 case AArch64::STRHHui:
3078 case AArch64::STRBBui:
3079 LoadStore->getOperand(0).setReg(AArch64::WZR);
3080 break;
3081 case AArch64::STRXui:
3082 LoadStore->getOperand(0).setReg(AArch64::XZR);
3083 break;
3084 }
3085 }
3086 }
3087
3088 if (IsZExtLoad || (Opcode == TargetOpcode::G_LOAD &&
3089 ValTy == LLT::scalar(64) && MemSizeInBits == 32)) {
3090 // The any/zextload from a smaller type to i32 should be handled by the
3091 // importer.
3092 if (MRI.getType(LoadStore->getOperand(0).getReg()).getSizeInBits() != 64)
3093 return false;
3094 // If we have an extending load then change the load's type to be a
3095 // narrower reg and zero_extend with SUBREG_TO_REG.
3096 Register LdReg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
3097 Register DstReg = LoadStore->getOperand(0).getReg();
3098 LoadStore->getOperand(0).setReg(LdReg);
3099
3100 MIB.setInsertPt(MIB.getMBB(), std::next(LoadStore->getIterator()));
3101 MIB.buildInstr(AArch64::SUBREG_TO_REG, {DstReg}, {})
3102 .addUse(LdReg)
3103 .addImm(AArch64::sub_32);
3104 constrainSelectedInstRegOperands(*LoadStore, TII, TRI, RBI);
3105 return RBI.constrainGenericRegister(DstReg, AArch64::GPR64allRegClass,
3106 MRI);
3107 }
3108 constrainSelectedInstRegOperands(*LoadStore, TII, TRI, RBI);
3109 return true;
3110 }
3111
3112 case TargetOpcode::G_INDEXED_ZEXTLOAD:
3113 case TargetOpcode::G_INDEXED_SEXTLOAD:
3114 return selectIndexedExtLoad(I, MRI);
3115 case TargetOpcode::G_INDEXED_LOAD:
3116 return selectIndexedLoad(I, MRI);
3117 case TargetOpcode::G_INDEXED_STORE:
3118 return selectIndexedStore(cast<GIndexedStore>(I), MRI);
3119
3120 case TargetOpcode::G_LSHR:
3121 case TargetOpcode::G_ASHR:
3122 if (MRI.getType(I.getOperand(0).getReg()).isVector())
3123 return selectVectorAshrLshr(I, MRI);
3124 [[fallthrough]];
3125 case TargetOpcode::G_SHL:
3126 if (Opcode == TargetOpcode::G_SHL &&
3127 MRI.getType(I.getOperand(0).getReg()).isVector())
3128 return selectVectorSHL(I, MRI);
3129
3130 // These shifts were legalized to have 64 bit shift amounts because we
3131 // want to take advantage of the selection patterns that assume the
3132 // immediates are s64s, however, selectBinaryOp will assume both operands
3133 // will have the same bit size.
3134 {
3135 Register SrcReg = I.getOperand(1).getReg();
3136 Register ShiftReg = I.getOperand(2).getReg();
3137 const LLT ShiftTy = MRI.getType(ShiftReg);
3138 const LLT SrcTy = MRI.getType(SrcReg);
3139 if (!SrcTy.isVector() && SrcTy.getSizeInBits() == 32 &&
3140 ShiftTy.getSizeInBits() == 64) {
3141 assert(!ShiftTy.isVector() && "unexpected vector shift ty");
3142 // Insert a subregister copy to implement a 64->32 trunc
3143 auto Trunc = MIB.buildInstr(TargetOpcode::COPY, {SrcTy}, {})
3144 .addReg(ShiftReg, {}, AArch64::sub_32);
3145 MRI.setRegBank(Trunc.getReg(0), RBI.getRegBank(AArch64::GPRRegBankID));
3146 I.getOperand(2).setReg(Trunc.getReg(0));
3147 }
3148 }
3149 [[fallthrough]];
3150 case TargetOpcode::G_OR: {
3151 // Reject the various things we don't support yet.
3152 if (unsupportedBinOp(I, RBI, MRI, TRI))
3153 return false;
3154
3155 const unsigned OpSize = Ty.getSizeInBits();
3156
3157 const Register DefReg = I.getOperand(0).getReg();
3158 const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI);
3159
3160 const unsigned NewOpc = selectBinaryOp(I.getOpcode(), RB.getID(), OpSize);
3161 if (NewOpc == I.getOpcode())
3162 return false;
3163
3164 I.setDesc(TII.get(NewOpc));
3165 // FIXME: Should the type be always reset in setDesc?
3166
3167 // Now that we selected an opcode, we need to constrain the register
3168 // operands to use appropriate classes.
3170 return true;
3171 }
3172
3173 case TargetOpcode::G_PTR_ADD: {
3174 emitADD(I.getOperand(0).getReg(), I.getOperand(1), I.getOperand(2), MIB);
3175 I.eraseFromParent();
3176 return true;
3177 }
3178
3179 case TargetOpcode::G_SADDE:
3180 case TargetOpcode::G_UADDE:
3181 case TargetOpcode::G_SSUBE:
3182 case TargetOpcode::G_USUBE:
3183 case TargetOpcode::G_SADDO:
3184 case TargetOpcode::G_UADDO:
3185 case TargetOpcode::G_SSUBO:
3186 case TargetOpcode::G_USUBO:
3187 return selectOverflowOp(I, MRI);
3188
3189 case TargetOpcode::G_PTRMASK: {
3190 Register MaskReg = I.getOperand(2).getReg();
3191 std::optional<int64_t> MaskVal = getIConstantVRegSExtVal(MaskReg, MRI);
3192 // TODO: Implement arbitrary cases
3193 if (!MaskVal || !isShiftedMask_64(*MaskVal))
3194 return false;
3195
3196 uint64_t Mask = *MaskVal;
3197 I.setDesc(TII.get(AArch64::ANDXri));
3198 I.getOperand(2).ChangeToImmediate(
3200
3202 return true;
3203 }
3204 case TargetOpcode::G_PTRTOINT:
3205 case TargetOpcode::G_TRUNC: {
3206 const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
3207 const LLT SrcTy = MRI.getType(I.getOperand(1).getReg());
3208
3209 const Register DstReg = I.getOperand(0).getReg();
3210 const Register SrcReg = I.getOperand(1).getReg();
3211
3212 const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
3213 const RegisterBank &SrcRB = *RBI.getRegBank(SrcReg, MRI, TRI);
3214
3215 if (DstRB.getID() != SrcRB.getID()) {
3216 LLVM_DEBUG(
3217 dbgs() << "G_TRUNC/G_PTRTOINT input/output on different banks\n");
3218 return false;
3219 }
3220
3221 if (DstRB.getID() == AArch64::GPRRegBankID) {
3222 const TargetRegisterClass *DstRC = getRegClassForTypeOnBank(DstTy, DstRB);
3223 if (!DstRC)
3224 return false;
3225
3226 const TargetRegisterClass *SrcRC = getRegClassForTypeOnBank(SrcTy, SrcRB);
3227 if (!SrcRC)
3228 return false;
3229
3230 if (!RBI.constrainGenericRegister(SrcReg, *SrcRC, MRI) ||
3231 !RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) {
3232 LLVM_DEBUG(dbgs() << "Failed to constrain G_TRUNC/G_PTRTOINT\n");
3233 return false;
3234 }
3235
3236 if (DstRC == SrcRC) {
3237 // Nothing to be done
3238 } else if (Opcode == TargetOpcode::G_TRUNC && DstTy == LLT::scalar(32) &&
3239 SrcTy == LLT::scalar(64)) {
3240 llvm_unreachable("TableGen can import this case");
3241 return false;
3242 } else if (DstRC == &AArch64::GPR32RegClass &&
3243 SrcRC == &AArch64::GPR64RegClass) {
3244 I.getOperand(1).setSubReg(AArch64::sub_32);
3245 } else {
3246 LLVM_DEBUG(
3247 dbgs() << "Unhandled mismatched classes in G_TRUNC/G_PTRTOINT\n");
3248 return false;
3249 }
3250
3251 I.setDesc(TII.get(TargetOpcode::COPY));
3252 return true;
3253 } else if (DstRB.getID() == AArch64::FPRRegBankID) {
3254 if (DstTy == LLT::fixed_vector(4, 16) &&
3255 SrcTy == LLT::fixed_vector(4, 32)) {
3256 I.setDesc(TII.get(AArch64::XTNv4i16));
3258 return true;
3259 }
3260
3261 if (!SrcTy.isVector() && SrcTy.getSizeInBits() == 128) {
3262 MachineInstr *Extract = emitExtractVectorElt(
3263 DstReg, DstRB, LLT::scalar(DstTy.getSizeInBits()), SrcReg, 0, MIB);
3264 if (!Extract)
3265 return false;
3266 I.eraseFromParent();
3267 return true;
3268 }
3269
3270 // We might have a vector G_PTRTOINT, in which case just emit a COPY.
3271 if (Opcode == TargetOpcode::G_PTRTOINT) {
3272 assert(DstTy.isVector() && "Expected an FPR ptrtoint to be a vector");
3273 I.setDesc(TII.get(TargetOpcode::COPY));
3274 return selectCopy(I, TII, MRI, TRI, RBI);
3275 }
3276 }
3277
3278 return false;
3279 }
3280
3281 case TargetOpcode::G_ANYEXT: {
3282 if (selectUSMovFromExtend(I, MRI))
3283 return true;
3284
3285 const Register DstReg = I.getOperand(0).getReg();
3286 const Register SrcReg = I.getOperand(1).getReg();
3287
3288 const RegisterBank &RBDst = *RBI.getRegBank(DstReg, MRI, TRI);
3289 if (RBDst.getID() != AArch64::GPRRegBankID) {
3290 LLVM_DEBUG(dbgs() << "G_ANYEXT on bank: " << RBDst
3291 << ", expected: GPR\n");
3292 return false;
3293 }
3294
3295 const RegisterBank &RBSrc = *RBI.getRegBank(SrcReg, MRI, TRI);
3296 if (RBSrc.getID() != AArch64::GPRRegBankID) {
3297 LLVM_DEBUG(dbgs() << "G_ANYEXT on bank: " << RBSrc
3298 << ", expected: GPR\n");
3299 return false;
3300 }
3301
3302 const unsigned DstSize = MRI.getType(DstReg).getSizeInBits();
3303
3304 if (DstSize == 0) {
3305 LLVM_DEBUG(dbgs() << "G_ANYEXT operand has no size, not a gvreg?\n");
3306 return false;
3307 }
3308
3309 if (DstSize != 64 && DstSize > 32) {
3310 LLVM_DEBUG(dbgs() << "G_ANYEXT to size: " << DstSize
3311 << ", expected: 32 or 64\n");
3312 return false;
3313 }
3314 // At this point G_ANYEXT is just like a plain COPY, but we need
3315 // to explicitly form the 64-bit value if any.
3316 if (DstSize > 32) {
3317 Register ExtSrc = MRI.createVirtualRegister(&AArch64::GPR64allRegClass);
3318 BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::SUBREG_TO_REG))
3319 .addDef(ExtSrc)
3320 .addUse(SrcReg)
3321 .addImm(AArch64::sub_32);
3322 I.getOperand(1).setReg(ExtSrc);
3323 }
3324 return selectCopy(I, TII, MRI, TRI, RBI);
3325 }
3326
3327 case TargetOpcode::G_ZEXT:
3328 case TargetOpcode::G_SEXT_INREG:
3329 case TargetOpcode::G_SEXT: {
3330 if (selectUSMovFromExtend(I, MRI))
3331 return true;
3332
3333 unsigned Opcode = I.getOpcode();
3334 const bool IsSigned = Opcode != TargetOpcode::G_ZEXT;
3335 const Register DefReg = I.getOperand(0).getReg();
3336 Register SrcReg = I.getOperand(1).getReg();
3337 const LLT DstTy = MRI.getType(DefReg);
3338 const LLT SrcTy = MRI.getType(SrcReg);
3339 unsigned DstSize = DstTy.getSizeInBits();
3340 unsigned SrcSize = SrcTy.getSizeInBits();
3341
3342 // SEXT_INREG has the same src reg size as dst, the size of the value to be
3343 // extended is encoded in the imm.
3344 if (Opcode == TargetOpcode::G_SEXT_INREG)
3345 SrcSize = I.getOperand(2).getImm();
3346
3347 if (DstTy.isVector())
3348 return false; // Should be handled by imported patterns.
3349
3350 assert((*RBI.getRegBank(DefReg, MRI, TRI)).getID() ==
3351 AArch64::GPRRegBankID &&
3352 "Unexpected ext regbank");
3353
3354 MachineInstr *ExtI;
3355
3356 // First check if we're extending the result of a load which has a dest type
3357 // smaller than 32 bits, then this zext is redundant. GPR32 is the smallest
3358 // GPR register on AArch64 and all loads which are smaller automatically
3359 // zero-extend the upper bits. E.g.
3360 // %v(s8) = G_LOAD %p, :: (load 1)
3361 // %v2(s32) = G_ZEXT %v(s8)
3362 if (!IsSigned) {
3363 auto *LoadMI = getOpcodeDef(TargetOpcode::G_LOAD, SrcReg, MRI);
3364 bool IsGPR =
3365 RBI.getRegBank(SrcReg, MRI, TRI)->getID() == AArch64::GPRRegBankID;
3366 if (LoadMI && IsGPR) {
3367 const MachineMemOperand *MemOp = *LoadMI->memoperands_begin();
3368 unsigned BytesLoaded = MemOp->getSize().getValue();
3369 if (BytesLoaded < 4 && SrcTy.getSizeInBytes() == BytesLoaded)
3370 return selectCopy(I, TII, MRI, TRI, RBI);
3371 }
3372
3373 // For the 32-bit -> 64-bit case, we can emit a mov (ORRWrs)
3374 // + SUBREG_TO_REG.
3375 if (IsGPR && SrcSize == 32 && DstSize == 64) {
3376 Register SubregToRegSrc =
3377 MRI.createVirtualRegister(&AArch64::GPR32RegClass);
3378 const Register ZReg = AArch64::WZR;
3379 MIB.buildInstr(AArch64::ORRWrs, {SubregToRegSrc}, {ZReg, SrcReg})
3380 .addImm(0);
3381
3382 MIB.buildInstr(AArch64::SUBREG_TO_REG, {DefReg}, {})
3383 .addUse(SubregToRegSrc)
3384 .addImm(AArch64::sub_32);
3385
3386 if (!RBI.constrainGenericRegister(DefReg, AArch64::GPR64RegClass,
3387 MRI)) {
3388 LLVM_DEBUG(dbgs() << "Failed to constrain G_ZEXT destination\n");
3389 return false;
3390 }
3391
3392 if (!RBI.constrainGenericRegister(SrcReg, AArch64::GPR32RegClass,
3393 MRI)) {
3394 LLVM_DEBUG(dbgs() << "Failed to constrain G_ZEXT source\n");
3395 return false;
3396 }
3397
3398 I.eraseFromParent();
3399 return true;
3400 }
3401 }
3402
3403 if (DstSize == 64) {
3404 if (Opcode != TargetOpcode::G_SEXT_INREG) {
3405 // FIXME: Can we avoid manually doing this?
3406 if (!RBI.constrainGenericRegister(SrcReg, AArch64::GPR32RegClass,
3407 MRI)) {
3408 LLVM_DEBUG(dbgs() << "Failed to constrain " << TII.getName(Opcode)
3409 << " operand\n");
3410 return false;
3411 }
3412 SrcReg = MIB.buildInstr(AArch64::SUBREG_TO_REG,
3413 {&AArch64::GPR64RegClass}, {})
3414 .addUse(SrcReg)
3415 .addImm(AArch64::sub_32)
3416 .getReg(0);
3417 }
3418
3419 ExtI = MIB.buildInstr(IsSigned ? AArch64::SBFMXri : AArch64::UBFMXri,
3420 {DefReg}, {SrcReg})
3421 .addImm(0)
3422 .addImm(SrcSize - 1);
3423 } else if (DstSize <= 32) {
3424 ExtI = MIB.buildInstr(IsSigned ? AArch64::SBFMWri : AArch64::UBFMWri,
3425 {DefReg}, {SrcReg})
3426 .addImm(0)
3427 .addImm(SrcSize - 1);
3428 } else {
3429 return false;
3430 }
3431
3433 I.eraseFromParent();
3434 return true;
3435 }
3436
3437 case TargetOpcode::G_FREEZE:
3438 return selectCopy(I, TII, MRI, TRI, RBI);
3439
3440 case TargetOpcode::G_INTTOPTR:
3441 // The importer is currently unable to import pointer types since they
3442 // didn't exist in SelectionDAG.
3443 return selectCopy(I, TII, MRI, TRI, RBI);
3444
3445 case TargetOpcode::G_BITCAST:
3446 // Imported SelectionDAG rules can handle every bitcast except those that
3447 // bitcast from a type to the same type. Ideally, these shouldn't occur
3448 // but we might not run an optimizer that deletes them. The other exception
3449 // is bitcasts involving pointer types, as SelectionDAG has no knowledge
3450 // of them.
3451 return selectCopy(I, TII, MRI, TRI, RBI);
3452
3453 case TargetOpcode::G_SELECT: {
3454 auto &Sel = cast<GSelect>(I);
3455 const Register CondReg = Sel.getCondReg();
3456 const Register TReg = Sel.getTrueReg();
3457 const Register FReg = Sel.getFalseReg();
3458
3459 if (tryOptSelect(Sel))
3460 return true;
3461
3462 // Make sure to use an unused vreg instead of wzr, so that the peephole
3463 // optimizations will be able to optimize these.
3464 Register DeadVReg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
3465 auto TstMI = MIB.buildInstr(AArch64::ANDSWri, {DeadVReg}, {CondReg})
3466 .addImm(AArch64_AM::encodeLogicalImmediate(1, 32));
3468 if (!emitSelect(Sel.getReg(0), TReg, FReg, AArch64CC::NE, MIB))
3469 return false;
3470 Sel.eraseFromParent();
3471 return true;
3472 }
3473 case TargetOpcode::G_ICMP: {
3474 if (Ty.isVector())
3475 return false;
3476
3477 if (Ty != LLT::scalar(32)) {
3478 LLVM_DEBUG(dbgs() << "G_ICMP result has type: " << Ty
3479 << ", expected: " << LLT::scalar(32) << '\n');
3480 return false;
3481 }
3482
3483 auto &PredOp = I.getOperand(1);
3484 emitIntegerCompare(I.getOperand(2), I.getOperand(3), PredOp, MIB);
3485 auto Pred = static_cast<CmpInst::Predicate>(PredOp.getPredicate());
3487 CmpInst::getInversePredicate(Pred), I.getOperand(3).getReg(), &MRI);
3488 emitCSINC(/*Dst=*/I.getOperand(0).getReg(), /*Src1=*/AArch64::WZR,
3489 /*Src2=*/AArch64::WZR, InvCC, MIB);
3490 I.eraseFromParent();
3491 return true;
3492 }
3493
3494 case TargetOpcode::G_FCMP: {
3495 CmpInst::Predicate Pred =
3496 static_cast<CmpInst::Predicate>(I.getOperand(1).getPredicate());
3497 if (!emitFPCompare(I.getOperand(2).getReg(), I.getOperand(3).getReg(), MIB,
3498 Pred) ||
3499 !emitCSetForFCmp(I.getOperand(0).getReg(), Pred, MIB))
3500 return false;
3501 I.eraseFromParent();
3502 return true;
3503 }
3504 case TargetOpcode::G_VASTART:
3505 return STI.isTargetDarwin() ? selectVaStartDarwin(I, MF, MRI)
3506 : selectVaStartAAPCS(I, MF, MRI);
3507 case TargetOpcode::G_INTRINSIC:
3508 return selectIntrinsic(I, MRI);
3509 case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS:
3510 return selectIntrinsicWithSideEffects(I, MRI);
3511 case TargetOpcode::G_IMPLICIT_DEF: {
3512 I.setDesc(TII.get(TargetOpcode::IMPLICIT_DEF));
3513 const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
3514 const Register DstReg = I.getOperand(0).getReg();
3515 const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
3516 const TargetRegisterClass *DstRC = getRegClassForTypeOnBank(DstTy, DstRB);
3517 RBI.constrainGenericRegister(DstReg, *DstRC, MRI);
3518 return true;
3519 }
3520 case TargetOpcode::G_BLOCK_ADDR: {
3521 Function *BAFn = I.getOperand(1).getBlockAddress()->getFunction();
3522 if (std::optional<uint16_t> BADisc =
3524 MIB.buildInstr(TargetOpcode::IMPLICIT_DEF, {AArch64::X16}, {});
3525 MIB.buildInstr(TargetOpcode::IMPLICIT_DEF, {AArch64::X17}, {});
3526 MIB.buildInstr(AArch64::MOVaddrPAC)
3527 .addBlockAddress(I.getOperand(1).getBlockAddress())
3529 .addReg(/*AddrDisc=*/AArch64::XZR)
3530 .addImm(*BADisc)
3531 .constrainAllUses(TII, TRI, RBI);
3532 MIB.buildCopy(I.getOperand(0).getReg(), Register(AArch64::X16));
3533 RBI.constrainGenericRegister(I.getOperand(0).getReg(),
3534 AArch64::GPR64RegClass, MRI);
3535 I.eraseFromParent();
3536 return true;
3537 }
3539 materializeLargeCMVal(I, I.getOperand(1).getBlockAddress(), 0);
3540 I.eraseFromParent();
3541 return true;
3542 } else {
3543 I.setDesc(TII.get(AArch64::MOVaddrBA));
3544 auto MovMI = BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::MOVaddrBA),
3545 I.getOperand(0).getReg())
3546 .addBlockAddress(I.getOperand(1).getBlockAddress(),
3547 /* Offset */ 0, AArch64II::MO_PAGE)
3549 I.getOperand(1).getBlockAddress(), /* Offset */ 0,
3551 I.eraseFromParent();
3553 return true;
3554 }
3555 }
3556 case AArch64::G_DUP: {
3557 // When the scalar of G_DUP is an s8/s16 gpr, they can't be selected by
3558 // imported patterns. Do it manually here. Avoiding generating s16 gpr is
3559 // difficult because at RBS we may end up pessimizing the fpr case if we
3560 // decided to add an anyextend to fix this. Manual selection is the most
3561 // robust solution for now.
3562 if (RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI)->getID() !=
3563 AArch64::GPRRegBankID)
3564 return false; // We expect the fpr regbank case to be imported.
3565 LLT VecTy = MRI.getType(I.getOperand(0).getReg());
3566 if (VecTy == LLT::fixed_vector(8, 8))
3567 I.setDesc(TII.get(AArch64::DUPv8i8gpr));
3568 else if (VecTy == LLT::fixed_vector(16, 8))
3569 I.setDesc(TII.get(AArch64::DUPv16i8gpr));
3570 else if (VecTy == LLT::fixed_vector(4, 16))
3571 I.setDesc(TII.get(AArch64::DUPv4i16gpr));
3572 else if (VecTy == LLT::fixed_vector(8, 16))
3573 I.setDesc(TII.get(AArch64::DUPv8i16gpr));
3574 else
3575 return false;
3577 return true;
3578 }
3579 case TargetOpcode::G_BUILD_VECTOR:
3580 return selectBuildVector(I, MRI);
3581 case TargetOpcode::G_MERGE_VALUES:
3582 return selectMergeValues(I, MRI);
3583 case TargetOpcode::G_UNMERGE_VALUES:
3584 return selectUnmergeValues(I, MRI);
3585 case TargetOpcode::G_SHUFFLE_VECTOR:
3586 return selectShuffleVector(I, MRI);
3587 case TargetOpcode::G_EXTRACT_VECTOR_ELT:
3588 return selectExtractElt(I, MRI);
3589 case TargetOpcode::G_CONCAT_VECTORS:
3590 return selectConcatVectors(I, MRI);
3591 case TargetOpcode::G_JUMP_TABLE:
3592 return selectJumpTable(I, MRI);
3593 case TargetOpcode::G_MEMCPY:
3594 case TargetOpcode::G_MEMCPY_INLINE:
3595 case TargetOpcode::G_MEMMOVE:
3596 case TargetOpcode::G_MEMSET:
3597 assert(STI.hasMOPS() && "Shouldn't get here without +mops feature");
3598 return selectMOPS(I, MRI);
3599 }
3600
3601 return false;
3602}
3603
3604bool AArch64InstructionSelector::selectAndRestoreState(MachineInstr &I) {
3605 MachineIRBuilderState OldMIBState = MIB.getState();
3606 bool Success = select(I);
3607 MIB.setState(OldMIBState);
3608 return Success;
3609}
3610
3611bool AArch64InstructionSelector::selectMOPS(MachineInstr &GI,
3612 MachineRegisterInfo &MRI) {
3613 unsigned Mopcode;
3614 switch (GI.getOpcode()) {
3615 case TargetOpcode::G_MEMCPY:
3616 case TargetOpcode::G_MEMCPY_INLINE:
3617 Mopcode = AArch64::MOPSMemoryCopyPseudo;
3618 break;
3619 case TargetOpcode::G_MEMMOVE:
3620 Mopcode = AArch64::MOPSMemoryMovePseudo;
3621 break;
3622 case TargetOpcode::G_MEMSET:
3623 // For tagged memset see llvm.aarch64.mops.memset.tag
3624 Mopcode = AArch64::MOPSMemorySetPseudo;
3625 break;
3626 }
3627
3628 auto &DstPtr = GI.getOperand(0);
3629 auto &SrcOrVal = GI.getOperand(1);
3630 auto &Size = GI.getOperand(2);
3631
3632 // Create copies of the registers that can be clobbered.
3633 const Register DstPtrCopy = MRI.cloneVirtualRegister(DstPtr.getReg());
3634 const Register SrcValCopy = MRI.cloneVirtualRegister(SrcOrVal.getReg());
3635 const Register SizeCopy = MRI.cloneVirtualRegister(Size.getReg());
3636
3637 const bool IsSet = Mopcode == AArch64::MOPSMemorySetPseudo;
3638 const auto &SrcValRegClass =
3639 IsSet ? AArch64::GPR64RegClass : AArch64::GPR64commonRegClass;
3640
3641 // Constrain to specific registers
3642 RBI.constrainGenericRegister(DstPtrCopy, AArch64::GPR64commonRegClass, MRI);
3643 RBI.constrainGenericRegister(SrcValCopy, SrcValRegClass, MRI);
3644 RBI.constrainGenericRegister(SizeCopy, AArch64::GPR64RegClass, MRI);
3645
3646 MIB.buildCopy(DstPtrCopy, DstPtr);
3647 MIB.buildCopy(SrcValCopy, SrcOrVal);
3648 MIB.buildCopy(SizeCopy, Size);
3649
3650 // New instruction uses the copied registers because it must update them.
3651 // The defs are not used since they don't exist in G_MEM*. They are still
3652 // tied.
3653 // Note: order of operands is different from G_MEMSET, G_MEMCPY, G_MEMMOVE
3654 Register DefDstPtr = MRI.createVirtualRegister(&AArch64::GPR64commonRegClass);
3655 Register DefSize = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
3656 if (IsSet) {
3657 MIB.buildInstr(Mopcode, {DefDstPtr, DefSize},
3658 {DstPtrCopy, SizeCopy, SrcValCopy});
3659 } else {
3660 Register DefSrcPtr = MRI.createVirtualRegister(&SrcValRegClass);
3661 MIB.buildInstr(Mopcode, {DefDstPtr, DefSrcPtr, DefSize},
3662 {DstPtrCopy, SrcValCopy, SizeCopy});
3663 }
3664
3665 GI.eraseFromParent();
3666 return true;
3667}
3668
3669bool AArch64InstructionSelector::selectBrJT(MachineInstr &I,
3670 MachineRegisterInfo &MRI) {
3671 assert(I.getOpcode() == TargetOpcode::G_BRJT && "Expected G_BRJT");
3672 Register JTAddr = I.getOperand(0).getReg();
3673 unsigned JTI = I.getOperand(1).getIndex();
3674 Register Index = I.getOperand(2).getReg();
3675
3676 MF->getInfo<AArch64FunctionInfo>()->setJumpTableEntryInfo(JTI, 4, nullptr);
3677
3678 // With aarch64-jump-table-hardening, we only expand the jump table dispatch
3679 // sequence later, to guarantee the integrity of the intermediate values.
3680 if (MF->getFunction().hasFnAttribute("aarch64-jump-table-hardening")) {
3682 if (STI.isTargetMachO()) {
3683 if (CM != CodeModel::Small && CM != CodeModel::Large)
3684 report_fatal_error("Unsupported code-model for hardened jump-table");
3685 } else {
3686 // Note that COFF support would likely also need JUMP_TABLE_DEBUG_INFO.
3687 assert(STI.isTargetELF() &&
3688 "jump table hardening only supported on MachO/ELF");
3689 if (CM != CodeModel::Small)
3690 report_fatal_error("Unsupported code-model for hardened jump-table");
3691 }
3692
3693 MIB.buildCopy({AArch64::X16}, I.getOperand(2).getReg());
3694 MIB.buildInstr(AArch64::BR_JumpTable)
3695 .addJumpTableIndex(I.getOperand(1).getIndex());
3696 I.eraseFromParent();
3697 return true;
3698 }
3699
3700 Register TargetReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
3701 Register ScratchReg = MRI.createVirtualRegister(&AArch64::GPR64spRegClass);
3702
3703 auto JumpTableInst = MIB.buildInstr(AArch64::JumpTableDest32,
3704 {TargetReg, ScratchReg}, {JTAddr, Index})
3705 .addJumpTableIndex(JTI);
3706 // Save the jump table info.
3707 MIB.buildInstr(TargetOpcode::JUMP_TABLE_DEBUG_INFO, {},
3708 {static_cast<int64_t>(JTI)});
3709 // Build the indirect branch.
3710 MIB.buildInstr(AArch64::BR, {}, {TargetReg});
3711 I.eraseFromParent();
3712 constrainSelectedInstRegOperands(*JumpTableInst, TII, TRI, RBI);
3713 return true;
3714}
3715
3716bool AArch64InstructionSelector::selectJumpTable(MachineInstr &I,
3717 MachineRegisterInfo &MRI) {
3718 assert(I.getOpcode() == TargetOpcode::G_JUMP_TABLE && "Expected jump table");
3719 assert(I.getOperand(1).isJTI() && "Jump table op should have a JTI!");
3720
3721 Register DstReg = I.getOperand(0).getReg();
3722 unsigned JTI = I.getOperand(1).getIndex();
3723 // We generate a MOVaddrJT which will get expanded to an ADRP + ADD later.
3724 auto MovMI =
3725 MIB.buildInstr(AArch64::MOVaddrJT, {DstReg}, {})
3726 .addJumpTableIndex(JTI, AArch64II::MO_PAGE)
3728 I.eraseFromParent();
3730 return true;
3731}
3732
3733bool AArch64InstructionSelector::selectTLSGlobalValue(
3734 MachineInstr &I, MachineRegisterInfo &MRI) {
3735 if (!STI.isTargetMachO())
3736 return false;
3737 MachineFunction &MF = *I.getParent()->getParent();
3738 MF.getFrameInfo().setAdjustsStack(true);
3739
3740 const auto &GlobalOp = I.getOperand(1);
3741 assert(GlobalOp.getOffset() == 0 &&
3742 "Shouldn't have an offset on TLS globals!");
3743 const GlobalValue &GV = *GlobalOp.getGlobal();
3744
3745 auto LoadGOT =
3746 MIB.buildInstr(AArch64::LOADgot, {&AArch64::GPR64commonRegClass}, {})
3747 .addGlobalAddress(&GV, 0, AArch64II::MO_TLS);
3748
3749 auto Load = MIB.buildInstr(AArch64::LDRXui, {&AArch64::GPR64commonRegClass},
3750 {LoadGOT.getReg(0)})
3751 .addImm(0);
3752
3753 MIB.buildCopy(Register(AArch64::X0), LoadGOT.getReg(0));
3754 // TLS calls preserve all registers except those that absolutely must be
3755 // trashed: X0 (it takes an argument), LR (it's a call) and NZCV (let's not be
3756 // silly).
3757 unsigned Opcode = getBLRCallOpcode(MF);
3758
3759 // With ptrauth-calls, the tlv access thunk pointer is authenticated (IA, 0).
3760 if (MF.getFunction().hasFnAttribute("ptrauth-calls")) {
3761 assert(Opcode == AArch64::BLR);
3762 Opcode = AArch64::BLRAAZ;
3763 }
3764
3765 MIB.buildInstr(Opcode, {}, {Load})
3766 .addUse(AArch64::X0, RegState::Implicit)
3767 .addDef(AArch64::X0, RegState::Implicit)
3768 .addRegMask(TRI.getTLSCallPreservedMask());
3769
3770 MIB.buildCopy(I.getOperand(0).getReg(), Register(AArch64::X0));
3771 RBI.constrainGenericRegister(I.getOperand(0).getReg(), AArch64::GPR64RegClass,
3772 MRI);
3773 I.eraseFromParent();
3774 return true;
3775}
3776
3777MachineInstr *AArch64InstructionSelector::emitScalarToVector(
3778 unsigned EltSize, const TargetRegisterClass *DstRC, Register Scalar,
3779 MachineIRBuilder &MIRBuilder) const {
3780 auto Undef = MIRBuilder.buildInstr(TargetOpcode::IMPLICIT_DEF, {DstRC}, {});
3781
3782 auto BuildFn = [&](unsigned SubregIndex) {
3783 auto Ins =
3784 MIRBuilder
3785 .buildInstr(TargetOpcode::INSERT_SUBREG, {DstRC}, {Undef, Scalar})
3786 .addImm(SubregIndex);
3789 return &*Ins;
3790 };
3791
3792 switch (EltSize) {
3793 case 8:
3794 return BuildFn(AArch64::bsub);
3795 case 16:
3796 return BuildFn(AArch64::hsub);
3797 case 32:
3798 return BuildFn(AArch64::ssub);
3799 case 64:
3800 return BuildFn(AArch64::dsub);
3801 default:
3802 return nullptr;
3803 }
3804}
3805
3806MachineInstr *
3807AArch64InstructionSelector::emitNarrowVector(Register DstReg, Register SrcReg,
3808 MachineIRBuilder &MIB,
3809 MachineRegisterInfo &MRI) const {
3810 LLT DstTy = MRI.getType(DstReg);
3811 const TargetRegisterClass *RC =
3812 getRegClassForTypeOnBank(DstTy, *RBI.getRegBank(SrcReg, MRI, TRI));
3813 if (RC != &AArch64::FPR32RegClass && RC != &AArch64::FPR64RegClass) {
3814 LLVM_DEBUG(dbgs() << "Unsupported register class!\n");
3815 return nullptr;
3816 }
3817 unsigned SubReg = 0;
3818 if (!getSubRegForClass(RC, TRI, SubReg))
3819 return nullptr;
3820 if (SubReg != AArch64::ssub && SubReg != AArch64::dsub) {
3821 LLVM_DEBUG(dbgs() << "Unsupported destination size! ("
3822 << DstTy.getSizeInBits() << "\n");
3823 return nullptr;
3824 }
3825 auto Copy = MIB.buildInstr(TargetOpcode::COPY, {DstReg}, {})
3826 .addReg(SrcReg, {}, SubReg);
3827 RBI.constrainGenericRegister(DstReg, *RC, MRI);
3828 return Copy;
3829}
3830
3831bool AArch64InstructionSelector::selectMergeValues(
3832 MachineInstr &I, MachineRegisterInfo &MRI) {
3833 assert(I.getOpcode() == TargetOpcode::G_MERGE_VALUES && "unexpected opcode");
3834 const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
3835 const LLT SrcTy = MRI.getType(I.getOperand(1).getReg());
3836 assert(!DstTy.isVector() && !SrcTy.isVector() && "invalid merge operation");
3837 const RegisterBank &RB = *RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI);
3838
3839 if (I.getNumOperands() != 3)
3840 return false;
3841
3842 // Merging 2 s64s into an s128.
3843 if (DstTy == LLT::scalar(128)) {
3844 if (SrcTy.getSizeInBits() != 64)
3845 return false;
3846 Register DstReg = I.getOperand(0).getReg();
3847 Register Src1Reg = I.getOperand(1).getReg();
3848 Register Src2Reg = I.getOperand(2).getReg();
3849 auto Tmp = MIB.buildInstr(TargetOpcode::IMPLICIT_DEF, {DstTy}, {});
3850 MachineInstr *InsMI = emitLaneInsert(std::nullopt, Tmp.getReg(0), Src1Reg,
3851 /* LaneIdx */ 0, RB, MIB);
3852 if (!InsMI)
3853 return false;
3854 MachineInstr *Ins2MI = emitLaneInsert(DstReg, InsMI->getOperand(0).getReg(),
3855 Src2Reg, /* LaneIdx */ 1, RB, MIB);
3856 if (!Ins2MI)
3857 return false;
3860 I.eraseFromParent();
3861 return true;
3862 }
3863
3864 if (RB.getID() != AArch64::GPRRegBankID)
3865 return false;
3866
3867 if (DstTy.getSizeInBits() != 64 || SrcTy.getSizeInBits() != 32)
3868 return false;
3869
3870 auto *DstRC = &AArch64::GPR64RegClass;
3871 Register SubToRegDef = MRI.createVirtualRegister(DstRC);
3872 MachineInstr &SubRegMI = *BuildMI(*I.getParent(), I, I.getDebugLoc(),
3873 TII.get(TargetOpcode::SUBREG_TO_REG))
3874 .addDef(SubToRegDef)
3875 .addUse(I.getOperand(1).getReg())
3876 .addImm(AArch64::sub_32);
3877 Register SubToRegDef2 = MRI.createVirtualRegister(DstRC);
3878 // Need to anyext the second scalar before we can use bfm
3879 MachineInstr &SubRegMI2 = *BuildMI(*I.getParent(), I, I.getDebugLoc(),
3880 TII.get(TargetOpcode::SUBREG_TO_REG))
3881 .addDef(SubToRegDef2)
3882 .addUse(I.getOperand(2).getReg())
3883 .addImm(AArch64::sub_32);
3884 MachineInstr &BFM =
3885 *BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::BFMXri))
3886 .addDef(I.getOperand(0).getReg())
3887 .addUse(SubToRegDef)
3888 .addUse(SubToRegDef2)
3889 .addImm(32)
3890 .addImm(31);
3891 constrainSelectedInstRegOperands(SubRegMI, TII, TRI, RBI);
3892 constrainSelectedInstRegOperands(SubRegMI2, TII, TRI, RBI);
3894 I.eraseFromParent();
3895 return true;
3896}
3897
3898static bool getLaneCopyOpcode(unsigned &CopyOpc, unsigned &ExtractSubReg,
3899 const unsigned EltSize) {
3900 // Choose a lane copy opcode and subregister based off of the size of the
3901 // vector's elements.
3902 switch (EltSize) {
3903 case 8:
3904 CopyOpc = AArch64::DUPi8;
3905 ExtractSubReg = AArch64::bsub;
3906 break;
3907 case 16:
3908 CopyOpc = AArch64::DUPi16;
3909 ExtractSubReg = AArch64::hsub;
3910 break;
3911 case 32:
3912 CopyOpc = AArch64::DUPi32;
3913 ExtractSubReg = AArch64::ssub;
3914 break;
3915 case 64:
3916 CopyOpc = AArch64::DUPi64;
3917 ExtractSubReg = AArch64::dsub;
3918 break;
3919 default:
3920 // Unknown size, bail out.
3921 LLVM_DEBUG(dbgs() << "Elt size '" << EltSize << "' unsupported.\n");
3922 return false;
3923 }
3924 return true;
3925}
3926
3927MachineInstr *AArch64InstructionSelector::emitExtractVectorElt(
3928 std::optional<Register> DstReg, const RegisterBank &DstRB, LLT ScalarTy,
3929 Register VecReg, unsigned LaneIdx, MachineIRBuilder &MIRBuilder) const {
3930 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
3931 unsigned CopyOpc = 0;
3932 unsigned ExtractSubReg = 0;
3933 if (!getLaneCopyOpcode(CopyOpc, ExtractSubReg, ScalarTy.getSizeInBits())) {
3934 LLVM_DEBUG(
3935 dbgs() << "Couldn't determine lane copy opcode for instruction.\n");
3936 return nullptr;
3937 }
3938
3939 const TargetRegisterClass *DstRC =
3940 getRegClassForTypeOnBank(ScalarTy, DstRB, true);
3941 if (!DstRC) {
3942 LLVM_DEBUG(dbgs() << "Could not determine destination register class.\n");
3943 return nullptr;
3944 }
3945
3946 const RegisterBank &VecRB = *RBI.getRegBank(VecReg, MRI, TRI);
3947 const LLT &VecTy = MRI.getType(VecReg);
3948 const TargetRegisterClass *VecRC =
3949 getRegClassForTypeOnBank(VecTy, VecRB, true);
3950 if (!VecRC) {
3951 LLVM_DEBUG(dbgs() << "Could not determine source register class.\n");
3952 return nullptr;
3953 }
3954
3955 // The register that we're going to copy into.
3956 Register InsertReg = VecReg;
3957 if (!DstReg)
3958 DstReg = MRI.createVirtualRegister(DstRC);
3959 // If the lane index is 0, we just use a subregister COPY.
3960 if (LaneIdx == 0) {
3961 auto Copy = MIRBuilder.buildInstr(TargetOpcode::COPY, {*DstReg}, {})
3962 .addReg(VecReg, {}, ExtractSubReg);
3963 RBI.constrainGenericRegister(*DstReg, *DstRC, MRI);
3964 return &*Copy;
3965 }
3966
3967 // Lane copies require 128-bit wide registers. If we're dealing with an
3968 // unpacked vector, then we need to move up to that width. Insert an implicit
3969 // def and a subregister insert to get us there.
3970 if (VecTy.getSizeInBits() != 128) {
3971 MachineInstr *ScalarToVector = emitScalarToVector(
3972 VecTy.getSizeInBits(), &AArch64::FPR128RegClass, VecReg, MIRBuilder);
3973 if (!ScalarToVector)
3974 return nullptr;
3975 InsertReg = ScalarToVector->getOperand(0).getReg();
3976 }
3977
3978 MachineInstr *LaneCopyMI =
3979 MIRBuilder.buildInstr(CopyOpc, {*DstReg}, {InsertReg}).addImm(LaneIdx);
3980 constrainSelectedInstRegOperands(*LaneCopyMI, TII, TRI, RBI);
3981
3982 // Make sure that we actually constrain the initial copy.
3983 RBI.constrainGenericRegister(*DstReg, *DstRC, MRI);
3984 return LaneCopyMI;
3985}
3986
3987bool AArch64InstructionSelector::selectExtractElt(
3988 MachineInstr &I, MachineRegisterInfo &MRI) {
3989 assert(I.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT &&
3990 "unexpected opcode!");
3991 Register DstReg = I.getOperand(0).getReg();
3992 const LLT NarrowTy = MRI.getType(DstReg);
3993 const Register SrcReg = I.getOperand(1).getReg();
3994 const LLT WideTy = MRI.getType(SrcReg);
3995 (void)WideTy;
3996 assert(WideTy.getSizeInBits() >= NarrowTy.getSizeInBits() &&
3997 "source register size too small!");
3998 assert(!NarrowTy.isVector() && "cannot extract vector into vector!");
3999
4000 // Need the lane index to determine the correct copy opcode.
4001 MachineOperand &LaneIdxOp = I.getOperand(2);
4002 assert(LaneIdxOp.isReg() && "Lane index operand was not a register?");
4003
4004 if (RBI.getRegBank(DstReg, MRI, TRI)->getID() != AArch64::FPRRegBankID) {
4005 LLVM_DEBUG(dbgs() << "Cannot extract into GPR.\n");
4006 return false;
4007 }
4008
4009 // Find the index to extract from.
4010 auto VRegAndVal = getIConstantVRegValWithLookThrough(LaneIdxOp.getReg(), MRI);
4011 if (!VRegAndVal)
4012 return false;
4013 unsigned LaneIdx = VRegAndVal->Value.getSExtValue();
4014
4015
4016 const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
4017 MachineInstr *Extract = emitExtractVectorElt(DstReg, DstRB, NarrowTy, SrcReg,
4018 LaneIdx, MIB);
4019 if (!Extract)
4020 return false;
4021
4022 I.eraseFromParent();
4023 return true;
4024}
4025
4026bool AArch64InstructionSelector::selectSplitVectorUnmerge(
4027 MachineInstr &I, MachineRegisterInfo &MRI) {
4028 unsigned NumElts = I.getNumOperands() - 1;
4029 Register SrcReg = I.getOperand(NumElts).getReg();
4030 const LLT NarrowTy = MRI.getType(I.getOperand(0).getReg());
4031 const LLT SrcTy = MRI.getType(SrcReg);
4032
4033 assert(NarrowTy.isVector() && "Expected an unmerge into vectors");
4034 if (SrcTy.getSizeInBits() > 128) {
4035 LLVM_DEBUG(dbgs() << "Unexpected vector type for vec split unmerge");
4036 return false;
4037 }
4038
4039 // We implement a split vector operation by treating the sub-vectors as
4040 // scalars and extracting them.
4041 const RegisterBank &DstRB =
4042 *RBI.getRegBank(I.getOperand(0).getReg(), MRI, TRI);
4043 for (unsigned OpIdx = 0; OpIdx < NumElts; ++OpIdx) {
4044 Register Dst = I.getOperand(OpIdx).getReg();
4045 MachineInstr *Extract =
4046 emitExtractVectorElt(Dst, DstRB, NarrowTy, SrcReg, OpIdx, MIB);
4047 if (!Extract)
4048 return false;
4049 }
4050 I.eraseFromParent();
4051 return true;
4052}
4053
4054bool AArch64InstructionSelector::selectUnmergeValues(MachineInstr &I,
4055 MachineRegisterInfo &MRI) {
4056 assert(I.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
4057 "unexpected opcode");
4058
4059 // TODO: Handle unmerging into GPRs and from scalars to scalars.
4060 if (RBI.getRegBank(I.getOperand(0).getReg(), MRI, TRI)->getID() !=
4061 AArch64::FPRRegBankID ||
4062 RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI)->getID() !=
4063 AArch64::FPRRegBankID) {
4064 LLVM_DEBUG(dbgs() << "Unmerging vector-to-gpr and scalar-to-scalar "
4065 "currently unsupported.\n");
4066 return false;
4067 }
4068
4069 // The last operand is the vector source register, and every other operand is
4070 // a register to unpack into.
4071 unsigned NumElts = I.getNumOperands() - 1;
4072 Register SrcReg = I.getOperand(NumElts).getReg();
4073 const LLT NarrowTy = MRI.getType(I.getOperand(0).getReg());
4074 const LLT WideTy = MRI.getType(SrcReg);
4075
4076 assert(WideTy.getSizeInBits() > NarrowTy.getSizeInBits() &&
4077 "source register size too small!");
4078
4079 if (!NarrowTy.isScalar())
4080 return selectSplitVectorUnmerge(I, MRI);
4081
4082 // Choose a lane copy opcode and subregister based off of the size of the
4083 // vector's elements.
4084 unsigned CopyOpc = 0;
4085 unsigned ExtractSubReg = 0;
4086 if (!getLaneCopyOpcode(CopyOpc, ExtractSubReg, NarrowTy.getSizeInBits()))
4087 return false;
4088
4089 // Set up for the lane copies.
4090 MachineBasicBlock &MBB = *I.getParent();
4091
4092 // Stores the registers we'll be copying from.
4093 SmallVector<Register, 4> InsertRegs;
4094
4095 // We'll use the first register twice, so we only need NumElts-1 registers.
4096 unsigned NumInsertRegs = NumElts - 1;
4097
4098 // If our elements fit into exactly 128 bits, then we can copy from the source
4099 // directly. Otherwise, we need to do a bit of setup with some subregister
4100 // inserts.
4101 if (NarrowTy.getSizeInBits() * NumElts == 128) {
4102 InsertRegs = SmallVector<Register, 4>(NumInsertRegs, SrcReg);
4103 } else {
4104 // No. We have to perform subregister inserts. For each insert, create an
4105 // implicit def and a subregister insert, and save the register we create.
4106 // For scalar sources, treat as a pseudo-vector of NarrowTy elements.
4107 unsigned EltSize = WideTy.isVector() ? WideTy.getScalarSizeInBits()
4108 : NarrowTy.getSizeInBits();
4109 const TargetRegisterClass *RC = getRegClassForTypeOnBank(
4110 LLT::fixed_vector(NumElts, EltSize), *RBI.getRegBank(SrcReg, MRI, TRI));
4111 unsigned SubReg = 0;
4112 bool Found = getSubRegForClass(RC, TRI, SubReg);
4113 (void)Found;
4114 assert(Found && "expected to find last operand's subeg idx");
4115 for (unsigned Idx = 0; Idx < NumInsertRegs; ++Idx) {
4116 Register ImpDefReg = MRI.createVirtualRegister(&AArch64::FPR128RegClass);
4117 MachineInstr &ImpDefMI =
4118 *BuildMI(MBB, I, I.getDebugLoc(), TII.get(TargetOpcode::IMPLICIT_DEF),
4119 ImpDefReg);
4120
4121 // Now, create the subregister insert from SrcReg.
4122 Register InsertReg = MRI.createVirtualRegister(&AArch64::FPR128RegClass);
4123 MachineInstr &InsMI =
4124 *BuildMI(MBB, I, I.getDebugLoc(),
4125 TII.get(TargetOpcode::INSERT_SUBREG), InsertReg)
4126 .addUse(ImpDefReg)
4127 .addUse(SrcReg)
4128 .addImm(SubReg);
4129
4130 constrainSelectedInstRegOperands(ImpDefMI, TII, TRI, RBI);
4132
4133 // Save the register so that we can copy from it after.
4134 InsertRegs.push_back(InsertReg);
4135 }
4136 }
4137
4138 // Now that we've created any necessary subregister inserts, we can
4139 // create the copies.
4140 //
4141 // Perform the first copy separately as a subregister copy.
4142 Register CopyTo = I.getOperand(0).getReg();
4143 auto FirstCopy = MIB.buildInstr(TargetOpcode::COPY, {CopyTo}, {})
4144 .addReg(InsertRegs[0], {}, ExtractSubReg);
4145 constrainSelectedInstRegOperands(*FirstCopy, TII, TRI, RBI);
4146
4147 // Now, perform the remaining copies as vector lane copies.
4148 unsigned LaneIdx = 1;
4149 for (Register InsReg : InsertRegs) {
4150 Register CopyTo = I.getOperand(LaneIdx).getReg();
4151 MachineInstr &CopyInst =
4152 *BuildMI(MBB, I, I.getDebugLoc(), TII.get(CopyOpc), CopyTo)
4153 .addUse(InsReg)
4154 .addImm(LaneIdx);
4155 constrainSelectedInstRegOperands(CopyInst, TII, TRI, RBI);
4156 ++LaneIdx;
4157 }
4158
4159 // Separately constrain the first copy's destination. Because of the
4160 // limitation in constrainOperandRegClass, we can't guarantee that this will
4161 // actually be constrained. So, do it ourselves using the second operand.
4162 const TargetRegisterClass *RC =
4163 MRI.getRegClassOrNull(I.getOperand(1).getReg());
4164 if (!RC) {
4165 LLVM_DEBUG(dbgs() << "Couldn't constrain copy destination.\n");
4166 return false;
4167 }
4168
4169 RBI.constrainGenericRegister(CopyTo, *RC, MRI);
4170 I.eraseFromParent();
4171 return true;
4172}
4173
4174bool AArch64InstructionSelector::selectConcatVectors(
4175 MachineInstr &I, MachineRegisterInfo &MRI) {
4176 assert(I.getOpcode() == TargetOpcode::G_CONCAT_VECTORS &&
4177 "Unexpected opcode");
4178 Register Dst = I.getOperand(0).getReg();
4179 Register Op1 = I.getOperand(1).getReg();
4180 Register Op2 = I.getOperand(2).getReg();
4181 MachineInstr *ConcatMI = emitVectorConcat(Dst, Op1, Op2, MIB);
4182 if (!ConcatMI)
4183 return false;
4184 I.eraseFromParent();
4185 return true;
4186}
4187
4188unsigned
4189AArch64InstructionSelector::emitConstantPoolEntry(const Constant *CPVal,
4190 MachineFunction &MF) const {
4191 Type *CPTy = CPVal->getType();
4192 Align Alignment = MF.getDataLayout().getPrefTypeAlign(CPTy);
4193
4194 MachineConstantPool *MCP = MF.getConstantPool();
4195 return MCP->getConstantPoolIndex(CPVal, Alignment);
4196}
4197
4198MachineInstr *AArch64InstructionSelector::emitLoadFromConstantPool(
4199 const Constant *CPVal, MachineIRBuilder &MIRBuilder) const {
4200 const TargetRegisterClass *RC;
4201 unsigned Opc;
4202 bool IsTiny = TM.getCodeModel() == CodeModel::Tiny;
4203 unsigned Size = MIRBuilder.getDataLayout().getTypeStoreSize(CPVal->getType());
4204 switch (Size) {
4205 case 16:
4206 RC = &AArch64::FPR128RegClass;
4207 Opc = IsTiny ? AArch64::LDRQl : AArch64::LDRQui;
4208 break;
4209 case 8:
4210 RC = &AArch64::FPR64RegClass;
4211 Opc = IsTiny ? AArch64::LDRDl : AArch64::LDRDui;
4212 break;
4213 case 4:
4214 RC = &AArch64::FPR32RegClass;
4215 Opc = IsTiny ? AArch64::LDRSl : AArch64::LDRSui;
4216 break;
4217 case 2:
4218 RC = &AArch64::FPR16RegClass;
4219 Opc = AArch64::LDRHui;
4220 break;
4221 default:
4222 LLVM_DEBUG(dbgs() << "Could not load from constant pool of type "
4223 << *CPVal->getType());
4224 return nullptr;
4225 }
4226
4227 MachineInstr *LoadMI = nullptr;
4228 auto &MF = MIRBuilder.getMF();
4229 unsigned CPIdx = emitConstantPoolEntry(CPVal, MF);
4230 if (IsTiny && (Size == 16 || Size == 8 || Size == 4)) {
4231 // Use load(literal) for tiny code model.
4232 LoadMI = &*MIRBuilder.buildInstr(Opc, {RC}, {}).addConstantPoolIndex(CPIdx);
4233 } else {
4234 auto Adrp =
4235 MIRBuilder.buildInstr(AArch64::ADRP, {&AArch64::GPR64RegClass}, {})
4236 .addConstantPoolIndex(CPIdx, 0, AArch64II::MO_PAGE);
4237
4238 LoadMI = &*MIRBuilder.buildInstr(Opc, {RC}, {Adrp})
4239 .addConstantPoolIndex(
4241
4243 }
4244
4245 MachinePointerInfo PtrInfo = MachinePointerInfo::getConstantPool(MF);
4246 LoadMI->addMemOperand(MF, MF.getMachineMemOperand(PtrInfo,
4248 Size, Align(Size)));
4250 return LoadMI;
4251}
4252
4253/// Return an <Opcode, SubregIndex> pair to do an vector elt insert of a given
4254/// size and RB.
4255static std::pair<unsigned, unsigned>
4256getInsertVecEltOpInfo(const RegisterBank &RB, unsigned EltSize) {
4257 unsigned Opc, SubregIdx;
4258 if (RB.getID() == AArch64::GPRRegBankID) {
4259 if (EltSize == 8) {
4260 Opc = AArch64::INSvi8gpr;
4261 SubregIdx = AArch64::bsub;
4262 } else if (EltSize == 16) {
4263 Opc = AArch64::INSvi16gpr;
4264 SubregIdx = AArch64::ssub;
4265 } else if (EltSize == 32) {
4266 Opc = AArch64::INSvi32gpr;
4267 SubregIdx = AArch64::ssub;
4268 } else if (EltSize == 64) {
4269 Opc = AArch64::INSvi64gpr;
4270 SubregIdx = AArch64::dsub;
4271 } else {
4272 llvm_unreachable("invalid elt size!");
4273 }
4274 } else {
4275 if (EltSize == 8) {
4276 Opc = AArch64::INSvi8lane;
4277 SubregIdx = AArch64::bsub;
4278 } else if (EltSize == 16) {
4279 Opc = AArch64::INSvi16lane;
4280 SubregIdx = AArch64::hsub;
4281 } else if (EltSize == 32) {
4282 Opc = AArch64::INSvi32lane;
4283 SubregIdx = AArch64::ssub;
4284 } else if (EltSize == 64) {
4285 Opc = AArch64::INSvi64lane;
4286 SubregIdx = AArch64::dsub;
4287 } else {
4288 llvm_unreachable("invalid elt size!");
4289 }
4290 }
4291 return std::make_pair(Opc, SubregIdx);
4292}
4293
4294MachineInstr *AArch64InstructionSelector::emitInstr(
4295 unsigned Opcode, std::initializer_list<llvm::DstOp> DstOps,
4296 std::initializer_list<llvm::SrcOp> SrcOps, MachineIRBuilder &MIRBuilder,
4297 const ComplexRendererFns &RenderFns) const {
4298 assert(Opcode && "Expected an opcode?");
4299 assert(!isPreISelGenericOpcode(Opcode) &&
4300 "Function should only be used to produce selected instructions!");
4301 auto MI = MIRBuilder.buildInstr(Opcode, DstOps, SrcOps);
4302 if (RenderFns)
4303 for (auto &Fn : *RenderFns)
4304 Fn(MI);
4306 return &*MI;
4307}
4308
4309MachineInstr *AArch64InstructionSelector::emitAddSub(
4310 const std::array<std::array<unsigned, 2>, 5> &AddrModeAndSizeToOpcode,
4311 Register Dst, MachineOperand &LHS, MachineOperand &RHS,
4312 MachineIRBuilder &MIRBuilder) const {
4313 MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
4314 assert(LHS.isReg() && RHS.isReg() && "Expected register operands?");
4315 auto Ty = MRI.getType(LHS.getReg());
4316 assert(!Ty.isVector() && "Expected a scalar or pointer?");
4317 unsigned Size = Ty.getSizeInBits();
4318 assert((Size == 32 || Size == 64) && "Expected a 32-bit or 64-bit type only");
4319 bool Is32Bit = Size == 32;
4320
4321 // INSTRri form with positive arithmetic immediate.
4322 if (auto Fns = selectArithImmed(RHS))
4323 return emitInstr(AddrModeAndSizeToOpcode[0][Is32Bit], {Dst}, {LHS},
4324 MIRBuilder, Fns);
4325
4326 // INSTRri form with negative arithmetic immediate.
4327 if (auto Fns = selectNegArithImmed(RHS))
4328 return emitInstr(AddrModeAndSizeToOpcode[3][Is32Bit], {Dst}, {LHS},
4329 MIRBuilder, Fns);
4330
4331 // INSTRrx form.
4332 if (auto Fns = selectArithExtendedRegister(RHS))
4333 return emitInstr(AddrModeAndSizeToOpcode[4][Is32Bit], {Dst}, {LHS},
4334 MIRBuilder, Fns);
4335
4336 // INSTRrs form.
4337 if (auto Fns = selectShiftedRegister(RHS))
4338 return emitInstr(AddrModeAndSizeToOpcode[1][Is32Bit], {Dst}, {LHS},
4339 MIRBuilder, Fns);
4340 return emitInstr(AddrModeAndSizeToOpcode[2][Is32Bit], {Dst}, {LHS, RHS},
4341 MIRBuilder);
4342}
4343
4344MachineInstr *
4345AArch64InstructionSelector::emitADD(Register DefReg, MachineOperand &LHS,
4346 MachineOperand &RHS,
4347 MachineIRBuilder &MIRBuilder) const {
4348 const std::array<std::array<unsigned, 2>, 5> OpcTable{
4349 {{AArch64::ADDXri, AArch64::ADDWri},
4350 {AArch64::ADDXrs, AArch64::ADDWrs},
4351 {AArch64::ADDXrr, AArch64::ADDWrr},
4352 {AArch64::SUBXri, AArch64::SUBWri},
4353 {AArch64::ADDXrx, AArch64::ADDWrx}}};
4354 return emitAddSub(OpcTable, DefReg, LHS, RHS, MIRBuilder);
4355}
4356
4357MachineInstr *
4358AArch64InstructionSelector::emitADDS(Register Dst, MachineOperand &LHS,
4359 MachineOperand &RHS,
4360 MachineIRBuilder &MIRBuilder) const {
4361 const std::array<std::array<unsigned, 2>, 5> OpcTable{
4362 {{AArch64::ADDSXri, AArch64::ADDSWri},
4363 {AArch64::ADDSXrs, AArch64::ADDSWrs},
4364 {AArch64::ADDSXrr, AArch64::ADDSWrr},
4365 {AArch64::SUBSXri, AArch64::SUBSWri},
4366 {AArch64::ADDSXrx, AArch64::ADDSWrx}}};
4367 return emitAddSub(OpcTable, Dst, LHS, RHS, MIRBuilder);
4368}
4369
4370MachineInstr *
4371AArch64InstructionSelector::emitSUBS(Register Dst, MachineOperand &LHS,
4372 MachineOperand &RHS,
4373 MachineIRBuilder &MIRBuilder) const {
4374 const std::array<std::array<unsigned, 2>, 5> OpcTable{
4375 {{AArch64::SUBSXri, AArch64::SUBSWri},
4376 {AArch64::SUBSXrs, AArch64::SUBSWrs},
4377 {AArch64::SUBSXrr, AArch64::SUBSWrr},
4378 {AArch64::ADDSXri, AArch64::ADDSWri},
4379 {AArch64::SUBSXrx, AArch64::SUBSWrx}}};
4380 return emitAddSub(OpcTable, Dst, LHS, RHS, MIRBuilder);
4381}
4382
4383MachineInstr *
4384AArch64InstructionSelector::emitADCS(Register Dst, MachineOperand &LHS,
4385 MachineOperand &RHS,
4386 MachineIRBuilder &MIRBuilder) const {
4387 assert(LHS.isReg() && RHS.isReg() && "Expected register operands?");
4388 MachineRegisterInfo *MRI = MIRBuilder.getMRI();
4389 bool Is32Bit = (MRI->getType(LHS.getReg()).getSizeInBits() == 32);
4390 static const unsigned OpcTable[2] = {AArch64::ADCSXr, AArch64::ADCSWr};
4391 return emitInstr(OpcTable[Is32Bit], {Dst}, {LHS, RHS}, MIRBuilder);
4392}
4393
4394MachineInstr *
4395AArch64InstructionSelector::emitSBCS(Register Dst, MachineOperand &LHS,
4396 MachineOperand &RHS,
4397 MachineIRBuilder &MIRBuilder) const {
4398 assert(LHS.isReg() && RHS.isReg() && "Expected register operands?");
4399 MachineRegisterInfo *MRI = MIRBuilder.getMRI();
4400 bool Is32Bit = (MRI->getType(LHS.getReg()).getSizeInBits() == 32);
4401 static const unsigned OpcTable[2] = {AArch64::SBCSXr, AArch64::SBCSWr};
4402 return emitInstr(OpcTable[Is32Bit], {Dst}, {LHS, RHS}, MIRBuilder);
4403}
4404
4405MachineInstr *
4406AArch64InstructionSelector::emitCMP(MachineOperand &LHS, MachineOperand &RHS,
4407 MachineIRBuilder &MIRBuilder) const {
4408 MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
4409 bool Is32Bit = MRI.getType(LHS.getReg()).getSizeInBits() == 32;
4410 auto RC = Is32Bit ? &AArch64::GPR32RegClass : &AArch64::GPR64RegClass;
4411 return emitSUBS(MRI.createVirtualRegister(RC), LHS, RHS, MIRBuilder);
4412}
4413
4414MachineInstr *
4415AArch64InstructionSelector::emitCMN(MachineOperand &LHS, MachineOperand &RHS,
4416 MachineIRBuilder &MIRBuilder) const {
4417 MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
4418 bool Is32Bit = (MRI.getType(LHS.getReg()).getSizeInBits() == 32);
4419 auto RC = Is32Bit ? &AArch64::GPR32RegClass : &AArch64::GPR64RegClass;
4420 return emitADDS(MRI.createVirtualRegister(RC), LHS, RHS, MIRBuilder);
4421}
4422
4423MachineInstr *
4424AArch64InstructionSelector::emitTST(MachineOperand &LHS, MachineOperand &RHS,
4425 MachineIRBuilder &MIRBuilder) const {
4426 assert(LHS.isReg() && RHS.isReg() && "Expected register operands?");
4427 MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
4428 LLT Ty = MRI.getType(LHS.getReg());
4429 unsigned RegSize = Ty.getSizeInBits();
4430 bool Is32Bit = (RegSize == 32);
4431 const unsigned OpcTable[3][2] = {{AArch64::ANDSXri, AArch64::ANDSWri},
4432 {AArch64::ANDSXrs, AArch64::ANDSWrs},
4433 {AArch64::ANDSXrr, AArch64::ANDSWrr}};
4434 // ANDS needs a logical immediate for its immediate form. Check if we can
4435 // fold one in.
4436 if (auto ValAndVReg = getIConstantVRegValWithLookThrough(RHS.getReg(), MRI)) {
4437 int64_t Imm = ValAndVReg->Value.getSExtValue();
4438
4440 auto TstMI = MIRBuilder.buildInstr(OpcTable[0][Is32Bit], {Ty}, {LHS});
4443 return &*TstMI;
4444 }
4445 }
4446
4447 if (auto Fns = selectLogicalShiftedRegister(RHS))
4448 return emitInstr(OpcTable[1][Is32Bit], {Ty}, {LHS}, MIRBuilder, Fns);
4449 return emitInstr(OpcTable[2][Is32Bit], {Ty}, {LHS, RHS}, MIRBuilder);
4450}
4451
4452MachineInstr *AArch64InstructionSelector::emitIntegerCompare(
4453 MachineOperand &LHS, MachineOperand &RHS, MachineOperand &Predicate,
4454 MachineIRBuilder &MIRBuilder) const {
4455 assert(LHS.isReg() && RHS.isReg() && "Expected LHS and RHS to be registers!");
4456 assert(Predicate.isPredicate() && "Expected predicate?");
4457 MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
4458 LLT CmpTy = MRI.getType(LHS.getReg());
4459 assert(!CmpTy.isVector() && "Expected scalar or pointer");
4460 unsigned Size = CmpTy.getSizeInBits();
4461 (void)Size;
4462 assert((Size == 32 || Size == 64) && "Expected a 32-bit or 64-bit LHS/RHS?");
4463 // Fold the compare into a cmn or tst if possible.
4464 if (auto FoldCmp = tryFoldIntegerCompare(LHS, RHS, Predicate, MIRBuilder))
4465 return FoldCmp;
4466 return emitCMP(LHS, RHS, MIRBuilder);
4467}
4468
/// Materialize the boolean result of a floating point compare into \p Dst
/// using CSINC from WZR. Predicates that need two AArch64 condition codes
/// produce two CSINCs whose results are ORed together.
MachineInstr *AArch64InstructionSelector::emitCSetForFCmp(
    Register Dst, CmpInst::Predicate Pred, MachineIRBuilder &MIRBuilder) const {
  MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
#ifndef NDEBUG
  LLT Ty = MRI.getType(Dst);
  assert(!Ty.isVector() && Ty.getSizeInBits() == 32 &&
         "Expected a 32-bit scalar register?");
#endif
  const Register ZReg = AArch64::WZR;
  AArch64CC::CondCode CC1, CC2;
  changeFCMPPredToAArch64CC(Pred, CC1, CC2);
  // CSINC increments when its predicate is false, so invert each condition
  // to get a 1 exactly when the original condition holds.
  auto InvCC1 = AArch64CC::getInvertedCondCode(CC1);
  if (CC2 == AArch64CC::AL)
    // A single condition code covers this predicate.
    return emitCSINC(/*Dst=*/Dst, /*Src1=*/ZReg, /*Src2=*/ZReg, InvCC1,
                     MIRBuilder);
  // Two condition codes are required: cset each one separately and OR the
  // two results into Dst.
  const TargetRegisterClass *RC = &AArch64::GPR32RegClass;
  Register Def1Reg = MRI.createVirtualRegister(RC);
  Register Def2Reg = MRI.createVirtualRegister(RC);
  auto InvCC2 = AArch64CC::getInvertedCondCode(CC2);
  emitCSINC(/*Dst=*/Def1Reg, /*Src1=*/ZReg, /*Src2=*/ZReg, InvCC1, MIRBuilder);
  emitCSINC(/*Dst=*/Def2Reg, /*Src1=*/ZReg, /*Src2=*/ZReg, InvCC2, MIRBuilder);
  auto OrMI = MIRBuilder.buildInstr(AArch64::ORRWrr, {Dst}, {Def1Reg, Def2Reg});
  return &*OrMI;
}
4494
/// Emit an FCMP for \p LHS and \p RHS, using the compare-against-+0.0
/// immediate form when one operand is a positive floating point zero.
/// Returns nullptr for vector types (handled elsewhere).
MachineInstr *AArch64InstructionSelector::emitFPCompare(
    Register LHS, Register RHS, MachineIRBuilder &MIRBuilder,
    std::optional<CmpInst::Predicate> Pred) const {
  MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
  LLT Ty = MRI.getType(LHS);
  if (Ty.isVector())
    return nullptr;
  unsigned OpSize = Ty.getSizeInBits();
  assert(OpSize == 16 || OpSize == 32 || OpSize == 64);

  // If this is a compare against +0.0, then we don't have
  // to explicitly materialize a constant.
  const ConstantFP *FPImm = getConstantFPVRegVal(RHS, MRI);
  bool ShouldUseImm = FPImm && (FPImm->isZero() && !FPImm->isNegative());

  auto IsEqualityPred = [](CmpInst::Predicate P) {
    return P == CmpInst::FCMP_OEQ || P == CmpInst::FCMP_ONE ||
  };
  // Equality-style predicates are symmetric, so if only the LHS is +0.0 we
  // can commute the operands and still use the immediate form.
  if (!ShouldUseImm && Pred && IsEqualityPred(*Pred)) {
    // Try commuting the operands.
    const ConstantFP *LHSImm = getConstantFPVRegVal(LHS, MRI);
    if (LHSImm && (LHSImm->isZero() && !LHSImm->isNegative())) {
      ShouldUseImm = true;
      std::swap(LHS, RHS);
    }
  }
  // Row 0: register-register FCMP, row 1: compare-against-zero form; the
  // column picks the half/single/double precision encoding.
  unsigned CmpOpcTbl[2][3] = {
      {AArch64::FCMPHrr, AArch64::FCMPSrr, AArch64::FCMPDrr},
      {AArch64::FCMPHri, AArch64::FCMPSri, AArch64::FCMPDri}};
  unsigned CmpOpc =
      CmpOpcTbl[ShouldUseImm][OpSize == 16 ? 0 : (OpSize == 32 ? 1 : 2)];

  // Partially build the compare. Decide if we need to add a use for the
  // third operand based off whether or not we're comparing against 0.0.
  auto CmpMI = MIRBuilder.buildInstr(CmpOpc).addUse(LHS);
  if (!ShouldUseImm)
    CmpMI.addUse(RHS);
  return &*CmpMI;
}
4537
/// Concatenate two 64-bit vectors \p Op1 and \p Op2 of the same type into a
/// 128-bit vector. If \p Dst is not given, a fresh virtual register is
/// created. Returns the defining instruction, or nullptr on failure.
MachineInstr *AArch64InstructionSelector::emitVectorConcat(
    std::optional<Register> Dst, Register Op1, Register Op2,
    MachineIRBuilder &MIRBuilder) const {
  // We implement a vector concat by:
  // 1. Use scalar_to_vector to insert the lower vector into the larger dest
  // 2. Insert the upper vector into the destination's upper element
  // TODO: some of this code is common with G_BUILD_VECTOR handling.
  MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();

  const LLT Op1Ty = MRI.getType(Op1);
  const LLT Op2Ty = MRI.getType(Op2);

  if (Op1Ty != Op2Ty) {
    LLVM_DEBUG(dbgs() << "Could not do vector concat of differing vector tys");
    return nullptr;
  }
  assert(Op1Ty.isVector() && "Expected a vector for vector concat");

  // Concatenating two >=128-bit vectors would overflow a Q register.
  if (Op1Ty.getSizeInBits() >= 128) {
    LLVM_DEBUG(dbgs() << "Vector concat not supported for full size vectors");
    return nullptr;
  }

  // At the moment we just support 64 bit vector concats.
  if (Op1Ty.getSizeInBits() != 64) {
    LLVM_DEBUG(dbgs() << "Vector concat supported for 64b vectors");
    return nullptr;
  }

  // Treat each 64-bit source as one scalar lane of the double-width result.
  const LLT ScalarTy = LLT::scalar(Op1Ty.getSizeInBits());
  const RegisterBank &FPRBank = *RBI.getRegBank(Op1, MRI, TRI);
  const TargetRegisterClass *DstRC =
      getRegClassForTypeOnBank(Op1Ty.multiplyElements(2), FPRBank);

  // Widen both operands into the destination register class first.
  MachineInstr *WidenedOp1 =
      emitScalarToVector(ScalarTy.getSizeInBits(), DstRC, Op1, MIRBuilder);
  MachineInstr *WidenedOp2 =
      emitScalarToVector(ScalarTy.getSizeInBits(), DstRC, Op2, MIRBuilder);
  if (!WidenedOp1 || !WidenedOp2) {
    LLVM_DEBUG(dbgs() << "Could not emit a vector from scalar value");
    return nullptr;
  }

  // Now do the insert of the upper element.
  unsigned InsertOpc, InsSubRegIdx;
  std::tie(InsertOpc, InsSubRegIdx) =
      getInsertVecEltOpInfo(FPRBank, ScalarTy.getSizeInBits());

  if (!Dst)
    Dst = MRI.createVirtualRegister(DstRC);
  auto InsElt =
      MIRBuilder
          .buildInstr(InsertOpc, {*Dst}, {WidenedOp1->getOperand(0).getReg()})
          .addImm(1) /* Lane index */
          .addUse(WidenedOp2->getOperand(0).getReg())
          .addImm(0);
  return &*InsElt;
}
4597
/// Emit a CSINC of \p Src1 and \p Src2 into \p Dst under condition \p Pred,
/// picking the W- or X-register form from the width of \p Dst.
MachineInstr *
AArch64InstructionSelector::emitCSINC(Register Dst, Register Src1,
                                      Register Src2, AArch64CC::CondCode Pred,
                                      MachineIRBuilder &MIRBuilder) const {
  auto &MRI = *MIRBuilder.getMRI();
  const RegClassOrRegBank &RegClassOrBank = MRI.getRegClassOrRegBank(Dst);
  // If we used a register class, then this won't necessarily have an LLT.
  // Compute the size based off whether or not we have a class or bank.
  unsigned Size;
  if (const auto *RC = dyn_cast<const TargetRegisterClass *>(RegClassOrBank))
    Size = TRI.getRegSizeInBits(*RC);
  else
    Size = MRI.getType(Dst).getSizeInBits();
  // Some opcodes use s1.
  assert(Size <= 64 && "Expected 64 bits or less only!");
  // Anything narrower than 64 bits uses the 32-bit CSINCWr encoding.
  static const unsigned OpcTable[2] = {AArch64::CSINCWr, AArch64::CSINCXr};
  unsigned Opc = OpcTable[Size == 64];
  auto CSINC = MIRBuilder.buildInstr(Opc, {Dst}, {Src1, Src2}).addImm(Pred);
  return &*CSINC;
}
4619
4620MachineInstr *AArch64InstructionSelector::emitCarryIn(MachineInstr &I,
4621 Register CarryReg) {
4622 MachineRegisterInfo *MRI = MIB.getMRI();
4623 unsigned Opcode = I.getOpcode();
4624
4625 // If the instruction is a SUB, we need to negate the carry,
4626 // because borrowing is indicated by carry-flag == 0.
4627 bool NeedsNegatedCarry =
4628 (Opcode == TargetOpcode::G_USUBE || Opcode == TargetOpcode::G_SSUBE);
4629
4630 // If the previous instruction will already produce the correct carry, do not
4631 // emit a carry generating instruction. E.g. for G_UADDE/G_USUBE sequences
4632 // generated during legalization of wide add/sub. This optimization depends on
4633 // these sequences not being interrupted by other instructions.
4634 // We have to select the previous instruction before the carry-using
4635 // instruction is deleted by the calling function, otherwise the previous
4636 // instruction might become dead and would get deleted.
4637 MachineInstr *SrcMI = MRI->getVRegDef(CarryReg);
4638 if (SrcMI == I.getPrevNode()) {
4639 if (auto *CarrySrcMI = dyn_cast<GAddSubCarryOut>(SrcMI)) {
4640 bool ProducesNegatedCarry = CarrySrcMI->isSub();
4641 if (NeedsNegatedCarry == ProducesNegatedCarry &&
4642 CarrySrcMI->isUnsigned() &&
4643 CarrySrcMI->getCarryOutReg() == CarryReg &&
4644 selectAndRestoreState(*SrcMI))
4645 return nullptr;
4646 }
4647 }
4648
4649 Register DeadReg = MRI->createVirtualRegister(&AArch64::GPR32RegClass);
4650
4651 if (NeedsNegatedCarry) {
4652 // (0 - Carry) sets !C in NZCV when Carry == 1
4653 Register ZReg = AArch64::WZR;
4654 return emitInstr(AArch64::SUBSWrr, {DeadReg}, {ZReg, CarryReg}, MIB);
4655 }
4656
4657 // (Carry - 1) sets !C in NZCV when Carry == 0
4658 auto Fns = select12BitValueWithLeftShift(1);
4659 return emitInstr(AArch64::SUBSWri, {DeadReg}, {CarryReg}, MIB, Fns);
4660}
4661
4662bool AArch64InstructionSelector::selectOverflowOp(MachineInstr &I,
4663 MachineRegisterInfo &MRI) {
4664 auto &CarryMI = cast<GAddSubCarryOut>(I);
4665
4666 if (auto *CarryInMI = dyn_cast<GAddSubCarryInOut>(&I)) {
4667 // Set NZCV carry according to carry-in VReg
4668 emitCarryIn(I, CarryInMI->getCarryInReg());
4669 }
4670
4671 // Emit the operation and get the correct condition code.
4672 auto OpAndCC = emitOverflowOp(I.getOpcode(), CarryMI.getDstReg(),
4673 CarryMI.getLHS(), CarryMI.getRHS(), MIB);
4674
4675 Register CarryOutReg = CarryMI.getCarryOutReg();
4676
4677 // Don't convert carry-out to VReg if it is never used
4678 if (!MRI.use_nodbg_empty(CarryOutReg)) {
4679 // Now, put the overflow result in the register given by the first operand
4680 // to the overflow op. CSINC increments the result when the predicate is
4681 // false, so to get the increment when it's true, we need to use the
4682 // inverse. In this case, we want to increment when carry is set.
4683 Register ZReg = AArch64::WZR;
4684 emitCSINC(/*Dst=*/CarryOutReg, /*Src1=*/ZReg, /*Src2=*/ZReg,
4685 getInvertedCondCode(OpAndCC.second), MIB);
4686 }
4687
4688 I.eraseFromParent();
4689 return true;
4690}
4691
4692std::pair<MachineInstr *, AArch64CC::CondCode>
4693AArch64InstructionSelector::emitOverflowOp(unsigned Opcode, Register Dst,
4694 MachineOperand &LHS,
4695 MachineOperand &RHS,
4696 MachineIRBuilder &MIRBuilder) const {
4697 switch (Opcode) {
4698 default:
4699 llvm_unreachable("Unexpected opcode!");
4700 case TargetOpcode::G_SADDO:
4701 return std::make_pair(emitADDS(Dst, LHS, RHS, MIRBuilder), AArch64CC::VS);
4702 case TargetOpcode::G_UADDO:
4703 return std::make_pair(emitADDS(Dst, LHS, RHS, MIRBuilder), AArch64CC::HS);
4704 case TargetOpcode::G_SSUBO:
4705 return std::make_pair(emitSUBS(Dst, LHS, RHS, MIRBuilder), AArch64CC::VS);
4706 case TargetOpcode::G_USUBO:
4707 return std::make_pair(emitSUBS(Dst, LHS, RHS, MIRBuilder), AArch64CC::LO);
4708 case TargetOpcode::G_SADDE:
4709 return std::make_pair(emitADCS(Dst, LHS, RHS, MIRBuilder), AArch64CC::VS);
4710 case TargetOpcode::G_UADDE:
4711 return std::make_pair(emitADCS(Dst, LHS, RHS, MIRBuilder), AArch64CC::HS);
4712 case TargetOpcode::G_SSUBE:
4713 return std::make_pair(emitSBCS(Dst, LHS, RHS, MIRBuilder), AArch64CC::VS);
4714 case TargetOpcode::G_USUBE:
4715 return std::make_pair(emitSBCS(Dst, LHS, RHS, MIRBuilder), AArch64CC::LO);
4716 }
4717}
4718
4719/// Returns true if @p Val is a tree of AND/OR/CMP operations that can be
4720/// expressed as a conjunction.
4721/// \param CanNegate Set to true if we can negate the whole sub-tree just by
4722/// changing the conditions on the CMP tests.
4723/// (this means we can call emitConjunctionRec() with
4724/// Negate==true on this sub-tree)
4725/// \param MustBeFirst Set to true if this subtree needs to be negated and we
4726/// cannot do the negation naturally. We are required to
4727/// emit the subtree first in this case.
4728/// \param WillNegate Is true if are called when the result of this
4729/// subexpression must be negated. This happens when the
4730/// outer expression is an OR. We can use this fact to know
4731/// that we have a double negation (or (or ...) ...) that
4732/// can be implemented for free.
4733static bool canEmitConjunction(Register Val, bool &CanNegate, bool &MustBeFirst,
4734 bool WillNegate, MachineRegisterInfo &MRI,
4735 unsigned Depth = 0) {
4736 if (!MRI.hasOneNonDBGUse(Val))
4737 return false;
4738 MachineInstr *ValDef = MRI.getVRegDef(Val);
4739 unsigned Opcode = ValDef->getOpcode();
4740 if (isa<GAnyCmp>(ValDef)) {
4741 CanNegate = true;
4742 MustBeFirst = false;
4743 return true;
4744 }
4745 // Protect against exponential runtime and stack overflow.
4746 if (Depth > 6)
4747 return false;
4748 if (Opcode == TargetOpcode::G_AND || Opcode == TargetOpcode::G_OR) {
4749 bool IsOR = Opcode == TargetOpcode::G_OR;
4750 Register O0 = ValDef->getOperand(1).getReg();
4751 Register O1 = ValDef->getOperand(2).getReg();
4752 bool CanNegateL;
4753 bool MustBeFirstL;
4754 if (!canEmitConjunction(O0, CanNegateL, MustBeFirstL, IsOR, MRI, Depth + 1))
4755 return false;
4756 bool CanNegateR;
4757 bool MustBeFirstR;
4758 if (!canEmitConjunction(O1, CanNegateR, MustBeFirstR, IsOR, MRI, Depth + 1))
4759 return false;
4760
4761 if (MustBeFirstL && MustBeFirstR)
4762 return false;
4763
4764 if (IsOR) {
4765 // For an OR expression we need to be able to naturally negate at least
4766 // one side or we cannot do the transformation at all.
4767 if (!CanNegateL && !CanNegateR)
4768 return false;
4769 // If we the result of the OR will be negated and we can naturally negate
4770 // the leaves, then this sub-tree as a whole negates naturally.
4771 CanNegate = WillNegate && CanNegateL && CanNegateR;
4772 // If we cannot naturally negate the whole sub-tree, then this must be
4773 // emitted first.
4774 MustBeFirst = !CanNegate;
4775 } else {
4776 assert(Opcode == TargetOpcode::G_AND && "Must be G_AND");
4777 // We cannot naturally negate an AND operation.
4778 CanNegate = false;
4779 MustBeFirst = MustBeFirstL || MustBeFirstR;
4780 }
4781 return true;
4782 }
4783 return false;
4784}
4785
/// Emit a conditional compare — CCMP/CCMN for integers, FCCMP for floating
/// point — that only takes effect when the preceding flags satisfy the given
/// predicate, setting NZCV so that the desired outgoing condition can be
/// tested afterwards.
MachineInstr *AArch64InstructionSelector::emitConditionalComparison(
    MachineIRBuilder &MIB) const {
  auto &MRI = *MIB.getMRI();
  LLT OpTy = MRI.getType(LHS);
  unsigned CCmpOpc;
  std::optional<ValueAndVReg> C;
  if (CmpInst::isIntPredicate(CC)) {
    assert(OpTy.getSizeInBits() == 32 || OpTy.getSizeInBits() == 64);
    // CCMP/CCMN immediates are 5-bit: use the register form for anything
    // outside [-31, 31], the CCMP-immediate form for [0, 31], and CCMN
    // (conditional compare negative) for small negative constants.
    if (!C || C->Value.sgt(31) || C->Value.slt(-31))
      CCmpOpc = OpTy.getSizeInBits() == 32 ? AArch64::CCMPWr : AArch64::CCMPXr;
    else if (C->Value.ule(31))
      CCmpOpc = OpTy.getSizeInBits() == 32 ? AArch64::CCMPWi : AArch64::CCMPXi;
    else
      CCmpOpc = OpTy.getSizeInBits() == 32 ? AArch64::CCMNWi : AArch64::CCMNXi;
  } else {
    // Floating point: pick the width-specific FCCMP encoding.
    assert(OpTy.getSizeInBits() == 16 || OpTy.getSizeInBits() == 32 ||
           OpTy.getSizeInBits() == 64);
    switch (OpTy.getSizeInBits()) {
    case 16:
      assert(STI.hasFullFP16() && "Expected Full FP16 for fp16 comparisons");
      CCmpOpc = AArch64::FCCMPHrr;
      break;
    case 32:
      CCmpOpc = AArch64::FCCMPSrr;
      break;
    case 64:
      CCmpOpc = AArch64::FCCMPDrr;
      break;
    default:
      return nullptr;
    }
  }
  // The NZCV immediate encodes the flag state reported when the predicate
  // does NOT hold, i.e. the flags satisfying the inverted outgoing condition.
  unsigned NZCV = AArch64CC::getNZCVToSatisfyCondCode(InvOutCC);
  auto CCmp =
      MIB.buildInstr(CCmpOpc, {}, {LHS});
  if (CCmpOpc == AArch64::CCMPWi || CCmpOpc == AArch64::CCMPXi)
    CCmp.addImm(C->Value.getZExtValue());
  else if (CCmpOpc == AArch64::CCMNWi || CCmpOpc == AArch64::CCMNXi)
    // CCMN compares against the negated immediate, hence the absolute value.
    CCmp.addImm(C->Value.abs().getZExtValue());
  else
    CCmp.addReg(RHS);
  CCmp.addImm(NZCV).addImm(Predicate);
  return &*CCmp;
}
4835
/// Recursively lower a conjunction/disjunction tree of compares into a chain
/// of compare + conditional-compare instructions. \p OutCC receives the
/// condition code testing the final result; \p CCOp is the flag-producing
/// instruction this node is conditional on (invalid when this node is the
/// first in the chain), evaluated under \p Predicate.
MachineInstr *AArch64InstructionSelector::emitConjunctionRec(
    Register Val, AArch64CC::CondCode &OutCC, bool Negate, Register CCOp,
    AArch64CC::CondCode Predicate, MachineIRBuilder &MIB) const {
  // We're at a tree leaf, produce a conditional comparison operation.
  auto &MRI = *MIB.getMRI();
  MachineInstr *ValDef = MRI.getVRegDef(Val);
  unsigned Opcode = ValDef->getOpcode();
  if (auto *Cmp = dyn_cast<GAnyCmp>(ValDef)) {
    Register LHS = Cmp->getLHSReg();
    Register RHS = Cmp->getRHSReg();
    CmpInst::Predicate CC = Cmp->getCond();
    if (Negate)
    if (isa<GICmp>(Cmp)) {
      OutCC = changeICMPPredToAArch64CC(CC, RHS, MIB.getMRI());
    } else {
      // Handle special FP cases.
      AArch64CC::CondCode ExtraCC;
      changeFPCCToANDAArch64CC(CC, OutCC, ExtraCC);
      // Some floating point conditions can't be tested with a single condition
      // code. Construct an additional comparison in this case.
      if (ExtraCC != AArch64CC::AL) {
        MachineInstr *ExtraCmp;
        if (!CCOp)
          ExtraCmp = emitFPCompare(LHS, RHS, MIB, CC);
        else
          ExtraCmp =
              emitConditionalComparison(LHS, RHS, CC, Predicate, ExtraCC, MIB);
        // The extra compare becomes the flag producer for what follows.
        CCOp = ExtraCmp->getOperand(0).getReg();
        Predicate = ExtraCC;
      }
    }

    // Produce a normal comparison if we are first in the chain
    if (!CCOp) {
      if (isa<GICmp>(Cmp))
        return emitCMP(Cmp->getOperand(2), Cmp->getOperand(3), MIB);
      return emitFPCompare(Cmp->getOperand(2).getReg(),
                           Cmp->getOperand(3).getReg(), MIB);
    }
    // Otherwise produce a ccmp.
    return emitConditionalComparison(LHS, RHS, CC, Predicate, OutCC, MIB);
  }
  // Interior AND/OR node: canEmitConjunction() has already validated the
  // shape of this tree.
  assert(MRI.hasOneNonDBGUse(Val) && "Valid conjunction/disjunction tree");

  bool IsOR = Opcode == TargetOpcode::G_OR;

  Register LHS = ValDef->getOperand(1).getReg();
  bool CanNegateL;
  bool MustBeFirstL;
  bool ValidL = canEmitConjunction(LHS, CanNegateL, MustBeFirstL, IsOR, MRI);
  assert(ValidL && "Valid conjunction/disjunction tree");
  (void)ValidL;

  Register RHS = ValDef->getOperand(2).getReg();
  bool CanNegateR;
  bool MustBeFirstR;
  bool ValidR = canEmitConjunction(RHS, CanNegateR, MustBeFirstR, IsOR, MRI);
  assert(ValidR && "Valid conjunction/disjunction tree");
  (void)ValidR;

  // Swap sub-tree that must come first to the right side.
  if (MustBeFirstL) {
    assert(!MustBeFirstR && "Valid conjunction/disjunction tree");
    std::swap(LHS, RHS);
    std::swap(CanNegateL, CanNegateR);
    std::swap(MustBeFirstL, MustBeFirstR);
  }

  bool NegateR;
  bool NegateAfterR;
  bool NegateL;
  bool NegateAfterAll;
  if (Opcode == TargetOpcode::G_OR) {
    // Swap the sub-tree that we can negate naturally to the left.
    if (!CanNegateL) {
      assert(CanNegateR && "at least one side must be negatable");
      assert(!MustBeFirstR && "invalid conjunction/disjunction tree");
      assert(!Negate);
      std::swap(LHS, RHS);
      NegateR = false;
      NegateAfterR = true;
    } else {
      // Negate the left sub-tree if possible, otherwise negate the result.
      NegateR = CanNegateR;
      NegateAfterR = !CanNegateR;
    }
    NegateL = true;
    NegateAfterAll = !Negate;
  } else {
    assert(Opcode == TargetOpcode::G_AND &&
           "Valid conjunction/disjunction tree");
    assert(!Negate && "Valid conjunction/disjunction tree");

    NegateL = false;
    NegateR = false;
    NegateAfterR = false;
    NegateAfterAll = false;
  }

  // Emit sub-trees.
  AArch64CC::CondCode RHSCC;
  MachineInstr *CmpR =
      emitConjunctionRec(RHS, RHSCC, NegateR, CCOp, Predicate, MIB);
  if (NegateAfterR)
    RHSCC = AArch64CC::getInvertedCondCode(RHSCC);
  MachineInstr *CmpL = emitConjunctionRec(
      LHS, OutCC, NegateL, CmpR->getOperand(0).getReg(), RHSCC, MIB);
  if (NegateAfterAll)
    OutCC = AArch64CC::getInvertedCondCode(OutCC);
  return CmpL;
}
4948
4949MachineInstr *AArch64InstructionSelector::emitConjunction(
4950 Register Val, AArch64CC::CondCode &OutCC, MachineIRBuilder &MIB) const {
4951 bool DummyCanNegate;
4952 bool DummyMustBeFirst;
4953 if (!canEmitConjunction(Val, DummyCanNegate, DummyMustBeFirst, false,
4954 *MIB.getMRI()))
4955 return nullptr;
4956 return emitConjunctionRec(Val, OutCC, false, Register(), AArch64CC::AL, MIB);
4957}
4958
4959bool AArch64InstructionSelector::tryOptSelectConjunction(GSelect &SelI,
4960 MachineInstr &CondMI) {
4961 AArch64CC::CondCode AArch64CC;
4962 MachineInstr *ConjMI = emitConjunction(SelI.getCondReg(), AArch64CC, MIB);
4963 if (!ConjMI)
4964 return false;
4965
4966 emitSelect(SelI.getReg(0), SelI.getTrueReg(), SelI.getFalseReg(), AArch64CC, MIB);
4967 SelI.eraseFromParent();
4968 return true;
4969}
4970
/// Try to fold the compare feeding a G_SELECT directly into a conditional
/// select, avoiding a materialized boolean. Returns true if the select was
/// fully selected and erased.
bool AArch64InstructionSelector::tryOptSelect(GSelect &I) {
  MachineRegisterInfo &MRI = *MIB.getMRI();
  // We want to recognize this pattern:
  //
  // $z = G_FCMP pred, $x, $y
  // ...
  // $w = G_SELECT $z, $a, $b
  //
  // Where the value of $z is *only* ever used by the G_SELECT (possibly with
  // some copies/truncs in between.)
  //
  // If we see this, then we can emit something like this:
  //
  // fcmp $x, $y
  // fcsel $w, $a, $b, pred
  //
  // Rather than emitting both of the rather long sequences in the standard
  // G_FCMP/G_SELECT select methods.

  // First, check if the condition is defined by a compare.
  MachineInstr *CondDef = MRI.getVRegDef(I.getOperand(1).getReg());

  // We can only fold if all of the defs have one use.
  Register CondDefReg = CondDef->getOperand(0).getReg();
  if (!MRI.hasOneNonDBGUse(CondDefReg)) {
    // Unless it's another select.
    for (const MachineInstr &UI : MRI.use_nodbg_instructions(CondDefReg)) {
      if (CondDef == &UI)
        continue;
      if (UI.getOpcode() != TargetOpcode::G_SELECT)
        return false;
    }
  }

  // Is the condition defined by a compare?
  unsigned CondOpc = CondDef->getOpcode();
  if (CondOpc != TargetOpcode::G_ICMP && CondOpc != TargetOpcode::G_FCMP) {
    // Not a plain compare: see if it is an AND/OR conjunction tree instead.
    if (tryOptSelectConjunction(I, *CondDef))
      return true;
    return false;
  }

  if (CondOpc == TargetOpcode::G_ICMP) {
    // Emit the integer compare and translate its predicate into an AArch64
    // condition code for the select.
    auto &PredOp = CondDef->getOperand(1);
    emitIntegerCompare(CondDef->getOperand(2), CondDef->getOperand(3), PredOp,
                       MIB);
    auto Pred = static_cast<CmpInst::Predicate>(PredOp.getPredicate());
    CondCode =
        changeICMPPredToAArch64CC(Pred, CondDef->getOperand(3).getReg(), &MRI);
  } else {
    // Get the condition code for the select.
    auto Pred =
        static_cast<CmpInst::Predicate>(CondDef->getOperand(1).getPredicate());
    AArch64CC::CondCode CondCode2;
    changeFCMPPredToAArch64CC(Pred, CondCode, CondCode2);

    // changeFCMPPredToAArch64CC sets CondCode2 to AL when we require two
    // instructions to emit the comparison.
    // TODO: Handle FCMP_UEQ and FCMP_ONE. After that, this check will be
    // unnecessary.
    if (CondCode2 != AArch64CC::AL)
      return false;

    if (!emitFPCompare(CondDef->getOperand(2).getReg(),
                       CondDef->getOperand(3).getReg(), MIB)) {
      LLVM_DEBUG(dbgs() << "Couldn't emit compare for select!\n");
      return false;
    }
  }

  // Emit the select.
  emitSelect(I.getOperand(0).getReg(), I.getOperand(2).getReg(),
             I.getOperand(3).getReg(), CondCode, MIB);
  I.eraseFromParent();
  return true;
}
5048
/// Try to fold the operands of an integer compare into a cheaper
/// flag-setting instruction: CMN when one side is a negation, TST when the
/// LHS is an AND compared against zero. Returns the folded instruction, or
/// nullptr when no fold applies.
MachineInstr *AArch64InstructionSelector::tryFoldIntegerCompare(
    MachineOperand &LHS, MachineOperand &RHS, MachineOperand &Predicate,
    MachineIRBuilder &MIRBuilder) const {
  assert(LHS.isReg() && RHS.isReg() && Predicate.isPredicate() &&
         "Unexpected MachineOperand");
  MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
  // We want to find this sort of thing:
  // x = G_SUB 0, y
  // G_ICMP z, x
  //
  // In this case, we can fold the G_SUB into the G_ICMP using a CMN instead.
  // e.g:
  //
  // cmn z, y

  // Check if the RHS or LHS of the G_ICMP is defined by a SUB
  MachineInstr *LHSDef = getDefIgnoringCopies(LHS.getReg(), MRI);
  MachineInstr *RHSDef = getDefIgnoringCopies(RHS.getReg(), MRI);
  auto P = static_cast<CmpInst::Predicate>(Predicate.getPredicate());

  // Given this:
  //
  // x = G_SUB 0, y
  // G_ICMP z, x
  //
  // Produce this:
  //
  // cmn z, y
  if (isCMN(RHSDef, P, MRI))
    return emitCMN(LHS, RHSDef->getOperand(2), MIRBuilder);

  // Same idea here, but with the LHS of the compare instead:
  //
  // Given this:
  //
  // x = G_SUB 0, y
  // G_ICMP x, z
  //
  // Produce this:
  //
  // cmn y, z
  //
  // But be careful! We need to swap the predicate!
  if (isCMN(LHSDef, P, MRI)) {
    if (!CmpInst::isEquality(P)) {
    }
    return emitCMN(LHSDef->getOperand(2), RHS, MIRBuilder);
  }

  // Given this:
  //
  // z = G_AND x, y
  // G_ICMP z, 0
  //
  // Produce this if the compare is signed:
  //
  // tst x, y
  if (!CmpInst::isUnsigned(P) && LHSDef &&
      LHSDef->getOpcode() == TargetOpcode::G_AND) {
    // Make sure that the RHS is 0.
    auto ValAndVReg = getIConstantVRegValWithLookThrough(RHS.getReg(), MRI);
    if (!ValAndVReg || ValAndVReg->Value != 0)
      return nullptr;

    return emitTST(LHSDef->getOperand(1),
                   LHSDef->getOperand(2), MIRBuilder);
  }

  return nullptr;
}
5121
/// Select G_SHUFFLE_VECTOR by loading a byte-index vector from the constant
/// pool and emitting a TBL1 (64-bit result) or TBL2 (128-bit result) table
/// lookup over the source registers.
bool AArch64InstructionSelector::selectShuffleVector(
    MachineInstr &I, MachineRegisterInfo &MRI) {
  const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
  Register Src1Reg = I.getOperand(1).getReg();
  Register Src2Reg = I.getOperand(2).getReg();
  ArrayRef<int> Mask = I.getOperand(3).getShuffleMask();

  MachineBasicBlock &MBB = *I.getParent();
  MachineFunction &MF = *MBB.getParent();
  LLVMContext &Ctx = MF.getFunction().getContext();

  unsigned BytesPerElt = DstTy.getElementType().getSizeInBits() / 8;

  // Expand each element index in the mask into per-byte TBL indices.
  for (int Val : Mask) {
    // For now, any undef indexes we'll just assume to be 0. This should be
    // optimized in future, e.g. to select DUP etc.
    Val = Val < 0 ? 0 : Val;
    for (unsigned Byte = 0; Byte < BytesPerElt; ++Byte) {
      unsigned Offset = Byte + Val * BytesPerElt;
      CstIdxs.emplace_back(ConstantInt::get(Type::getInt8Ty(Ctx), Offset));
    }
  }

  // Use a constant pool to load the index vector for TBL.
  Constant *CPVal = ConstantVector::get(CstIdxs);
  MachineInstr *IndexLoad = emitLoadFromConstantPool(CPVal, MIB);
  if (!IndexLoad) {
    LLVM_DEBUG(dbgs() << "Could not load from a constant pool");
    return false;
  }

  if (DstTy.getSizeInBits() != 128) {
    assert(DstTy.getSizeInBits() == 64 && "Unexpected shuffle result ty");
    // This case can be done with TBL1.
    MachineInstr *Concat =
        emitVectorConcat(std::nullopt, Src1Reg, Src2Reg, MIB);
    if (!Concat) {
      LLVM_DEBUG(dbgs() << "Could not do vector concat for tbl1");
      return false;
    }

    // The constant pool load will be 64 bits, so need to convert to FPR128 reg.
    IndexLoad = emitScalarToVector(64, &AArch64::FPR128RegClass,
                                   IndexLoad->getOperand(0).getReg(), MIB);

    auto TBL1 = MIB.buildInstr(
        AArch64::TBLv16i8One, {&AArch64::FPR128RegClass},
        {Concat->getOperand(0).getReg(), IndexLoad->getOperand(0).getReg()});

    // Extract the low 64 bits of the TBL result as the destination.
    auto Copy =
        MIB.buildInstr(TargetOpcode::COPY, {I.getOperand(0).getReg()}, {})
            .addReg(TBL1.getReg(0), {}, AArch64::dsub);
    RBI.constrainGenericRegister(Copy.getReg(0), AArch64::FPR64RegClass, MRI);
    I.eraseFromParent();
    return true;
  }

  // For TBL2 we need to emit a REG_SEQUENCE to tie together two consecutive
  // Q registers for regalloc.
  SmallVector<Register, 2> Regs = {Src1Reg, Src2Reg};
  auto RegSeq = createQTuple(Regs, MIB);
  auto TBL2 = MIB.buildInstr(AArch64::TBLv16i8Two, {I.getOperand(0)},
                             {RegSeq, IndexLoad->getOperand(0)});
  I.eraseFromParent();
  return true;
}
5191
/// Emit an insert of \p EltReg into lane \p LaneIdx of the 128-bit vector
/// \p SrcReg, defining \p DstReg (a fresh vreg if none given). Element
/// values on the FPR bank are first widened into a vector register.
MachineInstr *AArch64InstructionSelector::emitLaneInsert(
    std::optional<Register> DstReg, Register SrcReg, Register EltReg,
    unsigned LaneIdx, const RegisterBank &RB,
    MachineIRBuilder &MIRBuilder) const {
  MachineInstr *InsElt = nullptr;
  const TargetRegisterClass *DstRC = &AArch64::FPR128RegClass;
  MachineRegisterInfo &MRI = *MIRBuilder.getMRI();

  // Create a register to define with the insert if one wasn't passed in.
  if (!DstReg)
    DstReg = MRI.createVirtualRegister(DstRC);

  unsigned EltSize = MRI.getType(EltReg).getSizeInBits();
  unsigned Opc = getInsertVecEltOpInfo(RB, EltSize).first;

  if (RB.getID() == AArch64::FPRRegBankID) {
    // FPR element: widen it to a vector first, then insert its lane 0.
    auto InsSub = emitScalarToVector(EltSize, DstRC, EltReg, MIRBuilder);
    InsElt = MIRBuilder.buildInstr(Opc, {*DstReg}, {SrcReg})
                 .addImm(LaneIdx)
                 .addUse(InsSub->getOperand(0).getReg())
                 .addImm(0);
  } else {
    // GPR element: insert the scalar register directly.
    InsElt = MIRBuilder.buildInstr(Opc, {*DstReg}, {SrcReg})
                 .addImm(LaneIdx)
                 .addUse(EltReg);
  }

  return InsElt;
}
5222
/// Select (G_SEXT/G_ZEXT/G_ANYEXT (G_EXTRACT_VECTOR_ELT v, constant-lane))
/// as a single SMOV/UMOV lane move. Returns true if the extend was selected
/// and erased.
bool AArch64InstructionSelector::selectUSMovFromExtend(
    MachineInstr &MI, MachineRegisterInfo &MRI) {
  if (MI.getOpcode() != TargetOpcode::G_SEXT &&
      MI.getOpcode() != TargetOpcode::G_ZEXT &&
      MI.getOpcode() != TargetOpcode::G_ANYEXT)
    return false;
  bool IsSigned = MI.getOpcode() == TargetOpcode::G_SEXT;
  const Register DefReg = MI.getOperand(0).getReg();
  const LLT DstTy = MRI.getType(DefReg);
  unsigned DstSize = DstTy.getSizeInBits();

  if (DstSize != 32 && DstSize != 64)
    return false;

  // The pattern only applies when the source is an extract of a
  // constant-indexed vector lane.
  MachineInstr *Extract = getOpcodeDef(TargetOpcode::G_EXTRACT_VECTOR_ELT,
                                       MI.getOperand(1).getReg(), MRI);
  int64_t Lane;
  if (!Extract || !mi_match(Extract->getOperand(2).getReg(), MRI, m_ICst(Lane)))
    return false;
  Register Src0 = Extract->getOperand(1).getReg();

  const LLT VecTy = MRI.getType(Src0);
  if (VecTy.isScalableVector())
    return false;

  // SMOV/UMOV operate on a full Q register; widen smaller vectors first.
  if (VecTy.getSizeInBits() != 128) {
    const MachineInstr *ScalarToVector = emitScalarToVector(
        VecTy.getSizeInBits(), &AArch64::FPR128RegClass, Src0, MIB);
    assert(ScalarToVector && "Didn't expect emitScalarToVector to fail!");
    Src0 = ScalarToVector->getOperand(0).getReg();
  }

  // Pick the lane-move opcode from destination width, element width, and
  // signedness.
  unsigned Opcode;
  if (DstSize == 64 && VecTy.getScalarSizeInBits() == 32)
    Opcode = IsSigned ? AArch64::SMOVvi32to64 : AArch64::UMOVvi32;
  else if (DstSize == 64 && VecTy.getScalarSizeInBits() == 16)
    Opcode = IsSigned ? AArch64::SMOVvi16to64 : AArch64::UMOVvi16;
  else if (DstSize == 64 && VecTy.getScalarSizeInBits() == 8)
    Opcode = IsSigned ? AArch64::SMOVvi8to64 : AArch64::UMOVvi8;
  else if (DstSize == 32 && VecTy.getScalarSizeInBits() == 16)
    Opcode = IsSigned ? AArch64::SMOVvi16to32 : AArch64::UMOVvi16;
  else if (DstSize == 32 && VecTy.getScalarSizeInBits() == 8)
    Opcode = IsSigned ? AArch64::SMOVvi8to32 : AArch64::UMOVvi8;
  else
    llvm_unreachable("Unexpected type combo for S/UMov!");

  // We may need to generate one of these, depending on the type and sign of the
  // input:
  //  DstReg = SMOV Src0, Lane;
  //  NewReg = UMOV Src0, Lane; DstReg = SUBREG_TO_REG NewReg, sub_32;
  MachineInstr *ExtI = nullptr;
  if (DstSize == 64 && !IsSigned) {
    // UMOV produces a 32-bit result; widen it into the 64-bit destination
    // via SUBREG_TO_REG (upper 32 bits are implicitly zero).
    Register NewReg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
    MIB.buildInstr(Opcode, {NewReg}, {Src0}).addImm(Lane);
    ExtI = MIB.buildInstr(AArch64::SUBREG_TO_REG, {DefReg}, {})
               .addUse(NewReg)
               .addImm(AArch64::sub_32);
    RBI.constrainGenericRegister(DefReg, AArch64::GPR64RegClass, MRI);
  } else
    ExtI = MIB.buildInstr(Opcode, {DefReg}, {Src0}).addImm(Lane);

  MI.eraseFromParent();
  return true;
}
5288
/// Try to materialize the constant \p Bits as a byte-replicated AdvSIMD
/// modified-immediate MOVI. Returns the MOVI, or nullptr if the constant
/// does not fit this encoding.
MachineInstr *AArch64InstructionSelector::tryAdvSIMDModImm8(
    Register Dst, unsigned DstSize, APInt Bits, MachineIRBuilder &Builder) {
  unsigned int Op;
  if (DstSize == 128) {
    // The 128-bit form replicates a 64-bit pattern; both halves must match.
    if (Bits.getHiBits(64) != Bits.getLoBits(64))
      return nullptr;
    Op = AArch64::MOVIv16b_ns;
  } else {
    Op = AArch64::MOVIv8b_ns;
  }

  uint64_t Val = Bits.zextOrTrunc(64).getZExtValue();

    auto Mov = Builder.buildInstr(Op, {Dst}, {}).addImm(Val);
    return &*Mov;
  }
  return nullptr;
}
5310
/// Try to materialize \p Bits as a 16-bit-element AdvSIMD modified-immediate
/// MOVI (or MVNI when \p Inv) with an optional left shift of 8. Returns the
/// instruction, or nullptr if the constant does not fit this encoding.
MachineInstr *AArch64InstructionSelector::tryAdvSIMDModImm16(
    Register Dst, unsigned DstSize, APInt Bits, MachineIRBuilder &Builder,
    bool Inv) {

  unsigned int Op;
  if (DstSize == 128) {
    // The 128-bit form replicates a 64-bit pattern; both halves must match.
    if (Bits.getHiBits(64) != Bits.getLoBits(64))
      return nullptr;
    Op = Inv ? AArch64::MVNIv8i16 : AArch64::MOVIv8i16;
  } else {
    Op = Inv ? AArch64::MVNIv4i16 : AArch64::MOVIv4i16;
  }

  uint64_t Val = Bits.zextOrTrunc(64).getZExtValue();
  uint64_t Shift;

    Shift = 0;
  } else if (AArch64_AM::isAdvSIMDModImmType6(Val)) {
    Shift = 8;
  } else
    return nullptr;

  auto Mov = Builder.buildInstr(Op, {Dst}, {}).addImm(Val).addImm(Shift);
  return &*Mov;
}
5340
/// Try to materialize \p Bits with a 32-bit-element modified-immediate move:
/// MOVIv4i32/MOVIv2i32, or the bit-inverted MVNI forms when \p Inv is set.
/// The immediate may carry an LSL shift of 0, 8, 16 or 24 (Type1-Type4).
/// \returns the built instruction, or nullptr if the value is not encodable.
/// NOTE(review): the Type1 check and the encodeAdvSIMDModImmType1..4 calls
/// appear to be missing from this listing — confirm against the full source.
5341MachineInstr *AArch64InstructionSelector::tryAdvSIMDModImm32(
5342 Register Dst, unsigned DstSize, APInt Bits, MachineIRBuilder &Builder,
5343 bool Inv) {
5344
5345 unsigned int Op;
5346 if (DstSize == 128) {
  // 128-bit form requires the constant to be a splat of its low 64 bits.
5347 if (Bits.getHiBits(64) != Bits.getLoBits(64))
5348 return nullptr;
5349 Op = Inv ? AArch64::MVNIv4i32 : AArch64::MOVIv4i32;
5350 } else {
5351 Op = Inv ? AArch64::MVNIv2i32 : AArch64::MOVIv2i32;
5352 }
5353
5354 uint64_t Val = Bits.zextOrTrunc(64).getZExtValue();
5355 uint64_t Shift;
5356
5359 Shift = 0;
5360 } else if ((AArch64_AM::isAdvSIMDModImmType2(Val))) {
5362 Shift = 8;
5363 } else if ((AArch64_AM::isAdvSIMDModImmType3(Val))) {
5365 Shift = 16;
5366 } else if ((AArch64_AM::isAdvSIMDModImmType4(Val))) {
5368 Shift = 24;
5369 } else
5370 return nullptr;
5371
5372 auto Mov = Builder.buildInstr(Op, {Dst}, {}).addImm(Val).addImm(Shift);
5374 return &*Mov;
5375}
5376
/// Try to materialize \p Bits with a 64-bit-element modified immediate:
/// MOVIv2d_ns for a 128-bit destination, MOVID for a 64-bit one (each byte
/// of the immediate must be all-ones or all-zeros).
/// \returns the built instruction, or nullptr if the value is not encodable.
/// NOTE(review): the isAdvSIMDModImmType10 check / encode call appear to be
/// missing from this listing — confirm against the full source.
5377MachineInstr *AArch64InstructionSelector::tryAdvSIMDModImm64(
5378 Register Dst, unsigned DstSize, APInt Bits, MachineIRBuilder &Builder) {
5379
5380 unsigned int Op;
5381 if (DstSize == 128) {
  // 128-bit form requires the constant to be a splat of its low 64 bits.
5382 if (Bits.getHiBits(64) != Bits.getLoBits(64))
5383 return nullptr;
5384 Op = AArch64::MOVIv2d_ns;
5385 } else {
5386 Op = AArch64::MOVID;
5387 }
5388
5389 uint64_t Val = Bits.zextOrTrunc(64).getZExtValue();
5392 auto Mov = Builder.buildInstr(Op, {Dst}, {}).addImm(Val);
5394 return &*Mov;
5395 }
5396 return nullptr;
5397}
5398
/// Try to materialize \p Bits with a 32-bit-element MSL ("shifting ones")
/// modified-immediate move: MOVIv4s_msl/MOVIv2s_msl, or the bit-inverted
/// MVNI forms when \p Inv is set.
/// \returns the built instruction, or nullptr if the value is not encodable.
/// NOTE(review): the Type7 check and the encodeAdvSIMDModImmType7/8 calls
/// appear to be missing from this listing — confirm against the full source.
5399MachineInstr *AArch64InstructionSelector::tryAdvSIMDModImm321s(
5400 Register Dst, unsigned DstSize, APInt Bits, MachineIRBuilder &Builder,
5401 bool Inv) {
5402
5403 unsigned int Op;
5404 if (DstSize == 128) {
  // 128-bit form requires the constant to be a splat of its low 64 bits.
5405 if (Bits.getHiBits(64) != Bits.getLoBits(64))
5406 return nullptr;
5407 Op = Inv ? AArch64::MVNIv4s_msl : AArch64::MOVIv4s_msl;
5408 } else {
5409 Op = Inv ? AArch64::MVNIv2s_msl : AArch64::MOVIv2s_msl;
5410 }
5411
5412 uint64_t Val = Bits.zextOrTrunc(64).getZExtValue();
5413 uint64_t Shift;
5414
  // 264 / 272 are presumably the AArch64_AM shifter-operand encodings for
  // the MSL #8 / MSL #16 forms — verify against AArch64AddressingModes.h.
5417 Shift = 264;
5418 } else if (AArch64_AM::isAdvSIMDModImmType8(Val)) {
5420 Shift = 272;
5421 } else
5422 return nullptr;
5423
5424 auto Mov = Builder.buildInstr(Op, {Dst}, {}).addImm(Val).addImm(Shift);
5426 return &*Mov;
5427}
5428
/// Try to materialize \p Bits with an FP modified-immediate FMOV:
/// FMOVv4f32_ns/FMOVv2f32_ns for f32 splats, or FMOVv2f64_ns for an f64
/// splat (128-bit destinations only).
/// \returns the built instruction, or nullptr if the value is not encodable.
/// NOTE(review): the isAdvSIMDModImmType11 check / encode calls appear to be
/// missing from this listing — confirm against the full source.
5429MachineInstr *AArch64InstructionSelector::tryAdvSIMDModImmFP(
5430 Register Dst, unsigned DstSize, APInt Bits, MachineIRBuilder &Builder) {
5431
5432 unsigned int Op;
5433 bool IsWide = false;
5434 if (DstSize == 128) {
  // 128-bit form requires the constant to be a splat of its low 64 bits.
5435 if (Bits.getHiBits(64) != Bits.getLoBits(64))
5436 return nullptr;
5437 Op = AArch64::FMOVv4f32_ns;
  // Remember we may retry with the f64 form below.
5438 IsWide = true;
5439 } else {
5440 Op = AArch64::FMOVv2f32_ns;
5441 }
5442
5443 uint64_t Val = Bits.zextOrTrunc(64).getZExtValue();
5444
  // Type12 (f64 immediate) is only reachable for 128-bit destinations.
5447 } else if (IsWide && AArch64_AM::isAdvSIMDModImmType12(Val)) {
5449 Op = AArch64::FMOVv2f64_ns;
5450 } else
5451 return nullptr;
5452
5453 auto Mov = Builder.buildInstr(Op, {Dst}, {}).addImm(Val);
5455 return &*Mov;
5456}
5457
/// Select a pre/post-indexed extending load (G_INDEXED_{ANY,S,Z}EXTLOAD)
/// into the matching LDRSB/LDRSH/LDRSW/LDRB/LDRH/... pre/post opcode.
/// Scalar GPR destinations only; FPR is handled for any-ext/zero-ext loads.
/// \returns true if the instruction was selected and erased.
5458bool AArch64InstructionSelector::selectIndexedExtLoad(
5459 MachineInstr &MI, MachineRegisterInfo &MRI) {
5460 auto &ExtLd = cast<GIndexedAnyExtLoad>(MI);
5461 Register Dst = ExtLd.getDstReg();
5462 Register WriteBack = ExtLd.getWritebackReg();
5463 Register Base = ExtLd.getBaseReg();
5464 Register Offset = ExtLd.getOffsetReg();
5465 LLT Ty = MRI.getType(Dst);
5466 assert(Ty.getSizeInBits() <= 64); // Only for scalar GPRs.
5467 unsigned MemSizeBits = ExtLd.getMMO().getMemoryType().getSizeInBits();
5468 bool IsPre = ExtLd.isPre();
5469 bool IsSExt = isa<GIndexedSExtLoad>(ExtLd);
 // Subregister index to use when the real load produces a value narrower
 // than Dst and must be widened via SUBREG_TO_REG; 0 means plain copy.
5470 unsigned InsertIntoSubReg = 0;
5471 bool IsDst64 = Ty.getSizeInBits() == 64;
5472
5473 // ZExt/SExt should be on gpr but can handle extload and zextload of fpr, so
5474 // long as they are scalar.
5475 bool IsFPR = RBI.getRegBank(Dst, MRI, TRI)->getID() == AArch64::FPRRegBankID;
5476 if ((IsSExt && IsFPR) || Ty.isVector())
5477 return false;
5478
5479 unsigned Opc = 0;
 // Destination type of the raw load instruction before any widening.
5480 LLT NewLdDstTy;
5481 LLT s32 = LLT::scalar(32);
5482 LLT s64 = LLT::scalar(64);
5483
 // Pick the opcode from (mem size, extension kind, bank, pre/post).
5484 if (MemSizeBits == 8) {
5485 if (IsSExt) {
5486 if (IsDst64)
5487 Opc = IsPre ? AArch64::LDRSBXpre : AArch64::LDRSBXpost;
5488 else
5489 Opc = IsPre ? AArch64::LDRSBWpre : AArch64::LDRSBWpost;
5490 NewLdDstTy = IsDst64 ? s64 : s32;
5491 } else if (IsFPR) {
5492 Opc = IsPre ? AArch64::LDRBpre : AArch64::LDRBpost;
5493 InsertIntoSubReg = AArch64::bsub;
5494 NewLdDstTy = LLT::scalar(MemSizeBits);
5495 } else {
 // GPR zero/any-extend: 32-bit load; implicit zero of the upper bits,
 // widen to 64 via sub_32 only when Dst is 64-bit.
5496 Opc = IsPre ? AArch64::LDRBBpre : AArch64::LDRBBpost;
5497 InsertIntoSubReg = IsDst64 ? AArch64::sub_32 : 0;
5498 NewLdDstTy = s32;
5499 }
5500 } else if (MemSizeBits == 16) {
5501 if (IsSExt) {
5502 if (IsDst64)
5503 Opc = IsPre ? AArch64::LDRSHXpre : AArch64::LDRSHXpost;
5504 else
5505 Opc = IsPre ? AArch64::LDRSHWpre : AArch64::LDRSHWpost;
5506 NewLdDstTy = IsDst64 ? s64 : s32;
5507 } else if (IsFPR) {
5508 Opc = IsPre ? AArch64::LDRHpre : AArch64::LDRHpost;
5509 InsertIntoSubReg = AArch64::hsub;
5510 NewLdDstTy = LLT::scalar(MemSizeBits);
5511 } else {
5512 Opc = IsPre ? AArch64::LDRHHpre : AArch64::LDRHHpost;
5513 InsertIntoSubReg = IsDst64 ? AArch64::sub_32 : 0;
5514 NewLdDstTy = s32;
5515 }
5516 } else if (MemSizeBits == 32) {
5517 if (IsSExt) {
 // LDRSW always sign-extends a 32-bit mem value into a 64-bit register.
5518 Opc = IsPre ? AArch64::LDRSWpre : AArch64::LDRSWpost;
5519 NewLdDstTy = s64;
5520 } else if (IsFPR) {
5521 Opc = IsPre ? AArch64::LDRSpre : AArch64::LDRSpost;
5522 InsertIntoSubReg = AArch64::ssub;
5523 NewLdDstTy = LLT::scalar(MemSizeBits);
5524 } else {
5525 Opc = IsPre ? AArch64::LDRWpre : AArch64::LDRWpost;
5526 InsertIntoSubReg = IsDst64 ? AArch64::sub_32 : 0;
5527 NewLdDstTy = s32;
5528 }
5529 } else {
5530 llvm_unreachable("Unexpected size for indexed load");
5531 }
5532
 // The pre/post opcodes take an immediate offset, so the offset vreg must
 // be a known constant.
5533 auto Cst = getIConstantVRegVal(Offset, MRI);
5534 if (!Cst)
5535 return false; // Shouldn't happen, but just in case.
5536
5537 auto LdMI = MIB.buildInstr(Opc, {WriteBack, NewLdDstTy}, {Base})
5538 .addImm(Cst->getSExtValue());
5539 LdMI.cloneMemRefs(ExtLd);
 // NOTE(review): a constrainSelectedInstRegOperands call on LdMI appears to
 // be missing from this listing — confirm against the full source.
5541 // Make sure to select the load with the MemTy as the dest type, and then
5542 // insert into a larger reg if needed.
5543 if (InsertIntoSubReg) {
5544 // Generate a SUBREG_TO_REG.
5545 auto SubToReg = MIB.buildInstr(TargetOpcode::SUBREG_TO_REG, {Dst}, {})
5546 .addUse(LdMI.getReg(1))
5547 .addImm(InsertIntoSubReg);
5549 SubToReg.getReg(0),
5550 *getRegClassForTypeOnBank(MRI.getType(Dst),
5551 *RBI.getRegBank(Dst, MRI, TRI)),
5552 MRI);
5553 } else {
 // Same-size result: a plain copy, selected immediately.
5554 auto Copy = MIB.buildCopy(Dst, LdMI.getReg(1));
5555 selectCopy(*Copy, TII, MRI, TRI, RBI);
5556 }
5557 MI.eraseFromParent();
5558
5559 return true;
5560}
5561
/// Select a pre/post-indexed load (G_INDEXED_LOAD) into the matching
/// LDR*pre/LDR*post opcode, choosing between GPR and FPR variants by the
/// destination's register bank. Extending loads are forwarded to
/// selectIndexedExtLoad.
/// \returns true if the instruction was selected and erased.
5562bool AArch64InstructionSelector::selectIndexedLoad(MachineInstr &MI,
5563 MachineRegisterInfo &MRI) {
5564 auto &Ld = cast<GIndexedLoad>(MI);
5565 Register Dst = Ld.getDstReg();
5566 Register WriteBack = Ld.getWritebackReg();
5567 Register Base = Ld.getBaseReg();
5568 Register Offset = Ld.getOffsetReg();
5569 assert(MRI.getType(Dst).getSizeInBits() <= 128 &&
5570 "Unexpected type for indexed load");
5571 unsigned MemSize = Ld.getMMO().getMemoryType().getSizeInBytes();
5572
 // Memory narrower than the destination means this is an extending load.
5573 if (MemSize < MRI.getType(Dst).getSizeInBytes())
5574 return selectIndexedExtLoad(MI, MRI);
5575
 // Opcode tables indexed by log2(mem size in bytes): 1, 2, 4, 8 (and 16
 // bytes for FPR Q-registers).
5576 unsigned Opc = 0;
5577 if (Ld.isPre()) {
5578 static constexpr unsigned GPROpcodes[] = {
5579 AArch64::LDRBBpre, AArch64::LDRHHpre, AArch64::LDRWpre,
5580 AArch64::LDRXpre};
5581 static constexpr unsigned FPROpcodes[] = {
5582 AArch64::LDRBpre, AArch64::LDRHpre, AArch64::LDRSpre, AArch64::LDRDpre,
5583 AArch64::LDRQpre};
5584 Opc = (RBI.getRegBank(Dst, MRI, TRI)->getID() == AArch64::FPRRegBankID)
5585 ? FPROpcodes[Log2_32(MemSize)]
5586 : GPROpcodes[Log2_32(MemSize)];
5587 ;
5588 } else {
5589 static constexpr unsigned GPROpcodes[] = {
5590 AArch64::LDRBBpost, AArch64::LDRHHpost, AArch64::LDRWpost,
5591 AArch64::LDRXpost};
5592 static constexpr unsigned FPROpcodes[] = {
5593 AArch64::LDRBpost, AArch64::LDRHpost, AArch64::LDRSpost,
5594 AArch64::LDRDpost, AArch64::LDRQpost};
5595 Opc = (RBI.getRegBank(Dst, MRI, TRI)->getID() == AArch64::FPRRegBankID)
5596 ? FPROpcodes[Log2_32(MemSize)]
5597 : GPROpcodes[Log2_32(MemSize)];
5598 ;
5599 }
 // The pre/post forms take an immediate offset; bail if it isn't constant.
5600 auto Cst = getIConstantVRegVal(Offset, MRI);
5601 if (!Cst)
5602 return false; // Shouldn't happen, but just in case.
5603 auto LdMI =
5604 MIB.buildInstr(Opc, {WriteBack, Dst}, {Base}).addImm(Cst->getSExtValue());
5605 LdMI.cloneMemRefs(Ld);
 // NOTE(review): a constrainSelectedInstRegOperands call on LdMI appears to
 // be missing from this listing — confirm against the full source.
5607 MI.eraseFromParent();
5608 return true;
5609}
5610
/// Select a pre/post-indexed store (G_INDEXED_STORE) into the matching
/// STR*pre/STR*post opcode, choosing between GPR and FPR variants by the
/// value operand's register bank.
/// \returns true if the instruction was selected and erased.
5611bool AArch64InstructionSelector::selectIndexedStore(GIndexedStore &I,
5612 MachineRegisterInfo &MRI) {
5613 Register Dst = I.getWritebackReg();
5614 Register Val = I.getValueReg();
5615 Register Base = I.getBaseReg();
5616 Register Offset = I.getOffsetReg();
5617 assert(MRI.getType(Val).getSizeInBits() <= 128 &&
5618 "Unexpected type for indexed store");
5619
5620 LocationSize MemSize = I.getMMO().getSize();
5621 unsigned MemSizeInBytes = MemSize.getValue();
5622
5623 assert(MemSizeInBytes && MemSizeInBytes <= 16 &&
5624 "Unexpected indexed store size");
 // Index into the opcode tables below: 0..4 for 1/2/4/8/16-byte stores.
5625 unsigned MemSizeLog2 = Log2_32(MemSizeInBytes);
5626
5627 unsigned Opc = 0;
5628 if (I.isPre()) {
5629 static constexpr unsigned GPROpcodes[] = {
5630 AArch64::STRBBpre, AArch64::STRHHpre, AArch64::STRWpre,
5631 AArch64::STRXpre};
5632 static constexpr unsigned FPROpcodes[] = {
5633 AArch64::STRBpre, AArch64::STRHpre, AArch64::STRSpre, AArch64::STRDpre,
5634 AArch64::STRQpre};
5635
5636 if (RBI.getRegBank(Val, MRI, TRI)->getID() == AArch64::FPRRegBankID)
5637 Opc = FPROpcodes[MemSizeLog2];
5638 else
5639 Opc = GPROpcodes[MemSizeLog2];
5640 } else {
5641 static constexpr unsigned GPROpcodes[] = {
5642 AArch64::STRBBpost, AArch64::STRHHpost, AArch64::STRWpost,
5643 AArch64::STRXpost};
5644 static constexpr unsigned FPROpcodes[] = {
5645 AArch64::STRBpost, AArch64::STRHpost, AArch64::STRSpost,
5646 AArch64::STRDpost, AArch64::STRQpost};
5647
5648 if (RBI.getRegBank(Val, MRI, TRI)->getID() == AArch64::FPRRegBankID)
5649 Opc = FPROpcodes[MemSizeLog2];
5650 else
5651 Opc = GPROpcodes[MemSizeLog2];
5652 }
5653
 // The pre/post forms take an immediate offset; bail if it isn't constant.
5654 auto Cst = getIConstantVRegVal(Offset, MRI);
5655 if (!Cst)
5656 return false; // Shouldn't happen, but just in case.
5657 auto Str =
5658 MIB.buildInstr(Opc, {Dst}, {Val, Base}).addImm(Cst->getSExtValue());
5659 Str.cloneMemRefs(I);
 // NOTE(review): a constrainSelectedInstRegOperands call on Str appears to
 // be missing from this listing — confirm against the full source.
5661 I.eraseFromParent();
5662 return true;
5663}
5664
/// Materialize the vector constant \p CV into \p Dst (64- or 128-bit).
/// Strategy, in order of preference:
///   1. All-zeros: a single MOVIv2d_ns #0 (copied out of dsub for 64-bit).
///   2. Splat constants: one of the AdvSIMD modified-immediate forms
///      (MOVI/MVNI/FMOV), tried on both the value and its bitwise inverse.
///   3. A MOVI of the per-element FNEG'd bits followed by an FNEG.
///   4. Fallback: a constant-pool load plus a copy.
/// \returns the final instruction defining \p Dst, or nullptr on failure.
5665MachineInstr *
5666AArch64InstructionSelector::emitConstantVector(Register Dst, Constant *CV,
5667 MachineIRBuilder &MIRBuilder,
5668 MachineRegisterInfo &MRI) {
5669 LLT DstTy = MRI.getType(Dst);
5670 unsigned DstSize = DstTy.getSizeInBits();
5671 assert((DstSize == 64 || DstSize == 128) &&
5672 "Unexpected vector constant size");
5673
5674 if (CV->isNullValue()) {
5675 if (DstSize == 128) {
5676 auto Mov =
5677 MIRBuilder.buildInstr(AArch64::MOVIv2d_ns, {Dst}, {}).addImm(0)5
5679 return &*Mov;
5680 }
5681
5682 if (DstSize == 64) {
 // Zero the full Q register, then take the low 64 bits via dsub.
5683 auto Mov =
5684 MIRBuilder
5685 .buildInstr(AArch64::MOVIv2d_ns, {&AArch64::FPR128RegClass}, {})
5686 .addImm(0);
5687 auto Copy = MIRBuilder.buildInstr(TargetOpcode::COPY, {Dst}, {})
5688 .addReg(Mov.getReg(0), {}, AArch64::dsub);
5689 RBI.constrainGenericRegister(Dst, AArch64::FPR64RegClass, MRI);
5690 return &*Copy;
5691 }
5692 }
5693
5694 if (Constant *SplatValue = CV->getSplatValue()) {
 // View the splat element as raw bits (bitcast FP constants), then
 // replicate it across the whole destination width.
5695 APInt SplatValueAsInt =
5696 isa<ConstantFP>(SplatValue)
5697 ? cast<ConstantFP>(SplatValue)->getValueAPF().bitcastToAPInt()
5698 : SplatValue->getUniqueInteger();
5699 APInt DefBits = APInt::getSplat(
5700 DstSize, SplatValueAsInt.trunc(DstTy.getScalarSizeInBits()));
 // Try every modified-immediate encoding on DefBits, then on ~DefBits
 // (using the inverting MVNI forms).
5701 auto TryMOVIWithBits = [&](APInt DefBits) -> MachineInstr * {
5702 MachineInstr *NewOp;
5703 bool Inv = false;
5704 if ((NewOp = tryAdvSIMDModImm64(Dst, DstSize, DefBits, MIRBuilder)) ||
5705 (NewOp =
5706 tryAdvSIMDModImm32(Dst, DstSize, DefBits, MIRBuilder, Inv)) ||
5707 (NewOp =
5708 tryAdvSIMDModImm321s(Dst, DstSize, DefBits, MIRBuilder, Inv)) ||
5709 (NewOp =
5710 tryAdvSIMDModImm16(Dst, DstSize, DefBits, MIRBuilder, Inv)) ||
5711 (NewOp = tryAdvSIMDModImm8(Dst, DstSize, DefBits, MIRBuilder)) ||
5712 (NewOp = tryAdvSIMDModImmFP(Dst, DstSize, DefBits, MIRBuilder)))
5713 return NewOp;
5714
5715 DefBits = ~DefBits;
5716 Inv = true;
5717 if ((NewOp =
5718 tryAdvSIMDModImm32(Dst, DstSize, DefBits, MIRBuilder, Inv)) ||
5719 (NewOp =
5720 tryAdvSIMDModImm321s(Dst, DstSize, DefBits, MIRBuilder, Inv)) ||
5721 (NewOp = tryAdvSIMDModImm16(Dst, DstSize, DefBits, MIRBuilder, Inv)))
5722 return NewOp;
5723 return nullptr;
5724 };
5725
5726 if (auto *NewOp = TryMOVIWithBits(DefBits))
5727 return NewOp;
5728
5729 // See if a fneg of the constant can be materialized with a MOVI, etc
5730 auto TryWithFNeg = [&](APInt DefBits, int NumBits,
5731 unsigned NegOpc) -> MachineInstr * {
5732 // FNegate each sub-element of the constant
 // Build a mask with the sign bit of every NumBits-wide lane set, then
 // XOR it in to flip each lane's sign.
5733 APInt Neg = APInt::getHighBitsSet(NumBits, 1).zext(DstSize);
5734 APInt NegBits(DstSize, 0);
5735 unsigned NumElts = DstSize / NumBits;
5736 for (unsigned i = 0; i < NumElts; i++)
5737 NegBits |= Neg << (NumBits * i);
5738 NegBits = DefBits ^ NegBits;
5739
5740 // Try to create the new constants with MOVI, and if so generate a fneg
5741 // for it.
5742 if (auto *NewOp = TryMOVIWithBits(NegBits)) {
 // Redirect the MOVI into a fresh temp so the FNEG can define Dst.
5743 Register NewDst = MRI.createVirtualRegister(
5744 DstSize == 64 ? &AArch64::FPR64RegClass : &AArch64::FPR128RegClass);
5745 NewOp->getOperand(0).setReg(NewDst);
5746 return MIRBuilder.buildInstr(NegOpc, {Dst}, {NewDst});
5747 }
5748 return nullptr;
5749 };
5750 MachineInstr *R;
5751 if ((R = TryWithFNeg(DefBits, 32,
5752 DstSize == 64 ? AArch64::FNEGv2f32
5753 : AArch64::FNEGv4f32)) ||
5754 (R = TryWithFNeg(DefBits, 64,
5755 DstSize == 64 ? AArch64::FNEGDr
5756 : AArch64::FNEGv2f64)) ||
5757 (STI.hasFullFP16() &&
5758 (R = TryWithFNeg(DefBits, 16,
5759 DstSize == 64 ? AArch64::FNEGv4f16
5760 : AArch64::FNEGv8f16))))
5761 return R;
5762 }
5763
 // No immediate form worked: fall back to loading from the constant pool.
5764 auto *CPLoad = emitLoadFromConstantPool(CV, MIRBuilder);
5765 if (!CPLoad) {
5766 LLVM_DEBUG(dbgs() << "Could not generate cp load for constant vector!");
5767 return nullptr;
5768 }
5769
5770 auto Copy = MIRBuilder.buildCopy(Dst, CPLoad->getOperand(0));
 // NOTE(review): a RBI.constrainGenericRegister call appears truncated in
 // this listing — confirm against the full source.
5772 Dst, *MRI.getRegClass(CPLoad->getOperand(0).getReg()), MRI);
5773 return &*Copy;
5774}
5775
/// If every source of this G_BUILD_VECTOR is a (possibly look-through)
/// G_CONSTANT / G_FCONSTANT, emit the whole vector via emitConstantVector
/// (modified-immediate move or constant-pool load) instead of a chain of
/// lane inserts.
/// \returns true if the build_vector was replaced and erased.
5776bool AArch64InstructionSelector::tryOptConstantBuildVec(
5777 MachineInstr &I, LLT DstTy, MachineRegisterInfo &MRI) {
5778 assert(I.getOpcode() == TargetOpcode::G_BUILD_VECTOR);
5779 unsigned DstSize = DstTy.getSizeInBits();
5780 assert(DstSize <= 128 && "Unexpected build_vec type!");
 // Sub-32-bit vectors are not worth a constant-pool load.
5781 if (DstSize < 32)
5782 return false;
5783 // Check if we're building a constant vector, in which case we want to
5784 // generate a constant pool load instead of a vector insert sequence.
 // NOTE(review): the declaration of Csts (a SmallVector<Constant *>)
 // appears to be missing from this listing — confirm against the full
 // source.
5786 for (unsigned Idx = 1; Idx < I.getNumOperands(); ++Idx) {
5787 Register OpReg = I.getOperand(Idx).getReg();
5788 if (auto AnyConst = getAnyConstantVRegValWithLookThrough(
5789 OpReg, MRI, /*LookThroughInstrs=*/true,
5790 /*LookThroughAnyExt=*/true)) {
5791 MachineInstr *DefMI = MRI.getVRegDef(AnyConst->VReg);
5792
5793 if (DefMI->getOpcode() == TargetOpcode::G_CONSTANT) {
5794 Csts.emplace_back(
5795 ConstantInt::get(MIB.getMF().getFunction().getContext(),
5796 std::move(AnyConst->Value)));
5797 continue;
5798 }
5799
5800 if (DefMI->getOpcode() == TargetOpcode::G_FCONSTANT) {
5801 Csts.emplace_back(
5802 const_cast<ConstantFP *>(DefMI->getOperand(1).getFPImm()));
5803 continue;
5804 }
5805 }
 // Any non-constant element aborts the whole optimization.
5806 return false;
5807 }
5808 Constant *CV = ConstantVector::get(Csts);
5809 if (!emitConstantVector(I.getOperand(0).getReg(), CV, MIB, MRI))
5810 return false;
5811 I.eraseFromParent();
5812 return true;
5813}
5814
5815bool AArch64InstructionSelector::tryOptBuildVecToSubregToReg(
5816 MachineInstr &I, MachineRegisterInfo &MRI) {
5817 // Given:
5818 // %vec = G_BUILD_VECTOR %elt, %undef, %undef, ... %undef
5819 //
5820 // Select the G_BUILD_VECTOR as a SUBREG_TO_REG from %elt.
5821 Register Dst = I.getOperand(0).getReg();
5822 Register EltReg = I.getOperand(1).getReg();
5823 LLT EltTy = MRI.getType(EltReg);
5824 // If the index isn't on the same bank as its elements, then this can't be a
5825 // SUBREG_TO_REG.
5826 const RegisterBank &EltRB = *RBI.getRegBank(EltReg, MRI, TRI);
5827 const RegisterBank &DstRB = *RBI.getRegBank(Dst, MRI, TRI);
5828 if (EltRB != DstRB)
5829 return false;
5830 if (any_of(drop_begin(I.operands(), 2), [&MRI](const MachineOperand &Op) {
5831 return !getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, Op.getReg(), MRI);
5832 }))
5833 return false;
5834 unsigned SubReg;
5835 const TargetRegisterClass *EltRC = getRegClassForTypeOnBank(EltTy, EltRB);
5836 if (!EltRC)
5837 return false;
5838 const TargetRegisterClass *DstRC =
5839 getRegClassForTypeOnBank(MRI.getType(Dst), DstRB);
5840 if (!DstRC)
5841 return false;
5842 if (!getSubRegForClass(EltRC, TRI, SubReg))
5843 return false;
5844 auto SubregToReg = MIB.buildInstr(AArch64::SUBREG_TO_REG, {Dst}, {})
5845 .addUse(EltReg)
5846 .addImm(SubReg);
5847 I.eraseFromParent();
5848 constrainSelectedInstRegOperands(*SubregToReg, TII, TRI, RBI);
5849 return RBI.constrainGenericRegister(Dst, *DstRC, MRI);
5850}
5851
/// Select G_BUILD_VECTOR. First tries the constant-vector and
/// single-element (SUBREG_TO_REG) fast paths, then falls back to a scalar
/// insert followed by one lane insert per remaining non-undef element,
/// copying out a D/S subregister when the result is narrower than 128 bits.
/// \returns true if the instruction was selected.
5852bool AArch64InstructionSelector::selectBuildVector(MachineInstr &I,
5853 MachineRegisterInfo &MRI) {
5854 assert(I.getOpcode() == TargetOpcode::G_BUILD_VECTOR);
5855 // Until we port more of the optimized selections, for now just use a vector
5856 // insert sequence.
5857 const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
5858 const LLT EltTy = MRI.getType(I.getOperand(1).getReg());
5859 unsigned EltSize = EltTy.getSizeInBits();
5860
5861 if (tryOptConstantBuildVec(I, DstTy, MRI))
5862 return true;
5863 if (tryOptBuildVecToSubregToReg(I, MRI))
5864 return true;
5865
5866 if (EltSize != 8 && EltSize != 16 && EltSize != 32 && EltSize != 64)
5867 return false; // Don't support all element types yet.
5868 const RegisterBank &RB = *RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI);
5869
 // Inserts always operate on a full 128-bit Q register; narrower results
 // are extracted at the end.
5870 const TargetRegisterClass *DstRC = &AArch64::FPR128RegClass;
5871 MachineInstr *ScalarToVec =
5872 emitScalarToVector(DstTy.getElementType().getSizeInBits(), DstRC,
5873 I.getOperand(1).getReg(), MIB);
5874 if (!ScalarToVec)
5875 return false;
5876
5877 Register DstVec = ScalarToVec->getOperand(0).getReg();
5878 unsigned DstSize = DstTy.getSizeInBits();
5879
5880 // Keep track of the last MI we inserted. Later on, we might be able to save
5881 // a copy using it.
5882 MachineInstr *PrevMI = ScalarToVec;
5883 for (unsigned i = 2, e = DstSize / EltSize + 1; i < e; ++i) {
5884 // Note that if we don't do a subregister copy, we can end up making an
5885 // extra register.
5886 Register OpReg = I.getOperand(i).getReg();
5887 // Do not emit inserts for undefs
5888 if (!getOpcodeDef<GImplicitDef>(OpReg, MRI)) {
5889 PrevMI = &*emitLaneInsert(std::nullopt, DstVec, OpReg, i - 1, RB, MIB);
5890 DstVec = PrevMI->getOperand(0).getReg();
5891 }
5892 }
5893
5894 // If DstTy's size in bits is less than 128, then emit a subregister copy
5895 // from DstVec to the last register we've defined.
5896 if (DstSize < 128) {
5897 // Force this to be FPR using the destination vector.
5898 const TargetRegisterClass *RC =
5899 getRegClassForTypeOnBank(DstTy, *RBI.getRegBank(DstVec, MRI, TRI));
5900 if (!RC)
5901 return false;
5902 if (RC != &AArch64::FPR32RegClass && RC != &AArch64::FPR64RegClass) {
5903 LLVM_DEBUG(dbgs() << "Unsupported register class!\n");
5904 return false;
5905 }
5906
 // Only 32-bit (ssub) and 64-bit (dsub) extractions are supported.
5907 unsigned SubReg = 0;
5908 if (!getSubRegForClass(RC, TRI, SubReg))
5909 return false;
5910 if (SubReg != AArch64::ssub && SubReg != AArch64::dsub) {
5911 LLVM_DEBUG(dbgs() << "Unsupported destination size! (" << DstSize
5912 << "\n");
5913 return false;
5914 }
5915
 // NOTE(review): the declaration of Reg (used by RegOp.setReg below)
 // appears to be missing from this listing — confirm against the full
 // source.
5917 Register DstReg = I.getOperand(0).getReg();
5918
5919 MIB.buildInstr(TargetOpcode::COPY, {DstReg}, {}).addReg(DstVec, {}, SubReg);
5920 MachineOperand &RegOp = I.getOperand(1);
5921 RegOp.setReg(Reg);
5922 RBI.constrainGenericRegister(DstReg, *RC, MRI);
5923 } else {
5924 // We either have a vector with all elements (except the first one) undef or
5925 // at least one non-undef non-first element. In the first case, we need to
5926 // constrain the output register ourselves as we may have generated an
5927 // INSERT_SUBREG operation which is a generic operation for which the
5928 // output regclass cannot be automatically chosen.
5929 //
5930 // In the second case, there is no need to do this as it may generate an
5931 // instruction like INSvi32gpr where the regclass can be automatically
5932 // chosen.
5933 //
5934 // Also, we save a copy by re-using the destination register on the final
5935 // insert.
5936 PrevMI->getOperand(0).setReg(I.getOperand(0).getReg());
 // NOTE(review): a constrainSelectedInstRegOperands call appears to be
 // missing from this listing — confirm against the full source.
5938
5939 Register DstReg = PrevMI->getOperand(0).getReg();
5940 if (PrevMI == ScalarToVec && DstReg.isVirtual()) {
5941 const TargetRegisterClass *RC =
5942 getRegClassForTypeOnBank(DstTy, *RBI.getRegBank(DstVec, MRI, TRI));
5943 RBI.constrainGenericRegister(DstReg, *RC, MRI);
5944 }
5945 }
5946
5948 return true;
5949}
5950
/// Select a multi-vector NEON load intrinsic (ld1x2/ld2/ld2r/...): build the
/// tuple-load \p Opc from the pointer operand, then split the tuple result
/// into \p NumVecs destination registers via dsub0/qsub0+Idx subreg copies.
/// \returns true (selection always succeeds once called).
5951bool AArch64InstructionSelector::selectVectorLoadIntrinsic(unsigned Opc,
5952 unsigned NumVecs,
5953 MachineInstr &I) {
5954 assert(I.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS);
5955 assert(Opc && "Expected an opcode?");
5956 assert(NumVecs > 1 && NumVecs < 5 && "Only support 2, 3, or 4 vectors");
5957 auto &MRI = *MIB.getMRI();
5958 LLT Ty = MRI.getType(I.getOperand(0).getReg());
5959 unsigned Size = Ty.getSizeInBits();
5960 assert((Size == 64 || Size == 128) &&
5961 "Destination must be 64 bits or 128 bits?");
 // D-register tuples use dsub0..., Q-register tuples qsub0...
5962 unsigned SubReg = Size == 64 ? AArch64::dsub0 : AArch64::qsub0;
 // The pointer is always the last operand of the intrinsic.
5963 auto Ptr = I.getOperand(I.getNumOperands() - 1).getReg();
5964 assert(MRI.getType(Ptr).isPointer() && "Expected a pointer type?");
5965 auto Load = MIB.buildInstr(Opc, {Ty}, {Ptr});
5966 Load.cloneMemRefs(I);
 // NOTE(review): a constrainSelectedInstRegOperands call on Load appears to
 // be missing from this listing — confirm against the full source.
5968 Register SelectedLoadDst = Load->getOperand(0).getReg();
5969 for (unsigned Idx = 0; Idx < NumVecs; ++Idx) {
5970 auto Vec = MIB.buildInstr(TargetOpcode::COPY, {I.getOperand(Idx)}, {})
5971 .addReg(SelectedLoadDst, {}, SubReg + Idx);
5972 // Emit the subreg copies and immediately select them.
5973 // FIXME: We should refactor our copy code into an emitCopy helper and
5974 // clean up uses of this pattern elsewhere in the selector.
5975 selectCopy(*Vec, TII, MRI, TRI, RBI);
5976 }
5977 return true;
5978}
5979
/// Select a NEON load-lane intrinsic (ld2lane/ld3lane/...): pack the source
/// vectors into a Q-register tuple, emit the lane load \p Opc, then unpack
/// the results. 64-bit vectors are widened to 128 bits for the operation
/// and narrowed again on the way out.
/// \returns false if the lane index is not a compile-time constant or
/// narrowing a result fails.
5980bool AArch64InstructionSelector::selectVectorLoadLaneIntrinsic(
5981 unsigned Opc, unsigned NumVecs, MachineInstr &I) {
5982 assert(I.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS);
5983 assert(Opc && "Expected an opcode?");
5984 assert(NumVecs > 1 && NumVecs < 5 && "Only support 2, 3, or 4 vectors");
5985 auto &MRI = *MIB.getMRI();
5986 LLT Ty = MRI.getType(I.getOperand(0).getReg());
5987 bool Narrow = Ty.getSizeInBits() == 64;
5988
 // Source vector operands start after the NumVecs defs and the intrinsic ID.
5989 auto FirstSrcRegIt = I.operands_begin() + NumVecs + 1;
5990 SmallVector<Register, 4> Regs(NumVecs);
5991 std::transform(FirstSrcRegIt, FirstSrcRegIt + NumVecs, Regs.begin(),
5992 [](auto MO) { return MO.getReg(); });
5993
5994 if (Narrow) {
 // Widen each 64-bit source into a 128-bit register so we can form a
 // Q-register tuple.
5995 transform(Regs, Regs.begin(), [this](Register Reg) {
5996 return emitScalarToVector(64, &AArch64::FPR128RegClass, Reg, MIB)
5997 ->getOperand(0)
5998 .getReg();
5999 });
6000 Ty = Ty.multiplyElements(2);
6001 }
6002
6003 Register Tuple = createQTuple(Regs, MIB);
 // The lane number must be an immediate for the LD*i* instructions.
6004 auto LaneNo = getIConstantVRegVal((FirstSrcRegIt + NumVecs)->getReg(), MRI);
6005 if (!LaneNo)
6006 return false;
6007
6008 Register Ptr = (FirstSrcRegIt + NumVecs + 1)->getReg();
6009 auto Load = MIB.buildInstr(Opc, {Ty}, {})
6010 .addReg(Tuple)
6011 .addImm(LaneNo->getZExtValue())
6012 .addReg(Ptr);
6013 Load.cloneMemRefs(I);
 // NOTE(review): a constrainSelectedInstRegOperands call on Load appears to
 // be missing from this listing — confirm against the full source.
6015 Register SelectedLoadDst = Load->getOperand(0).getReg();
6016 unsigned SubReg = AArch64::qsub0;
6017 for (unsigned Idx = 0; Idx < NumVecs; ++Idx) {
 // In the narrow case copy into a scratch 128-bit class first, then
 // extract the low 64 bits into the real destination.
6018 auto Vec = MIB.buildInstr(TargetOpcode::COPY,
6019 {Narrow ? DstOp(&AArch64::FPR128RegClass)
6020 : DstOp(I.getOperand(Idx).getReg())},
6021 {})
6022 .addReg(SelectedLoadDst, {}, SubReg + Idx);
6023 Register WideReg = Vec.getReg(0);
6024 // Emit the subreg copies and immediately select them.
6025 selectCopy(*Vec, TII, MRI, TRI, RBI);
6026 if (Narrow &&
6027 !emitNarrowVector(I.getOperand(Idx).getReg(), WideReg, MIB, MRI))
6028 return false;
6029 }
6030 return true;
6031}
6032
/// Select a multi-vector NEON store intrinsic (st1x2/st2/...): pack the
/// \p NumVecs value operands into a D- or Q-register tuple (chosen by the
/// element vector's size) and emit the tuple store \p Opc.
6033void AArch64InstructionSelector::selectVectorStoreIntrinsic(MachineInstr &I,
6034 unsigned NumVecs,
6035 unsigned Opc) {
6036 MachineRegisterInfo &MRI = I.getParent()->getParent()->getRegInfo();
6037 LLT Ty = MRI.getType(I.getOperand(1).getReg());
 // Operand layout: [intrinsic id, NumVecs values, pointer].
6038 Register Ptr = I.getOperand(1 + NumVecs).getReg();
6039
6040 SmallVector<Register, 2> Regs(NumVecs);
6041 std::transform(I.operands_begin() + 1, I.operands_begin() + 1 + NumVecs,
6042 Regs.begin(), [](auto MO) { return MO.getReg(); });
6043
6044 Register Tuple = Ty.getSizeInBits() == 128 ? createQTuple(Regs, MIB)
6045 : createDTuple(Regs, MIB);
6046 auto Store = MIB.buildInstr(Opc, {}, {Tuple, Ptr});
6047 Store.cloneMemRefs(I);
 // NOTE(review): a constrainSelectedInstRegOperands call on Store appears
 // to be missing from this listing — confirm against the full source.
6049}
6050
/// Select a NEON store-lane intrinsic (st2lane/st3lane/...): widen 64-bit
/// sources to 128 bits, pack them into a Q-register tuple, and emit the
/// lane store \p Opc with the constant lane index.
/// \returns false if the lane index is not a compile-time constant.
6051bool AArch64InstructionSelector::selectVectorStoreLaneIntrinsic(
6052 MachineInstr &I, unsigned NumVecs, unsigned Opc) {
6053 MachineRegisterInfo &MRI = I.getParent()->getParent()->getRegInfo();
6054 LLT Ty = MRI.getType(I.getOperand(1).getReg());
6055 bool Narrow = Ty.getSizeInBits() == 64;
6056
 // Operand layout: [intrinsic id, NumVecs values, lane, pointer].
6057 SmallVector<Register, 2> Regs(NumVecs);
6058 std::transform(I.operands_begin() + 1, I.operands_begin() + 1 + NumVecs,
6059 Regs.begin(), [](auto MO) { return MO.getReg(); });
6060
 // The ST*i* lane instructions take a Q-register tuple, so widen 64-bit
 // sources first.
6061 if (Narrow)
6062 transform(Regs, Regs.begin(), [this](Register Reg) {
6063 return emitScalarToVector(64, &AArch64::FPR128RegClass, Reg, MIB)
6064 ->getOperand(0)
6065 .getReg();
6066 });
6067
6068 Register Tuple = createQTuple(Regs, MIB);
6069
 // The lane number must be an immediate.
6070 auto LaneNo = getIConstantVRegVal(I.getOperand(1 + NumVecs).getReg(), MRI);
6071 if (!LaneNo)
6072 return false;
6073 Register Ptr = I.getOperand(1 + NumVecs + 1).getReg();
6074 auto Store = MIB.buildInstr(Opc, {}, {})
6075 .addReg(Tuple)
6076 .addImm(LaneNo->getZExtValue())
6077 .addReg(Ptr);
6078 Store.cloneMemRefs(I);
 // NOTE(review): a constrainSelectedInstRegOperands call on Store appears
 // to be missing from this listing — confirm against the full source.
6080 return true;
6081}
6082
6083bool AArch64InstructionSelector::selectIntrinsicWithSideEffects(
6084 MachineInstr &I, MachineRegisterInfo &MRI) {
6085 // Find the intrinsic ID.
6086 unsigned IntrinID = cast<GIntrinsic>(I).getIntrinsicID();
6087
6088 const LLT S8 = LLT::scalar(8);
6089 const LLT S16 = LLT::scalar(16);
6090 const LLT S32 = LLT::scalar(32);
6091 const LLT S64 = LLT::scalar(64);
6092 const LLT P0 = LLT::pointer(0, 64);
6093 // Select the instruction.
6094 switch (IntrinID) {
6095 default:
6096 return false;
6097 case Intrinsic::aarch64_ldxp:
6098 case Intrinsic::aarch64_ldaxp: {
6099 auto NewI = MIB.buildInstr(
6100 IntrinID == Intrinsic::aarch64_ldxp ? AArch64::LDXPX : AArch64::LDAXPX,
6101 {I.getOperand(0).getReg(), I.getOperand(1).getReg()},
6102 {I.getOperand(3)});
6103 NewI.cloneMemRefs(I);
6105 break;
6106 }
6107 case Intrinsic::aarch64_neon_ld1x2: {
6108 LLT Ty = MRI.getType(I.getOperand(0).getReg());
6109 unsigned Opc = 0;
6110 if (Ty == LLT::fixed_vector(8, S8))
6111 Opc = AArch64::LD1Twov8b;
6112 else if (Ty == LLT::fixed_vector(16, S8))
6113 Opc = AArch64::LD1Twov16b;
6114 else if (Ty == LLT::fixed_vector(4, S16))
6115 Opc = AArch64::LD1Twov4h;
6116 else if (Ty == LLT::fixed_vector(8, S16))
6117 Opc = AArch64::LD1Twov8h;
6118 else if (Ty == LLT::fixed_vector(2, S32))
6119 Opc = AArch64::LD1Twov2s;
6120 else if (Ty == LLT::fixed_vector(4, S32))
6121 Opc = AArch64::LD1Twov4s;
6122 else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
6123 Opc = AArch64::LD1Twov2d;
6124 else if (Ty == S64 || Ty == P0)
6125 Opc = AArch64::LD1Twov1d;
6126 else
6127 llvm_unreachable("Unexpected type for ld1x2!");
6128 selectVectorLoadIntrinsic(Opc, 2, I);
6129 break;
6130 }
6131 case Intrinsic::aarch64_neon_ld1x3: {
6132 LLT Ty = MRI.getType(I.getOperand(0).getReg());
6133 unsigned Opc = 0;
6134 if (Ty == LLT::fixed_vector(8, S8))
6135 Opc = AArch64::LD1Threev8b;
6136 else if (Ty == LLT::fixed_vector(16, S8))
6137 Opc = AArch64::LD1Threev16b;
6138 else if (Ty == LLT::fixed_vector(4, S16))
6139 Opc = AArch64::LD1Threev4h;
6140 else if (Ty == LLT::fixed_vector(8, S16))
6141 Opc = AArch64::LD1Threev8h;
6142 else if (Ty == LLT::fixed_vector(2, S32))
6143 Opc = AArch64::LD1Threev2s;
6144 else if (Ty == LLT::fixed_vector(4, S32))
6145 Opc = AArch64::LD1Threev4s;
6146 else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
6147 Opc = AArch64::LD1Threev2d;
6148 else if (Ty == S64 || Ty == P0)
6149 Opc = AArch64::LD1Threev1d;
6150 else
6151 llvm_unreachable("Unexpected type for ld1x3!");
6152 selectVectorLoadIntrinsic(Opc, 3, I);
6153 break;
6154 }
6155 case Intrinsic::aarch64_neon_ld1x4: {
6156 LLT Ty = MRI.getType(I.getOperand(0).getReg());
6157 unsigned Opc = 0;
6158 if (Ty == LLT::fixed_vector(8, S8))
6159 Opc = AArch64::LD1Fourv8b;
6160 else if (Ty == LLT::fixed_vector(16, S8))
6161 Opc = AArch64::LD1Fourv16b;
6162 else if (Ty == LLT::fixed_vector(4, S16))
6163 Opc = AArch64::LD1Fourv4h;
6164 else if (Ty == LLT::fixed_vector(8, S16))
6165 Opc = AArch64::LD1Fourv8h;
6166 else if (Ty == LLT::fixed_vector(2, S32))
6167 Opc = AArch64::LD1Fourv2s;
6168 else if (Ty == LLT::fixed_vector(4, S32))
6169 Opc = AArch64::LD1Fourv4s;
6170 else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
6171 Opc = AArch64::LD1Fourv2d;
6172 else if (Ty == S64 || Ty == P0)
6173 Opc = AArch64::LD1Fourv1d;
6174 else
6175 llvm_unreachable("Unexpected type for ld1x4!");
6176 selectVectorLoadIntrinsic(Opc, 4, I);
6177 break;
6178 }
6179 case Intrinsic::aarch64_neon_ld2: {
6180 LLT Ty = MRI.getType(I.getOperand(0).getReg());
6181 unsigned Opc = 0;
6182 if (Ty == LLT::fixed_vector(8, S8))
6183 Opc = AArch64::LD2Twov8b;
6184 else if (Ty == LLT::fixed_vector(16, S8))
6185 Opc = AArch64::LD2Twov16b;
6186 else if (Ty == LLT::fixed_vector(4, S16))
6187 Opc = AArch64::LD2Twov4h;
6188 else if (Ty == LLT::fixed_vector(8, S16))
6189 Opc = AArch64::LD2Twov8h;
6190 else if (Ty == LLT::fixed_vector(2, S32))
6191 Opc = AArch64::LD2Twov2s;
6192 else if (Ty == LLT::fixed_vector(4, S32))
6193 Opc = AArch64::LD2Twov4s;
6194 else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
6195 Opc = AArch64::LD2Twov2d;
6196 else if (Ty == S64 || Ty == P0)
6197 Opc = AArch64::LD1Twov1d;
6198 else
6199 llvm_unreachable("Unexpected type for ld2!");
6200 selectVectorLoadIntrinsic(Opc, 2, I);
6201 break;
6202 }
6203 case Intrinsic::aarch64_neon_ld2lane: {
6204 LLT Ty = MRI.getType(I.getOperand(0).getReg());
6205 unsigned Opc;
6206 if (Ty == LLT::fixed_vector(8, S8) || Ty == LLT::fixed_vector(16, S8))
6207 Opc = AArch64::LD2i8;
6208 else if (Ty == LLT::fixed_vector(4, S16) || Ty == LLT::fixed_vector(8, S16))
6209 Opc = AArch64::LD2i16;
6210 else if (Ty == LLT::fixed_vector(2, S32) || Ty == LLT::fixed_vector(4, S32))
6211 Opc = AArch64::LD2i32;
6212 else if (Ty == LLT::fixed_vector(2, S64) ||
6213 Ty == LLT::fixed_vector(2, P0) || Ty == S64 || Ty == P0)
6214 Opc = AArch64::LD2i64;
6215 else
6216 llvm_unreachable("Unexpected type for st2lane!");
6217 if (!selectVectorLoadLaneIntrinsic(Opc, 2, I))
6218 return false;
6219 break;
6220 }
6221 case Intrinsic::aarch64_neon_ld2r: {
6222 LLT Ty = MRI.getType(I.getOperand(0).getReg());
6223 unsigned Opc = 0;
6224 if (Ty == LLT::fixed_vector(8, S8))
6225 Opc = AArch64::LD2Rv8b;
6226 else if (Ty == LLT::fixed_vector(16, S8))
6227 Opc = AArch64::LD2Rv16b;
6228 else if (Ty == LLT::fixed_vector(4, S16))
6229 Opc = AArch64::LD2Rv4h;
6230 else if (Ty == LLT::fixed_vector(8, S16))
6231 Opc = AArch64::LD2Rv8h;
6232 else if (Ty == LLT::fixed_vector(2, S32))
6233 Opc = AArch64::LD2Rv2s;
6234 else if (Ty == LLT::fixed_vector(4, S32))
6235 Opc = AArch64::LD2Rv4s;
6236 else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
6237 Opc = AArch64::LD2Rv2d;
6238 else if (Ty == S64 || Ty == P0)
6239 Opc = AArch64::LD2Rv1d;
6240 else
6241 llvm_unreachable("Unexpected type for ld2r!");
6242 selectVectorLoadIntrinsic(Opc, 2, I);
6243 break;
6244 }
6245 case Intrinsic::aarch64_neon_ld3: {
6246 LLT Ty = MRI.getType(I.getOperand(0).getReg());
6247 unsigned Opc = 0;
6248 if (Ty == LLT::fixed_vector(8, S8))
6249 Opc = AArch64::LD3Threev8b;
6250 else if (Ty == LLT::fixed_vector(16, S8))
6251 Opc = AArch64::LD3Threev16b;
6252 else if (Ty == LLT::fixed_vector(4, S16))
6253 Opc = AArch64::LD3Threev4h;
6254 else if (Ty == LLT::fixed_vector(8, S16))
6255 Opc = AArch64::LD3Threev8h;
6256 else if (Ty == LLT::fixed_vector(2, S32))
6257 Opc = AArch64::LD3Threev2s;
6258 else if (Ty == LLT::fixed_vector(4, S32))
6259 Opc = AArch64::LD3Threev4s;
6260 else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
6261 Opc = AArch64::LD3Threev2d;
6262 else if (Ty == S64 || Ty == P0)
6263 Opc = AArch64::LD1Threev1d;
6264 else
6265 llvm_unreachable("Unexpected type for ld3!");
6266 selectVectorLoadIntrinsic(Opc, 3, I);
6267 break;
6268 }
6269 case Intrinsic::aarch64_neon_ld3lane: {
6270 LLT Ty = MRI.getType(I.getOperand(0).getReg());
6271 unsigned Opc;
6272 if (Ty == LLT::fixed_vector(8, S8) || Ty == LLT::fixed_vector(16, S8))
6273 Opc = AArch64::LD3i8;
6274 else if (Ty == LLT::fixed_vector(4, S16) || Ty == LLT::fixed_vector(8, S16))
6275 Opc = AArch64::LD3i16;
6276 else if (Ty == LLT::fixed_vector(2, S32) || Ty == LLT::fixed_vector(4, S32))
6277 Opc = AArch64::LD3i32;
6278 else if (Ty == LLT::fixed_vector(2, S64) ||
6279 Ty == LLT::fixed_vector(2, P0) || Ty == S64 || Ty == P0)
6280 Opc = AArch64::LD3i64;
6281 else
6282 llvm_unreachable("Unexpected type for st3lane!");
6283 if (!selectVectorLoadLaneIntrinsic(Opc, 3, I))
6284 return false;
6285 break;
6286 }
6287 case Intrinsic::aarch64_neon_ld3r: {
6288 LLT Ty = MRI.getType(I.getOperand(0).getReg());
6289 unsigned Opc = 0;
6290 if (Ty == LLT::fixed_vector(8, S8))
6291 Opc = AArch64::LD3Rv8b;
6292 else if (Ty == LLT::fixed_vector(16, S8))
6293 Opc = AArch64::LD3Rv16b;
6294 else if (Ty == LLT::fixed_vector(4, S16))
6295 Opc = AArch64::LD3Rv4h;
6296 else if (Ty == LLT::fixed_vector(8, S16))
6297 Opc = AArch64::LD3Rv8h;
6298 else if (Ty == LLT::fixed_vector(2, S32))
6299 Opc = AArch64::LD3Rv2s;
6300 else if (Ty == LLT::fixed_vector(4, S32))
6301 Opc = AArch64::LD3Rv4s;
6302 else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
6303 Opc = AArch64::LD3Rv2d;
6304 else if (Ty == S64 || Ty == P0)
6305 Opc = AArch64::LD3Rv1d;
6306 else
6307 llvm_unreachable("Unexpected type for ld3r!");
6308 selectVectorLoadIntrinsic(Opc, 3, I);
6309 break;
6310 }
6311 case Intrinsic::aarch64_neon_ld4: {
6312 LLT Ty = MRI.getType(I.getOperand(0).getReg());
6313 unsigned Opc = 0;
6314 if (Ty == LLT::fixed_vector(8, S8))
6315 Opc = AArch64::LD4Fourv8b;
6316 else if (Ty == LLT::fixed_vector(16, S8))
6317 Opc = AArch64::LD4Fourv16b;
6318 else if (Ty == LLT::fixed_vector(4, S16))
6319 Opc = AArch64::LD4Fourv4h;
6320 else if (Ty == LLT::fixed_vector(8, S16))
6321 Opc = AArch64::LD4Fourv8h;
6322 else if (Ty == LLT::fixed_vector(2, S32))
6323 Opc = AArch64::LD4Fourv2s;
6324 else if (Ty == LLT::fixed_vector(4, S32))
6325 Opc = AArch64::LD4Fourv4s;
6326 else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
6327 Opc = AArch64::LD4Fourv2d;
6328 else if (Ty == S64 || Ty == P0)
6329 Opc = AArch64::LD1Fourv1d;
6330 else
6331 llvm_unreachable("Unexpected type for ld4!");
6332 selectVectorLoadIntrinsic(Opc, 4, I);
6333 break;
6334 }
6335 case Intrinsic::aarch64_neon_ld4lane: {
6336 LLT Ty = MRI.getType(I.getOperand(0).getReg());
6337 unsigned Opc;
6338 if (Ty == LLT::fixed_vector(8, S8) || Ty == LLT::fixed_vector(16, S8))
6339 Opc = AArch64::LD4i8;
6340 else if (Ty == LLT::fixed_vector(4, S16) || Ty == LLT::fixed_vector(8, S16))
6341 Opc = AArch64::LD4i16;
6342 else if (Ty == LLT::fixed_vector(2, S32) || Ty == LLT::fixed_vector(4, S32))
6343 Opc = AArch64::LD4i32;
6344 else if (Ty == LLT::fixed_vector(2, S64) ||
6345 Ty == LLT::fixed_vector(2, P0) || Ty == S64 || Ty == P0)
6346 Opc = AArch64::LD4i64;
6347 else
6348 llvm_unreachable("Unexpected type for st4lane!");
6349 if (!selectVectorLoadLaneIntrinsic(Opc, 4, I))
6350 return false;
6351 break;
6352 }
6353 case Intrinsic::aarch64_neon_ld4r: {
6354 LLT Ty = MRI.getType(I.getOperand(0).getReg());
6355 unsigned Opc = 0;
6356 if (Ty == LLT::fixed_vector(8, S8))
6357 Opc = AArch64::LD4Rv8b;
6358 else if (Ty == LLT::fixed_vector(16, S8))
6359 Opc = AArch64::LD4Rv16b;
6360 else if (Ty == LLT::fixed_vector(4, S16))
6361 Opc = AArch64::LD4Rv4h;
6362 else if (Ty == LLT::fixed_vector(8, S16))
6363 Opc = AArch64::LD4Rv8h;
6364 else if (Ty == LLT::fixed_vector(2, S32))
6365 Opc = AArch64::LD4Rv2s;
6366 else if (Ty == LLT::fixed_vector(4, S32))
6367 Opc = AArch64::LD4Rv4s;
6368 else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
6369 Opc = AArch64::LD4Rv2d;
6370 else if (Ty == S64 || Ty == P0)
6371 Opc = AArch64::LD4Rv1d;
6372 else
6373 llvm_unreachable("Unexpected type for ld4r!");
6374 selectVectorLoadIntrinsic(Opc, 4, I);
6375 break;
6376 }
6377 case Intrinsic::aarch64_neon_st1x2: {
6378 LLT Ty = MRI.getType(I.getOperand(1).getReg());
6379 unsigned Opc;
6380 if (Ty == LLT::fixed_vector(8, S8))
6381 Opc = AArch64::ST1Twov8b;
6382 else if (Ty == LLT::fixed_vector(16, S8))
6383 Opc = AArch64::ST1Twov16b;
6384 else if (Ty == LLT::fixed_vector(4, S16))
6385 Opc = AArch64::ST1Twov4h;
6386 else if (Ty == LLT::fixed_vector(8, S16))
6387 Opc = AArch64::ST1Twov8h;
6388 else if (Ty == LLT::fixed_vector(2, S32))
6389 Opc = AArch64::ST1Twov2s;
6390 else if (Ty == LLT::fixed_vector(4, S32))
6391 Opc = AArch64::ST1Twov4s;
6392 else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
6393 Opc = AArch64::ST1Twov2d;
6394 else if (Ty == S64 || Ty == P0)
6395 Opc = AArch64::ST1Twov1d;
6396 else
6397 llvm_unreachable("Unexpected type for st1x2!");
6398 selectVectorStoreIntrinsic(I, 2, Opc);
6399 break;
6400 }
6401 case Intrinsic::aarch64_neon_st1x3: {
6402 LLT Ty = MRI.getType(I.getOperand(1).getReg());
6403 unsigned Opc;
6404 if (Ty == LLT::fixed_vector(8, S8))
6405 Opc = AArch64::ST1Threev8b;
6406 else if (Ty == LLT::fixed_vector(16, S8))
6407 Opc = AArch64::ST1Threev16b;
6408 else if (Ty == LLT::fixed_vector(4, S16))
6409 Opc = AArch64::ST1Threev4h;
6410 else if (Ty == LLT::fixed_vector(8, S16))
6411 Opc = AArch64::ST1Threev8h;
6412 else if (Ty == LLT::fixed_vector(2, S32))
6413 Opc = AArch64::ST1Threev2s;
6414 else if (Ty == LLT::fixed_vector(4, S32))
6415 Opc = AArch64::ST1Threev4s;
6416 else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
6417 Opc = AArch64::ST1Threev2d;
6418 else if (Ty == S64 || Ty == P0)
6419 Opc = AArch64::ST1Threev1d;
6420 else
6421 llvm_unreachable("Unexpected type for st1x3!");
6422 selectVectorStoreIntrinsic(I, 3, Opc);
6423 break;
6424 }
6425 case Intrinsic::aarch64_neon_st1x4: {
6426 LLT Ty = MRI.getType(I.getOperand(1).getReg());
6427 unsigned Opc;
6428 if (Ty == LLT::fixed_vector(8, S8))
6429 Opc = AArch64::ST1Fourv8b;
6430 else if (Ty == LLT::fixed_vector(16, S8))
6431 Opc = AArch64::ST1Fourv16b;
6432 else if (Ty == LLT::fixed_vector(4, S16))
6433 Opc = AArch64::ST1Fourv4h;
6434 else if (Ty == LLT::fixed_vector(8, S16))
6435 Opc = AArch64::ST1Fourv8h;
6436 else if (Ty == LLT::fixed_vector(2, S32))
6437 Opc = AArch64::ST1Fourv2s;
6438 else if (Ty == LLT::fixed_vector(4, S32))
6439 Opc = AArch64::ST1Fourv4s;
6440 else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
6441 Opc = AArch64::ST1Fourv2d;
6442 else if (Ty == S64 || Ty == P0)
6443 Opc = AArch64::ST1Fourv1d;
6444 else
6445 llvm_unreachable("Unexpected type for st1x4!");
6446 selectVectorStoreIntrinsic(I, 4, Opc);
6447 break;
6448 }
6449 case Intrinsic::aarch64_neon_st2: {
6450 LLT Ty = MRI.getType(I.getOperand(1).getReg());
6451 unsigned Opc;
6452 if (Ty == LLT::fixed_vector(8, S8))
6453 Opc = AArch64::ST2Twov8b;
6454 else if (Ty == LLT::fixed_vector(16, S8))
6455 Opc = AArch64::ST2Twov16b;
6456 else if (Ty == LLT::fixed_vector(4, S16))
6457 Opc = AArch64::ST2Twov4h;
6458 else if (Ty == LLT::fixed_vector(8, S16))
6459 Opc = AArch64::ST2Twov8h;
6460 else if (Ty == LLT::fixed_vector(2, S32))
6461 Opc = AArch64::ST2Twov2s;
6462 else if (Ty == LLT::fixed_vector(4, S32))
6463 Opc = AArch64::ST2Twov4s;
6464 else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
6465 Opc = AArch64::ST2Twov2d;
6466 else if (Ty == S64 || Ty == P0)
6467 Opc = AArch64::ST1Twov1d;
6468 else
6469 llvm_unreachable("Unexpected type for st2!");
6470 selectVectorStoreIntrinsic(I, 2, Opc);
6471 break;
6472 }
6473 case Intrinsic::aarch64_neon_st3: {
6474 LLT Ty = MRI.getType(I.getOperand(1).getReg());
6475 unsigned Opc;
6476 if (Ty == LLT::fixed_vector(8, S8))
6477 Opc = AArch64::ST3Threev8b;
6478 else if (Ty == LLT::fixed_vector(16, S8))
6479 Opc = AArch64::ST3Threev16b;
6480 else if (Ty == LLT::fixed_vector(4, S16))
6481 Opc = AArch64::ST3Threev4h;
6482 else if (Ty == LLT::fixed_vector(8, S16))
6483 Opc = AArch64::ST3Threev8h;
6484 else if (Ty == LLT::fixed_vector(2, S32))
6485 Opc = AArch64::ST3Threev2s;
6486 else if (Ty == LLT::fixed_vector(4, S32))
6487 Opc = AArch64::ST3Threev4s;
6488 else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
6489 Opc = AArch64::ST3Threev2d;
6490 else if (Ty == S64 || Ty == P0)
6491 Opc = AArch64::ST1Threev1d;
6492 else
6493 llvm_unreachable("Unexpected type for st3!");
6494 selectVectorStoreIntrinsic(I, 3, Opc);
6495 break;
6496 }
6497 case Intrinsic::aarch64_neon_st4: {
6498 LLT Ty = MRI.getType(I.getOperand(1).getReg());
6499 unsigned Opc;
6500 if (Ty == LLT::fixed_vector(8, S8))
6501 Opc = AArch64::ST4Fourv8b;
6502 else if (Ty == LLT::fixed_vector(16, S8))
6503 Opc = AArch64::ST4Fourv16b;
6504 else if (Ty == LLT::fixed_vector(4, S16))
6505 Opc = AArch64::ST4Fourv4h;
6506 else if (Ty == LLT::fixed_vector(8, S16))
6507 Opc = AArch64::ST4Fourv8h;
6508 else if (Ty == LLT::fixed_vector(2, S32))
6509 Opc = AArch64::ST4Fourv2s;
6510 else if (Ty == LLT::fixed_vector(4, S32))
6511 Opc = AArch64::ST4Fourv4s;
6512 else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
6513 Opc = AArch64::ST4Fourv2d;
6514 else if (Ty == S64 || Ty == P0)
6515 Opc = AArch64::ST1Fourv1d;
6516 else
6517 llvm_unreachable("Unexpected type for st4!");
6518 selectVectorStoreIntrinsic(I, 4, Opc);
6519 break;
6520 }
6521 case Intrinsic::aarch64_neon_st2lane: {
6522 LLT Ty = MRI.getType(I.getOperand(1).getReg());
6523 unsigned Opc;
6524 if (Ty == LLT::fixed_vector(8, S8) || Ty == LLT::fixed_vector(16, S8))
6525 Opc = AArch64::ST2i8;
6526 else if (Ty == LLT::fixed_vector(4, S16) || Ty == LLT::fixed_vector(8, S16))
6527 Opc = AArch64::ST2i16;
6528 else if (Ty == LLT::fixed_vector(2, S32) || Ty == LLT::fixed_vector(4, S32))
6529 Opc = AArch64::ST2i32;
6530 else if (Ty == LLT::fixed_vector(2, S64) ||
6531 Ty == LLT::fixed_vector(2, P0) || Ty == S64 || Ty == P0)
6532 Opc = AArch64::ST2i64;
6533 else
6534 llvm_unreachable("Unexpected type for st2lane!");
6535 if (!selectVectorStoreLaneIntrinsic(I, 2, Opc))
6536 return false;
6537 break;
6538 }
6539 case Intrinsic::aarch64_neon_st3lane: {
6540 LLT Ty = MRI.getType(I.getOperand(1).getReg());
6541 unsigned Opc;
6542 if (Ty == LLT::fixed_vector(8, S8) || Ty == LLT::fixed_vector(16, S8))
6543 Opc = AArch64::ST3i8;
6544 else if (Ty == LLT::fixed_vector(4, S16) || Ty == LLT::fixed_vector(8, S16))
6545 Opc = AArch64::ST3i16;
6546 else if (Ty == LLT::fixed_vector(2, S32) || Ty == LLT::fixed_vector(4, S32))
6547 Opc = AArch64::ST3i32;
6548 else if (Ty == LLT::fixed_vector(2, S64) ||
6549 Ty == LLT::fixed_vector(2, P0) || Ty == S64 || Ty == P0)
6550 Opc = AArch64::ST3i64;
6551 else
6552 llvm_unreachable("Unexpected type for st3lane!");
6553 if (!selectVectorStoreLaneIntrinsic(I, 3, Opc))
6554 return false;
6555 break;
6556 }
6557 case Intrinsic::aarch64_neon_st4lane: {
6558 LLT Ty = MRI.getType(I.getOperand(1).getReg());
6559 unsigned Opc;
6560 if (Ty == LLT::fixed_vector(8, S8) || Ty == LLT::fixed_vector(16, S8))
6561 Opc = AArch64::ST4i8;
6562 else if (Ty == LLT::fixed_vector(4, S16) || Ty == LLT::fixed_vector(8, S16))
6563 Opc = AArch64::ST4i16;
6564 else if (Ty == LLT::fixed_vector(2, S32) || Ty == LLT::fixed_vector(4, S32))
6565 Opc = AArch64::ST4i32;
6566 else if (Ty == LLT::fixed_vector(2, S64) ||
6567 Ty == LLT::fixed_vector(2, P0) || Ty == S64 || Ty == P0)
6568 Opc = AArch64::ST4i64;
6569 else
6570 llvm_unreachable("Unexpected type for st4lane!");
6571 if (!selectVectorStoreLaneIntrinsic(I, 4, Opc))
6572 return false;
6573 break;
6574 }
6575 case Intrinsic::aarch64_mops_memset_tag: {
6576 // Transform
6577 // %dst:gpr(p0) = \
6578 // G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aarch64.mops.memset.tag),
6579 // \ %dst:gpr(p0), %val:gpr(s64), %n:gpr(s64)
6580 // where %dst is updated, into
6581 // %Rd:GPR64common, %Rn:GPR64) = \
6582 // MOPSMemorySetTaggingPseudo \
6583 // %Rd:GPR64common, %Rn:GPR64, %Rm:GPR64
6584 // where Rd and Rn are tied.
6585 // It is expected that %val has been extended to s64 in legalization.
6586 // Note that the order of the size/value operands are swapped.
6587
6588 Register DstDef = I.getOperand(0).getReg();
6589 // I.getOperand(1) is the intrinsic function
6590 Register DstUse = I.getOperand(2).getReg();
6591 Register ValUse = I.getOperand(3).getReg();
6592 Register SizeUse = I.getOperand(4).getReg();
6593
6594 // MOPSMemorySetTaggingPseudo has two defs; the intrinsic call has only one.
6595 // Therefore an additional virtual register is required for the updated size
6596 // operand. This value is not accessible via the semantics of the intrinsic.
6598
6599 auto Memset = MIB.buildInstr(AArch64::MOPSMemorySetTaggingPseudo,
6600 {DstDef, SizeDef}, {DstUse, SizeUse, ValUse});
6601 Memset.cloneMemRefs(I);
6603 break;
6604 }
6605 case Intrinsic::ptrauth_resign_load_relative: {
6606 Register DstReg = I.getOperand(0).getReg();
6607 Register ValReg = I.getOperand(2).getReg();
6608 uint64_t AUTKey = I.getOperand(3).getImm();
6609 Register AUTDisc = I.getOperand(4).getReg();
6610 uint64_t PACKey = I.getOperand(5).getImm();
6611 Register PACDisc = I.getOperand(6).getReg();
6612 int64_t Addend = I.getOperand(7).getImm();
6613
6614 Register AUTAddrDisc = AUTDisc;
6615 uint16_t AUTConstDiscC = 0;
6616 std::tie(AUTConstDiscC, AUTAddrDisc) =
6618
6619 Register PACAddrDisc = PACDisc;
6620 uint16_t PACConstDiscC = 0;
6621 std::tie(PACConstDiscC, PACAddrDisc) =
6623
6624 MIB.buildCopy({AArch64::X16}, {ValReg});
6625
6626 MIB.buildInstr(AArch64::AUTRELLOADPAC)
6627 .addImm(AUTKey)
6628 .addImm(AUTConstDiscC)
6629 .addUse(AUTAddrDisc)
6630 .addImm(PACKey)
6631 .addImm(PACConstDiscC)
6632 .addUse(PACAddrDisc)
6633 .addImm(Addend)
6634 .constrainAllUses(TII, TRI, RBI);
6635 MIB.buildCopy({DstReg}, Register(AArch64::X16));
6636
6637 RBI.constrainGenericRegister(DstReg, AArch64::GPR64RegClass, MRI);
6638 I.eraseFromParent();
6639 return true;
6640 }
6641 }
6642
6643 I.eraseFromParent();
6644 return true;
6645}
6646
6647bool AArch64InstructionSelector::selectIntrinsic(MachineInstr &I,
6648 MachineRegisterInfo &MRI) {
6649 unsigned IntrinID = cast<GIntrinsic>(I).getIntrinsicID();
6650
6651 switch (IntrinID) {
6652 default:
6653 break;
6654 case Intrinsic::ptrauth_resign: {
6655 Register DstReg = I.getOperand(0).getReg();
6656 Register ValReg = I.getOperand(2).getReg();
6657 uint64_t AUTKey = I.getOperand(3).getImm();
6658 Register AUTDisc = I.getOperand(4).getReg();
6659 uint64_t PACKey = I.getOperand(5).getImm();
6660 Register PACDisc = I.getOperand(6).getReg();
6661
6662 Register AUTAddrDisc = AUTDisc;
6663 uint16_t AUTConstDiscC = 0;
6664 std::tie(AUTConstDiscC, AUTAddrDisc) =
6666
6667 Register PACAddrDisc = PACDisc;
6668 uint16_t PACConstDiscC = 0;
6669 std::tie(PACConstDiscC, PACAddrDisc) =
6671
6672 MIB.buildCopy({AArch64::X16}, {ValReg});
6673 MIB.buildInstr(TargetOpcode::IMPLICIT_DEF, {AArch64::X17}, {});
6674 MIB.buildInstr(AArch64::AUTPAC)
6675 .addImm(AUTKey)
6676 .addImm(AUTConstDiscC)
6677 .addUse(AUTAddrDisc)
6678 .addImm(PACKey)
6679 .addImm(PACConstDiscC)
6680 .addUse(PACAddrDisc)
6681 .constrainAllUses(TII, TRI, RBI);
6682 MIB.buildCopy({DstReg}, Register(AArch64::X16));
6683
6684 RBI.constrainGenericRegister(DstReg, AArch64::GPR64RegClass, MRI);
6685 I.eraseFromParent();
6686 return true;
6687 }
6688 case Intrinsic::ptrauth_auth: {
6689 Register DstReg = I.getOperand(0).getReg();
6690 Register ValReg = I.getOperand(2).getReg();
6691 uint64_t AUTKey = I.getOperand(3).getImm();
6692 Register AUTDisc = I.getOperand(4).getReg();
6693
6694 Register AUTAddrDisc = AUTDisc;
6695 uint16_t AUTConstDiscC = 0;
6696 std::tie(AUTConstDiscC, AUTAddrDisc) =
6698
6699 if (STI.isX16X17Safer()) {
6700 MIB.buildCopy({AArch64::X16}, {ValReg});
6701 MIB.buildInstr(TargetOpcode::IMPLICIT_DEF, {AArch64::X17}, {});
6702 MIB.buildInstr(AArch64::AUTx16x17)
6703 .addImm(AUTKey)
6704 .addImm(AUTConstDiscC)
6705 .addUse(AUTAddrDisc)
6706 .constrainAllUses(TII, TRI, RBI);
6707 MIB.buildCopy({DstReg}, Register(AArch64::X16));
6708 } else {
6709 Register ScratchReg =
6710 MRI.createVirtualRegister(&AArch64::GPR64commonRegClass);
6711 MIB.buildInstr(AArch64::AUTxMxN)
6712 .addDef(DstReg)
6713 .addDef(ScratchReg)
6714 .addUse(ValReg)
6715 .addImm(AUTKey)
6716 .addImm(AUTConstDiscC)
6717 .addUse(AUTAddrDisc)
6718 .constrainAllUses(TII, TRI, RBI);
6719 }
6720
6721 RBI.constrainGenericRegister(DstReg, AArch64::GPR64RegClass, MRI);
6722 I.eraseFromParent();
6723 return true;
6724 }
6725 case Intrinsic::frameaddress:
6726 case Intrinsic::returnaddress: {
6727 MachineFunction &MF = *I.getParent()->getParent();
6728 MachineFrameInfo &MFI = MF.getFrameInfo();
6729
6730 unsigned Depth = I.getOperand(2).getImm();
6731 Register DstReg = I.getOperand(0).getReg();
6732 RBI.constrainGenericRegister(DstReg, AArch64::GPR64RegClass, MRI);
6733
6734 if (Depth == 0 && IntrinID == Intrinsic::returnaddress) {
6735 if (!MFReturnAddr) {
6736 // Insert the copy from LR/X30 into the entry block, before it can be
6737 // clobbered by anything.
6738 MFI.setReturnAddressIsTaken(true);
6739 MFReturnAddr = getFunctionLiveInPhysReg(
6740 MF, TII, AArch64::LR, AArch64::GPR64RegClass, I.getDebugLoc());
6741 }
6742
6743 if (STI.hasPAuth()) {
6744 MIB.buildInstr(AArch64::XPACI, {DstReg}, {MFReturnAddr});
6745 } else {
6746 MIB.buildCopy({Register(AArch64::LR)}, {MFReturnAddr});
6747 MIB.buildInstr(AArch64::XPACLRI);
6748 MIB.buildCopy({DstReg}, {Register(AArch64::LR)});
6749 }
6750
6751 I.eraseFromParent();
6752 return true;
6753 }
6754
6755 MFI.setFrameAddressIsTaken(true);
6756 Register FrameAddr(AArch64::FP);
6757 while (Depth--) {
6758 Register NextFrame = MRI.createVirtualRegister(&AArch64::GPR64spRegClass);
6759 auto Ldr =
6760 MIB.buildInstr(AArch64::LDRXui, {NextFrame}, {FrameAddr}).addImm(0);
6762 FrameAddr = NextFrame;
6763 }
6764
6765 if (IntrinID == Intrinsic::frameaddress)
6766 MIB.buildCopy({DstReg}, {FrameAddr});
6767 else {
6768 MFI.setReturnAddressIsTaken(true);
6769
6770 if (STI.hasPAuth()) {
6771 Register TmpReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
6772 MIB.buildInstr(AArch64::LDRXui, {TmpReg}, {FrameAddr}).addImm(1);
6773 MIB.buildInstr(AArch64::XPACI, {DstReg}, {TmpReg});
6774 } else {
6775 MIB.buildInstr(AArch64::LDRXui, {Register(AArch64::LR)}, {FrameAddr})
6776 .addImm(1);
6777 MIB.buildInstr(AArch64::XPACLRI);
6778 MIB.buildCopy({DstReg}, {Register(AArch64::LR)});
6779 }
6780 }
6781
6782 I.eraseFromParent();
6783 return true;
6784 }
6785 case Intrinsic::aarch64_neon_tbl2:
6786 SelectTable(I, MRI, 2, AArch64::TBLv8i8Two, AArch64::TBLv16i8Two, false);
6787 return true;
6788 case Intrinsic::aarch64_neon_tbl3:
6789 SelectTable(I, MRI, 3, AArch64::TBLv8i8Three, AArch64::TBLv16i8Three,
6790 false);
6791 return true;
6792 case Intrinsic::aarch64_neon_tbl4:
6793 SelectTable(I, MRI, 4, AArch64::TBLv8i8Four, AArch64::TBLv16i8Four, false);
6794 return true;
6795 case Intrinsic::aarch64_neon_tbx2:
6796 SelectTable(I, MRI, 2, AArch64::TBXv8i8Two, AArch64::TBXv16i8Two, true);
6797 return true;
6798 case Intrinsic::aarch64_neon_tbx3:
6799 SelectTable(I, MRI, 3, AArch64::TBXv8i8Three, AArch64::TBXv16i8Three, true);
6800 return true;
6801 case Intrinsic::aarch64_neon_tbx4:
6802 SelectTable(I, MRI, 4, AArch64::TBXv8i8Four, AArch64::TBXv16i8Four, true);
6803 return true;
6804 case Intrinsic::swift_async_context_addr:
6805 auto Sub = MIB.buildInstr(AArch64::SUBXri, {I.getOperand(0).getReg()},
6806 {Register(AArch64::FP)})
6807 .addImm(8)
6808 .addImm(0);
6810
6812 MF->getInfo<AArch64FunctionInfo>()->setHasSwiftAsyncContext(true);
6813 I.eraseFromParent();
6814 return true;
6815 }
6816 return false;
6817}
6818
6819// G_PTRAUTH_GLOBAL_VALUE lowering
6820//
6821// We have 3 lowering alternatives to choose from:
6822// - MOVaddrPAC: similar to MOVaddr, with added PAC.
6823// If the GV doesn't need a GOT load (i.e., is locally defined)
6824// materialize the pointer using adrp+add+pac. See LowerMOVaddrPAC.
6825//
6826// - LOADgotPAC: similar to LOADgot, with added PAC.
6827// If the GV needs a GOT load, materialize the pointer using the usual
6828// GOT adrp+ldr, +pac. Pointers in GOT are assumed to be not signed, the GOT
6829// section is assumed to be read-only (for example, via relro mechanism). See
6830// LowerMOVaddrPAC.
6831//
6832// - LOADauthptrstatic: similar to LOADgot, but use a
6833// special stub slot instead of a GOT slot.
6834// Load a signed pointer for symbol 'sym' from a stub slot named
6835// 'sym$auth_ptr$key$disc' filled by dynamic linker during relocation
6836// resolving. This usually lowers to adrp+ldr, but also emits an entry into
6837// .data with an
6838// @AUTH relocation. See LowerLOADauthptrstatic.
6839//
6840// All 3 are pseudos that are expanded late to longer sequences: this lets us
6841// provide integrity guarantees on the to-be-signed intermediate values.
6842//
6843// LOADauthptrstatic is undesirable because it requires a large section filled
6844// with often similarly-signed pointers, making it a good harvesting target.
6845// Thus, it's only used for ptrauth references to extern_weak to avoid null
6846// checks.
6847
6848bool AArch64InstructionSelector::selectPtrAuthGlobalValue(
6849 MachineInstr &I, MachineRegisterInfo &MRI) const {
6850 Register DefReg = I.getOperand(0).getReg();
6851 Register Addr = I.getOperand(1).getReg();
6852 uint64_t Key = I.getOperand(2).getImm();
6853 Register AddrDisc = I.getOperand(3).getReg();
6854 uint64_t Disc = I.getOperand(4).getImm();
6855 int64_t Offset = 0;
6856
6858 report_fatal_error("key in ptrauth global out of range [0, " +
6859 Twine((int)AArch64PACKey::LAST) + "]");
6860
6861 // Blend only works if the integer discriminator is 16-bit wide.
6862 if (!isUInt<16>(Disc))
6864 "constant discriminator in ptrauth global out of range [0, 0xffff]");
6865
6866 // Choosing between 3 lowering alternatives is target-specific.
6867 if (!STI.isTargetELF() && !STI.isTargetMachO())
6868 report_fatal_error("ptrauth global lowering only supported on MachO/ELF");
6869
6870 if (!MRI.hasOneDef(Addr))
6871 return false;
6872
6873 // First match any offset we take from the real global.
6874 const MachineInstr *DefMI = &*MRI.def_instr_begin(Addr);
6875 if (DefMI->getOpcode() == TargetOpcode::G_PTR_ADD) {
6876 Register OffsetReg = DefMI->getOperand(2).getReg();
6877 if (!MRI.hasOneDef(OffsetReg))
6878 return false;
6879 const MachineInstr &OffsetMI = *MRI.def_instr_begin(OffsetReg);
6880 if (OffsetMI.getOpcode() != TargetOpcode::G_CONSTANT)
6881 return false;
6882
6883 Addr = DefMI->getOperand(1).getReg();
6884 if (!MRI.hasOneDef(Addr))
6885 return false;
6886
6887 DefMI = &*MRI.def_instr_begin(Addr);
6888 Offset = OffsetMI.getOperand(1).getCImm()->getSExtValue();
6889 }
6890
6891 // We should be left with a genuine unauthenticated GlobalValue.
6892 const GlobalValue *GV;
6893 if (DefMI->getOpcode() == TargetOpcode::G_GLOBAL_VALUE) {
6894 GV = DefMI->getOperand(1).getGlobal();
6896 } else if (DefMI->getOpcode() == AArch64::G_ADD_LOW) {
6897 GV = DefMI->getOperand(2).getGlobal();
6899 } else {
6900 return false;
6901 }
6902
6903 MachineIRBuilder MIB(I);
6904
6905 // Classify the reference to determine whether it needs a GOT load.
6906 unsigned OpFlags = STI.ClassifyGlobalReference(GV, TM);
6907 const bool NeedsGOTLoad = ((OpFlags & AArch64II::MO_GOT) != 0);
6908 assert(((OpFlags & (~AArch64II::MO_GOT)) == 0) &&
6909 "unsupported non-GOT op flags on ptrauth global reference");
6910 assert((!GV->hasExternalWeakLinkage() || NeedsGOTLoad) &&
6911 "unsupported non-GOT reference to weak ptrauth global");
6912
6913 std::optional<APInt> AddrDiscVal = getIConstantVRegVal(AddrDisc, MRI);
6914 bool HasAddrDisc = !AddrDiscVal || *AddrDiscVal != 0;
6915
6916 // Non-extern_weak:
6917 // - No GOT load needed -> MOVaddrPAC
6918 // - GOT load for non-extern_weak -> LOADgotPAC
6919 // Note that we disallow extern_weak refs to avoid null checks later.
6920 if (!GV->hasExternalWeakLinkage()) {
6921 MIB.buildInstr(TargetOpcode::IMPLICIT_DEF, {AArch64::X16}, {});
6922 MIB.buildInstr(TargetOpcode::IMPLICIT_DEF, {AArch64::X17}, {});
6923 MIB.buildInstr(NeedsGOTLoad ? AArch64::LOADgotPAC : AArch64::MOVaddrPAC)
6925 .addImm(Key)
6926 .addReg(HasAddrDisc ? AddrDisc : AArch64::XZR)
6927 .addImm(Disc)
6928 .constrainAllUses(TII, TRI, RBI);
6929 MIB.buildCopy(DefReg, Register(AArch64::X16));
6930 RBI.constrainGenericRegister(DefReg, AArch64::GPR64RegClass, MRI);
6931 I.eraseFromParent();
6932 return true;
6933 }
6934
6935 // extern_weak -> LOADauthptrstatic
6936
6937 // Offsets and extern_weak don't mix well: ptrauth aside, you'd get the
6938 // offset alone as a pointer if the symbol wasn't available, which would
6939 // probably break null checks in users. Ptrauth complicates things further:
6940 // error out.
6941 if (Offset != 0)
6943 "unsupported non-zero offset in weak ptrauth global reference");
6944
6945 if (HasAddrDisc)
6946 report_fatal_error("unsupported weak addr-div ptrauth global");
6947
6948 MIB.buildInstr(AArch64::LOADauthptrstatic, {DefReg}, {})
6949 .addGlobalAddress(GV, Offset)
6950 .addImm(Key)
6951 .addImm(Disc);
6952 RBI.constrainGenericRegister(DefReg, AArch64::GPR64RegClass, MRI);
6953
6954 I.eraseFromParent();
6955 return true;
6956}
6957
6958void AArch64InstructionSelector::SelectTable(MachineInstr &I,
6959 MachineRegisterInfo &MRI,
6960 unsigned NumVec, unsigned Opc1,
6961 unsigned Opc2, bool isExt) {
6962 Register DstReg = I.getOperand(0).getReg();
6963 unsigned Opc = MRI.getType(DstReg) == LLT::fixed_vector(8, 8) ? Opc1 : Opc2;
6964
6965 // Create the REG_SEQUENCE
6967 for (unsigned i = 0; i < NumVec; i++)
6968 Regs.push_back(I.getOperand(i + 2 + isExt).getReg());
6969 Register RegSeq = createQTuple(Regs, MIB);
6970
6971 Register IdxReg = I.getOperand(2 + NumVec + isExt).getReg();
6972 MachineInstrBuilder Instr;
6973 if (isExt) {
6974 Register Reg = I.getOperand(2).getReg();
6975 Instr = MIB.buildInstr(Opc, {DstReg}, {Reg, RegSeq, IdxReg});
6976 } else
6977 Instr = MIB.buildInstr(Opc, {DstReg}, {RegSeq, IdxReg});
6979 I.eraseFromParent();
6980}
6981
6982InstructionSelector::ComplexRendererFns
6983AArch64InstructionSelector::selectShiftA_32(const MachineOperand &Root) const {
6984 auto MaybeImmed = getImmedFromMO(Root);
6985 if (MaybeImmed == std::nullopt || *MaybeImmed > 31)
6986 return std::nullopt;
6987 uint64_t Enc = (32 - *MaybeImmed) & 0x1f;
6988 return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Enc); }}};
6989}
6990
6991InstructionSelector::ComplexRendererFns
6992AArch64InstructionSelector::selectShiftB_32(const MachineOperand &Root) const {
6993 auto MaybeImmed = getImmedFromMO(Root);
6994 if (MaybeImmed == std::nullopt || *MaybeImmed > 31)
6995 return std::nullopt;
6996 uint64_t Enc = 31 - *MaybeImmed;
6997 return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Enc); }}};
6998}
6999
7000InstructionSelector::ComplexRendererFns
7001AArch64InstructionSelector::selectShiftA_64(const MachineOperand &Root) const {
7002 auto MaybeImmed = getImmedFromMO(Root);
7003 if (MaybeImmed == std::nullopt || *MaybeImmed > 63)
7004 return std::nullopt;
7005 uint64_t Enc = (64 - *MaybeImmed) & 0x3f;
7006 return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Enc); }}};
7007}
7008
7009InstructionSelector::ComplexRendererFns
7010AArch64InstructionSelector::selectShiftB_64(const MachineOperand &Root) const {
7011 auto MaybeImmed = getImmedFromMO(Root);
7012 if (MaybeImmed == std::nullopt || *MaybeImmed > 63)
7013 return std::nullopt;
7014 uint64_t Enc = 63 - *MaybeImmed;
7015 return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Enc); }}};
7016}
7017
7018/// Helper to select an immediate value that can be represented as a 12-bit
7019/// value shifted left by either 0 or 12. If it is possible to do so, return
7020/// the immediate and shift value. If not, return std::nullopt.
7021///
7022/// Used by selectArithImmed and selectNegArithImmed.
7023InstructionSelector::ComplexRendererFns
7024AArch64InstructionSelector::select12BitValueWithLeftShift(
7025 uint64_t Immed) const {
7026 unsigned ShiftAmt;
7027 if (Immed >> 12 == 0) {
7028 ShiftAmt = 0;
7029 } else if ((Immed & 0xfff) == 0 && Immed >> 24 == 0) {
7030 ShiftAmt = 12;
7031 Immed = Immed >> 12;
7032 } else
7033 return std::nullopt;
7034
7035 unsigned ShVal = AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftAmt);
7036 return {{
7037 [=](MachineInstrBuilder &MIB) { MIB.addImm(Immed); },
7038 [=](MachineInstrBuilder &MIB) { MIB.addImm(ShVal); },
7039 }};
7040}
7041
7042/// SelectArithImmed - Select an immediate value that can be represented as
7043/// a 12-bit value shifted left by either 0 or 12. If so, return true with
7044/// Val set to the 12-bit value and Shift set to the shifter operand.
7045InstructionSelector::ComplexRendererFns
7046AArch64InstructionSelector::selectArithImmed(MachineOperand &Root) const {
7047 // This function is called from the addsub_shifted_imm ComplexPattern,
7048 // which lists [imm] as the list of opcode it's interested in, however
7049 // we still need to check whether the operand is actually an immediate
7050 // here because the ComplexPattern opcode list is only used in
7051 // root-level opcode matching.
7052 auto MaybeImmed = getImmedFromMO(Root);
7053 if (MaybeImmed == std::nullopt)
7054 return std::nullopt;
7055 return select12BitValueWithLeftShift(*MaybeImmed);
7056}
7057
7058/// SelectNegArithImmed - As above, but negates the value before trying to
7059/// select it.
7060InstructionSelector::ComplexRendererFns
7061AArch64InstructionSelector::selectNegArithImmed(MachineOperand &Root) const {
7062 // We need a register here, because we need to know if we have a 64 or 32
7063 // bit immediate.
7064 if (!Root.isReg())
7065 return std::nullopt;
7066 auto MaybeImmed = getImmedFromMO(Root);
7067 if (MaybeImmed == std::nullopt)
7068 return std::nullopt;
7069 uint64_t Immed = *MaybeImmed;
7070
7071 // This negation is almost always valid, but "cmp wN, #0" and "cmn wN, #0"
7072 // have the opposite effect on the C flag, so this pattern mustn't match under
7073 // those circumstances.
7074 if (Immed == 0)
7075 return std::nullopt;
7076
7077 // Check if we're dealing with a 32-bit type on the root or a 64-bit type on
7078 // the root.
7079 MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();
7080 if (MRI.getType(Root.getReg()).getSizeInBits() == 32)
7081 Immed = ~((uint32_t)Immed) + 1;
7082 else
7083 Immed = ~Immed + 1ULL;
7084
7085 if (Immed & 0xFFFFFFFFFF000000ULL)
7086 return std::nullopt;
7087
7088 Immed &= 0xFFFFFFULL;
7089 return select12BitValueWithLeftShift(Immed);
7090}
7091
7092/// Checks if we are sure that folding MI into load/store addressing mode is
7093/// beneficial or not.
7094///
7095/// Returns:
7096/// - true if folding MI would be beneficial.
7097/// - false if folding MI would be bad.
7098/// - std::nullopt if it is not sure whether folding MI is beneficial.
7099///
7100/// \p MI can be the offset operand of G_PTR_ADD, e.g. G_SHL in the example:
7101///
7102/// %13:gpr(s64) = G_CONSTANT i64 1
7103/// %8:gpr(s64) = G_SHL %6, %13(s64)
7104/// %9:gpr(p0) = G_PTR_ADD %0, %8(s64)
7105/// %12:gpr(s32) = G_LOAD %9(p0) :: (load (s16))
7106std::optional<bool> AArch64InstructionSelector::isWorthFoldingIntoAddrMode(
7107 const MachineInstr &MI, const MachineRegisterInfo &MRI) const {
7108 if (MI.getOpcode() == AArch64::G_SHL) {
7109 // Address operands with shifts are free, except for running on subtargets
7110 // with AddrLSLSlow14.
7111 if (const auto ValAndVeg = getIConstantVRegValWithLookThrough(
7112 MI.getOperand(2).getReg(), MRI)) {
7113 const APInt ShiftVal = ValAndVeg->Value;
7114
7115 // Don't fold if we know this will be slow.
7116 return !(STI.hasAddrLSLSlow14() && (ShiftVal == 1 || ShiftVal == 4));
7117 }
7118 }
7119 return std::nullopt;
7120}
7121
7122/// Return true if it is worth folding MI into an extended register. That is,
7123/// if it's safe to pull it into the addressing mode of a load or store as a
7124/// shift.
7125/// \p IsAddrOperand whether the def of MI is used as an address operand
7126/// (e.g. feeding into an LDR/STR).
7127bool AArch64InstructionSelector::isWorthFoldingIntoExtendedReg(
7128 const MachineInstr &MI, const MachineRegisterInfo &MRI,
7129 bool IsAddrOperand) const {
7130
7131 // Always fold if there is one use, or if we're optimizing for size.
7132 Register DefReg = MI.getOperand(0).getReg();
7133 if (MRI.hasOneNonDBGUse(DefReg) ||
7134 MI.getParent()->getParent()->getFunction().hasOptSize())
7135 return true;
7136
7137 if (IsAddrOperand) {
7138 // If we are already sure that folding MI is good or bad, return the result.
7139 if (const auto Worth = isWorthFoldingIntoAddrMode(MI, MRI))
7140 return *Worth;
7141
7142 // Fold G_PTR_ADD if its offset operand can be folded
7143 if (MI.getOpcode() == AArch64::G_PTR_ADD) {
7144 MachineInstr *OffsetInst =
7145 getDefIgnoringCopies(MI.getOperand(2).getReg(), MRI);
7146
7147 // Note, we already know G_PTR_ADD is used by at least two instructions.
7148 // If we are also sure about whether folding is beneficial or not,
7149 // return the result.
7150 if (const auto Worth = isWorthFoldingIntoAddrMode(*OffsetInst, MRI))
7151 return *Worth;
7152 }
7153 }
7154
7155 // FIXME: Consider checking HasALULSLFast as appropriate.
7156
7157 // We have a fastpath, so folding a shift in and potentially computing it
7158 // many times may be beneficial. Check if this is only used in memory ops.
7159 // If it is, then we should fold.
7160 return all_of(MRI.use_nodbg_instructions(DefReg),
7161 [](MachineInstr &Use) { return Use.mayLoadOrStore(); });
7162}
7163
7164InstructionSelector::ComplexRendererFns
7165AArch64InstructionSelector::selectExtendedSHL(
7166 MachineOperand &Root, MachineOperand &Base, MachineOperand &Offset,
7167 unsigned SizeInBytes, bool WantsExt) const {
7168 assert(Base.isReg() && "Expected base to be a register operand");
7169 assert(Offset.isReg() && "Expected offset to be a register operand");
7170
7171 MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();
7172 MachineInstr *OffsetInst = MRI.getVRegDef(Offset.getReg());
7173
7174 unsigned OffsetOpc = OffsetInst->getOpcode();
7175 bool LookedThroughZExt = false;
7176 if (OffsetOpc != TargetOpcode::G_SHL && OffsetOpc != TargetOpcode::G_MUL) {
7177 // Try to look through a ZEXT.
7178 if (OffsetOpc != TargetOpcode::G_ZEXT || !WantsExt)
7179 return std::nullopt;
7180
7181 OffsetInst = MRI.getVRegDef(OffsetInst->getOperand(1).getReg());
7182 OffsetOpc = OffsetInst->getOpcode();
7183 LookedThroughZExt = true;
7184
7185 if (OffsetOpc != TargetOpcode::G_SHL && OffsetOpc != TargetOpcode::G_MUL)
7186 return std::nullopt;
7187 }
7188 // Make sure that the memory op is a valid size.
7189 int64_t LegalShiftVal = Log2_32(SizeInBytes);
7190 if (LegalShiftVal == 0)
7191 return std::nullopt;
7192 if (!isWorthFoldingIntoExtendedReg(*OffsetInst, MRI, true))
7193 return std::nullopt;
7194
7195 // Now, try to find the specific G_CONSTANT. Start by assuming that the
7196 // register we will offset is the LHS, and the register containing the
7197 // constant is the RHS.
7198 Register OffsetReg = OffsetInst->getOperand(1).getReg();
7199 Register ConstantReg = OffsetInst->getOperand(2).getReg();
7200 auto ValAndVReg = getIConstantVRegValWithLookThrough(ConstantReg, MRI);
7201 if (!ValAndVReg) {
7202 // We didn't get a constant on the RHS. If the opcode is a shift, then
7203 // we're done.
7204 if (OffsetOpc == TargetOpcode::G_SHL)
7205 return std::nullopt;
7206
7207 // If we have a G_MUL, we can use either register. Try looking at the RHS.
7208 std::swap(OffsetReg, ConstantReg);
7209 ValAndVReg = getIConstantVRegValWithLookThrough(ConstantReg, MRI);
7210 if (!ValAndVReg)
7211 return std::nullopt;
7212 }
7213
7214 // The value must fit into 3 bits, and must be positive. Make sure that is
7215 // true.
7216 int64_t ImmVal = ValAndVReg->Value.getSExtValue();
7217
7218 // Since we're going to pull this into a shift, the constant value must be
7219 // a power of 2. If we got a multiply, then we need to check this.
7220 if (OffsetOpc == TargetOpcode::G_MUL) {
7221 if (!llvm::has_single_bit<uint32_t>(ImmVal))
7222 return std::nullopt;
7223
7224 // Got a power of 2. So, the amount we'll shift is the log base-2 of that.
7225 ImmVal = Log2_32(ImmVal);
7226 }
7227
7228 if ((ImmVal & 0x7) != ImmVal)
7229 return std::nullopt;
7230
7231 // We are only allowed to shift by LegalShiftVal. This shift value is built
7232 // into the instruction, so we can't just use whatever we want.
7233 if (ImmVal != LegalShiftVal)
7234 return std::nullopt;
7235
7236 unsigned SignExtend = 0;
7237 if (WantsExt) {
7238 // Check if the offset is defined by an extend, unless we looked through a
7239 // G_ZEXT earlier.
7240 if (!LookedThroughZExt) {
7241 MachineInstr *ExtInst = getDefIgnoringCopies(OffsetReg, MRI);
7242 auto Ext = getExtendTypeForInst(*ExtInst, MRI, true);
7244 return std::nullopt;
7245
7246 SignExtend = AArch64_AM::isSignExtendShiftType(Ext) ? 1 : 0;
7247 // We only support SXTW for signed extension here.
7248 if (SignExtend && Ext != AArch64_AM::SXTW)
7249 return std::nullopt;
7250 OffsetReg = ExtInst->getOperand(1).getReg();
7251 }
7252
7253 // Need a 32-bit wide register here.
7254 MachineIRBuilder MIB(*MRI.getVRegDef(Root.getReg()));
7255 OffsetReg = moveScalarRegClass(OffsetReg, AArch64::GPR32RegClass, MIB);
7256 }
7257
7258 // We can use the LHS of the GEP as the base, and the LHS of the shift as an
7259 // offset. Signify that we are shifting by setting the shift flag to 1.
7260 return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(Base.getReg()); },
7261 [=](MachineInstrBuilder &MIB) { MIB.addUse(OffsetReg); },
7262 [=](MachineInstrBuilder &MIB) {
7263 // Need to add both immediates here to make sure that they are both
7264 // added to the instruction.
7265 MIB.addImm(SignExtend);
7266 MIB.addImm(1);
7267 }}};
7268}
7269
7270/// This is used for computing addresses like this:
7271///
7272/// ldr x1, [x2, x3, lsl #3]
7273///
7274/// Where x2 is the base register, and x3 is an offset register. The shift-left
7275/// is a constant value specific to this load instruction. That is, we'll never
7276/// see anything other than a 3 here (which corresponds to the size of the
7277/// element being loaded.)
7278InstructionSelector::ComplexRendererFns
7279AArch64InstructionSelector::selectAddrModeShiftedExtendXReg(
7280 MachineOperand &Root, unsigned SizeInBytes) const {
7281 if (!Root.isReg())
7282 return std::nullopt;
7283 MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();
7284
7285 // We want to find something like this:
7286 //
7287 // val = G_CONSTANT LegalShiftVal
7288 // shift = G_SHL off_reg val
7289 // ptr = G_PTR_ADD base_reg shift
7290 // x = G_LOAD ptr
7291 //
7292 // And fold it into this addressing mode:
7293 //
7294 // ldr x, [base_reg, off_reg, lsl #LegalShiftVal]
7295
7296 // Check if we can find the G_PTR_ADD.
7297 MachineInstr *PtrAdd =
7298 getOpcodeDef(TargetOpcode::G_PTR_ADD, Root.getReg(), MRI);
7299 if (!PtrAdd || !isWorthFoldingIntoExtendedReg(*PtrAdd, MRI, true))
7300 return std::nullopt;
7301
7302 // Now, try to match an opcode which will match our specific offset.
7303 // We want a G_SHL or a G_MUL.
7304 MachineInstr *OffsetInst =
7305 getDefIgnoringCopies(PtrAdd->getOperand(2).getReg(), MRI);
7306 return selectExtendedSHL(Root, PtrAdd->getOperand(1),
7307 OffsetInst->getOperand(0), SizeInBytes,
7308 /*WantsExt=*/false);
7309}
7310
7311/// This is used for computing addresses like this:
7312///
7313/// ldr x1, [x2, x3]
7314///
7315/// Where x2 is the base register, and x3 is an offset register.
7316///
7317/// When possible (or profitable) to fold a G_PTR_ADD into the address
7318/// calculation, this will do so. Otherwise, it will return std::nullopt.
7319InstructionSelector::ComplexRendererFns
7320AArch64InstructionSelector::selectAddrModeRegisterOffset(
7321 MachineOperand &Root) const {
7322 MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();
7323
7324 // We need a GEP.
7325 MachineInstr *Gep = MRI.getVRegDef(Root.getReg());
7326 if (Gep->getOpcode() != TargetOpcode::G_PTR_ADD)
7327 return std::nullopt;
7328
7329 // If this is used more than once, let's not bother folding.
7330 // TODO: Check if they are memory ops. If they are, then we can still fold
7331 // without having to recompute anything.
7332 if (!MRI.hasOneNonDBGUse(Gep->getOperand(0).getReg()))
7333 return std::nullopt;
7334
7335 // Base is the GEP's LHS, offset is its RHS.
7336 return {{[=](MachineInstrBuilder &MIB) {
7337 MIB.addUse(Gep->getOperand(1).getReg());
7338 },
7339 [=](MachineInstrBuilder &MIB) {
7340 MIB.addUse(Gep->getOperand(2).getReg());
7341 },
7342 [=](MachineInstrBuilder &MIB) {
7343 // Need to add both immediates here to make sure that they are both
7344 // added to the instruction.
7345 MIB.addImm(0);
7346 MIB.addImm(0);
7347 }}};
7348}
7349
7350/// This is intended to be equivalent to selectAddrModeXRO in
7351/// AArch64ISelDAGtoDAG. It's used for selecting X register offset loads.
7352InstructionSelector::ComplexRendererFns
7353AArch64InstructionSelector::selectAddrModeXRO(MachineOperand &Root,
7354 unsigned SizeInBytes) const {
7355 MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();
7356 if (!Root.isReg())
7357 return std::nullopt;
7358 MachineInstr *PtrAdd =
7359 getOpcodeDef(TargetOpcode::G_PTR_ADD, Root.getReg(), MRI);
7360 if (!PtrAdd)
7361 return std::nullopt;
7362
7363 // Check for an immediates which cannot be encoded in the [base + imm]
7364 // addressing mode, and can't be encoded in an add/sub. If this happens, we'll
7365 // end up with code like:
7366 //
7367 // mov x0, wide
7368 // add x1 base, x0
7369 // ldr x2, [x1, x0]
7370 //
7371 // In this situation, we can use the [base, xreg] addressing mode to save an
7372 // add/sub:
7373 //
7374 // mov x0, wide
7375 // ldr x2, [base, x0]
7376 auto ValAndVReg =
7378 if (ValAndVReg) {
7379 unsigned Scale = Log2_32(SizeInBytes);
7380 int64_t ImmOff = ValAndVReg->Value.getSExtValue();
7381
7382 // Skip immediates that can be selected in the load/store addressing
7383 // mode.
7384 if (ImmOff % SizeInBytes == 0 && ImmOff >= 0 &&
7385 ImmOff < (0x1000 << Scale))
7386 return std::nullopt;
7387
7388 // Helper lambda to decide whether or not it is preferable to emit an add.
7389 auto isPreferredADD = [](int64_t ImmOff) {
7390 // Constants in [0x0, 0xfff] can be encoded in an add.
7391 if ((ImmOff & 0xfffffffffffff000LL) == 0x0LL)
7392 return true;
7393
7394 // Can it be encoded in an add lsl #12?
7395 if ((ImmOff & 0xffffffffff000fffLL) != 0x0LL)
7396 return false;
7397
7398 // It can be encoded in an add lsl #12, but we may not want to. If it is
7399 // possible to select this as a single movz, then prefer that. A single
7400 // movz is faster than an add with a shift.
7401 return (ImmOff & 0xffffffffff00ffffLL) != 0x0LL &&
7402 (ImmOff & 0xffffffffffff0fffLL) != 0x0LL;
7403 };
7404
7405 // If the immediate can be encoded in a single add/sub, then bail out.
7406 if (isPreferredADD(ImmOff) || isPreferredADD(-ImmOff))
7407 return std::nullopt;
7408 }
7409
7410 // Try to fold shifts into the addressing mode.
7411 auto AddrModeFns = selectAddrModeShiftedExtendXReg(Root, SizeInBytes);
7412 if (AddrModeFns)
7413 return AddrModeFns;
7414
7415 // If that doesn't work, see if it's possible to fold in registers from
7416 // a GEP.
7417 return selectAddrModeRegisterOffset(Root);
7418}
7419
7420/// This is used for computing addresses like this:
7421///
7422/// ldr x0, [xBase, wOffset, sxtw #LegalShiftVal]
7423///
7424/// Where we have a 64-bit base register, a 32-bit offset register, and an
7425/// extend (which may or may not be signed).
7426InstructionSelector::ComplexRendererFns
7427AArch64InstructionSelector::selectAddrModeWRO(MachineOperand &Root,
7428 unsigned SizeInBytes) const {
7429 MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();
7430
7431 MachineInstr *PtrAdd =
7432 getOpcodeDef(TargetOpcode::G_PTR_ADD, Root.getReg(), MRI);
7433 if (!PtrAdd || !isWorthFoldingIntoExtendedReg(*PtrAdd, MRI, true))
7434 return std::nullopt;
7435
7436 MachineOperand &LHS = PtrAdd->getOperand(1);
7437 MachineOperand &RHS = PtrAdd->getOperand(2);
7438 MachineInstr *OffsetInst = getDefIgnoringCopies(RHS.getReg(), MRI);
7439
7440 // The first case is the same as selectAddrModeXRO, except we need an extend.
7441 // In this case, we try to find a shift and extend, and fold them into the
7442 // addressing mode.
7443 //
7444 // E.g.
7445 //
7446 // off_reg = G_Z/S/ANYEXT ext_reg
7447 // val = G_CONSTANT LegalShiftVal
7448 // shift = G_SHL off_reg val
7449 // ptr = G_PTR_ADD base_reg shift
7450 // x = G_LOAD ptr
7451 //
7452 // In this case we can get a load like this:
7453 //
7454 // ldr x0, [base_reg, ext_reg, sxtw #LegalShiftVal]
7455 auto ExtendedShl = selectExtendedSHL(Root, LHS, OffsetInst->getOperand(0),
7456 SizeInBytes, /*WantsExt=*/true);
7457 if (ExtendedShl)
7458 return ExtendedShl;
7459
7460 // There was no shift. We can try and fold a G_Z/S/ANYEXT in alone though.
7461 //
7462 // e.g.
7463 // ldr something, [base_reg, ext_reg, sxtw]
7464 if (!isWorthFoldingIntoExtendedReg(*OffsetInst, MRI, true))
7465 return std::nullopt;
7466
7467 // Check if this is an extend. We'll get an extend type if it is.
7469 getExtendTypeForInst(*OffsetInst, MRI, /*IsLoadStore=*/true);
7471 return std::nullopt;
7472
7473 // Need a 32-bit wide register.
7474 MachineIRBuilder MIB(*PtrAdd);
7475 Register ExtReg = moveScalarRegClass(OffsetInst->getOperand(1).getReg(),
7476 AArch64::GPR32RegClass, MIB);
7477 unsigned SignExtend = Ext == AArch64_AM::SXTW;
7478
7479 // Base is LHS, offset is ExtReg.
7480 return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(LHS.getReg()); },
7481 [=](MachineInstrBuilder &MIB) { MIB.addUse(ExtReg); },
7482 [=](MachineInstrBuilder &MIB) {
7483 MIB.addImm(SignExtend);
7484 MIB.addImm(0);
7485 }}};
7486}
7487
7488/// Select a "register plus unscaled signed 9-bit immediate" address. This
7489/// should only match when there is an offset that is not valid for a scaled
7490/// immediate addressing mode. The "Size" argument is the size in bytes of the
7491/// memory reference, which is needed here to know what is valid for a scaled
7492/// immediate.
7493InstructionSelector::ComplexRendererFns
7494AArch64InstructionSelector::selectAddrModeUnscaled(MachineOperand &Root,
7495 unsigned Size) const {
7496 MachineRegisterInfo &MRI =
7497 Root.getParent()->getParent()->getParent()->getRegInfo();
7498
7499 if (!Root.isReg())
7500 return std::nullopt;
7501
7502 if (!isBaseWithConstantOffset(Root, MRI))
7503 return std::nullopt;
7504
7505 MachineInstr *RootDef = MRI.getVRegDef(Root.getReg());
7506
7507 MachineOperand &OffImm = RootDef->getOperand(2);
7508 if (!OffImm.isReg())
7509 return std::nullopt;
7510 MachineInstr *RHS = MRI.getVRegDef(OffImm.getReg());
7511 if (RHS->getOpcode() != TargetOpcode::G_CONSTANT)
7512 return std::nullopt;
7513 int64_t RHSC;
7514 MachineOperand &RHSOp1 = RHS->getOperand(1);
7515 if (!RHSOp1.isCImm() || RHSOp1.getCImm()->getBitWidth() > 64)
7516 return std::nullopt;
7517 RHSC = RHSOp1.getCImm()->getSExtValue();
7518
7519 if (RHSC >= -256 && RHSC < 256) {
7520 MachineOperand &Base = RootDef->getOperand(1);
7521 return {{
7522 [=](MachineInstrBuilder &MIB) { MIB.add(Base); },
7523 [=](MachineInstrBuilder &MIB) { MIB.addImm(RHSC); },
7524 }};
7525 }
7526 return std::nullopt;
7527}
7528
7529InstructionSelector::ComplexRendererFns
7530AArch64InstructionSelector::tryFoldAddLowIntoImm(MachineInstr &RootDef,
7531 unsigned Size,
7532 MachineRegisterInfo &MRI) const {
7533 if (RootDef.getOpcode() != AArch64::G_ADD_LOW)
7534 return std::nullopt;
7535 MachineInstr &Adrp = *MRI.getVRegDef(RootDef.getOperand(1).getReg());
7536 if (Adrp.getOpcode() != AArch64::ADRP)
7537 return std::nullopt;
7538
7539 // TODO: add heuristics like isWorthFoldingADDlow() from SelectionDAG.
7540 auto Offset = Adrp.getOperand(1).getOffset();
7541 if (Offset % Size != 0)
7542 return std::nullopt;
7543
7544 auto GV = Adrp.getOperand(1).getGlobal();
7545 if (GV->isThreadLocal())
7546 return std::nullopt;
7547
7548 auto &MF = *RootDef.getParent()->getParent();
7549 if (GV->getPointerAlignment(MF.getDataLayout()) < Size)
7550 return std::nullopt;
7551
7552 unsigned OpFlags = STI.ClassifyGlobalReference(GV, MF.getTarget());
7553 MachineIRBuilder MIRBuilder(RootDef);
7554 Register AdrpReg = Adrp.getOperand(0).getReg();
7555 return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(AdrpReg); },
7556 [=](MachineInstrBuilder &MIB) {
7557 MIB.addGlobalAddress(GV, Offset,
7558 OpFlags | AArch64II::MO_PAGEOFF |
7560 }}};
7561}
7562
7563/// Select a "register plus scaled unsigned 12-bit immediate" address. The
7564/// "Size" argument is the size in bytes of the memory reference, which
7565/// determines the scale.
7566InstructionSelector::ComplexRendererFns
7567AArch64InstructionSelector::selectAddrModeIndexed(MachineOperand &Root,
7568 unsigned Size) const {
7569 MachineFunction &MF = *Root.getParent()->getParent()->getParent();
7570 MachineRegisterInfo &MRI = MF.getRegInfo();
7571
7572 if (!Root.isReg())
7573 return std::nullopt;
7574
7575 MachineInstr *RootDef = MRI.getVRegDef(Root.getReg());
7576 if (RootDef->getOpcode() == TargetOpcode::G_FRAME_INDEX) {
7577 return {{
7578 [=](MachineInstrBuilder &MIB) { MIB.add(RootDef->getOperand(1)); },
7579 [=](MachineInstrBuilder &MIB) { MIB.addImm(0); },
7580 }};
7581 }
7582
7584 // Check if we can fold in the ADD of small code model ADRP + ADD address.
7585 // HACK: ld64 on Darwin doesn't support relocations on PRFM, so we can't fold
7586 // globals into the offset.
7587 MachineInstr *RootParent = Root.getParent();
7588 if (CM == CodeModel::Small &&
7589 !(RootParent->getOpcode() == AArch64::G_AARCH64_PREFETCH &&
7590 STI.isTargetDarwin())) {
7591 auto OpFns = tryFoldAddLowIntoImm(*RootDef, Size, MRI);
7592 if (OpFns)
7593 return OpFns;
7594 }
7595
7596 if (isBaseWithConstantOffset(Root, MRI)) {
7597 MachineOperand &LHS = RootDef->getOperand(1);
7598 MachineOperand &RHS = RootDef->getOperand(2);
7599 MachineInstr *LHSDef = MRI.getVRegDef(LHS.getReg());
7600 MachineInstr *RHSDef = MRI.getVRegDef(RHS.getReg());
7601
7602 int64_t RHSC = (int64_t)RHSDef->getOperand(1).getCImm()->getZExtValue();
7603 unsigned Scale = Log2_32(Size);
7604 if ((RHSC & (Size - 1)) == 0 && RHSC >= 0 && RHSC < (0x1000 << Scale)) {
7605 if (LHSDef->getOpcode() == TargetOpcode::G_FRAME_INDEX)
7606 return {{
7607 [=](MachineInstrBuilder &MIB) { MIB.add(LHSDef->getOperand(1)); },
7608 [=](MachineInstrBuilder &MIB) { MIB.addImm(RHSC >> Scale); },
7609 }};
7610
7611 return {{
7612 [=](MachineInstrBuilder &MIB) { MIB.add(LHS); },
7613 [=](MachineInstrBuilder &MIB) { MIB.addImm(RHSC >> Scale); },
7614 }};
7615 }
7616 }
7617
7618 // Before falling back to our general case, check if the unscaled
7619 // instructions can handle this. If so, that's preferable.
7620 if (selectAddrModeUnscaled(Root, Size))
7621 return std::nullopt;
7622
7623 return {{
7624 [=](MachineInstrBuilder &MIB) { MIB.add(Root); },
7625 [=](MachineInstrBuilder &MIB) { MIB.addImm(0); },
7626 }};
7627}
7628
7629/// Given a shift instruction, return the correct shift type for that
7630/// instruction.
7632 switch (MI.getOpcode()) {
7633 default:
7635 case TargetOpcode::G_SHL:
7636 return AArch64_AM::LSL;
7637 case TargetOpcode::G_LSHR:
7638 return AArch64_AM::LSR;
7639 case TargetOpcode::G_ASHR:
7640 return AArch64_AM::ASR;
7641 case TargetOpcode::G_ROTR:
7642 return AArch64_AM::ROR;
7643 }
7644}
7645
7646/// Select a "shifted register" operand. If the value is not shifted, set the
7647/// shift operand to a default value of "lsl 0".
7648InstructionSelector::ComplexRendererFns
7649AArch64InstructionSelector::selectShiftedRegister(MachineOperand &Root,
7650 bool AllowROR) const {
7651 if (!Root.isReg())
7652 return std::nullopt;
7653 MachineRegisterInfo &MRI =
7654 Root.getParent()->getParent()->getParent()->getRegInfo();
7655
7656 // Check if the operand is defined by an instruction which corresponds to
7657 // a ShiftExtendType. E.g. a G_SHL, G_LSHR, etc.
7658 MachineInstr *ShiftInst = MRI.getVRegDef(Root.getReg());
7660 if (ShType == AArch64_AM::InvalidShiftExtend)
7661 return std::nullopt;
7662 if (ShType == AArch64_AM::ROR && !AllowROR)
7663 return std::nullopt;
7664 if (!isWorthFoldingIntoExtendedReg(*ShiftInst, MRI, false))
7665 return std::nullopt;
7666
7667 // Need an immediate on the RHS.
7668 MachineOperand &ShiftRHS = ShiftInst->getOperand(2);
7669 auto Immed = getImmedFromMO(ShiftRHS);
7670 if (!Immed)
7671 return std::nullopt;
7672
7673 // We have something that we can fold. Fold in the shift's LHS and RHS into
7674 // the instruction.
7675 MachineOperand &ShiftLHS = ShiftInst->getOperand(1);
7676 Register ShiftReg = ShiftLHS.getReg();
7677
7678 unsigned NumBits = MRI.getType(ShiftReg).getSizeInBits();
7679 unsigned Val = *Immed & (NumBits - 1);
7680 unsigned ShiftVal = AArch64_AM::getShifterImm(ShType, Val);
7681
7682 return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(ShiftReg); },
7683 [=](MachineInstrBuilder &MIB) { MIB.addImm(ShiftVal); }}};
7684}
7685
7686AArch64_AM::ShiftExtendType AArch64InstructionSelector::getExtendTypeForInst(
7687 MachineInstr &MI, MachineRegisterInfo &MRI, bool IsLoadStore) const {
7688 unsigned Opc = MI.getOpcode();
7689
7690 // Handle explicit extend instructions first.
7691 if (Opc == TargetOpcode::G_SEXT || Opc == TargetOpcode::G_SEXT_INREG) {
7692 unsigned Size;
7693 if (Opc == TargetOpcode::G_SEXT)
7694 Size = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
7695 else
7696 Size = MI.getOperand(2).getImm();
7697 assert(Size != 64 && "Extend from 64 bits?");
7698 switch (Size) {
7699 case 8:
7700 return IsLoadStore ? AArch64_AM::InvalidShiftExtend : AArch64_AM::SXTB;
7701 case 16:
7702 return IsLoadStore ? AArch64_AM::InvalidShiftExtend : AArch64_AM::SXTH;
7703 case 32:
7704 return AArch64_AM::SXTW;
7705 default:
7707 }
7708 }
7709
7710 if (Opc == TargetOpcode::G_ZEXT || Opc == TargetOpcode::G_ANYEXT) {
7711 unsigned Size = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
7712 assert(Size != 64 && "Extend from 64 bits?");
7713 switch (Size) {
7714 case 8:
7715 return IsLoadStore ? AArch64_AM::InvalidShiftExtend : AArch64_AM::UXTB;
7716 case 16:
7717 return IsLoadStore ? AArch64_AM::InvalidShiftExtend : AArch64_AM::UXTH;
7718 case 32:
7719 return AArch64_AM::UXTW;
7720 default:
7722 }
7723 }
7724
7725 // Don't have an explicit extend. Try to handle a G_AND with a constant mask
7726 // on the RHS.
7727 if (Opc != TargetOpcode::G_AND)
7729
7730 std::optional<uint64_t> MaybeAndMask = getImmedFromMO(MI.getOperand(2));
7731 if (!MaybeAndMask)
7733 uint64_t AndMask = *MaybeAndMask;
7734 switch (AndMask) {
7735 default:
7737 case 0xFF:
7738 return !IsLoadStore ? AArch64_AM::UXTB : AArch64_AM::InvalidShiftExtend;
7739 case 0xFFFF:
7740 return !IsLoadStore ? AArch64_AM::UXTH : AArch64_AM::InvalidShiftExtend;
7741 case 0xFFFFFFFF:
7742 return AArch64_AM::UXTW;
7743 }
7744}
7745
7746Register AArch64InstructionSelector::moveScalarRegClass(
7747 Register Reg, const TargetRegisterClass &RC, MachineIRBuilder &MIB) const {
7748 MachineRegisterInfo &MRI = *MIB.getMRI();
7749 auto Ty = MRI.getType(Reg);
7750 assert(!Ty.isVector() && "Expected scalars only!");
7751 if (Ty.getSizeInBits() == TRI.getRegSizeInBits(RC))
7752 return Reg;
7753
7754 // Create a copy and immediately select it.
7755 // FIXME: We should have an emitCopy function?
7756 auto Copy = MIB.buildCopy({&RC}, {Reg});
7757 selectCopy(*Copy, TII, MRI, TRI, RBI);
7758 return Copy.getReg(0);
7759}
7760
7761/// Select an "extended register" operand. This operand folds in an extend
7762/// followed by an optional left shift.
7763InstructionSelector::ComplexRendererFns
7764AArch64InstructionSelector::selectArithExtendedRegister(
7765 MachineOperand &Root) const {
7766 if (!Root.isReg())
7767 return std::nullopt;
7768 MachineRegisterInfo &MRI =
7769 Root.getParent()->getParent()->getParent()->getRegInfo();
7770
7771 uint64_t ShiftVal = 0;
7772 Register ExtReg;
7774 MachineInstr *RootDef = getDefIgnoringCopies(Root.getReg(), MRI);
7775 if (!RootDef)
7776 return std::nullopt;
7777
7778 if (!isWorthFoldingIntoExtendedReg(*RootDef, MRI, false))
7779 return std::nullopt;
7780
7781 // Check if we can fold a shift and an extend.
7782 if (RootDef->getOpcode() == TargetOpcode::G_SHL) {
7783 // Look for a constant on the RHS of the shift.
7784 MachineOperand &RHS = RootDef->getOperand(2);
7785 std::optional<uint64_t> MaybeShiftVal = getImmedFromMO(RHS);
7786 if (!MaybeShiftVal)
7787 return std::nullopt;
7788 ShiftVal = *MaybeShiftVal;
7789 if (ShiftVal > 4)
7790 return std::nullopt;
7791 // Look for a valid extend instruction on the LHS of the shift.
7792 MachineOperand &LHS = RootDef->getOperand(1);
7793 MachineInstr *ExtDef = getDefIgnoringCopies(LHS.getReg(), MRI);
7794 if (!ExtDef)
7795 return std::nullopt;
7796 Ext = getExtendTypeForInst(*ExtDef, MRI);
7798 return std::nullopt;
7799 ExtReg = ExtDef->getOperand(1).getReg();
7800 } else {
7801 // Didn't get a shift. Try just folding an extend.
7802 Ext = getExtendTypeForInst(*RootDef, MRI);
7804 return std::nullopt;
7805 ExtReg = RootDef->getOperand(1).getReg();
7806
7807 // If we have a 32 bit instruction which zeroes out the high half of a
7808 // register, we get an implicit zero extend for free. Check if we have one.
7809 // FIXME: We actually emit the extend right now even though we don't have
7810 // to.
7811 if (Ext == AArch64_AM::UXTW && MRI.getType(ExtReg).getSizeInBits() == 32) {
7812 MachineInstr *ExtInst = MRI.getVRegDef(ExtReg);
7813 if (isDef32(*ExtInst))
7814 return std::nullopt;
7815 }
7816 }
7817
7818 // We require a GPR32 here. Narrow the ExtReg if needed using a subregister
7819 // copy.
7820 MachineIRBuilder MIB(*RootDef);
7821 ExtReg = moveScalarRegClass(ExtReg, AArch64::GPR32RegClass, MIB);
7822
7823 return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(ExtReg); },
7824 [=](MachineInstrBuilder &MIB) {
7825 MIB.addImm(getArithExtendImm(Ext, ShiftVal));
7826 }}};
7827}
7828
7829InstructionSelector::ComplexRendererFns
7830AArch64InstructionSelector::selectExtractHigh(MachineOperand &Root) const {
7831 if (!Root.isReg())
7832 return std::nullopt;
7833 MachineRegisterInfo &MRI =
7834 Root.getParent()->getParent()->getParent()->getRegInfo();
7835
7836 auto Extract = getDefSrcRegIgnoringCopies(Root.getReg(), MRI);
7837 while (Extract && Extract->MI->getOpcode() == TargetOpcode::G_BITCAST &&
7838 STI.isLittleEndian())
7839 Extract =
7840 getDefSrcRegIgnoringCopies(Extract->MI->getOperand(1).getReg(), MRI);
7841 if (!Extract)
7842 return std::nullopt;
7843
7844 if (Extract->MI->getOpcode() == TargetOpcode::G_UNMERGE_VALUES) {
7845 if (Extract->Reg == Extract->MI->getOperand(1).getReg()) {
7846 Register ExtReg = Extract->MI->getOperand(2).getReg();
7847 return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(ExtReg); }}};
7848 }
7849 }
7850 if (Extract->MI->getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT) {
7851 LLT SrcTy = MRI.getType(Extract->MI->getOperand(1).getReg());
7853 Extract->MI->getOperand(2).getReg(), MRI);
7854 if (LaneIdx && SrcTy == LLT::fixed_vector(2, 64) &&
7855 LaneIdx->Value.getSExtValue() == 1) {
7856 Register ExtReg = Extract->MI->getOperand(1).getReg();
7857 return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(ExtReg); }}};
7858 }
7859 }
7860
7861 return std::nullopt;
7862}
7863
7864InstructionSelector::ComplexRendererFns
7865AArch64InstructionSelector::selectCVTFixedPointVecBase(
7866 const MachineOperand &Root) const {
7867 if (!Root.isReg())
7868 return std::nullopt;
7869 const MachineRegisterInfo &MRI =
7870 Root.getParent()->getParent()->getParent()->getRegInfo();
7871
7872 MachineInstr *Dup = getDefIgnoringCopies(Root.getReg(), MRI);
7873 if (Dup->getOpcode() != AArch64::G_DUP)
7874 return std::nullopt;
7875 std::optional<ValueAndVReg> CstVal =
7877 if (!CstVal)
7878 return std::nullopt;
7879
7880 unsigned RegWidth = MRI.getType(Root.getReg()).getScalarSizeInBits();
7881 APFloat FVal(0.0);
7882 switch (RegWidth) {
7883 case 16:
7884 FVal = APFloat(APFloat::IEEEhalf(), CstVal->Value);
7885 break;
7886 case 32:
7887 FVal = APFloat(APFloat::IEEEsingle(), CstVal->Value);
7888 break;
7889 case 64:
7890 FVal = APFloat(APFloat::IEEEdouble(), CstVal->Value);
7891 break;
7892 default:
7893 return std::nullopt;
7894 };
7895 if (unsigned FBits = CheckFixedPointOperandConstant(FVal, RegWidth,
7896 /*isReciprocal*/ false))
7897 return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(FBits); }}};
7898
7899 return std::nullopt;
7900}
7901
/// Complex-pattern entry point for the vector fixed-point scale operand;
/// forwards directly to selectCVTFixedPointVecBase.
InstructionSelector::ComplexRendererFns
AArch64InstructionSelector::selectCVTFixedPointVec(MachineOperand &Root) const {
  return selectCVTFixedPointVecBase(Root);
}
7906
7907void AArch64InstructionSelector::renderFixedPointXForm(MachineInstrBuilder &MIB,
7908 const MachineInstr &MI,
7909 int OpIdx) const {
7910 // FIXME: This is only needed to satisfy the type checking in tablegen, and
7911 // should be able to reuse the Renderers already calculated by
7912 // selectCVTFixedPointVecBase.
7913 InstructionSelector::ComplexRendererFns Renderer =
7914 selectCVTFixedPointVecBase(MI.getOperand(2));
7915 assert((Renderer && Renderer->size() == 1) &&
7916 "Expected selectCVTFixedPointVec to provide a function\n");
7917 (Renderer->front())(MIB);
7918}
7919
7920void AArch64InstructionSelector::renderTruncImm(MachineInstrBuilder &MIB,
7921 const MachineInstr &MI,
7922 int OpIdx) const {
7923 const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
7924 assert(MI.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 &&
7925 "Expected G_CONSTANT");
7926 std::optional<int64_t> CstVal =
7927 getIConstantVRegSExtVal(MI.getOperand(0).getReg(), MRI);
7928 assert(CstVal && "Expected constant value");
7929 MIB.addImm(*CstVal);
7930}
7931
7932void AArch64InstructionSelector::renderLogicalImm32(
7933 MachineInstrBuilder &MIB, const MachineInstr &I, int OpIdx) const {
7934 assert(I.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 &&
7935 "Expected G_CONSTANT");
7936 uint64_t CstVal = I.getOperand(1).getCImm()->getZExtValue();
7937 uint64_t Enc = AArch64_AM::encodeLogicalImmediate(CstVal, 32);
7938 MIB.addImm(Enc);
7939}
7940
7941void AArch64InstructionSelector::renderLogicalImm64(
7942 MachineInstrBuilder &MIB, const MachineInstr &I, int OpIdx) const {
7943 assert(I.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 &&
7944 "Expected G_CONSTANT");
7945 uint64_t CstVal = I.getOperand(1).getCImm()->getZExtValue();
7946 uint64_t Enc = AArch64_AM::encodeLogicalImmediate(CstVal, 64);
7947 MIB.addImm(Enc);
7948}
7949
7950void AArch64InstructionSelector::renderUbsanTrap(MachineInstrBuilder &MIB,
7951 const MachineInstr &MI,
7952 int OpIdx) const {
7953 assert(MI.getOpcode() == TargetOpcode::G_UBSANTRAP && OpIdx == 0 &&
7954 "Expected G_UBSANTRAP");
7955 MIB.addImm(MI.getOperand(0).getImm() | ('U' << 8));
7956}
7957
7958void AArch64InstructionSelector::renderFPImm16(MachineInstrBuilder &MIB,
7959 const MachineInstr &MI,
7960 int OpIdx) const {
7961 assert(MI.getOpcode() == TargetOpcode::G_FCONSTANT && OpIdx == -1 &&
7962 "Expected G_FCONSTANT");
7963 MIB.addImm(
7964 AArch64_AM::getFP16Imm(MI.getOperand(1).getFPImm()->getValueAPF()));
7965}
7966
7967void AArch64InstructionSelector::renderFPImm32(MachineInstrBuilder &MIB,
7968 const MachineInstr &MI,
7969 int OpIdx) const {
7970 assert(MI.getOpcode() == TargetOpcode::G_FCONSTANT && OpIdx == -1 &&
7971 "Expected G_FCONSTANT");
7972 MIB.addImm(
7973 AArch64_AM::getFP32Imm(MI.getOperand(1).getFPImm()->getValueAPF()));
7974}
7975
7976void AArch64InstructionSelector::renderFPImm64(MachineInstrBuilder &MIB,
7977 const MachineInstr &MI,
7978 int OpIdx) const {
7979 assert(MI.getOpcode() == TargetOpcode::G_FCONSTANT && OpIdx == -1 &&
7980 "Expected G_FCONSTANT");
7981 MIB.addImm(
7982 AArch64_AM::getFP64Imm(MI.getOperand(1).getFPImm()->getValueAPF()));
7983}
7984
7985void AArch64InstructionSelector::renderFPImm32SIMDModImmType4(
7986 MachineInstrBuilder &MIB, const MachineInstr &MI, int OpIdx) const {
7987 assert(MI.getOpcode() == TargetOpcode::G_FCONSTANT && OpIdx == -1 &&
7988 "Expected G_FCONSTANT");
7990 .getFPImm()
7991 ->getValueAPF()
7992 .bitcastToAPInt()
7993 .getZExtValue()));
7994}
7995
7996bool AArch64InstructionSelector::isLoadStoreOfNumBytes(
7997 const MachineInstr &MI, unsigned NumBytes) const {
7998 if (!MI.mayLoadOrStore())
7999 return false;
8000 assert(MI.hasOneMemOperand() &&
8001 "Expected load/store to have only one mem op!");
8002 return (*MI.memoperands_begin())->getSize() == NumBytes;
8003}
8004
8005bool AArch64InstructionSelector::isDef32(const MachineInstr &MI) const {
8006 const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
8007 if (MRI.getType(MI.getOperand(0).getReg()).getSizeInBits() != 32)
8008 return false;
8009
8010 // Only return true if we know the operation will zero-out the high half of
8011 // the 64-bit register. Truncates can be subregister copies, which don't
8012 // zero out the high bits. Copies and other copy-like instructions can be
8013 // fed by truncates, or could be lowered as subregister copies.
8014 switch (MI.getOpcode()) {
8015 default:
8016 return true;
8017 case TargetOpcode::COPY:
8018 case TargetOpcode::G_BITCAST:
8019 case TargetOpcode::G_TRUNC:
8020 case TargetOpcode::G_PHI:
8021 return false;
8022 }
8023}
8024
8025
8026// Perform fixups on the given PHI instruction's operands to force them all
8027// to be the same as the destination regbank.
8029 const AArch64RegisterBankInfo &RBI) {
8030 assert(MI.getOpcode() == TargetOpcode::G_PHI && "Expected a G_PHI");
8031 Register DstReg = MI.getOperand(0).getReg();
8032 const RegisterBank *DstRB = MRI.getRegBankOrNull(DstReg);
8033 assert(DstRB && "Expected PHI dst to have regbank assigned");
8034 MachineIRBuilder MIB(MI);
8035
8036 // Go through each operand and ensure it has the same regbank.
8037 for (MachineOperand &MO : llvm::drop_begin(MI.operands())) {
8038 if (!MO.isReg())
8039 continue;
8040 Register OpReg = MO.getReg();
8041 const RegisterBank *RB = MRI.getRegBankOrNull(OpReg);
8042 if (RB != DstRB) {
8043 // Insert a cross-bank copy.
8044 auto *OpDef = MRI.getVRegDef(OpReg);
8045 const LLT &Ty = MRI.getType(OpReg);
8046 MachineBasicBlock &OpDefBB = *OpDef->getParent();
8047
8048 // Any instruction we insert must appear after all PHIs in the block
8049 // for the block to be valid MIR.
8050 MachineBasicBlock::iterator InsertPt = std::next(OpDef->getIterator());
8051 if (InsertPt != OpDefBB.end() && InsertPt->isPHI())
8052 InsertPt = OpDefBB.getFirstNonPHI();
8053 MIB.setInsertPt(*OpDef->getParent(), InsertPt);
8054 auto Copy = MIB.buildCopy(Ty, OpReg);
8055 MRI.setRegBank(Copy.getReg(0), *DstRB);
8056 MO.setReg(Copy.getReg(0));
8057 }
8058 }
8059}
8060
8061void AArch64InstructionSelector::processPHIs(MachineFunction &MF) {
8062 // We're looking for PHIs, build a list so we don't invalidate iterators.
8063 MachineRegisterInfo &MRI = MF.getRegInfo();
8065 for (auto &BB : MF) {
8066 for (auto &MI : BB) {
8067 if (MI.getOpcode() == TargetOpcode::G_PHI)
8068 Phis.emplace_back(&MI);
8069 }
8070 }
8071
8072 for (auto *MI : Phis) {
8073 // We need to do some work here if the operand types are < 16 bit and they
8074 // are split across fpr/gpr banks. Since all types <32b on gpr
8075 // end up being assigned gpr32 regclasses, we can end up with PHIs here
8076 // which try to select between a gpr32 and an fpr16. Ideally RBS shouldn't
8077 // be selecting heterogenous regbanks for operands if possible, but we
8078 // still need to be able to deal with it here.
8079 //
8080 // To fix this, if we have a gpr-bank operand < 32b in size and at least
8081 // one other operand is on the fpr bank, then we add cross-bank copies
8082 // to homogenize the operand banks. For simplicity the bank that we choose
8083 // to settle on is whatever bank the def operand has. For example:
8084 //
8085 // %endbb:
8086 // %dst:gpr(s16) = G_PHI %in1:gpr(s16), %bb1, %in2:fpr(s16), %bb2
8087 // =>
8088 // %bb2:
8089 // ...
8090 // %in2_copy:gpr(s16) = COPY %in2:fpr(s16)
8091 // ...
8092 // %endbb:
8093 // %dst:gpr(s16) = G_PHI %in1:gpr(s16), %bb1, %in2_copy:gpr(s16), %bb2
8094 bool HasGPROp = false, HasFPROp = false;
8095 for (const MachineOperand &MO : llvm::drop_begin(MI->operands())) {
8096 if (!MO.isReg())
8097 continue;
8098 const LLT &Ty = MRI.getType(MO.getReg());
8099 if (!Ty.isValid() || !Ty.isScalar())
8100 break;
8101 if (Ty.getSizeInBits() >= 32)
8102 break;
8103 const RegisterBank *RB = MRI.getRegBankOrNull(MO.getReg());
8104 // If for some reason we don't have a regbank yet. Don't try anything.
8105 if (!RB)
8106 break;
8107
8108 if (RB->getID() == AArch64::GPRRegBankID)
8109 HasGPROp = true;
8110 else
8111 HasFPROp = true;
8112 }
8113 // We have heterogenous regbanks, need to fixup.
8114 if (HasGPROp && HasFPROp)
8115 fixupPHIOpBanks(*MI, MRI, RBI);
8116 }
8117}
8118
8119namespace llvm {
8120InstructionSelector *
8122 const AArch64Subtarget &Subtarget,
8123 const AArch64RegisterBankInfo &RBI) {
8124 return new AArch64InstructionSelector(TM, Subtarget, RBI);
8125}
8126}
#define Success
MachineInstrBuilder MachineInstrBuilder & DefMI
static std::tuple< SDValue, SDValue > extractPtrauthBlendDiscriminators(SDValue Disc, SelectionDAG *DAG)
static bool isPreferredADD(int64_t ImmOff)
static SDValue emitConditionalComparison(SDValue LHS, SDValue RHS, ISD::CondCode CC, SDValue CCOp, AArch64CC::CondCode Predicate, AArch64CC::CondCode OutCC, const SDLoc &DL, SelectionDAG &DAG)
can be transformed to: not (and (not (and (setCC (cmp C)) (setCD (cmp D)))) (and (not (setCA (cmp A))...
static SDValue tryAdvSIMDModImm16(unsigned NewOp, SDValue Op, SelectionDAG &DAG, const APInt &Bits, const SDValue *LHS=nullptr)
static SDValue tryAdvSIMDModImmFP(unsigned NewOp, SDValue Op, SelectionDAG &DAG, const APInt &Bits)
static SDValue tryAdvSIMDModImm64(unsigned NewOp, SDValue Op, SelectionDAG &DAG, const APInt &Bits)
static bool isCMN(SDValue Op, ISD::CondCode CC, SelectionDAG &DAG)
static SDValue tryAdvSIMDModImm8(unsigned NewOp, SDValue Op, SelectionDAG &DAG, const APInt &Bits)
static SDValue emitConjunctionRec(SelectionDAG &DAG, SDValue Val, AArch64CC::CondCode &OutCC, bool Negate, SDValue CCOp, AArch64CC::CondCode Predicate)
Emit conjunction or disjunction tree with the CMP/FCMP followed by a chain of CCMP/CFCMP ops.
static SDValue tryAdvSIMDModImm321s(unsigned NewOp, SDValue Op, SelectionDAG &DAG, const APInt &Bits)
static void changeFPCCToANDAArch64CC(ISD::CondCode CC, AArch64CC::CondCode &CondCode, AArch64CC::CondCode &CondCode2)
Convert a DAG fp condition code to an AArch64 CC.
static bool canEmitConjunction(SelectionDAG &DAG, const SDValue Val, bool &CanNegate, bool &MustBeFirst, bool &PreferFirst, bool WillNegate, unsigned Depth=0)
Returns true if Val is a tree of AND/OR/SETCC operations that can be expressed as a conjunction.
static SDValue tryAdvSIMDModImm32(unsigned NewOp, SDValue Op, SelectionDAG &DAG, const APInt &Bits, const SDValue *LHS=nullptr)
static SDValue emitConjunction(SelectionDAG &DAG, SDValue Val, AArch64CC::CondCode &OutCC)
Emit expression as a conjunction (a series of CCMP/CFCMP ops).
#define GET_GLOBALISEL_PREDICATES_INIT
static std::pair< const TargetRegisterClass *, const TargetRegisterClass * > getRegClassesForCopy(MachineInstr &I, const TargetInstrInfo &TII, MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI, const RegisterBankInfo &RBI)
Helper function to get the source and destination register classes for a copy.
#define GET_GLOBALISEL_TEMPORARIES_INIT
static Register getTestBitReg(Register Reg, uint64_t &Bit, bool &Invert, MachineRegisterInfo &MRI)
Return a register which can be used as a bit to test in a TB(N)Z.
static unsigned getMinSizeForRegBank(const RegisterBank &RB)
Returns the minimum size the given register bank can hold.
static std::optional< int64_t > getVectorShiftImm(Register Reg, MachineRegisterInfo &MRI)
Returns the element immediate value of a vector shift operand if found.
static unsigned selectLoadStoreUIOp(unsigned GenericOpc, unsigned RegBankID, unsigned OpSize)
Select the AArch64 opcode for the G_LOAD or G_STORE operation GenericOpc, appropriate for the (value)...
static const TargetRegisterClass * getMinClassForRegBank(const RegisterBank &RB, TypeSize SizeInBits, bool GetAllRegSet=false)
Given a register bank, and size in bits, return the smallest register class that can represent that c...
static unsigned selectBinaryOp(unsigned GenericOpc, unsigned RegBankID, unsigned OpSize)
Select the AArch64 opcode for the basic binary operation GenericOpc (such as G_OR or G_SDIV),...
static bool getSubRegForClass(const TargetRegisterClass *RC, const TargetRegisterInfo &TRI, unsigned &SubReg)
Returns the correct subregister to use for a given register class.
static bool selectCopy(MachineInstr &I, const TargetInstrInfo &TII, MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI, const RegisterBankInfo &RBI)
static bool copySubReg(MachineInstr &I, MachineRegisterInfo &MRI, const RegisterBankInfo &RBI, Register SrcReg, const TargetRegisterClass *To, unsigned SubReg)
Helper function for selectCopy.
static AArch64CC::CondCode changeICMPPredToAArch64CC(CmpInst::Predicate P, Register RHS={}, MachineRegisterInfo *MRI=nullptr)
static Register createDTuple(ArrayRef< Register > Regs, MachineIRBuilder &MIB)
Create a tuple of D-registers using the registers in Regs.
static void fixupPHIOpBanks(MachineInstr &MI, MachineRegisterInfo &MRI, const AArch64RegisterBankInfo &RBI)
static bool selectDebugInstr(MachineInstr &I, MachineRegisterInfo &MRI, const RegisterBankInfo &RBI)
static AArch64_AM::ShiftExtendType getShiftTypeForInst(MachineInstr &MI)
Given a shift instruction, return the correct shift type for that instruction.
static bool unsupportedBinOp(const MachineInstr &I, const AArch64RegisterBankInfo &RBI, const MachineRegisterInfo &MRI, const AArch64RegisterInfo &TRI)
Check whether I is a currently unsupported binary operation:
static bool getLaneCopyOpcode(unsigned &CopyOpc, unsigned &ExtractSubReg, const unsigned EltSize)
static Register createQTuple(ArrayRef< Register > Regs, MachineIRBuilder &MIB)
Create a tuple of Q-registers using the registers in Regs.
static std::optional< uint64_t > getImmedFromMO(const MachineOperand &Root)
static std::pair< unsigned, unsigned > getInsertVecEltOpInfo(const RegisterBank &RB, unsigned EltSize)
Return an <Opcode, SubregIndex> pair to do an vector elt insert of a given size and RB.
static Register createTuple(ArrayRef< Register > Regs, const unsigned RegClassIDs[], const unsigned SubRegs[], MachineIRBuilder &MIB)
Create a REG_SEQUENCE instruction using the registers in Regs.
static std::optional< int64_t > getVectorSHLImm(LLT SrcTy, Register Reg, MachineRegisterInfo &MRI)
Matches and returns the shift immediate value for a SHL instruction given a shift operand.
static void changeFPCCToORAArch64CC(CmpInst::Predicate CC, AArch64CC::CondCode &CondCode, AArch64CC::CondCode &CondCode2)
changeFPCCToORAArch64CC - Convert an IR fp condition code to an AArch64 CC.
unsigned RegSize
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
This file declares the targeting of the RegisterBankInfo class for AArch64.
constexpr LLT S16
constexpr LLT S32
constexpr LLT S64
constexpr LLT S8
static bool isStore(int Opcode)
static bool selectMergeValues(MachineInstrBuilder &MIB, const ARMBaseInstrInfo &TII, MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI, const RegisterBankInfo &RBI)
static bool selectUnmergeValues(MachineInstrBuilder &MIB, const ARMBaseInstrInfo &TII, MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI, const RegisterBankInfo &RBI)
MachineBasicBlock & MBB
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
This file contains the declarations for the subclasses of Constant, which represent the different fla...
This file contains constants used for implementing Dwarf debug support.
Provides analysis for querying information about KnownBits during GISel passes.
#define DEBUG_TYPE
Declares convenience wrapper classes for interpreting MachineInstr instances as specific generic oper...
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
static void emitLoadFromConstantPool(Register DstReg, const Constant *ConstVal, MachineIRBuilder &MIRBuilder)
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
Contains matchers for matching SSA Machine Instructions.
This file declares the MachineConstantPool class which is an abstract constant pool to keep track of ...
This file declares the MachineIRBuilder class.
Register Reg
Register const TargetRegisterInfo * TRI
Promote Memory to Register
Definition Mem2Reg.cpp:110
#define T
static MCRegister getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
MachineInstr unsigned OpIdx
#define P(N)
if(PassOpts->AAPipeline)
static StringRef getName(Value *V)
#define LLVM_DEBUG(...)
Definition Debug.h:114
static constexpr int Concat[]
Value * RHS
Value * LHS
This class provides the information for the target register banks.
std::optional< uint16_t > getPtrAuthBlockAddressDiscriminatorIfEnabled(const Function &ParentFn) const
Compute the integer discriminator for a given BlockAddress constant, if blockaddress signing is enabl...
const AArch64TargetLowering * getTargetLowering() const override
unsigned ClassifyGlobalReference(const GlobalValue *GV, const TargetMachine &TM) const
ClassifyGlobalReference - Find the target operand flags that describe how a global value should be re...
bool isX16X17Safer() const
Returns whether the operating system makes it safer to store sensitive values in x16 and x17 as oppos...
bool isCallingConvWin64(CallingConv::ID CC, bool IsVarArg) const
APInt bitcastToAPInt() const
Definition APFloat.h:1408
Class for arbitrary precision integers.
Definition APInt.h:78
LLVM_ABI APInt zext(unsigned width) const
Zero extend to a new width.
Definition APInt.cpp:1054
uint64_t getZExtValue() const
Get zero extended value.
Definition APInt.h:1563
LLVM_ABI APInt trunc(unsigned width) const
Truncate to new width.
Definition APInt.cpp:967
static LLVM_ABI APInt getSplat(unsigned NewLen, const APInt &V)
Return a value containing V broadcasted over NewLen bits.
Definition APInt.cpp:651
static APInt getHighBitsSet(unsigned numBits, unsigned hiBitsSet)
Constructs an APInt value that has the top hiBitsSet bits set.
Definition APInt.h:297
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
size_t size() const
size - Get the array size.
Definition ArrayRef.h:142
BlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate IR basic block frequen...
bool isEquality() const
Determine if this is an equals/not equals predicate.
Definition InstrTypes.h:915
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition InstrTypes.h:676
@ FCMP_OEQ
0 0 0 1 True if ordered and equal
Definition InstrTypes.h:679
@ ICMP_SLT
signed less than
Definition InstrTypes.h:705
@ ICMP_SLE
signed less or equal
Definition InstrTypes.h:706
@ FCMP_OLT
0 1 0 0 True if ordered and less than
Definition InstrTypes.h:682
@ FCMP_ULE
1 1 0 1 True if unordered, less than, or equal
Definition InstrTypes.h:691
@ FCMP_OGT
0 0 1 0 True if ordered and greater than
Definition InstrTypes.h:680
@ FCMP_OGE
0 0 1 1 True if ordered and greater than or equal
Definition InstrTypes.h:681
@ ICMP_UGE
unsigned greater or equal
Definition InstrTypes.h:700
@ ICMP_UGT
unsigned greater than
Definition InstrTypes.h:699
@ ICMP_SGT
signed greater than
Definition InstrTypes.h:703
@ FCMP_ULT
1 1 0 0 True if unordered or less than
Definition InstrTypes.h:690
@ FCMP_ONE
0 1 1 0 True if ordered and operands are unequal
Definition InstrTypes.h:684
@ FCMP_UEQ
1 0 0 1 True if unordered or equal
Definition InstrTypes.h:687
@ ICMP_ULT
unsigned less than
Definition InstrTypes.h:701
@ FCMP_UGT
1 0 1 0 True if unordered or greater than
Definition InstrTypes.h:688
@ FCMP_OLE
0 1 0 1 True if ordered and less than or equal
Definition InstrTypes.h:683
@ FCMP_ORD
0 1 1 1 True if ordered (no nans)
Definition InstrTypes.h:685
@ ICMP_NE
not equal
Definition InstrTypes.h:698
@ ICMP_SGE
signed greater or equal
Definition InstrTypes.h:704
@ FCMP_UNE
1 1 1 0 True if unordered or not equal
Definition InstrTypes.h:692
@ ICMP_ULE
unsigned less or equal
Definition InstrTypes.h:702
@ FCMP_UGE
1 0 1 1 True if unordered, greater than, or equal
Definition InstrTypes.h:689
@ FCMP_UNO
1 0 0 0 True if unordered: isnan(X) | isnan(Y)
Definition InstrTypes.h:686
Predicate getSwappedPredicate() const
For example, EQ->EQ, SLE->SGE, ULT->UGT, OEQ->OEQ, ULE->UGE, OLT->OGT, etc.
Definition InstrTypes.h:827
Predicate getInversePredicate() const
For example, EQ -> NE, UGT -> ULE, SLT -> SGE, OEQ -> UNE, UGT -> OLE, OLT -> UGE,...
Definition InstrTypes.h:789
bool isIntPredicate() const
Definition InstrTypes.h:783
bool isUnsigned() const
Definition InstrTypes.h:936
static LLVM_ABI Constant * getSplat(unsigned NumElts, Constant *Elt)
Return a ConstantVector with the specified constant in each element.
const APFloat & getValueAPF() const
Definition Constants.h:463
bool isNegative() const
Return true if the sign bit is set.
Definition Constants.h:470
bool isZero() const
Return true if the value is positive or negative zero.
Definition Constants.h:467
int64_t getSExtValue() const
Return the constant as a 64-bit integer value after it has been sign extended as appropriate for the ...
Definition Constants.h:174
unsigned getBitWidth() const
getBitWidth - Return the scalar bitwidth of this constant.
Definition Constants.h:162
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
Definition Constants.h:168
static LLVM_ABI Constant * get(ArrayRef< Constant * > V)
This is an important base class in LLVM.
Definition Constant.h:43
LLVM_ABI Constant * getSplatValue(bool AllowPoison=false) const
If all elements of the vector constant have the same value, return that value.
LLVM_ABI bool isNullValue() const
Return true if this is the value that would be returned by getNullValue.
Definition Constants.cpp:74
TypeSize getTypeStoreSize(Type *Ty) const
Returns the maximum number of bytes that may be overwritten by storing the specified type.
Definition DataLayout.h:572
LLVM_ABI Align getPrefTypeAlign(Type *Ty) const
Returns the preferred stack/global alignment for the specified type.
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this functio...
Definition Function.h:272
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
Definition Function.cpp:358
bool isVarArg() const
isVarArg - Return true if this function takes a variable number of arguments.
Definition Function.h:229
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition Function.cpp:728
virtual void setupMF(MachineFunction &mf, GISelValueTracking *vt, CodeGenCoverage *covinfo=nullptr, ProfileSummaryInfo *psi=nullptr, BlockFrequencyInfo *bfi=nullptr)
Setup per-MF executor state.
Represents indexed stores.
Register getPointerReg() const
Get the source register of the pointer value.
MachineMemOperand & getMMO() const
Get the MachineMemOperand on this instruction.
LocationSize getMemSize() const
Returns the size in bytes of the memory access.
LocationSize getMemSizeInBits() const
Returns the size in bits of the memory access.
Represents a G_SELECT.
Register getCondReg() const
Register getFalseReg() const
Register getTrueReg() const
Register getReg(unsigned Idx) const
Access the Idx'th operand as a register and return it.
bool isThreadLocal() const
If the value is "Thread Local", its value isn't shared by the threads.
bool hasExternalWeakLinkage() const
bool isEquality() const
Return true if this predicate is either EQ or NE.
constexpr bool isScalableVector() const
Returns true if the LLT is a scalable vector.
constexpr unsigned getScalarSizeInBits() const
constexpr bool isScalar() const
LLT multiplyElements(int Factor) const
Produce a vector type that is Factor times bigger, preserving the element type.
constexpr LLT changeElementType(LLT NewEltTy) const
If this type is a vector, return a vector with the same number of elements but the new element type.
LLT getScalarType() const
constexpr bool isPointerVector() const
constexpr bool isInteger() const
static constexpr LLT scalar(unsigned SizeInBits)
Get a low-level scalar or aggregate "bag of bits".
constexpr bool isValid() const
constexpr uint16_t getNumElements() const
Returns the number of elements in a vector LLT.
constexpr bool isVector() const
static constexpr LLT pointer(unsigned AddressSpace, unsigned SizeInBits)
Get a low-level pointer in the given address space.
constexpr TypeSize getSizeInBits() const
Returns the total size of the type. Must only be called on sized types.
constexpr bool isPointer() const
constexpr unsigned getAddressSpace() const
static constexpr LLT fixed_vector(unsigned NumElements, unsigned ScalarSizeInBits)
Get a low-level fixed-width vector of some number of elements and element width.
static LLT integer(unsigned SizeInBits)
constexpr TypeSize getSizeInBytes() const
Returns the total size of the type in bytes, i.e.
LLT getElementType() const
Returns the vector's element type. Only valid for vector types.
TypeSize getValue() const
LLVM_ABI iterator getFirstNonPHI()
Returns a pointer to the first instruction in this block that is not a PHINode instruction.
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
MachineInstrBundleIterator< MachineInstr > iterator
unsigned getConstantPoolIndex(const Constant *C, Align Alignment)
getConstantPoolIndex - Create a new entry in the constant pool or return an existing one.
void setFrameAddressIsTaken(bool T)
void setReturnAddressIsTaken(bool s)
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
const DataLayout & getDataLayout() const
Return the DataLayout attached to the Module associated to this MF.
Function & getFunction()
Return the LLVM function that this machine code represents.
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
MachineConstantPool * getConstantPool()
getConstantPool - Return the constant pool object for the current function.
const TargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
Helper class to build MachineInstr.
void setInsertPt(MachineBasicBlock &MBB, MachineBasicBlock::iterator II)
Set the insertion point before the specified position.
void setInstr(MachineInstr &MI)
Set the insertion point to before MI.
MachineInstrBuilder buildInstr(unsigned Opcode)
Build and insert <empty> = Opcode <empty>.
MachineFunction & getMF()
Getter for the function we currently build.
void setInstrAndDebugLoc(MachineInstr &MI)
Set the insertion point to before MI, and set the debug loc to MI's loc.
const MachineBasicBlock & getMBB() const
Getter for the basic block we currently build.
MachineRegisterInfo * getMRI()
Getter for MRI.
MachineIRBuilderState & getState()
Getter for the State.
MachineInstrBuilder buildCopy(const DstOp &Res, const SrcOp &Op)
Build and insert Res = COPY Op.
const DataLayout & getDataLayout() const
void setState(const MachineIRBuilderState &NewState)
Setter for the State.
MachineInstrBuilder buildPtrToInt(const DstOp &Dst, const SrcOp &Src)
Build and insert a G_PTRTOINT instruction.
Register getReg(unsigned Idx) const
Get the register for the operand index.
void constrainAllUses(const TargetInstrInfo &TII, const TargetRegisterInfo &TRI, const RegisterBankInfo &RBI) const
const MachineInstrBuilder & addUse(Register RegNo, RegState Flags={}, unsigned SubReg=0) const
Add a virtual register use operand.
const MachineInstrBuilder & addReg(Register RegNo, RegState Flags={}, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & addBlockAddress(const BlockAddress *BA, int64_t Offset=0, unsigned TargetFlags=0) const
const MachineInstrBuilder & addFrameIndex(int Idx) const
const MachineInstrBuilder & addRegMask(const uint32_t *Mask) const
const MachineInstrBuilder & addGlobalAddress(const GlobalValue *GV, int64_t Offset=0, unsigned TargetFlags=0) const
const MachineInstrBuilder & addJumpTableIndex(unsigned Idx, unsigned TargetFlags=0) const
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
const MachineInstrBuilder & addDef(Register RegNo, RegState Flags={}, unsigned SubReg=0) const
Add a virtual register definition operand.
const MachineInstrBuilder & cloneMemRefs(const MachineInstr &OtherMI) const
const MachineInstrBuilder & setMIFlags(unsigned Flags) const
const MachineInstrBuilder & addMemOperand(MachineMemOperand *MMO) const
Representation of each machine instruction.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
const MachineBasicBlock * getParent() const
LLVM_ABI void addOperand(MachineFunction &MF, const MachineOperand &Op)
Add the specified operand to the instruction.
LLVM_ABI const MachineFunction * getMF() const
Return the function that contains the basic block that this instruction belongs to.
const MachineOperand & getOperand(unsigned i) const
LLVM_ABI MachineInstrBundleIterator< MachineInstr > eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
LLVM_ABI void addMemOperand(MachineFunction &MF, MachineMemOperand *MO)
Add a MachineMemOperand to the machine instruction.
LLT getMemoryType() const
Return the memory type of the memory reference.
@ MOLoad
The memory access reads data.
@ MOStore
The memory access writes data.
AtomicOrdering getSuccessOrdering() const
Return the atomic ordering requirements for this memory operation.
MachineOperand class - Representation of each machine instruction operand.
const GlobalValue * getGlobal() const
const ConstantInt * getCImm() const
bool isCImm() const
isCImm - Test if this is a MO_CImmediate operand.
int64_t getImm() const
bool isReg() const
isReg - Tests if this is a MO_Register operand.
LLVM_ABI void setReg(Register Reg)
Change the register this operand corresponds to.
bool isImm() const
isImm - Tests if this is a MO_Immediate operand.
LLVM_ABI void ChangeToImmediate(int64_t ImmVal, unsigned TargetFlags=0)
ChangeToImmediate - Replace this operand with a new immediate operand of the specified value.
MachineInstr * getParent()
getParent - Return the instruction that this operand belongs to.
static MachineOperand CreatePredicate(unsigned Pred)
static MachineOperand CreateImm(int64_t Val)
Register getReg() const
getReg - Returns the register number.
static MachineOperand CreateGA(const GlobalValue *GV, int64_t Offset, unsigned TargetFlags=0)
static MachineOperand CreateBA(const BlockAddress *BA, int64_t Offset, unsigned TargetFlags=0)
const ConstantFP * getFPImm() const
unsigned getPredicate() const
int64_t getOffset() const
Return the offset from the symbol in this operand.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
LLVM_ABI bool hasOneNonDBGUse(Register RegNo) const
hasOneNonDBGUse - Return true if there is exactly one non-Debug use of the specified register.
const TargetRegisterClass * getRegClass(Register Reg) const
Return the register class of the specified virtual register.
LLVM_ABI MachineInstr * getVRegDef(Register Reg) const
getVRegDef - Return the machine instr that defines the specified virtual register or null if none is ...
bool use_nodbg_empty(Register RegNo) const
use_nodbg_empty - Return true if there are no non-Debug instructions using the specified register.
const RegClassOrRegBank & getRegClassOrRegBank(Register Reg) const
Return the register bank or register class of Reg.
LLVM_ABI Register createVirtualRegister(const TargetRegisterClass *RegClass, StringRef Name="")
createVirtualRegister - Create and return a new virtual register in the function with the specified r...
def_instr_iterator def_instr_begin(Register RegNo) const
LLT getType(Register Reg) const
Get the low-level type of Reg or LLT{} if Reg is not a generic (target independent) virtual register.
const RegisterBank * getRegBankOrNull(Register Reg) const
Return the register bank of Reg, or null if Reg has not been assigned a register bank or has been ass...
LLVM_ABI void setRegBank(Register Reg, const RegisterBank &RegBank)
Set the register bank to RegBank for Reg.
iterator_range< use_instr_nodbg_iterator > use_nodbg_instructions(Register Reg) const
LLVM_ABI void setType(Register VReg, LLT Ty)
Set the low-level type of VReg to Ty.
bool hasOneDef(Register RegNo) const
Return true if there is exactly one operand defining the specified register.
LLVM_ABI void setRegClass(Register Reg, const TargetRegisterClass *RC)
setRegClass - Set the register class of the specified virtual register.
LLVM_ABI Register createGenericVirtualRegister(LLT Ty, StringRef Name="")
Create and return a new generic virtual register with low-level type Ty.
const TargetRegisterClass * getRegClassOrNull(Register Reg) const
Return the register class of Reg, or null if Reg has not been assigned a register class yet.
LLVM_ABI Register cloneVirtualRegister(Register VReg, StringRef Name="")
Create and return a new virtual register in the function with the same attributes as the given regist...
Analysis providing profile information.
Holds all the information related to register banks.
static const TargetRegisterClass * constrainGenericRegister(Register Reg, const TargetRegisterClass &RC, MachineRegisterInfo &MRI)
Constrain the (possibly generic) virtual register Reg to RC.
const RegisterBank & getRegBank(unsigned ID)
Get the register bank identified by ID.
TypeSize getSizeInBits(Register Reg, const MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI) const
Get the size in bits of Reg.
This class implements the register bank concept.
unsigned getID() const
Get the identifier of this register bank.
Wrapper class representing virtual and physical registers.
Definition Register.h:20
constexpr bool isValid() const
Definition Register.h:112
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
Definition Register.h:79
constexpr bool isPhysical() const
Return true if the specified register number is in the physical register namespace.
Definition Register.h:83
reference emplace_back(ArgTypes &&... Args)
void push_back(const T &Elt)
TargetInstrInfo - Interface to description of machine instruction set.
bool isPositionIndependent() const
bool useEmulatedTLS() const
Returns true if this target uses emulated TLS.
CodeModel::Model getCodeModel() const
Returns the code model.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
virtual const TargetRegisterInfo * getRegisterInfo() const =0
Return the target's register information.
virtual const TargetLowering * getTargetLowering() const
static constexpr TypeSize getFixed(ScalarTy ExactSize)
Definition TypeSize.h:343
static constexpr TypeSize getScalable(ScalarTy MinimumSize)
Definition TypeSize.h:346
Value * getOperand(unsigned i) const
Definition User.h:207
LLVM Value Representation.
Definition Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:255
LLVM_ABI Align getPointerAlignment(const DataLayout &DL) const
Returns an alignment of the pointer value.
Definition Value.cpp:964
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
Definition TypeSize.h:168
self_iterator getIterator()
Definition ilist_node.h:123
Changed
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
static CondCode getInvertedCondCode(CondCode Code)
static unsigned getNZCVToSatisfyCondCode(CondCode Code)
Given a condition code, return NZCV flags that would satisfy that condition.
void changeFCMPPredToAArch64CC(const CmpInst::Predicate P, AArch64CC::CondCode &CondCode, AArch64CC::CondCode &CondCode2)
Find the AArch64 condition codes necessary to represent P for a scalar floating point comparison.
std::optional< int64_t > getAArch64VectorSplatScalar(const MachineInstr &MI, const MachineRegisterInfo &MRI)
@ MO_NC
MO_NC - Indicates whether the linker is expected to check the symbol reference for overflow.
@ MO_G1
MO_G1 - A symbol operand with this flag (granule 1) represents the bits 16-31 of a 64-bit address,...
@ MO_PAGEOFF
MO_PAGEOFF - A symbol operand with this flag represents the offset of that symbol within a 4K page.
@ MO_GOT
MO_GOT - This flag indicates that a symbol operand represents the address of the GOT entry for the sy...
@ MO_G0
MO_G0 - A symbol operand with this flag (granule 0) represents the bits 0-15 of a 64-bit address,...
@ MO_PAGE
MO_PAGE - A symbol operand with this flag represents the pc-relative offset of the 4K page containing...
@ MO_TLS
MO_TLS - Indicates that the operand being accessed is some kind of thread-local symbol.
@ MO_G2
MO_G2 - A symbol operand with this flag (granule 2) represents the bits 32-47 of a 64-bit address,...
@ MO_G3
MO_G3 - A symbol operand with this flag (granule 3) represents the high 16-bits of a 64-bit address,...
static bool isLogicalImmediate(uint64_t imm, unsigned regSize)
isLogicalImmediate - Return true if the immediate is valid for a logical immediate instruction of the...
static uint8_t encodeAdvSIMDModImmType2(uint64_t Imm)
static bool isAdvSIMDModImmType9(uint64_t Imm)
static bool isAdvSIMDModImmType4(uint64_t Imm)
static bool isAdvSIMDModImmType5(uint64_t Imm)
static int getFP32Imm(const APInt &Imm)
getFP32Imm - Return an 8-bit floating-point version of the 32-bit floating-point value.
static uint8_t encodeAdvSIMDModImmType7(uint64_t Imm)
static uint8_t encodeAdvSIMDModImmType12(uint64_t Imm)
static uint8_t encodeAdvSIMDModImmType10(uint64_t Imm)
static uint8_t encodeAdvSIMDModImmType9(uint64_t Imm)
static uint64_t encodeLogicalImmediate(uint64_t imm, unsigned regSize)
encodeLogicalImmediate - Return the encoded immediate value for a logical immediate instruction of th...
static bool isAdvSIMDModImmType7(uint64_t Imm)
static uint8_t encodeAdvSIMDModImmType5(uint64_t Imm)
static int getFP64Imm(const APInt &Imm)
getFP64Imm - Return an 8-bit floating-point version of the 64-bit floating-point value.
static bool isAdvSIMDModImmType10(uint64_t Imm)
static int getFP16Imm(const APInt &Imm)
getFP16Imm - Return an 8-bit floating-point version of the 16-bit floating-point value.
static uint8_t encodeAdvSIMDModImmType8(uint64_t Imm)
static bool isAdvSIMDModImmType12(uint64_t Imm)
static uint8_t encodeAdvSIMDModImmType11(uint64_t Imm)
static bool isAdvSIMDModImmType11(uint64_t Imm)
static uint8_t encodeAdvSIMDModImmType6(uint64_t Imm)
static bool isAdvSIMDModImmType8(uint64_t Imm)
static uint8_t encodeAdvSIMDModImmType4(uint64_t Imm)
static unsigned getShifterImm(AArch64_AM::ShiftExtendType ST, unsigned Imm)
getShifterImm - Encode the shift type and amount: imm: 6-bit shift amount shifter: 000 ==> lsl 001 ==...
static bool isAdvSIMDModImmType6(uint64_t Imm)
static uint8_t encodeAdvSIMDModImmType1(uint64_t Imm)
static uint8_t encodeAdvSIMDModImmType3(uint64_t Imm)
static bool isAdvSIMDModImmType2(uint64_t Imm)
static bool isAdvSIMDModImmType3(uint64_t Imm)
static bool isSignExtendShiftType(AArch64_AM::ShiftExtendType Type)
isSignExtendShiftType - Returns true if Type is sign extending.
static bool isAdvSIMDModImmType1(uint64_t Imm)
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out,...
operand_type_match m_Reg()
SpecificConstantMatch m_SpecificICst(const APInt &RequestedValue)
Matches a constant equal to RequestedValue.
UnaryOp_match< SrcTy, TargetOpcode::G_ZEXT > m_GZExt(const SrcTy &Src)
ConstantMatch< APInt > m_ICst(APInt &Cst)
BinaryOp_match< LHS, RHS, TargetOpcode::G_ADD, true > m_GAdd(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, TargetOpcode::G_OR, true > m_GOr(const LHS &L, const RHS &R)
BinaryOp_match< SpecificConstantMatch, SrcTy, TargetOpcode::G_SUB > m_Neg(const SrcTy &&Src)
Matches a register negated by a G_SUB.
OneNonDBGUse_match< SubPat > m_OneNonDBGUse(const SubPat &SP)
BinaryOp_match< SrcTy, SpecificConstantMatch, TargetOpcode::G_XOR, true > m_Not(const SrcTy &&Src)
Matches a register not-ed by a G_XOR.
bool mi_match(Reg R, const MachineRegisterInfo &MRI, Pattern &&P)
BinaryOp_match< LHS, RHS, TargetOpcode::G_PTR_ADD, false > m_GPtrAdd(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, TargetOpcode::G_SHL, false > m_GShl(const LHS &L, const RHS &R)
Or< Preds... > m_any_of(Preds &&... preds)
BinaryOp_match< LHS, RHS, TargetOpcode::G_AND, true > m_GAnd(const LHS &L, const RHS &R)
Predicate
Predicate - These are "(BI << 5) | BO" for various predicates.
Predicate getPredicate(unsigned Condition, unsigned Hint)
Return predicate consisting of specified condition and hint bits.
constexpr double e
NodeAddr< InstrNode * > Instr
Definition RDFGraph.h:389
This is an optimization pass for GlobalISel generic memory operations.
LLVM_ABI Register getFunctionLiveInPhysReg(MachineFunction &MF, const TargetInstrInfo &TII, MCRegister PhysReg, const TargetRegisterClass &RC, const DebugLoc &DL, LLT RegTy=LLT())
Return a virtual register corresponding to the incoming argument register PhysReg.
Definition Utils.cpp:857
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition STLExtras.h:316
@ Offset
Definition DWP.cpp:532
FunctionAddr VTableAddr Value
Definition InstrProf.h:137
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1739
LLVM_ABI Register constrainOperandRegClass(const MachineFunction &MF, const TargetRegisterInfo &TRI, MachineRegisterInfo &MRI, const TargetInstrInfo &TII, const RegisterBankInfo &RBI, MachineInstr &InsertPt, const TargetRegisterClass &RegClass, MachineOperand &RegMO)
Constrain the Register operand OpIdx, so that it is now constrained to the TargetRegisterClass passed...
Definition Utils.cpp:56
LLVM_ABI MachineInstr * getOpcodeDef(unsigned Opcode, Register Reg, const MachineRegisterInfo &MRI)
See if Reg is defined by an single def instruction that is Opcode.
Definition Utils.cpp:652
PointerUnion< const TargetRegisterClass *, const RegisterBank * > RegClassOrRegBank
Convenient type to represent either a register class or a register bank.
LLVM_ABI const ConstantFP * getConstantFPVRegVal(Register VReg, const MachineRegisterInfo &MRI)
Definition Utils.cpp:460
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
LLVM_ABI std::optional< APInt > getIConstantVRegVal(Register VReg, const MachineRegisterInfo &MRI)
If VReg is defined by a G_CONSTANT, return the corresponding value.
Definition Utils.cpp:293
unsigned CheckFixedPointOperandConstant(APFloat &FVal, unsigned RegWidth, bool isReciprocal)
@ Undef
Value of the register doesn't matter.
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
bool isStrongerThanMonotonic(AtomicOrdering AO)
LLVM_ABI void constrainSelectedInstRegOperands(MachineInstr &I, const TargetInstrInfo &TII, const TargetRegisterInfo &TRI, const RegisterBankInfo &RBI)
Mutate the newly-selected instruction I to constrain its (possibly generic) virtual register operands...
Definition Utils.cpp:155
bool isPreISelGenericOpcode(unsigned Opcode)
Check whether the given Opcode is a generic opcode that is not supposed to appear after ISel.
unsigned getBLRCallOpcode(const MachineFunction &MF)
Return opcode to be used for indirect calls.
LLVM_ABI MachineInstr * getDefIgnoringCopies(Register Reg, const MachineRegisterInfo &MRI)
Find the def instruction for Reg, folding away any trivial copies.
Definition Utils.cpp:493
LLVM_ABI std::optional< int64_t > getIConstantVRegSExtVal(Register VReg, const MachineRegisterInfo &MRI)
If VReg is defined by a G_CONSTANT fits in int64_t returns it.
Definition Utils.cpp:313
constexpr bool isShiftedMask_64(uint64_t Value)
Return true if the argument contains a non-empty sequence of ones with the remainder zero (64 bit ver...
Definition MathExtras.h:273
InstructionSelector * createAArch64InstructionSelector(const AArch64TargetMachine &, const AArch64Subtarget &, const AArch64RegisterBankInfo &)
OutputIt transform(R &&Range, OutputIt d_first, UnaryFunction F)
Wrapper function around std::transform to apply a function to a range and store the result elsewhere.
Definition STLExtras.h:2026
constexpr bool has_single_bit(T Value) noexcept
Definition bit.h:149
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1746
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition MathExtras.h:331
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
Definition Error.cpp:163
LLVM_ABI std::optional< ValueAndVReg > getAnyConstantVRegValWithLookThrough(Register VReg, const MachineRegisterInfo &MRI, bool LookThroughInstrs=true, bool LookThroughAnyExt=false)
If VReg is defined by a statically evaluable chain of instructions rooted on a G_CONSTANT or G_FCONST...
Definition Utils.cpp:438
constexpr bool isUInt(uint64_t x)
Checks if an unsigned integer fits into the given bit width.
Definition MathExtras.h:189
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
LLVM_ATTRIBUTE_VISIBILITY_DEFAULT AnalysisKey InnerAnalysisManagerProxy< AnalysisManagerT, IRUnitT, ExtraArgTs... >::Key
AtomicOrdering
Atomic ordering for LLVM's memory model.
@ Sub
Subtraction of integers.
DWARFExpression::Operation Op
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
LLVM_ABI std::optional< ValueAndVReg > getIConstantVRegValWithLookThrough(Register VReg, const MachineRegisterInfo &MRI, bool LookThroughInstrs=true)
If VReg is defined by a statically evaluable chain of instructions rooted on a G_CONSTANT returns its...
Definition Utils.cpp:432
LLVM_ABI std::optional< DefinitionAndSourceRegister > getDefSrcRegIgnoringCopies(Register Reg, const MachineRegisterInfo &MRI)
Find the def instruction for Reg, and underlying value Register folding away any copies.
Definition Utils.cpp:468
LLVM_ABI Register getSrcRegIgnoringCopies(Register Reg, const MachineRegisterInfo &MRI)
Find the source register for Reg, folding away any trivial copies.
Definition Utils.cpp:500
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:872
static EVT getFloatingPointVT(unsigned BitWidth)
Returns the EVT that represents a floating-point type with the given number of bits.
Definition ValueTypes.h:55
static LLVM_ABI MachinePointerInfo getConstantPool(MachineFunction &MF)
Return a MachinePointerInfo record that refers to the constant pool.