1//===- AArch64InstructionSelector.cpp ----------------------------*- C++ -*-==//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8/// \file
9/// This file implements the targeting of the InstructionSelector class for
10/// AArch64.
11/// \todo This should be generated by TableGen.
12//===----------------------------------------------------------------------===//
13
15#include "AArch64InstrInfo.h"
18#include "AArch64RegisterInfo.h"
19#include "AArch64Subtarget.h"
41#include "llvm/IR/Constants.h"
44#include "llvm/IR/IntrinsicsAArch64.h"
46#include "llvm/IR/Type.h"
47#include "llvm/Pass.h"
48#include "llvm/Support/Debug.h"
50#include <optional>
51
52#define DEBUG_TYPE "aarch64-isel"
53
54using namespace llvm;
55using namespace MIPatternMatch;
56using namespace AArch64GISelUtils;
57
58namespace llvm {
59class BlockFrequencyInfo;
60class ProfileSummaryInfo;
61}
62
63namespace {
64
65#define GET_GLOBALISEL_PREDICATE_BITSET
66#include "AArch64GenGlobalISel.inc"
67#undef GET_GLOBALISEL_PREDICATE_BITSET
68
69
70class AArch64InstructionSelector : public InstructionSelector {
71public:
72 AArch64InstructionSelector(const AArch64TargetMachine &TM,
73 const AArch64Subtarget &STI,
74 const AArch64RegisterBankInfo &RBI);
75
76 bool select(MachineInstr &I) override;
77 static const char *getName() { return DEBUG_TYPE; }
78
79 void setupMF(MachineFunction &MF, GISelKnownBits *KB,
80 CodeGenCoverage *CoverageInfo, ProfileSummaryInfo *PSI,
81 BlockFrequencyInfo *BFI) override {
82 InstructionSelector::setupMF(MF, KB, CoverageInfo, PSI, BFI);
83 MIB.setMF(MF);
84
85 // hasFnAttribute() is expensive to call on every BRCOND selection, so
86 // cache it here for each run of the selector.
87 ProduceNonFlagSettingCondBr =
88 !MF.getFunction().hasFnAttribute(Attribute::SpeculativeLoadHardening);
89 MFReturnAddr = Register();
90
91 processPHIs(MF);
92 }
93
94private:
95 /// tblgen-erated 'select' implementation, used as the initial selector for
96 /// the patterns that don't require complex C++.
97 bool selectImpl(MachineInstr &I, CodeGenCoverage &CoverageInfo) const;
98
99 // A lowering phase that runs before any selection attempts.
100 // Returns true if the instruction was modified.
101 bool preISelLower(MachineInstr &I);
102
103 // An early selection function that runs before the selectImpl() call.
104 bool earlySelect(MachineInstr &I);
105
106 /// Save state that is shared between select calls, call select on \p I and
107 /// then restore the saved state. This can be used to recursively call select
108 /// within a select call.
109 bool selectAndRestoreState(MachineInstr &I);
110
111 // Do some preprocessing of G_PHIs before we begin selection.
112 void processPHIs(MachineFunction &MF);
113
114 bool earlySelectSHL(MachineInstr &I, MachineRegisterInfo &MRI);
115
116 /// Eliminate same-sized cross-bank copies into stores before selectImpl().
117 bool contractCrossBankCopyIntoStore(MachineInstr &I,
118 MachineRegisterInfo &MRI);
119
120 bool convertPtrAddToAdd(MachineInstr &I, MachineRegisterInfo &MRI);
121
122 bool selectVaStartAAPCS(MachineInstr &I, MachineFunction &MF,
123 MachineRegisterInfo &MRI) const;
124 bool selectVaStartDarwin(MachineInstr &I, MachineFunction &MF,
125 MachineRegisterInfo &MRI) const;
126
127 ///@{
128 /// Helper functions for selectCompareBranch.
129 bool selectCompareBranchFedByFCmp(MachineInstr &I, MachineInstr &FCmp,
130 MachineIRBuilder &MIB) const;
131 bool selectCompareBranchFedByICmp(MachineInstr &I, MachineInstr &ICmp,
132 MachineIRBuilder &MIB) const;
133 bool tryOptCompareBranchFedByICmp(MachineInstr &I, MachineInstr &ICmp,
134 MachineIRBuilder &MIB) const;
135 bool tryOptAndIntoCompareBranch(MachineInstr &AndInst, bool Invert,
136 MachineBasicBlock *DstMBB,
137 MachineIRBuilder &MIB) const;
138 ///@}
139
140 bool selectCompareBranch(MachineInstr &I, MachineFunction &MF,
141 MachineRegisterInfo &MRI);
142
143 bool selectVectorAshrLshr(MachineInstr &I, MachineRegisterInfo &MRI);
144 bool selectVectorSHL(MachineInstr &I, MachineRegisterInfo &MRI);
145
146 // Helper to generate an equivalent of scalar_to_vector into a new register,
147 // returned via 'Dst'.
148 MachineInstr *emitScalarToVector(unsigned EltSize,
149 const TargetRegisterClass *DstRC,
150 Register Scalar,
151 MachineIRBuilder &MIRBuilder) const;
152 /// Helper to narrow vector that was widened by emitScalarToVector.
153 /// Copy lowest part of 128-bit or 64-bit vector to 64-bit or 32-bit
154 /// vector, correspondingly.
155 MachineInstr *emitNarrowVector(Register DstReg, Register SrcReg,
156 MachineIRBuilder &MIRBuilder,
157 MachineRegisterInfo &MRI) const;
158
159 /// Emit a lane insert into \p DstReg, or a new vector register if
160 /// std::nullopt is provided.
161 ///
162 /// The lane inserted into is defined by \p LaneIdx. The vector source
163 /// register is given by \p SrcReg. The register containing the element is
164 /// given by \p EltReg.
165 MachineInstr *emitLaneInsert(std::optional<Register> DstReg, Register SrcReg,
166 Register EltReg, unsigned LaneIdx,
167 const RegisterBank &RB,
168 MachineIRBuilder &MIRBuilder) const;
169
170 /// Emit a sequence of instructions representing a constant \p CV for a
171 /// vector register \p Dst. (E.g. a MOV, or a load from a constant pool.)
172 ///
173 /// \returns the last instruction in the sequence on success, and nullptr
174 /// otherwise.
175 MachineInstr *emitConstantVector(Register Dst, Constant *CV,
176 MachineIRBuilder &MIRBuilder,
177 MachineRegisterInfo &MRI);
178
179 MachineInstr *tryAdvSIMDModImm8(Register Dst, unsigned DstSize, APInt Bits,
180 MachineIRBuilder &MIRBuilder);
181
182 MachineInstr *tryAdvSIMDModImm16(Register Dst, unsigned DstSize, APInt Bits,
183 MachineIRBuilder &MIRBuilder, bool Inv);
184
185 MachineInstr *tryAdvSIMDModImm32(Register Dst, unsigned DstSize, APInt Bits,
186 MachineIRBuilder &MIRBuilder, bool Inv);
187 MachineInstr *tryAdvSIMDModImm64(Register Dst, unsigned DstSize, APInt Bits,
188 MachineIRBuilder &MIRBuilder);
189 MachineInstr *tryAdvSIMDModImm321s(Register Dst, unsigned DstSize, APInt Bits,
190 MachineIRBuilder &MIRBuilder, bool Inv);
191 MachineInstr *tryAdvSIMDModImmFP(Register Dst, unsigned DstSize, APInt Bits,
192 MachineIRBuilder &MIRBuilder);
193
194 bool tryOptConstantBuildVec(MachineInstr &MI, LLT DstTy,
195 MachineRegisterInfo &MRI);
196 /// \returns true if a G_BUILD_VECTOR instruction \p MI can be selected as a
197 /// SUBREG_TO_REG.
198 bool tryOptBuildVecToSubregToReg(MachineInstr &MI, MachineRegisterInfo &MRI);
199 bool selectBuildVector(MachineInstr &I, MachineRegisterInfo &MRI);
202
203 bool selectShuffleVector(MachineInstr &I, MachineRegisterInfo &MRI);
204 bool selectExtractElt(MachineInstr &I, MachineRegisterInfo &MRI);
205 bool selectConcatVectors(MachineInstr &I, MachineRegisterInfo &MRI);
206 bool selectSplitVectorUnmerge(MachineInstr &I, MachineRegisterInfo &MRI);
207
208 /// Helper function to select vector load intrinsics like
209 /// @llvm.aarch64.neon.ld2.*, @llvm.aarch64.neon.ld4.*, etc.
210 /// \p Opc is the opcode that the selected instruction should use.
211 /// \p NumVecs is the number of vector destinations for the instruction.
212 /// \p I is the original G_INTRINSIC_W_SIDE_EFFECTS instruction.
213 bool selectVectorLoadIntrinsic(unsigned Opc, unsigned NumVecs,
214 MachineInstr &I);
215 bool selectVectorLoadLaneIntrinsic(unsigned Opc, unsigned NumVecs,
216 MachineInstr &I);
217 void selectVectorStoreIntrinsic(MachineInstr &I, unsigned NumVecs,
218 unsigned Opc);
219 bool selectVectorStoreLaneIntrinsic(MachineInstr &I, unsigned NumVecs,
220 unsigned Opc);
221 bool selectIntrinsicWithSideEffects(MachineInstr &I,
222 MachineRegisterInfo &MRI);
223 bool selectIntrinsic(MachineInstr &I, MachineRegisterInfo &MRI);
224 bool selectJumpTable(MachineInstr &I, MachineRegisterInfo &MRI);
225 bool selectBrJT(MachineInstr &I, MachineRegisterInfo &MRI);
226 bool selectTLSGlobalValue(MachineInstr &I, MachineRegisterInfo &MRI);
227 bool selectReduction(MachineInstr &I, MachineRegisterInfo &MRI);
228 bool selectMOPS(MachineInstr &I, MachineRegisterInfo &MRI);
229 bool selectUSMovFromExtend(MachineInstr &I, MachineRegisterInfo &MRI);
230
231 bool selectIndexedExtLoad(MachineInstr &I, MachineRegisterInfo &MRI);
232 bool selectIndexedLoad(MachineInstr &I, MachineRegisterInfo &MRI);
233 bool selectIndexedStore(GIndexedStore &I, MachineRegisterInfo &MRI);
234
235 unsigned emitConstantPoolEntry(const Constant *CPVal,
236 MachineFunction &MF) const;
237 MachineInstr *emitLoadFromConstantPool(const Constant *CPVal,
238 MachineIRBuilder &MIRBuilder) const;
239
240 // Emit a vector concat operation.
241 MachineInstr *emitVectorConcat(std::optional<Register> Dst, Register Op1,
242 Register Op2,
243 MachineIRBuilder &MIRBuilder) const;
244
245 // Emit an integer compare between LHS and RHS, which checks for Predicate.
246 MachineInstr *emitIntegerCompare(MachineOperand &LHS, MachineOperand &RHS,
247 MachineOperand &Predicate,
248 MachineIRBuilder &MIRBuilder) const;
249
250 /// Emit a floating point comparison between \p LHS and \p RHS.
251 /// \p Pred if given is the intended predicate to use.
252 MachineInstr *
253 emitFPCompare(Register LHS, Register RHS, MachineIRBuilder &MIRBuilder,
254 std::optional<CmpInst::Predicate> = std::nullopt) const;
255
256 MachineInstr *
257 emitInstr(unsigned Opcode, std::initializer_list<llvm::DstOp> DstOps,
258 std::initializer_list<llvm::SrcOp> SrcOps,
259 MachineIRBuilder &MIRBuilder,
260 const ComplexRendererFns &RenderFns = std::nullopt) const;
261 /// Helper function to emit an add or sub instruction.
262 ///
263 /// \p AddrModeAndSizeToOpcode must contain each of the opcode variants above
264 /// in a specific order.
265 ///
266 /// Below is an example of the expected input to \p AddrModeAndSizeToOpcode.
267 ///
268 /// \code
269 /// const std::array<std::array<unsigned, 2>, 4> Table {
270 /// {{AArch64::ADDXri, AArch64::ADDWri},
271 /// {AArch64::ADDXrs, AArch64::ADDWrs},
272 /// {AArch64::ADDXrr, AArch64::ADDWrr},
273 /// {AArch64::SUBXri, AArch64::SUBWri},
274 /// {AArch64::ADDXrx, AArch64::ADDWrx}}};
275 /// \endcode
276 ///
277 /// Each row in the table corresponds to a different addressing mode. Each
278 /// column corresponds to a different register size.
279 ///
280 /// \attention Rows must be structured as follows:
281 /// - Row 0: The ri opcode variants
282 /// - Row 1: The rs opcode variants
283 /// - Row 2: The rr opcode variants
284 /// - Row 3: The ri opcode variants for negative immediates
285 /// - Row 4: The rx opcode variants
286 ///
287 /// \attention Columns must be structured as follows:
288 /// - Column 0: The 64-bit opcode variants
289 /// - Column 1: The 32-bit opcode variants
290 ///
291 /// \p Dst is the destination register of the binop to emit.
292 /// \p LHS is the left-hand operand of the binop to emit.
293 /// \p RHS is the right-hand operand of the binop to emit.
294 MachineInstr *emitAddSub(
295 const std::array<std::array<unsigned, 2>, 5> &AddrModeAndSizeToOpcode,
296 Register Dst, MachineOperand &LHS, MachineOperand &RHS,
297 MachineIRBuilder &MIRBuilder) const;
298 MachineInstr *emitADD(Register DefReg, MachineOperand &LHS,
299 MachineOperand &RHS,
300 MachineIRBuilder &MIRBuilder) const;
301 MachineInstr *emitADDS(Register Dst, MachineOperand &LHS, MachineOperand &RHS,
302 MachineIRBuilder &MIRBuilder) const;
303 MachineInstr *emitSUBS(Register Dst, MachineOperand &LHS, MachineOperand &RHS,
304 MachineIRBuilder &MIRBuilder) const;
305 MachineInstr *emitADCS(Register Dst, MachineOperand &LHS, MachineOperand &RHS,
306 MachineIRBuilder &MIRBuilder) const;
307 MachineInstr *emitSBCS(Register Dst, MachineOperand &LHS, MachineOperand &RHS,
308 MachineIRBuilder &MIRBuilder) const;
309 MachineInstr *emitCMN(MachineOperand &LHS, MachineOperand &RHS,
310 MachineIRBuilder &MIRBuilder) const;
311 MachineInstr *emitTST(MachineOperand &LHS, MachineOperand &RHS,
312 MachineIRBuilder &MIRBuilder) const;
313 MachineInstr *emitSelect(Register Dst, Register LHS, Register RHS,
314 AArch64CC::CondCode CC,
315 MachineIRBuilder &MIRBuilder) const;
316 MachineInstr *emitExtractVectorElt(std::optional<Register> DstReg,
317 const RegisterBank &DstRB, LLT ScalarTy,
318 Register VecReg, unsigned LaneIdx,
319 MachineIRBuilder &MIRBuilder) const;
320 MachineInstr *emitCSINC(Register Dst, Register Src1, Register Src2,
321 AArch64CC::CondCode Pred,
322 MachineIRBuilder &MIRBuilder) const;
323 /// Emit a CSet for a FP compare.
324 ///
325 /// \p Dst is expected to be a 32-bit scalar register.
326 MachineInstr *emitCSetForFCmp(Register Dst, CmpInst::Predicate Pred,
327 MachineIRBuilder &MIRBuilder) const;
328
329 /// Emit an instruction that sets NZCV to the carry-in expected by \p I.
330 /// Might elide the instruction if the previous instruction already sets NZCV
331 /// correctly.
332 MachineInstr *emitCarryIn(MachineInstr &I, Register CarryReg);
333
334 /// Emit the overflow op for \p Opcode.
335 ///
336 /// \p Opcode is expected to be an overflow op's opcode, e.g. G_UADDO,
337 /// G_USUBO, etc.
338 std::pair<MachineInstr *, AArch64CC::CondCode>
339 emitOverflowOp(unsigned Opcode, Register Dst, MachineOperand &LHS,
340 MachineOperand &RHS, MachineIRBuilder &MIRBuilder) const;
341
342 bool selectOverflowOp(MachineInstr &I, MachineRegisterInfo &MRI);
343
344 /// Emit expression as a conjunction (a series of CCMP/CFCMP ops).
345 /// In some cases this is even possible with OR operations in the expression.
346 MachineInstr *emitConjunction(Register Val, AArch64CC::CondCode &OutCC,
347 MachineIRBuilder &MIB) const;
348 MachineInstr *emitConditionalComparison(Register LHS, Register RHS,
349 CmpInst::Predicate CC,
350 AArch64CC::CondCode Predicate,
351 AArch64CC::CondCode OutCC,
352 MachineIRBuilder &MIB) const;
353 MachineInstr *emitConjunctionRec(Register Val, AArch64CC::CondCode &OutCC,
354 bool Negate, Register CCOp,
355 AArch64CC::CondCode Predicate,
356 MachineIRBuilder &MIB) const;
357
358 /// Emit a TB(N)Z instruction which tests \p Bit in \p TestReg.
359 /// \p IsNegative is true if the test should be "not zero".
360 /// This will also optimize the test bit instruction when possible.
361 MachineInstr *emitTestBit(Register TestReg, uint64_t Bit, bool IsNegative,
362 MachineBasicBlock *DstMBB,
363 MachineIRBuilder &MIB) const;
364
365 /// Emit a CB(N)Z instruction which branches to \p DestMBB.
366 MachineInstr *emitCBZ(Register CompareReg, bool IsNegative,
367 MachineBasicBlock *DestMBB,
368 MachineIRBuilder &MIB) const;
369
370 // Equivalent to the i32shift_a and friends from AArch64InstrInfo.td.
371 // We use these manually instead of using the importer since it doesn't
372 // support SDNodeXForm.
373 ComplexRendererFns selectShiftA_32(const MachineOperand &Root) const;
374 ComplexRendererFns selectShiftB_32(const MachineOperand &Root) const;
375 ComplexRendererFns selectShiftA_64(const MachineOperand &Root) const;
376 ComplexRendererFns selectShiftB_64(const MachineOperand &Root) const;
377
378 ComplexRendererFns select12BitValueWithLeftShift(uint64_t Immed) const;
379 ComplexRendererFns selectArithImmed(MachineOperand &Root) const;
380 ComplexRendererFns selectNegArithImmed(MachineOperand &Root) const;
381
382 ComplexRendererFns selectAddrModeUnscaled(MachineOperand &Root,
383 unsigned Size) const;
384
385 ComplexRendererFns selectAddrModeUnscaled8(MachineOperand &Root) const {
386 return selectAddrModeUnscaled(Root, 1);
387 }
388 ComplexRendererFns selectAddrModeUnscaled16(MachineOperand &Root) const {
389 return selectAddrModeUnscaled(Root, 2);
390 }
391 ComplexRendererFns selectAddrModeUnscaled32(MachineOperand &Root) const {
392 return selectAddrModeUnscaled(Root, 4);
393 }
394 ComplexRendererFns selectAddrModeUnscaled64(MachineOperand &Root) const {
395 return selectAddrModeUnscaled(Root, 8);
396 }
397 ComplexRendererFns selectAddrModeUnscaled128(MachineOperand &Root) const {
398 return selectAddrModeUnscaled(Root, 16);
399 }
400
401 /// Helper to try to fold in a GISEL_ADD_LOW into an immediate, to be used
402 /// from complex pattern matchers like selectAddrModeIndexed().
403 ComplexRendererFns tryFoldAddLowIntoImm(MachineInstr &RootDef, unsigned Size,
404 MachineRegisterInfo &MRI) const;
405
406 ComplexRendererFns selectAddrModeIndexed(MachineOperand &Root,
407 unsigned Size) const;
408 template <int Width>
409 ComplexRendererFns selectAddrModeIndexed(MachineOperand &Root) const {
410 return selectAddrModeIndexed(Root, Width / 8);
411 }
412
413 bool isWorthFoldingIntoExtendedReg(MachineInstr &MI,
414 const MachineRegisterInfo &MRI) const;
415 ComplexRendererFns
416 selectAddrModeShiftedExtendXReg(MachineOperand &Root,
417 unsigned SizeInBytes) const;
418
419 /// Returns a \p ComplexRendererFns which contains a base, offset, and whether
420 /// or not a shift + extend should be folded into an addressing mode. Returns
421 /// None when this is not profitable or possible.
422 ComplexRendererFns
423 selectExtendedSHL(MachineOperand &Root, MachineOperand &Base,
424 MachineOperand &Offset, unsigned SizeInBytes,
425 bool WantsExt) const;
426 ComplexRendererFns selectAddrModeRegisterOffset(MachineOperand &Root) const;
427 ComplexRendererFns selectAddrModeXRO(MachineOperand &Root,
428 unsigned SizeInBytes) const;
429 template <int Width>
430 ComplexRendererFns selectAddrModeXRO(MachineOperand &Root) const {
431 return selectAddrModeXRO(Root, Width / 8);
432 }
433
434 ComplexRendererFns selectAddrModeWRO(MachineOperand &Root,
435 unsigned SizeInBytes) const;
436 template <int Width>
437 ComplexRendererFns selectAddrModeWRO(MachineOperand &Root) const {
438 return selectAddrModeWRO(Root, Width / 8);
439 }
440
441 ComplexRendererFns selectShiftedRegister(MachineOperand &Root,
442 bool AllowROR = false) const;
443
444 ComplexRendererFns selectArithShiftedRegister(MachineOperand &Root) const {
445 return selectShiftedRegister(Root);
446 }
447
448 ComplexRendererFns selectLogicalShiftedRegister(MachineOperand &Root) const {
449 return selectShiftedRegister(Root, true);
450 }
451
452 /// Given an extend instruction, determine the correct shift-extend type for
453 /// that instruction.
454 ///
455 /// If the instruction is going to be used in a load or store, pass
456 /// \p IsLoadStore = true.
457 AArch64_AM::ShiftExtendType
458 getExtendTypeForInst(MachineInstr &MI, MachineRegisterInfo &MRI,
459 bool IsLoadStore = false) const;
460
461 /// Move \p Reg to \p RC if \p Reg is not already on \p RC.
462 ///
463 /// \returns Either \p Reg if no change was necessary, or the new register
464 /// created by moving \p Reg.
465 ///
466 /// Note: This uses emitCopy right now.
467 Register moveScalarRegClass(Register Reg, const TargetRegisterClass &RC,
468 MachineIRBuilder &MIB) const;
469
470 ComplexRendererFns selectArithExtendedRegister(MachineOperand &Root) const;
471
472 ComplexRendererFns selectExtractHigh(MachineOperand &Root) const;
473
474 void renderTruncImm(MachineInstrBuilder &MIB, const MachineInstr &MI,
475 int OpIdx = -1) const;
476 void renderLogicalImm32(MachineInstrBuilder &MIB, const MachineInstr &I,
477 int OpIdx = -1) const;
478 void renderLogicalImm64(MachineInstrBuilder &MIB, const MachineInstr &I,
479 int OpIdx = -1) const;
480 void renderUbsanTrap(MachineInstrBuilder &MIB, const MachineInstr &MI,
481 int OpIdx) const;
482 void renderFPImm16(MachineInstrBuilder &MIB, const MachineInstr &MI,
483 int OpIdx = -1) const;
484 void renderFPImm32(MachineInstrBuilder &MIB, const MachineInstr &MI,
485 int OpIdx = -1) const;
486 void renderFPImm64(MachineInstrBuilder &MIB, const MachineInstr &MI,
487 int OpIdx = -1) const;
488 void renderFPImm32SIMDModImmType4(MachineInstrBuilder &MIB,
489 const MachineInstr &MI,
490 int OpIdx = -1) const;
491
492 // Materialize a GlobalValue or BlockAddress using a movz+movk sequence.
493 void materializeLargeCMVal(MachineInstr &I, const Value *V, unsigned OpFlags);
494
495 // Optimization methods.
496 bool tryOptSelect(GSelect &Sel);
497 bool tryOptSelectConjunction(GSelect &Sel, MachineInstr &CondMI);
498 MachineInstr *tryFoldIntegerCompare(MachineOperand &LHS, MachineOperand &RHS,
499 MachineOperand &Predicate,
500 MachineIRBuilder &MIRBuilder) const;
501
502 /// Return true if \p MI is a load or store of \p NumBytes bytes.
503 bool isLoadStoreOfNumBytes(const MachineInstr &MI, unsigned NumBytes) const;
504
505 /// Returns true if \p MI is guaranteed to have the high-half of a 64-bit
506 /// register zeroed out. In other words, the result of MI has been explicitly
507 /// zero extended.
508 bool isDef32(const MachineInstr &MI) const;
509
510 const AArch64TargetMachine &TM;
511 const AArch64Subtarget &STI;
512 const AArch64InstrInfo &TII;
513 const AArch64RegisterInfo &TRI;
514 const AArch64RegisterBankInfo &RBI;
515
516 bool ProduceNonFlagSettingCondBr = false;
517
518 // Some cached values used during selection.
519 // We use LR as a live-in register, and we keep track of it here as it can be
520 // clobbered by calls.
521 Register MFReturnAddr;
522
522
523 MachineIRBuilder MIB;
524
525#define GET_GLOBALISEL_PREDICATES_DECL
526#include "AArch64GenGlobalISel.inc"
527#undef GET_GLOBALISEL_PREDICATES_DECL
528
529// We declare the temporaries used by selectImpl() in the class to minimize the
530// cost of constructing placeholder values.
531#define GET_GLOBALISEL_TEMPORARIES_DECL
532#include "AArch64GenGlobalISel.inc"
533#undef GET_GLOBALISEL_TEMPORARIES_DECL
534};
535
536} // end anonymous namespace
537
538#define GET_GLOBALISEL_IMPL
539#include "AArch64GenGlobalISel.inc"
540#undef GET_GLOBALISEL_IMPL
541
542AArch64InstructionSelector::AArch64InstructionSelector(
543 const AArch64TargetMachine &TM, const AArch64Subtarget &STI,
544 const AArch64RegisterBankInfo &RBI)
545 : TM(TM), STI(STI), TII(*STI.getInstrInfo()), TRI(*STI.getRegisterInfo()),
546 RBI(RBI),
547#define GET_GLOBALISEL_PREDICATES_INIT
548#include "AArch64GenGlobalISel.inc"
549#undef GET_GLOBALISEL_PREDICATES_INIT
550#define GET_GLOBALISEL_TEMPORARIES_INIT
551#include "AArch64GenGlobalISel.inc"
552#undef GET_GLOBALISEL_TEMPORARIES_INIT
553{
554}
555
556// FIXME: This should be target-independent, inferred from the types declared
557// for each class in the bank.
558//
559/// Given a register bank, and a type, return the smallest register class that
560/// can represent that combination.
561static const TargetRegisterClass *
562getRegClassForTypeOnBank(LLT Ty, const RegisterBank &RB,
563 bool GetAllRegSet = false) {
564 if (RB.getID() == AArch64::GPRRegBankID) {
565 if (Ty.getSizeInBits() <= 32)
566 return GetAllRegSet ? &AArch64::GPR32allRegClass
567 : &AArch64::GPR32RegClass;
568 if (Ty.getSizeInBits() == 64)
569 return GetAllRegSet ? &AArch64::GPR64allRegClass
570 : &AArch64::GPR64RegClass;
571 if (Ty.getSizeInBits() == 128)
572 return &AArch64::XSeqPairsClassRegClass;
573 return nullptr;
574 }
575
576 if (RB.getID() == AArch64::FPRRegBankID) {
577 switch (Ty.getSizeInBits()) {
578 case 8:
579 return &AArch64::FPR8RegClass;
580 case 16:
581 return &AArch64::FPR16RegClass;
582 case 32:
583 return &AArch64::FPR32RegClass;
584 case 64:
585 return &AArch64::FPR64RegClass;
586 case 128:
587 return &AArch64::FPR128RegClass;
588 }
589 return nullptr;
590 }
591
592 return nullptr;
593}
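// Illustrative example (not in the original source): per the cases above, an
// s64 value on the GPR bank maps to GPR64 (GPR64all when GetAllRegSet is
// true), while a 64-bit value on the FPR bank maps to FPR64.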
594
595/// Given a register bank, and size in bits, return the smallest register class
596/// that can represent that combination.
597static const TargetRegisterClass *
598getMinClassForRegBank(const RegisterBank &RB, unsigned SizeInBits,
599 bool GetAllRegSet = false) {
600 unsigned RegBankID = RB.getID();
601
602 if (RegBankID == AArch64::GPRRegBankID) {
603 if (SizeInBits <= 32)
604 return GetAllRegSet ? &AArch64::GPR32allRegClass
605 : &AArch64::GPR32RegClass;
606 if (SizeInBits == 64)
607 return GetAllRegSet ? &AArch64::GPR64allRegClass
608 : &AArch64::GPR64RegClass;
609 if (SizeInBits == 128)
610 return &AArch64::XSeqPairsClassRegClass;
611 }
612
613 if (RegBankID == AArch64::FPRRegBankID) {
614 switch (SizeInBits) {
615 default:
616 return nullptr;
617 case 8:
618 return &AArch64::FPR8RegClass;
619 case 16:
620 return &AArch64::FPR16RegClass;
621 case 32:
622 return &AArch64::FPR32RegClass;
623 case 64:
624 return &AArch64::FPR64RegClass;
625 case 128:
626 return &AArch64::FPR128RegClass;
627 }
628 }
629
630 return nullptr;
631}
632
633/// Returns the correct subregister to use for a given register class.
634static bool getSubRegForClass(const TargetRegisterClass *RC,
635 const TargetRegisterInfo &TRI, unsigned &SubReg) {
636 switch (TRI.getRegSizeInBits(*RC)) {
637 case 8:
638 SubReg = AArch64::bsub;
639 break;
640 case 16:
641 SubReg = AArch64::hsub;
642 break;
643 case 32:
644 if (RC != &AArch64::FPR32RegClass)
645 SubReg = AArch64::sub_32;
646 else
647 SubReg = AArch64::ssub;
648 break;
649 case 64:
650 SubReg = AArch64::dsub;
651 break;
652 default:
653 LLVM_DEBUG(
654 dbgs() << "Couldn't find appropriate subregister for register class.");
655 return false;
656 }
657
658 return true;
659}
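// Illustrative example (not in the original source): following the switch
// above, FPR32 maps to ssub, while any other 32-bit class (e.g. a GPR32
// class) maps to sub_32.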
660
661/// Returns the minimum size the given register bank can hold.
662static unsigned getMinSizeForRegBank(const RegisterBank &RB) {
663 switch (RB.getID()) {
664 case AArch64::GPRRegBankID:
665 return 32;
666 case AArch64::FPRRegBankID:
667 return 8;
668 default:
669 llvm_unreachable("Tried to get minimum size for unknown register bank.");
670 }
671}
672
673/// Create a REG_SEQUENCE instruction using the registers in \p Regs.
674/// Helper function for functions like createDTuple and createQTuple.
675///
676/// \p RegClassIDs - The list of register class IDs available for some tuple of
677/// a scalar class. E.g. QQRegClassID, QQQRegClassID, QQQQRegClassID. This is
678/// expected to contain between 2 and 4 tuple classes.
679///
680/// \p SubRegs - The list of subregister classes associated with each register
681/// class ID in \p RegClassIDs. E.g., QQRegClassID should use the qsub0
682/// subregister class. The index of each subregister class is expected to
683/// correspond with the index of each register class.
684///
685/// \returns Either the destination register of REG_SEQUENCE instruction that
686/// was created, or the 0th element of \p Regs if \p Regs contains a single
687/// element.
688static Register createTuple(ArrayRef<Register> Regs,
689 const unsigned RegClassIDs[],
690 const unsigned SubRegs[], MachineIRBuilder &MIB) {
691 unsigned NumRegs = Regs.size();
692 if (NumRegs == 1)
693 return Regs[0];
694 assert(NumRegs >= 2 && NumRegs <= 4 &&
695 "Only support between two and 4 registers in a tuple!");
696 const TargetRegisterInfo *TRI = MIB.getMF().getSubtarget().getRegisterInfo();
697 auto *DesiredClass = TRI->getRegClass(RegClassIDs[NumRegs - 2]);
698 auto RegSequence =
699 MIB.buildInstr(TargetOpcode::REG_SEQUENCE, {DesiredClass}, {});
700 for (unsigned I = 0, E = Regs.size(); I < E; ++I) {
701 RegSequence.addUse(Regs[I]);
702 RegSequence.addImm(SubRegs[I]);
703 }
704 return RegSequence.getReg(0);
705}
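// Illustrative example (not in the original source): passing three Q registers
// to createQTuple below yields roughly
//   %tup = REG_SEQUENCE %q0, %subreg.qsub0, %q1, %subreg.qsub1, %q2, %subreg.qsub2
// because RegClassIDs[NumRegs - 2] selects the QQQ class when NumRegs == 3.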
706
707/// Create a tuple of D-registers using the registers in \p Regs.
708static Register createDTuple(ArrayRef<Register> Regs, MachineIRBuilder &MIB) {
709 static const unsigned RegClassIDs[] = {
710 AArch64::DDRegClassID, AArch64::DDDRegClassID, AArch64::DDDDRegClassID};
711 static const unsigned SubRegs[] = {AArch64::dsub0, AArch64::dsub1,
712 AArch64::dsub2, AArch64::dsub3};
713 return createTuple(Regs, RegClassIDs, SubRegs, MIB);
714}
715
716/// Create a tuple of Q-registers using the registers in \p Regs.
717static Register createQTuple(ArrayRef<Register> Regs, MachineIRBuilder &MIB) {
718 static const unsigned RegClassIDs[] = {
719 AArch64::QQRegClassID, AArch64::QQQRegClassID, AArch64::QQQQRegClassID};
720 static const unsigned SubRegs[] = {AArch64::qsub0, AArch64::qsub1,
721 AArch64::qsub2, AArch64::qsub3};
722 return createTuple(Regs, RegClassIDs, SubRegs, MIB);
723}
724
725static std::optional<uint64_t> getImmedFromMO(const MachineOperand &Root) {
726 auto &MI = *Root.getParent();
727 auto &MBB = *MI.getParent();
728 auto &MF = *MBB.getParent();
729 auto &MRI = MF.getRegInfo();
730 uint64_t Immed;
731 if (Root.isImm())
732 Immed = Root.getImm();
733 else if (Root.isCImm())
734 Immed = Root.getCImm()->getZExtValue();
735 else if (Root.isReg()) {
736 auto ValAndVReg =
737 getIConstantVRegValWithLookThrough(Root.getReg(), MRI, true);
738 if (!ValAndVReg)
739 return std::nullopt;
740 Immed = ValAndVReg->Value.getSExtValue();
741 } else
742 return std::nullopt;
743 return Immed;
744}
745
746/// Check whether \p I is a currently unsupported binary operation:
747/// - it has an unsized type
748/// - an operand is not a vreg
749/// - all operands are not in the same bank
750/// These are checks that should someday live in the verifier, but right now,
751/// these are mostly limitations of the aarch64 selector.
752static bool unsupportedBinOp(const MachineInstr &I,
753 const AArch64RegisterBankInfo &RBI,
754 const MachineRegisterInfo &MRI,
755 const AArch64RegisterInfo &TRI) {
756 LLT Ty = MRI.getType(I.getOperand(0).getReg());
757 if (!Ty.isValid()) {
758 LLVM_DEBUG(dbgs() << "Generic binop register should be typed\n");
759 return true;
760 }
761
762 const RegisterBank *PrevOpBank = nullptr;
763 for (auto &MO : I.operands()) {
764 // FIXME: Support non-register operands.
765 if (!MO.isReg()) {
766 LLVM_DEBUG(dbgs() << "Generic inst non-reg operands are unsupported\n");
767 return true;
768 }
769
770 // FIXME: Can generic operations have physical registers operands? If
771 // so, this will need to be taught about that, and we'll need to get the
772 // bank out of the minimal class for the register.
773 // Either way, this needs to be documented (and possibly verified).
774 if (!MO.getReg().isVirtual()) {
775 LLVM_DEBUG(dbgs() << "Generic inst has physical register operand\n");
776 return true;
777 }
778
779 const RegisterBank *OpBank = RBI.getRegBank(MO.getReg(), MRI, TRI);
780 if (!OpBank) {
781 LLVM_DEBUG(dbgs() << "Generic register has no bank or class\n");
782 return true;
783 }
784
785 if (PrevOpBank && OpBank != PrevOpBank) {
786 LLVM_DEBUG(dbgs() << "Generic inst operands have different banks\n");
787 return true;
788 }
789 PrevOpBank = OpBank;
790 }
791 return false;
792}
793
794/// Select the AArch64 opcode for the basic binary operation \p GenericOpc
795/// (such as G_OR or G_SDIV), appropriate for the register bank \p RegBankID
796/// and of size \p OpSize.
797/// \returns \p GenericOpc if the combination is unsupported.
798static unsigned selectBinaryOp(unsigned GenericOpc, unsigned RegBankID,
799 unsigned OpSize) {
800 switch (RegBankID) {
801 case AArch64::GPRRegBankID:
802 if (OpSize == 32) {
803 switch (GenericOpc) {
804 case TargetOpcode::G_SHL:
805 return AArch64::LSLVWr;
806 case TargetOpcode::G_LSHR:
807 return AArch64::LSRVWr;
808 case TargetOpcode::G_ASHR:
809 return AArch64::ASRVWr;
810 default:
811 return GenericOpc;
812 }
813 } else if (OpSize == 64) {
814 switch (GenericOpc) {
815 case TargetOpcode::G_PTR_ADD:
816 return AArch64::ADDXrr;
817 case TargetOpcode::G_SHL:
818 return AArch64::LSLVXr;
819 case TargetOpcode::G_LSHR:
820 return AArch64::LSRVXr;
821 case TargetOpcode::G_ASHR:
822 return AArch64::ASRVXr;
823 default:
824 return GenericOpc;
825 }
826 }
827 break;
828 case AArch64::FPRRegBankID:
829 switch (OpSize) {
830 case 32:
831 switch (GenericOpc) {
832 case TargetOpcode::G_FADD:
833 return AArch64::FADDSrr;
834 case TargetOpcode::G_FSUB:
835 return AArch64::FSUBSrr;
836 case TargetOpcode::G_FMUL:
837 return AArch64::FMULSrr;
838 case TargetOpcode::G_FDIV:
839 return AArch64::FDIVSrr;
840 default:
841 return GenericOpc;
842 }
843 case 64:
844 switch (GenericOpc) {
845 case TargetOpcode::G_FADD:
846 return AArch64::FADDDrr;
847 case TargetOpcode::G_FSUB:
848 return AArch64::FSUBDrr;
849 case TargetOpcode::G_FMUL:
850 return AArch64::FMULDrr;
851 case TargetOpcode::G_FDIV:
852 return AArch64::FDIVDrr;
853 case TargetOpcode::G_OR:
854 return AArch64::ORRv8i8;
855 default:
856 return GenericOpc;
857 }
858 }
859 break;
860 }
861 return GenericOpc;
862}
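// Illustrative example (not in the original source): selectBinaryOp(G_SHL,
// GPRRegBankID, 64) yields AArch64::LSLVXr, while an unsupported combination
// simply returns the generic opcode unchanged.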
863
864/// Select the AArch64 opcode for the G_LOAD or G_STORE operation \p GenericOpc,
865/// appropriate for the (value) register bank \p RegBankID and of memory access
866/// size \p OpSize. This returns the variant with the base+unsigned-immediate
867/// addressing mode (e.g., LDRXui).
868/// \returns \p GenericOpc if the combination is unsupported.
869static unsigned selectLoadStoreUIOp(unsigned GenericOpc, unsigned RegBankID,
870 unsigned OpSize) {
871 const bool isStore = GenericOpc == TargetOpcode::G_STORE;
872 switch (RegBankID) {
873 case AArch64::GPRRegBankID:
874 switch (OpSize) {
875 case 8:
876 return isStore ? AArch64::STRBBui : AArch64::LDRBBui;
877 case 16:
878 return isStore ? AArch64::STRHHui : AArch64::LDRHHui;
879 case 32:
880 return isStore ? AArch64::STRWui : AArch64::LDRWui;
881 case 64:
882 return isStore ? AArch64::STRXui : AArch64::LDRXui;
883 }
884 break;
885 case AArch64::FPRRegBankID:
886 switch (OpSize) {
887 case 8:
888 return isStore ? AArch64::STRBui : AArch64::LDRBui;
889 case 16:
890 return isStore ? AArch64::STRHui : AArch64::LDRHui;
891 case 32:
892 return isStore ? AArch64::STRSui : AArch64::LDRSui;
893 case 64:
894 return isStore ? AArch64::STRDui : AArch64::LDRDui;
895 case 128:
896 return isStore ? AArch64::STRQui : AArch64::LDRQui;
897 }
898 break;
899 }
900 return GenericOpc;
901}
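// Illustrative example (not in the original source): a 32-bit G_LOAD on the
// GPR bank selects LDRWui and a 128-bit G_LOAD on the FPR bank selects LDRQui,
// both using the base + unsigned scaled immediate addressing mode.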
902
903/// Helper function for selectCopy. Inserts a subregister copy from \p SrcReg
904/// to \p *To.
905///
906/// E.g "To = COPY SrcReg:SubReg"
907static bool copySubReg(MachineInstr &I, MachineRegisterInfo &MRI,
908 const RegisterBankInfo &RBI, Register SrcReg,
909 const TargetRegisterClass *To, unsigned SubReg) {
910 assert(SrcReg.isValid() && "Expected a valid source register?");
911 assert(To && "Destination register class cannot be null");
912 assert(SubReg && "Expected a valid subregister");
913
914 MachineIRBuilder MIB(I);
915 auto SubRegCopy =
916 MIB.buildInstr(TargetOpcode::COPY, {To}, {}).addReg(SrcReg, 0, SubReg);
917 MachineOperand &RegOp = I.getOperand(1);
918 RegOp.setReg(SubRegCopy.getReg(0));
919
920 // It's possible that the destination register won't be constrained. Make
921 // sure that happens.
922 if (!I.getOperand(0).getReg().isPhysical())
923 RBI.constrainGenericRegister(I.getOperand(0).getReg(), *To, MRI);
924
925 return true;
926}
927
928/// Helper function to get the source and destination register classes for a
929/// copy. Returns a std::pair containing the source register class for the
930/// copy, and the destination register class for the copy. If a register class
931/// cannot be determined, then it will be nullptr.
932static std::pair<const TargetRegisterClass *, const TargetRegisterClass *>
933getRegClassesForCopy(MachineInstr &I, const TargetInstrInfo &TII,
934 MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI,
935 const RegisterBankInfo &RBI) {
936 Register DstReg = I.getOperand(0).getReg();
937 Register SrcReg = I.getOperand(1).getReg();
938 const RegisterBank &DstRegBank = *RBI.getRegBank(DstReg, MRI, TRI);
939 const RegisterBank &SrcRegBank = *RBI.getRegBank(SrcReg, MRI, TRI);
940 unsigned DstSize = RBI.getSizeInBits(DstReg, MRI, TRI);
941 unsigned SrcSize = RBI.getSizeInBits(SrcReg, MRI, TRI);
942
943 // Special casing for cross-bank copies of s1s. We can technically represent
944 // a 1-bit value with any size of register. The minimum size for a GPR is 32
945 // bits. So, we need to put the FPR on 32 bits as well.
946 //
947 // FIXME: I'm not sure if this case holds true outside of copies. If it does,
948 // then we can pull it into the helpers that get the appropriate class for a
949 // register bank. Or make a new helper that carries along some constraint
950 // information.
951 if (SrcRegBank != DstRegBank && (DstSize == 1 && SrcSize == 1))
952 SrcSize = DstSize = 32;
953
954 return {getMinClassForRegBank(SrcRegBank, SrcSize, true),
955 getMinClassForRegBank(DstRegBank, DstSize, true)};
956}
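// Illustrative example (not in the original source): for a copy of an s16
// value from the FPR bank to the GPR bank, this returns {FPR16, GPR32all},
// since the minimum GPR class is 32 bits wide.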
957
958// FIXME: We need some sort of API in RBI/TRI to allow generic code to
959// constrain operands of simple instructions given a TargetRegisterClass
960// and LLT
961static bool selectDebugInstr(MachineInstr &I, MachineRegisterInfo &MRI,
962 const RegisterBankInfo &RBI) {
963 for (MachineOperand &MO : I.operands()) {
964 if (!MO.isReg())
965 continue;
966 Register Reg = MO.getReg();
967 if (!Reg)
968 continue;
969 if (Reg.isPhysical())
970 continue;
971 LLT Ty = MRI.getType(Reg);
972 const RegClassOrRegBank &RegClassOrBank = MRI.getRegClassOrRegBank(Reg);
973 const TargetRegisterClass *RC =
974 RegClassOrBank.dyn_cast<const TargetRegisterClass *>();
975 if (!RC) {
976 const RegisterBank &RB = *RegClassOrBank.get<const RegisterBank *>();
977 RC = getRegClassForTypeOnBank(Ty, RB);
978 if (!RC) {
980 dbgs() << "Warning: DBG_VALUE operand has unexpected size/bank\n");
981 break;
982 }
983 }
984 RBI.constrainGenericRegister(Reg, *RC, MRI);
985 }
986
987 return true;
988}
989
990static bool selectCopy(MachineInstr &I, const TargetInstrInfo &TII,
991 MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI,
992 const RegisterBankInfo &RBI) {
993 Register DstReg = I.getOperand(0).getReg();
994 Register SrcReg = I.getOperand(1).getReg();
995 const RegisterBank &DstRegBank = *RBI.getRegBank(DstReg, MRI, TRI);
996 const RegisterBank &SrcRegBank = *RBI.getRegBank(SrcReg, MRI, TRI);
997
998 // Find the correct register classes for the source and destination registers.
999 const TargetRegisterClass *SrcRC;
1000 const TargetRegisterClass *DstRC;
1001 std::tie(SrcRC, DstRC) = getRegClassesForCopy(I, TII, MRI, TRI, RBI);
1002
1003 if (!DstRC) {
1004 LLVM_DEBUG(dbgs() << "Unexpected dest size "
1005 << RBI.getSizeInBits(DstReg, MRI, TRI) << '\n');
1006 return false;
1007 }
1008
1009 // Is this a copy? If so, then we may need to insert a subregister copy.
1010 if (I.isCopy()) {
1011 // Yes. Check if there's anything to fix up.
1012 if (!SrcRC) {
1013 LLVM_DEBUG(dbgs() << "Couldn't determine source register class\n");
1014 return false;
1015 }
1016
1017 unsigned SrcSize = TRI.getRegSizeInBits(*SrcRC);
1018 unsigned DstSize = TRI.getRegSizeInBits(*DstRC);
1019 unsigned SubReg;
1020
1021 // If the source bank doesn't support a subregister copy small enough,
1022 // then we first need to copy to the destination bank.
1023 if (getMinSizeForRegBank(SrcRegBank) > DstSize) {
1024 const TargetRegisterClass *DstTempRC =
1025 getMinClassForRegBank(DstRegBank, SrcSize, /* GetAllRegSet */ true);
1026 getSubRegForClass(DstRC, TRI, SubReg);
1027
1028 MachineIRBuilder MIB(I);
1029 auto Copy = MIB.buildCopy({DstTempRC}, {SrcReg});
1030 copySubReg(I, MRI, RBI, Copy.getReg(0), DstRC, SubReg);
1031 } else if (SrcSize > DstSize) {
1032 // If the source register is bigger than the destination we need to
1033 // perform a subregister copy.
1034 const TargetRegisterClass *SubRegRC =
1035 getMinClassForRegBank(SrcRegBank, DstSize, /* GetAllRegSet */ true);
1036 getSubRegForClass(SubRegRC, TRI, SubReg);
1037 copySubReg(I, MRI, RBI, SrcReg, DstRC, SubReg);
1038 } else if (DstSize > SrcSize) {
1039 // If the destination register is bigger than the source we need to do
1040 // a promotion using SUBREG_TO_REG.
1041 const TargetRegisterClass *PromotionRC =
1042 getMinClassForRegBank(SrcRegBank, DstSize, /* GetAllRegSet */ true);
1043 getSubRegForClass(SrcRC, TRI, SubReg);
1044
1045 Register PromoteReg = MRI.createVirtualRegister(PromotionRC);
1046 BuildMI(*I.getParent(), I, I.getDebugLoc(),
1047 TII.get(AArch64::SUBREG_TO_REG), PromoteReg)
1048 .addImm(0)
1049 .addUse(SrcReg)
1050 .addImm(SubReg);
1051 MachineOperand &RegOp = I.getOperand(1);
1052 RegOp.setReg(PromoteReg);
1053 }
1054
1055 // If the destination is a physical register, then there's nothing to
1056 // change, so we're done.
1057 if (DstReg.isPhysical())
1058 return true;
1059 }
1060
1061 // No need to constrain SrcReg. It will get constrained when we hit another
1062 // of its use or its defs. Copies do not have constraints.
1063 if (!RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) {
1064 LLVM_DEBUG(dbgs() << "Failed to constrain " << TII.getName(I.getOpcode())
1065 << " operand\n");
1066 return false;
1067 }
1068
1069 // If this is a GPR ZEXT that we want to just reduce down into a copy.
1070 // The sizes will be mismatched with the source < 32b but that's ok.
1071 if (I.getOpcode() == TargetOpcode::G_ZEXT) {
1072 I.setDesc(TII.get(AArch64::COPY));
1073 assert(SrcRegBank.getID() == AArch64::GPRRegBankID);
1074 return selectCopy(I, TII, MRI, TRI, RBI);
1075 }
1076
1077 I.setDesc(TII.get(AArch64::COPY));
1078 return true;
1079}
1080
1081static unsigned selectFPConvOpc(unsigned GenericOpc, LLT DstTy, LLT SrcTy) {
1082 if (!DstTy.isScalar() || !SrcTy.isScalar())
1083 return GenericOpc;
1084
1085 const unsigned DstSize = DstTy.getSizeInBits();
1086 const unsigned SrcSize = SrcTy.getSizeInBits();
1087
1088 switch (DstSize) {
1089 case 32:
1090 switch (SrcSize) {
1091 case 32:
1092 switch (GenericOpc) {
1093 case TargetOpcode::G_SITOFP:
1094 return AArch64::SCVTFUWSri;
1095 case TargetOpcode::G_UITOFP:
1096 return AArch64::UCVTFUWSri;
1097 case TargetOpcode::G_FPTOSI:
1098 return AArch64::FCVTZSUWSr;
1099 case TargetOpcode::G_FPTOUI:
1100 return AArch64::FCVTZUUWSr;
1101 default:
1102 return GenericOpc;
1103 }
1104 case 64:
1105 switch (GenericOpc) {
1106 case TargetOpcode::G_SITOFP:
1107 return AArch64::SCVTFUXSri;
1108 case TargetOpcode::G_UITOFP:
1109 return AArch64::UCVTFUXSri;
1110 case TargetOpcode::G_FPTOSI:
1111 return AArch64::FCVTZSUWDr;
1112 case TargetOpcode::G_FPTOUI:
1113 return AArch64::FCVTZUUWDr;
1114 default:
1115 return GenericOpc;
1116 }
1117 default:
1118 return GenericOpc;
1119 }
1120 case 64:
1121 switch (SrcSize) {
1122 case 32:
1123 switch (GenericOpc) {
1124 case TargetOpcode::G_SITOFP:
1125 return AArch64::SCVTFUWDri;
1126 case TargetOpcode::G_UITOFP:
1127 return AArch64::UCVTFUWDri;
1128 case TargetOpcode::G_FPTOSI:
1129 return AArch64::FCVTZSUXSr;
1130 case TargetOpcode::G_FPTOUI:
1131 return AArch64::FCVTZUUXSr;
1132 default:
1133 return GenericOpc;
1134 }
1135 case 64:
1136 switch (GenericOpc) {
1137 case TargetOpcode::G_SITOFP:
1138 return AArch64::SCVTFUXDri;
1139 case TargetOpcode::G_UITOFP:
1140 return AArch64::UCVTFUXDri;
1141 case TargetOpcode::G_FPTOSI:
1142 return AArch64::FCVTZSUXDr;
1143 case TargetOpcode::G_FPTOUI:
1144 return AArch64::FCVTZUUXDr;
1145 default:
1146 return GenericOpc;
1147 }
1148 default:
1149 return GenericOpc;
1150 }
1151 default:
1152 return GenericOpc;
1153 };
1154 return GenericOpc;
1155}
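// Illustrative example (not in the original source): a G_SITOFP from s64 to
// s32 maps to SCVTFUXSri above; vector types or otherwise unhandled
// combinations fall back to returning the generic opcode.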
1156
1157MachineInstr *
1158AArch64InstructionSelector::emitSelect(Register Dst, Register True,
1159 Register False, AArch64CC::CondCode CC,
1160 MachineIRBuilder &MIB) const {
1161 MachineRegisterInfo &MRI = *MIB.getMRI();
1162 assert(RBI.getRegBank(False, MRI, TRI)->getID() ==
1163 RBI.getRegBank(True, MRI, TRI)->getID() &&
1164 "Expected both select operands to have the same regbank?");
1165 LLT Ty = MRI.getType(True);
1166 if (Ty.isVector())
1167 return nullptr;
1168 const unsigned Size = Ty.getSizeInBits();
1169 assert((Size == 32 || Size == 64) &&
1170 "Expected 32 bit or 64 bit select only?");
1171 const bool Is32Bit = Size == 32;
1172 if (RBI.getRegBank(True, MRI, TRI)->getID() != AArch64::GPRRegBankID) {
1173 unsigned Opc = Is32Bit ? AArch64::FCSELSrrr : AArch64::FCSELDrrr;
1174 auto FCSel = MIB.buildInstr(Opc, {Dst}, {True, False}).addImm(CC);
1175 constrainSelectedInstRegOperands(*FCSel, TII, TRI, RBI);
1176 return &*FCSel;
1177 }
1178
1179 // By default, we'll try and emit a CSEL.
1180 unsigned Opc = Is32Bit ? AArch64::CSELWr : AArch64::CSELXr;
1181 bool Optimized = false;
1182 auto TryFoldBinOpIntoSelect = [&Opc, Is32Bit, &CC, &MRI,
1183 &Optimized](Register &Reg, Register &OtherReg,
1184 bool Invert) {
1185 if (Optimized)
1186 return false;
1187
1188 // Attempt to fold:
1189 //
1190 // %sub = G_SUB 0, %x
1191 // %select = G_SELECT cc, %reg, %sub
1192 //
1193 // Into:
1194 // %select = CSNEG %reg, %x, cc
1195 Register MatchReg;
1196 if (mi_match(Reg, MRI, m_Neg(m_Reg(MatchReg)))) {
1197 Opc = Is32Bit ? AArch64::CSNEGWr : AArch64::CSNEGXr;
1198 Reg = MatchReg;
1199 if (Invert) {
1200 CC = AArch64CC::getInvertedCondCode(CC);
1201 std::swap(Reg, OtherReg);
1202 }
1203 return true;
1204 }
1205
1206 // Attempt to fold:
1207 //
1208 // %xor = G_XOR %x, -1
1209 // %select = G_SELECT cc, %reg, %xor
1210 //
1211 // Into:
1212 // %select = CSINV %reg, %x, cc
1213 if (mi_match(Reg, MRI, m_Not(m_Reg(MatchReg)))) {
1214 Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;
1215 Reg = MatchReg;
1216 if (Invert) {
1217 CC = AArch64CC::getInvertedCondCode(CC);
1218 std::swap(Reg, OtherReg);
1219 }
1220 return true;
1221 }
1222
1223 // Attempt to fold:
1224 //
1225 // %add = G_ADD %x, 1
1226 // %select = G_SELECT cc, %reg, %add
1227 //
1228 // Into:
1229 // %select = CSINC %reg, %x, cc
1230 if (mi_match(Reg, MRI,
1231 m_any_of(m_GAdd(m_Reg(MatchReg), m_SpecificICst(1)),
1232 m_GPtrAdd(m_Reg(MatchReg), m_SpecificICst(1))))) {
1233 Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;
1234 Reg = MatchReg;
1235 if (Invert) {
1236 CC = AArch64CC::getInvertedCondCode(CC);
1237 std::swap(Reg, OtherReg);
1238 }
1239 return true;
1240 }
1241
1242 return false;
1243 };
1244
1245 // Helper lambda which tries to use CSINC/CSINV for the instruction when its
1246 // true/false values are constants.
1247 // FIXME: All of these patterns already exist in tablegen. We should be
1248 // able to import these.
1249 auto TryOptSelectCst = [&Opc, &True, &False, &CC, Is32Bit, &MRI,
1250 &Optimized]() {
1251 if (Optimized)
1252 return false;
1253 auto TrueCst = getIConstantVRegValWithLookThrough(True, MRI);
1254 auto FalseCst = getIConstantVRegValWithLookThrough(False, MRI);
1255 if (!TrueCst && !FalseCst)
1256 return false;
1257
1258 Register ZReg = Is32Bit ? AArch64::WZR : AArch64::XZR;
1259 if (TrueCst && FalseCst) {
1260 int64_t T = TrueCst->Value.getSExtValue();
1261 int64_t F = FalseCst->Value.getSExtValue();
1262
1263 if (T == 0 && F == 1) {
1264 // G_SELECT cc, 0, 1 -> CSINC zreg, zreg, cc
1265 Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;
1266 True = ZReg;
1267 False = ZReg;
1268 return true;
1269 }
1270
1271 if (T == 0 && F == -1) {
1272 // G_SELECT cc 0, -1 -> CSINV zreg, zreg cc
1273 Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;
1274 True = ZReg;
1275 False = ZReg;
1276 return true;
1277 }
1278 }
1279
1280 if (TrueCst) {
1281 int64_t T = TrueCst->Value.getSExtValue();
1282 if (T == 1) {
1283 // G_SELECT cc, 1, f -> CSINC f, zreg, inv_cc
1284 Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;
1285 True = False;
1286 False = ZReg;
1287 CC = AArch64CC::getInvertedCondCode(CC);
1288 return true;
1289 }
1290
1291 if (T == -1) {
1292 // G_SELECT cc, -1, f -> CSINV f, zreg, inv_cc
1293 Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;
1294 True = False;
1295 False = ZReg;
1296 CC = AArch64CC::getInvertedCondCode(CC);
1297 return true;
1298 }
1299 }
1300
1301 if (FalseCst) {
1302 int64_t F = FalseCst->Value.getSExtValue();
1303 if (F == 1) {
1304 // G_SELECT cc, t, 1 -> CSINC t, zreg, cc
1305 Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;
1306 False = ZReg;
1307 return true;
1308 }
1309
1310 if (F == -1) {
1311 // G_SELECT cc, t, -1 -> CSINC t, zreg, cc
1312 Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;
1313 False = ZReg;
1314 return true;
1315 }
1316 }
1317 return false;
1318 };
1319
1320 Optimized |= TryFoldBinOpIntoSelect(False, True, /*Invert = */ false);
1321 Optimized |= TryFoldBinOpIntoSelect(True, False, /*Invert = */ true);
1322 Optimized |= TryOptSelectCst();
1323 auto SelectInst = MIB.buildInstr(Opc, {Dst}, {True, False}).addImm(CC);
1324 constrainSelectedInstRegOperands(*SelectInst, TII, TRI, RBI);
1325 return &*SelectInst;
1326}
1327
1328static AArch64CC::CondCode changeICMPPredToAArch64CC(CmpInst::Predicate P) {
1329 switch (P) {
1330 default:
1331 llvm_unreachable("Unknown condition code!");
1332 case CmpInst::ICMP_NE:
1333 return AArch64CC::NE;
1334 case CmpInst::ICMP_EQ:
1335 return AArch64CC::EQ;
1336 case CmpInst::ICMP_SGT:
1337 return AArch64CC::GT;
1338 case CmpInst::ICMP_SGE:
1339 return AArch64CC::GE;
1340 case CmpInst::ICMP_SLT:
1341 return AArch64CC::LT;
1342 case CmpInst::ICMP_SLE:
1343 return AArch64CC::LE;
1344 case CmpInst::ICMP_UGT:
1345 return AArch64CC::HI;
1346 case CmpInst::ICMP_UGE:
1347 return AArch64CC::HS;
1348 case CmpInst::ICMP_ULT:
1349 return AArch64CC::LO;
1350 case CmpInst::ICMP_ULE:
1351 return AArch64CC::LS;
1352 }
1353}
1354
1355/// changeFPCCToORAArch64CC - Convert an IR fp condition code to an AArch64 CC.
1356static void changeFPCCToORAArch64CC(CmpInst::Predicate CC,
1357 AArch64CC::CondCode &CondCode,
1358 AArch64CC::CondCode &CondCode2) {
1359 CondCode2 = AArch64CC::AL;
1360 switch (CC) {
1361 default:
1362 llvm_unreachable("Unknown FP condition!");
1363 case CmpInst::FCMP_OEQ:
1364 CondCode = AArch64CC::EQ;
1365 break;
1366 case CmpInst::FCMP_OGT:
1367 CondCode = AArch64CC::GT;
1368 break;
1369 case CmpInst::FCMP_OGE:
1370 CondCode = AArch64CC::GE;
1371 break;
1372 case CmpInst::FCMP_OLT:
1373 CondCode = AArch64CC::MI;
1374 break;
1375 case CmpInst::FCMP_OLE:
1376 CondCode = AArch64CC::LS;
1377 break;
1378 case CmpInst::FCMP_ONE:
1379 CondCode = AArch64CC::MI;
1380 CondCode2 = AArch64CC::GT;
1381 break;
1382 case CmpInst::FCMP_ORD:
1383 CondCode = AArch64CC::VC;
1384 break;
1385 case CmpInst::FCMP_UNO:
1386 CondCode = AArch64CC::VS;
1387 break;
1388 case CmpInst::FCMP_UEQ:
1389 CondCode = AArch64CC::EQ;
1390 CondCode2 = AArch64CC::VS;
1391 break;
1392 case CmpInst::FCMP_UGT:
1393 CondCode = AArch64CC::HI;
1394 break;
1395 case CmpInst::FCMP_UGE:
1396 CondCode = AArch64CC::PL;
1397 break;
1398 case CmpInst::FCMP_ULT:
1399 CondCode = AArch64CC::LT;
1400 break;
1401 case CmpInst::FCMP_ULE:
1402 CondCode = AArch64CC::LE;
1403 break;
1404 case CmpInst::FCMP_UNE:
1405 CondCode = AArch64CC::NE;
1406 break;
1407 }
1408}
1409
1410/// Convert an IR fp condition code to an AArch64 CC.
1411/// This differs from changeFPCCToAArch64CC in that it returns cond codes that
1412/// should be AND'ed instead of OR'ed.
1413static void changeFPCCToANDAArch64CC(CmpInst::Predicate CC,
1414 AArch64CC::CondCode &CondCode,
1415 AArch64CC::CondCode &CondCode2) {
1416 CondCode2 = AArch64CC::AL;
1417 switch (CC) {
1418 default:
1419 changeFPCCToORAArch64CC(CC, CondCode, CondCode2);
1420 assert(CondCode2 == AArch64CC::AL);
1421 break;
1422 case CmpInst::FCMP_ONE:
1423 // (a one b)
1424 // == ((a olt b) || (a ogt b))
1425 // == ((a ord b) && (a une b))
1426 CondCode = AArch64CC::VC;
1427 CondCode2 = AArch64CC::NE;
1428 break;
1429 case CmpInst::FCMP_UEQ:
1430 // (a ueq b)
1431 // == ((a uno b) || (a oeq b))
1432 // == ((a ule b) && (a uge b))
1433 CondCode = AArch64CC::PL;
1434 CondCode2 = AArch64CC::LE;
1435 break;
1436 }
1437}
1438
1439/// Return a register which can be used as a bit to test in a TB(N)Z.
1440static Register getTestBitReg(Register Reg, uint64_t &Bit, bool &Invert,
1441 MachineRegisterInfo &MRI) {
1442 assert(Reg.isValid() && "Expected valid register!");
1443 bool HasZext = false;
1444 while (MachineInstr *MI = getDefIgnoringCopies(Reg, MRI)) {
1445 unsigned Opc = MI->getOpcode();
1446
1447 if (!MI->getOperand(0).isReg() ||
1448 !MRI.hasOneNonDBGUse(MI->getOperand(0).getReg()))
1449 break;
1450
1451 // (tbz (any_ext x), b) -> (tbz x, b) if we don't use the extended bits.
1452 //
1453 // (tbz (trunc x), b) -> (tbz x, b) is always safe, because the bit number
1454 // on the truncated x is the same as the bit number on x.
1455 if (Opc == TargetOpcode::G_ANYEXT || Opc == TargetOpcode::G_ZEXT ||
1456 Opc == TargetOpcode::G_TRUNC) {
1457 if (Opc == TargetOpcode::G_ZEXT)
1458 HasZext = true;
1459
1460 Register NextReg = MI->getOperand(1).getReg();
1461 // Did we find something worth folding?
1462 if (!NextReg.isValid() || !MRI.hasOneNonDBGUse(NextReg))
1463 break;
1464
1465 // NextReg is worth folding. Keep looking.
1466 Reg = NextReg;
1467 continue;
1468 }
1469
1470 // Attempt to find a suitable operation with a constant on one side.
1471 std::optional<uint64_t> C;
1472 Register TestReg;
1473 switch (Opc) {
1474 default:
1475 break;
1476 case TargetOpcode::G_AND:
1477 case TargetOpcode::G_XOR: {
1478 TestReg = MI->getOperand(1).getReg();
1479 Register ConstantReg = MI->getOperand(2).getReg();
1480 auto VRegAndVal = getIConstantVRegValWithLookThrough(ConstantReg, MRI);
1481 if (!VRegAndVal) {
1482 // AND commutes, check the other side for a constant.
1483 // FIXME: Can we canonicalize the constant so that it's always on the
1484 // same side at some point earlier?
1485 std::swap(ConstantReg, TestReg);
1486 VRegAndVal = getIConstantVRegValWithLookThrough(ConstantReg, MRI);
1487 }
1488 if (VRegAndVal) {
1489 if (HasZext)
1490 C = VRegAndVal->Value.getZExtValue();
1491 else
1492 C = VRegAndVal->Value.getSExtValue();
1493 }
1494 break;
1495 }
1496 case TargetOpcode::G_ASHR:
1497 case TargetOpcode::G_LSHR:
1498 case TargetOpcode::G_SHL: {
1499 TestReg = MI->getOperand(1).getReg();
1500 auto VRegAndVal =
1501 getIConstantVRegValWithLookThrough(MI->getOperand(2).getReg(), MRI);
1502 if (VRegAndVal)
1503 C = VRegAndVal->Value.getSExtValue();
1504 break;
1505 }
1506 }
1507
1508 // Didn't find a constant or viable register. Bail out of the loop.
1509 if (!C || !TestReg.isValid())
1510 break;
1511
1512 // We found a suitable instruction with a constant. Check to see if we can
1513 // walk through the instruction.
1514 Register NextReg;
1515 unsigned TestRegSize = MRI.getType(TestReg).getSizeInBits();
1516 switch (Opc) {
1517 default:
1518 break;
1519 case TargetOpcode::G_AND:
1520 // (tbz (and x, m), b) -> (tbz x, b) when the b-th bit of m is set.
1521 if ((*C >> Bit) & 1)
1522 NextReg = TestReg;
1523 break;
1524 case TargetOpcode::G_SHL:
1525 // (tbz (shl x, c), b) -> (tbz x, b-c) when b-c is positive and fits in
1526 // the type of the register.
1527 if (*C <= Bit && (Bit - *C) < TestRegSize) {
1528 NextReg = TestReg;
1529 Bit = Bit - *C;
1530 }
1531 break;
1532 case TargetOpcode::G_ASHR:
1533 // (tbz (ashr x, c), b) -> (tbz x, b+c) or (tbz x, msb) if b+c is > # bits
1534 // in x
1535 NextReg = TestReg;
1536 Bit = Bit + *C;
1537 if (Bit >= TestRegSize)
1538 Bit = TestRegSize - 1;
1539 break;
1540 case TargetOpcode::G_LSHR:
1541 // (tbz (lshr x, c), b) -> (tbz x, b+c) when b + c is < # bits in x
1542 if ((Bit + *C) < TestRegSize) {
1543 NextReg = TestReg;
1544 Bit = Bit + *C;
1545 }
1546 break;
1547 case TargetOpcode::G_XOR:
1548 // We can walk through a G_XOR by inverting whether we use tbz/tbnz when
1549 // appropriate.
1550 //
1551 // e.g. If x' = xor x, c, and the b-th bit is set in c then
1552 //
1553 // tbz x', b -> tbnz x, b
1554 //
1555 // Because x' only has the b-th bit set if x does not.
1556 if ((*C >> Bit) & 1)
1557 Invert = !Invert;
1558 NextReg = TestReg;
1559 break;
1560 }
1561
1562 // Check if we found anything worth folding.
1563 if (!NextReg.isValid())
1564 return Reg;
1565 Reg = NextReg;
1566 }
1567
1568 return Reg;
1569}
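// Illustrative example (not in the original source): given %y = G_SHL %x, 2
// and a request to test bit 3 of %y, the walk above rewrites this to testing
// bit 1 of %x instead.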
1570
1571MachineInstr *AArch64InstructionSelector::emitTestBit(
1572 Register TestReg, uint64_t Bit, bool IsNegative, MachineBasicBlock *DstMBB,
1573 MachineIRBuilder &MIB) const {
1574 assert(TestReg.isValid());
1575 assert(ProduceNonFlagSettingCondBr &&
1576 "Cannot emit TB(N)Z with speculation tracking!");
1577 MachineRegisterInfo &MRI = *MIB.getMRI();
1578
1579 // Attempt to optimize the test bit by walking over instructions.
1580 TestReg = getTestBitReg(TestReg, Bit, IsNegative, MRI);
1581 LLT Ty = MRI.getType(TestReg);
1582 unsigned Size = Ty.getSizeInBits();
1583 assert(!Ty.isVector() && "Expected a scalar!");
1584 assert(Bit < 64 && "Bit is too large!");
1585
1586 // When the test register is a 64-bit register, we have to narrow to make
1587 // TBNZW work.
1588 bool UseWReg = Bit < 32;
1589 unsigned NecessarySize = UseWReg ? 32 : 64;
1590 if (Size != NecessarySize)
1591 TestReg = moveScalarRegClass(
1592 TestReg, UseWReg ? AArch64::GPR32RegClass : AArch64::GPR64RegClass,
1593 MIB);
1594
1595 static const unsigned OpcTable[2][2] = {{AArch64::TBZX, AArch64::TBNZX},
1596 {AArch64::TBZW, AArch64::TBNZW}};
1597 unsigned Opc = OpcTable[UseWReg][IsNegative];
1598 auto TestBitMI =
1599 MIB.buildInstr(Opc).addReg(TestReg).addImm(Bit).addMBB(DstMBB);
1600 constrainSelectedInstRegOperands(*TestBitMI, TII, TRI, RBI);
1601 return &*TestBitMI;
1602}
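// Illustrative example (not in the original source): emitTestBit(%x, 3,
// /*IsNegative=*/true, %bb.3, MIB) produces TBNZW %x, 3, %bb.3, after
// narrowing %x to a 32-bit register class if needed, since bits below 32 use
// the W-register forms.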
1603
1604bool AArch64InstructionSelector::tryOptAndIntoCompareBranch(
1605 MachineInstr &AndInst, bool Invert, MachineBasicBlock *DstMBB,
1606 MachineIRBuilder &MIB) const {
1607 assert(AndInst.getOpcode() == TargetOpcode::G_AND && "Expected G_AND only?");
1608 // Given something like this:
1609 //
1610 // %x = ...Something...
1611 // %one = G_CONSTANT i64 1
1612 // %zero = G_CONSTANT i64 0
1613 // %and = G_AND %x, %one
1614 // %cmp = G_ICMP intpred(ne), %and, %zero
1615 // %cmp_trunc = G_TRUNC %cmp
1616 // G_BRCOND %cmp_trunc, %bb.3
1617 //
1618 // We want to try and fold the AND into the G_BRCOND and produce either a
1619 // TBNZ (when we have intpred(ne)) or a TBZ (when we have intpred(eq)).
1620 //
1621 // In this case, we'd get
1622 //
1623 // TBNZ %x %bb.3
1624 //
1625
1626 // Check if the AND has a constant on its RHS which we can use as a mask.
1627 // If it's a power of 2, then it's the same as checking a specific bit.
1628 // (e.g, ANDing with 8 == ANDing with 000...100 == testing if bit 3 is set)
1629 auto MaybeBit = getIConstantVRegValWithLookThrough(
1630 AndInst.getOperand(2).getReg(), *MIB.getMRI());
1631 if (!MaybeBit)
1632 return false;
1633
1634 int32_t Bit = MaybeBit->Value.exactLogBase2();
1635 if (Bit < 0)
1636 return false;
1637
1638 Register TestReg = AndInst.getOperand(1).getReg();
1639
1640 // Emit a TB(N)Z.
1641 emitTestBit(TestReg, Bit, Invert, DstMBB, MIB);
1642 return true;
1643}
1644
1645MachineInstr *AArch64InstructionSelector::emitCBZ(Register CompareReg,
1646 bool IsNegative,
1647 MachineBasicBlock *DestMBB,
1648 MachineIRBuilder &MIB) const {
1649 assert(ProduceNonFlagSettingCondBr && "CBZ does not set flags!");
1650 MachineRegisterInfo &MRI = *MIB.getMRI();
1651 assert(RBI.getRegBank(CompareReg, MRI, TRI)->getID() ==
1652 AArch64::GPRRegBankID &&
1653 "Expected GPRs only?");
1654 auto Ty = MRI.getType(CompareReg);
1655 unsigned Width = Ty.getSizeInBits();
1656 assert(!Ty.isVector() && "Expected scalar only?");
1657 assert(Width <= 64 && "Expected width to be at most 64?");
1658 static const unsigned OpcTable[2][2] = {{AArch64::CBZW, AArch64::CBZX},
1659 {AArch64::CBNZW, AArch64::CBNZX}};
1660 unsigned Opc = OpcTable[IsNegative][Width == 64];
1661 auto BranchMI = MIB.buildInstr(Opc, {}, {CompareReg}).addMBB(DestMBB);
1662 constrainSelectedInstRegOperands(*BranchMI, TII, TRI, RBI);
1663 return &*BranchMI;
1664}
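// Illustrative example (not in the original source): emitCBZ on a 64-bit GPR
// with IsNegative set emits CBNZX; the 32-bit, non-negated form is CBZW.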
1665
1666bool AArch64InstructionSelector::selectCompareBranchFedByFCmp(
1667 MachineInstr &I, MachineInstr &FCmp, MachineIRBuilder &MIB) const {
1668 assert(FCmp.getOpcode() == TargetOpcode::G_FCMP);
1669 assert(I.getOpcode() == TargetOpcode::G_BRCOND);
1670 // Unfortunately, the mapping of LLVM FP CC's onto AArch64 CC's isn't
1671 // totally clean. Some of them require two branches to implement.
1672 auto Pred = (CmpInst::Predicate)FCmp.getOperand(1).getPredicate();
1673 emitFPCompare(FCmp.getOperand(2).getReg(), FCmp.getOperand(3).getReg(), MIB,
1674 Pred);
1675 AArch64CC::CondCode CC1, CC2;
1676 changeFCMPPredToAArch64CC(static_cast<CmpInst::Predicate>(Pred), CC1, CC2);
1677 MachineBasicBlock *DestMBB = I.getOperand(1).getMBB();
1678 MIB.buildInstr(AArch64::Bcc, {}, {}).addImm(CC1).addMBB(DestMBB);
1679 if (CC2 != AArch64CC::AL)
1680 MIB.buildInstr(AArch64::Bcc, {}, {}).addImm(CC2).addMBB(DestMBB);
1681 I.eraseFromParent();
1682 return true;
1683}
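// Sketch of the two-branch case mentioned above: an unordered-or-equal (ueq)
// predicate maps to two AArch64 condition codes, so the emitted sequence is
// roughly
//
//   FCMP ...
//   Bcc eq, %bb.dest
//   Bcc vs, %bb.dest
//
// whereas a simple predicate such as oeq needs only the single Bcc.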
1684
1685bool AArch64InstructionSelector::tryOptCompareBranchFedByICmp(
1686 MachineInstr &I, MachineInstr &ICmp, MachineIRBuilder &MIB) const {
1687 assert(ICmp.getOpcode() == TargetOpcode::G_ICMP);
1688 assert(I.getOpcode() == TargetOpcode::G_BRCOND);
1689 // Attempt to optimize the G_BRCOND + G_ICMP into a TB(N)Z/CB(N)Z.
1690 //
1691 // Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z
1692 // instructions will not be produced, as they are conditional branch
1693 // instructions that do not set flags.
1694 if (!ProduceNonFlagSettingCondBr)
1695 return false;
1696
1697 MachineRegisterInfo &MRI = *MIB.getMRI();
1698 MachineBasicBlock *DestMBB = I.getOperand(1).getMBB();
1699 auto Pred =
1700 static_cast<CmpInst::Predicate>(ICmp.getOperand(1).getPredicate());
1701 Register LHS = ICmp.getOperand(2).getReg();
1702 Register RHS = ICmp.getOperand(3).getReg();
1703
1704 // We're allowed to emit a TB(N)Z/CB(N)Z. Try to do that.
1705 auto VRegAndVal = getIConstantVRegValWithLookThrough(RHS, MRI);
1706 MachineInstr *AndInst = getOpcodeDef(TargetOpcode::G_AND, LHS, MRI);
1707
1708 // When we can emit a TB(N)Z, prefer that.
1709 //
1710 // Handle non-commutative condition codes first.
1711 // Note that we don't want to do this when we have a G_AND because it can
1712 // become a tst. The tst will make the test bit in the TB(N)Z redundant.
1713 if (VRegAndVal && !AndInst) {
1714 int64_t C = VRegAndVal->Value.getSExtValue();
1715
1716 // When we have a greater-than comparison, we can just test if the msb is
1717 // zero.
1718 if (C == -1 && Pred == CmpInst::ICMP_SGT) {
1719 uint64_t Bit = MRI.getType(LHS).getSizeInBits() - 1;
1720 emitTestBit(LHS, Bit, /*IsNegative = */ false, DestMBB, MIB);
1721 I.eraseFromParent();
1722 return true;
1723 }
1724
1725 // When we have a less than comparison, we can just test if the msb is not
1726 // zero.
1727 if (C == 0 && Pred == CmpInst::ICMP_SLT) {
1728 uint64_t Bit = MRI.getType(LHS).getSizeInBits() - 1;
1729 emitTestBit(LHS, Bit, /*IsNegative = */ true, DestMBB, MIB);
1730 I.eraseFromParent();
1731 return true;
1732 }
1733
1734 // Inversely, if we have a signed greater-than-or-equal comparison to zero,
1735 // we can test if the msb is zero.
1736 if (C == 0 && Pred == CmpInst::ICMP_SGE) {
1737 uint64_t Bit = MRI.getType(LHS).getSizeInBits() - 1;
1738 emitTestBit(LHS, Bit, /*IsNegative = */ false, DestMBB, MIB);
1739 I.eraseFromParent();
1740 return true;
1741 }
1742 }
1743
1744 // Attempt to handle commutative condition codes. Right now, that's only
1745 // eq/ne.
1746 if (ICmpInst::isEquality(Pred)) {
1747 if (!VRegAndVal) {
1748 std::swap(RHS, LHS);
1749 VRegAndVal = getIConstantVRegValWithLookThrough(RHS, MRI);
1750 AndInst = getOpcodeDef(TargetOpcode::G_AND, LHS, MRI);
1751 }
1752
1753 if (VRegAndVal && VRegAndVal->Value == 0) {
1754 // If there's a G_AND feeding into this branch, try to fold it away by
1755 // emitting a TB(N)Z instead.
1756 //
1757 // Note: If we have LT, then it *is* possible to fold, but it wouldn't be
1758 // beneficial. When we have an AND and LT, we need a TST/ANDS, so folding
1759 // would be redundant.
1760 if (AndInst &&
1761 tryOptAndIntoCompareBranch(
1762 *AndInst, /*Invert = */ Pred == CmpInst::ICMP_NE, DestMBB, MIB)) {
1763 I.eraseFromParent();
1764 return true;
1765 }
1766
1767 // Otherwise, try to emit a CB(N)Z instead.
1768 auto LHSTy = MRI.getType(LHS);
1769 if (!LHSTy.isVector() && LHSTy.getSizeInBits() <= 64) {
1770 emitCBZ(LHS, /*IsNegative = */ Pred == CmpInst::ICMP_NE, DestMBB, MIB);
1771 I.eraseFromParent();
1772 return true;
1773 }
1774 }
1775 }
1776
1777 return false;
1778}
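// Concrete example of the sign-bit cases above (illustrative only): for a
// 64-bit %x,
//
//   %cmp = G_ICMP intpred(slt), %x, 0
//   G_BRCOND %cmp, %bb.3
//
// becomes "TBNZ %x, #63, %bb.3", and intpred(sge) against 0 becomes the
// matching "TBZ %x, #63, %bb.3".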
1779
1780bool AArch64InstructionSelector::selectCompareBranchFedByICmp(
1781 MachineInstr &I, MachineInstr &ICmp, MachineIRBuilder &MIB) const {
1782 assert(ICmp.getOpcode() == TargetOpcode::G_ICMP);
1783 assert(I.getOpcode() == TargetOpcode::G_BRCOND);
1784 if (tryOptCompareBranchFedByICmp(I, ICmp, MIB))
1785 return true;
1786
1787 // Couldn't optimize. Emit a compare + a Bcc.
1788 MachineBasicBlock *DestMBB = I.getOperand(1).getMBB();
1789 auto PredOp = ICmp.getOperand(1);
1790 emitIntegerCompare(ICmp.getOperand(2), ICmp.getOperand(3), PredOp, MIB);
1791 const AArch64CC::CondCode CC = changeICMPPredToAArch64CC(
1792 static_cast<CmpInst::Predicate>(PredOp.getPredicate()));
1793 MIB.buildInstr(AArch64::Bcc, {}, {}).addImm(CC).addMBB(DestMBB);
1794 I.eraseFromParent();
1795 return true;
1796}
1797
1798bool AArch64InstructionSelector::selectCompareBranch(
1799 MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) {
1800 Register CondReg = I.getOperand(0).getReg();
1801 MachineInstr *CCMI = MRI.getVRegDef(CondReg);
1802 // Try to select the G_BRCOND using whatever is feeding the condition if
1803 // possible.
1804 unsigned CCMIOpc = CCMI->getOpcode();
1805 if (CCMIOpc == TargetOpcode::G_FCMP)
1806 return selectCompareBranchFedByFCmp(I, *CCMI, MIB);
1807 if (CCMIOpc == TargetOpcode::G_ICMP)
1808 return selectCompareBranchFedByICmp(I, *CCMI, MIB);
1809
1810 // Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z
1811 // instructions will not be produced, as they are conditional branch
1812 // instructions that do not set flags.
1813 if (ProduceNonFlagSettingCondBr) {
1814 emitTestBit(CondReg, /*Bit = */ 0, /*IsNegative = */ true,
1815 I.getOperand(1).getMBB(), MIB);
1816 I.eraseFromParent();
1817 return true;
1818 }
1819
1820 // Can't emit TB(N)Z/CB(N)Z. Emit a tst + bcc instead.
1821 auto TstMI =
1822 MIB.buildInstr(AArch64::ANDSWri, {LLT::scalar(32)}, {CondReg}).addImm(1);
1823 constrainSelectedInstRegOperands(*TstMI, TII, TRI, RBI);
1824 auto Bcc = MIB.buildInstr(AArch64::Bcc)
1825 .addImm(AArch64CC::NE)
1826 .addMBB(I.getOperand(1).getMBB());
1827 I.eraseFromParent();
1828 return constrainSelectedInstRegOperands(*Bcc, TII, TRI, RBI);
1829}
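// When TB(N)Z/CB(N)Z are not allowed (e.g. under speculative load hardening),
// the fallback above is roughly "tst w_cond, #0x1; b.ne %bb.dest", i.e. a
// flag-setting AND of bit 0 of the boolean followed by a conditional branch.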
1830
1831/// Returns the element immediate value of a vector shift operand if found.
1832/// This needs to detect a splat-like operation, e.g. a G_BUILD_VECTOR.
1833static std::optional<int64_t> getVectorShiftImm(Register Reg,
1834 MachineRegisterInfo &MRI) {
1835 assert(MRI.getType(Reg).isVector() && "Expected a *vector* shift operand");
1836 MachineInstr *OpMI = MRI.getVRegDef(Reg);
1837 return getAArch64VectorSplatScalar(*OpMI, MRI);
1838}
1839
1840/// Matches and returns the shift immediate value for a SHL instruction given
1841/// a shift operand.
1842static std::optional<int64_t> getVectorSHLImm(LLT SrcTy, Register Reg,
1843 MachineRegisterInfo &MRI) {
1844 std::optional<int64_t> ShiftImm = getVectorShiftImm(Reg, MRI);
1845 if (!ShiftImm)
1846 return std::nullopt;
1847 // Check the immediate is in range for a SHL.
1848 int64_t Imm = *ShiftImm;
1849 if (Imm < 0)
1850 return std::nullopt;
1851 switch (SrcTy.getElementType().getSizeInBits()) {
1852 default:
1853 LLVM_DEBUG(dbgs() << "Unhandled element type for vector shift");
1854 return std::nullopt;
1855 case 8:
1856 if (Imm > 7)
1857 return std::nullopt;
1858 break;
1859 case 16:
1860 if (Imm > 15)
1861 return std::nullopt;
1862 break;
1863 case 32:
1864 if (Imm > 31)
1865 return std::nullopt;
1866 break;
1867 case 64:
1868 if (Imm > 63)
1869 return std::nullopt;
1870 break;
1871 }
1872 return Imm;
1873}
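// For instance (illustrative only), with a <4 x s32> shift a splat amount of 5
// is accepted (5 <= 31) and can use the immediate form, while a splat of 40 is
// out of range and falls back to the register form below.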
1874
1875bool AArch64InstructionSelector::selectVectorSHL(MachineInstr &I,
1876 MachineRegisterInfo &MRI) {
1877 assert(I.getOpcode() == TargetOpcode::G_SHL);
1878 Register DstReg = I.getOperand(0).getReg();
1879 const LLT Ty = MRI.getType(DstReg);
1880 Register Src1Reg = I.getOperand(1).getReg();
1881 Register Src2Reg = I.getOperand(2).getReg();
1882
1883 if (!Ty.isVector())
1884 return false;
1885
1886 // Check if we have a vector of constants on RHS that we can select as the
1887 // immediate form.
1888 std::optional<int64_t> ImmVal = getVectorSHLImm(Ty, Src2Reg, MRI);
1889
1890 unsigned Opc = 0;
1891 if (Ty == LLT::fixed_vector(2, 64)) {
1892 Opc = ImmVal ? AArch64::SHLv2i64_shift : AArch64::USHLv2i64;
1893 } else if (Ty == LLT::fixed_vector(4, 32)) {
1894 Opc = ImmVal ? AArch64::SHLv4i32_shift : AArch64::USHLv4i32;
1895 } else if (Ty == LLT::fixed_vector(2, 32)) {
1896 Opc = ImmVal ? AArch64::SHLv2i32_shift : AArch64::USHLv2i32;
1897 } else if (Ty == LLT::fixed_vector(4, 16)) {
1898 Opc = ImmVal ? AArch64::SHLv4i16_shift : AArch64::USHLv4i16;
1899 } else if (Ty == LLT::fixed_vector(8, 16)) {
1900 Opc = ImmVal ? AArch64::SHLv8i16_shift : AArch64::USHLv8i16;
1901 } else if (Ty == LLT::fixed_vector(16, 8)) {
1902 Opc = ImmVal ? AArch64::SHLv16i8_shift : AArch64::USHLv16i8;
1903 } else if (Ty == LLT::fixed_vector(8, 8)) {
1904 Opc = ImmVal ? AArch64::SHLv8i8_shift : AArch64::USHLv8i8;
1905 } else {
1906 LLVM_DEBUG(dbgs() << "Unhandled G_SHL type");
1907 return false;
1908 }
1909
1910 auto Shl = MIB.buildInstr(Opc, {DstReg}, {Src1Reg});
1911 if (ImmVal)
1912 Shl.addImm(*ImmVal);
1913 else
1914 Shl.addUse(Src2Reg);
1915 constrainSelectedInstRegOperands(*Shl, TII, TRI, RBI);
1916 I.eraseFromParent();
1917 return true;
1918}
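// Example selection (illustrative): for %dst:fpr(<4 x s32>) = G_SHL %v, %amt
// where %amt is a G_BUILD_VECTOR splat of the constant 3, this emits
// "SHLv4i32_shift %dst, %v, 3"; with a non-constant %amt it emits
// "USHLv4i32 %dst, %v, %amt" instead.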
1919
1920bool AArch64InstructionSelector::selectVectorAshrLshr(
1921 MachineInstr &I, MachineRegisterInfo &MRI) {
1922 assert(I.getOpcode() == TargetOpcode::G_ASHR ||
1923 I.getOpcode() == TargetOpcode::G_LSHR);
1924 Register DstReg = I.getOperand(0).getReg();
1925 const LLT Ty = MRI.getType(DstReg);
1926 Register Src1Reg = I.getOperand(1).getReg();
1927 Register Src2Reg = I.getOperand(2).getReg();
1928
1929 if (!Ty.isVector())
1930 return false;
1931
1932 bool IsASHR = I.getOpcode() == TargetOpcode::G_ASHR;
1933
1934 // We expect the immediate case to be lowered in the PostLegalCombiner to
1935 // AArch64ISD::VASHR or AArch64ISD::VLSHR equivalents.
1936
1937 // There is no vector shift-right-by-register instruction; instead, the
1938 // shift-left-by-register instruction takes a signed amount, where negative
1939 // values specify a right shift.
1940
1941 unsigned Opc = 0;
1942 unsigned NegOpc = 0;
1943 const TargetRegisterClass *RC =
1944 getRegClassForTypeOnBank(Ty, RBI.getRegBank(AArch64::FPRRegBankID));
1945 if (Ty == LLT::fixed_vector(2, 64)) {
1946 Opc = IsASHR ? AArch64::SSHLv2i64 : AArch64::USHLv2i64;
1947 NegOpc = AArch64::NEGv2i64;
1948 } else if (Ty == LLT::fixed_vector(4, 32)) {
1949 Opc = IsASHR ? AArch64::SSHLv4i32 : AArch64::USHLv4i32;
1950 NegOpc = AArch64::NEGv4i32;
1951 } else if (Ty == LLT::fixed_vector(2, 32)) {
1952 Opc = IsASHR ? AArch64::SSHLv2i32 : AArch64::USHLv2i32;
1953 NegOpc = AArch64::NEGv2i32;
1954 } else if (Ty == LLT::fixed_vector(4, 16)) {
1955 Opc = IsASHR ? AArch64::SSHLv4i16 : AArch64::USHLv4i16;
1956 NegOpc = AArch64::NEGv4i16;
1957 } else if (Ty == LLT::fixed_vector(8, 16)) {
1958 Opc = IsASHR ? AArch64::SSHLv8i16 : AArch64::USHLv8i16;
1959 NegOpc = AArch64::NEGv8i16;
1960 } else if (Ty == LLT::fixed_vector(16, 8)) {
1961 Opc = IsASHR ? AArch64::SSHLv16i8 : AArch64::USHLv16i8;
1962 NegOpc = AArch64::NEGv16i8;
1963 } else if (Ty == LLT::fixed_vector(8, 8)) {
1964 Opc = IsASHR ? AArch64::SSHLv8i8 : AArch64::USHLv8i8;
1965 NegOpc = AArch64::NEGv8i8;
1966 } else {
1967 LLVM_DEBUG(dbgs() << "Unhandled G_ASHR type");
1968 return false;
1969 }
1970
1971 auto Neg = MIB.buildInstr(NegOpc, {RC}, {Src2Reg});
1972 constrainSelectedInstRegOperands(*Neg, TII, TRI, RBI);
1973 auto SShl = MIB.buildInstr(Opc, {DstReg}, {Src1Reg, Neg});
1974 constrainSelectedInstRegOperands(*SShl, TII, TRI, RBI);
1975 I.eraseFromParent();
1976 return true;
1977}
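// Example (illustrative): an ashr of <2 x s64> by a register amount becomes a
// negate of the amount followed by a signed shift-left-by-register,
//
//   %neg = NEGv2i64 %amt
//   %dst = SSHLv2i64 %src, %neg
//
// relying on negative SSHL/USHL amounts meaning "shift right".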
1978
1979bool AArch64InstructionSelector::selectVaStartAAPCS(
1980 MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const {
1981 return false;
1982}
1983
1984bool AArch64InstructionSelector::selectVaStartDarwin(
1985 MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const {
1986 AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
1987 Register ListReg = I.getOperand(0).getReg();
1988
1989 Register ArgsAddrReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
1990
1991 int FrameIdx = FuncInfo->getVarArgsStackIndex();
1992 if (MF.getSubtarget<AArch64Subtarget>().isCallingConvWin64(
1993 MF.getFunction().getCallingConv())) {
1994 FrameIdx = FuncInfo->getVarArgsGPRSize() > 0
1995 ? FuncInfo->getVarArgsGPRIndex()
1996 : FuncInfo->getVarArgsStackIndex();
1997 }
1998
1999 auto MIB =
2000 BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::ADDXri))
2001 .addDef(ArgsAddrReg)
2002 .addFrameIndex(FrameIdx)
2003 .addImm(0)
2004 .addImm(0);
2005
2006 constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
2007
2008 MIB = BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::STRXui))
2009 .addUse(ArgsAddrReg)
2010 .addUse(ListReg)
2011 .addImm(0)
2012 .addMemOperand(*I.memoperands_begin());
2013
2014 constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
2015 I.eraseFromParent();
2016 return true;
2017}
2018
2019void AArch64InstructionSelector::materializeLargeCMVal(
2020 MachineInstr &I, const Value *V, unsigned OpFlags) {
2021 MachineBasicBlock &MBB = *I.getParent();
2022 MachineFunction &MF = *MBB.getParent();
2023 MachineRegisterInfo &MRI = MF.getRegInfo();
2024
2025 auto MovZ = MIB.buildInstr(AArch64::MOVZXi, {&AArch64::GPR64RegClass}, {});
2026 MovZ->addOperand(MF, I.getOperand(1));
2027 MovZ->getOperand(1).setTargetFlags(OpFlags | AArch64II::MO_G0 |
2028 AArch64II::MO_NC);
2029 MovZ->addOperand(MF, MachineOperand::CreateImm(0));
2030 constrainSelectedInstRegOperands(*MovZ, TII, TRI, RBI);
2031
2032 auto BuildMovK = [&](Register SrcReg, unsigned char Flags, unsigned Offset,
2033 Register ForceDstReg) {
2034 Register DstReg = ForceDstReg
2035 ? ForceDstReg
2036 : MRI.createVirtualRegister(&AArch64::GPR64RegClass);
2037 auto MovI = MIB.buildInstr(AArch64::MOVKXi).addDef(DstReg).addUse(SrcReg);
2038 if (auto *GV = dyn_cast<GlobalValue>(V)) {
2039 MovI->addOperand(MF, MachineOperand::CreateGA(
2040 GV, MovZ->getOperand(1).getOffset(), Flags));
2041 } else {
2042 MovI->addOperand(
2043 MF, MachineOperand::CreateBA(cast<BlockAddress>(V),
2044 MovZ->getOperand(1).getOffset(), Flags));
2045 }
2046 MovI->addOperand(MF, MachineOperand::CreateImm(Offset));
2047 constrainSelectedInstRegOperands(*MovI, TII, TRI, RBI);
2048 return DstReg;
2049 };
2050 Register DstReg = BuildMovK(MovZ.getReg(0),
2051 AArch64II::MO_G1 | AArch64II::MO_NC, 16, 0);
2052 DstReg = BuildMovK(DstReg, AArch64II::MO_G2 | AArch64II::MO_NC, 32, 0);
2053 BuildMovK(DstReg, AArch64II::MO_G3, 48, I.getOperand(0).getReg());
2054}
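// The resulting sequence (sketch) materializes the 64-bit address 16 bits at a
// time:
//
//   %t0 = MOVZXi sym(g0), 0
//   %t1 = MOVKXi %t0, sym(g1, nc), 16
//   %t2 = MOVKXi %t1, sym(g2, nc), 32
//   %dst = MOVKXi %t2, sym(g3), 48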
2055
2056bool AArch64InstructionSelector::preISelLower(MachineInstr &I) {
2057 MachineBasicBlock &MBB = *I.getParent();
2058 MachineFunction &MF = *MBB.getParent();
2059 MachineRegisterInfo &MRI = MF.getRegInfo();
2060
2061 switch (I.getOpcode()) {
2062 case TargetOpcode::G_STORE: {
2063 bool Changed = contractCrossBankCopyIntoStore(I, MRI);
2064 MachineOperand &SrcOp = I.getOperand(0);
2065 if (MRI.getType(SrcOp.getReg()).isPointer()) {
2066 // Allow matching with imported patterns for stores of pointers. Unlike
2067 // G_LOAD/G_PTR_ADD, we may not have selected all users. So, emit a copy
2068 // and constrain.
2069 auto Copy = MIB.buildCopy(LLT::scalar(64), SrcOp);
2070 Register NewSrc = Copy.getReg(0);
2071 SrcOp.setReg(NewSrc);
2072 RBI.constrainGenericRegister(NewSrc, AArch64::GPR64RegClass, MRI);
2073 Changed = true;
2074 }
2075 return Changed;
2076 }
2077 case TargetOpcode::G_PTR_ADD:
2078 return convertPtrAddToAdd(I, MRI);
2079 case TargetOpcode::G_LOAD: {
2080 // For scalar loads of pointers, we try to convert the dest type from p0
2081 // to s64 so that our imported patterns can match. Like with the G_PTR_ADD
2082 // conversion, this should be ok because all users should have been
2083 // selected already, so the type doesn't matter for them.
2084 Register DstReg = I.getOperand(0).getReg();
2085 const LLT DstTy = MRI.getType(DstReg);
2086 if (!DstTy.isPointer())
2087 return false;
2088 MRI.setType(DstReg, LLT::scalar(64));
2089 return true;
2090 }
2091 case AArch64::G_DUP: {
2092 // Convert the type from p0 to s64 to help selection.
2093 LLT DstTy = MRI.getType(I.getOperand(0).getReg());
2094 if (!DstTy.isPointerVector())
2095 return false;
2096 auto NewSrc = MIB.buildCopy(LLT::scalar(64), I.getOperand(1).getReg());
2097 MRI.setType(I.getOperand(0).getReg(),
2098 DstTy.changeElementType(LLT::scalar(64)));
2099 MRI.setRegClass(NewSrc.getReg(0), &AArch64::GPR64RegClass);
2100 I.getOperand(1).setReg(NewSrc.getReg(0));
2101 return true;
2102 }
2103 case TargetOpcode::G_UITOFP:
2104 case TargetOpcode::G_SITOFP: {
2105 // If both source and destination regbanks are FPR, then convert the opcode
2106 // to G_SITOF so that the importer can select it to an fpr variant.
2107 // Otherwise, it ends up matching an fpr/gpr variant and adding a cross-bank
2108 // copy.
2109 Register SrcReg = I.getOperand(1).getReg();
2110 LLT SrcTy = MRI.getType(SrcReg);
2111 LLT DstTy = MRI.getType(I.getOperand(0).getReg());
2112 if (SrcTy.isVector() || SrcTy.getSizeInBits() != DstTy.getSizeInBits())
2113 return false;
2114
2115 if (RBI.getRegBank(SrcReg, MRI, TRI)->getID() == AArch64::FPRRegBankID) {
2116 if (I.getOpcode() == TargetOpcode::G_SITOFP)
2117 I.setDesc(TII.get(AArch64::G_SITOF));
2118 else
2119 I.setDesc(TII.get(AArch64::G_UITOF));
2120 return true;
2121 }
2122 return false;
2123 }
2124 default:
2125 return false;
2126 }
2127}
2128
2129/// This lowering tries to look for G_PTR_ADD instructions and then converts
2130/// them to a standard G_ADD with a COPY on the source.
2131///
2132/// The motivation behind this is to expose the add semantics to the imported
2133/// tablegen patterns. We shouldn't need to check for uses being loads/stores,
2134/// because the selector works bottom up, uses before defs. By the time we
2135/// end up trying to select a G_PTR_ADD, we should have already attempted to
2136/// fold this into addressing modes and were therefore unsuccessful.
2137bool AArch64InstructionSelector::convertPtrAddToAdd(
2138 MachineInstr &I, MachineRegisterInfo &MRI) {
2139 assert(I.getOpcode() == TargetOpcode::G_PTR_ADD && "Expected G_PTR_ADD");
2140 Register DstReg = I.getOperand(0).getReg();
2141 Register AddOp1Reg = I.getOperand(1).getReg();
2142 const LLT PtrTy = MRI.getType(DstReg);
2143 if (PtrTy.getAddressSpace() != 0)
2144 return false;
2145
2146 const LLT CastPtrTy =
2147 PtrTy.isVector() ? LLT::fixed_vector(2, 64) : LLT::scalar(64);
2148 auto PtrToInt = MIB.buildPtrToInt(CastPtrTy, AddOp1Reg);
2149 // Set regbanks on the registers.
2150 if (PtrTy.isVector())
2151 MRI.setRegBank(PtrToInt.getReg(0), RBI.getRegBank(AArch64::FPRRegBankID));
2152 else
2153 MRI.setRegBank(PtrToInt.getReg(0), RBI.getRegBank(AArch64::GPRRegBankID));
2154
2155 // Now turn the %dst(p0) = G_PTR_ADD %base, off into:
2156 // %dst(intty) = G_ADD %intbase, off
2157 I.setDesc(TII.get(TargetOpcode::G_ADD));
2158 MRI.setType(DstReg, CastPtrTy);
2159 I.getOperand(1).setReg(PtrToInt.getReg(0));
2160 if (!select(*PtrToInt)) {
2161 LLVM_DEBUG(dbgs() << "Failed to select G_PTRTOINT in convertPtrAddToAdd");
2162 return false;
2163 }
2164
2165 // Also take the opportunity here to try to do some optimization.
2166 // Try to convert this into a G_SUB if the offset is a 0-x negate idiom.
2167 Register NegatedReg;
2168 if (!mi_match(I.getOperand(2).getReg(), MRI, m_Neg(m_Reg(NegatedReg))))
2169 return true;
2170 I.getOperand(2).setReg(NegatedReg);
2171 I.setDesc(TII.get(TargetOpcode::G_SUB));
2172 return true;
2173}
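// For example (illustrative), %dst(p0) = G_PTR_ADD %base(p0), %off becomes
//
//   %intbase:gpr(s64) = G_PTRTOINT %base(p0)
//   %dst:gpr(s64) = G_ADD %intbase, %off
//
// and when %off was defined as a 0 - %x negation, the G_ADD is further turned
// into %dst = G_SUB %intbase, %x.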
2174
2175bool AArch64InstructionSelector::earlySelectSHL(MachineInstr &I,
2176 MachineRegisterInfo &MRI) {
2177 // We try to match the immediate variant of LSL, which is actually an alias
2178 // for a special case of UBFM. Otherwise, we fall back to the imported
2179 // selector which will match the register variant.
2180 assert(I.getOpcode() == TargetOpcode::G_SHL && "unexpected op");
2181 const auto &MO = I.getOperand(2);
2182 auto VRegAndVal = getIConstantVRegVal(MO.getReg(), MRI);
2183 if (!VRegAndVal)
2184 return false;
2185
2186 const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
2187 if (DstTy.isVector())
2188 return false;
2189 bool Is64Bit = DstTy.getSizeInBits() == 64;
2190 auto Imm1Fn = Is64Bit ? selectShiftA_64(MO) : selectShiftA_32(MO);
2191 auto Imm2Fn = Is64Bit ? selectShiftB_64(MO) : selectShiftB_32(MO);
2192
2193 if (!Imm1Fn || !Imm2Fn)
2194 return false;
2195
2196 auto NewI =
2197 MIB.buildInstr(Is64Bit ? AArch64::UBFMXri : AArch64::UBFMWri,
2198 {I.getOperand(0).getReg()}, {I.getOperand(1).getReg()});
2199
2200 for (auto &RenderFn : *Imm1Fn)
2201 RenderFn(NewI);
2202 for (auto &RenderFn : *Imm2Fn)
2203 RenderFn(NewI);
2204
2205 I.eraseFromParent();
2206 return constrainSelectedInstRegOperands(*NewI, TII, TRI, RBI);
2207}
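// Worked example (illustrative): a 64-bit G_SHL by the constant 3 selects the
// UBFM form of "lsl xd, xn, #3", i.e. "UBFMXri xd, xn, #61, #60"
// (immr = 64 - 3, imms = 63 - 3).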
2208
2209bool AArch64InstructionSelector::contractCrossBankCopyIntoStore(
2210 MachineInstr &I, MachineRegisterInfo &MRI) {
2211 assert(I.getOpcode() == TargetOpcode::G_STORE && "Expected G_STORE");
2212 // If we're storing a scalar, it doesn't matter what register bank that
2213 // scalar is on. All that matters is the size.
2214 //
2215 // So, if we see something like this (with a 32-bit scalar as an example):
2216 //
2217 // %x:gpr(s32) = ... something ...
2218 // %y:fpr(s32) = COPY %x:gpr(s32)
2219 // G_STORE %y:fpr(s32)
2220 //
2221 // We can fix this up into something like this:
2222 //
2223 // G_STORE %x:gpr(s32)
2224 //
2225 // And then continue the selection process normally.
2226 Register DefDstReg = getSrcRegIgnoringCopies(I.getOperand(0).getReg(), MRI);
2227 if (!DefDstReg.isValid())
2228 return false;
2229 LLT DefDstTy = MRI.getType(DefDstReg);
2230 Register StoreSrcReg = I.getOperand(0).getReg();
2231 LLT StoreSrcTy = MRI.getType(StoreSrcReg);
2232
2233 // If we get something strange like a physical register, then we shouldn't
2234 // go any further.
2235 if (!DefDstTy.isValid())
2236 return false;
2237
2238 // Are the source and dst types the same size?
2239 if (DefDstTy.getSizeInBits() != StoreSrcTy.getSizeInBits())
2240 return false;
2241
2242 if (RBI.getRegBank(StoreSrcReg, MRI, TRI) ==
2243 RBI.getRegBank(DefDstReg, MRI, TRI))
2244 return false;
2245
2246 // We have a cross-bank copy, which is entering a store. Let's fold it.
2247 I.getOperand(0).setReg(DefDstReg);
2248 return true;
2249}
2250
2251bool AArch64InstructionSelector::earlySelect(MachineInstr &I) {
2252 assert(I.getParent() && "Instruction should be in a basic block!");
2253 assert(I.getParent()->getParent() && "Instruction should be in a function!");
2254
2255 MachineBasicBlock &MBB = *I.getParent();
2256 MachineFunction &MF = *MBB.getParent();
2257 MachineRegisterInfo &MRI = MF.getRegInfo();
2258
2259 switch (I.getOpcode()) {
2260 case AArch64::G_DUP: {
2261 // Before selecting a DUP instruction, check if it is better selected as a
2262 // MOV or load from a constant pool.
2263 Register Src = I.getOperand(1).getReg();
2264 auto ValAndVReg = getAnyConstantVRegValWithLookThrough(Src, MRI);
2265 if (!ValAndVReg)
2266 return false;
2267 LLVMContext &Ctx = MF.getFunction().getContext();
2268 Register Dst = I.getOperand(0).getReg();
2269 auto *CV = ConstantDataVector::getSplat(
2270 MRI.getType(Dst).getNumElements(),
2271 ConstantInt::get(Type::getIntNTy(Ctx, MRI.getType(Src).getSizeInBits()),
2272 ValAndVReg->Value));
2273 if (!emitConstantVector(Dst, CV, MIB, MRI))
2274 return false;
2275 I.eraseFromParent();
2276 return true;
2277 }
2278 case TargetOpcode::G_SEXT:
2279 // Check for i64 sext(i32 vector_extract) prior to tablegen to select SMOV
2280 // over a normal extend.
2281 if (selectUSMovFromExtend(I, MRI))
2282 return true;
2283 return false;
2284 case TargetOpcode::G_BR:
2285 return false;
2286 case TargetOpcode::G_SHL:
2287 return earlySelectSHL(I, MRI);
2288 case TargetOpcode::G_CONSTANT: {
2289 bool IsZero = false;
2290 if (I.getOperand(1).isCImm())
2291 IsZero = I.getOperand(1).getCImm()->isZero();
2292 else if (I.getOperand(1).isImm())
2293 IsZero = I.getOperand(1).getImm() == 0;
2294
2295 if (!IsZero)
2296 return false;
2297
2298 Register DefReg = I.getOperand(0).getReg();
2299 LLT Ty = MRI.getType(DefReg);
2300 if (Ty.getSizeInBits() == 64) {
2301 I.getOperand(1).ChangeToRegister(AArch64::XZR, false);
2302 RBI.constrainGenericRegister(DefReg, AArch64::GPR64RegClass, MRI);
2303 } else if (Ty.getSizeInBits() == 32) {
2304 I.getOperand(1).ChangeToRegister(AArch64::WZR, false);
2305 RBI.constrainGenericRegister(DefReg, AArch64::GPR32RegClass, MRI);
2306 } else
2307 return false;
2308
2309 I.setDesc(TII.get(TargetOpcode::COPY));
2310 return true;
2311 }
2312
2313 case TargetOpcode::G_ADD: {
2314 // Check if this is being fed by a G_ICMP on either side.
2315 //
2316 // (cmp pred, x, y) + z
2317 //
2318 // In the above case, when the cmp is true, we increment z by 1. So, we can
2319 // fold the add into the cset for the cmp by using cinc.
2320 //
2321 // FIXME: This would probably be a lot nicer in PostLegalizerLowering.
2322 Register AddDst = I.getOperand(0).getReg();
2323 Register AddLHS = I.getOperand(1).getReg();
2324 Register AddRHS = I.getOperand(2).getReg();
2325 // Only handle scalars.
2326 LLT Ty = MRI.getType(AddLHS);
2327 if (Ty.isVector())
2328 return false;
2329 // Since G_ICMP is modeled as ADDS/SUBS/ANDS, we can handle 32 bits or 64
2330 // bits.
2331 unsigned Size = Ty.getSizeInBits();
2332 if (Size != 32 && Size != 64)
2333 return false;
2334 auto MatchCmp = [&](Register Reg) -> MachineInstr * {
2335 if (!MRI.hasOneNonDBGUse(Reg))
2336 return nullptr;
2337 // If the LHS of the add is 32 bits, then we want to fold a 32-bit
2338 // compare.
2339 if (Size == 32)
2340 return getOpcodeDef(TargetOpcode::G_ICMP, Reg, MRI);
2341 // We model scalar compares using 32-bit destinations right now.
2342 // If it's a 64-bit compare, it'll have 64-bit sources.
2343 Register ZExt;
2344 if (!mi_match(Reg, MRI,
2345 m_OneNonDBGUse(m_GZExt(m_OneNonDBGUse(m_Reg(ZExt))))))
2346 return nullptr;
2347 auto *Cmp = getOpcodeDef(TargetOpcode::G_ICMP, ZExt, MRI);
2348 if (!Cmp ||
2349 MRI.getType(Cmp->getOperand(2).getReg()).getSizeInBits() != 64)
2350 return nullptr;
2351 return Cmp;
2352 };
2353 // Try to match
2354 // z + (cmp pred, x, y)
2355 MachineInstr *Cmp = MatchCmp(AddRHS);
2356 if (!Cmp) {
2357 // (cmp pred, x, y) + z
2358 std::swap(AddLHS, AddRHS);
2359 Cmp = MatchCmp(AddRHS);
2360 if (!Cmp)
2361 return false;
2362 }
2363 auto &PredOp = Cmp->getOperand(1);
2364 auto Pred = static_cast<CmpInst::Predicate>(PredOp.getPredicate());
2365 const AArch64CC::CondCode InvCC =
2366 changeICMPPredToAArch64CC(CmpInst::getInversePredicate(Pred));
2368 emitIntegerCompare(/*LHS=*/Cmp->getOperand(2),
2369 /*RHS=*/Cmp->getOperand(3), PredOp, MIB);
2370 emitCSINC(/*Dst=*/AddDst, /*Src =*/AddLHS, /*Src2=*/AddLHS, InvCC, MIB);
2371 I.eraseFromParent();
2372 return true;
2373 }
2374 case TargetOpcode::G_OR: {
2375 // Look for operations that take the lower `Width=Size-ShiftImm` bits of
2376 // `ShiftSrc` and insert them into the upper `Width` bits of `MaskSrc` via
2377 // shifting and masking that we can replace with a BFI (encoded as a BFM).
2378 Register Dst = I.getOperand(0).getReg();
2379 LLT Ty = MRI.getType(Dst);
2380
2381 if (!Ty.isScalar())
2382 return false;
2383
2384 unsigned Size = Ty.getSizeInBits();
2385 if (Size != 32 && Size != 64)
2386 return false;
2387
2388 Register ShiftSrc;
2389 int64_t ShiftImm;
2390 Register MaskSrc;
2391 int64_t MaskImm;
2392 if (!mi_match(
2393 Dst, MRI,
2394 m_GOr(m_OneNonDBGUse(m_GShl(m_Reg(ShiftSrc), m_ICst(ShiftImm))),
2395 m_OneNonDBGUse(m_GAnd(m_Reg(MaskSrc), m_ICst(MaskImm))))))
2396 return false;
2397
2398 if (ShiftImm > Size || ((1ULL << ShiftImm) - 1ULL) != uint64_t(MaskImm))
2399 return false;
2400
2401 int64_t Immr = Size - ShiftImm;
2402 int64_t Imms = Size - ShiftImm - 1;
2403 unsigned Opc = Size == 32 ? AArch64::BFMWri : AArch64::BFMXri;
2404 emitInstr(Opc, {Dst}, {MaskSrc, ShiftSrc, Immr, Imms}, MIB);
2405 I.eraseFromParent();
2406 return true;
2407 }
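// Worked example (illustrative): with Size == 32 and ShiftImm == 8, the
// pattern "or (shl %a, 8), (and %b, 0xff)" inserts the low 24 bits of %a into
// bits [31:8] of %b; Immr == 24 and Imms == 23, which is the BFM encoding of
// "bfi wd, %a, #8, #24".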
2408 case TargetOpcode::G_FENCE: {
2409 if (I.getOperand(1).getImm() == 0)
2410 BuildMI(MBB, I, MIMetadata(I), TII.get(TargetOpcode::MEMBARRIER));
2411 else
2412 BuildMI(MBB, I, MIMetadata(I), TII.get(AArch64::DMB))
2413 .addImm(I.getOperand(0).getImm() == 4 ? 0x9 : 0xb);
2414 I.eraseFromParent();
2415 return true;
2416 }
2417 default:
2418 return false;
2419 }
2420}
2421
2422bool AArch64InstructionSelector::select(MachineInstr &I) {
2423 assert(I.getParent() && "Instruction should be in a basic block!");
2424 assert(I.getParent()->getParent() && "Instruction should be in a function!");
2425
2426 MachineBasicBlock &MBB = *I.getParent();
2427 MachineFunction &MF = *MBB.getParent();
2428 MachineRegisterInfo &MRI = MF.getRegInfo();
2429
2430 const AArch64Subtarget *Subtarget = &MF.getSubtarget<AArch64Subtarget>();
2431 if (Subtarget->requiresStrictAlign()) {
2432 // We don't support this feature yet.
2433 LLVM_DEBUG(dbgs() << "AArch64 GISel does not support strict-align yet\n");
2434 return false;
2435 }
2436
2437 MIB.setInstrAndDebugLoc(I);
2438
2439 unsigned Opcode = I.getOpcode();
2440 // G_PHI requires same handling as PHI
2441 if (!I.isPreISelOpcode() || Opcode == TargetOpcode::G_PHI) {
2442 // Certain non-generic instructions also need some special handling.
2443
2444 if (Opcode == TargetOpcode::LOAD_STACK_GUARD)
2445 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2446
2447 if (Opcode == TargetOpcode::PHI || Opcode == TargetOpcode::G_PHI) {
2448 const Register DefReg = I.getOperand(0).getReg();
2449 const LLT DefTy = MRI.getType(DefReg);
2450
2451 const RegClassOrRegBank &RegClassOrBank =
2452 MRI.getRegClassOrRegBank(DefReg);
2453
2454 const TargetRegisterClass *DefRC
2455 = RegClassOrBank.dyn_cast<const TargetRegisterClass *>();
2456 if (!DefRC) {
2457 if (!DefTy.isValid()) {
2458 LLVM_DEBUG(dbgs() << "PHI operand has no type, not a gvreg?\n");
2459 return false;
2460 }
2461 const RegisterBank &RB = *RegClassOrBank.get<const RegisterBank *>();
2462 DefRC = getRegClassForTypeOnBank(DefTy, RB);
2463 if (!DefRC) {
2464 LLVM_DEBUG(dbgs() << "PHI operand has unexpected size/bank\n");
2465 return false;
2466 }
2467 }
2468
2469 I.setDesc(TII.get(TargetOpcode::PHI));
2470
2471 return RBI.constrainGenericRegister(DefReg, *DefRC, MRI);
2472 }
2473
2474 if (I.isCopy())
2475 return selectCopy(I, TII, MRI, TRI, RBI);
2476
2477 if (I.isDebugInstr())
2478 return selectDebugInstr(I, MRI, RBI);
2479
2480 return true;
2481 }
2482
2483
2484 if (I.getNumOperands() != I.getNumExplicitOperands()) {
2485 LLVM_DEBUG(
2486 dbgs() << "Generic instruction has unexpected implicit operands\n");
2487 return false;
2488 }
2489
2490 // Try to do some lowering before we start instruction selecting. These
2491 // lowerings are purely transformations on the input G_MIR and so selection
2492 // must continue after any modification of the instruction.
2493 if (preISelLower(I)) {
2494 Opcode = I.getOpcode(); // The opcode may have been modified, refresh it.
2495 }
2496
2497 // There may be patterns where the importer can't deal with them optimally,
2498 // but does select it to a suboptimal sequence so our custom C++ selection
2499 // code later never has a chance to work on it. Therefore, we have an early
2500 // selection attempt here to give priority to certain selection routines
2501 // over the imported ones.
2502 if (earlySelect(I))
2503 return true;
2504
2505 if (selectImpl(I, *CoverageInfo))
2506 return true;
2507
2508 LLT Ty =
2509 I.getOperand(0).isReg() ? MRI.getType(I.getOperand(0).getReg()) : LLT{};
2510
2511 switch (Opcode) {
2512 case TargetOpcode::G_SBFX:
2513 case TargetOpcode::G_UBFX: {
2514 static const unsigned OpcTable[2][2] = {
2515 {AArch64::UBFMWri, AArch64::UBFMXri},
2516 {AArch64::SBFMWri, AArch64::SBFMXri}};
2517 bool IsSigned = Opcode == TargetOpcode::G_SBFX;
2518 unsigned Size = Ty.getSizeInBits();
2519 unsigned Opc = OpcTable[IsSigned][Size == 64];
2520 auto Cst1 =
2521 getIConstantVRegValWithLookThrough(I.getOperand(2).getReg(), MRI);
2522 assert(Cst1 && "Should have gotten a constant for src 1?");
2523 auto Cst2 =
2524 getIConstantVRegValWithLookThrough(I.getOperand(3).getReg(), MRI);
2525 assert(Cst2 && "Should have gotten a constant for src 2?");
2526 auto LSB = Cst1->Value.getZExtValue();
2527 auto Width = Cst2->Value.getZExtValue();
2528 auto BitfieldInst =
2529 MIB.buildInstr(Opc, {I.getOperand(0)}, {I.getOperand(1)})
2530 .addImm(LSB)
2531 .addImm(LSB + Width - 1);
2532 I.eraseFromParent();
2533 return constrainSelectedInstRegOperands(*BitfieldInst, TII, TRI, RBI);
2534 }
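// Worked example (illustrative): %d = G_UBFX %x, 8, 8 on a 32-bit value
// extracts bits [15:8] and is selected as "UBFMWri %d, %x, 8, 15"
// (immr = LSB, imms = LSB + Width - 1).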
2535 case TargetOpcode::G_BRCOND:
2536 return selectCompareBranch(I, MF, MRI);
2537
2538 case TargetOpcode::G_BRINDIRECT: {
2539 I.setDesc(TII.get(AArch64::BR));
2540 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2541 }
2542
2543 case TargetOpcode::G_BRJT:
2544 return selectBrJT(I, MRI);
2545
2546 case AArch64::G_ADD_LOW: {
2547 // This op may have been separated from its ADRP companion by the localizer
2548 // or some other code motion pass. Given that many CPUs will try to
2549 // macro fuse these operations anyway, select this into a MOVaddr pseudo
2550 // which will later be expanded into an ADRP+ADD pair after scheduling.
2551 MachineInstr *BaseMI = MRI.getVRegDef(I.getOperand(1).getReg());
2552 if (BaseMI->getOpcode() != AArch64::ADRP) {
2553 I.setDesc(TII.get(AArch64::ADDXri));
2554 I.addOperand(MachineOperand::CreateImm(0));
2555 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2556 }
2557 assert(TM.getCodeModel() == CodeModel::Small &&
2558 "Expected small code model");
2559 auto Op1 = BaseMI->getOperand(1);
2560 auto Op2 = I.getOperand(2);
2561 auto MovAddr = MIB.buildInstr(AArch64::MOVaddr, {I.getOperand(0)}, {})
2562 .addGlobalAddress(Op1.getGlobal(), Op1.getOffset(),
2563 Op1.getTargetFlags())
2564 .addGlobalAddress(Op2.getGlobal(), Op2.getOffset(),
2565 Op2.getTargetFlags());
2566 I.eraseFromParent();
2567 return constrainSelectedInstRegOperands(*MovAddr, TII, TRI, RBI);
2568 }
2569
2570 case TargetOpcode::G_FCONSTANT:
2571 case TargetOpcode::G_CONSTANT: {
2572 const bool isFP = Opcode == TargetOpcode::G_FCONSTANT;
2573
2574 const LLT s8 = LLT::scalar(8);
2575 const LLT s16 = LLT::scalar(16);
2576 const LLT s32 = LLT::scalar(32);
2577 const LLT s64 = LLT::scalar(64);
2578 const LLT s128 = LLT::scalar(128);
2579 const LLT p0 = LLT::pointer(0, 64);
2580
2581 const Register DefReg = I.getOperand(0).getReg();
2582 const LLT DefTy = MRI.getType(DefReg);
2583 const unsigned DefSize = DefTy.getSizeInBits();
2584 const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI);
2585
2586 // FIXME: Redundant check, but even less readable when factored out.
2587 if (isFP) {
2588 if (Ty != s16 && Ty != s32 && Ty != s64 && Ty != s128) {
2589 LLVM_DEBUG(dbgs() << "Unable to materialize FP " << Ty
2590 << " constant, expected: " << s16 << " or " << s32
2591 << " or " << s64 << " or " << s128 << '\n');
2592 return false;
2593 }
2594
2595 if (RB.getID() != AArch64::FPRRegBankID) {
2596 LLVM_DEBUG(dbgs() << "Unable to materialize FP " << Ty
2597 << " constant on bank: " << RB
2598 << ", expected: FPR\n");
2599 return false;
2600 }
2601
2602 // The case when we have 0.0 is covered by tablegen. Reject it here so we
2603 // can be sure tablegen works correctly and isn't rescued by this code.
2604 // 0.0 is not covered by tablegen for FP128. So we will handle this
2605 // scenario in the code here.
2606 if (DefSize != 128 && I.getOperand(1).getFPImm()->isExactlyValue(0.0))
2607 return false;
2608 } else {
2609 // s32 and s64 are covered by tablegen.
2610 if (Ty != p0 && Ty != s8 && Ty != s16) {
2611 LLVM_DEBUG(dbgs() << "Unable to materialize integer " << Ty
2612 << " constant, expected: " << s32 << ", " << s64
2613 << ", or " << p0 << '\n');
2614 return false;
2615 }
2616
2617 if (RB.getID() != AArch64::GPRRegBankID) {
2618 LLVM_DEBUG(dbgs() << "Unable to materialize integer " << Ty
2619 << " constant on bank: " << RB
2620 << ", expected: GPR\n");
2621 return false;
2622 }
2623 }
2624
2625 if (isFP) {
2626 const TargetRegisterClass &FPRRC = *getRegClassForTypeOnBank(DefTy, RB);
2627 // For 16, 64, and 128b values, emit a constant pool load.
2628 switch (DefSize) {
2629 default:
2630 llvm_unreachable("Unexpected destination size for G_FCONSTANT?");
2631 case 32:
2632 case 64: {
2633 bool OptForSize = shouldOptForSize(&MF);
2634 const auto &TLI = MF.getSubtarget().getTargetLowering();
2635 // If TLI says that this fpimm is illegal, then we'll expand to a
2636 // constant pool load.
2637 if (TLI->isFPImmLegal(I.getOperand(1).getFPImm()->getValueAPF(),
2638 EVT::getFloatingPointVT(DefSize), OptForSize))
2639 break;
2640 [[fallthrough]];
2641 }
2642 case 16:
2643 case 128: {
2644 auto *FPImm = I.getOperand(1).getFPImm();
2645 auto *LoadMI = emitLoadFromConstantPool(FPImm, MIB);
2646 if (!LoadMI) {
2647 LLVM_DEBUG(dbgs() << "Failed to load double constant pool entry\n");
2648 return false;
2649 }
2650 MIB.buildCopy({DefReg}, {LoadMI->getOperand(0).getReg()});
2651 I.eraseFromParent();
2652 return RBI.constrainGenericRegister(DefReg, FPRRC, MRI);
2653 }
2654 }
2655
2656 assert((DefSize == 32 || DefSize == 64) && "Unexpected const def size");
2657 // Either emit a FMOV, or emit a copy to emit a normal mov.
2658 const Register DefGPRReg = MRI.createVirtualRegister(
2659 DefSize == 32 ? &AArch64::GPR32RegClass : &AArch64::GPR64RegClass);
2660 MachineOperand &RegOp = I.getOperand(0);
2661 RegOp.setReg(DefGPRReg);
2662 MIB.setInsertPt(MIB.getMBB(), std::next(I.getIterator()));
2663 MIB.buildCopy({DefReg}, {DefGPRReg});
2664
2665 if (!RBI.constrainGenericRegister(DefReg, FPRRC, MRI)) {
2666 LLVM_DEBUG(dbgs() << "Failed to constrain G_FCONSTANT def operand\n");
2667 return false;
2668 }
2669
2670 MachineOperand &ImmOp = I.getOperand(1);
2671 // FIXME: Is going through int64_t always correct?
2672 ImmOp.ChangeToImmediate(
2673 ImmOp.getFPImm()->getValueAPF().bitcastToAPInt().getZExtValue());
2674 } else if (I.getOperand(1).isCImm()) {
2675 uint64_t Val = I.getOperand(1).getCImm()->getZExtValue();
2676 I.getOperand(1).ChangeToImmediate(Val);
2677 } else if (I.getOperand(1).isImm()) {
2678 uint64_t Val = I.getOperand(1).getImm();
2679 I.getOperand(1).ChangeToImmediate(Val);
2680 }
2681
2682 const unsigned MovOpc =
2683 DefSize == 64 ? AArch64::MOVi64imm : AArch64::MOVi32imm;
2684 I.setDesc(TII.get(MovOpc));
2685 constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2686 return true;
2687 }
2688 case TargetOpcode::G_EXTRACT: {
2689 Register DstReg = I.getOperand(0).getReg();
2690 Register SrcReg = I.getOperand(1).getReg();
2691 LLT SrcTy = MRI.getType(SrcReg);
2692 LLT DstTy = MRI.getType(DstReg);
2693 (void)DstTy;
2694 unsigned SrcSize = SrcTy.getSizeInBits();
2695
2696 if (SrcTy.getSizeInBits() > 64) {
2697 // This should be an extract of an s128, which is like a vector extract.
2698 if (SrcTy.getSizeInBits() != 128)
2699 return false;
2700 // Only support extracting 64 bits from an s128 at the moment.
2701 if (DstTy.getSizeInBits() != 64)
2702 return false;
2703
2704 unsigned Offset = I.getOperand(2).getImm();
2705 if (Offset % 64 != 0)
2706 return false;
2707
2708 // Check we have the right regbank always.
2709 const RegisterBank &SrcRB = *RBI.getRegBank(SrcReg, MRI, TRI);
2710 const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
2711 assert(SrcRB.getID() == DstRB.getID() && "Wrong extract regbank!");
2712
2713 if (SrcRB.getID() == AArch64::GPRRegBankID) {
2714 auto NewI =
2715 MIB.buildInstr(TargetOpcode::COPY, {DstReg}, {})
2716 .addUse(SrcReg, 0,
2717 Offset == 0 ? AArch64::sube64 : AArch64::subo64);
2718 constrainOperandRegClass(MF, TRI, MRI, TII, RBI, *NewI,
2719 AArch64::GPR64RegClass, NewI->getOperand(0));
2720 I.eraseFromParent();
2721 return true;
2722 }
2723
2724 // Emit the same code as a vector extract.
2725 // Offset must be a multiple of 64.
2726 unsigned LaneIdx = Offset / 64;
2727 MachineInstr *Extract = emitExtractVectorElt(
2728 DstReg, DstRB, LLT::scalar(64), SrcReg, LaneIdx, MIB);
2729 if (!Extract)
2730 return false;
2731 I.eraseFromParent();
2732 return true;
2733 }
2734
2735 I.setDesc(TII.get(SrcSize == 64 ? AArch64::UBFMXri : AArch64::UBFMWri));
2736 MachineInstrBuilder(MF, I).addImm(I.getOperand(2).getImm() +
2737 Ty.getSizeInBits() - 1);
2738
2739 if (SrcSize < 64) {
2740 assert(SrcSize == 32 && DstTy.getSizeInBits() == 16 &&
2741 "unexpected G_EXTRACT types");
2742 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2743 }
2744
2745 DstReg = MRI.createGenericVirtualRegister(LLT::scalar(64));
2746 MIB.setInsertPt(MIB.getMBB(), std::next(I.getIterator()));
2747 MIB.buildInstr(TargetOpcode::COPY, {I.getOperand(0).getReg()}, {})
2748 .addReg(DstReg, 0, AArch64::sub_32);
2749 RBI.constrainGenericRegister(I.getOperand(0).getReg(),
2750 AArch64::GPR32RegClass, MRI);
2751 I.getOperand(0).setReg(DstReg);
2752
2753 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2754 }
2755
2756 case TargetOpcode::G_INSERT: {
2757 LLT SrcTy = MRI.getType(I.getOperand(2).getReg());
2758 LLT DstTy = MRI.getType(I.getOperand(0).getReg());
2759 unsigned DstSize = DstTy.getSizeInBits();
2760 // Larger inserts are vectors, same-size ones should be something else by
2761 // now (split up or turned into COPYs).
2762 if (Ty.getSizeInBits() > 64 || SrcTy.getSizeInBits() > 32)
2763 return false;
2764
2765 I.setDesc(TII.get(DstSize == 64 ? AArch64::BFMXri : AArch64::BFMWri));
2766 unsigned LSB = I.getOperand(3).getImm();
2767 unsigned Width = MRI.getType(I.getOperand(2).getReg()).getSizeInBits();
2768 I.getOperand(3).setImm((DstSize - LSB) % DstSize);
2769 MachineInstrBuilder(MF, I).addImm(Width - 1);
2770
2771 if (DstSize < 64) {
2772 assert(DstSize == 32 && SrcTy.getSizeInBits() == 16 &&
2773 "unexpected G_INSERT types");
2774 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2775 }
2776
2777 Register SrcReg = MRI.createGenericVirtualRegister(LLT::scalar(64));
2778 BuildMI(MBB, I.getIterator(), I.getDebugLoc(),
2779 TII.get(AArch64::SUBREG_TO_REG))
2780 .addDef(SrcReg)
2781 .addImm(0)
2782 .addUse(I.getOperand(2).getReg())
2783 .addImm(AArch64::sub_32);
2784 RBI.constrainGenericRegister(I.getOperand(2).getReg(),
2785 AArch64::GPR32RegClass, MRI);
2786 I.getOperand(2).setReg(SrcReg);
2787
2788 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2789 }
2790 case TargetOpcode::G_FRAME_INDEX: {
2791 // allocas and G_FRAME_INDEX are only supported in addrspace(0).
2792 if (Ty != LLT::pointer(0, 64)) {
2793 LLVM_DEBUG(dbgs() << "G_FRAME_INDEX pointer has type: " << Ty
2794 << ", expected: " << LLT::pointer(0, 64) << '\n');
2795 return false;
2796 }
2797 I.setDesc(TII.get(AArch64::ADDXri));
2798
2799 // MOs for a #0 shifted immediate.
2800 I.addOperand(MachineOperand::CreateImm(0));
2801 I.addOperand(MachineOperand::CreateImm(0));
2802
2803 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2804 }
2805
2806 case TargetOpcode::G_GLOBAL_VALUE: {
2807 const GlobalValue *GV = nullptr;
2808 unsigned OpFlags;
2809 if (I.getOperand(1).isSymbol()) {
2810 OpFlags = I.getOperand(1).getTargetFlags();
2811 // Currently only used by "RtLibUseGOT".
2812 assert(OpFlags == AArch64II::MO_GOT);
2813 } else {
2814 GV = I.getOperand(1).getGlobal();
2815 if (GV->isThreadLocal())
2816 return selectTLSGlobalValue(I, MRI);
2817 OpFlags = STI.ClassifyGlobalReference(GV, TM);
2818 }
2819
2820 if (OpFlags & AArch64II::MO_GOT) {
2821 I.setDesc(TII.get(AArch64::LOADgot));
2822 I.getOperand(1).setTargetFlags(OpFlags);
2823 } else if (TM.getCodeModel() == CodeModel::Large &&
2824 !TM.isPositionIndependent()) {
2825 // Materialize the global using movz/movk instructions.
2826 materializeLargeCMVal(I, GV, OpFlags);
2827 I.eraseFromParent();
2828 return true;
2829 } else if (TM.getCodeModel() == CodeModel::Tiny) {
2830 I.setDesc(TII.get(AArch64::ADR));
2831 I.getOperand(1).setTargetFlags(OpFlags);
2832 } else {
2833 I.setDesc(TII.get(AArch64::MOVaddr));
2834 I.getOperand(1).setTargetFlags(OpFlags | AArch64II::MO_PAGE);
2835 MachineInstrBuilder MIB(MF, I);
2836 MIB.addGlobalAddress(GV, I.getOperand(1).getOffset(),
2837 OpFlags | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
2838 }
2839 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2840 }
2841
2842 case TargetOpcode::G_ZEXTLOAD:
2843 case TargetOpcode::G_LOAD:
2844 case TargetOpcode::G_STORE: {
2845 GLoadStore &LdSt = cast<GLoadStore>(I);
2846 bool IsZExtLoad = I.getOpcode() == TargetOpcode::G_ZEXTLOAD;
2847 LLT PtrTy = MRI.getType(LdSt.getPointerReg());
2848
2849 if (PtrTy != LLT::pointer(0, 64)) {
2850 LLVM_DEBUG(dbgs() << "Load/Store pointer has type: " << PtrTy
2851 << ", expected: " << LLT::pointer(0, 64) << '\n');
2852 return false;
2853 }
2854
2855 uint64_t MemSizeInBytes = LdSt.getMemSize().getValue();
2856 unsigned MemSizeInBits = LdSt.getMemSizeInBits().getValue();
2857 AtomicOrdering Order = LdSt.getMMO().getSuccessOrdering();
2858
2859 // Need special instructions for atomics that affect ordering.
2860 if (Order != AtomicOrdering::NotAtomic &&
2861 Order != AtomicOrdering::Unordered &&
2862 Order != AtomicOrdering::Monotonic) {
2863 assert(!isa<GZExtLoad>(LdSt));
2864 assert(MemSizeInBytes <= 8 &&
2865 "128-bit atomics should already be custom-legalized");
2866
2867 if (isa<GLoad>(LdSt)) {
2868 static constexpr unsigned LDAPROpcodes[] = {
2869 AArch64::LDAPRB, AArch64::LDAPRH, AArch64::LDAPRW, AArch64::LDAPRX};
2870 static constexpr unsigned LDAROpcodes[] = {
2871 AArch64::LDARB, AArch64::LDARH, AArch64::LDARW, AArch64::LDARX};
2872 ArrayRef<unsigned> Opcodes =
2873 STI.hasRCPC() && Order != AtomicOrdering::SequentiallyConsistent
2874 ? LDAPROpcodes
2875 : LDAROpcodes;
2876 I.setDesc(TII.get(Opcodes[Log2_32(MemSizeInBytes)]));
2877 } else {
2878 static constexpr unsigned Opcodes[] = {AArch64::STLRB, AArch64::STLRH,
2879 AArch64::STLRW, AArch64::STLRX};
2880 Register ValReg = LdSt.getReg(0);
2881 if (MRI.getType(ValReg).getSizeInBits() == 64 && MemSizeInBits != 64) {
2882 // Emit a subreg copy of 32 bits.
2883 Register NewVal = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
2884 MIB.buildInstr(TargetOpcode::COPY, {NewVal}, {})
2885 .addReg(I.getOperand(0).getReg(), 0, AArch64::sub_32);
2886 I.getOperand(0).setReg(NewVal);
2887 }
2888 I.setDesc(TII.get(Opcodes[Log2_32(MemSizeInBytes)]));
2889 }
2890 constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2891 return true;
2892 }
2893
2894#ifndef NDEBUG
2895 const Register PtrReg = LdSt.getPointerReg();
2896 const RegisterBank &PtrRB = *RBI.getRegBank(PtrReg, MRI, TRI);
2897 // Check that the pointer register is valid.
2898 assert(PtrRB.getID() == AArch64::GPRRegBankID &&
2899 "Load/Store pointer operand isn't a GPR");
2900 assert(MRI.getType(PtrReg).isPointer() &&
2901 "Load/Store pointer operand isn't a pointer");
2902#endif
2903
2904 const Register ValReg = LdSt.getReg(0);
2905 const LLT ValTy = MRI.getType(ValReg);
2906 const RegisterBank &RB = *RBI.getRegBank(ValReg, MRI, TRI);
2907
2908 // The code below doesn't support truncating stores, so we need to split it
2909 // again.
2910 if (isa<GStore>(LdSt) && ValTy.getSizeInBits() > MemSizeInBits) {
2911 unsigned SubReg;
2912 LLT MemTy = LdSt.getMMO().getMemoryType();
2913 auto *RC = getRegClassForTypeOnBank(MemTy, RB);
2914 if (!getSubRegForClass(RC, TRI, SubReg))
2915 return false;
2916
2917 // Generate a subreg copy.
2918 auto Copy = MIB.buildInstr(TargetOpcode::COPY, {MemTy}, {})
2919 .addReg(ValReg, 0, SubReg)
2920 .getReg(0);
2921 RBI.constrainGenericRegister(Copy, *RC, MRI);
2922 LdSt.getOperand(0).setReg(Copy);
2923 } else if (isa<GLoad>(LdSt) && ValTy.getSizeInBits() > MemSizeInBits) {
2924 // If this is an any-extending load from the FPR bank, split it into a regular
2925 // load + extend.
2926 if (RB.getID() == AArch64::FPRRegBankID) {
2927 unsigned SubReg;
2928 LLT MemTy = LdSt.getMMO().getMemoryType();
2929 auto *RC = getRegClassForTypeOnBank(MemTy, RB);
2930 if (!getSubRegForClass(RC, TRI, SubReg))
2931 return false;
2932 Register OldDst = LdSt.getReg(0);
2933 Register NewDst =
2934 MRI.createGenericVirtualRegister(LdSt.getMMO().getMemoryType());
2935 LdSt.getOperand(0).setReg(NewDst);
2936 MRI.setRegBank(NewDst, RB);
2937 // Generate a SUBREG_TO_REG to extend it.
2938 MIB.setInsertPt(MIB.getMBB(), std::next(LdSt.getIterator()));
2939 MIB.buildInstr(AArch64::SUBREG_TO_REG, {OldDst}, {})
2940 .addImm(0)
2941 .addUse(NewDst)
2942 .addImm(SubReg);
2943 auto SubRegRC = getRegClassForTypeOnBank(MRI.getType(OldDst), RB);
2944 RBI.constrainGenericRegister(OldDst, *SubRegRC, MRI);
2945 MIB.setInstr(LdSt);
2946 }
2947 }
2948
2949 // Helper lambda for partially selecting I. Either returns the original
2950 // instruction with an updated opcode, or a new instruction.
2951 auto SelectLoadStoreAddressingMode = [&]() -> MachineInstr * {
2952 bool IsStore = isa<GStore>(I);
2953 const unsigned NewOpc =
2954 selectLoadStoreUIOp(I.getOpcode(), RB.getID(), MemSizeInBits);
2955 if (NewOpc == I.getOpcode())
2956 return nullptr;
2957 // Check if we can fold anything into the addressing mode.
2958 auto AddrModeFns =
2959 selectAddrModeIndexed(I.getOperand(1), MemSizeInBytes);
2960 if (!AddrModeFns) {
2961 // Can't fold anything. Use the original instruction.
2962 I.setDesc(TII.get(NewOpc));
2963 I.addOperand(MachineOperand::CreateImm(0));
2964 return &I;
2965 }
2966
2967 // Folded something. Create a new instruction and return it.
2968 auto NewInst = MIB.buildInstr(NewOpc, {}, {}, I.getFlags());
2969 Register CurValReg = I.getOperand(0).getReg();
2970 IsStore ? NewInst.addUse(CurValReg) : NewInst.addDef(CurValReg);
2971 NewInst.cloneMemRefs(I);
2972 for (auto &Fn : *AddrModeFns)
2973 Fn(NewInst);
2974 I.eraseFromParent();
2975 return &*NewInst;
2976 };
2977
2978 MachineInstr *LoadStore = SelectLoadStoreAddressingMode();
2979 if (!LoadStore)
2980 return false;
2981
2982 // If we're storing a 0, use WZR/XZR.
2983 if (Opcode == TargetOpcode::G_STORE) {
2984 auto CVal = getIConstantVRegValWithLookThrough(
2985 LoadStore->getOperand(0).getReg(), MRI);
2986 if (CVal && CVal->Value == 0) {
2987 switch (LoadStore->getOpcode()) {
2988 case AArch64::STRWui:
2989 case AArch64::STRHHui:
2990 case AArch64::STRBBui:
2991 LoadStore->getOperand(0).setReg(AArch64::WZR);
2992 break;
2993 case AArch64::STRXui:
2994 LoadStore->getOperand(0).setReg(AArch64::XZR);
2995 break;
2996 }
2997 }
2998 }
2999
3000 if (IsZExtLoad || (Opcode == TargetOpcode::G_LOAD &&
3001 ValTy == LLT::scalar(64) && MemSizeInBits == 32)) {
3002 // The any/zextload from a smaller type to i32 should be handled by the
3003 // importer.
3004 if (MRI.getType(LoadStore->getOperand(0).getReg()).getSizeInBits() != 64)
3005 return false;
3006 // If we have an extending load then change the load's type to be a
3007 // narrower reg and zero_extend with SUBREG_TO_REG.
3008 Register LdReg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
3009 Register DstReg = LoadStore->getOperand(0).getReg();
3010 LoadStore->getOperand(0).setReg(LdReg);
3011
3012 MIB.setInsertPt(MIB.getMBB(), std::next(LoadStore->getIterator()));
3013 MIB.buildInstr(AArch64::SUBREG_TO_REG, {DstReg}, {})
3014 .addImm(0)
3015 .addUse(LdReg)
3016 .addImm(AArch64::sub_32);
3017 constrainSelectedInstRegOperands(*LoadStore, TII, TRI, RBI);
3018 return RBI.constrainGenericRegister(DstReg, AArch64::GPR64allRegClass,
3019 MRI);
3020 }
3021 return constrainSelectedInstRegOperands(*LoadStore, TII, TRI, RBI);
3022 }
3023
3024 case TargetOpcode::G_INDEXED_ZEXTLOAD:
3025 case TargetOpcode::G_INDEXED_SEXTLOAD:
3026 return selectIndexedExtLoad(I, MRI);
3027 case TargetOpcode::G_INDEXED_LOAD:
3028 return selectIndexedLoad(I, MRI);
3029 case TargetOpcode::G_INDEXED_STORE:
3030 return selectIndexedStore(cast<GIndexedStore>(I), MRI);
3031
3032 case TargetOpcode::G_LSHR:
3033 case TargetOpcode::G_ASHR:
3034 if (MRI.getType(I.getOperand(0).getReg()).isVector())
3035 return selectVectorAshrLshr(I, MRI);
3036 [[fallthrough]];
3037 case TargetOpcode::G_SHL:
3038 if (Opcode == TargetOpcode::G_SHL &&
3039 MRI.getType(I.getOperand(0).getReg()).isVector())
3040 return selectVectorSHL(I, MRI);
3041
3042 // These shifts were legalized to have 64 bit shift amounts because we
3043 // want to take advantage of the selection patterns that assume the
3044 // immediates are s64s, however, selectBinaryOp will assume both operands
3045 // will have the same bit size.
3046 {
3047 Register SrcReg = I.getOperand(1).getReg();
3048 Register ShiftReg = I.getOperand(2).getReg();
3049 const LLT ShiftTy = MRI.getType(ShiftReg);
3050 const LLT SrcTy = MRI.getType(SrcReg);
3051 if (!SrcTy.isVector() && SrcTy.getSizeInBits() == 32 &&
3052 ShiftTy.getSizeInBits() == 64) {
3053 assert(!ShiftTy.isVector() && "unexpected vector shift ty");
3054 // Insert a subregister copy to implement a 64->32 trunc
3055 auto Trunc = MIB.buildInstr(TargetOpcode::COPY, {SrcTy}, {})
3056 .addReg(ShiftReg, 0, AArch64::sub_32);
3057 MRI.setRegBank(Trunc.getReg(0), RBI.getRegBank(AArch64::GPRRegBankID));
3058 I.getOperand(2).setReg(Trunc.getReg(0));
3059 }
3060 }
3061 [[fallthrough]];
3062 case TargetOpcode::G_OR: {
3063 // Reject the various things we don't support yet.
3064 if (unsupportedBinOp(I, RBI, MRI, TRI))
3065 return false;
3066
3067 const unsigned OpSize = Ty.getSizeInBits();
3068
3069 const Register DefReg = I.getOperand(0).getReg();
3070 const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI);
3071
3072 const unsigned NewOpc = selectBinaryOp(I.getOpcode(), RB.getID(), OpSize);
3073 if (NewOpc == I.getOpcode())
3074 return false;
3075
3076 I.setDesc(TII.get(NewOpc));
3077 // FIXME: Should the type be always reset in setDesc?
3078
3079 // Now that we selected an opcode, we need to constrain the register
3080 // operands to use appropriate classes.
3081 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3082 }
3083
3084 case TargetOpcode::G_PTR_ADD: {
3085 emitADD(I.getOperand(0).getReg(), I.getOperand(1), I.getOperand(2), MIB);
3086 I.eraseFromParent();
3087 return true;
3088 }
3089
3090 case TargetOpcode::G_SADDE:
3091 case TargetOpcode::G_UADDE:
3092 case TargetOpcode::G_SSUBE:
3093 case TargetOpcode::G_USUBE:
3094 case TargetOpcode::G_SADDO:
3095 case TargetOpcode::G_UADDO:
3096 case TargetOpcode::G_SSUBO:
3097 case TargetOpcode::G_USUBO:
3098 return selectOverflowOp(I, MRI);
3099
3100 case TargetOpcode::G_PTRMASK: {
3101 Register MaskReg = I.getOperand(2).getReg();
3102 std::optional<int64_t> MaskVal = getIConstantVRegSExtVal(MaskReg, MRI);
3103 // TODO: Implement arbitrary cases
3104 if (!MaskVal || !isShiftedMask_64(*MaskVal))
3105 return false;
3106
3107 uint64_t Mask = *MaskVal;
3108 I.setDesc(TII.get(AArch64::ANDXri));
3109 I.getOperand(2).ChangeToImmediate(
3110 AArch64_AM::encodeLogicalImmediate(Mask, 64));
3111
3112 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3113 }
3114 case TargetOpcode::G_PTRTOINT:
3115 case TargetOpcode::G_TRUNC: {
3116 const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
3117 const LLT SrcTy = MRI.getType(I.getOperand(1).getReg());
3118
3119 const Register DstReg = I.getOperand(0).getReg();
3120 const Register SrcReg = I.getOperand(1).getReg();
3121
3122 const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
3123 const RegisterBank &SrcRB = *RBI.getRegBank(SrcReg, MRI, TRI);
3124
3125 if (DstRB.getID() != SrcRB.getID()) {
3126 LLVM_DEBUG(
3127 dbgs() << "G_TRUNC/G_PTRTOINT input/output on different banks\n");
3128 return false;
3129 }
3130
3131 if (DstRB.getID() == AArch64::GPRRegBankID) {
3132 const TargetRegisterClass *DstRC = getRegClassForTypeOnBank(DstTy, DstRB);
3133 if (!DstRC)
3134 return false;
3135
3136 const TargetRegisterClass *SrcRC = getRegClassForTypeOnBank(SrcTy, SrcRB);
3137 if (!SrcRC)
3138 return false;
3139
3140 if (!RBI.constrainGenericRegister(SrcReg, *SrcRC, MRI) ||
3141 !RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) {
3142 LLVM_DEBUG(dbgs() << "Failed to constrain G_TRUNC/G_PTRTOINT\n");
3143 return false;
3144 }
3145
3146 if (DstRC == SrcRC) {
3147 // Nothing to be done
3148 } else if (Opcode == TargetOpcode::G_TRUNC && DstTy == LLT::scalar(32) &&
3149 SrcTy == LLT::scalar(64)) {
3150 llvm_unreachable("TableGen can import this case");
3151 return false;
3152 } else if (DstRC == &AArch64::GPR32RegClass &&
3153 SrcRC == &AArch64::GPR64RegClass) {
3154 I.getOperand(1).setSubReg(AArch64::sub_32);
3155 } else {
3156 LLVM_DEBUG(
3157 dbgs() << "Unhandled mismatched classes in G_TRUNC/G_PTRTOINT\n");
3158 return false;
3159 }
3160
3161 I.setDesc(TII.get(TargetOpcode::COPY));
3162 return true;
3163 } else if (DstRB.getID() == AArch64::FPRRegBankID) {
3164 if (DstTy == LLT::fixed_vector(4, 16) &&
3165 SrcTy == LLT::fixed_vector(4, 32)) {
3166 I.setDesc(TII.get(AArch64::XTNv4i16));
3167 constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3168 return true;
3169 }
3170
3171 if (!SrcTy.isVector() && SrcTy.getSizeInBits() == 128) {
3172 MachineInstr *Extract = emitExtractVectorElt(
3173 DstReg, DstRB, LLT::scalar(DstTy.getSizeInBits()), SrcReg, 0, MIB);
3174 if (!Extract)
3175 return false;
3176 I.eraseFromParent();
3177 return true;
3178 }
3179
3180 // We might have a vector G_PTRTOINT, in which case just emit a COPY.
3181 if (Opcode == TargetOpcode::G_PTRTOINT) {
3182 assert(DstTy.isVector() && "Expected an FPR ptrtoint to be a vector");
3183 I.setDesc(TII.get(TargetOpcode::COPY));
3184 return selectCopy(I, TII, MRI, TRI, RBI);
3185 }
3186 }
3187
3188 return false;
3189 }
3190
3191 case TargetOpcode::G_ANYEXT: {
3192 if (selectUSMovFromExtend(I, MRI))
3193 return true;
3194
3195 const Register DstReg = I.getOperand(0).getReg();
3196 const Register SrcReg = I.getOperand(1).getReg();
3197
3198 const RegisterBank &RBDst = *RBI.getRegBank(DstReg, MRI, TRI);
3199 if (RBDst.getID() != AArch64::GPRRegBankID) {
3200 LLVM_DEBUG(dbgs() << "G_ANYEXT on bank: " << RBDst
3201 << ", expected: GPR\n");
3202 return false;
3203 }
3204
3205 const RegisterBank &RBSrc = *RBI.getRegBank(SrcReg, MRI, TRI);
3206 if (RBSrc.getID() != AArch64::GPRRegBankID) {
3207 LLVM_DEBUG(dbgs() << "G_ANYEXT on bank: " << RBSrc
3208 << ", expected: GPR\n");
3209 return false;
3210 }
3211
3212 const unsigned DstSize = MRI.getType(DstReg).getSizeInBits();
3213
3214 if (DstSize == 0) {
3215 LLVM_DEBUG(dbgs() << "G_ANYEXT operand has no size, not a gvreg?\n");
3216 return false;
3217 }
3218
3219 if (DstSize != 64 && DstSize > 32) {
3220 LLVM_DEBUG(dbgs() << "G_ANYEXT to size: " << DstSize
3221 << ", expected: 32 or 64\n");
3222 return false;
3223 }
3224 // At this point G_ANYEXT is just like a plain COPY, but we need
3225 // to explicitly form the 64-bit value if any.
3226 if (DstSize > 32) {
3227 Register ExtSrc = MRI.createVirtualRegister(&AArch64::GPR64allRegClass);
3228 BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::SUBREG_TO_REG))
3229 .addDef(ExtSrc)
3230 .addImm(0)
3231 .addUse(SrcReg)
3232 .addImm(AArch64::sub_32);
3233 I.getOperand(1).setReg(ExtSrc);
3234 }
3235 return selectCopy(I, TII, MRI, TRI, RBI);
3236 }
3237
3238 case TargetOpcode::G_ZEXT:
3239 case TargetOpcode::G_SEXT_INREG:
3240 case TargetOpcode::G_SEXT: {
3241 if (selectUSMovFromExtend(I, MRI))
3242 return true;
3243
3244 unsigned Opcode = I.getOpcode();
3245 const bool IsSigned = Opcode != TargetOpcode::G_ZEXT;
3246 const Register DefReg = I.getOperand(0).getReg();
3247 Register SrcReg = I.getOperand(1).getReg();
3248 const LLT DstTy = MRI.getType(DefReg);
3249 const LLT SrcTy = MRI.getType(SrcReg);
3250 unsigned DstSize = DstTy.getSizeInBits();
3251 unsigned SrcSize = SrcTy.getSizeInBits();
3252
3253 // SEXT_INREG has the same src reg size as dst, the size of the value to be
3254 // extended is encoded in the imm.
3255 if (Opcode == TargetOpcode::G_SEXT_INREG)
3256 SrcSize = I.getOperand(2).getImm();
3257
3258 if (DstTy.isVector())
3259 return false; // Should be handled by imported patterns.
3260
3261 assert((*RBI.getRegBank(DefReg, MRI, TRI)).getID() ==
3262 AArch64::GPRRegBankID &&
3263 "Unexpected ext regbank");
3264
3265 MachineInstr *ExtI;
3266
3267 // If we're extending the result of a load whose destination type is smaller
3268 // than 32 bits, this zext is redundant: GPR32 is the smallest GPR register
3269 // on AArch64 and all narrower loads automatically zero-extend the upper
3270 // bits. E.g.
3271 // %v(s8) = G_LOAD %p, :: (load 1)
3272 // %v2(s32) = G_ZEXT %v(s8)
3273 if (!IsSigned) {
3274 auto *LoadMI = getOpcodeDef(TargetOpcode::G_LOAD, SrcReg, MRI);
3275 bool IsGPR =
3276 RBI.getRegBank(SrcReg, MRI, TRI)->getID() == AArch64::GPRRegBankID;
3277 if (LoadMI && IsGPR) {
3278 const MachineMemOperand *MemOp = *LoadMI->memoperands_begin();
3279 unsigned BytesLoaded = MemOp->getSize().getValue();
3280 if (BytesLoaded < 4 && SrcTy.getSizeInBytes() == BytesLoaded)
3281 return selectCopy(I, TII, MRI, TRI, RBI);
3282 }
3283
3284 // For the 32-bit -> 64-bit case, we can emit a mov (ORRWrs)
3285 // + SUBREG_TO_REG.
3286 if (IsGPR && SrcSize == 32 && DstSize == 64) {
3287 Register SubregToRegSrc =
3288 MRI.createVirtualRegister(&AArch64::GPR32RegClass);
3289 const Register ZReg = AArch64::WZR;
3290 MIB.buildInstr(AArch64::ORRWrs, {SubregToRegSrc}, {ZReg, SrcReg})
3291 .addImm(0);
3292
3293 MIB.buildInstr(AArch64::SUBREG_TO_REG, {DefReg}, {})
3294 .addImm(0)
3295 .addUse(SubregToRegSrc)
3296 .addImm(AArch64::sub_32);
3297
3298 if (!RBI.constrainGenericRegister(DefReg, AArch64::GPR64RegClass,
3299 MRI)) {
3300 LLVM_DEBUG(dbgs() << "Failed to constrain G_ZEXT destination\n");
3301 return false;
3302 }
3303
3304 if (!RBI.constrainGenericRegister(SrcReg, AArch64::GPR32RegClass,
3305 MRI)) {
3306 LLVM_DEBUG(dbgs() << "Failed to constrain G_ZEXT source\n");
3307 return false;
3308 }
3309
3310 I.eraseFromParent();
3311 return true;
3312 }
3313 }
3314
3315 if (DstSize == 64) {
3316 if (Opcode != TargetOpcode::G_SEXT_INREG) {
3317 // FIXME: Can we avoid manually doing this?
3318 if (!RBI.constrainGenericRegister(SrcReg, AArch64::GPR32RegClass,
3319 MRI)) {
3320 LLVM_DEBUG(dbgs() << "Failed to constrain " << TII.getName(Opcode)
3321 << " operand\n");
3322 return false;
3323 }
3324 SrcReg = MIB.buildInstr(AArch64::SUBREG_TO_REG,
3325 {&AArch64::GPR64RegClass}, {})
3326 .addImm(0)
3327 .addUse(SrcReg)
3328 .addImm(AArch64::sub_32)
3329 .getReg(0);
3330 }
3331
3332 ExtI = MIB.buildInstr(IsSigned ? AArch64::SBFMXri : AArch64::UBFMXri,
3333 {DefReg}, {SrcReg})
3334 .addImm(0)
3335 .addImm(SrcSize - 1);
3336 } else if (DstSize <= 32) {
3337 ExtI = MIB.buildInstr(IsSigned ? AArch64::SBFMWri : AArch64::UBFMWri,
3338 {DefReg}, {SrcReg})
3339 .addImm(0)
3340 .addImm(SrcSize - 1);
3341 } else {
3342 return false;
3343 }
3344
3345 constrainSelectedInstRegOperands(*ExtI, TII, TRI, RBI);
3346 I.eraseFromParent();
3347 return true;
3348 }
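// Note: SBFM/UBFM with immr = 0 and imms = SrcSize - 1 extends the low SrcSize
// bits, so e.g. an s8 -> s32 G_ZEXT is selected as UBFMWri %dst, %src, 0, 7
// (the UXTB form) and an s16 -> s64 G_SEXT as SBFMXri %dst, %src, 0, 15 (SXTH),
// after the source has been widened with SUBREG_TO_REG.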
3349
3350 case TargetOpcode::G_SITOFP:
3351 case TargetOpcode::G_UITOFP:
3352 case TargetOpcode::G_FPTOSI:
3353 case TargetOpcode::G_FPTOUI: {
3354 const LLT DstTy = MRI.getType(I.getOperand(0).getReg()),
3355 SrcTy = MRI.getType(I.getOperand(1).getReg());
3356 const unsigned NewOpc = selectFPConvOpc(Opcode, DstTy, SrcTy);
3357 if (NewOpc == Opcode)
3358 return false;
3359
3360 I.setDesc(TII.get(NewOpc));
3361 constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3362 I.setFlags(MachineInstr::NoFPExcept);
3363
3364 return true;
3365 }
3366
3367 case TargetOpcode::G_FREEZE:
3368 return selectCopy(I, TII, MRI, TRI, RBI);
3369
3370 case TargetOpcode::G_INTTOPTR:
3371 // The importer is currently unable to import pointer types since they
3372 // didn't exist in SelectionDAG.
3373 return selectCopy(I, TII, MRI, TRI, RBI);
3374
3375 case TargetOpcode::G_BITCAST:
3376 // Imported SelectionDAG rules can handle every bitcast except those that
3377 // bitcast from a type to the same type. Ideally, these shouldn't occur
3378 // but we might not run an optimizer that deletes them. The other exception
3379 // is bitcasts involving pointer types, as SelectionDAG has no knowledge
3380 // of them.
3381 return selectCopy(I, TII, MRI, TRI, RBI);
3382
3383 case TargetOpcode::G_SELECT: {
3384 auto &Sel = cast<GSelect>(I);
3385 const Register CondReg = Sel.getCondReg();
3386 const Register TReg = Sel.getTrueReg();
3387 const Register FReg = Sel.getFalseReg();
3388
3389 if (tryOptSelect(Sel))
3390 return true;
3391
3392 // Make sure to use an unused vreg instead of wzr, so that the peephole
3393 // optimizations will be able to optimize these.
3394 Register DeadVReg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
3395 auto TstMI = MIB.buildInstr(AArch64::ANDSWri, {DeadVReg}, {CondReg})
3396 .addImm(AArch64_AM::encodeLogicalImmediate(1, 32));
3397 constrainSelectedInstRegOperands(*TstMI, TII, TRI, RBI);
3398 if (!emitSelect(Sel.getReg(0), TReg, FReg, AArch64CC::NE, MIB))
3399 return false;
3400 Sel.eraseFromParent();
3401 return true;
3402 }
3403 case TargetOpcode::G_ICMP: {
3404 if (Ty.isVector())
3405 return false;
3406
3407 if (Ty != LLT::scalar(32)) {
3408 LLVM_DEBUG(dbgs() << "G_ICMP result has type: " << Ty
3409 << ", expected: " << LLT::scalar(32) << '\n');
3410 return false;
3411 }
3412
3413 auto Pred = static_cast<CmpInst::Predicate>(I.getOperand(1).getPredicate());
3414 const AArch64CC::CondCode InvCC =
3415 changeICMPPredToAArch64CC(CmpInst::getInversePredicate(Pred));
3416 emitIntegerCompare(I.getOperand(2), I.getOperand(3), I.getOperand(1), MIB);
3417 emitCSINC(/*Dst=*/I.getOperand(0).getReg(), /*Src1=*/AArch64::WZR,
3418 /*Src2=*/AArch64::WZR, InvCC, MIB);
3419 I.eraseFromParent();
3420 return true;
3421 }
3422
3423 case TargetOpcode::G_FCMP: {
3424 CmpInst::Predicate Pred =
3425 static_cast<CmpInst::Predicate>(I.getOperand(1).getPredicate());
3426 if (!emitFPCompare(I.getOperand(2).getReg(), I.getOperand(3).getReg(), MIB,
3427 Pred) ||
3428 !emitCSetForFCmp(I.getOperand(0).getReg(), Pred, MIB))
3429 return false;
3430 I.eraseFromParent();
3431 return true;
3432 }
3433 case TargetOpcode::G_VASTART:
3434 return STI.isTargetDarwin() ? selectVaStartDarwin(I, MF, MRI)
3435 : selectVaStartAAPCS(I, MF, MRI);
3436 case TargetOpcode::G_INTRINSIC:
3437 return selectIntrinsic(I, MRI);
3438 case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS:
3439 return selectIntrinsicWithSideEffects(I, MRI);
3440 case TargetOpcode::G_IMPLICIT_DEF: {
3441 I.setDesc(TII.get(TargetOpcode::IMPLICIT_DEF));
3442 const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
3443 const Register DstReg = I.getOperand(0).getReg();
3444 const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
3445 const TargetRegisterClass *DstRC = getRegClassForTypeOnBank(DstTy, DstRB);
3446 RBI.constrainGenericRegister(DstReg, *DstRC, MRI);
3447 return true;
3448 }
3449 case TargetOpcode::G_BLOCK_ADDR: {
3450 if (TM.getCodeModel() == CodeModel::Large && !TM.isPositionIndependent()) {
3451 materializeLargeCMVal(I, I.getOperand(1).getBlockAddress(), 0);
3452 I.eraseFromParent();
3453 return true;
3454 } else {
3455 I.setDesc(TII.get(AArch64::MOVaddrBA));
3456 auto MovMI = BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::MOVaddrBA),
3457 I.getOperand(0).getReg())
3458 .addBlockAddress(I.getOperand(1).getBlockAddress(),
3459 /* Offset */ 0, AArch64II::MO_PAGE)
3460 .addBlockAddress(
3461 I.getOperand(1).getBlockAddress(), /* Offset */ 0,
3462 AArch64II::MO_NC | AArch64II::MO_PAGEOFF);
3463 I.eraseFromParent();
3464 return constrainSelectedInstRegOperands(*MovMI, TII, TRI, RBI);
3465 }
3466 }
3467 case AArch64::G_DUP: {
3468 // When the scalar operand of G_DUP is an s8/s16 GPR, it can't be selected by
3469 // the imported patterns, so do it manually here. Avoiding the generation of
3470 // s16 GPRs is difficult because at RegBankSelect we may end up pessimizing
3471 // the FPR case if we decide to add an anyextend to fix this. Manual
3472 // selection is the most robust solution for now.
3473 if (RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI)->getID() !=
3474 AArch64::GPRRegBankID)
3475 return false; // We expect the fpr regbank case to be imported.
3476 LLT VecTy = MRI.getType(I.getOperand(0).getReg());
3477 if (VecTy == LLT::fixed_vector(8, 8))
3478 I.setDesc(TII.get(AArch64::DUPv8i8gpr));
3479 else if (VecTy == LLT::fixed_vector(16, 8))
3480 I.setDesc(TII.get(AArch64::DUPv16i8gpr));
3481 else if (VecTy == LLT::fixed_vector(4, 16))
3482 I.setDesc(TII.get(AArch64::DUPv4i16gpr));
3483 else if (VecTy == LLT::fixed_vector(8, 16))
3484 I.setDesc(TII.get(AArch64::DUPv8i16gpr));
3485 else
3486 return false;
3487 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3488 }
3489 case TargetOpcode::G_BUILD_VECTOR:
3490 return selectBuildVector(I, MRI);
3491 case TargetOpcode::G_MERGE_VALUES:
3492 return selectMergeValues(I, MRI);
3493 case TargetOpcode::G_UNMERGE_VALUES:
3494 return selectUnmergeValues(I, MRI);
3495 case TargetOpcode::G_SHUFFLE_VECTOR:
3496 return selectShuffleVector(I, MRI);
3497 case TargetOpcode::G_EXTRACT_VECTOR_ELT:
3498 return selectExtractElt(I, MRI);
3499 case TargetOpcode::G_CONCAT_VECTORS:
3500 return selectConcatVectors(I, MRI);
3501 case TargetOpcode::G_JUMP_TABLE:
3502 return selectJumpTable(I, MRI);
3503 case TargetOpcode::G_MEMCPY:
3504 case TargetOpcode::G_MEMCPY_INLINE:
3505 case TargetOpcode::G_MEMMOVE:
3506 case TargetOpcode::G_MEMSET:
3507 assert(STI.hasMOPS() && "Shouldn't get here without +mops feature");
3508 return selectMOPS(I, MRI);
3509 }
3510
3511 return false;
3512}
3513
3514bool AArch64InstructionSelector::selectAndRestoreState(MachineInstr &I) {
3515 MachineIRBuilderState OldMIBState = MIB.getState();
3516 bool Success = select(I);
3517 MIB.setState(OldMIBState);
3518 return Success;
3519}
3520
3521bool AArch64InstructionSelector::selectMOPS(MachineInstr &GI,
3522 MachineRegisterInfo &MRI) {
3523 unsigned Mopcode;
3524 switch (GI.getOpcode()) {
3525 case TargetOpcode::G_MEMCPY:
3526 case TargetOpcode::G_MEMCPY_INLINE:
3527 Mopcode = AArch64::MOPSMemoryCopyPseudo;
3528 break;
3529 case TargetOpcode::G_MEMMOVE:
3530 Mopcode = AArch64::MOPSMemoryMovePseudo;
3531 break;
3532 case TargetOpcode::G_MEMSET:
3533 // For tagged memset see llvm.aarch64.mops.memset.tag
3534 Mopcode = AArch64::MOPSMemorySetPseudo;
3535 break;
3536 }
3537
3538 auto &DstPtr = GI.getOperand(0);
3539 auto &SrcOrVal = GI.getOperand(1);
3540 auto &Size = GI.getOperand(2);
3541
3542 // Create copies of the registers that can be clobbered.
3543 const Register DstPtrCopy = MRI.cloneVirtualRegister(DstPtr.getReg());
3544 const Register SrcValCopy = MRI.cloneVirtualRegister(SrcOrVal.getReg());
3545 const Register SizeCopy = MRI.cloneVirtualRegister(Size.getReg());
3546
3547 const bool IsSet = Mopcode == AArch64::MOPSMemorySetPseudo;
3548 const auto &SrcValRegClass =
3549 IsSet ? AArch64::GPR64RegClass : AArch64::GPR64commonRegClass;
3550
3551 // Constrain to specific registers
3552 RBI.constrainGenericRegister(DstPtrCopy, AArch64::GPR64commonRegClass, MRI);
3553 RBI.constrainGenericRegister(SrcValCopy, SrcValRegClass, MRI);
3554 RBI.constrainGenericRegister(SizeCopy, AArch64::GPR64RegClass, MRI);
3555
3556 MIB.buildCopy(DstPtrCopy, DstPtr);
3557 MIB.buildCopy(SrcValCopy, SrcOrVal);
3558 MIB.buildCopy(SizeCopy, Size);
3559
3560 // The new instruction uses the copied registers because it must update them.
3561 // The defs are not used since they don't exist in G_MEM*. They are still
3562 // tied.
3563 // Note: order of operands is different from G_MEMSET, G_MEMCPY, G_MEMMOVE
3564 Register DefDstPtr = MRI.createVirtualRegister(&AArch64::GPR64commonRegClass);
3565 Register DefSize = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
3566 if (IsSet) {
3567 MIB.buildInstr(Mopcode, {DefDstPtr, DefSize},
3568 {DstPtrCopy, SizeCopy, SrcValCopy});
3569 } else {
3570 Register DefSrcPtr = MRI.createVirtualRegister(&SrcValRegClass);
3571 MIB.buildInstr(Mopcode, {DefDstPtr, DefSrcPtr, DefSize},
3572 {DstPtrCopy, SrcValCopy, SizeCopy});
3573 }
3574
3575 GI.eraseFromParent();
3576 return true;
3577}
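// The MOPS pseudos selected above are later expanded into the architectural
// prologue/main/epilogue triples (e.g. CPYFP/CPYFM/CPYFE for memcpy,
// CPYP/CPYM/CPYE for memmove, SETP/SETM/SETE for memset), which is why the
// clobbered pointer/size operands are copied and kept tied here.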
3578
3579bool AArch64InstructionSelector::selectBrJT(MachineInstr &I,
3580 MachineRegisterInfo &MRI) {
3581 assert(I.getOpcode() == TargetOpcode::G_BRJT && "Expected G_BRJT");
3582 Register JTAddr = I.getOperand(0).getReg();
3583 unsigned JTI = I.getOperand(1).getIndex();
3584 Register Index = I.getOperand(2).getReg();
3585
3586 Register TargetReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
3587 Register ScratchReg = MRI.createVirtualRegister(&AArch64::GPR64spRegClass);
3588
3589 MF->getInfo<AArch64FunctionInfo>()->setJumpTableEntryInfo(JTI, 4, nullptr);
3590 auto JumpTableInst = MIB.buildInstr(AArch64::JumpTableDest32,
3591 {TargetReg, ScratchReg}, {JTAddr, Index})
3592 .addJumpTableIndex(JTI);
3593 // Save the jump table info.
3594 MIB.buildInstr(TargetOpcode::JUMP_TABLE_DEBUG_INFO, {},
3595 {static_cast<int64_t>(JTI)});
3596 // Build the indirect branch.
3597 MIB.buildInstr(AArch64::BR, {}, {TargetReg});
3598 I.eraseFromParent();
3599 return constrainSelectedInstRegOperands(*JumpTableInst, TII, TRI, RBI);
3600}
3601
3602bool AArch64InstructionSelector::selectJumpTable(MachineInstr &I,
3603 MachineRegisterInfo &MRI) {
3604 assert(I.getOpcode() == TargetOpcode::G_JUMP_TABLE && "Expected jump table");
3605 assert(I.getOperand(1).isJTI() && "Jump table op should have a JTI!");
3606
3607 Register DstReg = I.getOperand(0).getReg();
3608 unsigned JTI = I.getOperand(1).getIndex();
3609 // We generate a MOVaddrJT which will get expanded to an ADRP + ADD later.
3610 auto MovMI =
3611 MIB.buildInstr(AArch64::MOVaddrJT, {DstReg}, {})
3612 .addJumpTableIndex(JTI, AArch64II::MO_PAGE)
3613 .addJumpTableIndex(JTI, AArch64II::MO_NC | AArch64II::MO_PAGEOFF);
3614 I.eraseFromParent();
3615 return constrainSelectedInstRegOperands(*MovMI, TII, TRI, RBI);
3616}
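// With the MO_PAGE / MO_NC|MO_PAGEOFF operands above, MOVaddrJT becomes the
// usual page-relative pair, e.g.:
//   adrp x8, .LJTI0_0
//   add  x8, x8, :lo12:.LJTI0_0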
3617
3618bool AArch64InstructionSelector::selectTLSGlobalValue(
3619 MachineInstr &I, MachineRegisterInfo &MRI) {
3620 if (!STI.isTargetMachO())
3621 return false;
3622 MachineFunction &MF = *I.getParent()->getParent();
3623 MF.getFrameInfo().setAdjustsStack(true);
3624
3625 const auto &GlobalOp = I.getOperand(1);
3626 assert(GlobalOp.getOffset() == 0 &&
3627 "Shouldn't have an offset on TLS globals!");
3628 const GlobalValue &GV = *GlobalOp.getGlobal();
3629
3630 auto LoadGOT =
3631 MIB.buildInstr(AArch64::LOADgot, {&AArch64::GPR64commonRegClass}, {})
3632 .addGlobalAddress(&GV, 0, AArch64II::MO_TLS);
3633
3634 auto Load = MIB.buildInstr(AArch64::LDRXui, {&AArch64::GPR64commonRegClass},
3635 {LoadGOT.getReg(0)})
3636 .addImm(0);
3637
3638 MIB.buildCopy(Register(AArch64::X0), LoadGOT.getReg(0));
3639 // TLS calls preserve all registers except those that absolutely must be
3640 // trashed: X0 (it takes an argument), LR (it's a call) and NZCV (let's not be
3641 // silly).
3642 MIB.buildInstr(getBLRCallOpcode(MF), {}, {Load})
3643 .addUse(AArch64::X0, RegState::Implicit)
3644 .addDef(AArch64::X0, RegState::Implicit)
3645 .addRegMask(TRI.getTLSCallPreservedMask());
3646
3647 MIB.buildCopy(I.getOperand(0).getReg(), Register(AArch64::X0));
3648 RBI.constrainGenericRegister(I.getOperand(0).getReg(), AArch64::GPR64RegClass,
3649 MRI);
3650 I.eraseFromParent();
3651 return true;
3652}
3653
3654MachineInstr *AArch64InstructionSelector::emitScalarToVector(
3655 unsigned EltSize, const TargetRegisterClass *DstRC, Register Scalar,
3656 MachineIRBuilder &MIRBuilder) const {
3657 auto Undef = MIRBuilder.buildInstr(TargetOpcode::IMPLICIT_DEF, {DstRC}, {});
3658
3659 auto BuildFn = [&](unsigned SubregIndex) {
3660 auto Ins =
3661 MIRBuilder
3662 .buildInstr(TargetOpcode::INSERT_SUBREG, {DstRC}, {Undef, Scalar})
3663 .addImm(SubregIndex);
3664 constrainSelectedInstRegOperands(*Undef, TII, TRI, RBI);
3665 constrainSelectedInstRegOperands(*Ins, TII, TRI, RBI);
3666 return &*Ins;
3667 };
3668
3669 switch (EltSize) {
3670 case 8:
3671 return BuildFn(AArch64::bsub);
3672 case 16:
3673 return BuildFn(AArch64::hsub);
3674 case 32:
3675 return BuildFn(AArch64::ssub);
3676 case 64:
3677 return BuildFn(AArch64::dsub);
3678 default:
3679 return nullptr;
3680 }
3681}
3682
3683 MachineInstr *
3684 AArch64InstructionSelector::emitNarrowVector(Register DstReg, Register SrcReg,
3685 MachineIRBuilder &MIB,
3686 MachineRegisterInfo &MRI) const {
3687 LLT DstTy = MRI.getType(DstReg);
3688 const TargetRegisterClass *RC =
3689 getRegClassForTypeOnBank(DstTy, *RBI.getRegBank(SrcReg, MRI, TRI));
3690 if (RC != &AArch64::FPR32RegClass && RC != &AArch64::FPR64RegClass) {
3691 LLVM_DEBUG(dbgs() << "Unsupported register class!\n");
3692 return nullptr;
3693 }
3694 unsigned SubReg = 0;
3695 if (!getSubRegForClass(RC, TRI, SubReg))
3696 return nullptr;
3697 if (SubReg != AArch64::ssub && SubReg != AArch64::dsub) {
3698 LLVM_DEBUG(dbgs() << "Unsupported destination size! ("
3699 << DstTy.getSizeInBits() << ")\n");
3700 return nullptr;
3701 }
3702 auto Copy = MIB.buildInstr(TargetOpcode::COPY, {DstReg}, {})
3703 .addReg(SrcReg, 0, SubReg);
3704 RBI.constrainGenericRegister(DstReg, *RC, MRI);
3705 return Copy;
3706}
3707
3708bool AArch64InstructionSelector::selectMergeValues(
3709 MachineInstr &I, MachineRegisterInfo &MRI) {
3710 assert(I.getOpcode() == TargetOpcode::G_MERGE_VALUES && "unexpected opcode");
3711 const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
3712 const LLT SrcTy = MRI.getType(I.getOperand(1).getReg());
3713 assert(!DstTy.isVector() && !SrcTy.isVector() && "invalid merge operation");
3714 const RegisterBank &RB = *RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI);
3715
3716 if (I.getNumOperands() != 3)
3717 return false;
3718
3719 // Merging 2 s64s into an s128.
3720 if (DstTy == LLT::scalar(128)) {
3721 if (SrcTy.getSizeInBits() != 64)
3722 return false;
3723 Register DstReg = I.getOperand(0).getReg();
3724 Register Src1Reg = I.getOperand(1).getReg();
3725 Register Src2Reg = I.getOperand(2).getReg();
3726 auto Tmp = MIB.buildInstr(TargetOpcode::IMPLICIT_DEF, {DstTy}, {});
3727 MachineInstr *InsMI = emitLaneInsert(std::nullopt, Tmp.getReg(0), Src1Reg,
3728 /* LaneIdx */ 0, RB, MIB);
3729 if (!InsMI)
3730 return false;
3731 MachineInstr *Ins2MI = emitLaneInsert(DstReg, InsMI->getOperand(0).getReg(),
3732 Src2Reg, /* LaneIdx */ 1, RB, MIB);
3733 if (!Ins2MI)
3734 return false;
3735 constrainSelectedInstRegOperands(*InsMI, TII, TRI, RBI);
3736 constrainSelectedInstRegOperands(*Ins2MI, TII, TRI, RBI);
3737 I.eraseFromParent();
3738 return true;
3739 }
3740
3741 if (RB.getID() != AArch64::GPRRegBankID)
3742 return false;
3743
3744 if (DstTy.getSizeInBits() != 64 || SrcTy.getSizeInBits() != 32)
3745 return false;
3746
3747 auto *DstRC = &AArch64::GPR64RegClass;
3748 Register SubToRegDef = MRI.createVirtualRegister(DstRC);
3749 MachineInstr &SubRegMI = *BuildMI(*I.getParent(), I, I.getDebugLoc(),
3750 TII.get(TargetOpcode::SUBREG_TO_REG))
3751 .addDef(SubToRegDef)
3752 .addImm(0)
3753 .addUse(I.getOperand(1).getReg())
3754 .addImm(AArch64::sub_32);
3755 Register SubToRegDef2 = MRI.createVirtualRegister(DstRC);
3756 // Need to anyext the second scalar before we can use bfm
3757 MachineInstr &SubRegMI2 = *BuildMI(*I.getParent(), I, I.getDebugLoc(),
3758 TII.get(TargetOpcode::SUBREG_TO_REG))
3759 .addDef(SubToRegDef2)
3760 .addImm(0)
3761 .addUse(I.getOperand(2).getReg())
3762 .addImm(AArch64::sub_32);
3763 MachineInstr &BFM =
3764 *BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::BFMXri))
3765 .addDef(I.getOperand(0).getReg())
3766 .addUse(SubToRegDef)
3767 .addUse(SubToRegDef2)
3768 .addImm(32)
3769 .addImm(31);
3770 constrainSelectedInstRegOperands(SubRegMI, TII, TRI, RBI);
3771 constrainSelectedInstRegOperands(SubRegMI2, TII, TRI, RBI);
3772 constrainSelectedInstRegOperands(BFM, TII, TRI, RBI);
3773 I.eraseFromParent();
3774 return true;
3775}
3776
3777static bool getLaneCopyOpcode(unsigned &CopyOpc, unsigned &ExtractSubReg,
3778 const unsigned EltSize) {
3779 // Choose a lane copy opcode and subregister based off of the size of the
3780 // vector's elements.
3781 switch (EltSize) {
3782 case 8:
3783 CopyOpc = AArch64::DUPi8;
3784 ExtractSubReg = AArch64::bsub;
3785 break;
3786 case 16:
3787 CopyOpc = AArch64::DUPi16;
3788 ExtractSubReg = AArch64::hsub;
3789 break;
3790 case 32:
3791 CopyOpc = AArch64::DUPi32;
3792 ExtractSubReg = AArch64::ssub;
3793 break;
3794 case 64:
3795 CopyOpc = AArch64::DUPi64;
3796 ExtractSubReg = AArch64::dsub;
3797 break;
3798 default:
3799 // Unknown size, bail out.
3800 LLVM_DEBUG(dbgs() << "Elt size '" << EltSize << "' unsupported.\n");
3801 return false;
3802 }
3803 return true;
3804}
3805
3806MachineInstr *AArch64InstructionSelector::emitExtractVectorElt(
3807 std::optional<Register> DstReg, const RegisterBank &DstRB, LLT ScalarTy,
3808 Register VecReg, unsigned LaneIdx, MachineIRBuilder &MIRBuilder) const {
3809 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
3810 unsigned CopyOpc = 0;
3811 unsigned ExtractSubReg = 0;
3812 if (!getLaneCopyOpcode(CopyOpc, ExtractSubReg, ScalarTy.getSizeInBits())) {
3813 LLVM_DEBUG(
3814 dbgs() << "Couldn't determine lane copy opcode for instruction.\n");
3815 return nullptr;
3816 }
3817
3818 const TargetRegisterClass *DstRC =
3819 getRegClassForTypeOnBank(ScalarTy, DstRB, true);
3820 if (!DstRC) {
3821 LLVM_DEBUG(dbgs() << "Could not determine destination register class.\n");
3822 return nullptr;
3823 }
3824
3825 const RegisterBank &VecRB = *RBI.getRegBank(VecReg, MRI, TRI);
3826 const LLT &VecTy = MRI.getType(VecReg);
3827 const TargetRegisterClass *VecRC =
3828 getRegClassForTypeOnBank(VecTy, VecRB, true);
3829 if (!VecRC) {
3830 LLVM_DEBUG(dbgs() << "Could not determine source register class.\n");
3831 return nullptr;
3832 }
3833
3834 // The register that we're going to copy into.
3835 Register InsertReg = VecReg;
3836 if (!DstReg)
3837 DstReg = MRI.createVirtualRegister(DstRC);
3838 // If the lane index is 0, we just use a subregister COPY.
3839 if (LaneIdx == 0) {
3840 auto Copy = MIRBuilder.buildInstr(TargetOpcode::COPY, {*DstReg}, {})
3841 .addReg(VecReg, 0, ExtractSubReg);
3842 RBI.constrainGenericRegister(*DstReg, *DstRC, MRI);
3843 return &*Copy;
3844 }
3845
3846 // Lane copies require 128-bit wide registers. If we're dealing with an
3847 // unpacked vector, then we need to move up to that width. Insert an implicit
3848 // def and a subregister insert to get us there.
3849 if (VecTy.getSizeInBits() != 128) {
3850 MachineInstr *ScalarToVector = emitScalarToVector(
3851 VecTy.getSizeInBits(), &AArch64::FPR128RegClass, VecReg, MIRBuilder);
3852 if (!ScalarToVector)
3853 return nullptr;
3854 InsertReg = ScalarToVector->getOperand(0).getReg();
3855 }
3856
3857 MachineInstr *LaneCopyMI =
3858 MIRBuilder.buildInstr(CopyOpc, {*DstReg}, {InsertReg}).addImm(LaneIdx);
3859 constrainSelectedInstRegOperands(*LaneCopyMI, TII, TRI, RBI);
3860
3861 // Make sure that we actually constrain the initial copy.
3862 RBI.constrainGenericRegister(*DstReg, *DstRC, MRI);
3863 return LaneCopyMI;
3864}
3865
3866bool AArch64InstructionSelector::selectExtractElt(
3867 MachineInstr &I, MachineRegisterInfo &MRI) {
3868 assert(I.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT &&
3869 "unexpected opcode!");
3870 Register DstReg = I.getOperand(0).getReg();
3871 const LLT NarrowTy = MRI.getType(DstReg);
3872 const Register SrcReg = I.getOperand(1).getReg();
3873 const LLT WideTy = MRI.getType(SrcReg);
3874 (void)WideTy;
3875 assert(WideTy.getSizeInBits() >= NarrowTy.getSizeInBits() &&
3876 "source register size too small!");
3877 assert(!NarrowTy.isVector() && "cannot extract vector into vector!");
3878
3879 // Need the lane index to determine the correct copy opcode.
3880 MachineOperand &LaneIdxOp = I.getOperand(2);
3881 assert(LaneIdxOp.isReg() && "Lane index operand was not a register?");
3882
3883 if (RBI.getRegBank(DstReg, MRI, TRI)->getID() != AArch64::FPRRegBankID) {
3884 LLVM_DEBUG(dbgs() << "Cannot extract into GPR.\n");
3885 return false;
3886 }
3887
3888 // Find the index to extract from.
3889 auto VRegAndVal = getIConstantVRegValWithLookThrough(LaneIdxOp.getReg(), MRI);
3890 if (!VRegAndVal)
3891 return false;
3892 unsigned LaneIdx = VRegAndVal->Value.getSExtValue();
3893
3894
3895 const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
3896 MachineInstr *Extract = emitExtractVectorElt(DstReg, DstRB, NarrowTy, SrcReg,
3897 LaneIdx, MIB);
3898 if (!Extract)
3899 return false;
3900
3901 I.eraseFromParent();
3902 return true;
3903}
3904
3905bool AArch64InstructionSelector::selectSplitVectorUnmerge(
3906 MachineInstr &I, MachineRegisterInfo &MRI) {
3907 unsigned NumElts = I.getNumOperands() - 1;
3908 Register SrcReg = I.getOperand(NumElts).getReg();
3909 const LLT NarrowTy = MRI.getType(I.getOperand(0).getReg());
3910 const LLT SrcTy = MRI.getType(SrcReg);
3911
3912 assert(NarrowTy.isVector() && "Expected an unmerge into vectors");
3913 if (SrcTy.getSizeInBits() > 128) {
3914 LLVM_DEBUG(dbgs() << "Unexpected vector type for vec split unmerge");
3915 return false;
3916 }
3917
3918 // We implement a split vector operation by treating the sub-vectors as
3919 // scalars and extracting them.
3920 const RegisterBank &DstRB =
3921 *RBI.getRegBank(I.getOperand(0).getReg(), MRI, TRI);
3922 for (unsigned OpIdx = 0; OpIdx < NumElts; ++OpIdx) {
3923 Register Dst = I.getOperand(OpIdx).getReg();
3924 MachineInstr *Extract =
3925 emitExtractVectorElt(Dst, DstRB, NarrowTy, SrcReg, OpIdx, MIB);
3926 if (!Extract)
3927 return false;
3928 }
3929 I.eraseFromParent();
3930 return true;
3931}
3932
3933bool AArch64InstructionSelector::selectUnmergeValues(MachineInstr &I,
3934 MachineRegisterInfo &MRI) {
3935 assert(I.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
3936 "unexpected opcode");
3937
3938 // TODO: Handle unmerging into GPRs and from scalars to scalars.
3939 if (RBI.getRegBank(I.getOperand(0).getReg(), MRI, TRI)->getID() !=
3940 AArch64::FPRRegBankID ||
3941 RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI)->getID() !=
3942 AArch64::FPRRegBankID) {
3943 LLVM_DEBUG(dbgs() << "Unmerging vector-to-gpr and scalar-to-scalar "
3944 "currently unsupported.\n");
3945 return false;
3946 }
3947
3948 // The last operand is the vector source register, and every other operand is
3949 // a register to unpack into.
3950 unsigned NumElts = I.getNumOperands() - 1;
3951 Register SrcReg = I.getOperand(NumElts).getReg();
3952 const LLT NarrowTy = MRI.getType(I.getOperand(0).getReg());
3953 const LLT WideTy = MRI.getType(SrcReg);
3954 (void)WideTy;
3955 assert((WideTy.isVector() || WideTy.getSizeInBits() == 128) &&
3956 "can only unmerge from vector or s128 types!");
3957 assert(WideTy.getSizeInBits() > NarrowTy.getSizeInBits() &&
3958 "source register size too small!");
3959
3960 if (!NarrowTy.isScalar())
3961 return selectSplitVectorUnmerge(I, MRI);
3962
3963 // Choose a lane copy opcode and subregister based off of the size of the
3964 // vector's elements.
3965 unsigned CopyOpc = 0;
3966 unsigned ExtractSubReg = 0;
3967 if (!getLaneCopyOpcode(CopyOpc, ExtractSubReg, NarrowTy.getSizeInBits()))
3968 return false;
3969
3970 // Set up for the lane copies.
3971 MachineBasicBlock &MBB = *I.getParent();
3972
3973 // Stores the registers we'll be copying from.
3974 SmallVector<Register, 4> InsertRegs;
3975
3976 // We'll use the first register twice, so we only need NumElts-1 registers.
3977 unsigned NumInsertRegs = NumElts - 1;
3978
3979 // If our elements fit into exactly 128 bits, then we can copy from the source
3980 // directly. Otherwise, we need to do a bit of setup with some subregister
3981 // inserts.
3982 if (NarrowTy.getSizeInBits() * NumElts == 128) {
3983 InsertRegs = SmallVector<Register, 4>(NumInsertRegs, SrcReg);
3984 } else {
3985 // No. We have to perform subregister inserts. For each insert, create an
3986 // implicit def and a subregister insert, and save the register we create.
3987 const TargetRegisterClass *RC = getRegClassForTypeOnBank(
3988 LLT::fixed_vector(NumElts, WideTy.getScalarSizeInBits()),
3989 *RBI.getRegBank(SrcReg, MRI, TRI));
3990 unsigned SubReg = 0;
3991 bool Found = getSubRegForClass(RC, TRI, SubReg);
3992 (void)Found;
3993 assert(Found && "expected to find last operand's subreg idx");
3994 for (unsigned Idx = 0; Idx < NumInsertRegs; ++Idx) {
3995 Register ImpDefReg = MRI.createVirtualRegister(&AArch64::FPR128RegClass);
3996 MachineInstr &ImpDefMI =
3997 *BuildMI(MBB, I, I.getDebugLoc(), TII.get(TargetOpcode::IMPLICIT_DEF),
3998 ImpDefReg);
3999
4000 // Now, create the subregister insert from SrcReg.
4001 Register InsertReg = MRI.createVirtualRegister(&AArch64::FPR128RegClass);
4002 MachineInstr &InsMI =
4003 *BuildMI(MBB, I, I.getDebugLoc(),
4004 TII.get(TargetOpcode::INSERT_SUBREG), InsertReg)
4005 .addUse(ImpDefReg)
4006 .addUse(SrcReg)
4007 .addImm(SubReg);
4008
4009 constrainSelectedInstRegOperands(ImpDefMI, TII, TRI, RBI);
4010 constrainSelectedInstRegOperands(InsMI, TII, TRI, RBI);
4011
4012 // Save the register so that we can copy from it after.
4013 InsertRegs.push_back(InsertReg);
4014 }
4015 }
4016
4017 // Now that we've created any necessary subregister inserts, we can
4018 // create the copies.
4019 //
4020 // Perform the first copy separately as a subregister copy.
4021 Register CopyTo = I.getOperand(0).getReg();
4022 auto FirstCopy = MIB.buildInstr(TargetOpcode::COPY, {CopyTo}, {})
4023 .addReg(InsertRegs[0], 0, ExtractSubReg);
4024 constrainSelectedInstRegOperands(*FirstCopy, TII, TRI, RBI);
4025
4026 // Now, perform the remaining copies as vector lane copies.
4027 unsigned LaneIdx = 1;
4028 for (Register InsReg : InsertRegs) {
4029 Register CopyTo = I.getOperand(LaneIdx).getReg();
4030 MachineInstr &CopyInst =
4031 *BuildMI(MBB, I, I.getDebugLoc(), TII.get(CopyOpc), CopyTo)
4032 .addUse(InsReg)
4033 .addImm(LaneIdx);
4034 constrainSelectedInstRegOperands(CopyInst, TII, TRI, RBI);
4035 ++LaneIdx;
4036 }
4037
4038 // Separately constrain the first copy's destination. Because of the
4039 // limitation in constrainOperandRegClass, we can't guarantee that this will
4040 // actually be constrained. So, do it ourselves using the second operand.
4041 const TargetRegisterClass *RC =
4042 MRI.getRegClassOrNull(I.getOperand(1).getReg());
4043 if (!RC) {
4044 LLVM_DEBUG(dbgs() << "Couldn't constrain copy destination.\n");
4045 return false;
4046 }
4047
4048 RBI.constrainGenericRegister(CopyTo, *RC, MRI);
4049 I.eraseFromParent();
4050 return true;
4051}
4052
4053bool AArch64InstructionSelector::selectConcatVectors(
4054 MachineInstr &I, MachineRegisterInfo &MRI) {
4055 assert(I.getOpcode() == TargetOpcode::G_CONCAT_VECTORS &&
4056 "Unexpected opcode");
4057 Register Dst = I.getOperand(0).getReg();
4058 Register Op1 = I.getOperand(1).getReg();
4059 Register Op2 = I.getOperand(2).getReg();
4060 MachineInstr *ConcatMI = emitVectorConcat(Dst, Op1, Op2, MIB);
4061 if (!ConcatMI)
4062 return false;
4063 I.eraseFromParent();
4064 return true;
4065}
4066
4067unsigned
4068AArch64InstructionSelector::emitConstantPoolEntry(const Constant *CPVal,
4069 MachineFunction &MF) const {
4070 Type *CPTy = CPVal->getType();
4071 Align Alignment = MF.getDataLayout().getPrefTypeAlign(CPTy);
4072
4073 MachineConstantPool *MCP = MF.getConstantPool();
4074 return MCP->getConstantPoolIndex(CPVal, Alignment);
4075}
4076
4077MachineInstr *AArch64InstructionSelector::emitLoadFromConstantPool(
4078 const Constant *CPVal, MachineIRBuilder &MIRBuilder) const {
4079 const TargetRegisterClass *RC;
4080 unsigned Opc;
4081 bool IsTiny = TM.getCodeModel() == CodeModel::Tiny;
4082 unsigned Size = MIRBuilder.getDataLayout().getTypeStoreSize(CPVal->getType());
4083 switch (Size) {
4084 case 16:
4085 RC = &AArch64::FPR128RegClass;
4086 Opc = IsTiny ? AArch64::LDRQl : AArch64::LDRQui;
4087 break;
4088 case 8:
4089 RC = &AArch64::FPR64RegClass;
4090 Opc = IsTiny ? AArch64::LDRDl : AArch64::LDRDui;
4091 break;
4092 case 4:
4093 RC = &AArch64::FPR32RegClass;
4094 Opc = IsTiny ? AArch64::LDRSl : AArch64::LDRSui;
4095 break;
4096 case 2:
4097 RC = &AArch64::FPR16RegClass;
4098 Opc = AArch64::LDRHui;
4099 break;
4100 default:
4101 LLVM_DEBUG(dbgs() << "Could not load from constant pool of type "
4102 << *CPVal->getType());
4103 return nullptr;
4104 }
4105
4106 MachineInstr *LoadMI = nullptr;
4107 auto &MF = MIRBuilder.getMF();
4108 unsigned CPIdx = emitConstantPoolEntry(CPVal, MF);
4109 if (IsTiny && (Size == 16 || Size == 8 || Size == 4)) {
4110 // Use load(literal) for tiny code model.
4111 LoadMI = &*MIRBuilder.buildInstr(Opc, {RC}, {}).addConstantPoolIndex(CPIdx);
4112 } else {
4113 auto Adrp =
4114 MIRBuilder.buildInstr(AArch64::ADRP, {&AArch64::GPR64RegClass}, {})
4115 .addConstantPoolIndex(CPIdx, 0, AArch64II::MO_PAGE);
4116
4117 LoadMI = &*MIRBuilder.buildInstr(Opc, {RC}, {Adrp})
4118 .addConstantPoolIndex(
4119 CPIdx, 0, AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
4120
4121 constrainSelectedInstRegOperands(*Adrp, TII, TRI, RBI);
4122 }
4123
4124 MachinePointerInfo PtrInfo = MachinePointerInfo::getConstantPool(MF);
4125 LoadMI->addMemOperand(MF, MF.getMachineMemOperand(PtrInfo,
4126 MachineMemOperand::MOLoad,
4127 Size, Align(Size)));
4128 constrainSelectedInstRegOperands(*LoadMI, TII, TRI, RBI);
4129 return LoadMI;
4130}
4131
4132 /// Return an <Opcode, SubregIndex> pair to do a vector elt insert of a given
4133/// size and RB.
4134static std::pair<unsigned, unsigned>
4135getInsertVecEltOpInfo(const RegisterBank &RB, unsigned EltSize) {
4136 unsigned Opc, SubregIdx;
4137 if (RB.getID() == AArch64::GPRRegBankID) {
4138 if (EltSize == 8) {
4139 Opc = AArch64::INSvi8gpr;
4140 SubregIdx = AArch64::bsub;
4141 } else if (EltSize == 16) {
4142 Opc = AArch64::INSvi16gpr;
4143 SubregIdx = AArch64::ssub;
4144 } else if (EltSize == 32) {
4145 Opc = AArch64::INSvi32gpr;
4146 SubregIdx = AArch64::ssub;
4147 } else if (EltSize == 64) {
4148 Opc = AArch64::INSvi64gpr;
4149 SubregIdx = AArch64::dsub;
4150 } else {
4151 llvm_unreachable("invalid elt size!");
4152 }
4153 } else {
4154 if (EltSize == 8) {
4155 Opc = AArch64::INSvi8lane;
4156 SubregIdx = AArch64::bsub;
4157 } else if (EltSize == 16) {
4158 Opc = AArch64::INSvi16lane;
4159 SubregIdx = AArch64::hsub;
4160 } else if (EltSize == 32) {
4161 Opc = AArch64::INSvi32lane;
4162 SubregIdx = AArch64::ssub;
4163 } else if (EltSize == 64) {
4164 Opc = AArch64::INSvi64lane;
4165 SubregIdx = AArch64::dsub;
4166 } else {
4167 llvm_unreachable("invalid elt size!");
4168 }
4169 }
4170 return std::make_pair(Opc, SubregIdx);
4171}
4172
4173MachineInstr *AArch64InstructionSelector::emitInstr(
4174 unsigned Opcode, std::initializer_list<llvm::DstOp> DstOps,
4175 std::initializer_list<llvm::SrcOp> SrcOps, MachineIRBuilder &MIRBuilder,
4176 const ComplexRendererFns &RenderFns) const {
4177 assert(Opcode && "Expected an opcode?");
4178 assert(!isPreISelGenericOpcode(Opcode) &&
4179 "Function should only be used to produce selected instructions!");
4180 auto MI = MIRBuilder.buildInstr(Opcode, DstOps, SrcOps);
4181 if (RenderFns)
4182 for (auto &Fn : *RenderFns)
4183 Fn(MI);
4184 constrainSelectedInstRegOperands(*MI, TII, TRI, RBI);
4185 return &*MI;
4186}
4187
4188MachineInstr *AArch64InstructionSelector::emitAddSub(
4189 const std::array<std::array<unsigned, 2>, 5> &AddrModeAndSizeToOpcode,
4190 Register Dst, MachineOperand &LHS, MachineOperand &RHS,
4191 MachineIRBuilder &MIRBuilder) const {
4192 MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
4193 assert(LHS.isReg() && RHS.isReg() && "Expected register operands?");
4194 auto Ty = MRI.getType(LHS.getReg());
4195 assert(!Ty.isVector() && "Expected a scalar or pointer?");
4196 unsigned Size = Ty.getSizeInBits();
4197 assert((Size == 32 || Size == 64) && "Expected a 32-bit or 64-bit type only");
4198 bool Is32Bit = Size == 32;
4199
4200 // INSTRri form with positive arithmetic immediate.
4201 if (auto Fns = selectArithImmed(RHS))
4202 return emitInstr(AddrModeAndSizeToOpcode[0][Is32Bit], {Dst}, {LHS},
4203 MIRBuilder, Fns);
4204
4205 // INSTRri form with negative arithmetic immediate.
4206 if (auto Fns = selectNegArithImmed(RHS))
4207 return emitInstr(AddrModeAndSizeToOpcode[3][Is32Bit], {Dst}, {LHS},
4208 MIRBuilder, Fns);
4209
4210 // INSTRrx form.
4211 if (auto Fns = selectArithExtendedRegister(RHS))
4212 return emitInstr(AddrModeAndSizeToOpcode[4][Is32Bit], {Dst}, {LHS},
4213 MIRBuilder, Fns);
4214
4215 // INSTRrs form.
4216 if (auto Fns = selectShiftedRegister(RHS))
4217 return emitInstr(AddrModeAndSizeToOpcode[1][Is32Bit], {Dst}, {LHS},
4218 MIRBuilder, Fns);
4219 return emitInstr(AddrModeAndSizeToOpcode[2][Is32Bit], {Dst}, {LHS, RHS},
4220 MIRBuilder);
4221}
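// Row layout of AddrModeAndSizeToOpcode as consumed above: [0] = ri (positive
// immediate), [1] = rs (shifted register), [2] = rr (register-register
// fallback), [3] = ri of the inverse operation (negated immediate), and
// [4] = rx (extended register). Index [Is32Bit] picks the W-form ([1]) or
// X-form ([0]) opcode.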
4222
4223 MachineInstr *
4224 AArch64InstructionSelector::emitADD(Register DefReg, MachineOperand &LHS,
4225 MachineOperand &RHS,
4226 MachineIRBuilder &MIRBuilder) const {
4227 const std::array<std::array<unsigned, 2>, 5> OpcTable{
4228 {{AArch64::ADDXri, AArch64::ADDWri},
4229 {AArch64::ADDXrs, AArch64::ADDWrs},
4230 {AArch64::ADDXrr, AArch64::ADDWrr},
4231 {AArch64::SUBXri, AArch64::SUBWri},
4232 {AArch64::ADDXrx, AArch64::ADDWrx}}};
4233 return emitAddSub(OpcTable, DefReg, LHS, RHS, MIRBuilder);
4234}
4235
4236 MachineInstr *
4237 AArch64InstructionSelector::emitADDS(Register Dst, MachineOperand &LHS,
4238 MachineOperand &RHS,
4239 MachineIRBuilder &MIRBuilder) const {
4240 const std::array<std::array<unsigned, 2>, 5> OpcTable{
4241 {{AArch64::ADDSXri, AArch64::ADDSWri},
4242 {AArch64::ADDSXrs, AArch64::ADDSWrs},
4243 {AArch64::ADDSXrr, AArch64::ADDSWrr},
4244 {AArch64::SUBSXri, AArch64::SUBSWri},
4245 {AArch64::ADDSXrx, AArch64::ADDSWrx}}};
4246 return emitAddSub(OpcTable, Dst, LHS, RHS, MIRBuilder);
4247}
4248
4249 MachineInstr *
4250 AArch64InstructionSelector::emitSUBS(Register Dst, MachineOperand &LHS,
4251 MachineOperand &RHS,
4252 MachineIRBuilder &MIRBuilder) const {
4253 const std::array<std::array<unsigned, 2>, 5> OpcTable{
4254 {{AArch64::SUBSXri, AArch64::SUBSWri},
4255 {AArch64::SUBSXrs, AArch64::SUBSWrs},
4256 {AArch64::SUBSXrr, AArch64::SUBSWrr},
4257 {AArch64::ADDSXri, AArch64::ADDSWri},
4258 {AArch64::SUBSXrx, AArch64::SUBSWrx}}};
4259 return emitAddSub(OpcTable, Dst, LHS, RHS, MIRBuilder);
4260}
4261
4262 MachineInstr *
4263 AArch64InstructionSelector::emitADCS(Register Dst, MachineOperand &LHS,
4264 MachineOperand &RHS,
4265 MachineIRBuilder &MIRBuilder) const {
4266 assert(LHS.isReg() && RHS.isReg() && "Expected register operands?");
4267 MachineRegisterInfo *MRI = MIRBuilder.getMRI();
4268 bool Is32Bit = (MRI->getType(LHS.getReg()).getSizeInBits() == 32);
4269 static const unsigned OpcTable[2] = {AArch64::ADCSXr, AArch64::ADCSWr};
4270 return emitInstr(OpcTable[Is32Bit], {Dst}, {LHS, RHS}, MIRBuilder);
4271}
4272
4273 MachineInstr *
4274 AArch64InstructionSelector::emitSBCS(Register Dst, MachineOperand &LHS,
4275 MachineOperand &RHS,
4276 MachineIRBuilder &MIRBuilder) const {
4277 assert(LHS.isReg() && RHS.isReg() && "Expected register operands?");
4278 MachineRegisterInfo *MRI = MIRBuilder.getMRI();
4279 bool Is32Bit = (MRI->getType(LHS.getReg()).getSizeInBits() == 32);
4280 static const unsigned OpcTable[2] = {AArch64::SBCSXr, AArch64::SBCSWr};
4281 return emitInstr(OpcTable[Is32Bit], {Dst}, {LHS, RHS}, MIRBuilder);
4282}
4283
4284 MachineInstr *
4285 AArch64InstructionSelector::emitCMN(MachineOperand &LHS, MachineOperand &RHS,
4286 MachineIRBuilder &MIRBuilder) const {
4287 MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
4288 bool Is32Bit = (MRI.getType(LHS.getReg()).getSizeInBits() == 32);
4289 auto RC = Is32Bit ? &AArch64::GPR32RegClass : &AArch64::GPR64RegClass;
4290 return emitADDS(MRI.createVirtualRegister(RC), LHS, RHS, MIRBuilder);
4291}
4292
4293 MachineInstr *
4294 AArch64InstructionSelector::emitTST(MachineOperand &LHS, MachineOperand &RHS,
4295 MachineIRBuilder &MIRBuilder) const {
4296 assert(LHS.isReg() && RHS.isReg() && "Expected register operands?");
4297 MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
4298 LLT Ty = MRI.getType(LHS.getReg());
4299 unsigned RegSize = Ty.getSizeInBits();
4300 bool Is32Bit = (RegSize == 32);
4301 const unsigned OpcTable[3][2] = {{AArch64::ANDSXri, AArch64::ANDSWri},
4302 {AArch64::ANDSXrs, AArch64::ANDSWrs},
4303 {AArch64::ANDSXrr, AArch64::ANDSWrr}};
4304 // ANDS needs a logical immediate for its immediate form. Check if we can
4305 // fold one in.
4306 if (auto ValAndVReg = getIConstantVRegValWithLookThrough(RHS.getReg(), MRI)) {
4307 int64_t Imm = ValAndVReg->Value.getSExtValue();
4308
4309 if (AArch64_AM::isLogicalImmediate(Imm, RegSize)) {
4310 auto TstMI = MIRBuilder.buildInstr(OpcTable[0][Is32Bit], {Ty}, {LHS});
4311 TstMI.addImm(AArch64_AM::encodeLogicalImmediate(Imm, RegSize));
4312 constrainSelectedInstRegOperands(*TstMI, TII, TRI, RBI);
4313 return &*TstMI;
4314 }
4315 }
4316
4317 if (auto Fns = selectLogicalShiftedRegister(RHS))
4318 return emitInstr(OpcTable[1][Is32Bit], {Ty}, {LHS}, MIRBuilder, Fns);
4319 return emitInstr(OpcTable[2][Is32Bit], {Ty}, {LHS, RHS}, MIRBuilder);
4320}
4321
4322MachineInstr *AArch64InstructionSelector::emitIntegerCompare(
4323 MachineOperand &LHS, MachineOperand &RHS, MachineOperand &Predicate,
4324 MachineIRBuilder &MIRBuilder) const {
4325 assert(LHS.isReg() && RHS.isReg() && "Expected LHS and RHS to be registers!");
4326 assert(Predicate.isPredicate() && "Expected predicate?");
4327 MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
4328 LLT CmpTy = MRI.getType(LHS.getReg());
4329 assert(!CmpTy.isVector() && "Expected scalar or pointer");
4330 unsigned Size = CmpTy.getSizeInBits();
4331 (void)Size;
4332 assert((Size == 32 || Size == 64) && "Expected a 32-bit or 64-bit LHS/RHS?");
4333 // Fold the compare into a cmn or tst if possible.
4334 if (auto FoldCmp = tryFoldIntegerCompare(LHS, RHS, Predicate, MIRBuilder))
4335 return FoldCmp;
4336 auto Dst = MRI.cloneVirtualRegister(LHS.getReg());
4337 return emitSUBS(Dst, LHS, RHS, MIRBuilder);
4338}
4339
4340MachineInstr *AArch64InstructionSelector::emitCSetForFCmp(
4341 Register Dst, CmpInst::Predicate Pred, MachineIRBuilder &MIRBuilder) const {
4342 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
4343#ifndef NDEBUG
4344 LLT Ty = MRI.getType(Dst);
4345 assert(!Ty.isVector() && Ty.getSizeInBits() == 32 &&
4346 "Expected a 32-bit scalar register?");
4347#endif
4348 const Register ZReg = AArch64::WZR;
4349 AArch64CC::CondCode CC1, CC2;
4350 changeFCMPPredToAArch64CC(Pred, CC1, CC2);
4351 auto InvCC1 = AArch64CC::getInvertedCondCode(CC1);
4352 if (CC2 == AArch64CC::AL)
4353 return emitCSINC(/*Dst=*/Dst, /*Src1=*/ZReg, /*Src2=*/ZReg, InvCC1,
4354 MIRBuilder);
4355 const TargetRegisterClass *RC = &AArch64::GPR32RegClass;
4356 Register Def1Reg = MRI.createVirtualRegister(RC);
4357 Register Def2Reg = MRI.createVirtualRegister(RC);
4358 auto InvCC2 = AArch64CC::getInvertedCondCode(CC2);
4359 emitCSINC(/*Dst=*/Def1Reg, /*Src1=*/ZReg, /*Src2=*/ZReg, InvCC1, MIRBuilder);
4360 emitCSINC(/*Dst=*/Def2Reg, /*Src1=*/ZReg, /*Src2=*/ZReg, InvCC2, MIRBuilder);
4361 auto OrMI = MIRBuilder.buildInstr(AArch64::ORRWrr, {Dst}, {Def1Reg, Def2Reg});
4362 constrainSelectedInstRegOperands(*OrMI, TII, TRI, RBI);
4363 return &*OrMI;
4364}
4365
4366MachineInstr *AArch64InstructionSelector::emitFPCompare(
4367 Register LHS, Register RHS, MachineIRBuilder &MIRBuilder,
4368 std::optional<CmpInst::Predicate> Pred) const {
4369 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
4370 LLT Ty = MRI.getType(LHS);
4371 if (Ty.isVector())
4372 return nullptr;
4373 unsigned OpSize = Ty.getSizeInBits();
4374 assert(OpSize == 16 || OpSize == 32 || OpSize == 64);
4375
4376 // If this is a compare against +0.0, then we don't have
4377 // to explicitly materialize a constant.
4378 const ConstantFP *FPImm = getConstantFPVRegVal(RHS, MRI);
4379 bool ShouldUseImm = FPImm && (FPImm->isZero() && !FPImm->isNegative());
4380
4381 auto IsEqualityPred = [](CmpInst::Predicate P) {
4382 return P == CmpInst::FCMP_OEQ || P == CmpInst::FCMP_ONE ||
4383 P == CmpInst::FCMP_UEQ || P == CmpInst::FCMP_UNE;
4384 };
4385 if (!ShouldUseImm && Pred && IsEqualityPred(*Pred)) {
4386 // Try commutating the operands.
4387 const ConstantFP *LHSImm = getConstantFPVRegVal(LHS, MRI);
4388 if (LHSImm && (LHSImm->isZero() && !LHSImm->isNegative())) {
4389 ShouldUseImm = true;
4390 std::swap(LHS, RHS);
4391 }
4392 }
4393 unsigned CmpOpcTbl[2][3] = {
4394 {AArch64::FCMPHrr, AArch64::FCMPSrr, AArch64::FCMPDrr},
4395 {AArch64::FCMPHri, AArch64::FCMPSri, AArch64::FCMPDri}};
4396 unsigned CmpOpc =
4397 CmpOpcTbl[ShouldUseImm][OpSize == 16 ? 0 : (OpSize == 32 ? 1 : 2)];
4398
4399 // Partially build the compare. Decide if we need to add a use for the
4400 // third operand based off whether or not we're comparing against 0.0.
4401 auto CmpMI = MIRBuilder.buildInstr(CmpOpc).addUse(LHS);
4402 CmpMI.setMIFlags(MachineInstr::NoFPExcept);
4403 if (!ShouldUseImm)
4404 CmpMI.addUse(RHS);
4405 constrainSelectedInstRegOperands(*CmpMI, TII, TRI, RBI);
4406 return &*CmpMI;
4407}
4408
4409MachineInstr *AArch64InstructionSelector::emitVectorConcat(
4410 std::optional<Register> Dst, Register Op1, Register Op2,
4411 MachineIRBuilder &MIRBuilder) const {
4412 // We implement a vector concat by:
4413 // 1. Use scalar_to_vector to insert the lower vector into the larger dest
4414 // 2. Insert the upper vector into the destination's upper element
4415 // TODO: some of this code is common with G_BUILD_VECTOR handling.
4416 MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
4417
4418 const LLT Op1Ty = MRI.getType(Op1);
4419 const LLT Op2Ty = MRI.getType(Op2);
4420
4421 if (Op1Ty != Op2Ty) {
4422 LLVM_DEBUG(dbgs() << "Could not do vector concat of differing vector tys");
4423 return nullptr;
4424 }
4425 assert(Op1Ty.isVector() && "Expected a vector for vector concat");
4426
4427 if (Op1Ty.getSizeInBits() >= 128) {
4428 LLVM_DEBUG(dbgs() << "Vector concat not supported for full size vectors");
4429 return nullptr;
4430 }
4431
4432 // At the moment we just support 64 bit vector concats.
4433 if (Op1Ty.getSizeInBits() != 64) {
4434 LLVM_DEBUG(dbgs() << "Vector concat only supported for 64b vectors");
4435 return nullptr;
4436 }
4437
4438 const LLT ScalarTy = LLT::scalar(Op1Ty.getSizeInBits());
4439 const RegisterBank &FPRBank = *RBI.getRegBank(Op1, MRI, TRI);
4440 const TargetRegisterClass *DstRC =
4441 getRegClassForTypeOnBank(Op1Ty.multiplyElements(2), FPRBank);
4442
4443 MachineInstr *WidenedOp1 =
4444 emitScalarToVector(ScalarTy.getSizeInBits(), DstRC, Op1, MIRBuilder);
4445 MachineInstr *WidenedOp2 =
4446 emitScalarToVector(ScalarTy.getSizeInBits(), DstRC, Op2, MIRBuilder);
4447 if (!WidenedOp1 || !WidenedOp2) {
4448 LLVM_DEBUG(dbgs() << "Could not emit a vector from scalar value");
4449 return nullptr;
4450 }
4451
4452 // Now do the insert of the upper element.
4453 unsigned InsertOpc, InsSubRegIdx;
4454 std::tie(InsertOpc, InsSubRegIdx) =
4455 getInsertVecEltOpInfo(FPRBank, ScalarTy.getSizeInBits());
4456
4457 if (!Dst)
4458 Dst = MRI.createVirtualRegister(DstRC);
4459 auto InsElt =
4460 MIRBuilder
4461 .buildInstr(InsertOpc, {*Dst}, {WidenedOp1->getOperand(0).getReg()})
4462 .addImm(1) /* Lane index */
4463 .addUse(WidenedOp2->getOperand(0).getReg())
4464 .addImm(0);
4465 constrainSelectedInstRegOperands(*InsElt, TII, TRI, RBI);
4466 return &*InsElt;
4467}
4468
4469 MachineInstr *
4470 AArch64InstructionSelector::emitCSINC(Register Dst, Register Src1,
4471 Register Src2, AArch64CC::CondCode Pred,
4472 MachineIRBuilder &MIRBuilder) const {
4473 auto &MRI = *MIRBuilder.getMRI();
4474 const RegClassOrRegBank &RegClassOrBank = MRI.getRegClassOrRegBank(Dst);
4475 // If we used a register class, then this won't necessarily have an LLT.
4476 // Compute the size based off whether or not we have a class or bank.
4477 unsigned Size;
4478 if (const auto *RC = RegClassOrBank.dyn_cast<const TargetRegisterClass *>())
4479 Size = TRI.getRegSizeInBits(*RC);
4480 else
4481 Size = MRI.getType(Dst).getSizeInBits();
4482 // Some opcodes use s1.
4483 assert(Size <= 64 && "Expected 64 bits or less only!");
4484 static const unsigned OpcTable[2] = {AArch64::CSINCWr, AArch64::CSINCXr};
4485 unsigned Opc = OpcTable[Size == 64];
4486 auto CSINC = MIRBuilder.buildInstr(Opc, {Dst}, {Src1, Src2}).addImm(Pred);
4487 constrainSelectedInstRegOperands(*CSINC, TII, TRI, RBI);
4488 return &*CSINC;
4489}
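// CSINC Dst, ZR, ZR, <invcc> is the canonical cset: it produces 1 when the
// original condition holds and 0 otherwise, which is how the G_ICMP and
// G_FCMP paths above materialize their boolean results.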
4490
4491MachineInstr *AArch64InstructionSelector::emitCarryIn(MachineInstr &I,
4492 Register CarryReg) {
4493 MachineRegisterInfo *MRI = MIB.getMRI();
4494 unsigned Opcode = I.getOpcode();
4495
4496 // If the instruction is a SUB, we need to negate the carry,
4497 // because borrowing is indicated by carry-flag == 0.
4498 bool NeedsNegatedCarry =
4499 (Opcode == TargetOpcode::G_USUBE || Opcode == TargetOpcode::G_SSUBE);
4500
4501 // If the previous instruction will already produce the correct carry, do not
4502 // emit a carry generating instruction. E.g. for G_UADDE/G_USUBE sequences
4503 // generated during legalization of wide add/sub. This optimization depends on
4504 // these sequences not being interrupted by other instructions.
4505 // We have to select the previous instruction before the carry-using
4506 // instruction is deleted by the calling function, otherwise the previous
4507 // instruction might become dead and would get deleted.
4508 MachineInstr *SrcMI = MRI->getVRegDef(CarryReg);
4509 if (SrcMI == I.getPrevNode()) {
4510 if (auto *CarrySrcMI = dyn_cast<GAddSubCarryOut>(SrcMI)) {
4511 bool ProducesNegatedCarry = CarrySrcMI->isSub();
4512 if (NeedsNegatedCarry == ProducesNegatedCarry &&
4513 CarrySrcMI->isUnsigned() &&
4514 CarrySrcMI->getCarryOutReg() == CarryReg &&
4515 selectAndRestoreState(*SrcMI))
4516 return nullptr;
4517 }
4518 }
4519
4520 Register DeadReg = MRI->createVirtualRegister(&AArch64::GPR32RegClass);
4521
4522 if (NeedsNegatedCarry) {
4523 // (0 - Carry) sets !C in NZCV when Carry == 1
4524 Register ZReg = AArch64::WZR;
4525 return emitInstr(AArch64::SUBSWrr, {DeadReg}, {ZReg, CarryReg}, MIB);
4526 }
4527
4528 // (Carry - 1) sets !C in NZCV when Carry == 0
4529 auto Fns = select12BitValueWithLeftShift(1);
4530 return emitInstr(AArch64::SUBSWri, {DeadReg}, {CarryReg}, MIB, Fns);
4531}
4532
4533bool AArch64InstructionSelector::selectOverflowOp(MachineInstr &I,
4534 MachineRegisterInfo &MRI) {
4535 auto &CarryMI = cast<GAddSubCarryOut>(I);
4536
4537 if (auto *CarryInMI = dyn_cast<GAddSubCarryInOut>(&I)) {
4538 // Set NZCV carry according to carry-in VReg
4539 emitCarryIn(I, CarryInMI->getCarryInReg());
4540 }
4541
4542 // Emit the operation and get the correct condition code.
4543 auto OpAndCC = emitOverflowOp(I.getOpcode(), CarryMI.getDstReg(),
4544 CarryMI.getLHS(), CarryMI.getRHS(), MIB);
4545
4546 Register CarryOutReg = CarryMI.getCarryOutReg();
4547
4548 // Don't convert carry-out to VReg if it is never used
4549 if (!MRI.use_nodbg_empty(CarryOutReg)) {
4550 // Now, put the overflow result in the register given by the first operand
4551 // to the overflow op. CSINC increments the result when the predicate is
4552 // false, so to get the increment when it's true, we need to use the
4553 // inverse. In this case, we want to increment when carry is set.
4554 Register ZReg = AArch64::WZR;
4555 emitCSINC(/*Dst=*/CarryOutReg, /*Src1=*/ZReg, /*Src2=*/ZReg,
4556 getInvertedCondCode(OpAndCC.second), MIB);
4557 }
4558
4559 I.eraseFromParent();
4560 return true;
4561}
4562
4563std::pair<MachineInstr *, AArch64CC::CondCode>
4564AArch64InstructionSelector::emitOverflowOp(unsigned Opcode, Register Dst,
4565 MachineOperand &LHS,
4566 MachineOperand &RHS,
4567 MachineIRBuilder &MIRBuilder) const {
4568 switch (Opcode) {
4569 default:
4570 llvm_unreachable("Unexpected opcode!");
4571 case TargetOpcode::G_SADDO:
4572 return std::make_pair(emitADDS(Dst, LHS, RHS, MIRBuilder), AArch64CC::VS);
4573 case TargetOpcode::G_UADDO:
4574 return std::make_pair(emitADDS(Dst, LHS, RHS, MIRBuilder), AArch64CC::HS);
4575 case TargetOpcode::G_SSUBO:
4576 return std::make_pair(emitSUBS(Dst, LHS, RHS, MIRBuilder), AArch64CC::VS);
4577 case TargetOpcode::G_USUBO:
4578 return std::make_pair(emitSUBS(Dst, LHS, RHS, MIRBuilder), AArch64CC::LO);
4579 case TargetOpcode::G_SADDE:
4580 return std::make_pair(emitADCS(Dst, LHS, RHS, MIRBuilder), AArch64CC::VS);
4581 case TargetOpcode::G_UADDE:
4582 return std::make_pair(emitADCS(Dst, LHS, RHS, MIRBuilder), AArch64CC::HS);
4583 case TargetOpcode::G_SSUBE:
4584 return std::make_pair(emitSBCS(Dst, LHS, RHS, MIRBuilder), AArch64CC::VS);
4585 case TargetOpcode::G_USUBE:
4586 return std::make_pair(emitSBCS(Dst, LHS, RHS, MIRBuilder), AArch64CC::LO);
4587 }
4588}
4589
4590/// Returns true if @p Val is a tree of AND/OR/CMP operations that can be
4591/// expressed as a conjunction.
4592/// \param CanNegate Set to true if we can negate the whole sub-tree just by
4593/// changing the conditions on the CMP tests.
4594/// (this means we can call emitConjunctionRec() with
4595/// Negate==true on this sub-tree)
4596/// \param MustBeFirst Set to true if this subtree needs to be negated and we
4597/// cannot do the negation naturally. We are required to
4598/// emit the subtree first in this case.
4599 /// \param WillNegate Is true if we are called when the result of this
4600/// subexpression must be negated. This happens when the
4601/// outer expression is an OR. We can use this fact to know
4602/// that we have a double negation (or (or ...) ...) that
4603/// can be implemented for free.
4604static bool canEmitConjunction(Register Val, bool &CanNegate, bool &MustBeFirst,
4605 bool WillNegate, MachineRegisterInfo &MRI,
4606 unsigned Depth = 0) {
4607 if (!MRI.hasOneNonDBGUse(Val))
4608 return false;
4609 MachineInstr *ValDef = MRI.getVRegDef(Val);
4610 unsigned Opcode = ValDef->getOpcode();
4611 if (isa<GAnyCmp>(ValDef)) {
4612 CanNegate = true;
4613 MustBeFirst = false;
4614 return true;
4615 }
4616 // Protect against exponential runtime and stack overflow.
4617 if (Depth > 6)
4618 return false;
4619 if (Opcode == TargetOpcode::G_AND || Opcode == TargetOpcode::G_OR) {
4620 bool IsOR = Opcode == TargetOpcode::G_OR;
4621 Register O0 = ValDef->getOperand(1).getReg();
4622 Register O1 = ValDef->getOperand(2).getReg();
4623 bool CanNegateL;
4624 bool MustBeFirstL;
4625 if (!canEmitConjunction(O0, CanNegateL, MustBeFirstL, IsOR, MRI, Depth + 1))
4626 return false;
4627 bool CanNegateR;
4628 bool MustBeFirstR;
4629 if (!canEmitConjunction(O1, CanNegateR, MustBeFirstR, IsOR, MRI, Depth + 1))
4630 return false;
4631
4632 if (MustBeFirstL && MustBeFirstR)
4633 return false;
4634
4635 if (IsOR) {
4636 // For an OR expression we need to be able to naturally negate at least
4637 // one side or we cannot do the transformation at all.
4638 if (!CanNegateL && !CanNegateR)
4639 return false;
4640 // If the result of the OR will be negated and we can naturally negate
4641 // the leaves, then this sub-tree as a whole negates naturally.
4642 CanNegate = WillNegate && CanNegateL && CanNegateR;
4643 // If we cannot naturally negate the whole sub-tree, then this must be
4644 // emitted first.
4645 MustBeFirst = !CanNegate;
4646 } else {
4647 assert(Opcode == TargetOpcode::G_AND && "Must be G_AND");
4648 // We cannot naturally negate an AND operation.
4649 CanNegate = false;
4650 MustBeFirst = MustBeFirstL || MustBeFirstR;
4651 }
4652 return true;
4653 }
4654 return false;
4655}
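// Example of a tree accepted here: %c = G_AND (G_ICMP eq, %a, %b),
// (G_ICMP slt, %x, %y), which emitConjunctionRec below lowers to a SUBS for
// the first compare followed by a CCMP whose NZCV immediate supplies the flag
// value used when the first condition does not hold.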
4656
4657MachineInstr *AArch64InstructionSelector::emitConditionalComparison(
4658 Register LHS, Register RHS, CmpInst::Predicate CC,
4659 AArch64CC::CondCode Predicate, AArch64CC::CondCode OutCC,
4660 MachineIRBuilder &MIB) const {
4661 // TODO: emit CMN as an optimization.
4662 auto &MRI = *MIB.getMRI();
4663 LLT OpTy = MRI.getType(LHS);
4664 unsigned CCmpOpc;
4665 std::optional<ValueAndVReg> C;
4666 if (CmpInst::isIntPredicate(CC)) {
4667 assert(OpTy.getSizeInBits() == 32 || OpTy.getSizeInBits() == 64);
4668 C = getIConstantVRegValWithLookThrough(RHS, MRI);
4669 if (C && C->Value.ult(32))
4670 CCmpOpc = OpTy.getSizeInBits() == 32 ? AArch64::CCMPWi : AArch64::CCMPXi;
4671 else
4672 CCmpOpc = OpTy.getSizeInBits() == 32 ? AArch64::CCMPWr : AArch64::CCMPXr;
4673 } else {
4674 assert(OpTy.getSizeInBits() == 16 || OpTy.getSizeInBits() == 32 ||
4675 OpTy.getSizeInBits() == 64);
4676 switch (OpTy.getSizeInBits()) {
4677 case 16:
4678 assert(STI.hasFullFP16() && "Expected Full FP16 for fp16 comparisons");
4679 CCmpOpc = AArch64::FCCMPHrr;
4680 break;
4681 case 32:
4682 CCmpOpc = AArch64::FCCMPSrr;
4683 break;
4684 case 64:
4685 CCmpOpc = AArch64::FCCMPDrr;
4686 break;
4687 default:
4688 return nullptr;
4689 }
4690 }
4691 AArch64CC::CondCode InvOutCC = AArch64CC::getInvertedCondCode(OutCC);
4692 unsigned NZCV = AArch64CC::getNZCVToSatisfyCondCode(InvOutCC);
4693 auto CCmp =
4694 MIB.buildInstr(CCmpOpc, {}, {LHS});
4695 if (CCmpOpc == AArch64::CCMPWi || CCmpOpc == AArch64::CCMPXi)
4696 CCmp.addImm(C->Value.getZExtValue());
4697 else
4698 CCmp.addReg(RHS);
4699 CCmp.addImm(NZCV).addImm(Predicate);
4700 constrainSelectedInstRegOperands(*CCmp, TII, TRI, RBI);
4701 return &*CCmp;
4702}
4703
4704MachineInstr *AArch64InstructionSelector::emitConjunctionRec(
4705 Register Val, AArch64CC::CondCode &OutCC, bool Negate, Register CCOp,
4706 AArch64CC::CondCode Predicate, MachineIRBuilder &MIB) const {
4707 // We're at a tree leaf, produce a conditional comparison operation.
4708 auto &MRI = *MIB.getMRI();
4709 MachineInstr *ValDef = MRI.getVRegDef(Val);
4710 unsigned Opcode = ValDef->getOpcode();
4711 if (auto *Cmp = dyn_cast<GAnyCmp>(ValDef)) {
4712 Register LHS = Cmp->getLHSReg();
4713 Register RHS = Cmp->getRHSReg();
4714 CmpInst::Predicate CC = Cmp->getCond();
4715 if (Negate)
4716 CC = CmpInst::getInversePredicate(CC);
4717 if (isa<GICmp>(Cmp)) {
4718 OutCC = changeICMPPredToAArch64CC(CC);
4719 } else {
4720 // Handle special FP cases.
4721 AArch64CC::CondCode ExtraCC;
4722 changeFPCCToANDAArch64CC(CC, OutCC, ExtraCC);
4723 // Some floating point conditions can't be tested with a single condition
4724 // code. Construct an additional comparison in this case.
4725 if (ExtraCC != AArch64CC::AL) {
4726 MachineInstr *ExtraCmp;
4727 if (!CCOp)
4728 ExtraCmp = emitFPCompare(LHS, RHS, MIB, CC);
4729 else
4730 ExtraCmp =
4731 emitConditionalComparison(LHS, RHS, CC, Predicate, ExtraCC, MIB);
4732 CCOp = ExtraCmp->getOperand(0).getReg();
4733 Predicate = ExtraCC;
4734 }
4735 }
4736
4737 // Produce a normal comparison if we are first in the chain
4738 if (!CCOp) {
4739 auto Dst = MRI.cloneVirtualRegister(LHS);
4740 if (isa<GICmp>(Cmp))
4741 return emitSUBS(Dst, Cmp->getOperand(2), Cmp->getOperand(3), MIB);
4742 return emitFPCompare(Cmp->getOperand(2).getReg(),
4743 Cmp->getOperand(3).getReg(), MIB);
4744 }
4745 // Otherwise produce a ccmp.
4746 return emitConditionalComparison(LHS, RHS, CC, Predicate, OutCC, MIB);
4747 }
4748 assert(MRI.hasOneNonDBGUse(Val) && "Valid conjunction/disjunction tree");
4749
4750 bool IsOR = Opcode == TargetOpcode::G_OR;
4751
4752 Register LHS = ValDef->getOperand(1).getReg();
4753 bool CanNegateL;
4754 bool MustBeFirstL;
4755 bool ValidL = canEmitConjunction(LHS, CanNegateL, MustBeFirstL, IsOR, MRI);
4756 assert(ValidL && "Valid conjunction/disjunction tree");
4757 (void)ValidL;
4758
4759 Register RHS = ValDef->getOperand(2).getReg();
4760 bool CanNegateR;
4761 bool MustBeFirstR;
4762 bool ValidR = canEmitConjunction(RHS, CanNegateR, MustBeFirstR, IsOR, MRI);
4763 assert(ValidR && "Valid conjunction/disjunction tree");
4764 (void)ValidR;
4765
4766 // Swap sub-tree that must come first to the right side.
4767 if (MustBeFirstL) {
4768 assert(!MustBeFirstR && "Valid conjunction/disjunction tree");
4769 std::swap(LHS, RHS);
4770 std::swap(CanNegateL, CanNegateR);
4771 std::swap(MustBeFirstL, MustBeFirstR);
4772 }
4773
4774 bool NegateR;
4775 bool NegateAfterR;
4776 bool NegateL;
4777 bool NegateAfterAll;
4778 if (Opcode == TargetOpcode::G_OR) {
4779 // Swap the sub-tree that we can negate naturally to the left.
4780 if (!CanNegateL) {
4781 assert(CanNegateR && "at least one side must be negatable");
4782 assert(!MustBeFirstR && "invalid conjunction/disjunction tree");
4783 assert(!Negate);
4784 std::swap(LHS, RHS);
4785 NegateR = false;
4786 NegateAfterR = true;
4787 } else {
4788 // Negate the left sub-tree if possible, otherwise negate the result.
4789 NegateR = CanNegateR;
4790 NegateAfterR = !CanNegateR;
4791 }
4792 NegateL = true;
4793 NegateAfterAll = !Negate;
4794 } else {
4795 assert(Opcode == TargetOpcode::G_AND &&
4796 "Valid conjunction/disjunction tree");
4797 assert(!Negate && "Valid conjunction/disjunction tree");
4798
4799 NegateL = false;
4800 NegateR = false;
4801 NegateAfterR = false;
4802 NegateAfterAll = false;
4803 }
4804
4805 // Emit sub-trees.
4806 AArch64CC::CondCode RHSCC;
4807 MachineInstr *CmpR =
4808 emitConjunctionRec(RHS, RHSCC, NegateR, CCOp, Predicate, MIB);
4809 if (NegateAfterR)
4810 RHSCC = AArch64CC::getInvertedCondCode(RHSCC);
4811 MachineInstr *CmpL = emitConjunctionRec(
4812 LHS, OutCC, NegateL, CmpR->getOperand(0).getReg(), RHSCC, MIB);
4813 if (NegateAfterAll)
4814 OutCC = AArch64CC::getInvertedCondCode(OutCC);
4815 return CmpL;
4816}
4817
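// Emit the conjunction/disjunction tree rooted at Val as one flag-setting
// compare followed by a chain of conditional compares, returning the last
// instruction of that chain; OutCC receives the condition code to test on
// the resulting flags. Roughly, for
//   %c1 = G_ICMP pred1, %a, %b
//   %c2 = G_FCMP pred2, %x, %y
//   %cond = G_AND %c1, %c2
// this produces a plain compare for one operand and a conditional compare
// (CCMP/FCCMP) for the other, predicated on the first compare's outcome.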
4818MachineInstr *AArch64InstructionSelector::emitConjunction(
4819 Register Val, AArch64CC::CondCode &OutCC, MachineIRBuilder &MIB) const {
4820 bool DummyCanNegate;
4821 bool DummyMustBeFirst;
4822 if (!canEmitConjunction(Val, DummyCanNegate, DummyMustBeFirst, false,
4823 *MIB.getMRI()))
4824 return nullptr;
4825 return emitConjunctionRec(Val, OutCC, false, Register(), AArch64CC::AL, MIB);
4826}
4827
4828bool AArch64InstructionSelector::tryOptSelectConjunction(GSelect &SelI,
4829 MachineInstr &CondMI) {
4830 AArch64CC::CondCode AArch64CC;
4831 MachineInstr *ConjMI = emitConjunction(SelI.getCondReg(), AArch64CC, MIB);
4832 if (!ConjMI)
4833 return false;
4834
4835 emitSelect(SelI.getReg(0), SelI.getTrueReg(), SelI.getFalseReg(), AArch64CC, MIB);
4836 SelI.eraseFromParent();
4837 return true;
4838}
4839
4840bool AArch64InstructionSelector::tryOptSelect(GSelect &I) {
4841 MachineRegisterInfo &MRI = *MIB.getMRI();
4842 // We want to recognize this pattern:
4843 //
4844 // $z = G_FCMP pred, $x, $y
4845 // ...
4846 // $w = G_SELECT $z, $a, $b
4847 //
4848 // Where the value of $z is *only* ever used by the G_SELECT (possibly with
4849 // some copies/truncs in between).
4850 //
4851 // If we see this, then we can emit something like this:
4852 //
4853 // fcmp $x, $y
4854 // fcsel $w, $a, $b, pred
4855 //
4856 // Rather than emitting both of the rather long sequences in the standard
4857 // G_FCMP/G_SELECT select methods.
4858
4859 // First, check if the condition is defined by a compare.
4860 MachineInstr *CondDef = MRI.getVRegDef(I.getOperand(1).getReg());
4861
4862 // We can only fold if all of the defs have one use.
4863 Register CondDefReg = CondDef->getOperand(0).getReg();
4864 if (!MRI.hasOneNonDBGUse(CondDefReg)) {
4865 // Unless it's another select.
4866 for (const MachineInstr &UI : MRI.use_nodbg_instructions(CondDefReg)) {
4867 if (CondDef == &UI)
4868 continue;
4869 if (UI.getOpcode() != TargetOpcode::G_SELECT)
4870 return false;
4871 }
4872 }
4873
4874 // Is the condition defined by a compare?
4875 unsigned CondOpc = CondDef->getOpcode();
4876 if (CondOpc != TargetOpcode::G_ICMP && CondOpc != TargetOpcode::G_FCMP) {
4877 if (tryOptSelectConjunction(I, *CondDef))
4878 return true;
4879 return false;
4880 }
4881
4882 AArch64CC::CondCode CondCode;
4883 if (CondOpc == TargetOpcode::G_ICMP) {
4884 auto Pred =
4885 static_cast<CmpInst::Predicate>(CondDef->getOperand(1).getPredicate());
4886 CondCode = changeICMPPredToAArch64CC(Pred);
4887 emitIntegerCompare(CondDef->getOperand(2), CondDef->getOperand(3),
4888 CondDef->getOperand(1), MIB);
4889 } else {
4890 // Get the condition code for the select.
4891 auto Pred =
4892 static_cast<CmpInst::Predicate>(CondDef->getOperand(1).getPredicate());
4893 AArch64CC::CondCode CondCode2;
4894 changeFCMPPredToAArch64CC(Pred, CondCode, CondCode2);
4895
4896 // changeFCMPPredToAArch64CC sets CondCode2 to AL when we require two
4897 // instructions to emit the comparison.
4898 // TODO: Handle FCMP_UEQ and FCMP_ONE. After that, this check will be
4899 // unnecessary.
4900 if (CondCode2 != AArch64CC::AL)
4901 return false;
4902
4903 if (!emitFPCompare(CondDef->getOperand(2).getReg(),
4904 CondDef->getOperand(3).getReg(), MIB)) {
4905 LLVM_DEBUG(dbgs() << "Couldn't emit compare for select!\n");
4906 return false;
4907 }
4908 }
4909
4910 // Emit the select.
4911 emitSelect(I.getOperand(0).getReg(), I.getOperand(2).getReg(),
4912 I.getOperand(3).getReg(), CondCode, MIB);
4913 I.eraseFromParent();
4914 return true;
4915}
4916
4917MachineInstr *AArch64InstructionSelector::tryFoldIntegerCompare(
4918 MachineOperand &LHS, MachineOperand &RHS, MachineOperand &Predicate,
4919 MachineIRBuilder &MIRBuilder) const {
4920 assert(LHS.isReg() && RHS.isReg() && Predicate.isPredicate() &&
4921 "Unexpected MachineOperand");
4922 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
4923 // We want to find this sort of thing:
4924 // x = G_SUB 0, y
4925 // G_ICMP z, x
4926 //
4927 // In this case, we can fold the G_SUB into the G_ICMP using a CMN instead.
4928 // e.g:
4929 //
4930 // cmn z, y
4931
4932 // Check if the RHS or LHS of the G_ICMP is defined by a SUB
4933 MachineInstr *LHSDef = getDefIgnoringCopies(LHS.getReg(), MRI);
4934 MachineInstr *RHSDef = getDefIgnoringCopies(RHS.getReg(), MRI);
4935 auto P = static_cast<CmpInst::Predicate>(Predicate.getPredicate());
4936 // Given this:
4937 //
4938 // x = G_SUB 0, y
4939 // G_ICMP x, z
4940 //
4941 // Produce this:
4942 //
4943 // cmn y, z
4944 if (isCMN(LHSDef, P, MRI))
4945 return emitCMN(LHSDef->getOperand(2), RHS, MIRBuilder);
4946
4947 // Same idea here, but with the RHS of the compare instead:
4948 //
4949 // Given this:
4950 //
4951 // x = G_SUB 0, y
4952 // G_ICMP z, x
4953 //
4954 // Produce this:
4955 //
4956 // cmn z, y
4957 if (isCMN(RHSDef, P, MRI))
4958 return emitCMN(LHS, RHSDef->getOperand(2), MIRBuilder);
4959
4960 // Given this:
4961 //
4962 // z = G_AND x, y
4963 // G_ICMP z, 0
4964 //
4965 // Produce this if the compare is signed:
4966 //
4967 // tst x, y
4968 if (!CmpInst::isUnsigned(P) && LHSDef &&
4969 LHSDef->getOpcode() == TargetOpcode::G_AND) {
4970 // Make sure that the RHS is 0.
4971 auto ValAndVReg = getIConstantVRegValWithLookThrough(RHS.getReg(), MRI);
4972 if (!ValAndVReg || ValAndVReg->Value != 0)
4973 return nullptr;
4974
4975 return emitTST(LHSDef->getOperand(1),
4976 LHSDef->getOperand(2), MIRBuilder);
4977 }
4978
4979 return nullptr;
4980}
4981
4982bool AArch64InstructionSelector::selectShuffleVector(
4983 MachineInstr &I, MachineRegisterInfo &MRI) {
4984 const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
4985 Register Src1Reg = I.getOperand(1).getReg();
4986 const LLT Src1Ty = MRI.getType(Src1Reg);
4987 Register Src2Reg = I.getOperand(2).getReg();
4988 const LLT Src2Ty = MRI.getType(Src2Reg);
4989 ArrayRef<int> Mask = I.getOperand(3).getShuffleMask();
4990
4991 MachineBasicBlock &MBB = *I.getParent();
4992 MachineFunction &MF = *MBB.getParent();
4993 LLVMContext &Ctx = MF.getFunction().getContext();
4994
4995 // G_SHUFFLE_VECTOR is weird in that the source operands can be scalars, if
4996 // it originated from a <1 x T> type. Those should have been lowered into
4997 // G_BUILD_VECTOR earlier.
4998 if (!Src1Ty.isVector() || !Src2Ty.isVector()) {
4999 LLVM_DEBUG(dbgs() << "Could not select a \"scalar\" G_SHUFFLE_VECTOR\n");
5000 return false;
5001 }
5002
5003 unsigned BytesPerElt = DstTy.getElementType().getSizeInBits() / 8;
5004
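// Build the byte-index vector for TBL: destination element Val selects
// element Val of the concatenated <Src1, Src2> pair, so each of its
// BytesPerElt bytes gets index Val * BytesPerElt + Byte.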
5005 SmallVector<Constant *, 64> CstIdxs;
5006 for (int Val : Mask) {
5007 // For now, we'll just assume any undef indexes are 0. This should be
5008 // optimized in the future, e.g. to select DUP etc.
5009 Val = Val < 0 ? 0 : Val;
5010 for (unsigned Byte = 0; Byte < BytesPerElt; ++Byte) {
5011 unsigned Offset = Byte + Val * BytesPerElt;
5012 CstIdxs.emplace_back(ConstantInt::get(Type::getInt8Ty(Ctx), Offset));
5013 }
5014 }
5015
5016 // Use a constant pool to load the index vector for TBL.
5017 Constant *CPVal = ConstantVector::get(CstIdxs);
5018 MachineInstr *IndexLoad = emitLoadFromConstantPool(CPVal, MIB);
5019 if (!IndexLoad) {
5020 LLVM_DEBUG(dbgs() << "Could not load from a constant pool");
5021 return false;
5022 }
5023
5024 if (DstTy.getSizeInBits() != 128) {
5025 assert(DstTy.getSizeInBits() == 64 && "Unexpected shuffle result ty");
5026 // This case can be done with TBL1.
5027 MachineInstr *Concat =
5028 emitVectorConcat(std::nullopt, Src1Reg, Src2Reg, MIB);
5029 if (!Concat) {
5030 LLVM_DEBUG(dbgs() << "Could not do vector concat for tbl1");
5031 return false;
5032 }
5033
5034 // The constant pool load will be 64 bits, so we need to convert it to an FPR128 reg.
5035 IndexLoad = emitScalarToVector(64, &AArch64::FPR128RegClass,
5036 IndexLoad->getOperand(0).getReg(), MIB);
5037
5038 auto TBL1 = MIB.buildInstr(
5039 AArch64::TBLv16i8One, {&AArch64::FPR128RegClass},
5040 {Concat->getOperand(0).getReg(), IndexLoad->getOperand(0).getReg()});
5041 constrainSelectedInstRegOperands(*TBL1, TII, TRI, RBI);
5042
5043 auto Copy =
5044 MIB.buildInstr(TargetOpcode::COPY, {I.getOperand(0).getReg()}, {})
5045 .addReg(TBL1.getReg(0), 0, AArch64::dsub);
5046 RBI.constrainGenericRegister(Copy.getReg(0), AArch64::FPR64RegClass, MRI);
5047 I.eraseFromParent();
5048 return true;
5049 }
5050
5051 // For TBL2 we need to emit a REG_SEQUENCE to tie together two consecutive
5052 // Q registers for regalloc.
5053 SmallVector<Register, 2> Regs = {Src1Reg, Src2Reg};
5054 auto RegSeq = createQTuple(Regs, MIB);
5055 auto TBL2 = MIB.buildInstr(AArch64::TBLv16i8Two, {I.getOperand(0)},
5056 {RegSeq, IndexLoad->getOperand(0)});
5057 constrainSelectedInstRegOperands(*TBL2, TII, TRI, RBI);
5058 I.eraseFromParent();
5059 return true;
5060}
5061
5062MachineInstr *AArch64InstructionSelector::emitLaneInsert(
5063 std::optional<Register> DstReg, Register SrcReg, Register EltReg,
5064 unsigned LaneIdx, const RegisterBank &RB,
5065 MachineIRBuilder &MIRBuilder) const {
5066 MachineInstr *InsElt = nullptr;
5067 const TargetRegisterClass *DstRC = &AArch64::FPR128RegClass;
5068 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
5069
5070 // Create a register to define with the insert if one wasn't passed in.
5071 if (!DstReg)
5072 DstReg = MRI.createVirtualRegister(DstRC);
5073
5074 unsigned EltSize = MRI.getType(EltReg).getSizeInBits();
5075 unsigned Opc = getInsertVecEltOpInfo(RB, EltSize).first;
5076
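// FPR elements are first widened into a vector register so the
// element-from-vector form of INS can be used; GPR elements are inserted
// directly with the GPR form.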
5077 if (RB.getID() == AArch64::FPRRegBankID) {
5078 auto InsSub = emitScalarToVector(EltSize, DstRC, EltReg, MIRBuilder);
5079 InsElt = MIRBuilder.buildInstr(Opc, {*DstReg}, {SrcReg})
5080 .addImm(LaneIdx)
5081 .addUse(InsSub->getOperand(0).getReg())
5082 .addImm(0);
5083 } else {
5084 InsElt = MIRBuilder.buildInstr(Opc, {*DstReg}, {SrcReg})
5085 .addImm(LaneIdx)
5086 .addUse(EltReg);
5087 }
5088
5089 constrainSelectedInstRegOperands(*InsElt, TII, TRI, RBI);
5090 return InsElt;
5091}
5092
5093bool AArch64InstructionSelector::selectUSMovFromExtend(
5094 MachineInstr &MI, MachineRegisterInfo &MRI) {
5095 if (MI.getOpcode() != TargetOpcode::G_SEXT &&
5096 MI.getOpcode() != TargetOpcode::G_ZEXT &&
5097 MI.getOpcode() != TargetOpcode::G_ANYEXT)
5098 return false;
5099 bool IsSigned = MI.getOpcode() == TargetOpcode::G_SEXT;
5100 const Register DefReg = MI.getOperand(0).getReg();
5101 const LLT DstTy = MRI.getType(DefReg);
5102 unsigned DstSize = DstTy.getSizeInBits();
5103
5104 if (DstSize != 32 && DstSize != 64)
5105 return false;
5106
5107 MachineInstr *Extract = getOpcodeDef(TargetOpcode::G_EXTRACT_VECTOR_ELT,
5108 MI.getOperand(1).getReg(), MRI);
5109 int64_t Lane;
5110 if (!Extract || !mi_match(Extract->getOperand(2).getReg(), MRI, m_ICst(Lane)))
5111 return false;
5112 Register Src0 = Extract->getOperand(1).getReg();
5113
5114 const LLT &VecTy = MRI.getType(Src0);
5115
5116 if (VecTy.getSizeInBits() != 128) {
5117 const MachineInstr *ScalarToVector = emitScalarToVector(
5118 VecTy.getSizeInBits(), &AArch64::FPR128RegClass, Src0, MIB);
5119 assert(ScalarToVector && "Didn't expect emitScalarToVector to fail!");
5120 Src0 = ScalarToVector->getOperand(0).getReg();
5121 }
5122
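// Pick SMOV for sign-extending extracts and UMOV otherwise, keyed on the
// destination width and the vector element size handled below.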
5123 unsigned Opcode;
5124 if (DstSize == 64 && VecTy.getScalarSizeInBits() == 32)
5125 Opcode = IsSigned ? AArch64::SMOVvi32to64 : AArch64::UMOVvi32;
5126 else if (DstSize == 64 && VecTy.getScalarSizeInBits() == 16)
5127 Opcode = IsSigned ? AArch64::SMOVvi16to64 : AArch64::UMOVvi16;
5128 else if (DstSize == 64 && VecTy.getScalarSizeInBits() == 8)
5129 Opcode = IsSigned ? AArch64::SMOVvi8to64 : AArch64::UMOVvi8;
5130 else if (DstSize == 32 && VecTy.getScalarSizeInBits() == 16)
5131 Opcode = IsSigned ? AArch64::SMOVvi16to32 : AArch64::UMOVvi16;
5132 else if (DstSize == 32 && VecTy.getScalarSizeInBits() == 8)
5133 Opcode = IsSigned ? AArch64::SMOVvi8to32 : AArch64::UMOVvi8;
5134 else
5135 llvm_unreachable("Unexpected type combo for S/UMov!");
5136
5137 // We may need to generate one of these, depending on the type and sign of the
5138 // input:
5139 // DstReg = SMOV Src0, Lane;
5140 // NewReg = UMOV Src0, Lane; DstReg = SUBREG_TO_REG NewReg, sub_32;
5141 MachineInstr *ExtI = nullptr;
5142 if (DstSize == 64 && !IsSigned) {
5143 Register NewReg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
5144 MIB.buildInstr(Opcode, {NewReg}, {Src0}).addImm(Lane);
5145 ExtI = MIB.buildInstr(AArch64::SUBREG_TO_REG, {DefReg}, {})
5146 .addImm(0)
5147 .addUse(NewReg)
5148 .addImm(AArch64::sub_32);
5149 RBI.constrainGenericRegister(DefReg, AArch64::GPR64RegClass, MRI);
5150 } else
5151 ExtI = MIB.buildInstr(Opcode, {DefReg}, {Src0}).addImm(Lane);
5152
5153 constrainSelectedInstRegOperands(*ExtI, TII, TRI, RBI);
5154 MI.eraseFromParent();
5155 return true;
5156}
5157
5158MachineInstr *AArch64InstructionSelector::tryAdvSIMDModImm8(
5159 Register Dst, unsigned DstSize, APInt Bits, MachineIRBuilder &Builder) {
5160 unsigned int Op;
5161 if (DstSize == 128) {
5162 if (Bits.getHiBits(64) != Bits.getLoBits(64))
5163 return nullptr;
5164 Op = AArch64::MOVIv16b_ns;
5165 } else {
5166 Op = AArch64::MOVIv8b_ns;
5167 }
5168
5169 uint64_t Val = Bits.zextOrTrunc(64).getZExtValue();
5170
5171 if (AArch64_AM::isAdvSIMDModImmType9(Val)) {
5172 Val = AArch64_AM::encodeAdvSIMDModImmType9(Val);
5173 auto Mov = Builder.buildInstr(Op, {Dst}, {}).addImm(Val);
5174 constrainSelectedInstRegOperands(*Mov, TII, TRI, RBI);
5175 return &*Mov;
5176 }
5177 return nullptr;
5178}
5179
5180MachineInstr *AArch64InstructionSelector::tryAdvSIMDModImm16(
5181 Register Dst, unsigned DstSize, APInt Bits, MachineIRBuilder &Builder,
5182 bool Inv) {
5183
5184 unsigned int Op;
5185 if (DstSize == 128) {
5186 if (Bits.getHiBits(64) != Bits.getLoBits(64))
5187 return nullptr;
5188 Op = Inv ? AArch64::MVNIv8i16 : AArch64::MOVIv8i16;
5189 } else {
5190 Op = Inv ? AArch64::MVNIv4i16 : AArch64::MOVIv4i16;
5191 }
5192
5193 uint64_t Val = Bits.zextOrTrunc(64).getZExtValue();
5194 uint64_t Shift;
5195
5196 if (AArch64_AM::isAdvSIMDModImmType5(Val)) {
5197 Val = AArch64_AM::encodeAdvSIMDModImmType5(Val);
5198 Shift = 0;
5199 } else if (AArch64_AM::isAdvSIMDModImmType6(Val)) {
5200 Val = AArch64_AM::encodeAdvSIMDModImmType6(Val);
5201 Shift = 8;
5202 } else
5203 return nullptr;
5204
5205 auto Mov = Builder.buildInstr(Op, {Dst}, {}).addImm(Val).addImm(Shift);
5206 constrainSelectedInstRegOperands(*Mov, TII, TRI, RBI);
5207 return &*Mov;
5208}
5209
5210MachineInstr *AArch64InstructionSelector::tryAdvSIMDModImm32(
5211 Register Dst, unsigned DstSize, APInt Bits, MachineIRBuilder &Builder,
5212 bool Inv) {
5213
5214 unsigned int Op;
5215 if (DstSize == 128) {
5216 if (Bits.getHiBits(64) != Bits.getLoBits(64))
5217 return nullptr;
5218 Op = Inv ? AArch64::MVNIv4i32 : AArch64::MOVIv4i32;
5219 } else {
5220 Op = Inv ? AArch64::MVNIv2i32 : AArch64::MOVIv2i32;
5221 }
5222
5223 uint64_t Val = Bits.zextOrTrunc(64).getZExtValue();
5224 uint64_t Shift;
5225
5226 if ((AArch64_AM::isAdvSIMDModImmType1(Val))) {
5227 Val = AArch64_AM::encodeAdvSIMDModImmType1(Val);
5228 Shift = 0;
5229 } else if ((AArch64_AM::isAdvSIMDModImmType2(Val))) {
5230 Val = AArch64_AM::encodeAdvSIMDModImmType2(Val);
5231 Shift = 8;
5232 } else if ((AArch64_AM::isAdvSIMDModImmType3(Val))) {
5233 Val = AArch64_AM::encodeAdvSIMDModImmType3(Val);
5234 Shift = 16;
5235 } else if ((AArch64_AM::isAdvSIMDModImmType4(Val))) {
5236 Val = AArch64_AM::encodeAdvSIMDModImmType4(Val);
5237 Shift = 24;
5238 } else
5239 return nullptr;
5240
5241 auto Mov = Builder.buildInstr(Op, {Dst}, {}).addImm(Val).addImm(Shift);
5242 constrainSelectedInstRegOperands(*Mov, TII, TRI, RBI);
5243 return &*Mov;
5244}
5245
5246MachineInstr *AArch64InstructionSelector::tryAdvSIMDModImm64(
5247 Register Dst, unsigned DstSize, APInt Bits, MachineIRBuilder &Builder) {
5248
5249 unsigned int Op;
5250 if (DstSize == 128) {
5251 if (Bits.getHiBits(64) != Bits.getLoBits(64))
5252 return nullptr;
5253 Op = AArch64::MOVIv2d_ns;
5254 } else {
5255 Op = AArch64::MOVID;
5256 }
5257
5258 uint64_t Val = Bits.zextOrTrunc(64).getZExtValue();
5259 if (AArch64_AM::isAdvSIMDModImmType10(Val)) {
5260 Val = AArch64_AM::encodeAdvSIMDModImmType10(Val);
5261 auto Mov = Builder.buildInstr(Op, {Dst}, {}).addImm(Val);
5262 constrainSelectedInstRegOperands(*Mov, TII, TRI, RBI);
5263 return &*Mov;
5264 }
5265 return nullptr;
5266}
5267
5268MachineInstr *AArch64InstructionSelector::tryAdvSIMDModImm321s(
5269 Register Dst, unsigned DstSize, APInt Bits, MachineIRBuilder &Builder,
5270 bool Inv) {
5271
5272 unsigned int Op;
5273 if (DstSize == 128) {
5274 if (Bits.getHiBits(64) != Bits.getLoBits(64))
5275 return nullptr;
5276 Op = Inv ? AArch64::MVNIv4s_msl : AArch64::MOVIv4s_msl;
5277 } else {
5278 Op = Inv ? AArch64::MVNIv2s_msl : AArch64::MOVIv2s_msl;
5279 }
5280
5281 uint64_t Val = Bits.zextOrTrunc(64).getZExtValue();
5282 uint64_t Shift;
5283
5284 if (AArch64_AM::isAdvSIMDModImmType7(Val)) {
5285 Val = AArch64_AM::encodeAdvSIMDModImmType7(Val);
5286 Shift = 264;
5287 } else if (AArch64_AM::isAdvSIMDModImmType8(Val)) {
5288 Val = AArch64_AM::encodeAdvSIMDModImmType8(Val);
5289 Shift = 272;
5290 } else
5291 return nullptr;
5292
5293 auto Mov = Builder.buildInstr(Op, {Dst}, {}).addImm(Val).addImm(Shift);
5294 constrainSelectedInstRegOperands(*Mov, TII, TRI, RBI);
5295 return &*Mov;
5296}
5297
5298MachineInstr *AArch64InstructionSelector::tryAdvSIMDModImmFP(
5299 Register Dst, unsigned DstSize, APInt Bits, MachineIRBuilder &Builder) {
5300
5301 unsigned int Op;
5302 bool IsWide = false;
5303 if (DstSize == 128) {
5304 if (Bits.getHiBits(64) != Bits.getLoBits(64))
5305 return nullptr;
5306 Op = AArch64::FMOVv4f32_ns;
5307 IsWide = true;
5308 } else {
5309 Op = AArch64::FMOVv2f32_ns;
5310 }
5311
5312 uint64_t Val = Bits.zextOrTrunc(64).getZExtValue();
5313
5314 if (AArch64_AM::isAdvSIMDModImmType11(Val)) {
5315 Val = AArch64_AM::encodeAdvSIMDModImmType11(Val);
5316 } else if (IsWide && AArch64_AM::isAdvSIMDModImmType12(Val)) {
5317 Val = AArch64_AM::encodeAdvSIMDModImmType12(Val);
5318 Op = AArch64::FMOVv2f64_ns;
5319 } else
5320 return nullptr;
5321
5322 auto Mov = Builder.buildInstr(Op, {Dst}, {}).addImm(Val);
5323 constrainSelectedInstRegOperands(*Mov, TII, TRI, RBI);
5324 return &*Mov;
5325}
5326
5327bool AArch64InstructionSelector::selectIndexedExtLoad(
5328 MachineInstr &MI, MachineRegisterInfo &MRI) {
5329 auto &ExtLd = cast<GIndexedAnyExtLoad>(MI);
5330 Register Dst = ExtLd.getDstReg();
5331 Register WriteBack = ExtLd.getWritebackReg();
5332 Register Base = ExtLd.getBaseReg();
5333 Register Offset = ExtLd.getOffsetReg();
5334 LLT Ty = MRI.getType(Dst);
5335 assert(Ty.getSizeInBits() <= 64); // Only for scalar GPRs.
5336 unsigned MemSizeBits = ExtLd.getMMO().getMemoryType().getSizeInBits();
5337 bool IsPre = ExtLd.isPre();
5338 bool IsSExt = isa<GIndexedSExtLoad>(ExtLd);
5339 bool InsertIntoXReg = false;
5340 bool IsDst64 = Ty.getSizeInBits() == 64;
5341
5342 unsigned Opc = 0;
5343 LLT NewLdDstTy;
5344 LLT s32 = LLT::scalar(32);
5345 LLT s64 = LLT::scalar(64);
5346
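// Choose a pre/post-indexed load opcode based on the memory size and the
// extension kind. Zero/any-extending loads to a 64-bit destination load
// into a W register and are widened with SUBREG_TO_REG afterwards.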
5347 if (MemSizeBits == 8) {
5348 if (IsSExt) {
5349 if (IsDst64)
5350 Opc = IsPre ? AArch64::LDRSBXpre : AArch64::LDRSBXpost;
5351 else
5352 Opc = IsPre ? AArch64::LDRSBWpre : AArch64::LDRSBWpost;
5353 NewLdDstTy = IsDst64 ? s64 : s32;
5354 } else {
5355 Opc = IsPre ? AArch64::LDRBBpre : AArch64::LDRBBpost;
5356 InsertIntoXReg = IsDst64;
5357 NewLdDstTy = s32;
5358 }
5359 } else if (MemSizeBits == 16) {
5360 if (IsSExt) {
5361 if (IsDst64)
5362 Opc = IsPre ? AArch64::LDRSHXpre : AArch64::LDRSHXpost;
5363 else
5364 Opc = IsPre ? AArch64::LDRSHWpre : AArch64::LDRSHWpost;
5365 NewLdDstTy = IsDst64 ? s64 : s32;
5366 } else {
5367 Opc = IsPre ? AArch64::LDRHHpre : AArch64::LDRHHpost;
5368 InsertIntoXReg = IsDst64;
5369 NewLdDstTy = s32;
5370 }
5371 } else if (MemSizeBits == 32) {
5372 if (IsSExt) {
5373 Opc = IsPre ? AArch64::LDRSWpre : AArch64::LDRSWpost;
5374 NewLdDstTy = s64;
5375 } else {
5376 Opc = IsPre ? AArch64::LDRWpre : AArch64::LDRWpost;
5377 InsertIntoXReg = IsDst64;
5378 NewLdDstTy = s32;
5379 }
5380 } else {
5381 llvm_unreachable("Unexpected size for indexed load");
5382 }
5383
5384 if (RBI.getRegBank(Dst, MRI, TRI)->getID() == AArch64::FPRRegBankID)
5385 return false; // We should be on gpr.
5386
5387 auto Cst = getIConstantVRegVal(Offset, MRI);
5388 if (!Cst)
5389 return false; // Shouldn't happen, but just in case.
5390
5391 auto LdMI = MIB.buildInstr(Opc, {WriteBack, NewLdDstTy}, {Base})
5392 .addImm(Cst->getSExtValue());
5393 LdMI.cloneMemRefs(ExtLd);
5394 constrainSelectedInstRegOperands(*LdMI, TII, TRI, RBI);
5395 // Make sure to select the load with the MemTy as the dest type, and then
5396 // insert into X reg if needed.
5397 if (InsertIntoXReg) {
5398 // Generate a SUBREG_TO_REG.
5399 auto SubToReg = MIB.buildInstr(TargetOpcode::SUBREG_TO_REG, {Dst}, {})
5400 .addImm(0)
5401 .addUse(LdMI.getReg(1))
5402 .addImm(AArch64::sub_32);
5403 RBI.constrainGenericRegister(SubToReg.getReg(0), AArch64::GPR64RegClass,
5404 MRI);
5405 } else {
5406 auto Copy = MIB.buildCopy(Dst, LdMI.getReg(1));
5407 selectCopy(*Copy, TII, MRI, TRI, RBI);
5408 }
5409 MI.eraseFromParent();
5410
5411 return true;
5412}
5413
5414bool AArch64InstructionSelector::selectIndexedLoad(MachineInstr &MI,
5415 MachineRegisterInfo &MRI) {
5416 auto &Ld = cast<GIndexedLoad>(MI);
5417 Register Dst = Ld.getDstReg();
5418 Register WriteBack = Ld.getWritebackReg();
5419 Register Base = Ld.getBaseReg();
5420 Register Offset = Ld.getOffsetReg();
5421 assert(MRI.getType(Dst).getSizeInBits() <= 128 &&
5422 "Unexpected type for indexed load");
5423 unsigned MemSize = Ld.getMMO().getMemoryType().getSizeInBytes();
5424
5425 if (MemSize < MRI.getType(Dst).getSizeInBytes())
5426 return selectIndexedExtLoad(MI, MRI);
5427
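// The opcode tables below are indexed by log2 of the access size in bytes
// (B, H, W/S, X/D, Q), with separate tables for the GPR and FPR banks.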
5428 unsigned Opc = 0;
5429 if (Ld.isPre()) {
5430 static constexpr unsigned GPROpcodes[] = {
5431 AArch64::LDRBBpre, AArch64::LDRHHpre, AArch64::LDRWpre,
5432 AArch64::LDRXpre};
5433 static constexpr unsigned FPROpcodes[] = {
5434 AArch64::LDRBpre, AArch64::LDRHpre, AArch64::LDRSpre, AArch64::LDRDpre,
5435 AArch64::LDRQpre};
5436 if (RBI.getRegBank(Dst, MRI, TRI)->getID() == AArch64::FPRRegBankID)
5437 Opc = FPROpcodes[Log2_32(MemSize)];
5438 else
5439 Opc = GPROpcodes[Log2_32(MemSize)];
5440 } else {
5441 static constexpr unsigned GPROpcodes[] = {
5442 AArch64::LDRBBpost, AArch64::LDRHHpost, AArch64::LDRWpost,
5443 AArch64::LDRXpost};
5444 static constexpr unsigned FPROpcodes[] = {
5445 AArch64::LDRBpost, AArch64::LDRHpost, AArch64::LDRSpost,
5446 AArch64::LDRDpost, AArch64::LDRQpost};
5447 if (RBI.getRegBank(Dst, MRI, TRI)->getID() == AArch64::FPRRegBankID)
5448 Opc = FPROpcodes[Log2_32(MemSize)];
5449 else
5450 Opc = GPROpcodes[Log2_32(MemSize)];
5451 }
5452 auto Cst = getIConstantVRegVal(Offset, MRI);
5453 if (!Cst)
5454 return false; // Shouldn't happen, but just in case.
5455 auto LdMI =
5456 MIB.buildInstr(Opc, {WriteBack, Dst}, {Base}).addImm(Cst->getSExtValue());
5457 LdMI.cloneMemRefs(Ld);
5458 constrainSelectedInstRegOperands(*LdMI, TII, TRI, RBI);
5459 MI.eraseFromParent();
5460 return true;
5461}
5462
5463bool AArch64InstructionSelector::selectIndexedStore(GIndexedStore &I,
5464 MachineRegisterInfo &MRI) {
5465 Register Dst = I.getWritebackReg();
5466 Register Val = I.getValueReg();
5467 Register Base = I.getBaseReg();
5468 Register Offset = I.getOffsetReg();
5469 LLT ValTy = MRI.getType(Val);
5470 assert(ValTy.getSizeInBits() <= 128 && "Unexpected type for indexed store");
5471
5472 unsigned Opc = 0;
5473 if (I.isPre()) {
5474 static constexpr unsigned GPROpcodes[] = {
5475 AArch64::STRBBpre, AArch64::STRHHpre, AArch64::STRWpre,
5476 AArch64::STRXpre};
5477 static constexpr unsigned FPROpcodes[] = {
5478 AArch64::STRBpre, AArch64::STRHpre, AArch64::STRSpre, AArch64::STRDpre,
5479 AArch64::STRQpre};
5480
5481 if (RBI.getRegBank(Val, MRI, TRI)->getID() == AArch64::FPRRegBankID)
5482 Opc = FPROpcodes[Log2_32(ValTy.getSizeInBytes())];
5483 else
5484 Opc = GPROpcodes[Log2_32(ValTy.getSizeInBytes())];
5485 } else {
5486 static constexpr unsigned GPROpcodes[] = {
5487 AArch64::STRBBpost, AArch64::STRHHpost, AArch64::STRWpost,
5488 AArch64::STRXpost};
5489 static constexpr unsigned FPROpcodes[] = {
5490 AArch64::STRBpost, AArch64::STRHpost, AArch64::STRSpost,
5491 AArch64::STRDpost, AArch64::STRQpost};
5492
5493 if (RBI.getRegBank(Val, MRI, TRI)->getID() == AArch64::FPRRegBankID)
5494 Opc = FPROpcodes[Log2_32(ValTy.getSizeInBytes())];
5495 else
5496 Opc = GPROpcodes[Log2_32(ValTy.getSizeInBytes())];
5497 }
5498
5499 auto Cst = getIConstantVRegVal(Offset, MRI);
5500 if (!Cst)
5501 return false; // Shouldn't happen, but just in case.
5502 auto Str =
5503 MIB.buildInstr(Opc, {Dst}, {Val, Base}).addImm(Cst->getSExtValue());
5504 Str.cloneMemRefs(I);
5505 constrainSelectedInstRegOperands(*Str, TII, TRI, RBI);
5506 I.eraseFromParent();
5507 return true;
5508}
5509
5510MachineInstr *
5511AArch64InstructionSelector::emitConstantVector(Register Dst, Constant *CV,
5512 MachineIRBuilder &MIRBuilder,
5513 MachineRegisterInfo &MRI) {
5514 LLT DstTy = MRI.getType(Dst);
5515 unsigned DstSize = DstTy.getSizeInBits();
5516 if (CV->isNullValue()) {
5517 if (DstSize == 128) {
5518 auto Mov =
5519 MIRBuilder.buildInstr(AArch64::MOVIv2d_ns, {Dst}, {}).addImm(0);
5520 constrainSelectedInstRegOperands(*Mov, TII, TRI, RBI);
5521 return &*Mov;
5522 }
5523
5524 if (DstSize == 64) {
5525 auto Mov =
5526 MIRBuilder
5527 .buildInstr(AArch64::MOVIv2d_ns, {&AArch64::FPR128RegClass}, {})
5528 .addImm(0);
5529 auto Copy = MIRBuilder.buildInstr(TargetOpcode::COPY, {Dst}, {})
5530 .addReg(Mov.getReg(0), 0, AArch64::dsub);
5531 RBI.constrainGenericRegister(Dst, AArch64::FPR64RegClass, MRI);
5532 return &*Copy;
5533 }
5534 }
5535
5536 if (CV->getSplatValue()) {
5537 APInt DefBits = APInt::getSplat(DstSize, CV->getUniqueInteger());
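// For splat constants, first try to materialize the value with a single
// MOVI/MVNI/FMOV immediate (also trying the bitwise-inverted value); an
// FNEG of a MOVI-able constant is tried next, and only then do we fall
// back to a constant-pool load.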
5538 auto TryMOVIWithBits = [&](APInt DefBits) -> MachineInstr * {
5539 MachineInstr *NewOp;
5540 bool Inv = false;
5541 if ((NewOp = tryAdvSIMDModImm64(Dst, DstSize, DefBits, MIRBuilder)) ||
5542 (NewOp =
5543 tryAdvSIMDModImm32(Dst, DstSize, DefBits, MIRBuilder, Inv)) ||
5544 (NewOp =
5545 tryAdvSIMDModImm321s(Dst, DstSize, DefBits, MIRBuilder, Inv)) ||
5546 (NewOp =
5547 tryAdvSIMDModImm16(Dst, DstSize, DefBits, MIRBuilder, Inv)) ||
5548 (NewOp = tryAdvSIMDModImm8(Dst, DstSize, DefBits, MIRBuilder)) ||
5549 (NewOp = tryAdvSIMDModImmFP(Dst, DstSize, DefBits, MIRBuilder)))
5550 return NewOp;
5551
5552 DefBits = ~DefBits;
5553 Inv = true;
5554 if ((NewOp =
5555 tryAdvSIMDModImm32(Dst, DstSize, DefBits, MIRBuilder, Inv)) ||
5556 (NewOp =
5557 tryAdvSIMDModImm321s(Dst, DstSize, DefBits, MIRBuilder, Inv)) ||
5558 (NewOp = tryAdvSIMDModImm16(Dst, DstSize, DefBits, MIRBuilder, Inv)))
5559 return NewOp;
5560 return nullptr;
5561 };
5562
5563 if (auto *NewOp = TryMOVIWithBits(DefBits))
5564 return NewOp;
5565
5566 // See if a fneg of the constant can be materialized with a MOVI, etc
5567 auto TryWithFNeg = [&](APInt DefBits, int NumBits,
5568 unsigned NegOpc) -> MachineInstr * {
5569 // FNegate each sub-element of the constant
5570 APInt Neg = APInt::getHighBitsSet(NumBits, 1).zext(DstSize);
5571 APInt NegBits(DstSize, 0);
5572 unsigned NumElts = DstSize / NumBits;
5573 for (unsigned i = 0; i < NumElts; i++)
5574 NegBits |= Neg << (NumBits * i);
5575 NegBits = DefBits ^ NegBits;
5576
5577 // Try to create the new constants with MOVI, and if so generate a fneg
5578 // for it.
5579 if (auto *NewOp = TryMOVIWithBits(NegBits)) {
5580 Register NewDst = MRI.createVirtualRegister(&AArch64::FPR128RegClass);
5581 NewOp->getOperand(0).setReg(NewDst);
5582 return MIRBuilder.buildInstr(NegOpc, {Dst}, {NewDst});
5583 }
5584 return nullptr;
5585 };
5586 MachineInstr *R;
5587 if ((R = TryWithFNeg(DefBits, 32, AArch64::FNEGv4f32)) ||
5588 (R = TryWithFNeg(DefBits, 64, AArch64::FNEGv2f64)) ||
5589 (STI.hasFullFP16() &&
5590 (R = TryWithFNeg(DefBits, 16, AArch64::FNEGv8f16))))
5591 return R;
5592 }
5593
5594 auto *CPLoad = emitLoadFromConstantPool(CV, MIRBuilder);
5595 if (!CPLoad) {
5596 LLVM_DEBUG(dbgs() << "Could not generate cp load for constant vector!");
5597 return nullptr;
5598 }
5599
5600 auto Copy = MIRBuilder.buildCopy(Dst, CPLoad->getOperand(0));
5601 RBI.constrainGenericRegister(
5602 Dst, *MRI.getRegClass(CPLoad->getOperand(0).getReg()), MRI);
5603 return &*Copy;
5604}
5605
5606bool AArch64InstructionSelector::tryOptConstantBuildVec(
5607 MachineInstr &I, LLT DstTy, MachineRegisterInfo &MRI) {
5608 assert(I.getOpcode() == TargetOpcode::G_BUILD_VECTOR);
5609 unsigned DstSize = DstTy.getSizeInBits();
5610 assert(DstSize <= 128 && "Unexpected build_vec type!");
5611 if (DstSize < 32)
5612 return false;
5613 // Check if we're building a constant vector, in which case we want to
5614 // generate a constant pool load instead of a vector insert sequence.
5615 SmallVector<Constant *, 16> Csts;
5616 for (unsigned Idx = 1; Idx < I.getNumOperands(); ++Idx) {
5617 // Try to find G_CONSTANT or G_FCONSTANT
5618 auto *OpMI =
5619 getOpcodeDef(TargetOpcode::G_CONSTANT, I.getOperand(Idx).getReg(), MRI);
5620 if (OpMI)
5621 Csts.emplace_back(
5622 const_cast<ConstantInt *>(OpMI->getOperand(1).getCImm()));
5623 else if ((OpMI = getOpcodeDef(TargetOpcode::G_FCONSTANT,
5624 I.getOperand(Idx).getReg(), MRI)))
5625 Csts.emplace_back(
5626 const_cast<ConstantFP *>(OpMI->getOperand(1).getFPImm()));
5627 else
5628 return false;
5629 }
5630 Constant *CV = ConstantVector::get(Csts);
5631 if (!emitConstantVector(I.getOperand(0).getReg(), CV, MIB, MRI))
5632 return false;
5633 I.eraseFromParent();
5634 return true;
5635}
5636
5637bool AArch64InstructionSelector::tryOptBuildVecToSubregToReg(
5638 MachineInstr &I, MachineRegisterInfo &MRI) {
5639 // Given:
5640 // %vec = G_BUILD_VECTOR %elt, %undef, %undef, ... %undef
5641 //
5642 // Select the G_BUILD_VECTOR as a SUBREG_TO_REG from %elt.
5643 Register Dst = I.getOperand(0).getReg();
5644 Register EltReg = I.getOperand(1).getReg();
5645 LLT EltTy = MRI.getType(EltReg);
5646 // If the index isn't on the same bank as its elements, then this can't be a
5647 // SUBREG_TO_REG.
5648 const RegisterBank &EltRB = *RBI.getRegBank(EltReg, MRI, TRI);
5649 const RegisterBank &DstRB = *RBI.getRegBank(Dst, MRI, TRI);
5650 if (EltRB != DstRB)
5651 return false;
5652 if (any_of(drop_begin(I.operands(), 2), [&MRI](const MachineOperand &Op) {
5653 return !getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, Op.getReg(), MRI);
5654 }))
5655 return false;
5656 unsigned SubReg;
5657 const TargetRegisterClass *EltRC = getRegClassForTypeOnBank(EltTy, EltRB);
5658 if (!EltRC)
5659 return false;
5660 const TargetRegisterClass *DstRC =
5661 getRegClassForTypeOnBank(MRI.getType(Dst), DstRB);
5662 if (!DstRC)
5663 return false;
5664 if (!getSubRegForClass(EltRC, TRI, SubReg))
5665 return false;
5666 auto SubregToReg = MIB.buildInstr(AArch64::SUBREG_TO_REG, {Dst}, {})
5667 .addImm(0)
5668 .addUse(EltReg)
5669 .addImm(SubReg);
5670 I.eraseFromParent();
5671 constrainSelectedInstRegOperands(*SubregToReg, TII, TRI, RBI);
5672 return RBI.constrainGenericRegister(Dst, *DstRC, MRI);
5673}
5674
5675bool AArch64InstructionSelector::selectBuildVector(MachineInstr &I,
5676 MachineRegisterInfo &MRI) {
5677 assert(I.getOpcode() == TargetOpcode::G_BUILD_VECTOR);
5678 // Until we port more of the optimized selections, for now just use a vector
5679 // insert sequence.
5680 const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
5681 const LLT EltTy = MRI.getType(I.getOperand(1).getReg());
5682 unsigned EltSize = EltTy.getSizeInBits();
5683
5684 if (tryOptConstantBuildVec(I, DstTy, MRI))
5685 return true;
5686 if (tryOptBuildVecToSubregToReg(I, MRI))
5687 return true;
5688
5689 if (EltSize != 8 && EltSize != 16 && EltSize != 32 && EltSize != 64)
5690 return false; // Don't support all element types yet.
5691 const RegisterBank &RB = *RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI);
5692
5693 const TargetRegisterClass *DstRC = &AArch64::FPR128RegClass;
5694 MachineInstr *ScalarToVec =
5695 emitScalarToVector(DstTy.getElementType().getSizeInBits(), DstRC,
5696 I.getOperand(1).getReg(), MIB);
5697 if (!ScalarToVec)
5698 return false;
5699
5700 Register DstVec = ScalarToVec->getOperand(0).getReg();
5701 unsigned DstSize = DstTy.getSizeInBits();
5702
5703 // Keep track of the last MI we inserted. Later on, we might be able to save
5704 // a copy using it.
5705 MachineInstr *PrevMI = ScalarToVec;
5706 for (unsigned i = 2, e = DstSize / EltSize + 1; i < e; ++i) {
5707 // Note that if we don't do a subregister copy, we can end up making an
5708 // extra register.
5709 Register OpReg = I.getOperand(i).getReg();
5710 // Do not emit inserts for undefs
5711 if (!getOpcodeDef<GImplicitDef>(OpReg, MRI)) {
5712 PrevMI = &*emitLaneInsert(std::nullopt, DstVec, OpReg, i - 1, RB, MIB);
5713 DstVec = PrevMI->getOperand(0).getReg();
5714 }
5715 }
5716
5717 // If DstTy's size in bits is less than 128, then emit a subregister copy
5718 // from DstVec to the last register we've defined.
5719 if (DstSize < 128) {
5720 // Force this to be FPR using the destination vector.
5721 const TargetRegisterClass *RC =
5722 getRegClassForTypeOnBank(DstTy, *RBI.getRegBank(DstVec, MRI, TRI));
5723 if (!RC)
5724 return false;
5725 if (RC != &AArch64::FPR32RegClass && RC != &AArch64::FPR64RegClass) {
5726 LLVM_DEBUG(dbgs() << "Unsupported register class!\n");
5727 return false;
5728 }
5729
5730 unsigned SubReg = 0;
5731 if (!getSubRegForClass(RC, TRI, SubReg))
5732 return false;
5733 if (SubReg != AArch64::ssub && SubReg != AArch64::dsub) {
5734 LLVM_DEBUG(dbgs() << "Unsupported destination size! (" << DstSize
5735 << ")\n");
5736 return false;
5737 }
5738
5739 Register Reg = MRI.createVirtualRegister(RC);
5740 Register DstReg = I.getOperand(0).getReg();
5741
5742 MIB.buildInstr(TargetOpcode::COPY, {DstReg}, {}).addReg(DstVec, 0, SubReg);
5743 MachineOperand &RegOp = I.getOperand(1);
5744 RegOp.setReg(Reg);
5745 RBI.constrainGenericRegister(DstReg, *RC, MRI);
5746 } else {
5747 // We either have a vector with all elements (except the first one) undef or
5748 // at least one non-undef non-first element. In the first case, we need to
5749 // constrain the output register ourselves as we may have generated an
5750 // INSERT_SUBREG operation which is a generic operation for which the
5751 // output regclass cannot be automatically chosen.
5752 //
5753 // In the second case, there is no need to do this as it may generate an
5754 // instruction like INSvi32gpr where the regclass can be automatically
5755 // chosen.
5756 //
5757 // Also, we save a copy by re-using the destination register on the final
5758 // insert.
5759 PrevMI->getOperand(0).setReg(I.getOperand(0).getReg());
5760 constrainSelectedInstRegOperands(*PrevMI, TII, TRI, RBI);
5761
5762 Register DstReg = PrevMI->getOperand(0).getReg();
5763 if (PrevMI == ScalarToVec && DstReg.isVirtual()) {
5764 const TargetRegisterClass *RC =
5765 getRegClassForTypeOnBank(DstTy, *RBI.getRegBank(DstVec, MRI, TRI));
5766 RBI.constrainGenericRegister(DstReg, *RC, MRI);
5767 }
5768 }
5769
5770 I.eraseFromParent();
5771 return true;
5772}
5773
5774bool AArch64InstructionSelector::selectVectorLoadIntrinsic(unsigned Opc,
5775 unsigned NumVecs,
5776 MachineInstr &I) {
5777 assert(I.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS);
5778 assert(Opc && "Expected an opcode?");
5779 assert(NumVecs > 1 && NumVecs < 5 && "Only support 2, 3, or 4 vectors");
5780 auto &MRI = *MIB.getMRI();
5781 LLT Ty = MRI.getType(I.getOperand(0).getReg());
5782 unsigned Size = Ty.getSizeInBits();
5783 assert((Size == 64 || Size == 128) &&
5784 "Destination must be 64 bits or 128 bits?");
5785 unsigned SubReg = Size == 64 ? AArch64::dsub0 : AArch64::qsub0;
5786 auto Ptr = I.getOperand(I.getNumOperands() - 1).getReg();
5787 assert(MRI.getType(Ptr).isPointer() && "Expected a pointer type?");
5788 auto Load = MIB.buildInstr(Opc, {Ty}, {Ptr});
5789 Load.cloneMemRefs(I);
5790 constrainSelectedInstRegOperands(*Load, TII, TRI, RBI);
5791 Register SelectedLoadDst = Load->getOperand(0).getReg();
5792 for (unsigned Idx = 0; Idx < NumVecs; ++Idx) {
5793 auto Vec = MIB.buildInstr(TargetOpcode::COPY, {I.getOperand(Idx)}, {})
5794 .addReg(SelectedLoadDst, 0, SubReg + Idx);
5795 // Emit the subreg copies and immediately select them.
5796 // FIXME: We should refactor our copy code into an emitCopy helper and
5797 // clean up uses of this pattern elsewhere in the selector.
5798 selectCopy(*Vec, TII, MRI, TRI, RBI);
5799 }
5800 return true;
5801}
5802
5803bool AArch64InstructionSelector::selectVectorLoadLaneIntrinsic(
5804 unsigned Opc, unsigned NumVecs, MachineInstr &I) {
5805 assert(I.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS);
5806 assert(Opc && "Expected an opcode?");
5807 assert(NumVecs > 1 && NumVecs < 5 && "Only support 2, 3, or 4 vectors");
5808 auto &MRI = *MIB.getMRI();
5809 LLT Ty = MRI.getType(I.getOperand(0).getReg());
5810 bool Narrow = Ty.getSizeInBits() == 64;
5811
5812 auto FirstSrcRegIt = I.operands_begin() + NumVecs + 1;
5813 SmallVector<Register, 4> Regs(NumVecs);
5814 std::transform(FirstSrcRegIt, FirstSrcRegIt + NumVecs, Regs.begin(),
5815 [](auto MO) { return MO.getReg(); });
5816
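// The LD2/LD3/LD4 lane instructions operate on a tuple of Q registers, so
// 64-bit sources are widened to 128 bits here and the results are narrowed
// back after the load.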
5817 if (Narrow) {
5818 transform(Regs, Regs.begin(), [this](Register Reg) {
5819 return emitScalarToVector(64, &AArch64::FPR128RegClass, Reg, MIB)
5820 ->getOperand(0)
5821 .getReg();
5822 });
5823 Ty = Ty.multiplyElements(2);
5824 }
5825
5826 Register Tuple = createQTuple(Regs, MIB);
5827 auto LaneNo = getIConstantVRegVal((FirstSrcRegIt + NumVecs)->getReg(), MRI);
5828 if (!LaneNo)
5829 return false;
5830
5831 Register Ptr = (FirstSrcRegIt + NumVecs + 1)->getReg();
5832 auto Load = MIB.buildInstr(Opc, {Ty}, {})
5833 .addReg(Tuple)
5834 .addImm(LaneNo->getZExtValue())
5835 .addReg(Ptr);
5836 Load.cloneMemRefs(I);
5837 constrainSelectedInstRegOperands(*Load, TII, TRI, RBI);
5838 Register SelectedLoadDst = Load->getOperand(0).getReg();
5839 unsigned SubReg = AArch64::qsub0;
5840 for (unsigned Idx = 0; Idx < NumVecs; ++Idx) {
5841 auto Vec = MIB.buildInstr(TargetOpcode::COPY,
5842 {Narrow ? DstOp(&AArch64::FPR128RegClass)
5843 : DstOp(I.getOperand(Idx).getReg())},
5844 {})
5845 .addReg(SelectedLoadDst, 0, SubReg + Idx);
5846 Register WideReg = Vec.getReg(0);
5847 // Emit the subreg copies and immediately select them.
5848 selectCopy(*Vec, TII, MRI, TRI, RBI);
5849 if (Narrow &&
5850 !emitNarrowVector(I.getOperand(Idx).getReg(), WideReg, MIB, MRI))
5851 return false;
5852 }
5853 return true;
5854}
5855
5856void AArch64InstructionSelector::selectVectorStoreIntrinsic(MachineInstr &I,
5857 unsigned NumVecs,
5858 unsigned Opc) {
5859 MachineRegisterInfo &MRI = I.getParent()->getParent()->getRegInfo();
5860 LLT Ty = MRI.getType(I.getOperand(1).getReg());
5861 Register Ptr = I.getOperand(1 + NumVecs).getReg();
5862
5863 SmallVector<Register, 2> Regs(NumVecs);
5864 std::transform(I.operands_begin() + 1, I.operands_begin() + 1 + NumVecs,
5865 Regs.begin(), [](auto MO) { return MO.getReg(); });
5866
5867 Register Tuple = Ty.getSizeInBits() == 128 ? createQTuple(Regs, MIB)
5868 : createDTuple(Regs, MIB);
5869 auto Store = MIB.buildInstr(Opc, {}, {Tuple, Ptr});
5870 Store.cloneMemRefs(I);
5871 constrainSelectedInstRegOperands(*Store, TII, TRI, RBI);
5872}
5873
5874bool AArch64InstructionSelector::selectVectorStoreLaneIntrinsic(
5875 MachineInstr &I, unsigned NumVecs, unsigned Opc) {
5876 MachineRegisterInfo &MRI = I.getParent()->getParent()->getRegInfo();
5877 LLT Ty = MRI.getType(I.getOperand(1).getReg());
5878 bool Narrow = Ty.getSizeInBits() == 64;
5879
5880 SmallVector<Register, 2> Regs(NumVecs);
5881 std::transform(I.operands_begin() + 1, I.operands_begin() + 1 + NumVecs,
5882 Regs.begin(), [](auto MO) { return MO.getReg(); });
5883
5884 if (Narrow)
5885 transform(Regs, Regs.begin(), [this](Register Reg) {
5886 return emitScalarToVector(64, &AArch64::FPR128RegClass, Reg, MIB)
5887 ->getOperand(0)
5888 .getReg();
5889 });
5890
5891 Register Tuple = createQTuple(Regs, MIB);
5892
5893 auto LaneNo = getIConstantVRegVal(I.getOperand(1 + NumVecs).getReg(), MRI);
5894 if (!LaneNo)
5895 return false;
5896 Register Ptr = I.getOperand(1 + NumVecs + 1).getReg();
5897 auto Store = MIB.buildInstr(Opc, {}, {})
5898 .addReg(Tuple)
5899 .addImm(LaneNo->getZExtValue())
5900 .addReg(Ptr);
5901 Store.cloneMemRefs(I);
5902 constrainSelectedInstRegOperands(*Store, TII, TRI, RBI);
5903 return true;
5904}
5905
5906bool AArch64InstructionSelector::selectIntrinsicWithSideEffects(
5907 MachineInstr &I, MachineRegisterInfo &MRI) {
5908 // Find the intrinsic ID.
5909 unsigned IntrinID = cast<GIntrinsic>(I).getIntrinsicID();
5910
5911 const LLT S8 = LLT::scalar(8);
5912 const LLT S16 = LLT::scalar(16);
5913 const LLT S32 = LLT::scalar(32);
5914 const LLT S64 = LLT::scalar(64);
5915 const LLT P0 = LLT::pointer(0, 64);
5916 // Select the instruction.
5917 switch (IntrinID) {
5918 default:
5919 return false;
5920 case Intrinsic::aarch64_ldxp:
5921 case Intrinsic::aarch64_ldaxp: {
5922 auto NewI = MIB.buildInstr(
5923 IntrinID == Intrinsic::aarch64_ldxp ? AArch64::LDXPX : AArch64::LDAXPX,
5924 {I.getOperand(0).getReg(), I.getOperand(1).getReg()},
5925 {I.getOperand(3)});
5926 NewI.cloneMemRefs(I);
5927 constrainSelectedInstRegOperands(*NewI, TII, TRI, RBI);
5928 break;
5929 }
5930 case Intrinsic::aarch64_neon_ld1x2: {
5931 LLT Ty = MRI.getType(I.getOperand(0).getReg());
5932 unsigned Opc = 0;
5933 if (Ty == LLT::fixed_vector(8, S8))
5934 Opc = AArch64::LD1Twov8b;
5935 else if (Ty == LLT::fixed_vector(16, S8))
5936 Opc = AArch64::LD1Twov16b;
5937 else if (Ty == LLT::fixed_vector(4, S16))
5938 Opc = AArch64::LD1Twov4h;
5939 else if (Ty == LLT::fixed_vector(8, S16))
5940 Opc = AArch64::LD1Twov8h;
5941 else if (Ty == LLT::fixed_vector(2, S32))
5942 Opc = AArch64::LD1Twov2s;
5943 else if (Ty == LLT::fixed_vector(4, S32))
5944 Opc = AArch64::LD1Twov4s;
5945 else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
5946 Opc = AArch64::LD1Twov2d;
5947 else if (Ty == S64 || Ty == P0)
5948 Opc = AArch64::LD1Twov1d;
5949 else
5950 llvm_unreachable("Unexpected type for ld1x2!");
5951 selectVectorLoadIntrinsic(Opc, 2, I);
5952 break;
5953 }
5954 case Intrinsic::aarch64_neon_ld1x3: {
5955 LLT Ty = MRI.getType(I.getOperand(0).getReg());
5956 unsigned Opc = 0;
5957 if (Ty == LLT::fixed_vector(8, S8))
5958 Opc = AArch64::LD1Threev8b;
5959 else if (Ty == LLT::fixed_vector(16, S8))
5960 Opc = AArch64::LD1Threev16b;
5961 else if (Ty == LLT::fixed_vector(4, S16))
5962 Opc = AArch64::LD1Threev4h;
5963 else if (Ty == LLT::fixed_vector(8, S16))
5964 Opc = AArch64::LD1Threev8h;
5965 else if (Ty == LLT::fixed_vector(2, S32))
5966 Opc = AArch64::LD1Threev2s;
5967 else if (Ty == LLT::fixed_vector(4, S32))
5968 Opc = AArch64::LD1Threev4s;
5969 else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
5970 Opc = AArch64::LD1Threev2d;
5971 else if (Ty == S64 || Ty == P0)
5972 Opc = AArch64::LD1Threev1d;
5973 else
5974 llvm_unreachable("Unexpected type for ld1x3!");
5975 selectVectorLoadIntrinsic(Opc, 3, I);
5976 break;
5977 }
5978 case Intrinsic::aarch64_neon_ld1x4: {
5979 LLT Ty = MRI.getType(I.getOperand(0).getReg());
5980 unsigned Opc = 0;
5981 if (Ty == LLT::fixed_vector(8, S8))
5982 Opc = AArch64::LD1Fourv8b;
5983 else if (Ty == LLT::fixed_vector(16, S8))
5984 Opc = AArch64::LD1Fourv16b;
5985 else if (Ty == LLT::fixed_vector(4, S16))
5986 Opc = AArch64::LD1Fourv4h;
5987 else if (Ty == LLT::fixed_vector(8, S16))
5988 Opc = AArch64::LD1Fourv8h;
5989 else if (Ty == LLT::fixed_vector(2, S32))
5990 Opc = AArch64::LD1Fourv2s;
5991 else if (Ty == LLT::fixed_vector(4, S32))
5992 Opc = AArch64::LD1Fourv4s;
5993 else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
5994 Opc = AArch64::LD1Fourv2d;
5995 else if (Ty == S64 || Ty == P0)
5996 Opc = AArch64::LD1Fourv1d;
5997 else
5998 llvm_unreachable("Unexpected type for ld1x4!");
5999 selectVectorLoadIntrinsic(Opc, 4, I);
6000 break;
6001 }
6002 case Intrinsic::aarch64_neon_ld2: {
6003 LLT Ty = MRI.getType(I.getOperand(0).getReg());
6004 unsigned Opc = 0;
6005 if (Ty == LLT::fixed_vector(8, S8))
6006 Opc = AArch64::LD2Twov8b;
6007 else if (Ty == LLT::fixed_vector(16, S8))
6008 Opc = AArch64::LD2Twov16b;
6009 else if (Ty == LLT::fixed_vector(4, S16))
6010 Opc = AArch64::LD2Twov4h;
6011 else if (Ty == LLT::fixed_vector(8, S16))
6012 Opc = AArch64::LD2Twov8h;
6013 else if (Ty == LLT::fixed_vector(2, S32))
6014 Opc = AArch64::LD2Twov2s;
6015 else if (Ty == LLT::fixed_vector(4, S32))
6016 Opc = AArch64::LD2Twov4s;
6017 else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
6018 Opc = AArch64::LD2Twov2d;
6019 else if (Ty == S64 || Ty == P0)
6020 Opc = AArch64::LD1Twov1d;
6021 else
6022 llvm_unreachable("Unexpected type for ld2!");
6023 selectVectorLoadIntrinsic(Opc, 2, I);
6024 break;
6025 }
6026 case Intrinsic::aarch64_neon_ld2lane: {
6027 LLT Ty = MRI.getType(I.getOperand(0).getReg());
6028 unsigned Opc;
6029 if (Ty == LLT::fixed_vector(8, S8) || Ty == LLT::fixed_vector(16, S8))
6030 Opc = AArch64::LD2i8;
6031 else if (Ty == LLT::fixed_vector(4, S16) || Ty == LLT::fixed_vector(8, S16))
6032 Opc = AArch64::LD2i16;
6033 else if (Ty == LLT::fixed_vector(2, S32) || Ty == LLT::fixed_vector(4, S32))
6034 Opc = AArch64::LD2i32;
6035 else if (Ty == LLT::fixed_vector(2, S64) ||
6036 Ty == LLT::fixed_vector(2, P0) || Ty == S64 || Ty == P0)
6037 Opc = AArch64::LD2i64;
6038 else
6039 llvm_unreachable("Unexpected type for ld2lane!");
6040 if (!selectVectorLoadLaneIntrinsic(Opc, 2, I))
6041 return false;
6042 break;
6043 }
6044 case Intrinsic::aarch64_neon_ld2r: {
6045 LLT Ty = MRI.getType(I.getOperand(0).getReg());
6046 unsigned Opc = 0;
6047 if (Ty == LLT::fixed_vector(8, S8))
6048 Opc = AArch64::LD2Rv8b;
6049 else if (Ty == LLT::fixed_vector(16, S8))
6050 Opc = AArch64::LD2Rv16b;
6051 else if (Ty == LLT::fixed_vector(4, S16))
6052 Opc = AArch64::LD2Rv4h;
6053 else if (Ty == LLT::fixed_vector(8, S16))
6054 Opc = AArch64::LD2Rv8h;
6055 else if (Ty == LLT::fixed_vector(2, S32))
6056 Opc = AArch64::LD2Rv2s;
6057 else if (Ty == LLT::fixed_vector(4, S32))
6058 Opc = AArch64::LD2Rv4s;
6059 else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
6060 Opc = AArch64::LD2Rv2d;
6061 else if (Ty == S64 || Ty == P0)
6062 Opc = AArch64::LD2Rv1d;
6063 else
6064 llvm_unreachable("Unexpected type for ld2r!");
6065 selectVectorLoadIntrinsic(Opc, 2, I);
6066 break;
6067 }
6068 case Intrinsic::aarch64_neon_ld3: {
6069 LLT Ty = MRI.getType(I.getOperand(0).getReg());
6070 unsigned Opc = 0;
6071 if (Ty == LLT::fixed_vector(8, S8))
6072 Opc = AArch64::LD3Threev8b;
6073 else if (Ty == LLT::fixed_vector(16, S8))
6074 Opc = AArch64::LD3Threev16b;
6075 else if (Ty == LLT::fixed_vector(4, S16))
6076 Opc = AArch64::LD3Threev4h;
6077 else if (Ty == LLT::fixed_vector(8, S16))
6078 Opc = AArch64::LD3Threev8h;
6079 else if (Ty == LLT::fixed_vector(2, S32))
6080 Opc = AArch64::LD3Threev2s;
6081 else if (Ty == LLT::fixed_vector(4, S32))
6082 Opc = AArch64::LD3Threev4s;
6083 else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
6084 Opc = AArch64::LD3Threev2d;
6085 else if (Ty == S64 || Ty == P0)
6086 Opc = AArch64::LD1Threev1d;
6087 else
6088 llvm_unreachable("Unexpected type for ld3!");
6089 selectVectorLoadIntrinsic(Opc, 3, I);
6090 break;
6091 }
6092 case Intrinsic::aarch64_neon_ld3lane: {
6093 LLT Ty = MRI.getType(I.getOperand(0).getReg());
6094 unsigned Opc;
6095 if (Ty == LLT::fixed_vector(8, S8) || Ty == LLT::fixed_vector(16, S8))
6096 Opc = AArch64::LD3i8;
6097 else if (Ty == LLT::fixed_vector(4, S16) || Ty == LLT::fixed_vector(8, S16))
6098 Opc = AArch64::LD3i16;
6099 else if (Ty == LLT::fixed_vector(2, S32) || Ty == LLT::fixed_vector(4, S32))
6100 Opc = AArch64::LD3i32;
6101 else if (Ty == LLT::fixed_vector(2, S64) ||
6102 Ty == LLT::fixed_vector(2, P0) || Ty == S64 || Ty == P0)
6103 Opc = AArch64::LD3i64;
6104 else
6105 llvm_unreachable("Unexpected type for ld3lane!");
6106 if (!selectVectorLoadLaneIntrinsic(Opc, 3, I))
6107 return false;
6108 break;
6109 }
6110 case Intrinsic::aarch64_neon_ld3r: {
6111 LLT Ty = MRI.getType(I.getOperand(0).getReg());
6112 unsigned Opc = 0;
6113 if (Ty == LLT::fixed_vector(8, S8))
6114 Opc = AArch64::LD3Rv8b;
6115 else if (Ty == LLT::fixed_vector(16, S8))
6116 Opc = AArch64::LD3Rv16b;
6117 else if (Ty == LLT::fixed_vector(4, S16))
6118 Opc = AArch64::LD3Rv4h;
6119 else if (Ty == LLT::fixed_vector(8, S16))
6120 Opc = AArch64::LD3Rv8h;
6121 else if (Ty == LLT::fixed_vector(2, S32))
6122 Opc = AArch64::LD3Rv2s;
6123 else if (Ty == LLT::fixed_vector(4, S32))
6124 Opc = AArch64::LD3Rv4s;
6125 else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
6126 Opc = AArch64::LD3Rv2d;
6127 else if (Ty == S64 || Ty == P0)
6128 Opc = AArch64::LD3Rv1d;
6129 else
6130 llvm_unreachable("Unexpected type for ld3r!");
6131 selectVectorLoadIntrinsic(Opc, 3, I);
6132 break;
6133 }
6134 case Intrinsic::aarch64_neon_ld4: {
6135 LLT Ty = MRI.getType(I.getOperand(0).getReg());
6136 unsigned Opc = 0;
6137 if (Ty == LLT::fixed_vector(8, S8))
6138 Opc = AArch64::LD4Fourv8b;
6139 else if (Ty == LLT::fixed_vector(16, S8))
6140 Opc = AArch64::LD4Fourv16b;
6141 else if (Ty == LLT::fixed_vector(4, S16))
6142 Opc = AArch64::LD4Fourv4h;
6143 else if (Ty == LLT::fixed_vector(8, S16))
6144 Opc = AArch64::LD4Fourv8h;
6145 else if (Ty == LLT::fixed_vector(2, S32))
6146 Opc = AArch64::LD4Fourv2s;
6147 else if (Ty == LLT::fixed_vector(4, S32))
6148 Opc = AArch64::LD4Fourv4s;
6149 else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
6150 Opc = AArch64::LD4Fourv2d;
6151 else if (Ty == S64 || Ty == P0)
6152 Opc = AArch64::LD1Fourv1d;
6153 else
6154 llvm_unreachable("Unexpected type for ld4!");
6155 selectVectorLoadIntrinsic(Opc, 4, I);
6156 break;
6157 }
6158 case Intrinsic::aarch64_neon_ld4lane: {
6159 LLT Ty = MRI.getType(I.getOperand(0).getReg());
6160 unsigned Opc;
6161 if (Ty == LLT::fixed_vector(8, S8) || Ty == LLT::fixed_vector(16, S8))
6162 Opc = AArch64::LD4i8;
6163 else if (Ty == LLT::fixed_vector(4, S16) || Ty == LLT::fixed_vector(8, S16))
6164 Opc = AArch64::LD4i16;
6165 else if (Ty == LLT::fixed_vector(2, S32) || Ty == LLT::fixed_vector(4, S32))
6166 Opc = AArch64::LD4i32;
6167 else if (Ty == LLT::fixed_vector(2, S64) ||
6168 Ty == LLT::fixed_vector(2, P0) || Ty == S64 || Ty == P0)
6169 Opc = AArch64::LD4i64;
6170 else
6171 llvm_unreachable("Unexpected type for ld4lane!");
6172 if (!selectVectorLoadLaneIntrinsic(Opc, 4, I))
6173 return false;
6174 break;
6175 }
6176 case Intrinsic::aarch64_neon_ld4r: {
6177 LLT Ty = MRI.getType(I.getOperand(0).getReg());
6178 unsigned Opc = 0;
6179 if (Ty == LLT::fixed_vector(8, S8))
6180 Opc = AArch64::LD4Rv8b;
6181 else if (Ty == LLT::fixed_vector(16, S8))
6182 Opc = AArch64::LD4Rv16b;
6183 else if (Ty == LLT::fixed_vector(4, S16))
6184 Opc = AArch64::LD4Rv4h;
6185 else if (Ty == LLT::fixed_vector(8, S16))
6186 Opc = AArch64::LD4Rv8h;
6187 else if (Ty == LLT::fixed_vector(2, S32))
6188 Opc = AArch64::LD4Rv2s;
6189 else if (Ty == LLT::fixed_vector(4, S32))
6190 Opc = AArch64::LD4Rv4s;
6191 else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
6192 Opc = AArch64::LD4Rv2d;
6193 else if (Ty == S64 || Ty == P0)
6194 Opc = AArch64::LD4Rv1d;
6195 else
6196 llvm_unreachable("Unexpected type for ld4r!");
6197 selectVectorLoadIntrinsic(Opc, 4, I);
6198 break;
6199 }
6200 case Intrinsic::aarch64_neon_st1x2: {
6201 LLT Ty = MRI.getType(I.getOperand(1).getReg());
6202 unsigned Opc;
6203 if (Ty == LLT::fixed_vector(8, S8))
6204 Opc = AArch64::ST1Twov8b;
6205 else if (Ty == LLT::fixed_vector(16, S8))
6206 Opc = AArch64::ST1Twov16b;
6207 else if (Ty == LLT::fixed_vector(4, S16))
6208 Opc = AArch64::ST1Twov4h;
6209 else if (Ty == LLT::fixed_vector(8, S16))
6210 Opc = AArch64::ST1Twov8h;
6211 else if (Ty == LLT::fixed_vector(2, S32))
6212 Opc = AArch64::ST1Twov2s;
6213 else if (Ty == LLT::fixed_vector(4, S32))
6214 Opc = AArch64::ST1Twov4s;
6215 else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
6216 Opc = AArch64::ST1Twov2d;
6217 else if (Ty == S64 || Ty == P0)
6218 Opc = AArch64::ST1Twov1d;
6219 else
6220 llvm_unreachable("Unexpected type for st1x2!");
6221 selectVectorStoreIntrinsic(I, 2, Opc);
6222 break;
6223 }
6224 case Intrinsic::aarch64_neon_st1x3: {
6225 LLT Ty = MRI.getType(I.getOperand(1).getReg());
6226 unsigned Opc;
6227 if (Ty == LLT::fixed_vector(8, S8))
6228 Opc = AArch64::ST1Threev8b;
6229 else if (Ty == LLT::fixed_vector(16, S8))
6230 Opc = AArch64::ST1Threev16b;
6231 else if (Ty == LLT::fixed_vector(4, S16))
6232 Opc = AArch64::ST1Threev4h;
6233 else if (Ty == LLT::fixed_vector(8, S16))
6234 Opc = AArch64::ST1Threev8h;
6235 else if (Ty == LLT::fixed_vector(2, S32))
6236 Opc = AArch64::ST1Threev2s;
6237 else if (Ty == LLT::fixed_vector(4, S32))
6238 Opc = AArch64::ST1Threev4s;
6239 else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
6240 Opc = AArch64::ST1Threev2d;
6241 else if (Ty == S64 || Ty == P0)
6242 Opc = AArch64::ST1Threev1d;
6243 else
6244 llvm_unreachable("Unexpected type for st1x3!");
6245 selectVectorStoreIntrinsic(I, 3, Opc);
6246 break;
6247 }
6248 case Intrinsic::aarch64_neon_st1x4: {
6249 LLT Ty = MRI.getType(I.getOperand(1).getReg());
6250 unsigned Opc;
6251 if (Ty == LLT::fixed_vector(8, S8))
6252 Opc = AArch64::ST1Fourv8b;
6253 else if (Ty == LLT::fixed_vector(16, S8))
6254 Opc = AArch64::ST1Fourv16b;
6255 else if (Ty == LLT::fixed_vector(4, S16))
6256 Opc = AArch64::ST1Fourv4h;
6257 else if (Ty == LLT::fixed_vector(8, S16))
6258 Opc = AArch64::ST1Fourv8h;
6259 else if (Ty == LLT::fixed_vector(2, S32))
6260 Opc = AArch64::ST1Fourv2s;
6261 else if (Ty == LLT::fixed_vector(4, S32))
6262 Opc = AArch64::ST1Fourv4s;
6263 else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
6264 Opc = AArch64::ST1Fourv2d;
6265 else if (Ty == S64 || Ty == P0)
6266 Opc = AArch64::ST1Fourv1d;
6267 else
6268 llvm_unreachable("Unexpected type for st1x4!");
6269 selectVectorStoreIntrinsic(I, 4, Opc);
6270 break;
6271 }
6272 case Intrinsic::aarch64_neon_st2: {
6273 LLT Ty = MRI.getType(I.getOperand(1).getReg());
6274 unsigned Opc;
6275 if (Ty == LLT::fixed_vector(8, S8))
6276 Opc = AArch64::ST2Twov8b;
6277 else if (Ty == LLT::fixed_vector(16, S8))
6278 Opc = AArch64::ST2Twov16b;
6279 else if (Ty == LLT::fixed_vector(4, S16))
6280 Opc = AArch64::ST2Twov4h;
6281 else if (Ty == LLT::fixed_vector(8, S16))
6282 Opc = AArch64::ST2Twov8h;
6283 else if (Ty == LLT::fixed_vector(2, S32))
6284 Opc = AArch64::ST2Twov2s;
6285 else if (Ty == LLT::fixed_vector(4, S32))
6286 Opc = AArch64::ST2Twov4s;
6287 else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
6288 Opc = AArch64::ST2Twov2d;
6289 else if (Ty == S64 || Ty == P0)
6290 Opc = AArch64::ST1Twov1d;
6291 else
6292 llvm_unreachable("Unexpected type for st2!");
6293 selectVectorStoreIntrinsic(I, 2, Opc);
6294 break;
6295 }
6296 case Intrinsic::aarch64_neon_st3: {
6297 LLT Ty = MRI.getType(I.getOperand(1).getReg());
6298 unsigned Opc;
6299 if (Ty == LLT::fixed_vector(8, S8))
6300 Opc = AArch64::ST3Threev8b;
6301 else if (Ty == LLT::fixed_vector(16, S8))
6302 Opc = AArch64::ST3Threev16b;
6303 else if (Ty == LLT::fixed_vector(4, S16))
6304 Opc = AArch64::ST3Threev4h;
6305 else if (Ty == LLT::fixed_vector(8, S16))
6306 Opc = AArch64::ST3Threev8h;
6307 else if (Ty == LLT::fixed_vector(2, S32))
6308 Opc = AArch64::ST3Threev2s;
6309 else if (Ty == LLT::fixed_vector(4, S32))
6310 Opc = AArch64::ST3Threev4s;
6311 else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
6312 Opc = AArch64::ST3Threev2d;
6313 else if (Ty == S64 || Ty == P0)
6314 Opc = AArch64::ST1Threev1d;
6315 else
6316 llvm_unreachable("Unexpected type for st3!");
6317 selectVectorStoreIntrinsic(I, 3, Opc);
6318 break;
6319 }
6320 case Intrinsic::aarch64_neon_st4: {
6321 LLT Ty = MRI.getType(I.getOperand(1).getReg());
6322 unsigned Opc;
6323 if (Ty == LLT::fixed_vector(8, S8))
6324 Opc = AArch64::ST4Fourv8b;
6325 else if (Ty == LLT::fixed_vector(16, S8))
6326 Opc = AArch64::ST4Fourv16b;
6327 else if (Ty == LLT::fixed_vector(4, S16))
6328 Opc = AArch64::ST4Fourv4h;
6329 else if (Ty == LLT::fixed_vector(8, S16))
6330 Opc = AArch64::ST4Fourv8h;
6331 else if (Ty == LLT::fixed_vector(2, S32))
6332 Opc = AArch64::ST4Fourv2s;
6333 else if (Ty == LLT::fixed_vector(4, S32))
6334 Opc = AArch64::ST4Fourv4s;
6335 else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
6336 Opc = AArch64::ST4Fourv2d;
6337 else if (Ty == S64 || Ty == P0)
6338 Opc = AArch64::ST1Fourv1d;
6339 else
6340 llvm_unreachable("Unexpected type for st4!");
6341 selectVectorStoreIntrinsic(I, 4, Opc);
6342 break;
6343 }
6344 case Intrinsic::aarch64_neon_st2lane: {
6345 LLT Ty = MRI.getType(I.getOperand(1).getReg());
6346 unsigned Opc;
6347 if (Ty == LLT::fixed_vector(8, S8) || Ty == LLT::fixed_vector(16, S8))
6348 Opc = AArch64::ST2i8;
6349 else if (Ty == LLT::fixed_vector(4, S16) || Ty == LLT::fixed_vector(8, S16))
6350 Opc = AArch64::ST2i16;
6351 else if (Ty == LLT::fixed_vector(2, S32) || Ty == LLT::fixed_vector(4, S32))
6352 Opc = AArch64::ST2i32;
6353 else if (Ty == LLT::fixed_vector(2, S64) ||
6354 Ty == LLT::fixed_vector(2, P0) || Ty == S64 || Ty == P0)
6355 Opc = AArch64::ST2i64;
6356 else
6357 llvm_unreachable("Unexpected type for st2lane!");
6358 if (!selectVectorStoreLaneIntrinsic(I, 2, Opc))
6359 return false;
6360 break;
6361 }
6362 case Intrinsic::aarch64_neon_st3lane: {
6363 LLT Ty = MRI.getType(I.getOperand(1).getReg());
6364 unsigned Opc;
6365 if (Ty == LLT::fixed_vector(8, S8) || Ty == LLT::fixed_vector(16, S8))
6366 Opc = AArch64::ST3i8;
6367 else if (Ty == LLT::fixed_vector(4, S16) || Ty == LLT::fixed_vector(8, S16))
6368 Opc = AArch64::ST3i16;
6369 else if (Ty == LLT::fixed_vector(2, S32) || Ty == LLT::fixed_vector(4, S32))
6370 Opc = AArch64::ST3i32;
6371 else if (Ty == LLT::fixed_vector(2, S64) ||
6372 Ty == LLT::fixed_vector(2, P0) || Ty == S64 || Ty == P0)
6373 Opc = AArch64::ST3i64;
6374 else
6375 llvm_unreachable("Unexpected type for st3lane!");
6376 if (!selectVectorStoreLaneIntrinsic(I, 3, Opc))
6377 return false;
6378 break;
6379 }
6380 case Intrinsic::aarch64_neon_st4lane: {
6381 LLT Ty = MRI.getType(I.getOperand(1).getReg());
6382 unsigned Opc;
6383 if (Ty == LLT::fixed_vector(8, S8) || Ty == LLT::fixed_vector(16, S8))
6384 Opc = AArch64::ST4i8;
6385 else if (Ty == LLT::fixed_vector(4, S16) || Ty == LLT::fixed_vector(8, S16))
6386 Opc = AArch64::ST4i16;
6387 else if (Ty == LLT::fixed_vector(2, S32) || Ty == LLT::fixed_vector(4, S32))
6388 Opc = AArch64::ST4i32;
6389 else if (Ty == LLT::fixed_vector(2, S64) ||
6390 Ty == LLT::fixed_vector(2, P0) || Ty == S64 || Ty == P0)
6391 Opc = AArch64::ST4i64;
6392 else
6393 llvm_unreachable("Unexpected type for st4lane!");
6394 if (!selectVectorStoreLaneIntrinsic(I, 4, Opc))
6395 return false;
6396 break;
6397 }
6398 case Intrinsic::aarch64_mops_memset_tag: {
6399 // Transform
6400 // %dst:gpr(p0) = \
6401 // G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aarch64.mops.memset.tag),
6402 // \ %dst:gpr(p0), %val:gpr(s64), %n:gpr(s64)
6403 // where %dst is updated, into
6404 // (%Rd:GPR64common, %Rn:GPR64) = \
6405 // MOPSMemorySetTaggingPseudo \
6406 // %Rd:GPR64common, %Rn:GPR64, %Rm:GPR64
6407 // where Rd and Rn are tied.
6408 // It is expected that %val has been extended to s64 in legalization.
6409 // Note that the order of the size/value operands is swapped.
6410
6411 Register DstDef = I.getOperand(0).getReg();
6412 // I.getOperand(1) is the intrinsic function
6413 Register DstUse = I.getOperand(2).getReg();
6414 Register ValUse = I.getOperand(3).getReg();
6415 Register SizeUse = I.getOperand(4).getReg();
6416
6417 // MOPSMemorySetTaggingPseudo has two defs; the intrinsic call has only one.
6418 // Therefore an additional virtual register is required for the updated size
6419 // operand. This value is not accessible via the semantics of the intrinsic.
6420 Register SizeDef = MRI.createGenericVirtualRegister(LLT::scalar(64));
6421
6422 auto Memset = MIB.buildInstr(AArch64::MOPSMemorySetTaggingPseudo,
6423 {DstDef, SizeDef}, {DstUse, SizeUse, ValUse});
6424 Memset.cloneMemRefs(I);
6426 break;
6427 }
6428 }
6429
6430 I.eraseFromParent();
6431 return true;
6432}
6433
6434bool AArch64InstructionSelector::selectIntrinsic(MachineInstr &I,
6435 MachineRegisterInfo &MRI) {
6436 unsigned IntrinID = cast<GIntrinsic>(I).getIntrinsicID();
6437
6438 switch (IntrinID) {
6439 default:
6440 break;
6441 case Intrinsic::aarch64_crypto_sha1h: {
6442 Register DstReg = I.getOperand(0).getReg();
6443 Register SrcReg = I.getOperand(2).getReg();
6444
6445 // FIXME: Should this be an assert?
6446 if (MRI.getType(DstReg).getSizeInBits() != 32 ||
6447 MRI.getType(SrcReg).getSizeInBits() != 32)
6448 return false;
6449
6450 // The operation has to happen on FPRs. Set up some new FPR registers for
6451 // the source and destination if they are on GPRs.
6452 if (RBI.getRegBank(SrcReg, MRI, TRI)->getID() != AArch64::FPRRegBankID) {
6453 SrcReg = MRI.createVirtualRegister(&AArch64::FPR32RegClass);
6454 MIB.buildCopy({SrcReg}, {I.getOperand(2)});
6455
6456 // Make sure the copy ends up getting constrained properly.
6457 RBI.constrainGenericRegister(I.getOperand(2).getReg(),
6458 AArch64::GPR32RegClass, MRI);
6459 }
6460
6461 if (RBI.getRegBank(DstReg, MRI, TRI)->getID() != AArch64::FPRRegBankID)
6462 DstReg = MRI.createVirtualRegister(&AArch64::FPR32RegClass);
6463
6464 // Actually insert the instruction.
6465 auto SHA1Inst = MIB.buildInstr(AArch64::SHA1Hrr, {DstReg}, {SrcReg});
6466 constrainSelectedInstRegOperands(*SHA1Inst, TII, TRI, RBI);
6467
6468 // Did we create a new register for the destination?
6469 if (DstReg != I.getOperand(0).getReg()) {
6470 // Yep. Copy the result of the instruction back into the original
6471 // destination.
6472 MIB.buildCopy({I.getOperand(0)}, {DstReg});
6473 RBI.constrainGenericRegister(I.getOperand(0).getReg(),
6474 AArch64::GPR32RegClass, MRI);
6475 }
6476
6477 I.eraseFromParent();
6478 return true;
6479 }
6480 case Intrinsic::frameaddress:
6481 case Intrinsic::returnaddress: {
6482 MachineFunction &MF = *I.getParent()->getParent();
6483 MachineFrameInfo &MFI = MF.getFrameInfo();
6484
6485 unsigned Depth = I.getOperand(2).getImm();
6486 Register DstReg = I.getOperand(0).getReg();
6487 RBI.constrainGenericRegister(DstReg, AArch64::GPR64RegClass, MRI);
6488
6489 if (Depth == 0 && IntrinID == Intrinsic::returnaddress) {
6490 if (!MFReturnAddr) {
6491 // Insert the copy from LR/X30 into the entry block, before it can be
6492 // clobbered by anything.
6493 MFI.setReturnAddressIsTaken(true);
6494 MFReturnAddr = getFunctionLiveInPhysReg(
6495 MF, TII, AArch64::LR, AArch64::GPR64RegClass, I.getDebugLoc());
6496 }
6497
6498 if (STI.hasPAuth()) {
6499 MIB.buildInstr(AArch64::XPACI, {DstReg}, {MFReturnAddr});
6500 } else {
6501 MIB.buildCopy({Register(AArch64::LR)}, {MFReturnAddr});
6502 MIB.buildInstr(AArch64::XPACLRI);
6503 MIB.buildCopy({DstReg}, {Register(AArch64::LR)});
6504 }
6505
6506 I.eraseFromParent();
6507 return true;
6508 }
6509
6510 MFI.setFrameAddressIsTaken(true);
6511 Register FrameAddr(AArch64::FP);
6512 while (Depth--) {
6513 Register NextFrame = MRI.createVirtualRegister(&AArch64::GPR64spRegClass);
6514 auto Ldr =
6515 MIB.buildInstr(AArch64::LDRXui, {NextFrame}, {FrameAddr}).addImm(0);
6517 FrameAddr = NextFrame;
6518 }
6519
6520 if (IntrinID == Intrinsic::frameaddress)
6521 MIB.buildCopy({DstReg}, {FrameAddr});
6522 else {
6523 MFI.setReturnAddressIsTaken(true);
6524
6525 if (STI.hasPAuth()) {
6526 Register TmpReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
6527 MIB.buildInstr(AArch64::LDRXui, {TmpReg}, {FrameAddr}).addImm(1);
6528 MIB.buildInstr(AArch64::XPACI, {DstReg}, {TmpReg});
6529 } else {
6530 MIB.buildInstr(AArch64::LDRXui, {Register(AArch64::LR)}, {FrameAddr})
6531 .addImm(1);
6532 MIB.buildInstr(AArch64::XPACLRI);
6533 MIB.buildCopy({DstReg}, {Register(AArch64::LR)});
6534 }
6535 }
6536
6537 I.eraseFromParent();
6538 return true;
6539 }
6540 case Intrinsic::swift_async_context_addr:
6541 auto Sub = MIB.buildInstr(AArch64::SUBXri, {I.getOperand(0).getReg()},
6542 {Register(AArch64::FP)})
6543 .addImm(8)
6544 .addImm(0);
6546
6548 MF->getInfo<AArch64FunctionInfo>()->setHasSwiftAsyncContext(true);
6549 I.eraseFromParent();
6550 return true;
6551 }
6552 return false;
6553}
6554
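/// The four selectShift{A,B}_{32,64} renderers below supply the two immediates
/// used when an immediate left shift is selected as a bitfield move. As an
/// illustrative worked example (an assumption about how the imported patterns
/// consume these values): a 32-bit shift-left by 3 becomes a UBFM with
/// immr = (32 - 3) & 0x1f = 29 and imms = 31 - 3 = 28, which is exactly what
/// selectShiftA_32 and selectShiftB_32 return for an immediate of 3.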
6556AArch64InstructionSelector::selectShiftA_32(const MachineOperand &Root) const {
6557 auto MaybeImmed = getImmedFromMO(Root);
6558 if (MaybeImmed == std::nullopt || *MaybeImmed > 31)
6559 return std::nullopt;
6560 uint64_t Enc = (32 - *MaybeImmed) & 0x1f;
6561 return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Enc); }}};
6562}
6563
6565AArch64InstructionSelector::selectShiftB_32(const MachineOperand &Root) const {
6566 auto MaybeImmed = getImmedFromMO(Root);
6567 if (MaybeImmed == std::nullopt || *MaybeImmed > 31)
6568 return std::nullopt;
6569 uint64_t Enc = 31 - *MaybeImmed;
6570 return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Enc); }}};
6571}
6572
6574AArch64InstructionSelector::selectShiftA_64(const MachineOperand &Root) const {
6575 auto MaybeImmed = getImmedFromMO(Root);
6576 if (MaybeImmed == std::nullopt || *MaybeImmed > 63)
6577 return std::nullopt;
6578 uint64_t Enc = (64 - *MaybeImmed) & 0x3f;
6579 return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Enc); }}};
6580}
6581
6583AArch64InstructionSelector::selectShiftB_64(const MachineOperand &Root) const {
6584 auto MaybeImmed = getImmedFromMO(Root);
6585 if (MaybeImmed == std::nullopt || *MaybeImmed > 63)
6586 return std::nullopt;
6587 uint64_t Enc = 63 - *MaybeImmed;
6588 return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Enc); }}};
6589}
6590
6591/// Helper to select an immediate value that can be represented as a 12-bit
6592/// value shifted left by either 0 or 12. If it is possible to do so, return
6593/// the immediate and shift value. If not, return std::nullopt.
6594///
6595/// Used by selectArithImmed and selectNegArithImmed.
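/// Illustrative examples (not exhaustive): 0xabc is returned as
/// (imm = 0xabc, LSL #0); 0xabc000 as (imm = 0xabc, LSL #12); values such as
/// 0xabc123 or 0x1abc000 fit neither form, so std::nullopt is returned.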
6597AArch64InstructionSelector::select12BitValueWithLeftShift(
6598 uint64_t Immed) const {
6599 unsigned ShiftAmt;
6600 if (Immed >> 12 == 0) {
6601 ShiftAmt = 0;
6602 } else if ((Immed & 0xfff) == 0 && Immed >> 24 == 0) {
6603 ShiftAmt = 12;
6604 Immed = Immed >> 12;
6605 } else
6606 return std::nullopt;
6607
6608 unsigned ShVal = AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftAmt);
6609 return {{
6610 [=](MachineInstrBuilder &MIB) { MIB.addImm(Immed); },
6611 [=](MachineInstrBuilder &MIB) { MIB.addImm(ShVal); },
6612 }};
6613}
6614
6615/// SelectArithImmed - Select an immediate value that can be represented as
6616/// a 12-bit value shifted left by either 0 or 12. If so, return true with
6617/// Val set to the 12-bit value and Shift set to the shifter operand.
6619AArch64InstructionSelector::selectArithImmed(MachineOperand &Root) const {
6620 // This function is called from the addsub_shifted_imm ComplexPattern,
6621 // which lists [imm] as the list of opcodes it's interested in; however,
6622 // we still need to check whether the operand is actually an immediate
6623 // here because the ComplexPattern opcode list is only used in
6624 // root-level opcode matching.
6625 auto MaybeImmed = getImmedFromMO(Root);
6626 if (MaybeImmed == std::nullopt)
6627 return std::nullopt;
6628 return select12BitValueWithLeftShift(*MaybeImmed);
6629}
6630
6631/// SelectNegArithImmed - As above, but negates the value before trying to
6632/// select it.
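/// For example (illustrative): "cmp wN, #-5" has no valid arith immediate, but
/// after negation it can be selected as "cmn wN, #5". A zero immediate is
/// rejected below because "cmp" and "cmn" against #0 set the C flag
/// differently.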
6634AArch64InstructionSelector::selectNegArithImmed(MachineOperand &Root) const {
6635 // We need a register here, because we need to know if we have a 64 or 32
6636 // bit immediate.
6637 if (!Root.isReg())
6638 return std::nullopt;
6639 auto MaybeImmed = getImmedFromMO(Root);
6640 if (MaybeImmed == std::nullopt)
6641 return std::nullopt;
6642 uint64_t Immed = *MaybeImmed;
6643
6644 // This negation is almost always valid, but "cmp wN, #0" and "cmn wN, #0"
6645 // have the opposite effect on the C flag, so this pattern mustn't match under
6646 // those circumstances.
6647 if (Immed == 0)
6648 return std::nullopt;
6649
6650 // Check whether the root is a 32-bit or a 64-bit type, so that the
6651 // negation below is done at the right width.
6653 if (MRI.getType(Root.getReg()).getSizeInBits() == 32)
6654 Immed = ~((uint32_t)Immed) + 1;
6655 else
6656 Immed = ~Immed + 1ULL;
6657
6658 if (Immed & 0xFFFFFFFFFF000000ULL)
6659 return std::nullopt;
6660
6661 Immed &= 0xFFFFFFULL;
6662 return select12BitValueWithLeftShift(Immed);
6663}
6664
6665/// Return true if it is worth folding MI into an extended register. That is,
6666/// if it's safe to pull it into the addressing mode of a load or store as a
6667/// shift.
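/// For instance (illustrative): if %offset = G_SHL %x, 2 is used only by two
/// G_LOADs, it is worth folding "lsl #2" into both addressing modes, since the
/// shifted form is typically cheap and no separate shift instruction is
/// needed.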
6668bool AArch64InstructionSelector::isWorthFoldingIntoExtendedReg(
6669 MachineInstr &MI, const MachineRegisterInfo &MRI) const {
6670 // Always fold if there is one use, or if we're optimizing for size.
6671 Register DefReg = MI.getOperand(0).getReg();
6672 if (MRI.hasOneNonDBGUse(DefReg) ||
6673 MI.getParent()->getParent()->getFunction().hasOptSize())
6674 return true;
6675
6676 // FIXME: Consider checking HasAddrLSLSlow14 and HasALULSLFast as
6677 // appropriate.
6678
6679 // We have a fastpath, so folding a shift in and potentially computing it
6680 // many times may be beneficial. Check if this is only used in memory ops.
6681 // If it is, then we should fold.
6682 return all_of(MRI.use_nodbg_instructions(DefReg),
6683 [](MachineInstr &Use) { return Use.mayLoadOrStore(); });
6684}
6685
6686static bool isSignExtendShiftType(AArch64_AM::ShiftExtendType Type) {
6687 switch (Type) {
6688 case AArch64_AM::SXTB:
6689 case AArch64_AM::SXTH:
6690 case AArch64_AM::SXTW:
6691 return true;
6692 default:
6693 return false;
6694 }
6695}
6696
6698AArch64InstructionSelector::selectExtendedSHL(
6700 unsigned SizeInBytes, bool WantsExt) const {
6701 assert(Base.isReg() && "Expected base to be a register operand");
6702 assert(Offset.isReg() && "Expected offset to be a register operand");
6703
6705 MachineInstr *OffsetInst = MRI.getVRegDef(Offset.getReg());
6706
6707 unsigned OffsetOpc = OffsetInst->getOpcode();
6708 bool LookedThroughZExt = false;
6709 if (OffsetOpc != TargetOpcode::G_SHL && OffsetOpc != TargetOpcode::G_MUL) {
6710 // Try to look through a ZEXT.
6711 if (OffsetOpc != TargetOpcode::G_ZEXT || !WantsExt)
6712 return std::nullopt;
6713
6714 OffsetInst = MRI.getVRegDef(OffsetInst->getOperand(1).getReg());
6715 OffsetOpc = OffsetInst->getOpcode();
6716 LookedThroughZExt = true;
6717
6718 if (OffsetOpc != TargetOpcode::G_SHL && OffsetOpc != TargetOpcode::G_MUL)
6719 return std::nullopt;
6720 }
6721 // Make sure that the memory op is a valid size.
6722 int64_t LegalShiftVal = Log2_32(SizeInBytes);
6723 if (LegalShiftVal == 0)
6724 return std::nullopt;
6725 if (!isWorthFoldingIntoExtendedReg(*OffsetInst, MRI))
6726 return std::nullopt;
6727
6728 // Now, try to find the specific G_CONSTANT. Start by assuming that the
6729 // register we will offset is the LHS, and the register containing the
6730 // constant is the RHS.
6731 Register OffsetReg = OffsetInst->getOperand(1).getReg();
6732 Register ConstantReg = OffsetInst->getOperand(2).getReg();
6733 auto ValAndVReg = getIConstantVRegValWithLookThrough(ConstantReg, MRI);
6734 if (!ValAndVReg) {
6735 // We didn't get a constant on the RHS. If the opcode is a shift, then
6736 // we're done.
6737 if (OffsetOpc == TargetOpcode::G_SHL)
6738 return std::nullopt;
6739
6740 // If we have a G_MUL, we can use either register. Try looking at the RHS.
6741 std::swap(OffsetReg, ConstantReg);
6742 ValAndVReg = getIConstantVRegValWithLookThrough(ConstantReg, MRI);
6743 if (!ValAndVReg)
6744 return std::nullopt;
6745 }
6746
6747 // The value must fit into 3 bits, and must be positive. Make sure that is
6748 // true.
6749 int64_t ImmVal = ValAndVReg->Value.getSExtValue();
6750
6751 // Since we're going to pull this into a shift, the constant value must be
6752 // a power of 2. If we got a multiply, then we need to check this.
6753 if (OffsetOpc == TargetOpcode::G_MUL) {
6754 if (!llvm::has_single_bit<uint32_t>(ImmVal))
6755 return std::nullopt;
6756
6757 // Got a power of 2. So, the amount we'll shift is the log base-2 of that.
6758 ImmVal = Log2_32(ImmVal);
6759 }
6760
6761 if ((ImmVal & 0x7) != ImmVal)
6762 return std::nullopt;
6763
6764 // We are only allowed to shift by LegalShiftVal. This shift value is built
6765 // into the instruction, so we can't just use whatever we want.
6766 if (ImmVal != LegalShiftVal)
6767 return std::nullopt;
6768
6769 unsigned SignExtend = 0;
6770 if (WantsExt) {
6771 // Check if the offset is defined by an extend, unless we looked through a
6772 // G_ZEXT earlier.
6773 if (!LookedThroughZExt) {
6774 MachineInstr *ExtInst = getDefIgnoringCopies(OffsetReg, MRI);
6775 auto Ext = getExtendTypeForInst(*ExtInst, MRI, true);
6776 if (Ext == AArch64_AM::InvalidShiftExtend)
6777 return std::nullopt;
6778
6779 SignExtend = isSignExtendShiftType(Ext) ? 1 : 0;
6780 // We only support SXTW for signed extension here.
6781 if (SignExtend && Ext != AArch64_AM::SXTW)
6782 return std::nullopt;
6783 OffsetReg = ExtInst->getOperand(1).getReg();
6784 }
6785
6786 // Need a 32-bit wide register here.
6787 MachineIRBuilder MIB(*MRI.getVRegDef(Root.getReg()));
6788 OffsetReg = moveScalarRegClass(OffsetReg, AArch64::GPR32RegClass, MIB);
6789 }
6790
6791 // We can use the LHS of the GEP as the base, and the LHS of the shift as an
6792 // offset. Signify that we are shifting by setting the shift flag to 1.
6793 return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(Base.getReg()); },
6794 [=](MachineInstrBuilder &MIB) { MIB.addUse(OffsetReg); },
6795 [=](MachineInstrBuilder &MIB) {
6796 // Need to add both immediates here to make sure that they are both
6797 // added to the instruction.
6798 MIB.addImm(SignExtend);
6799 MIB.addImm(1);
6800 }}};
6801}
6802
6803/// This is used for computing addresses like this:
6804///
6805/// ldr x1, [x2, x3, lsl #3]
6806///
6807/// Where x2 is the base register, and x3 is an offset register. The shift-left
6808/// is a constant value specific to this load instruction. That is, we'll never
6809/// see anything other than a 3 here (which corresponds to the size of the
6810/// element being loaded.)
6812AArch64InstructionSelector::selectAddrModeShiftedExtendXReg(
6813 MachineOperand &Root, unsigned SizeInBytes) const {
6814 if (!Root.isReg())
6815 return std::nullopt;
6817
6818 // We want to find something like this:
6819 //
6820 // val = G_CONSTANT LegalShiftVal
6821 // shift = G_SHL off_reg val
6822 // ptr = G_PTR_ADD base_reg shift
6823 // x = G_LOAD ptr
6824 //
6825 // And fold it into this addressing mode:
6826 //
6827 // ldr x, [base_reg, off_reg, lsl #LegalShiftVal]
6828
6829 // Check if we can find the G_PTR_ADD.
6830 MachineInstr *PtrAdd =
6831 getOpcodeDef(TargetOpcode::G_PTR_ADD, Root.getReg(), MRI);
6832 if (!PtrAdd || !isWorthFoldingIntoExtendedReg(*PtrAdd, MRI))
6833 return std::nullopt;
6834
6835 // Now, try to match an opcode which will match our specific offset.
6836 // We want a G_SHL or a G_MUL.
6837 MachineInstr *OffsetInst =
6838 getDefIgnoringCopies(PtrAdd->getOperand(2).getReg(), MRI);
6839 return selectExtendedSHL(Root, PtrAdd->getOperand(1),
6840 OffsetInst->getOperand(0), SizeInBytes,
6841 /*WantsExt=*/false);
6842}
6843
6844/// This is used for computing addresses like this:
6845///
6846/// ldr x1, [x2, x3]
6847///
6848/// Where x2 is the base register, and x3 is an offset register.
6849///
6850/// When possible (or profitable) to fold a G_PTR_ADD into the address
6851/// calculation, this will do so. Otherwise, it will return std::nullopt.
6853AArch64InstructionSelector::selectAddrModeRegisterOffset(
6854 MachineOperand &Root) const {
6856
6857 // We need a GEP.
6858 MachineInstr *Gep = MRI.getVRegDef(Root.getReg());
6859 if (Gep->getOpcode() != TargetOpcode::G_PTR_ADD)
6860 return std::nullopt;
6861
6862 // If this is used more than once, let's not bother folding.
6863 // TODO: Check if they are memory ops. If they are, then we can still fold
6864 // without having to recompute anything.
6865 if (!MRI.hasOneNonDBGUse(Gep->getOperand(0).getReg()))
6866 return std::nullopt;
6867
6868 // Base is the GEP's LHS, offset is its RHS.
6869 return {{[=](MachineInstrBuilder &MIB) {
6870 MIB.addUse(Gep->getOperand(1).getReg());
6871 },
6872 [=](MachineInstrBuilder &MIB) {
6873 MIB.addUse(Gep->getOperand(2).getReg());
6874 },
6875 [=](MachineInstrBuilder &MIB) {
6876 // Need to add both immediates here to make sure that they are both
6877 // added to the instruction.
6878 MIB.addImm(0);
6879 MIB.addImm(0);
6880 }}};
6881}
6882
6883/// This is intended to be equivalent to selectAddrModeXRO in
6884/// AArch64ISelDAGtoDAG. It's used for selecting X register offset loads.
6886AArch64InstructionSelector::selectAddrModeXRO(MachineOperand &Root,
6887 unsigned SizeInBytes) const {
6889 if (!Root.isReg())
6890 return std::nullopt;
6891 MachineInstr *PtrAdd =
6892 getOpcodeDef(TargetOpcode::G_PTR_ADD, Root.getReg(), MRI);
6893 if (!PtrAdd)
6894 return std::nullopt;
6895
6896 // Check for an immediate which cannot be encoded in the [base + imm]
6897 // addressing mode, and can't be encoded in an add/sub. If this happens, we'll
6898 // end up with code like:
6899 //
6900 // mov x0, wide
6901 // add x1 base, x0
6902 // ldr x2, [x1, x0]
6903 //
6904 // In this situation, we can use the [base, xreg] addressing mode to save an
6905 // add/sub:
6906 //
6907 // mov x0, wide
6908 // ldr x2, [base, x0]
6909 auto ValAndVReg =
6910 getIConstantVRegValWithLookThrough(PtrAdd->getOperand(2).getReg(), MRI);
6911 if (ValAndVReg) {
6912 unsigned Scale = Log2_32(SizeInBytes);
6913 int64_t ImmOff = ValAndVReg->Value.getSExtValue();
6914
6915 // Skip immediates that can be selected in the load/store addressing
6916 // mode.
6917 if (ImmOff % SizeInBytes == 0 && ImmOff >= 0 &&
6918 ImmOff < (0x1000 << Scale))
6919 return std::nullopt;
6920
6921 // Helper lambda to decide whether or not it is preferable to emit an add.
6922 auto isPreferredADD = [](int64_t ImmOff) {
6923 // Constants in [0x0, 0xfff] can be encoded in an add.
6924 if ((ImmOff & 0xfffffffffffff000LL) == 0x0LL)
6925 return true;
6926
6927 // Can it be encoded in an add lsl #12?
6928 if ((ImmOff & 0xffffffffff000fffLL) != 0x0LL)
6929 return false;
6930
6931 // It can be encoded in an add lsl #12, but we may not want to. If it is
6932 // possible to select this as a single movz, then prefer that. A single
6933 // movz is faster than an add with a shift.
6934 return (ImmOff & 0xffffffffff00ffffLL) != 0x0LL &&
6935 (ImmOff & 0xffffffffffff0fffLL) != 0x0LL;
6936 };
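 // Illustrative values for the lambda above: 0xfff is a single add; 0xabc000
 // is an "add ..., lsl #12" and is still preferred; 0xa000 or 0x230000 can be
 // materialized with a single movz, so the [base, xreg] form is used for
 // those instead.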
6937
6938 // If the immediate can be encoded in a single add/sub, then bail out.
6939 if (isPreferredADD(ImmOff) || isPreferredADD(-ImmOff))
6940 return std::nullopt;
6941 }
6942
6943 // Try to fold shifts into the addressing mode.
6944 auto AddrModeFns = selectAddrModeShiftedExtendXReg(Root, SizeInBytes);
6945 if (AddrModeFns)
6946 return AddrModeFns;
6947
6948 // If that doesn't work, see if it's possible to fold in registers from
6949 // a GEP.
6950 return selectAddrModeRegisterOffset(Root);
6951}
6952
6953/// This is used for computing addresses like this:
6954///
6955/// ldr x0, [xBase, wOffset, sxtw #LegalShiftVal]
6956///
6957/// Where we have a 64-bit base register, a 32-bit offset register, and an
6958/// extend (which may or may not be signed).
6960AArch64InstructionSelector::selectAddrModeWRO(MachineOperand &Root,
6961 unsigned SizeInBytes) const {
6963
6964 MachineInstr *PtrAdd =
6965 getOpcodeDef(TargetOpcode::G_PTR_ADD, Root.getReg(), MRI);
6966 if (!PtrAdd || !isWorthFoldingIntoExtendedReg(*PtrAdd, MRI))
6967 return std::nullopt;
6968
6969 MachineOperand &LHS = PtrAdd->getOperand(1);
6970 MachineOperand &RHS = PtrAdd->getOperand(2);
6971 MachineInstr *OffsetInst = getDefIgnoringCopies(RHS.getReg(), MRI);
6972
6973 // The first case is the same as selectAddrModeXRO, except we need an extend.
6974 // In this case, we try to find a shift and extend, and fold them into the
6975 // addressing mode.
6976 //
6977 // E.g.
6978 //
6979 // off_reg = G_Z/S/ANYEXT ext_reg
6980 // val = G_CONSTANT LegalShiftVal
6981 // shift = G_SHL off_reg val
6982 // ptr = G_PTR_ADD base_reg shift
6983 // x = G_LOAD ptr
6984 //
6985 // In this case we can get a load like this:
6986 //
6987 // ldr x0, [base_reg, ext_reg, sxtw #LegalShiftVal]
6988 auto ExtendedShl = selectExtendedSHL(Root, LHS, OffsetInst->getOperand(0),
6989 SizeInBytes, /*WantsExt=*/true);
6990 if (ExtendedShl)
6991 return ExtendedShl;
6992
6993 // There was no shift. We can try to fold a G_Z/S/ANYEXT in alone, though.
6994 //
6995 // e.g.
6996 // ldr something, [base_reg, ext_reg, sxtw]
6997 if (!isWorthFoldingIntoExtendedReg(*OffsetInst, MRI))
6998 return std::nullopt;
6999
7000 // Check if this is an extend. We'll get an extend type if it is.
7001 AArch64_AM::ShiftExtendType Ext =
7002 getExtendTypeForInst(*OffsetInst, MRI, /*IsLoadStore=*/true);
7003 if (Ext == AArch64_AM::InvalidShiftExtend)
7004 return std::nullopt;
7005
7006 // Need a 32-bit wide register.
7007 MachineIRBuilder MIB(*PtrAdd);
7008 Register ExtReg = moveScalarRegClass(OffsetInst->getOperand(1).getReg(),
7009 AArch64::GPR32RegClass, MIB);
7010 unsigned SignExtend = Ext == AArch64_AM::SXTW;
7011
7012 // Base is LHS, offset is ExtReg.
7013 return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(LHS.getReg()); },
7014 [=](MachineInstrBuilder &MIB) { MIB.addUse(ExtReg); },
7015 [=](MachineInstrBuilder &MIB) {
7016 MIB.addImm(SignExtend);
7017 MIB.addImm(0);
7018 }}};
7019}
7020
7021/// Select a "register plus unscaled signed 9-bit immediate" address. This
7022/// should only match when there is an offset that is not valid for a scaled
7023/// immediate addressing mode. The "Size" argument is the size in bytes of the
7024/// memory reference, which is needed here to know what is valid for a scaled
7025/// immediate.
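/// For example (illustrative): a 4-byte load at offset -12 cannot use the
/// scaled form, but -12 lies in [-256, 256), so it is matched here and ends up
/// as something like "ldur w0, [xN, #-12]".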
7027AArch64InstructionSelector::selectAddrModeUnscaled(MachineOperand &Root,
7028 unsigned Size) const {
7030 Root.getParent()->getParent()->getParent()->getRegInfo();
7031
7032 if (!Root.isReg())
7033 return std::nullopt;
7034
7035 if (!isBaseWithConstantOffset(Root, MRI))
7036 return std::nullopt;
7037
7038 MachineInstr *RootDef = MRI.getVRegDef(Root.getReg());
7039
7040 MachineOperand &OffImm = RootDef->getOperand(2);
7041 if (!OffImm.isReg())
7042 return std::nullopt;
7043 MachineInstr *RHS = MRI.getVRegDef(OffImm.getReg());
7044 if (RHS->getOpcode() != TargetOpcode::G_CONSTANT)
7045 return std::nullopt;
7046 int64_t RHSC;
7047 MachineOperand &RHSOp1 = RHS->getOperand(1);
7048 if (!RHSOp1.isCImm() || RHSOp1.getCImm()->getBitWidth() > 64)
7049 return std::nullopt;
7050 RHSC = RHSOp1.getCImm()->getSExtValue();
7051
7052 if (RHSC >= -256 && RHSC < 256) {
7053 MachineOperand &Base = RootDef->getOperand(1);
7054 return {{
7055 [=](MachineInstrBuilder &MIB) { MIB.add(Base); },
7056 [=](MachineInstrBuilder &MIB) { MIB.addImm(RHSC); },
7057 }};
7058 }
7059 return std::nullopt;
7060}
7061
7063AArch64InstructionSelector::tryFoldAddLowIntoImm(MachineInstr &RootDef,
7064 unsigned Size,
7065 MachineRegisterInfo &MRI) const {
7066 if (RootDef.getOpcode() != AArch64::G_ADD_LOW)
7067 return std::nullopt;
7068 MachineInstr &Adrp = *MRI.getVRegDef(RootDef.getOperand(1).getReg());
7069 if (Adrp.getOpcode() != AArch64::ADRP)
7070 return std::nullopt;
7071
7072 // TODO: add heuristics like isWorthFoldingADDlow() from SelectionDAG.
7073 auto Offset = Adrp.getOperand(1).getOffset();
7074 if (Offset % Size != 0)
7075 return std::nullopt;
7076
7077 auto GV = Adrp.getOperand(1).getGlobal();
7078 if (GV->isThreadLocal())
7079 return std::nullopt;
7080
7081 auto &MF = *RootDef.getParent()->getParent();
7082 if (GV->getPointerAlignment(MF.getDataLayout()) < Size)
7083 return std::nullopt;
7084
7085 unsigned OpFlags = STI.ClassifyGlobalReference(GV, MF.getTarget());
7086 MachineIRBuilder MIRBuilder(RootDef);
7087 Register AdrpReg = Adrp.getOperand(0).getReg();
7088 return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(AdrpReg); },
7089 [=](MachineInstrBuilder &MIB) {
7090 MIB.addGlobalAddress(GV, Offset,
7091 OpFlags | AArch64II::MO_PAGEOFF |
7093 }}};
7094}
7095
7096/// Select a "register plus scaled unsigned 12-bit immediate" address. The
7097/// "Size" argument is the size in bytes of the memory reference, which
7098/// determines the scale.
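/// For example (illustrative): with an 8-byte access, an offset of 504 is
/// accepted and rendered as 504 >> 3 = 63, giving something like
/// "ldr x0, [xN, #504]"; offsets that are not a multiple of 8, are negative,
/// or are >= (0x1000 << 3) fall back to other addressing modes.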
7100AArch64InstructionSelector::selectAddrModeIndexed(MachineOperand &Root,
7101 unsigned Size) const {
7102 MachineFunction &MF = *Root.getParent()->getParent()->getParent();
7104
7105 if (!Root.isReg())
7106 return std::nullopt;
7107
7108 MachineInstr *RootDef = MRI.getVRegDef(Root.getReg());
7109 if (RootDef->getOpcode() == TargetOpcode::G_FRAME_INDEX) {
7110 return {{
7111 [=](MachineInstrBuilder &MIB) { MIB.add(RootDef->getOperand(1)); },
7112 [=](MachineInstrBuilder &MIB) { MIB.addImm(0); },
7113 }};
7114 }
7115
7117 // Check if we can fold in the ADD of small code model ADRP + ADD address.
7118 if (CM == CodeModel::Small) {
7119 auto OpFns = tryFoldAddLowIntoImm(*RootDef, Size, MRI);
7120 if (OpFns)
7121 return OpFns;
7122 }
7123
7124 if (isBaseWithConstantOffset(Root, MRI)) {
7125 MachineOperand &LHS = RootDef->getOperand(1);
7126 MachineOperand &RHS = RootDef->getOperand(2);
7127 MachineInstr *LHSDef = MRI.getVRegDef(LHS.getReg());
7128 MachineInstr *RHSDef = MRI.getVRegDef(RHS.getReg());
7129
7130 int64_t RHSC = (int64_t)RHSDef->getOperand(1).getCImm()->getZExtValue();
7131 unsigned Scale = Log2_32(Size);
7132 if ((RHSC & (Size - 1)) == 0 && RHSC >= 0 && RHSC < (0x1000 << Scale)) {
7133 if (LHSDef->getOpcode() == TargetOpcode::G_FRAME_INDEX)
7134 return {{
7135 [=](MachineInstrBuilder &MIB) { MIB.add(LHSDef->getOperand(1)); },
7136 [=](MachineInstrBuilder &MIB) { MIB.addImm(RHSC >> Scale); },
7137 }};
7138
7139 return {{
7140 [=](MachineInstrBuilder &MIB) { MIB.add(LHS); },
7141 [=](MachineInstrBuilder &MIB) { MIB.addImm(RHSC >> Scale); },
7142 }};
7143 }
7144 }
7145
7146 // Before falling back to our general case, check if the unscaled
7147 // instructions can handle this. If so, that's preferable.
7148 if (selectAddrModeUnscaled(Root, Size))
7149 return std::nullopt;
7150
7151 return {{
7152 [=](MachineInstrBuilder &MIB) { MIB.add(Root); },
7153 [=](MachineInstrBuilder &MIB) { MIB.addImm(0); },
7154 }};
7155}
7156
7157/// Given a shift instruction, return the correct shift type for that
7158/// instruction.
7159static AArch64_AM::ShiftExtendType getShiftTypeForInst(MachineInstr &MI) {
7160 switch (MI.getOpcode()) {
7161 default:
7162 return AArch64_AM::InvalidShiftExtend;
7163 case TargetOpcode::G_SHL:
7164 return AArch64_AM::LSL;
7165 case TargetOpcode::G_LSHR:
7166 return AArch64_AM::LSR;
7167 case TargetOpcode::G_ASHR:
7168 return AArch64_AM::ASR;
7169 case TargetOpcode::G_ROTR:
7170 return AArch64_AM::ROR;
7171 }
7172}
7173
7174/// Select a "shifted register" operand. If the value is not shifted, set the
7175/// shift operand to a default value of "lsl 0".
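/// For example (illustrative): if the operand is defined by
///   %shifted:gpr(s64) = G_SHL %x, (constant 3)
/// this returns %x together with a shifter immediate encoding "lsl #3", so the
/// user can be selected as, e.g., "add x0, x1, x2, lsl #3".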
7177AArch64InstructionSelector::selectShiftedRegister(MachineOperand &Root,
7178 bool AllowROR) const {
7179 if (!Root.isReg())
7180 return std::nullopt;
7182 Root.getParent()->getParent()->getParent()->getRegInfo();
7183
7184 // Check if the operand is defined by an instruction which corresponds to
7185 // a ShiftExtendType. E.g. a G_SHL, G_LSHR, etc.
7186 MachineInstr *ShiftInst = MRI.getVRegDef(Root.getReg());
7187 AArch64_AM::ShiftExtendType ShType = getShiftTypeForInst(*ShiftInst);
7188 if (ShType == AArch64_AM::InvalidShiftExtend)
7189 return std::nullopt;
7190 if (ShType == AArch64_AM::ROR && !AllowROR)
7191 return std::nullopt;
7192 if (!isWorthFoldingIntoExtendedReg(*ShiftInst, MRI))
7193 return std::nullopt;
7194
7195 // Need an immediate on the RHS.
7196 MachineOperand &ShiftRHS = ShiftInst->getOperand(2);
7197 auto Immed = getImmedFromMO(ShiftRHS);
7198 if (!Immed)
7199 return std::nullopt;
7200
7201 // We have something that we can fold. Fold in the shift's LHS and RHS into
7202 // the instruction.
7203 MachineOperand &ShiftLHS = ShiftInst->getOperand(1);
7204 Register ShiftReg = ShiftLHS.getReg();
7205
7206 unsigned NumBits = MRI.getType(ShiftReg).getSizeInBits();
7207 unsigned Val = *Immed & (NumBits - 1);
7208 unsigned ShiftVal = AArch64_AM::getShifterImm(ShType, Val);
7209
7210 return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(ShiftReg); },
7211 [=](MachineInstrBuilder &MIB) { MIB.addImm(ShiftVal); }}};
7212}
7213
7214AArch64_AM::ShiftExtendType AArch64InstructionSelector::getExtendTypeForInst(
7215 MachineInstr &MI, MachineRegisterInfo &MRI, bool IsLoadStore) const {
7216 unsigned Opc = MI.getOpcode();
7217
7218 // Handle explicit extend instructions first.
7219 if (Opc == TargetOpcode::G_SEXT || Opc == TargetOpcode::G_SEXT_INREG) {
7220 unsigned Size;
7221 if (Opc == TargetOpcode::G_SEXT)
7222 Size = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
7223 else
7224 Size = MI.getOperand(2).getImm();
7225 assert(Size != 64 && "Extend from 64 bits?");
7226 switch (Size) {
7227 case 8:
7228 return IsLoadStore ? AArch64_AM::InvalidShiftExtend : AArch64_AM::SXTB;
7229 case 16:
7230 return IsLoadStore ? AArch64_AM::InvalidShiftExtend : AArch64_AM::SXTH;
7231 case 32:
7232 return AArch64_AM::SXTW;
7233 default:
7234 return AArch64_AM::InvalidShiftExtend;
7235 }
7236 }
7237
7238 if (Opc == TargetOpcode::G_ZEXT || Opc == TargetOpcode::G_ANYEXT) {
7239 unsigned Size = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
7240 assert(Size != 64 && "Extend from 64 bits?");
7241 switch (Size) {
7242 case 8:
7243 return IsLoadStore ? AArch64_AM::InvalidShiftExtend : AArch64_AM::UXTB;
7244 case 16:
7245 return IsLoadStore ? AArch64_AM::InvalidShiftExtend : AArch64_AM::UXTH;
7246 case 32:
7247 return AArch64_AM::UXTW;
7248 default:
7249 return AArch64_AM::InvalidShiftExtend;
7250 }
7251 }
7252
7253 // Don't have an explicit extend. Try to handle a G_AND with a constant mask
7254 // on the RHS.
7255 if (Opc != TargetOpcode::G_AND)
7256 return AArch64_AM::InvalidShiftExtend;
7257
7258 std::optional<uint64_t> MaybeAndMask = getImmedFromMO(MI.getOperand(2));
7259 if (!MaybeAndMask)
7260 return AArch64_AM::InvalidShiftExtend;
7261 uint64_t AndMask = *MaybeAndMask;
7262 switch (AndMask) {
7263 default:
7264 return AArch64_AM::InvalidShiftExtend;
7265 case 0xFF:
7266 return !IsLoadStore ? AArch64_AM::UXTB : AArch64_AM::InvalidShiftExtend;
7267 case 0xFFFF:
7268 return !IsLoadStore ? AArch64_AM::UXTH : AArch64_AM::InvalidShiftExtend;
7269 case 0xFFFFFFFF:
7270 return AArch64_AM::UXTW;
7271 }
7272}
7273
7274Register AArch64InstructionSelector::moveScalarRegClass(
7275 Register Reg, const TargetRegisterClass &RC, MachineIRBuilder &MIB) const {
7276 MachineRegisterInfo &MRI = *MIB.getMRI();
7277 auto Ty = MRI.getType(Reg);
7278 assert(!Ty.isVector() && "Expected scalars only!");
7279 if (Ty.getSizeInBits() == TRI.getRegSizeInBits(RC))
7280 return Reg;
7281
7282 // Create a copy and immediately select it.
7283 // FIXME: We should have an emitCopy function?
7284 auto Copy = MIB.buildCopy({&RC}, {Reg});
7285 selectCopy(*Copy, TII, MRI, TRI, RBI);
7286 return Copy.getReg(0);
7287}
7288
7289/// Select an "extended register" operand. This operand folds in an extend
7290/// followed by an optional left shift.
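/// For example (illustrative): given %off = G_SHL (G_SEXT %w:s32), (constant 2)
/// this folds to the 32-bit source register plus an arith-extend immediate for
/// "sxtw #2", so the user can be selected as, e.g., "add x0, x1, w2, sxtw #2".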
7292AArch64InstructionSelector::selectArithExtendedRegister(
7293 MachineOperand &Root) const {
7294 if (!Root.isReg())
7295 return std::nullopt;
7297 Root.getParent()->getParent()->getParent()->getRegInfo();
7298
7299 uint64_t ShiftVal = 0;
7300 Register ExtReg;
7301 AArch64_AM::ShiftExtendType Ext;
7302 MachineInstr *RootDef = getDefIgnoringCopies(Root.getReg(), MRI);
7303 if (!RootDef)
7304 return std::nullopt;
7305
7306 if (!isWorthFoldingIntoExtendedReg(*RootDef, MRI))
7307 return std::nullopt;
7308
7309 // Check if we can fold a shift and an extend.
7310 if (RootDef->getOpcode() == TargetOpcode::G_SHL) {
7311 // Look for a constant on the RHS of the shift.
7312 MachineOperand &RHS = RootDef->getOperand(2);
7313 std::optional<uint64_t> MaybeShiftVal = getImmedFromMO(RHS);
7314 if (!MaybeShiftVal)
7315 return std::nullopt;
7316 ShiftVal = *MaybeShiftVal;
7317 if (ShiftVal > 4)
7318 return std::nullopt;
7319 // Look for a valid extend instruction on the LHS of the shift.
7320 MachineOperand &LHS = RootDef->getOperand(1);
7321 MachineInstr *ExtDef = getDefIgnoringCopies(LHS.getReg(), MRI);
7322 if (!ExtDef)
7323 return std::nullopt;
7324 Ext = getExtendTypeForInst(*ExtDef, MRI);
7325 if (Ext == AArch64_AM::InvalidShiftExtend)
7326 return std::nullopt;
7327 ExtReg = ExtDef->getOperand(1).getReg();
7328 } else {
7329 // Didn't get a shift. Try just folding an extend.
7330 Ext = getExtendTypeForInst(*RootDef, MRI);
7331 if (Ext == AArch64_AM::InvalidShiftExtend)
7332 return std::nullopt;
7333 ExtReg = RootDef->getOperand(1).getReg();
7334
7335 // If we have a 32 bit instruction which zeroes out the high half of a
7336 // register, we get an implicit zero extend for free. Check if we have one.
7337 // FIXME: We actually emit the extend right now even though we don't have
7338 // to.
7339 if (Ext == AArch64_AM::UXTW && MRI.getType(ExtReg).getSizeInBits() == 32) {
7340 MachineInstr *ExtInst = MRI.getVRegDef(ExtReg);
7341 if (isDef32(*ExtInst))
7342 return std::nullopt;
7343 }
7344 }
7345
7346 // We require a GPR32 here. Narrow the ExtReg if needed using a subregister
7347 // copy.
7348 MachineIRBuilder MIB(*RootDef);
7349 ExtReg = moveScalarRegClass(ExtReg, AArch64::GPR32RegClass, MIB);
7350
7351 return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(ExtReg); },
7352 [=](MachineInstrBuilder &MIB) {
7353 MIB.addImm(getArithExtendImm(Ext, ShiftVal));
7354 }}};
7355}
7356
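/// Select the source of the high 64-bit half of a 128-bit vector, e.g. the
/// second result of a G_UNMERGE_VALUES of a v2s64 or a G_EXTRACT_VECTOR_ELT of
/// lane 1, looking through G_BITCASTs on little-endian targets. This appears
/// to feed the high-half forms of the widening NEON patterns.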
7358AArch64InstructionSelector::selectExtractHigh(MachineOperand &Root) const {
7359 if (!Root.isReg())
7360 return std::nullopt;
7362 Root.getParent()->getParent()->getParent()->getRegInfo();
7363
7364 auto Extract = getDefSrcRegIgnoringCopies(Root.getReg(), MRI);
7365 while (Extract && Extract->MI->getOpcode() == TargetOpcode::G_BITCAST &&
7366 STI.isLittleEndian())
7367 Extract =
7368 getDefSrcRegIgnoringCopies(Extract->MI->getOperand(1).getReg(), MRI);
7369 if (!Extract)
7370 return std::nullopt;
7371
7372 if (Extract->MI->getOpcode() == TargetOpcode::G_UNMERGE_VALUES) {
7373 if (Extract->Reg == Extract->MI->getOperand(1).getReg()) {
7374 Register ExtReg = Extract->MI->getOperand(2).getReg();
7375 return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(ExtReg); }}};
7376 }
7377 }
7378 if (Extract->MI->getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT) {
7379 LLT SrcTy = MRI.getType(Extract->MI->getOperand(1).getReg());
7380 auto LaneIdx = getIConstantVRegValWithLookThrough(
7381 Extract->MI->getOperand(2).getReg(), MRI);
7382 if (LaneIdx && SrcTy == LLT::fixed_vector(2, 64) &&
7383 LaneIdx->Value.getSExtValue() == 1) {
7384 Register ExtReg = Extract->MI->getOperand(1).getReg();
7385 return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(ExtReg); }}};
7386 }
7387 }
7388
7389 return std::nullopt;
7390}
7391
7392void AArch64InstructionSelector::renderTruncImm(MachineInstrBuilder &MIB,
7393 const MachineInstr &MI,
7394 int OpIdx) const {
7395 const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
7396 assert(MI.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 &&
7397 "Expected G_CONSTANT");
7398 std::optional<int64_t> CstVal =
7399 getIConstantVRegSExtVal(MI.getOperand(0).getReg(), MRI);
7400 assert(CstVal && "Expected constant value");
7401 MIB.addImm(*CstVal);
7402}
7403
7404void AArch64InstructionSelector::renderLogicalImm32(
7405 MachineInstrBuilder &MIB, const MachineInstr &I, int OpIdx) const {
7406 assert(I.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 &&
7407 "Expected G_CONSTANT");
7408 uint64_t CstVal = I.getOperand(1).getCImm()->getZExtValue();
7409 uint64_t Enc = AArch64_AM::encodeLogicalImmediate(CstVal, 32);
7410 MIB.addImm(Enc);
7411}
7412
7413void AArch64InstructionSelector::renderLogicalImm64(
7414 MachineInstrBuilder &MIB, const MachineInstr &I, int OpIdx) const {
7415 assert(I.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 &&
7416 "Expected G_CONSTANT");
7417 uint64_t CstVal = I.getOperand(1).getCImm()->getZExtValue();
7418 uint64_t Enc = AArch64_AM::encodeLogicalImmediate(CstVal, 64);
7419 MIB.addImm(Enc);
7420}
7421
7422void AArch64InstructionSelector::renderUbsanTrap(MachineInstrBuilder &MIB,
7423 const MachineInstr &MI,
7424 int OpIdx) const {
7425 assert(MI.getOpcode() == TargetOpcode::G_UBSANTRAP && OpIdx == 0 &&
7426 "Expected G_UBSANTRAP");
7427 MIB.addImm(MI.getOperand(0).getImm() | ('U' << 8));
7428}
7429
7430void AArch64InstructionSelector::renderFPImm16(MachineInstrBuilder &MIB,
7431 const MachineInstr &MI,
7432 int OpIdx) const {
7433 assert(MI.getOpcode() == TargetOpcode::G_FCONSTANT && OpIdx == -1 &&
7434 "Expected G_FCONSTANT");
7435 MIB.addImm(
7436 AArch64_AM::getFP16Imm(MI.getOperand(1).getFPImm()->getValueAPF()));
7437}
7438
7439void AArch64InstructionSelector::renderFPImm32(MachineInstrBuilder &MIB,
7440 const MachineInstr &MI,
7441 int OpIdx) const {
7442 assert(MI.getOpcode() == TargetOpcode::G_FCONSTANT && OpIdx == -1 &&
7443 "Expected G_FCONSTANT");
7444 MIB.addImm(
7445 AArch64_AM::getFP32Imm(MI.getOperand(1).getFPImm()->getValueAPF()));
7446}
7447
7448void AArch64InstructionSelector::renderFPImm64(MachineInstrBuilder &MIB,
7449 const MachineInstr &MI,
7450 int OpIdx) const {
7451 assert(MI.getOpcode() == TargetOpcode::G_FCONSTANT && OpIdx == -1 &&
7452 "Expected G_FCONSTANT");
7453 MIB.addImm(
7454 AArch64_AM::getFP64Imm(MI.getOperand(1).getFPImm()->getValueAPF()));
7455}
7456
7457void AArch64InstructionSelector::renderFPImm32SIMDModImmType4(
7458 MachineInstrBuilder &MIB, const MachineInstr &MI, int OpIdx) const {
7459 assert(MI.getOpcode() == TargetOpcode::G_FCONSTANT && OpIdx == -1 &&
7460 "Expected G_FCONSTANT");
7461 MIB.addImm(AArch64_AM::encodeAdvSIMDModImmType4(MI.getOperand(1)
7462 .getFPImm()
7463 ->getValueAPF()
7464 .bitcastToAPInt()
7465 .getZExtValue()));
7466}
7467
7468bool AArch64InstructionSelector::isLoadStoreOfNumBytes(
7469 const MachineInstr &MI, unsigned NumBytes) const {
7470 if (!MI.mayLoadOrStore())
7471 return false;
7472 assert(MI.hasOneMemOperand() &&
7473 "Expected load/store to have only one mem op!");
7474 return (*MI.memoperands_begin())->getSize() == NumBytes;
7475}
7476
7477bool AArch64InstructionSelector::isDef32(const MachineInstr &MI) const {
7478 const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
7479 if (MRI.getType(MI.getOperand(0).getReg()).getSizeInBits() != 32)
7480 return false;
7481
7482 // Only return true if we know the operation will zero-out the high half of
7483 // the 64-bit register. Truncates can be subregister copies, which don't
7484 // zero out the high bits. Copies and other copy-like instructions can be
7485 // fed by truncates, or could be lowered as subregister copies.
7486 switch (MI.getOpcode()) {
7487 default:
7488 return true;
7489 case TargetOpcode::COPY:
7490 case TargetOpcode::G_BITCAST:
7491 case TargetOpcode::G_TRUNC:
7492 case TargetOpcode::G_PHI:
7493 return false;
7494 }
7495}
7496
7497
7498// Perform fixups on the given PHI instruction's operands to force them all
7499// to be the same as the destination regbank.
7500static void fixupPHIOpBanks(MachineInstr &MI, MachineRegisterInfo &MRI,
7501 const AArch64RegisterBankInfo &RBI) {
7502 assert(MI.getOpcode() == TargetOpcode::G_PHI && "Expected a G_PHI");
7503 Register DstReg = MI.getOperand(0).getReg();
7504 const RegisterBank *DstRB = MRI.getRegBankOrNull(DstReg);
7505 assert(DstRB && "Expected PHI dst to have regbank assigned");
7506 MachineIRBuilder MIB(MI);
7507
7508 // Go through each operand and ensure it has the same regbank.
7509 for (MachineOperand &MO : llvm::drop_begin(MI.operands())) {
7510 if (!MO.isReg())
7511 continue;
7512 Register OpReg = MO.getReg();
7513 const RegisterBank *RB = MRI.getRegBankOrNull(OpReg);
7514 if (RB != DstRB) {
7515 // Insert a cross-bank copy.
7516 auto *OpDef = MRI.getVRegDef(OpReg);
7517 const LLT &Ty = MRI.getType(OpReg);
7518 MachineBasicBlock &OpDefBB = *OpDef->getParent();
7519
7520 // Any instruction we insert must appear after all PHIs in the block
7521 // for the block to be valid MIR.
7522 MachineBasicBlock::iterator InsertPt = std::next(OpDef->getIterator());
7523 if (InsertPt != OpDefBB.end() && InsertPt->isPHI())
7524 InsertPt = OpDefBB.getFirstNonPHI();
7525 MIB.setInsertPt(*OpDef->getParent(), InsertPt);
7526 auto Copy = MIB.buildCopy(Ty, OpReg);
7527 MRI.setRegBank(Copy.getReg(0), *DstRB);
7528 MO.setReg(Copy.getReg(0));
7529 }
7530 }
7531}
7532
7533void AArch64InstructionSelector::processPHIs(MachineFunction &MF) {
7534 // We're looking for PHIs; build a list first so we don't invalidate iterators.
7537 for (auto &BB : MF) {
7538 for (auto &MI : BB) {
7539 if (MI.getOpcode() == TargetOpcode::G_PHI)
7540 Phis.emplace_back(&MI);
7541 }
7542 }
7543
7544 for (auto *MI : Phis) {
7545 // We need to do some work here if the operand types are < 16 bit and they
7546 // are split across fpr/gpr banks. Since all types <32b on gpr
7547 // end up being assigned gpr32 regclasses, we can end up with PHIs here
7548 // which try to select between a gpr32 and an fpr16. Ideally RBS shouldn't
7549 // be selecting heterogeneous regbanks for operands if possible, but we
7550 // still need to be able to deal with it here.
7551 //
7552 // To fix this, if we have a gpr-bank operand < 32b in size and at least
7553 // one other operand is on the fpr bank, then we add cross-bank copies
7554 // to homogenize the operand banks. For simplicity the bank that we choose
7555 // to settle on is whatever bank the def operand has. For example:
7556 //
7557 // %endbb:
7558 // %dst:gpr(s16) = G_PHI %in1:gpr(s16), %bb1, %in2:fpr(s16), %bb2
7559 // =>
7560 // %bb2:
7561 // ...
7562 // %in2_copy:gpr(s16) = COPY %in2:fpr(s16)
7563 // ...
7564 // %endbb:
7565 // %dst:gpr(s16) = G_PHI %in1:gpr(s16), %bb1, %in2_copy:gpr(s16), %bb2
7566 bool HasGPROp = false, HasFPROp = false;
7567 for (const MachineOperand &MO : llvm::drop_begin(MI->operands())) {
7568 if (!MO.isReg())
7569 continue;
7570 const LLT &Ty = MRI.getType(MO.getReg());
7571 if (!Ty.isValid() || !Ty.isScalar())
7572 break;
7573 if (Ty.getSizeInBits() >= 32)
7574 break;
7575 const RegisterBank *RB = MRI.getRegBankOrNull(MO.getReg());
7576 // If for some reason we don't have a regbank yet, don't try anything.
7577 if (!RB)
7578 break;
7579
7580 if (RB->getID() == AArch64::GPRRegBankID)
7581 HasGPROp = true;
7582 else
7583 HasFPROp = true;
7584 }
7585 // We have heterogeneous regbanks, so we need to fix them up.
7586 if (HasGPROp && HasFPROp)
7587 fixupPHIOpBanks(*MI, MRI, RBI);
7588 }
7589}
7590
7591namespace llvm {
7592InstructionSelector *
7593createAArch64InstructionSelector(const AArch64TargetMachine &TM,
7594 AArch64Subtarget &Subtarget,
7595 AArch64RegisterBankInfo &RBI) {
7596 return new AArch64InstructionSelector(TM, Subtarget, RBI);
7597}
7598}
unsigned SubReg
unsigned const MachineRegisterInfo * MRI
#define Success
static bool isPreferredADD(int64_t ImmOff)
static SDValue emitConditionalComparison(SDValue LHS, SDValue RHS, ISD::CondCode CC, SDValue CCOp, AArch64CC::CondCode Predicate, AArch64CC::CondCode OutCC, const SDLoc &DL, SelectionDAG &DAG)
can be transformed to: not (and (not (and (setCC (cmp C)) (setCD (cmp D)))) (and (not (setCA (cmp A))...
static SDValue tryAdvSIMDModImm16(unsigned NewOp, SDValue Op, SelectionDAG &DAG, const APInt &Bits, const SDValue *LHS=nullptr)
static SDValue tryAdvSIMDModImmFP(unsigned NewOp, SDValue Op, SelectionDAG &DAG, const APInt &Bits)
static SDValue tryAdvSIMDModImm64(unsigned NewOp, SDValue Op, SelectionDAG &DAG, const APInt &Bits)
static SDValue tryAdvSIMDModImm8(unsigned NewOp, SDValue Op, SelectionDAG &DAG, const APInt &Bits)
static SDValue emitConjunctionRec(SelectionDAG &DAG, SDValue Val, AArch64CC::CondCode &OutCC, bool Negate, SDValue CCOp, AArch64CC::CondCode Predicate)
Emit conjunction or disjunction tree with the CMP/FCMP followed by a chain of CCMP/CFCMP ops.
static SDValue tryAdvSIMDModImm321s(unsigned NewOp, SDValue Op, SelectionDAG &DAG, const APInt &Bits)
static bool isCMN(SDValue Op, ISD::CondCode CC)
static SDValue tryAdvSIMDModImm32(unsigned NewOp, SDValue Op, SelectionDAG &DAG, const APInt &Bits, const SDValue *LHS=nullptr)
static SDValue emitConjunction(SelectionDAG &DAG, SDValue Val, AArch64CC::CondCode &OutCC)
Emit expression as a conjunction (a series of CCMP/CFCMP ops).
static unsigned selectFPConvOpc(unsigned GenericOpc, LLT DstTy, LLT SrcTy)
#define GET_GLOBALISEL_PREDICATES_INIT
static std::pair< const TargetRegisterClass *, const TargetRegisterClass * > getRegClassesForCopy(MachineInstr &I, const TargetInstrInfo &TII, MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI, const RegisterBankInfo &RBI)
Helper function to get the source and destination register classes for a copy.
#define GET_GLOBALISEL_TEMPORARIES_INIT
static void changeFPCCToANDAArch64CC(CmpInst::Predicate CC, AArch64CC::CondCode &CondCode, AArch64CC::CondCode &CondCode2)
Convert an IR fp condition code to an AArch64 CC.
static Register getTestBitReg(Register Reg, uint64_t &Bit, bool &Invert, MachineRegisterInfo &MRI)
Return a register which can be used as a bit to test in a TB(N)Z.
static unsigned getMinSizeForRegBank(const RegisterBank &RB)
Returns the minimum size the given register bank can hold.
static std::optional< int64_t > getVectorShiftImm(Register Reg, MachineRegisterInfo &MRI)
Returns the element immediate value of a vector shift operand if found.
static unsigned selectLoadStoreUIOp(unsigned GenericOpc, unsigned RegBankID, unsigned OpSize)
Select the AArch64 opcode for the G_LOAD or G_STORE operation GenericOpc, appropriate for the (value)...
static AArch64CC::CondCode changeICMPPredToAArch64CC(CmpInst::Predicate P)
static unsigned selectBinaryOp(unsigned GenericOpc, unsigned RegBankID, unsigned OpSize)
Select the AArch64 opcode for the basic binary operation GenericOpc (such as G_OR or G_SDIV),...
static bool getSubRegForClass(const TargetRegisterClass *RC, const TargetRegisterInfo &TRI, unsigned &SubReg)
Returns the correct subregister to use for a given register class.
static bool selectCopy(MachineInstr &I, const TargetInstrInfo &TII, MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI, const RegisterBankInfo &RBI)
static bool copySubReg(MachineInstr &I, MachineRegisterInfo &MRI, const RegisterBankInfo &RBI, Register SrcReg, const TargetRegisterClass *To, unsigned SubReg)
Helper function for selectCopy.
static const TargetRegisterClass * getMinClassForRegBank(const RegisterBank &RB, unsigned SizeInBits, bool GetAllRegSet=false)
Given a register bank, and size in bits, return the smallest register class that can represent that c...
static Register createDTuple(ArrayRef< Register > Regs, MachineIRBuilder &MIB)
Create a tuple of D-registers using the registers in Regs.
static void fixupPHIOpBanks(MachineInstr &MI, MachineRegisterInfo &MRI, const AArch64RegisterBankInfo &RBI)
static bool selectDebugInstr(MachineInstr &I, MachineRegisterInfo &MRI, const RegisterBankInfo &RBI)
static bool isSignExtendShiftType(AArch64_AM::ShiftExtendType Type)
static AArch64_AM::ShiftExtendType getShiftTypeForInst(MachineInstr &MI)
Given a shift instruction, return the correct shift type for that instruction.
static bool unsupportedBinOp(const MachineInstr &I, const AArch64RegisterBankInfo &RBI, const MachineRegisterInfo &MRI, const AArch64RegisterInfo &TRI)
Check whether I is a currently unsupported binary operation:
static bool canEmitConjunction(Register Val, bool &CanNegate, bool &MustBeFirst, bool WillNegate, MachineRegisterInfo &MRI, unsigned Depth=0)
Returns true if Val is a tree of AND/OR/CMP operations that can be expressed as a conjunction.
static bool getLaneCopyOpcode(unsigned &CopyOpc, unsigned &ExtractSubReg, const unsigned EltSize)
static Register createQTuple(ArrayRef< Register > Regs, MachineIRBuilder &MIB)
Create a tuple of Q-registers using the registers in Regs.
static std::optional< uint64_t > getImmedFromMO(const MachineOperand &Root)
static std::pair< unsigned, unsigned > getInsertVecEltOpInfo(const RegisterBank &RB, unsigned EltSize)
Return an <Opcode, SubregIndex> pair to do an vector elt insert of a given size and RB.
static Register createTuple(ArrayRef< Register > Regs, const unsigned RegClassIDs[], const unsigned SubRegs[], MachineIRBuilder &MIB)
Create a REG_SEQUENCE instruction using the registers in Regs.
static std::optional< int64_t > getVectorSHLImm(LLT SrcTy, Register Reg, MachineRegisterInfo &MRI)
Matches and returns the shift immediate value for a SHL instruction given a shift operand.
static void changeFPCCToORAArch64CC(CmpInst::Predicate CC, AArch64CC::CondCode &CondCode, AArch64CC::CondCode &CondCode2)
changeFPCCToORAArch64CC - Convert an IR fp condition code to an AArch64 CC.
unsigned RegSize
This file declares the targeting of the RegisterBankInfo class for AArch64.
MachineBasicBlock & MBB
static const LLT S64
static const LLT S32
static const LLT S16
static const LLT S8
static bool isStore(int Opcode)
static bool selectMergeValues(MachineInstrBuilder &MIB, const ARMBaseInstrInfo &TII, MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI, const RegisterBankInfo &RBI)
static bool selectUnmergeValues(MachineInstrBuilder &MIB, const ARMBaseInstrInfo &TII, MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI, const RegisterBankInfo &RBI)
This file contains the declarations for the subclasses of Constant, which represent the different fla...
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
#define LLVM_DEBUG(X)
Definition: Debug.h:101
This file contains constants used for implementing Dwarf debug support.
uint64_t Size
#define DEBUG_TYPE
Declares convenience wrapper classes for interpreting MachineInstr instances as specific generic oper...
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
static void emitLoadFromConstantPool(Register DstReg, const Constant *ConstVal, MachineIRBuilder &MIRBuilder)
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
Contains matchers for matching SSA Machine Instructions.
This file declares the MachineConstantPool class which is an abstract constant pool to keep track of ...
This file declares the MachineIRBuilder class.
unsigned const TargetRegisterInfo * TRI
unsigned Reg
static unsigned getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
#define P(N)
const char LLVMTargetMachineRef TM
static StringRef getName(Value *V)
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
static constexpr int Concat[]
Value * RHS
Value * LHS
AArch64FunctionInfo - This class is derived from MachineFunctionInfo and contains private AArch64-spe...
This class provides the information for the target register banks.
bool isCallingConvWin64(CallingConv::ID CC) const
APInt bitcastToAPInt() const
Definition: APFloat.h:1210
Class for arbitrary precision integers.
Definition: APInt.h:76
APInt zext(unsigned width) const
Zero extend to a new width.
Definition: APInt.cpp:981
uint64_t getZExtValue() const
Get zero extended value.
Definition: APInt.h:1498
static APInt getSplat(unsigned NewLen, const APInt &V)
Return a value containing V broadcasted over NewLen bits.
Definition: APInt.cpp:620
static APInt getHighBitsSet(unsigned numBits, unsigned hiBitsSet)
Constructs an APInt value that has the top hiBitsSet bits set.
Definition: APInt.h:274
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:165
BlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate IR basic block frequen...
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition: InstrTypes.h:993
@ FCMP_OEQ
0 0 0 1 True if ordered and equal
Definition: InstrTypes.h:996
@ ICMP_SLT
signed less than
Definition: InstrTypes.h:1022
@ ICMP_SLE
signed less or equal
Definition: InstrTypes.h:1023
@ FCMP_OLT
0 1 0 0 True if ordered and less than
Definition: InstrTypes.h:999
@ FCMP_ULE
1 1 0 1 True if unordered, less than, or equal
Definition: InstrTypes.h:1008
@ FCMP_OGT
0 0 1 0 True if ordered and greater than
Definition: InstrTypes.h:997
@ FCMP_OGE
0 0 1 1 True if ordered and greater than or equal
Definition: InstrTypes.h:998
@ ICMP_UGE
unsigned greater or equal
Definition: InstrTypes.h:1017
@ ICMP_UGT
unsigned greater than
Definition: InstrTypes.h:1016
@ ICMP_SGT
signed greater than
Definition: InstrTypes.h:1020
@ FCMP_ULT
1 1 0 0 True if unordered or less than
Definition: InstrTypes.h:1007
@ FCMP_ONE
0 1 1 0 True if ordered and operands are unequal
Definition: InstrTypes.h:1001
@ FCMP_UEQ
1 0 0 1 True if unordered or equal
Definition: InstrTypes.h:1004
@ ICMP_ULT
unsigned less than
Definition: InstrTypes.h:1018
@ FCMP_UGT
1 0 1 0 True if unordered or greater than
Definition: InstrTypes.h:1005
@ FCMP_OLE
0 1 0 1 True if ordered and less than or equal
Definition: InstrTypes.h:1000
@ FCMP_ORD
0 1 1 1 True if ordered (no nans)
Definition: InstrTypes.h:1002
@ ICMP_EQ
equal
Definition: InstrTypes.h:1014
@ ICMP_NE
not equal
Definition: InstrTypes.h:1015
@ ICMP_SGE
signed greater or equal
Definition: InstrTypes.h:1021
@ FCMP_UNE
1 1 1 0 True if unordered or not equal
Definition: InstrTypes.h:1009
@ ICMP_ULE
unsigned less or equal
Definition: InstrTypes.h:1019
@ FCMP_UGE
1 0 1 1 True if unordered, greater than, or equal
Definition: InstrTypes.h:1006
@ FCMP_UNO
1 0 0 0 True if unordered: isnan(X) | isnan(Y)
Definition: InstrTypes.h:1003
Predicate getInversePredicate() const
For example, EQ -> NE, UGT -> ULE, SLT -> SGE, OEQ -> UNE, UGT -> OLE, OLT -> UGE,...
Definition: InstrTypes.h:1129
bool isIntPredicate() const
Definition: InstrTypes.h:1123
bool isUnsigned() const
Definition: InstrTypes.h:1271
static Constant * getSplat(unsigned NumElts, Constant *Elt)
Return a ConstantVector with the specified constant in each element.
Definition: Constants.cpp:2958
ConstantFP - Floating Point Values [float, double].
Definition: Constants.h:268
const APFloat & getValueAPF() const
Definition: Constants.h:311
bool isNegative() const
Return true if the sign bit is set.
Definition: Constants.h:318
bool isZero() const
Return true if the value is positive or negative zero.
Definition: Constants.h:315
This is the shared class of boolean and integer constants.
Definition: Constants.h:80
int64_t getSExtValue() const
Return the constant as a 64-bit integer value after it has been sign extended as appropriate for the ...
Definition: Constants.h:160
unsigned getBitWidth() const
getBitWidth - Return the scalar bitwidth of this constant.
Definition: Constants.h:148
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
Definition: Constants.h:154
static Constant * get(ArrayRef< Constant * > V)
Definition: Constants.cpp:1398
This is an important base class in LLVM.
Definition: Constant.h:41
Constant * getSplatValue(bool AllowPoison=false) const
If all elements of the vector constant have the same value, return that value.
Definition: Constants.cpp:1699
const APInt & getUniqueInteger() const
If C is a constant integer then return its value, otherwise C must be a vector of constant integers,...
Definition: Constants.cpp:1758
bool isNullValue() const
Return true if this is the value that would be returned by getNullValue.
Definition: Constants.cpp:90
This class represents an Operation in the Expression.
TypeSize getTypeStoreSize(Type *Ty) const
Returns the maximum number of bytes that may be overwritten by storing the specified type.
Definition: DataLayout.h:472
Align getPrefTypeAlign(Type *Ty) const
Returns the preferred stack/global alignment for the specified type.
Definition: DataLayout.cpp:874
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this functio...
Definition: Function.h:264
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
Definition: Function.cpp:356
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition: Function.cpp:675
std::optional< SmallVector< std::function< void(MachineInstrBuilder &)>, 4 > > ComplexRendererFns
Represents indexed stores.
Represents any type of generic load or store.
Register getPointerReg() const
Get the source register of the pointer value.
MachineMemOperand & getMMO() const
Get the MachineMemOperand on this instruction.
LocationSize getMemSize() const
Returns the size in bytes of the memory access.
LocationSize getMemSizeInBits() const
Returns the size in bits of the memory access.
Represents a G_SELECT.
Register getCondReg() const
Register getFalseReg() const
Register getTrueReg() const
Register getReg(unsigned Idx) const
Access the Idx'th operand as a register and return it.
bool isThreadLocal() const
If the value is "Thread Local", its value isn't shared by the threads.
Definition: GlobalValue.h:262
bool isEquality() const
Return true if this predicate is either EQ or NE.
constexpr unsigned getScalarSizeInBits() const
Definition: LowLevelType.h:267
constexpr bool isScalar() const
Definition: LowLevelType.h:146
constexpr LLT changeElementType(LLT NewEltTy) const
If this type is a vector, return a vector with the same number of elements but the new element type.
Definition: LowLevelType.h:214
constexpr LLT multiplyElements(int Factor) const
Produce a vector type that is Factor times bigger, preserving the element type.
Definition: LowLevelType.h:254
constexpr bool isPointerVector() const
Definition: LowLevelType.h:152
static constexpr LLT scalar(unsigned SizeInBits)
Get a low-level scalar or aggregate "bag of bits".
Definition: LowLevelType.h:42
constexpr bool isValid() const
Definition: LowLevelType.h:145
constexpr bool isVector() const
Definition: LowLevelType.h:148
static constexpr LLT pointer(unsigned AddressSpace, unsigned SizeInBits)
Get a low-level pointer in the given address space.
Definition: LowLevelType.h:57
constexpr TypeSize getSizeInBits() const
Returns the total size of the type. Must only be called on sized types.
Definition: LowLevelType.h:193
constexpr bool isPointer() const
Definition: LowLevelType.h:149
constexpr LLT getElementType() const
Returns the vector's element type. Only valid for vector types.
Definition: LowLevelType.h:290
constexpr unsigned getAddressSpace() const
Definition: LowLevelType.h:280
static constexpr LLT fixed_vector(unsigned NumElements, unsigned ScalarSizeInBits)
Get a low-level fixed-width vector of some number of elements and element width.
Definition: LowLevelType.h:100
constexpr TypeSize getSizeInBytes() const
Returns the total size of the type in bytes.
Definition: LowLevelType.h:203
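A minimal sketch of constructing and querying LLTs with the helpers listed above; all values are illustrative, and the file's existing includes are assumed.
static void lltSketch() {
  LLT S64 = LLT::scalar(64);            // 64-bit scalar
  LLT V4S32 = LLT::fixed_vector(4, 32); // <4 x s32>
  LLT P0 = LLT::pointer(0, 64);         // 64-bit pointer in address space 0
  assert(S64.isScalar() && V4S32.isVector() && P0.isPointer());
  unsigned EltBits = V4S32.getScalarSizeInBits();       // 32
  LLT V4S16 = V4S32.changeElementType(LLT::scalar(16)); // <4 x s16>
  (void)EltBits; (void)V4S16;
}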
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:67
TypeSize getValue() const
Set of metadata that should be preserved when using BuildMI().
iterator getFirstNonPHI()
Returns a pointer to the first instruction in this block that is not a PHINode instruction.
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
The MachineConstantPool class keeps track of constants referenced by a function which must be spilled...
unsigned getConstantPoolIndex(const Constant *C, Align Alignment)
getConstantPoolIndex - Create a new entry in the constant pool or return an existing one.
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
void setFrameAddressIsTaken(bool T)
void setReturnAddressIsTaken(bool s)
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
const DataLayout & getDataLayout() const
Return the DataLayout attached to the Module associated to this MF.
Function & getFunction()
Return the LLVM function that this machine code represents.
const LLVMTargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with.
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
MachineConstantPool * getConstantPool()
getConstantPool - Return the constant pool object for the current function.
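A sketch (helper name hypothetical) of how the MachineFunction accessors above combine when placing a constant into the constant pool; using the preferred type alignment here is purely illustrative.
static unsigned cpIndexSketch(const Constant *CPVal, MachineFunction &MF) {
  const DataLayout &DL = MF.getDataLayout();
  Align Alignment = DL.getPrefTypeAlign(CPVal->getType());
  return MF.getConstantPool()->getConstantPoolIndex(CPVal, Alignment);
}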
Helper class to build MachineInstr.
void setInsertPt(MachineBasicBlock &MBB, MachineBasicBlock::iterator II)
Set the insertion point before the specified position.
void setInstr(MachineInstr &MI)
Set the insertion point to before MI.
MachineInstrBuilder buildInstr(unsigned Opcode)
Build and insert <empty> = Opcode <empty>.
MachineFunction & getMF()
Getter for the function we currently build.
void setInstrAndDebugLoc(MachineInstr &MI)
Set the insertion point to before MI, and set the debug loc to MI's loc.
const MachineBasicBlock & getMBB() const
Getter for the basic block we currently build.
MachineRegisterInfo * getMRI()
Getter for MRI.
MachineIRBuilderState & getState()
Getter for the State.
MachineInstrBuilder buildCopy(const DstOp &Res, const SrcOp &Op)
Build and insert Res = COPY Op.
const DataLayout & getDataLayout() const
void setState(const MachineIRBuilderState &NewState)
Setter for the State.
MachineInstrBuilder buildPtrToInt(const DstOp &Dst, const SrcOp &Src)
Build and insert a G_PTRTOINT instruction.
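A hedged sketch of the MachineIRBuilder entry points above: position the builder at an existing instruction, then emit generic instructions. The registers and helper name are illustrative.
static void mirBuilderSketch(MachineIRBuilder &MIB, MachineInstr &I,
                             Register Dst, Register Src, Register IntDst,
                             Register Ptr) {
  MIB.setInstrAndDebugLoc(I);     // insert before I and reuse its debug loc
  MIB.buildCopy(Dst, Src);        // Dst = COPY Src
  MIB.buildPtrToInt(IntDst, Ptr); // IntDst = G_PTRTOINT Ptr
}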
Register getReg(unsigned Idx) const
Get the register for the operand index.
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & addBlockAddress(const BlockAddress *BA, int64_t Offset=0, unsigned TargetFlags=0) const
const MachineInstrBuilder & addFrameIndex(int Idx) const
const MachineInstrBuilder & addRegMask(const uint32_t *Mask) const
const MachineInstrBuilder & addGlobalAddress(const GlobalValue *GV, int64_t Offset=0, unsigned TargetFlags=0) const
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addJumpTableIndex(unsigned Idx, unsigned TargetFlags=0) const
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
const MachineInstrBuilder & cloneMemRefs(const MachineInstr &OtherMI) const
const MachineInstrBuilder & addUse(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register use operand.
const MachineInstrBuilder & setMIFlags(unsigned Flags) const
const MachineInstrBuilder & addMemOperand(MachineMemOperand *MMO) const
const MachineInstrBuilder & addDef(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register definition operand.
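A sketch of the MachineInstrBuilder chaining style described above; the opcode, registers, and offset are illustrative, and the final constraining step uses the utility listed further below.
static bool buildLoadSketch(MachineInstr &OrigLoad, Register Dst, Register Base,
                            int64_t Offset, const TargetInstrInfo &TII,
                            const TargetRegisterInfo &TRI,
                            const RegisterBankInfo &RBI) {
  MachineBasicBlock &MBB = *OrigLoad.getParent();
  auto NewLoad = BuildMI(MBB, OrigLoad, OrigLoad.getDebugLoc(),
                         TII.get(AArch64::LDRXui))
                     .addDef(Dst)
                     .addUse(Base)
                     .addImm(Offset)          // scaled unsigned 12-bit offset
                     .cloneMemRefs(OrigLoad); // keep the original memory operands
  return constrainSelectedInstRegOperands(*NewLoad, TII, TRI, RBI);
}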
Representation of each machine instruction.
Definition: MachineInstr.h:69
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
Definition: MachineInstr.h:558
const MachineBasicBlock * getParent() const
Definition: MachineInstr.h:341
void addOperand(MachineFunction &MF, const MachineOperand &Op)
Add the specified operand to the instruction.
const MachineFunction * getMF() const
Return the function that contains the basic block that this instruction belongs to.
void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:568
void addMemOperand(MachineFunction &MF, MachineMemOperand *MO)
Add a MachineMemOperand to the machine instruction.
A description of a memory reference used in the backend.
LLT getMemoryType() const
Return the memory type of the memory reference.
@ MOLoad
The memory access reads data.
AtomicOrdering getSuccessOrdering() const
Return the atomic ordering requirements for this memory operation.
MachineOperand class - Representation of each machine instruction operand.
const GlobalValue * getGlobal() const
const ConstantInt * getCImm() const
bool isCImm() const
isCImm - Test if this is a MO_CImmediate operand.
int64_t getImm() const
bool isReg() const
isReg - Tests if this is a MO_Register operand.
void setReg(Register Reg)
Change the register this operand corresponds to.
bool isImm() const
isImm - Tests if this is a MO_Immediate operand.
void ChangeToImmediate(int64_t ImmVal, unsigned TargetFlags=0)
ChangeToImmediate - Replace this operand with a new immediate operand of the specified value.
MachineInstr * getParent()
getParent - Return the instruction that this operand belongs to.
static MachineOperand CreateImm(int64_t Val)
Register getReg() const
getReg - Returns the register number.
static MachineOperand CreateGA(const GlobalValue *GV, int64_t Offset, unsigned TargetFlags=0)
static MachineOperand CreateBA(const BlockAddress *BA, int64_t Offset, unsigned TargetFlags=0)
const ConstantFP * getFPImm() const
unsigned getPredicate() const
int64_t getOffset() const
Return the offset from the symbol in this operand.
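For illustration, a small sketch of the MachineOperand accessors above: distinguish register from constant operands and fold a ConstantInt operand into a plain immediate (helper name invented).
static void operandSketch(MachineOperand &MO) {
  if (MO.isReg()) {
    Register R = MO.getReg(); // virtual or physical register operand
    (void)R;
  } else if (MO.isCImm()) {
    // Replace the wide ConstantInt operand with a plain 64-bit immediate.
    MO.ChangeToImmediate(MO.getCImm()->getSExtValue());
  }
}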
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
T get() const
Returns the value of the specified pointer type.
Definition: PointerUnion.h:155
T dyn_cast() const
Returns the current pointer if it is of the specified pointer type, otherwise returns null.
Definition: PointerUnion.h:162
Analysis providing profile information.
Holds all the information related to register banks.
static const TargetRegisterClass * constrainGenericRegister(Register Reg, const TargetRegisterClass &RC, MachineRegisterInfo &MRI)
Constrain the (possibly generic) virtual register Reg to RC.
const RegisterBank & getRegBank(unsigned ID)
Get the register bank identified by ID.
TypeSize getSizeInBits(Register Reg, const MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI) const
Get the size in bits of Reg.
This class implements the register bank concept.
Definition: RegisterBank.h:28
unsigned getID() const
Get the identifier of this register bank.
Definition: RegisterBank.h:45
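A hedged sketch of driving register-class choice from the assigned register bank; it assumes RegisterBankInfo's Register-based getRegBank overload and picks 64-bit classes only for brevity (real code would also consider the value's size).
static bool constrainToBankClassSketch(Register Reg, MachineRegisterInfo &MRI,
                                       const TargetRegisterInfo &TRI,
                                       const RegisterBankInfo &RBI) {
  const RegisterBank *RB = RBI.getRegBank(Reg, MRI, TRI);
  if (!RB)
    return false;
  // FPR bank -> FPR64, anything else -> GPR64 (illustrative simplification).
  const TargetRegisterClass &RC = RB->getID() == AArch64::FPRRegBankID
                                      ? AArch64::FPR64RegClass
                                      : AArch64::GPR64RegClass;
  return RegisterBankInfo::constrainGenericRegister(Reg, RC, MRI) != nullptr;
}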
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
constexpr bool isValid() const
Definition: Register.h:116
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
Definition: Register.h:91
constexpr bool isPhysical() const
Return true if the specified register number is in the physical register namespace.
Definition: Register.h:95
This class represents the LLVM 'select' instruction.
reference emplace_back(ArgTypes &&... Args)
Definition: SmallVector.h:950
void push_back(const T &Elt)
Definition: SmallVector.h:426
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1209
Register getReg() const
TargetInstrInfo - Interface to description of machine instruction set.
CodeModel::Model getCodeModel() const
Returns the code model.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
virtual const TargetRegisterInfo * getRegisterInfo() const
getRegisterInfo - If register information is available, return it.
virtual const TargetLowering * getTargetLowering() const
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
static IntegerType * getIntNTy(LLVMContext &C, unsigned N)
static IntegerType * getInt8Ty(LLVMContext &C)
A Use represents the edge between a Value definition and its users.
Definition: Use.h:43
LLVM Value Representation.
Definition: Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
Align getPointerAlignment(const DataLayout &DL) const
Returns an alignment of the pointer value.
Definition: Value.cpp:926
self_iterator getIterator()
Definition: ilist_node.h:109
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
static CondCode getInvertedCondCode(CondCode Code)
static unsigned getNZCVToSatisfyCondCode(CondCode Code)
Given a condition code, return NZCV flags that would satisfy that condition.
void changeFCMPPredToAArch64CC(const CmpInst::Predicate P, AArch64CC::CondCode &CondCode, AArch64CC::CondCode &CondCode2)
Find the AArch64 condition codes necessary to represent P for a scalar floating point comparison.
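A hedged illustration of the condition-code helpers above: map an FCMP predicate to one or (for predicates such as ONE/UEQ) two AArch64 condition codes, and invert a code when the branch sense is flipped. The helper name is invented.
static void fcmpCondSketch(CmpInst::Predicate Pred) {
  AArch64CC::CondCode CC1, CC2;
  changeFCMPPredToAArch64CC(Pred, CC1, CC2); // CC2 only matters for two-check predicates
  AArch64CC::CondCode InvCC1 = AArch64CC::getInvertedCondCode(CC1);
  (void)InvCC1;
}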
std::optional< int64_t > getAArch64VectorSplatScalar(const MachineInstr &MI, const MachineRegisterInfo &MRI)
@ MO_NC
MO_NC - Indicates whether the linker is expected to check the symbol reference for overflow.
@ MO_G1
MO_G1 - A symbol operand with this flag (granule 1) represents the bits 16-31 of a 64-bit address,...
@ MO_PAGEOFF
MO_PAGEOFF - A symbol operand with this flag represents the offset of that symbol within a 4K page.
@ MO_GOT
MO_GOT - This flag indicates that a symbol operand represents the address of the GOT entry for the sy...
@ MO_G0
MO_G0 - A symbol operand with this flag (granule 0) represents the bits 0-15 of a 64-bit address,...
@ MO_PAGE
MO_PAGE - A symbol operand with this flag represents the pc-relative offset of the 4K page containing...
@ MO_TLS
MO_TLS - Indicates that the operand being accessed is some kind of thread-local symbol.
@ MO_G2
MO_G2 - A symbol operand with this flag (granule 2) represents the bits 32-47 of a 64-bit address,...
@ MO_G3
MO_G3 - A symbol operand with this flag (granule 3) represents the high 16-bits of a 64-bit address,...
static bool isLogicalImmediate(uint64_t imm, unsigned regSize)
isLogicalImmediate - Return true if the immediate is valid for a logical immediate instruction of the...
static uint8_t encodeAdvSIMDModImmType2(uint64_t Imm)
static bool isAdvSIMDModImmType9(uint64_t Imm)
static bool isAdvSIMDModImmType4(uint64_t Imm)
static bool isAdvSIMDModImmType5(uint64_t Imm)
static int getFP32Imm(const APInt &Imm)
getFP32Imm - Return an 8-bit floating-point version of the 32-bit floating-point value.
static uint8_t encodeAdvSIMDModImmType7(uint64_t Imm)
static uint8_t encodeAdvSIMDModImmType12(uint64_t Imm)
static uint8_t encodeAdvSIMDModImmType10(uint64_t Imm)
static uint8_t encodeAdvSIMDModImmType9(uint64_t Imm)
static uint64_t encodeLogicalImmediate(uint64_t imm, unsigned regSize)
encodeLogicalImmediate - Return the encoded immediate value for a logical immediate instruction of th...
static bool isAdvSIMDModImmType7(uint64_t Imm)
static uint8_t encodeAdvSIMDModImmType5(uint64_t Imm)
static int getFP64Imm(const APInt &Imm)
getFP64Imm - Return an 8-bit floating-point version of the 64-bit floating-point value.
static bool isAdvSIMDModImmType10(uint64_t Imm)
static int getFP16Imm(const APInt &Imm)
getFP16Imm - Return an 8-bit floating-point version of the 16-bit floating-point value.
static uint8_t encodeAdvSIMDModImmType8(uint64_t Imm)
static bool isAdvSIMDModImmType12(uint64_t Imm)
static uint8_t encodeAdvSIMDModImmType11(uint64_t Imm)
static bool isAdvSIMDModImmType11(uint64_t Imm)
static uint8_t encodeAdvSIMDModImmType6(uint64_t Imm)
static bool isAdvSIMDModImmType8(uint64_t Imm)
static uint8_t encodeAdvSIMDModImmType4(uint64_t Imm)
static unsigned getShifterImm(AArch64_AM::ShiftExtendType ST, unsigned Imm)
getShifterImm - Encode the shift type and amount: imm: 6-bit shift amount shifter: 000 ==> lsl 001 ==...
static bool isAdvSIMDModImmType6(uint64_t Imm)
static uint8_t encodeAdvSIMDModImmType1(uint64_t Imm)
static uint8_t encodeAdvSIMDModImmType3(uint64_t Imm)
static bool isAdvSIMDModImmType2(uint64_t Imm)
static bool isAdvSIMDModImmType3(uint64_t Imm)
static bool isAdvSIMDModImmType1(uint64_t Imm)
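A sketch of the AArch64_AM immediate helpers above: validate and encode a logical immediate for an ANDXri/ORRXri-style instruction (helper name and return convention are illustrative).
static std::optional<uint64_t> encodeLogicalImmSketch(uint64_t Imm,
                                                      unsigned RegSize) {
  if (!AArch64_AM::isLogicalImmediate(Imm, RegSize))
    return std::nullopt;
  return AArch64_AM::encodeLogicalImmediate(Imm, RegSize);
}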
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:121
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out,...
Definition: ISDOpcodes.h:1535
operand_type_match m_Reg()
SpecificConstantMatch m_SpecificICst(int64_t RequestedValue)
Matches a constant equal to RequestedValue.
UnaryOp_match< SrcTy, TargetOpcode::G_ZEXT > m_GZExt(const SrcTy &Src)
ConstantMatch< APInt > m_ICst(APInt &Cst)
BinaryOp_match< LHS, RHS, TargetOpcode::G_ADD, true > m_GAdd(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, TargetOpcode::G_OR, true > m_GOr(const LHS &L, const RHS &R)
BinaryOp_match< SpecificConstantMatch, SrcTy, TargetOpcode::G_SUB > m_Neg(const SrcTy &&Src)
Matches a register negated by a G_SUB.
OneNonDBGUse_match< SubPat > m_OneNonDBGUse(const SubPat &SP)
BinaryOp_match< SrcTy, SpecificConstantMatch, TargetOpcode::G_XOR, true > m_Not(const SrcTy &&Src)
Matches a register not-ed by a G_XOR.
bool mi_match(Reg R, const MachineRegisterInfo &MRI, Pattern &&P)
BinaryOp_match< LHS, RHS, TargetOpcode::G_PTR_ADD, false > m_GPtrAdd(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, TargetOpcode::G_SHL, false > m_GShl(const LHS &L, const RHS &R)
Or< Preds... > m_any_of(Preds &&... preds)
BinaryOp_match< LHS, RHS, TargetOpcode::G_AND, true > m_GAnd(const LHS &L, const RHS &R)
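A minimal sketch of the MIPatternMatch combinators above: match Dst = G_SHL (G_ZEXT Src), Cst and capture both the source register and the shift amount (helper name invented).
static bool matchZExtShlSketch(Register Dst, const MachineRegisterInfo &MRI,
                               Register &Src, APInt &ShiftAmt) {
  return mi_match(Dst, MRI, m_GShl(m_GZExt(m_Reg(Src)), m_ICst(ShiftAmt)));
}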
Predicate
Predicate - These are "(BI << 5) | BO" for various predicates.
Definition: PPCPredicates.h:26
@ Implicit
Not emitted register (e.g. carry, or temporary result).
@ Undef
Value of the register doesn't matter.
Reg
All possible values of the reg field in the ModR/M byte.
constexpr double e
Definition: MathExtras.h:31
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
Register getFunctionLiveInPhysReg(MachineFunction &MF, const TargetInstrInfo &TII, MCRegister PhysReg, const TargetRegisterClass &RC, const DebugLoc &DL, LLT RegTy=LLT())
Return a virtual register corresponding to the incoming argument register PhysReg.
Definition: Utils.cpp:896
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition: STLExtras.h:329
@ Offset
Definition: DWP.cpp:456
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1722
Register constrainOperandRegClass(const MachineFunction &MF, const TargetRegisterInfo &TRI, MachineRegisterInfo &MRI, const TargetInstrInfo &TII, const RegisterBankInfo &RBI, MachineInstr &InsertPt, const TargetRegisterClass &RegClass, MachineOperand &RegMO)
Constrain the Register operand OpIdx, so that it is now constrained to the TargetRegisterClass passed...
Definition: Utils.cpp:56
MachineInstr * getOpcodeDef(unsigned Opcode, Register Reg, const MachineRegisterInfo &MRI)
See if Reg is defined by a single def instruction that is Opcode.
Definition: Utils.cpp:639
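A sketch of def-chasing with getOpcodeDef; it essentially restates what getIConstantVRegVal (listed below) already does, purely to show the shape of the call.
static std::optional<APInt> constantDefSketch(Register Reg,
                                              const MachineRegisterInfo &MRI) {
  if (MachineInstr *MI = getOpcodeDef(TargetOpcode::G_CONSTANT, Reg, MRI))
    return MI->getOperand(1).getCImm()->getValue();
  return std::nullopt;
}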
const ConstantFP * getConstantFPVRegVal(Register VReg, const MachineRegisterInfo &MRI)
Definition: Utils.cpp:452
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
std::optional< APInt > getIConstantVRegVal(Register VReg, const MachineRegisterInfo &MRI)
If VReg is defined by a G_CONSTANT, return the corresponding value.
Definition: Utils.cpp:295
bool constrainSelectedInstRegOperands(MachineInstr &I, const TargetInstrInfo &TII, const TargetRegisterInfo &TRI, const RegisterBankInfo &RBI)
Mutate the newly-selected instruction I to constrain its (possibly generic) virtual register operands...
Definition: Utils.cpp:155
bool isPreISelGenericOpcode(unsigned Opcode)
Check whether the given Opcode is a generic opcode that is not supposed to appear after ISel.
Definition: TargetOpcodes.h:30
unsigned getBLRCallOpcode(const MachineFunction &MF)
Return opcode to be used for indirect calls.
MachineInstr * getDefIgnoringCopies(Register Reg, const MachineRegisterInfo &MRI)
Find the def instruction for Reg, folding away any trivial copies.
Definition: Utils.cpp:479
std::optional< int64_t > getIConstantVRegSExtVal(Register VReg, const MachineRegisterInfo &MRI)
If VReg is defined by a G_CONSTANT that fits in int64_t, returns it.
Definition: Utils.cpp:307
constexpr bool isShiftedMask_64(uint64_t Value)
Return true if the argument contains a non-empty sequence of ones with the remainder zero (64 bit ver...
Definition: MathExtras.h:269
bool shouldOptForSize(const MachineBasicBlock &MBB, ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI)
Returns true if the given block should be optimized for size.
Definition: Utils.cpp:1623
OutputIt transform(R &&Range, OutputIt d_first, UnaryFunction F)
Wrapper function around std::transform to apply a function to a range and store the result elsewhere.
Definition: STLExtras.h:1928
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1729
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition: MathExtras.h:324
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
std::optional< ValueAndVReg > getAnyConstantVRegValWithLookThrough(Register VReg, const MachineRegisterInfo &MRI, bool LookThroughInstrs=true, bool LookThroughAnyExt=false)
If VReg is defined by a statically evaluable chain of instructions rooted on a G_CONSTANT or G_FCONST...
Definition: Utils.cpp:432
AtomicOrdering
Atomic ordering for LLVM's memory model.
InstructionSelector * createAArch64InstructionSelector(const AArch64TargetMachine &, AArch64Subtarget &, AArch64RegisterBankInfo &)
DWARFExpression::Operation Op
std::optional< ValueAndVReg > getIConstantVRegValWithLookThrough(Register VReg, const MachineRegisterInfo &MRI, bool LookThroughInstrs=true)
If VReg is defined by a statically evaluable chain of instructions rooted on a G_CONSTANT returns its...
Definition: Utils.cpp:426
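A sketch of the constant-lookup utilities above: the look-through form is the one to reach for when copies or extensions may sit between Reg and the defining G_CONSTANT (helper name invented).
static std::optional<int64_t> scalarImmSketch(Register Reg,
                                              const MachineRegisterInfo &MRI) {
  if (auto ValAndVReg = getIConstantVRegValWithLookThrough(Reg, MRI))
    return ValAndVReg->Value.getSExtValue();
  return std::nullopt;
}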
std::optional< DefinitionAndSourceRegister > getDefSrcRegIgnoringCopies(Register Reg, const MachineRegisterInfo &MRI)
Find the def instruction for Reg and the underlying value register, folding away any copies.
Definition: Utils.cpp:460
Register getSrcRegIgnoringCopies(Register Reg, const MachineRegisterInfo &MRI)
Find the source register for Reg, folding away any trivial copies.
Definition: Utils.cpp:486
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:860
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
static EVT getFloatingPointVT(unsigned BitWidth)
Returns the EVT that represents a floating-point type with the given number of bits.
Definition: ValueTypes.h:58
Class which stores all the state required in a MachineIRBuilder.
This class contains a discriminated union of information about pointers in memory operands,...
static MachinePointerInfo getConstantPool(MachineFunction &MF)
Return a MachinePointerInfo record that refers to the constant pool.