AArch64InstructionSelector.cpp
1//===- AArch64InstructionSelector.cpp ----------------------------*- C++ -*-==//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8/// \file
9/// This file implements the targeting of the InstructionSelector class for
10/// AArch64.
11/// \todo This should be generated by TableGen.
12//===----------------------------------------------------------------------===//
13
15#include "AArch64InstrInfo.h"
18#include "AArch64RegisterInfo.h"
19#include "AArch64Subtarget.h"
41#include "llvm/IR/Constants.h"
44#include "llvm/IR/IntrinsicsAArch64.h"
46#include "llvm/IR/Type.h"
47#include "llvm/Pass.h"
48#include "llvm/Support/Debug.h"
50#include <optional>
51
52#define DEBUG_TYPE "aarch64-isel"
53
54using namespace llvm;
55using namespace MIPatternMatch;
56using namespace AArch64GISelUtils;
57
58 namespace llvm {
59 class BlockFrequencyInfo;
60 class ProfileSummaryInfo;
61}
62
63namespace {
64
65#define GET_GLOBALISEL_PREDICATE_BITSET
66#include "AArch64GenGlobalISel.inc"
67#undef GET_GLOBALISEL_PREDICATE_BITSET
68
69
70class AArch64InstructionSelector : public InstructionSelector {
71public:
72 AArch64InstructionSelector(const AArch64TargetMachine &TM,
73 const AArch64Subtarget &STI,
74 const AArch64RegisterBankInfo &RBI);
75
76 bool select(MachineInstr &I) override;
77 static const char *getName() { return DEBUG_TYPE; }
78
79 void setupMF(MachineFunction &MF, GISelKnownBits *KB,
80 CodeGenCoverage *CoverageInfo, ProfileSummaryInfo *PSI,
81 BlockFrequencyInfo *BFI) override {
82 InstructionSelector::setupMF(MF, KB, CoverageInfo, PSI, BFI);
83 MIB.setMF(MF);
84
85 // hasFnAttribute() is expensive to call on every BRCOND selection, so
86 // cache it here for each run of the selector.
87 ProduceNonFlagSettingCondBr =
88 !MF.getFunction().hasFnAttribute(Attribute::SpeculativeLoadHardening);
89 MFReturnAddr = Register();
90
91 processPHIs(MF);
92 }
93
94private:
95 /// tblgen-erated 'select' implementation, used as the initial selector for
96 /// the patterns that don't require complex C++.
97 bool selectImpl(MachineInstr &I, CodeGenCoverage &CoverageInfo) const;
98
99 // A lowering phase that runs before any selection attempts.
100 // Returns true if the instruction was modified.
101 bool preISelLower(MachineInstr &I);
102
103 // An early selection function that runs before the selectImpl() call.
104 bool earlySelect(MachineInstr &I);
105
106 /// Save state that is shared between select calls, call select on \p I and
107 /// then restore the saved state. This can be used to recursively call select
108 /// within a select call.
109 bool selectAndRestoreState(MachineInstr &I);
110
111 // Do some preprocessing of G_PHIs before we begin selection.
112 void processPHIs(MachineFunction &MF);
113
114 bool earlySelectSHL(MachineInstr &I, MachineRegisterInfo &MRI);
115
116 /// Eliminate same-sized cross-bank copies into stores before selectImpl().
117 bool contractCrossBankCopyIntoStore(MachineInstr &I,
118 MachineRegisterInfo &MRI);
119
120 bool convertPtrAddToAdd(MachineInstr &I, MachineRegisterInfo &MRI);
121
122 bool selectVaStartAAPCS(MachineInstr &I, MachineFunction &MF,
123 MachineRegisterInfo &MRI) const;
124 bool selectVaStartDarwin(MachineInstr &I, MachineFunction &MF,
125 MachineRegisterInfo &MRI) const;
126
127 ///@{
128 /// Helper functions for selectCompareBranch.
129 bool selectCompareBranchFedByFCmp(MachineInstr &I, MachineInstr &FCmp,
130 MachineIRBuilder &MIB) const;
131 bool selectCompareBranchFedByICmp(MachineInstr &I, MachineInstr &ICmp,
132 MachineIRBuilder &MIB) const;
133 bool tryOptCompareBranchFedByICmp(MachineInstr &I, MachineInstr &ICmp,
134 MachineIRBuilder &MIB) const;
135 bool tryOptAndIntoCompareBranch(MachineInstr &AndInst, bool Invert,
136 MachineBasicBlock *DstMBB,
137 MachineIRBuilder &MIB) const;
138 ///@}
139
140 bool selectCompareBranch(MachineInstr &I, MachineFunction &MF,
141 MachineRegisterInfo &MRI);
142
143 bool selectVectorAshrLshr(MachineInstr &I, MachineRegisterInfo &MRI);
144 bool selectVectorSHL(MachineInstr &I, MachineRegisterInfo &MRI);
145
146 // Helper to generate an equivalent of scalar_to_vector into a new register,
147 // returned via 'Dst'.
148 MachineInstr *emitScalarToVector(unsigned EltSize,
149 const TargetRegisterClass *DstRC,
150 Register Scalar,
151 MachineIRBuilder &MIRBuilder) const;
152 /// Helper to narrow vector that was widened by emitScalarToVector.
153 /// Copy lowest part of 128-bit or 64-bit vector to 64-bit or 32-bit
154 /// vector, respectively.
155 MachineInstr *emitNarrowVector(Register DstReg, Register SrcReg,
156 MachineIRBuilder &MIRBuilder,
157 MachineRegisterInfo &MRI) const;
158
159 /// Emit a lane insert into \p DstReg, or a new vector register if
160 /// std::nullopt is provided.
161 ///
162 /// The lane inserted into is defined by \p LaneIdx. The vector source
163 /// register is given by \p SrcReg. The register containing the element is
164 /// given by \p EltReg.
165 MachineInstr *emitLaneInsert(std::optional<Register> DstReg, Register SrcReg,
166 Register EltReg, unsigned LaneIdx,
167 const RegisterBank &RB,
168 MachineIRBuilder &MIRBuilder) const;
169
170 /// Emit a sequence of instructions representing a constant \p CV for a
171 /// vector register \p Dst. (E.g. a MOV, or a load from a constant pool.)
172 ///
173 /// \returns the last instruction in the sequence on success, and nullptr
174 /// otherwise.
175 MachineInstr *emitConstantVector(Register Dst, Constant *CV,
176 MachineIRBuilder &MIRBuilder,
177 MachineRegisterInfo &MRI) const;
178
179 MachineInstr *tryAdvSIMDModImm8(Register Dst, unsigned DstSize, APInt Bits,
180 MachineIRBuilder &MIRBuilder);
181
182 MachineInstr *tryAdvSIMDModImm16(Register Dst, unsigned DstSize, APInt Bits,
183 MachineIRBuilder &MIRBuilder, bool Inv);
184
185 MachineInstr *tryAdvSIMDModImm32(Register Dst, unsigned DstSize, APInt Bits,
186 MachineIRBuilder &MIRBuilder, bool Inv);
187 MachineInstr *tryAdvSIMDModImm64(Register Dst, unsigned DstSize, APInt Bits,
188 MachineIRBuilder &MIRBuilder);
189 MachineInstr *tryAdvSIMDModImm321s(Register Dst, unsigned DstSize, APInt Bits,
190 MachineIRBuilder &MIRBuilder, bool Inv);
191 MachineInstr *tryAdvSIMDModImmFP(Register Dst, unsigned DstSize, APInt Bits,
192 MachineIRBuilder &MIRBuilder);
193
194 bool selectInsertElt(MachineInstr &I, MachineRegisterInfo &MRI);
195 bool tryOptConstantBuildVec(MachineInstr &MI, LLT DstTy,
196 MachineRegisterInfo &MRI);
197 /// \returns true if a G_BUILD_VECTOR instruction \p MI can be selected as a
198 /// SUBREG_TO_REG.
199 bool tryOptBuildVecToSubregToReg(MachineInstr &MI, MachineRegisterInfo &MRI);
200 bool selectBuildVector(MachineInstr &I, MachineRegisterInfo &MRI);
203
204 bool selectShuffleVector(MachineInstr &I, MachineRegisterInfo &MRI);
205 bool selectExtractElt(MachineInstr &I, MachineRegisterInfo &MRI);
206 bool selectConcatVectors(MachineInstr &I, MachineRegisterInfo &MRI);
207 bool selectSplitVectorUnmerge(MachineInstr &I, MachineRegisterInfo &MRI);
208
209 /// Helper function to select vector load intrinsics like
210 /// @llvm.aarch64.neon.ld2.*, @llvm.aarch64.neon.ld4.*, etc.
211 /// \p Opc is the opcode that the selected instruction should use.
212 /// \p NumVecs is the number of vector destinations for the instruction.
213 /// \p I is the original G_INTRINSIC_W_SIDE_EFFECTS instruction.
214 bool selectVectorLoadIntrinsic(unsigned Opc, unsigned NumVecs,
215 MachineInstr &I);
216 bool selectVectorLoadLaneIntrinsic(unsigned Opc, unsigned NumVecs,
217 MachineInstr &I);
218 void selectVectorStoreIntrinsic(MachineInstr &I, unsigned NumVecs,
219 unsigned Opc);
220 bool selectVectorStoreLaneIntrinsic(MachineInstr &I, unsigned NumVecs,
221 unsigned Opc);
222 bool selectIntrinsicWithSideEffects(MachineInstr &I,
223 MachineRegisterInfo &MRI);
224 bool selectIntrinsic(MachineInstr &I, MachineRegisterInfo &MRI);
225 bool selectVectorICmp(MachineInstr &I, MachineRegisterInfo &MRI);
226 bool selectJumpTable(MachineInstr &I, MachineRegisterInfo &MRI);
227 bool selectBrJT(MachineInstr &I, MachineRegisterInfo &MRI);
228 bool selectTLSGlobalValue(MachineInstr &I, MachineRegisterInfo &MRI);
229 bool selectReduction(MachineInstr &I, MachineRegisterInfo &MRI);
230 bool selectMOPS(MachineInstr &I, MachineRegisterInfo &MRI);
231 bool selectUSMovFromExtend(MachineInstr &I, MachineRegisterInfo &MRI);
232
233 bool selectIndexedExtLoad(MachineInstr &I, MachineRegisterInfo &MRI);
234 bool selectIndexedLoad(MachineInstr &I, MachineRegisterInfo &MRI);
235 bool selectIndexedStore(GIndexedStore &I, MachineRegisterInfo &MRI);
236
237 unsigned emitConstantPoolEntry(const Constant *CPVal,
238 MachineFunction &MF) const;
239 MachineInstr *emitLoadFromConstantPool(const Constant *CPVal,
240 MachineIRBuilder &MIRBuilder) const;
241
242 // Emit a vector concat operation.
243 MachineInstr *emitVectorConcat(std::optional<Register> Dst, Register Op1,
244 Register Op2,
245 MachineIRBuilder &MIRBuilder) const;
246
247 // Emit an integer compare between LHS and RHS, which checks for Predicate.
248 MachineInstr *emitIntegerCompare(MachineOperand &LHS, MachineOperand &RHS,
249 MachineOperand &Predicate,
250 MachineIRBuilder &MIRBuilder) const;
251
252 /// Emit a floating point comparison between \p LHS and \p RHS.
253 /// \p Pred if given is the intended predicate to use.
254 MachineInstr *
255 emitFPCompare(Register LHS, Register RHS, MachineIRBuilder &MIRBuilder,
256 std::optional<CmpInst::Predicate> = std::nullopt) const;
257
258 MachineInstr *
259 emitInstr(unsigned Opcode, std::initializer_list<llvm::DstOp> DstOps,
260 std::initializer_list<llvm::SrcOp> SrcOps,
261 MachineIRBuilder &MIRBuilder,
262 const ComplexRendererFns &RenderFns = std::nullopt) const;
263 /// Helper function to emit an add or sub instruction.
264 ///
265 /// \p AddrModeAndSizeToOpcode must contain each of the opcode variants above
266 /// in a specific order.
267 ///
268 /// Below is an example of the expected input to \p AddrModeAndSizeToOpcode.
269 ///
270 /// \code
271 /// const std::array<std::array<unsigned, 2>, 4> Table {
272 /// {{AArch64::ADDXri, AArch64::ADDWri},
273 /// {AArch64::ADDXrs, AArch64::ADDWrs},
274 /// {AArch64::ADDXrr, AArch64::ADDWrr},
275 /// {AArch64::SUBXri, AArch64::SUBWri},
276 /// {AArch64::ADDXrx, AArch64::ADDWrx}}};
277 /// \endcode
278 ///
279 /// Each row in the table corresponds to a different addressing mode. Each
280 /// column corresponds to a different register size.
281 ///
282 /// \attention Rows must be structured as follows:
283 /// - Row 0: The ri opcode variants
284 /// - Row 1: The rs opcode variants
285 /// - Row 2: The rr opcode variants
286 /// - Row 3: The ri opcode variants for negative immediates
287 /// - Row 4: The rx opcode variants
288 ///
289 /// \attention Columns must be structured as follows:
290 /// - Column 0: The 64-bit opcode variants
291 /// - Column 1: The 32-bit opcode variants
292 ///
293 /// \p Dst is the destination register of the binop to emit.
294 /// \p LHS is the left-hand operand of the binop to emit.
295 /// \p RHS is the right-hand operand of the binop to emit.
296 MachineInstr *emitAddSub(
297 const std::array<std::array<unsigned, 2>, 5> &AddrModeAndSizeToOpcode,
298 Register Dst, MachineOperand &LHS, MachineOperand &RHS,
299 MachineIRBuilder &MIRBuilder) const;
300 MachineInstr *emitADD(Register DefReg, MachineOperand &LHS,
301 MachineOperand &RHS,
302 MachineIRBuilder &MIRBuilder) const;
303 MachineInstr *emitADDS(Register Dst, MachineOperand &LHS, MachineOperand &RHS,
304 MachineIRBuilder &MIRBuilder) const;
305 MachineInstr *emitSUBS(Register Dst, MachineOperand &LHS, MachineOperand &RHS,
306 MachineIRBuilder &MIRBuilder) const;
307 MachineInstr *emitADCS(Register Dst, MachineOperand &LHS, MachineOperand &RHS,
308 MachineIRBuilder &MIRBuilder) const;
309 MachineInstr *emitSBCS(Register Dst, MachineOperand &LHS, MachineOperand &RHS,
310 MachineIRBuilder &MIRBuilder) const;
311 MachineInstr *emitCMN(MachineOperand &LHS, MachineOperand &RHS,
312 MachineIRBuilder &MIRBuilder) const;
313 MachineInstr *emitTST(MachineOperand &LHS, MachineOperand &RHS,
314 MachineIRBuilder &MIRBuilder) const;
315 MachineInstr *emitSelect(Register Dst, Register LHS, Register RHS,
316 AArch64CC::CondCode CC,
317 MachineIRBuilder &MIRBuilder) const;
318 MachineInstr *emitExtractVectorElt(std::optional<Register> DstReg,
319 const RegisterBank &DstRB, LLT ScalarTy,
320 Register VecReg, unsigned LaneIdx,
321 MachineIRBuilder &MIRBuilder) const;
322 MachineInstr *emitCSINC(Register Dst, Register Src1, Register Src2,
323 AArch64CC::CondCode Pred,
324 MachineIRBuilder &MIRBuilder) const;
325 /// Emit a CSet for a FP compare.
326 ///
327 /// \p Dst is expected to be a 32-bit scalar register.
328 MachineInstr *emitCSetForFCmp(Register Dst, CmpInst::Predicate Pred,
329 MachineIRBuilder &MIRBuilder) const;
330
331 /// Emit an instruction that sets NZCV to the carry-in expected by \p I.
332 /// Might elide the instruction if the previous instruction already sets NZCV
333 /// correctly.
334 MachineInstr *emitCarryIn(MachineInstr &I, Register CarryReg);
335
336 /// Emit the overflow op for \p Opcode.
337 ///
338 /// \p Opcode is expected to be an overflow op's opcode, e.g. G_UADDO,
339 /// G_USUBO, etc.
340 std::pair<MachineInstr *, AArch64CC::CondCode>
341 emitOverflowOp(unsigned Opcode, Register Dst, MachineOperand &LHS,
342 MachineOperand &RHS, MachineIRBuilder &MIRBuilder) const;
343
344 bool selectOverflowOp(MachineInstr &I, MachineRegisterInfo &MRI);
345
346 /// Emit expression as a conjunction (a series of CCMP/CFCMP ops).
347 /// In some cases this is even possible with OR operations in the expression.
348 MachineInstr *emitConjunction(Register Val, AArch64CC::CondCode &OutCC,
349 MachineIRBuilder &MIB) const;
350 MachineInstr *emitConditionalComparison(Register LHS, Register RHS,
351 CmpInst::Predicate CC,
352 AArch64CC::CondCode Predicate,
353 AArch64CC::CondCode OutCC,
354 MachineIRBuilder &MIB) const;
355 MachineInstr *emitConjunctionRec(Register Val, AArch64CC::CondCode &OutCC,
356 bool Negate, Register CCOp,
357 AArch64CC::CondCode Predicate,
358 MachineIRBuilder &MIB) const;
359
360 /// Emit a TB(N)Z instruction which tests \p Bit in \p TestReg.
361 /// \p IsNegative is true if the test should be "not zero".
362 /// This will also optimize the test bit instruction when possible.
363 MachineInstr *emitTestBit(Register TestReg, uint64_t Bit, bool IsNegative,
364 MachineBasicBlock *DstMBB,
365 MachineIRBuilder &MIB) const;
366
367 /// Emit a CB(N)Z instruction which branches to \p DestMBB.
368 MachineInstr *emitCBZ(Register CompareReg, bool IsNegative,
369 MachineBasicBlock *DestMBB,
370 MachineIRBuilder &MIB) const;
371
372 // Equivalent to the i32shift_a and friends from AArch64InstrInfo.td.
373 // We use these manually instead of using the importer since it doesn't
374 // support SDNodeXForm.
375 ComplexRendererFns selectShiftA_32(const MachineOperand &Root) const;
376 ComplexRendererFns selectShiftB_32(const MachineOperand &Root) const;
377 ComplexRendererFns selectShiftA_64(const MachineOperand &Root) const;
378 ComplexRendererFns selectShiftB_64(const MachineOperand &Root) const;
379
380 ComplexRendererFns select12BitValueWithLeftShift(uint64_t Immed) const;
381 ComplexRendererFns selectArithImmed(MachineOperand &Root) const;
382 ComplexRendererFns selectNegArithImmed(MachineOperand &Root) const;
383
384 ComplexRendererFns selectAddrModeUnscaled(MachineOperand &Root,
385 unsigned Size) const;
386
387 ComplexRendererFns selectAddrModeUnscaled8(MachineOperand &Root) const {
388 return selectAddrModeUnscaled(Root, 1);
389 }
390 ComplexRendererFns selectAddrModeUnscaled16(MachineOperand &Root) const {
391 return selectAddrModeUnscaled(Root, 2);
392 }
393 ComplexRendererFns selectAddrModeUnscaled32(MachineOperand &Root) const {
394 return selectAddrModeUnscaled(Root, 4);
395 }
396 ComplexRendererFns selectAddrModeUnscaled64(MachineOperand &Root) const {
397 return selectAddrModeUnscaled(Root, 8);
398 }
399 ComplexRendererFns selectAddrModeUnscaled128(MachineOperand &Root) const {
400 return selectAddrModeUnscaled(Root, 16);
401 }
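 // Illustration added by the editor (not in the original source): the
 // *Unscaled variants above correspond to the 9-bit signed, unscaled
 // addressing mode used by LDUR/STUR-class instructions. For example, a
 // G_PTR_ADD with a constant offset of -16 cannot use the scaled unsigned
 // form (LDRXui), but selectAddrModeUnscaled64 can render it as the
 // [base, #-16] operand pair for LDURXi.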
402
403 /// Helper to try to fold in a GISEL_ADD_LOW into an immediate, to be used
404 /// from complex pattern matchers like selectAddrModeIndexed().
405 ComplexRendererFns tryFoldAddLowIntoImm(MachineInstr &RootDef, unsigned Size,
406 MachineRegisterInfo &MRI) const;
407
408 ComplexRendererFns selectAddrModeIndexed(MachineOperand &Root,
409 unsigned Size) const;
410 template <int Width>
411 ComplexRendererFns selectAddrModeIndexed(MachineOperand &Root) const {
412 return selectAddrModeIndexed(Root, Width / 8);
413 }
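 // Illustration added by the editor (not in the original source): the
 // template parameter is a width in bits, so selectAddrModeIndexed<64>
 // matches a base register plus an unsigned immediate that is a multiple of
 // 8 bytes, e.g. a G_PTR_ADD with offset 24 can be rendered as the
 // [base, #3] operand pair used by LDRXui/STRXui.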
414
415 bool isWorthFoldingIntoExtendedReg(MachineInstr &MI,
416 const MachineRegisterInfo &MRI) const;
417 ComplexRendererFns
418 selectAddrModeShiftedExtendXReg(MachineOperand &Root,
419 unsigned SizeInBytes) const;
420
421 /// Returns a \p ComplexRendererFns which contains a base, offset, and whether
422 /// or not a shift + extend should be folded into an addressing mode. Returns
423 /// None when this is not profitable or possible.
424 ComplexRendererFns
425 selectExtendedSHL(MachineOperand &Root, MachineOperand &Base,
426 MachineOperand &Offset, unsigned SizeInBytes,
427 bool WantsExt) const;
428 ComplexRendererFns selectAddrModeRegisterOffset(MachineOperand &Root) const;
429 ComplexRendererFns selectAddrModeXRO(MachineOperand &Root,
430 unsigned SizeInBytes) const;
431 template <int Width>
432 ComplexRendererFns selectAddrModeXRO(MachineOperand &Root) const {
433 return selectAddrModeXRO(Root, Width / 8);
434 }
435
436 ComplexRendererFns selectAddrModeWRO(MachineOperand &Root,
437 unsigned SizeInBytes) const;
438 template <int Width>
439 ComplexRendererFns selectAddrModeWRO(MachineOperand &Root) const {
440 return selectAddrModeWRO(Root, Width / 8);
441 }
442
443 ComplexRendererFns selectShiftedRegister(MachineOperand &Root,
444 bool AllowROR = false) const;
445
446 ComplexRendererFns selectArithShiftedRegister(MachineOperand &Root) const {
447 return selectShiftedRegister(Root);
448 }
449
450 ComplexRendererFns selectLogicalShiftedRegister(MachineOperand &Root) const {
451 return selectShiftedRegister(Root, true);
452 }
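 // Note added by the editor (not in the original source): the logical
 // variant passes AllowROR = true because AArch64 logical (AND/ORR/EOR)
 // shifted-register instructions accept a rotate, while the arithmetic
 // add/sub shifted-register forms only accept LSL/LSR/ASR.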
453
454 /// Given an extend instruction, determine the correct shift-extend type for
455 /// that instruction.
456 ///
457 /// If the instruction is going to be used in a load or store, pass
458 /// \p IsLoadStore = true.
459 AArch64_AM::ShiftExtendType
460 getExtendTypeForInst(MachineInstr &MI, MachineRegisterInfo &MRI,
461 bool IsLoadStore = false) const;
462
463 /// Move \p Reg to \p RC if \p Reg is not already on \p RC.
464 ///
465 /// \returns Either \p Reg if no change was necessary, or the new register
466 /// created by moving \p Reg.
467 ///
468 /// Note: This uses emitCopy right now.
469 Register moveScalarRegClass(Register Reg, const TargetRegisterClass &RC,
470 MachineIRBuilder &MIB) const;
471
472 ComplexRendererFns selectArithExtendedRegister(MachineOperand &Root) const;
473
474 ComplexRendererFns selectExtractHigh(MachineOperand &Root) const;
475
476 void renderTruncImm(MachineInstrBuilder &MIB, const MachineInstr &MI,
477 int OpIdx = -1) const;
478 void renderLogicalImm32(MachineInstrBuilder &MIB, const MachineInstr &I,
479 int OpIdx = -1) const;
480 void renderLogicalImm64(MachineInstrBuilder &MIB, const MachineInstr &I,
481 int OpIdx = -1) const;
482 void renderUbsanTrap(MachineInstrBuilder &MIB, const MachineInstr &MI,
483 int OpIdx) const;
484 void renderFPImm16(MachineInstrBuilder &MIB, const MachineInstr &MI,
485 int OpIdx = -1) const;
486 void renderFPImm32(MachineInstrBuilder &MIB, const MachineInstr &MI,
487 int OpIdx = -1) const;
488 void renderFPImm64(MachineInstrBuilder &MIB, const MachineInstr &MI,
489 int OpIdx = -1) const;
490 void renderFPImm32SIMDModImmType4(MachineInstrBuilder &MIB,
491 const MachineInstr &MI,
492 int OpIdx = -1) const;
493
494 // Materialize a GlobalValue or BlockAddress using a movz+movk sequence.
495 void materializeLargeCMVal(MachineInstr &I, const Value *V, unsigned OpFlags);
496
497 // Optimization methods.
498 bool tryOptSelect(GSelect &Sel);
499 bool tryOptSelectConjunction(GSelect &Sel, MachineInstr &CondMI);
500 MachineInstr *tryFoldIntegerCompare(MachineOperand &LHS, MachineOperand &RHS,
501 MachineOperand &Predicate,
502 MachineIRBuilder &MIRBuilder) const;
503
504 /// Return true if \p MI is a load or store of \p NumBytes bytes.
505 bool isLoadStoreOfNumBytes(const MachineInstr &MI, unsigned NumBytes) const;
506
507 /// Returns true if \p MI is guaranteed to have the high-half of a 64-bit
508 /// register zeroed out. In other words, the result of MI has been explicitly
509 /// zero extended.
510 bool isDef32(const MachineInstr &MI) const;
511
512 const AArch64TargetMachine &TM;
513 const AArch64Subtarget &STI;
514 const AArch64InstrInfo &TII;
515 const AArch64RegisterInfo &TRI;
516 const AArch64RegisterBankInfo &RBI;
517
518 bool ProduceNonFlagSettingCondBr = false;
519
520 // Some cached values used during selection.
521 // We use LR as a live-in register, and we keep track of it here as it can be
522 // clobbered by calls.
523 Register MFReturnAddr;
524
525 MachineIRBuilder MIB;
526
527#define GET_GLOBALISEL_PREDICATES_DECL
528#include "AArch64GenGlobalISel.inc"
529#undef GET_GLOBALISEL_PREDICATES_DECL
530
531// We declare the temporaries used by selectImpl() in the class to minimize the
532// cost of constructing placeholder values.
533#define GET_GLOBALISEL_TEMPORARIES_DECL
534#include "AArch64GenGlobalISel.inc"
535#undef GET_GLOBALISEL_TEMPORARIES_DECL
536};
537
538} // end anonymous namespace
539
540#define GET_GLOBALISEL_IMPL
541#include "AArch64GenGlobalISel.inc"
542#undef GET_GLOBALISEL_IMPL
543
544AArch64InstructionSelector::AArch64InstructionSelector(
545 const AArch64TargetMachine &TM, const AArch64Subtarget &STI,
546 const AArch64RegisterBankInfo &RBI)
547 : TM(TM), STI(STI), TII(*STI.getInstrInfo()), TRI(*STI.getRegisterInfo()),
548 RBI(RBI),
549#define GET_GLOBALISEL_PREDICATES_INIT
550#include "AArch64GenGlobalISel.inc"
551#undef GET_GLOBALISEL_PREDICATES_INIT
552#define GET_GLOBALISEL_TEMPORARIES_INIT
553#include "AArch64GenGlobalISel.inc"
554#undef GET_GLOBALISEL_TEMPORARIES_INIT
555{
556}
557
558// FIXME: This should be target-independent, inferred from the types declared
559// for each class in the bank.
560//
561/// Given a register bank, and a type, return the smallest register class that
562/// can represent that combination.
563static const TargetRegisterClass *
564getRegClassForTypeOnBank(LLT Ty, const RegisterBank &RB,
565 bool GetAllRegSet = false) {
566 if (RB.getID() == AArch64::GPRRegBankID) {
567 if (Ty.getSizeInBits() <= 32)
568 return GetAllRegSet ? &AArch64::GPR32allRegClass
569 : &AArch64::GPR32RegClass;
570 if (Ty.getSizeInBits() == 64)
571 return GetAllRegSet ? &AArch64::GPR64allRegClass
572 : &AArch64::GPR64RegClass;
573 if (Ty.getSizeInBits() == 128)
574 return &AArch64::XSeqPairsClassRegClass;
575 return nullptr;
576 }
577
578 if (RB.getID() == AArch64::FPRRegBankID) {
579 switch (Ty.getSizeInBits()) {
580 case 8:
581 return &AArch64::FPR8RegClass;
582 case 16:
583 return &AArch64::FPR16RegClass;
584 case 32:
585 return &AArch64::FPR32RegClass;
586 case 64:
587 return &AArch64::FPR64RegClass;
588 case 128:
589 return &AArch64::FPR128RegClass;
590 }
591 return nullptr;
592 }
593
594 return nullptr;
595}
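// Illustrative uses added by the editor (not in the original source):
//   getRegClassForTypeOnBank(LLT::scalar(64), GPR bank)          -> &AArch64::GPR64RegClass
//   getRegClassForTypeOnBank(LLT::fixed_vector(4, 32), FPR bank) -> &AArch64::FPR128RegClass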
596
597/// Given a register bank, and size in bits, return the smallest register class
598/// that can represent that combination.
599static const TargetRegisterClass *
600getMinClassForRegBank(const RegisterBank &RB, unsigned SizeInBits,
601 bool GetAllRegSet = false) {
602 unsigned RegBankID = RB.getID();
603
604 if (RegBankID == AArch64::GPRRegBankID) {
605 if (SizeInBits <= 32)
606 return GetAllRegSet ? &AArch64::GPR32allRegClass
607 : &AArch64::GPR32RegClass;
608 if (SizeInBits == 64)
609 return GetAllRegSet ? &AArch64::GPR64allRegClass
610 : &AArch64::GPR64RegClass;
611 if (SizeInBits == 128)
612 return &AArch64::XSeqPairsClassRegClass;
613 }
614
615 if (RegBankID == AArch64::FPRRegBankID) {
616 switch (SizeInBits) {
617 default:
618 return nullptr;
619 case 8:
620 return &AArch64::FPR8RegClass;
621 case 16:
622 return &AArch64::FPR16RegClass;
623 case 32:
624 return &AArch64::FPR32RegClass;
625 case 64:
626 return &AArch64::FPR64RegClass;
627 case 128:
628 return &AArch64::FPR128RegClass;
629 }
630 }
631
632 return nullptr;
633}
634
635/// Returns the correct subregister to use for a given register class.
636static bool getSubRegForClass(const TargetRegisterClass *RC,
637 const TargetRegisterInfo &TRI, unsigned &SubReg) {
638 switch (TRI.getRegSizeInBits(*RC)) {
639 case 8:
640 SubReg = AArch64::bsub;
641 break;
642 case 16:
643 SubReg = AArch64::hsub;
644 break;
645 case 32:
646 if (RC != &AArch64::FPR32RegClass)
647 SubReg = AArch64::sub_32;
648 else
649 SubReg = AArch64::ssub;
650 break;
651 case 64:
652 SubReg = AArch64::dsub;
653 break;
654 default:
655 LLVM_DEBUG(
656 dbgs() << "Couldn't find appropriate subregister for register class.");
657 return false;
658 }
659
660 return true;
661}
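// Illustration added by the editor (not in the original source): per the
// switch above, a 32-bit GPR class maps to AArch64::sub_32, while 32-bit and
// 64-bit FPR classes map to AArch64::ssub and AArch64::dsub respectively.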
662
663/// Returns the minimum size the given register bank can hold.
664static unsigned getMinSizeForRegBank(const RegisterBank &RB) {
665 switch (RB.getID()) {
666 case AArch64::GPRRegBankID:
667 return 32;
668 case AArch64::FPRRegBankID:
669 return 8;
670 default:
671 llvm_unreachable("Tried to get minimum size for unknown register bank.");
672 }
673}
674
675/// Create a REG_SEQUENCE instruction using the registers in \p Regs.
676/// Helper function for functions like createDTuple and createQTuple.
677///
678/// \p RegClassIDs - The list of register class IDs available for some tuple of
679/// a scalar class. E.g. QQRegClassID, QQQRegClassID, QQQQRegClassID. This is
680/// expected to contain between 2 and 4 tuple classes.
681///
682/// \p SubRegs - The list of subregister classes associated with each register
683/// class ID in \p RegClassIDs. E.g., QQRegClassID should use the qsub0
684/// subregister class. The index of each subregister class is expected to
685/// correspond with the index of each register class.
686///
687/// \returns Either the destination register of REG_SEQUENCE instruction that
688/// was created, or the 0th element of \p Regs if \p Regs contains a single
689/// element.
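///
/// For illustration (added by the editor, not part of the original comment),
/// combining two D registers produces roughly:
/// \code
///   %tuple:dd = REG_SEQUENCE %vec0, %subreg.dsub0, %vec1, %subreg.dsub1
/// \endcode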
690static Register createTuple(ArrayRef<Register> Regs,
691 const unsigned RegClassIDs[],
692 const unsigned SubRegs[], MachineIRBuilder &MIB) {
693 unsigned NumRegs = Regs.size();
694 if (NumRegs == 1)
695 return Regs[0];
696 assert(NumRegs >= 2 && NumRegs <= 4 &&
697 "Only support between two and 4 registers in a tuple!");
698 const TargetRegisterInfo *TRI = MIB.getMRI()->getTargetRegisterInfo();
699 auto *DesiredClass = TRI->getRegClass(RegClassIDs[NumRegs - 2]);
700 auto RegSequence =
701 MIB.buildInstr(TargetOpcode::REG_SEQUENCE, {DesiredClass}, {});
702 for (unsigned I = 0, E = Regs.size(); I < E; ++I) {
703 RegSequence.addUse(Regs[I]);
704 RegSequence.addImm(SubRegs[I]);
705 }
706 return RegSequence.getReg(0);
707}
708
709/// Create a tuple of D-registers using the registers in \p Regs.
710static Register createDTuple(ArrayRef<Register> Regs, MachineIRBuilder &MIB) {
711 static const unsigned RegClassIDs[] = {
712 AArch64::DDRegClassID, AArch64::DDDRegClassID, AArch64::DDDDRegClassID};
713 static const unsigned SubRegs[] = {AArch64::dsub0, AArch64::dsub1,
714 AArch64::dsub2, AArch64::dsub3};
715 return createTuple(Regs, RegClassIDs, SubRegs, MIB);
716}
717
718/// Create a tuple of Q-registers using the registers in \p Regs.
719static Register createQTuple(ArrayRef<Register> Regs, MachineIRBuilder &MIB) {
720 static const unsigned RegClassIDs[] = {
721 AArch64::QQRegClassID, AArch64::QQQRegClassID, AArch64::QQQQRegClassID};
722 static const unsigned SubRegs[] = {AArch64::qsub0, AArch64::qsub1,
723 AArch64::qsub2, AArch64::qsub3};
724 return createTuple(Regs, RegClassIDs, SubRegs, MIB);
725}
726
727static std::optional<uint64_t> getImmedFromMO(const MachineOperand &Root) {
728 auto &MI = *Root.getParent();
729 auto &MBB = *MI.getParent();
730 auto &MF = *MBB.getParent();
731 auto &MRI = MF.getRegInfo();
732 uint64_t Immed;
733 if (Root.isImm())
734 Immed = Root.getImm();
735 else if (Root.isCImm())
736 Immed = Root.getCImm()->getZExtValue();
737 else if (Root.isReg()) {
738 auto ValAndVReg =
739 getIConstantVRegValWithLookThrough(Root.getReg(), MRI);
740 if (!ValAndVReg)
741 return std::nullopt;
742 Immed = ValAndVReg->Value.getSExtValue();
743 } else
744 return std::nullopt;
745 return Immed;
746}
747
748/// Check whether \p I is a currently unsupported binary operation:
749/// - it has an unsized type
750/// - an operand is not a vreg
751/// - all operands are not in the same bank
752/// These are checks that should someday live in the verifier, but right now,
753/// these are mostly limitations of the aarch64 selector.
754static bool unsupportedBinOp(const MachineInstr &I,
755 const AArch64RegisterBankInfo &RBI,
756 const MachineRegisterInfo &MRI,
757 const AArch64RegisterInfo &TRI) {
758 LLT Ty = MRI.getType(I.getOperand(0).getReg());
759 if (!Ty.isValid()) {
760 LLVM_DEBUG(dbgs() << "Generic binop register should be typed\n");
761 return true;
762 }
763
764 const RegisterBank *PrevOpBank = nullptr;
765 for (auto &MO : I.operands()) {
766 // FIXME: Support non-register operands.
767 if (!MO.isReg()) {
768 LLVM_DEBUG(dbgs() << "Generic inst non-reg operands are unsupported\n");
769 return true;
770 }
771
772 // FIXME: Can generic operations have physical registers operands? If
773 // so, this will need to be taught about that, and we'll need to get the
774 // bank out of the minimal class for the register.
775 // Either way, this needs to be documented (and possibly verified).
776 if (!MO.getReg().isVirtual()) {
777 LLVM_DEBUG(dbgs() << "Generic inst has physical register operand\n");
778 return true;
779 }
780
781 const RegisterBank *OpBank = RBI.getRegBank(MO.getReg(), MRI, TRI);
782 if (!OpBank) {
783 LLVM_DEBUG(dbgs() << "Generic register has no bank or class\n");
784 return true;
785 }
786
787 if (PrevOpBank && OpBank != PrevOpBank) {
788 LLVM_DEBUG(dbgs() << "Generic inst operands have different banks\n");
789 return true;
790 }
791 PrevOpBank = OpBank;
792 }
793 return false;
794}
795
796/// Select the AArch64 opcode for the basic binary operation \p GenericOpc
797/// (such as G_OR or G_SDIV), appropriate for the register bank \p RegBankID
798/// and of size \p OpSize.
799/// \returns \p GenericOpc if the combination is unsupported.
800static unsigned selectBinaryOp(unsigned GenericOpc, unsigned RegBankID,
801 unsigned OpSize) {
802 switch (RegBankID) {
803 case AArch64::GPRRegBankID:
804 if (OpSize == 32) {
805 switch (GenericOpc) {
806 case TargetOpcode::G_SHL:
807 return AArch64::LSLVWr;
808 case TargetOpcode::G_LSHR:
809 return AArch64::LSRVWr;
810 case TargetOpcode::G_ASHR:
811 return AArch64::ASRVWr;
812 default:
813 return GenericOpc;
814 }
815 } else if (OpSize == 64) {
816 switch (GenericOpc) {
817 case TargetOpcode::G_PTR_ADD:
818 return AArch64::ADDXrr;
819 case TargetOpcode::G_SHL:
820 return AArch64::LSLVXr;
821 case TargetOpcode::G_LSHR:
822 return AArch64::LSRVXr;
823 case TargetOpcode::G_ASHR:
824 return AArch64::ASRVXr;
825 default:
826 return GenericOpc;
827 }
828 }
829 break;
830 case AArch64::FPRRegBankID:
831 switch (OpSize) {
832 case 32:
833 switch (GenericOpc) {
834 case TargetOpcode::G_FADD:
835 return AArch64::FADDSrr;
836 case TargetOpcode::G_FSUB:
837 return AArch64::FSUBSrr;
838 case TargetOpcode::G_FMUL:
839 return AArch64::FMULSrr;
840 case TargetOpcode::G_FDIV:
841 return AArch64::FDIVSrr;
842 default:
843 return GenericOpc;
844 }
845 case 64:
846 switch (GenericOpc) {
847 case TargetOpcode::G_FADD:
848 return AArch64::FADDDrr;
849 case TargetOpcode::G_FSUB:
850 return AArch64::FSUBDrr;
851 case TargetOpcode::G_FMUL:
852 return AArch64::FMULDrr;
853 case TargetOpcode::G_FDIV:
854 return AArch64::FDIVDrr;
855 case TargetOpcode::G_OR:
856 return AArch64::ORRv8i8;
857 default:
858 return GenericOpc;
859 }
860 }
861 break;
862 }
863 return GenericOpc;
864}
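// Illustrative uses added by the editor (not in the original source):
//   selectBinaryOp(TargetOpcode::G_SHL,  AArch64::GPRRegBankID, 32) -> AArch64::LSLVWr
//   selectBinaryOp(TargetOpcode::G_FADD, AArch64::FPRRegBankID, 64) -> AArch64::FADDDrr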
865
866/// Select the AArch64 opcode for the G_LOAD or G_STORE operation \p GenericOpc,
867/// appropriate for the (value) register bank \p RegBankID and of memory access
868/// size \p OpSize. This returns the variant with the base+unsigned-immediate
869/// addressing mode (e.g., LDRXui).
870/// \returns \p GenericOpc if the combination is unsupported.
871static unsigned selectLoadStoreUIOp(unsigned GenericOpc, unsigned RegBankID,
872 unsigned OpSize) {
873 const bool isStore = GenericOpc == TargetOpcode::G_STORE;
874 switch (RegBankID) {
875 case AArch64::GPRRegBankID:
876 switch (OpSize) {
877 case 8:
878 return isStore ? AArch64::STRBBui : AArch64::LDRBBui;
879 case 16:
880 return isStore ? AArch64::STRHHui : AArch64::LDRHHui;
881 case 32:
882 return isStore ? AArch64::STRWui : AArch64::LDRWui;
883 case 64:
884 return isStore ? AArch64::STRXui : AArch64::LDRXui;
885 }
886 break;
887 case AArch64::FPRRegBankID:
888 switch (OpSize) {
889 case 8:
890 return isStore ? AArch64::STRBui : AArch64::LDRBui;
891 case 16:
892 return isStore ? AArch64::STRHui : AArch64::LDRHui;
893 case 32:
894 return isStore ? AArch64::STRSui : AArch64::LDRSui;
895 case 64:
896 return isStore ? AArch64::STRDui : AArch64::LDRDui;
897 case 128:
898 return isStore ? AArch64::STRQui : AArch64::LDRQui;
899 }
900 break;
901 }
902 return GenericOpc;
903}
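// Illustrative uses added by the editor (not in the original source):
//   selectLoadStoreUIOp(TargetOpcode::G_LOAD,  AArch64::GPRRegBankID, 32)  -> AArch64::LDRWui
//   selectLoadStoreUIOp(TargetOpcode::G_STORE, AArch64::FPRRegBankID, 128) -> AArch64::STRQui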
904
905/// Helper function for selectCopy. Inserts a subregister copy from \p SrcReg
906/// to \p *To.
907///
908/// E.g "To = COPY SrcReg:SubReg"
909static bool copySubReg(MachineInstr &I, MachineRegisterInfo &MRI,
910 const RegisterBankInfo &RBI, Register SrcReg,
911 const TargetRegisterClass *To, unsigned SubReg) {
912 assert(SrcReg.isValid() && "Expected a valid source register?");
913 assert(To && "Destination register class cannot be null");
914 assert(SubReg && "Expected a valid subregister");
915
916 MachineIRBuilder MIB(I);
917 auto SubRegCopy =
918 MIB.buildInstr(TargetOpcode::COPY, {To}, {}).addReg(SrcReg, 0, SubReg);
919 MachineOperand &RegOp = I.getOperand(1);
920 RegOp.setReg(SubRegCopy.getReg(0));
921
922 // It's possible that the destination register won't be constrained. Make
923 // sure that happens.
924 if (!I.getOperand(0).getReg().isPhysical())
925 RBI.constrainGenericRegister(I.getOperand(0).getReg(), *To, MRI);
926
927 return true;
928}
929
930/// Helper function to get the source and destination register classes for a
931/// copy. Returns a std::pair containing the source register class for the
932/// copy, and the destination register class for the copy. If a register class
933/// cannot be determined, then it will be nullptr.
934static std::pair<const TargetRegisterClass *, const TargetRegisterClass *>
935getRegClassesForCopy(MachineInstr &I, const TargetInstrInfo &TII,
936 MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI,
937 const RegisterBankInfo &RBI) {
938 Register DstReg = I.getOperand(0).getReg();
939 Register SrcReg = I.getOperand(1).getReg();
940 const RegisterBank &DstRegBank = *RBI.getRegBank(DstReg, MRI, TRI);
941 const RegisterBank &SrcRegBank = *RBI.getRegBank(SrcReg, MRI, TRI);
942 unsigned DstSize = RBI.getSizeInBits(DstReg, MRI, TRI);
943 unsigned SrcSize = RBI.getSizeInBits(SrcReg, MRI, TRI);
944
945 // Special casing for cross-bank copies of s1s. We can technically represent
946 // a 1-bit value with any size of register. The minimum size for a GPR is 32
947 // bits. So, we need to put the FPR on 32 bits as well.
948 //
949 // FIXME: I'm not sure if this case holds true outside of copies. If it does,
950 // then we can pull it into the helpers that get the appropriate class for a
951 // register bank. Or make a new helper that carries along some constraint
952 // information.
953 if (SrcRegBank != DstRegBank && (DstSize == 1 && SrcSize == 1))
954 SrcSize = DstSize = 32;
955
956 return {getMinClassForRegBank(SrcRegBank, SrcSize, true),
957 getMinClassForRegBank(DstRegBank, DstSize, true)};
958}
959
960// FIXME: We need some sort of API in RBI/TRI to allow generic code to
961// constrain operands of simple instructions given a TargetRegisterClass
962// and LLT
963static bool selectDebugInstr(MachineInstr &I, MachineRegisterInfo &MRI,
964 const RegisterBankInfo &RBI) {
965 for (MachineOperand &MO : I.operands()) {
966 if (!MO.isReg())
967 continue;
968 Register Reg = MO.getReg();
969 if (!Reg)
970 continue;
971 if (Reg.isPhysical())
972 continue;
973 LLT Ty = MRI.getType(Reg);
974 const RegClassOrRegBank &RegClassOrBank = MRI.getRegClassOrRegBank(Reg);
975 const TargetRegisterClass *RC =
976 RegClassOrBank.dyn_cast<const TargetRegisterClass *>();
977 if (!RC) {
978 const RegisterBank &RB = *RegClassOrBank.get<const RegisterBank *>();
979 RC = getRegClassForTypeOnBank(Ty, RB);
980 if (!RC) {
982 dbgs() << "Warning: DBG_VALUE operand has unexpected size/bank\n");
983 break;
984 }
985 }
986 RBI.constrainGenericRegister(Reg, *RC, MRI);
987 }
988
989 return true;
990}
991
992static bool selectCopy(MachineInstr &I, const TargetInstrInfo &TII,
993 MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI,
994 const RegisterBankInfo &RBI) {
995 Register DstReg = I.getOperand(0).getReg();
996 Register SrcReg = I.getOperand(1).getReg();
997 const RegisterBank &DstRegBank = *RBI.getRegBank(DstReg, MRI, TRI);
998 const RegisterBank &SrcRegBank = *RBI.getRegBank(SrcReg, MRI, TRI);
999
1000 // Find the correct register classes for the source and destination registers.
1001 const TargetRegisterClass *SrcRC;
1002 const TargetRegisterClass *DstRC;
1003 std::tie(SrcRC, DstRC) = getRegClassesForCopy(I, TII, MRI, TRI, RBI);
1004
1005 if (!DstRC) {
1006 LLVM_DEBUG(dbgs() << "Unexpected dest size "
1007 << RBI.getSizeInBits(DstReg, MRI, TRI) << '\n');
1008 return false;
1009 }
1010
1011 // Is this a copy? If so, then we may need to insert a subregister copy.
1012 if (I.isCopy()) {
1013 // Yes. Check if there's anything to fix up.
1014 if (!SrcRC) {
1015 LLVM_DEBUG(dbgs() << "Couldn't determine source register class\n");
1016 return false;
1017 }
1018
1019 unsigned SrcSize = TRI.getRegSizeInBits(*SrcRC);
1020 unsigned DstSize = TRI.getRegSizeInBits(*DstRC);
1021 unsigned SubReg;
1022
1023 // If the source bank doesn't support a subregister copy small enough,
1024 // then we first need to copy to the destination bank.
1025 if (getMinSizeForRegBank(SrcRegBank) > DstSize) {
1026 const TargetRegisterClass *DstTempRC =
1027 getMinClassForRegBank(DstRegBank, SrcSize, /* GetAllRegSet */ true);
1028 getSubRegForClass(DstRC, TRI, SubReg);
1029
1030 MachineIRBuilder MIB(I);
1031 auto Copy = MIB.buildCopy({DstTempRC}, {SrcReg});
1032 copySubReg(I, MRI, RBI, Copy.getReg(0), DstRC, SubReg);
1033 } else if (SrcSize > DstSize) {
1034 // If the source register is bigger than the destination we need to
1035 // perform a subregister copy.
1036 const TargetRegisterClass *SubRegRC =
1037 getMinClassForRegBank(SrcRegBank, DstSize, /* GetAllRegSet */ true);
1038 getSubRegForClass(SubRegRC, TRI, SubReg);
1039 copySubReg(I, MRI, RBI, SrcReg, DstRC, SubReg);
1040 } else if (DstSize > SrcSize) {
1041 // If the destination register is bigger than the source we need to do
1042 // a promotion using SUBREG_TO_REG.
1043 const TargetRegisterClass *PromotionRC =
1044 getMinClassForRegBank(SrcRegBank, DstSize, /* GetAllRegSet */ true);
1045 getSubRegForClass(SrcRC, TRI, SubReg);
1046
1047 Register PromoteReg = MRI.createVirtualRegister(PromotionRC);
1048 BuildMI(*I.getParent(), I, I.getDebugLoc(),
1049 TII.get(AArch64::SUBREG_TO_REG), PromoteReg)
1050 .addImm(0)
1051 .addUse(SrcReg)
1052 .addImm(SubReg);
1053 MachineOperand &RegOp = I.getOperand(1);
1054 RegOp.setReg(PromoteReg);
1055 }
1056
1057 // If the destination is a physical register, then there's nothing to
1058 // change, so we're done.
1059 if (DstReg.isPhysical())
1060 return true;
1061 }
1062
1063 // No need to constrain SrcReg. It will get constrained when we hit another
1064 // of its use or its defs. Copies do not have constraints.
1065 if (!RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) {
1066 LLVM_DEBUG(dbgs() << "Failed to constrain " << TII.getName(I.getOpcode())
1067 << " operand\n");
1068 return false;
1069 }
1070
1071 // If this is a GPR ZEXT that we want to just reduce down into a copy.
1072 // The sizes will be mismatched with the source < 32b but that's ok.
1073 if (I.getOpcode() == TargetOpcode::G_ZEXT) {
1074 I.setDesc(TII.get(AArch64::COPY));
1075 assert(SrcRegBank.getID() == AArch64::GPRRegBankID);
1076 return selectCopy(I, TII, MRI, TRI, RBI);
1077 }
1078
1079 I.setDesc(TII.get(AArch64::COPY));
1080 return true;
1081}
1082
1083static unsigned selectFPConvOpc(unsigned GenericOpc, LLT DstTy, LLT SrcTy) {
1084 if (!DstTy.isScalar() || !SrcTy.isScalar())
1085 return GenericOpc;
1086
1087 const unsigned DstSize = DstTy.getSizeInBits();
1088 const unsigned SrcSize = SrcTy.getSizeInBits();
1089
1090 switch (DstSize) {
1091 case 32:
1092 switch (SrcSize) {
1093 case 32:
1094 switch (GenericOpc) {
1095 case TargetOpcode::G_SITOFP:
1096 return AArch64::SCVTFUWSri;
1097 case TargetOpcode::G_UITOFP:
1098 return AArch64::UCVTFUWSri;
1099 case TargetOpcode::G_FPTOSI:
1100 return AArch64::FCVTZSUWSr;
1101 case TargetOpcode::G_FPTOUI:
1102 return AArch64::FCVTZUUWSr;
1103 default:
1104 return GenericOpc;
1105 }
1106 case 64:
1107 switch (GenericOpc) {
1108 case TargetOpcode::G_SITOFP:
1109 return AArch64::SCVTFUXSri;
1110 case TargetOpcode::G_UITOFP:
1111 return AArch64::UCVTFUXSri;
1112 case TargetOpcode::G_FPTOSI:
1113 return AArch64::FCVTZSUWDr;
1114 case TargetOpcode::G_FPTOUI:
1115 return AArch64::FCVTZUUWDr;
1116 default:
1117 return GenericOpc;
1118 }
1119 default:
1120 return GenericOpc;
1121 }
1122 case 64:
1123 switch (SrcSize) {
1124 case 32:
1125 switch (GenericOpc) {
1126 case TargetOpcode::G_SITOFP:
1127 return AArch64::SCVTFUWDri;
1128 case TargetOpcode::G_UITOFP:
1129 return AArch64::UCVTFUWDri;
1130 case TargetOpcode::G_FPTOSI:
1131 return AArch64::FCVTZSUXSr;
1132 case TargetOpcode::G_FPTOUI:
1133 return AArch64::FCVTZUUXSr;
1134 default:
1135 return GenericOpc;
1136 }
1137 case 64:
1138 switch (GenericOpc) {
1139 case TargetOpcode::G_SITOFP:
1140 return AArch64::SCVTFUXDri;
1141 case TargetOpcode::G_UITOFP:
1142 return AArch64::UCVTFUXDri;
1143 case TargetOpcode::G_FPTOSI:
1144 return AArch64::FCVTZSUXDr;
1145 case TargetOpcode::G_FPTOUI:
1146 return AArch64::FCVTZUUXDr;
1147 default:
1148 return GenericOpc;
1149 }
1150 default:
1151 return GenericOpc;
1152 }
1153 default:
1154 return GenericOpc;
1155 };
1156 return GenericOpc;
1157}
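// Illustrative uses added by the editor (not in the original source):
//   selectFPConvOpc(G_SITOFP, s32, s32) -> AArch64::SCVTFUWSri (i32 -> f32)
//   selectFPConvOpc(G_FPTOUI, s32, s64) -> AArch64::FCVTZUUWDr (f64 -> u32)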
1158
1159MachineInstr *
1160AArch64InstructionSelector::emitSelect(Register Dst, Register True,
1161 Register False, AArch64CC::CondCode CC,
1162 MachineIRBuilder &MIB) const {
1163 MachineRegisterInfo &MRI = *MIB.getMRI();
1164 assert(RBI.getRegBank(False, MRI, TRI)->getID() ==
1165 RBI.getRegBank(True, MRI, TRI)->getID() &&
1166 "Expected both select operands to have the same regbank?");
1167 LLT Ty = MRI.getType(True);
1168 if (Ty.isVector())
1169 return nullptr;
1170 const unsigned Size = Ty.getSizeInBits();
1171 assert((Size == 32 || Size == 64) &&
1172 "Expected 32 bit or 64 bit select only?");
1173 const bool Is32Bit = Size == 32;
1174 if (RBI.getRegBank(True, MRI, TRI)->getID() != AArch64::GPRRegBankID) {
1175 unsigned Opc = Is32Bit ? AArch64::FCSELSrrr : AArch64::FCSELDrrr;
1176 auto FCSel = MIB.buildInstr(Opc, {Dst}, {True, False}).addImm(CC);
1177 constrainSelectedInstRegOperands(*FCSel, TII, TRI, RBI);
1178 return &*FCSel;
1179 }
1180
1181 // By default, we'll try and emit a CSEL.
1182 unsigned Opc = Is32Bit ? AArch64::CSELWr : AArch64::CSELXr;
1183 bool Optimized = false;
1184 auto TryFoldBinOpIntoSelect = [&Opc, Is32Bit, &CC, &MRI,
1185 &Optimized](Register &Reg, Register &OtherReg,
1186 bool Invert) {
1187 if (Optimized)
1188 return false;
1189
1190 // Attempt to fold:
1191 //
1192 // %sub = G_SUB 0, %x
1193 // %select = G_SELECT cc, %reg, %sub
1194 //
1195 // Into:
1196 // %select = CSNEG %reg, %x, cc
1197 Register MatchReg;
1198 if (mi_match(Reg, MRI, m_Neg(m_Reg(MatchReg)))) {
1199 Opc = Is32Bit ? AArch64::CSNEGWr : AArch64::CSNEGXr;
1200 Reg = MatchReg;
1201 if (Invert) {
1202 CC = AArch64CC::getInvertedCondCode(CC);
1203 std::swap(Reg, OtherReg);
1204 }
1205 return true;
1206 }
1207
1208 // Attempt to fold:
1209 //
1210 // %xor = G_XOR %x, -1
1211 // %select = G_SELECT cc, %reg, %xor
1212 //
1213 // Into:
1214 // %select = CSINV %reg, %x, cc
1215 if (mi_match(Reg, MRI, m_Not(m_Reg(MatchReg)))) {
1216 Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;
1217 Reg = MatchReg;
1218 if (Invert) {
1219 CC = AArch64CC::getInvertedCondCode(CC);
1220 std::swap(Reg, OtherReg);
1221 }
1222 return true;
1223 }
1224
1225 // Attempt to fold:
1226 //
1227 // %add = G_ADD %x, 1
1228 // %select = G_SELECT cc, %reg, %add
1229 //
1230 // Into:
1231 // %select = CSINC %reg, %x, cc
1232 if (mi_match(Reg, MRI,
1233 m_any_of(m_GAdd(m_Reg(MatchReg), m_SpecificICst(1)),
1234 m_GPtrAdd(m_Reg(MatchReg), m_SpecificICst(1))))) {
1235 Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;
1236 Reg = MatchReg;
1237 if (Invert) {
1238 CC = AArch64CC::getInvertedCondCode(CC);
1239 std::swap(Reg, OtherReg);
1240 }
1241 return true;
1242 }
1243
1244 return false;
1245 };
1246
1247 // Helper lambda which tries to use CSINC/CSINV for the instruction when its
1248 // true/false values are constants.
1249 // FIXME: All of these patterns already exist in tablegen. We should be
1250 // able to import these.
1251 auto TryOptSelectCst = [&Opc, &True, &False, &CC, Is32Bit, &MRI,
1252 &Optimized]() {
1253 if (Optimized)
1254 return false;
1255 auto TrueCst = getIConstantVRegValWithLookThrough(True, MRI);
1256 auto FalseCst = getIConstantVRegValWithLookThrough(False, MRI);
1257 if (!TrueCst && !FalseCst)
1258 return false;
1259
1260 Register ZReg = Is32Bit ? AArch64::WZR : AArch64::XZR;
1261 if (TrueCst && FalseCst) {
1262 int64_t T = TrueCst->Value.getSExtValue();
1263 int64_t F = FalseCst->Value.getSExtValue();
1264
1265 if (T == 0 && F == 1) {
1266 // G_SELECT cc, 0, 1 -> CSINC zreg, zreg, cc
1267 Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;
1268 True = ZReg;
1269 False = ZReg;
1270 return true;
1271 }
1272
1273 if (T == 0 && F == -1) {
1274 // G_SELECT cc, 0, -1 -> CSINV zreg, zreg, cc
1275 Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;
1276 True = ZReg;
1277 False = ZReg;
1278 return true;
1279 }
1280 }
1281
1282 if (TrueCst) {
1283 int64_t T = TrueCst->Value.getSExtValue();
1284 if (T == 1) {
1285 // G_SELECT cc, 1, f -> CSINC f, zreg, inv_cc
1286 Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;
1287 True = False;
1288 False = ZReg;
1289 CC = AArch64CC::getInvertedCondCode(CC);
1290 return true;
1291 }
1292
1293 if (T == -1) {
1294 // G_SELECT cc, -1, f -> CSINV f, zreg, inv_cc
1295 Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;
1296 True = False;
1297 False = ZReg;
1298 CC = AArch64CC::getInvertedCondCode(CC);
1299 return true;
1300 }
1301 }
1302
1303 if (FalseCst) {
1304 int64_t F = FalseCst->Value.getSExtValue();
1305 if (F == 1) {
1306 // G_SELECT cc, t, 1 -> CSINC t, zreg, cc
1307 Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;
1308 False = ZReg;
1309 return true;
1310 }
1311
1312 if (F == -1) {
1313 // G_SELECT cc, t, -1 -> CSINC t, zreg, cc
1314 Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;
1315 False = ZReg;
1316 return true;
1317 }
1318 }
1319 return false;
1320 };
1321
1322 Optimized |= TryFoldBinOpIntoSelect(False, True, /*Invert = */ false);
1323 Optimized |= TryFoldBinOpIntoSelect(True, False, /*Invert = */ true);
1324 Optimized |= TryOptSelectCst();
1325 auto SelectInst = MIB.buildInstr(Opc, {Dst}, {True, False}).addImm(CC);
1326 constrainSelectedInstRegOperands(*SelectInst, TII, TRI, RBI);
1327 return &*SelectInst;
1328}
1329
1330static AArch64CC::CondCode changeICMPPredToAArch64CC(CmpInst::Predicate P) {
1331 switch (P) {
1332 default:
1333 llvm_unreachable("Unknown condition code!");
1334 case CmpInst::ICMP_NE:
1335 return AArch64CC::NE;
1336 case CmpInst::ICMP_EQ:
1337 return AArch64CC::EQ;
1338 case CmpInst::ICMP_SGT:
1339 return AArch64CC::GT;
1340 case CmpInst::ICMP_SGE:
1341 return AArch64CC::GE;
1342 case CmpInst::ICMP_SLT:
1343 return AArch64CC::LT;
1344 case CmpInst::ICMP_SLE:
1345 return AArch64CC::LE;
1346 case CmpInst::ICMP_UGT:
1347 return AArch64CC::HI;
1348 case CmpInst::ICMP_UGE:
1349 return AArch64CC::HS;
1350 case CmpInst::ICMP_ULT:
1351 return AArch64CC::LO;
1352 case CmpInst::ICMP_ULE:
1353 return AArch64CC::LS;
1354 }
1355}
1356
1357/// changeFPCCToORAArch64CC - Convert an IR fp condition code to an AArch64 CC.
1358static void changeFPCCToORAArch64CC(CmpInst::Predicate CC,
1359 AArch64CC::CondCode &CondCode,
1360 AArch64CC::CondCode &CondCode2) {
1361 CondCode2 = AArch64CC::AL;
1362 switch (CC) {
1363 default:
1364 llvm_unreachable("Unknown FP condition!");
1365 case CmpInst::FCMP_OEQ:
1366 CondCode = AArch64CC::EQ;
1367 break;
1368 case CmpInst::FCMP_OGT:
1369 CondCode = AArch64CC::GT;
1370 break;
1371 case CmpInst::FCMP_OGE:
1372 CondCode = AArch64CC::GE;
1373 break;
1374 case CmpInst::FCMP_OLT:
1375 CondCode = AArch64CC::MI;
1376 break;
1377 case CmpInst::FCMP_OLE:
1378 CondCode = AArch64CC::LS;
1379 break;
1380 case CmpInst::FCMP_ONE:
1381 CondCode = AArch64CC::MI;
1382 CondCode2 = AArch64CC::GT;
1383 break;
1384 case CmpInst::FCMP_ORD:
1385 CondCode = AArch64CC::VC;
1386 break;
1387 case CmpInst::FCMP_UNO:
1388 CondCode = AArch64CC::VS;
1389 break;
1390 case CmpInst::FCMP_UEQ:
1391 CondCode = AArch64CC::EQ;
1392 CondCode2 = AArch64CC::VS;
1393 break;
1394 case CmpInst::FCMP_UGT:
1395 CondCode = AArch64CC::HI;
1396 break;
1397 case CmpInst::FCMP_UGE:
1398 CondCode = AArch64CC::PL;
1399 break;
1400 case CmpInst::FCMP_ULT:
1401 CondCode = AArch64CC::LT;
1402 break;
1403 case CmpInst::FCMP_ULE:
1404 CondCode = AArch64CC::LE;
1405 break;
1406 case CmpInst::FCMP_UNE:
1407 CondCode = AArch64CC::NE;
1408 break;
1409 }
1410}
1411
1412/// Convert an IR fp condition code to an AArch64 CC.
1413/// This differs from changeFPCCToAArch64CC in that it returns cond codes that
1414/// should be AND'ed instead of OR'ed.
1415static void changeFPCCToANDAArch64CC(CmpInst::Predicate CC,
1416 AArch64CC::CondCode &CondCode,
1417 AArch64CC::CondCode &CondCode2) {
1418 CondCode2 = AArch64CC::AL;
1419 switch (CC) {
1420 default:
1421 changeFPCCToORAArch64CC(CC, CondCode, CondCode2);
1422 assert(CondCode2 == AArch64CC::AL);
1423 break;
1424 case CmpInst::FCMP_ONE:
1425 // (a one b)
1426 // == ((a olt b) || (a ogt b))
1427 // == ((a ord b) && (a une b))
1428 CondCode = AArch64CC::VC;
1429 CondCode2 = AArch64CC::NE;
1430 break;
1431 case CmpInst::FCMP_UEQ:
1432 // (a ueq b)
1433 // == ((a uno b) || (a oeq b))
1434 // == ((a ule b) && (a uge b))
1435 CondCode = AArch64CC::PL;
1436 CondCode2 = AArch64CC::LE;
1437 break;
1438 }
1439}
1440
1441/// Return a register which can be used as a bit to test in a TB(N)Z.
1442static Register getTestBitReg(Register Reg, uint64_t &Bit, bool &Invert,
1443 MachineRegisterInfo &MRI) {
1444 assert(Reg.isValid() && "Expected valid register!");
1445 bool HasZext = false;
1446 while (MachineInstr *MI = getDefIgnoringCopies(Reg, MRI)) {
1447 unsigned Opc = MI->getOpcode();
1448
1449 if (!MI->getOperand(0).isReg() ||
1450 !MRI.hasOneNonDBGUse(MI->getOperand(0).getReg()))
1451 break;
1452
1453 // (tbz (any_ext x), b) -> (tbz x, b) if we don't use the extended bits.
1454 //
1455 // (tbz (trunc x), b) -> (tbz x, b) is always safe, because the bit number
1456 // on the truncated x is the same as the bit number on x.
1457 if (Opc == TargetOpcode::G_ANYEXT || Opc == TargetOpcode::G_ZEXT ||
1458 Opc == TargetOpcode::G_TRUNC) {
1459 if (Opc == TargetOpcode::G_ZEXT)
1460 HasZext = true;
1461
1462 Register NextReg = MI->getOperand(1).getReg();
1463 // Did we find something worth folding?
1464 if (!NextReg.isValid() || !MRI.hasOneNonDBGUse(NextReg))
1465 break;
1466
1467 // NextReg is worth folding. Keep looking.
1468 Reg = NextReg;
1469 continue;
1470 }
1471
1472 // Attempt to find a suitable operation with a constant on one side.
1473 std::optional<uint64_t> C;
1474 Register TestReg;
1475 switch (Opc) {
1476 default:
1477 break;
1478 case TargetOpcode::G_AND:
1479 case TargetOpcode::G_XOR: {
1480 TestReg = MI->getOperand(1).getReg();
1481 Register ConstantReg = MI->getOperand(2).getReg();
1482 auto VRegAndVal = getIConstantVRegValWithLookThrough(ConstantReg, MRI);
1483 if (!VRegAndVal) {
1484 // AND commutes, check the other side for a constant.
1485 // FIXME: Can we canonicalize the constant so that it's always on the
1486 // same side at some point earlier?
1487 std::swap(ConstantReg, TestReg);
1488 VRegAndVal = getIConstantVRegValWithLookThrough(ConstantReg, MRI);
1489 }
1490 if (VRegAndVal) {
1491 if (HasZext)
1492 C = VRegAndVal->Value.getZExtValue();
1493 else
1494 C = VRegAndVal->Value.getSExtValue();
1495 }
1496 break;
1497 }
1498 case TargetOpcode::G_ASHR:
1499 case TargetOpcode::G_LSHR:
1500 case TargetOpcode::G_SHL: {
1501 TestReg = MI->getOperand(1).getReg();
1502 auto VRegAndVal =
1503 getIConstantVRegValWithLookThrough(MI->getOperand(2).getReg(), MRI);
1504 if (VRegAndVal)
1505 C = VRegAndVal->Value.getSExtValue();
1506 break;
1507 }
1508 }
1509
1510 // Didn't find a constant or viable register. Bail out of the loop.
1511 if (!C || !TestReg.isValid())
1512 break;
1513
1514 // We found a suitable instruction with a constant. Check to see if we can
1515 // walk through the instruction.
1516 Register NextReg;
1517 unsigned TestRegSize = MRI.getType(TestReg).getSizeInBits();
1518 switch (Opc) {
1519 default:
1520 break;
1521 case TargetOpcode::G_AND:
1522 // (tbz (and x, m), b) -> (tbz x, b) when the b-th bit of m is set.
1523 if ((*C >> Bit) & 1)
1524 NextReg = TestReg;
1525 break;
1526 case TargetOpcode::G_SHL:
1527 // (tbz (shl x, c), b) -> (tbz x, b-c) when b-c is positive and fits in
1528 // the type of the register.
1529 if (*C <= Bit && (Bit - *C) < TestRegSize) {
1530 NextReg = TestReg;
1531 Bit = Bit - *C;
1532 }
1533 break;
1534 case TargetOpcode::G_ASHR:
1535 // (tbz (ashr x, c), b) -> (tbz x, b+c) or (tbz x, msb) if b+c is > # bits
1536 // in x
1537 NextReg = TestReg;
1538 Bit = Bit + *C;
1539 if (Bit >= TestRegSize)
1540 Bit = TestRegSize - 1;
1541 break;
1542 case TargetOpcode::G_LSHR:
1543 // (tbz (lshr x, c), b) -> (tbz x, b+c) when b + c is < # bits in x
1544 if ((Bit + *C) < TestRegSize) {
1545 NextReg = TestReg;
1546 Bit = Bit + *C;
1547 }
1548 break;
1549 case TargetOpcode::G_XOR:
1550 // We can walk through a G_XOR by inverting whether we use tbz/tbnz when
1551 // appropriate.
1552 //
1553 // e.g. If x' = xor x, c, and the b-th bit is set in c then
1554 //
1555 // tbz x', b -> tbnz x, b
1556 //
1557 // Because x' only has the b-th bit set if x does not.
1558 if ((*C >> Bit) & 1)
1559 Invert = !Invert;
1560 NextReg = TestReg;
1561 break;
1562 }
1563
1564 // Check if we found anything worth folding.
1565 if (!NextReg.isValid())
1566 return Reg;
1567 Reg = NextReg;
1568 }
1569
1570 return Reg;
1571}
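// Worked example added by the editor (not in the original source): starting
// from "test bit 3 of (G_SHL %x, 2)", the G_SHL case above applies because
// 2 <= 3 and 3 - 2 fits in the register, so the walk continues on %x with
// Bit updated to 1, i.e. the TB(N)Z ends up testing bit 1 of %x directly.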
1572
1573MachineInstr *AArch64InstructionSelector::emitTestBit(
1574 Register TestReg, uint64_t Bit, bool IsNegative, MachineBasicBlock *DstMBB,
1575 MachineIRBuilder &MIB) const {
1576 assert(TestReg.isValid());
1577 assert(ProduceNonFlagSettingCondBr &&
1578 "Cannot emit TB(N)Z with speculation tracking!");
1579 MachineRegisterInfo &MRI = *MIB.getMRI();
1580
1581 // Attempt to optimize the test bit by walking over instructions.
1582 TestReg = getTestBitReg(TestReg, Bit, IsNegative, MRI);
1583 LLT Ty = MRI.getType(TestReg);
1584 unsigned Size = Ty.getSizeInBits();
1585 assert(!Ty.isVector() && "Expected a scalar!");
1586 assert(Bit < 64 && "Bit is too large!");
1587
1588 // When the test register is a 64-bit register, we have to narrow to make
1589 // TBNZW work.
1590 bool UseWReg = Bit < 32;
1591 unsigned NecessarySize = UseWReg ? 32 : 64;
1592 if (Size != NecessarySize)
1593 TestReg = moveScalarRegClass(
1594 TestReg, UseWReg ? AArch64::GPR32RegClass : AArch64::GPR64RegClass,
1595 MIB);
1596
1597 static const unsigned OpcTable[2][2] = {{AArch64::TBZX, AArch64::TBNZX},
1598 {AArch64::TBZW, AArch64::TBNZW}};
1599 unsigned Opc = OpcTable[UseWReg][IsNegative];
1600 auto TestBitMI =
1601 MIB.buildInstr(Opc).addReg(TestReg).addImm(Bit).addMBB(DstMBB);
1602 constrainSelectedInstRegOperands(*TestBitMI, TII, TRI, RBI);
1603 return &*TestBitMI;
1604}
1605
1606bool AArch64InstructionSelector::tryOptAndIntoCompareBranch(
1607 MachineInstr &AndInst, bool Invert, MachineBasicBlock *DstMBB,
1608 MachineIRBuilder &MIB) const {
1609 assert(AndInst.getOpcode() == TargetOpcode::G_AND && "Expected G_AND only?");
1610 // Given something like this:
1611 //
1612 // %x = ...Something...
1613 // %one = G_CONSTANT i64 1
1614 // %zero = G_CONSTANT i64 0
1615 // %and = G_AND %x, %one
1616 // %cmp = G_ICMP intpred(ne), %and, %zero
1617 // %cmp_trunc = G_TRUNC %cmp
1618 // G_BRCOND %cmp_trunc, %bb.3
1619 //
1620 // We want to try and fold the AND into the G_BRCOND and produce either a
1621 // TBNZ (when we have intpred(ne)) or a TBZ (when we have intpred(eq)).
1622 //
1623 // In this case, we'd get
1624 //
1625 // TBNZ %x %bb.3
1626 //
1627
1628 // Check if the AND has a constant on its RHS which we can use as a mask.
1629 // If it's a power of 2, then it's the same as checking a specific bit.
1630 // (e.g, ANDing with 8 == ANDing with 000...100 == testing if bit 3 is set)
1631 auto MaybeBit = getIConstantVRegValWithLookThrough(
1632 AndInst.getOperand(2).getReg(), *MIB.getMRI());
1633 if (!MaybeBit)
1634 return false;
1635
1636 int32_t Bit = MaybeBit->Value.exactLogBase2();
1637 if (Bit < 0)
1638 return false;
1639
1640 Register TestReg = AndInst.getOperand(1).getReg();
1641
1642 // Emit a TB(N)Z.
1643 emitTestBit(TestReg, Bit, Invert, DstMBB, MIB);
1644 return true;
1645}
1646
1647MachineInstr *AArch64InstructionSelector::emitCBZ(Register CompareReg,
1648 bool IsNegative,
1649 MachineBasicBlock *DestMBB,
1650 MachineIRBuilder &MIB) const {
1651 assert(ProduceNonFlagSettingCondBr && "CBZ does not set flags!");
1652 MachineRegisterInfo &MRI = *MIB.getMRI();
1653 assert(RBI.getRegBank(CompareReg, MRI, TRI)->getID() ==
1654 AArch64::GPRRegBankID &&
1655 "Expected GPRs only?");
1656 auto Ty = MRI.getType(CompareReg);
1657 unsigned Width = Ty.getSizeInBits();
1658 assert(!Ty.isVector() && "Expected scalar only?");
1659 assert(Width <= 64 && "Expected width to be at most 64?");
1660 static const unsigned OpcTable[2][2] = {{AArch64::CBZW, AArch64::CBZX},
1661 {AArch64::CBNZW, AArch64::CBNZX}};
1662 unsigned Opc = OpcTable[IsNegative][Width == 64];
1663 auto BranchMI = MIB.buildInstr(Opc, {}, {CompareReg}).addMBB(DestMBB);
1664 constrainSelectedInstRegOperands(*BranchMI, TII, TRI, RBI);
1665 return &*BranchMI;
1666}
1667
1668bool AArch64InstructionSelector::selectCompareBranchFedByFCmp(
1669 MachineInstr &I, MachineInstr &FCmp, MachineIRBuilder &MIB) const {
1670 assert(FCmp.getOpcode() == TargetOpcode::G_FCMP);
1671 assert(I.getOpcode() == TargetOpcode::G_BRCOND);
1672 // Unfortunately, the mapping of LLVM FP CC's onto AArch64 CC's isn't
1673 // totally clean. Some of them require two branches to implement.
1674 auto Pred = (CmpInst::Predicate)FCmp.getOperand(1).getPredicate();
1675 emitFPCompare(FCmp.getOperand(2).getReg(), FCmp.getOperand(3).getReg(), MIB,
1676 Pred);
1677 AArch64CC::CondCode CC1, CC2;
1678 changeFCMPPredToAArch64CC(static_cast<CmpInst::Predicate>(Pred), CC1, CC2);
1679 MachineBasicBlock *DestMBB = I.getOperand(1).getMBB();
1680 MIB.buildInstr(AArch64::Bcc, {}, {}).addImm(CC1).addMBB(DestMBB);
1681 if (CC2 != AArch64CC::AL)
1682 MIB.buildInstr(AArch64::Bcc, {}, {}).addImm(CC2).addMBB(DestMBB);
1683 I.eraseFromParent();
1684 return true;
1685}
1686
1687bool AArch64InstructionSelector::tryOptCompareBranchFedByICmp(
1688 MachineInstr &I, MachineInstr &ICmp, MachineIRBuilder &MIB) const {
1689 assert(ICmp.getOpcode() == TargetOpcode::G_ICMP);
1690 assert(I.getOpcode() == TargetOpcode::G_BRCOND);
1691 // Attempt to optimize the G_BRCOND + G_ICMP into a TB(N)Z/CB(N)Z.
1692 //
1693 // Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z
1694 // instructions will not be produced, as they are conditional branch
1695 // instructions that do not set flags.
1696 if (!ProduceNonFlagSettingCondBr)
1697 return false;
1698
1699 MachineRegisterInfo &MRI = *MIB.getMRI();
1700 MachineBasicBlock *DestMBB = I.getOperand(1).getMBB();
1701 auto Pred =
1702 static_cast<CmpInst::Predicate>(ICmp.getOperand(1).getPredicate());
1703 Register LHS = ICmp.getOperand(2).getReg();
1704 Register RHS = ICmp.getOperand(3).getReg();
1705
1706 // We're allowed to emit a TB(N)Z/CB(N)Z. Try to do that.
1707 auto VRegAndVal = getIConstantVRegValWithLookThrough(RHS, MRI);
1708 MachineInstr *AndInst = getOpcodeDef(TargetOpcode::G_AND, LHS, MRI);
1709
1710 // When we can emit a TB(N)Z, prefer that.
1711 //
1712 // Handle non-commutative condition codes first.
1713 // Note that we don't want to do this when we have a G_AND because it can
1714 // become a tst. The tst will make the test bit in the TB(N)Z redundant.
1715 if (VRegAndVal && !AndInst) {
1716 int64_t C = VRegAndVal->Value.getSExtValue();
1717
1718 // When we have a greater-than comparison, we can just test if the msb is
1719 // zero.
1720 if (C == -1 && Pred == CmpInst::ICMP_SGT) {
1721 uint64_t Bit = MRI.getType(LHS).getSizeInBits() - 1;
1722 emitTestBit(LHS, Bit, /*IsNegative = */ false, DestMBB, MIB);
1723 I.eraseFromParent();
1724 return true;
1725 }
1726
1727 // When we have a less than comparison, we can just test if the msb is not
1728 // zero.
1729 if (C == 0 && Pred == CmpInst::ICMP_SLT) {
1730 uint64_t Bit = MRI.getType(LHS).getSizeInBits() - 1;
1731 emitTestBit(LHS, Bit, /*IsNegative = */ true, DestMBB, MIB);
1732 I.eraseFromParent();
1733 return true;
1734 }
1735
1736 // Inversely, if we have a signed greater-than-or-equal comparison to zero,
1737 // we can test if the msb is zero.
1738 if (C == 0 && Pred == CmpInst::ICMP_SGE) {
1739 uint64_t Bit = MRI.getType(LHS).getSizeInBits() - 1;
1740 emitTestBit(LHS, Bit, /*IsNegative = */ false, DestMBB, MIB);
1741 I.eraseFromParent();
1742 return true;
1743 }
1744 }
1745
1746 // Attempt to handle commutative condition codes. Right now, that's only
1747 // eq/ne.
1748 if (ICmpInst::isEquality(Pred)) {
1749 if (!VRegAndVal) {
1750 std::swap(RHS, LHS);
1751 VRegAndVal = getIConstantVRegValWithLookThrough(RHS, MRI);
1752 AndInst = getOpcodeDef(TargetOpcode::G_AND, LHS, MRI);
1753 }
1754
1755 if (VRegAndVal && VRegAndVal->Value == 0) {
1756 // If there's a G_AND feeding into this branch, try to fold it away by
1757 // emitting a TB(N)Z instead.
1758 //
1759 // Note: If we have LT, then it *is* possible to fold, but it wouldn't be
1760 // beneficial. When we have an AND and LT, we need a TST/ANDS, so folding
1761 // would be redundant.
1762 if (AndInst &&
1763 tryOptAndIntoCompareBranch(
1764 *AndInst, /*Invert = */ Pred == CmpInst::ICMP_NE, DestMBB, MIB)) {
1765 I.eraseFromParent();
1766 return true;
1767 }
1768
1769 // Otherwise, try to emit a CB(N)Z instead.
1770 auto LHSTy = MRI.getType(LHS);
1771 if (!LHSTy.isVector() && LHSTy.getSizeInBits() <= 64) {
1772 emitCBZ(LHS, /*IsNegative = */ Pred == CmpInst::ICMP_NE, DestMBB, MIB);
1773 I.eraseFromParent();
1774 return true;
1775 }
1776 }
1777 }
1778
1779 return false;
1780}
1781
1782bool AArch64InstructionSelector::selectCompareBranchFedByICmp(
1783 MachineInstr &I, MachineInstr &ICmp, MachineIRBuilder &MIB) const {
1784 assert(ICmp.getOpcode() == TargetOpcode::G_ICMP);
1785 assert(I.getOpcode() == TargetOpcode::G_BRCOND);
1786 if (tryOptCompareBranchFedByICmp(I, ICmp, MIB))
1787 return true;
1788
1789 // Couldn't optimize. Emit a compare + a Bcc.
1790 MachineBasicBlock *DestMBB = I.getOperand(1).getMBB();
1791 auto PredOp = ICmp.getOperand(1);
1792 emitIntegerCompare(ICmp.getOperand(2), ICmp.getOperand(3), PredOp, MIB);
1793 const AArch64CC::CondCode CC = changeICMPPredToAArch64CC(
1794 static_cast<CmpInst::Predicate>(PredOp.getPredicate()));
1795 MIB.buildInstr(AArch64::Bcc, {}, {}).addImm(CC).addMBB(DestMBB);
1796 I.eraseFromParent();
1797 return true;
1798}
1799
1800bool AArch64InstructionSelector::selectCompareBranch(
1801 MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) {
1802 Register CondReg = I.getOperand(0).getReg();
1803 MachineInstr *CCMI = MRI.getVRegDef(CondReg);
1804 // Try to select the G_BRCOND using whatever is feeding the condition if
1805 // possible.
1806 unsigned CCMIOpc = CCMI->getOpcode();
1807 if (CCMIOpc == TargetOpcode::G_FCMP)
1808 return selectCompareBranchFedByFCmp(I, *CCMI, MIB);
1809 if (CCMIOpc == TargetOpcode::G_ICMP)
1810 return selectCompareBranchFedByICmp(I, *CCMI, MIB);
1811
1812 // Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z
1813 // instructions will not be produced, as they are conditional branch
1814 // instructions that do not set flags.
1815 if (ProduceNonFlagSettingCondBr) {
1816 emitTestBit(CondReg, /*Bit = */ 0, /*IsNegative = */ true,
1817 I.getOperand(1).getMBB(), MIB);
1818 I.eraseFromParent();
1819 return true;
1820 }
1821
1822 // Can't emit TB(N)Z/CB(N)Z. Emit a tst + bcc instead.
1823 auto TstMI =
1824 MIB.buildInstr(AArch64::ANDSWri, {LLT::scalar(32)}, {CondReg}).addImm(1);
1825 constrainSelectedInstRegOperands(*TstMI, TII, TRI, RBI);
1826 auto Bcc = MIB.buildInstr(AArch64::Bcc)
1827 .addImm(AArch64CC::NE)
1828 .addMBB(I.getOperand(1).getMBB());
1829 I.eraseFromParent();
1830 return constrainSelectedInstRegOperands(*Bcc, TII, TRI, RBI);
1831}
1832
1833/// Returns the element immediate value of a vector shift operand if found.
1834/// This needs to detect a splat-like operation, e.g. a G_BUILD_VECTOR.
1835static std::optional<int64_t> getVectorShiftImm(Register Reg,
1836 MachineRegisterInfo &MRI) {
1837 assert(MRI.getType(Reg).isVector() && "Expected a *vector* shift operand");
1838 MachineInstr *OpMI = MRI.getVRegDef(Reg);
1839 return getAArch64VectorSplatScalar(*OpMI, MRI);
1840}
1841
1842/// Matches and returns the shift immediate value for a SHL instruction given
1843/// a shift operand.
1844static std::optional<int64_t> getVectorSHLImm(LLT SrcTy, Register Reg,
1845 MachineRegisterInfo &MRI) {
1846 std::optional<int64_t> ShiftImm = getVectorShiftImm(Reg, MRI);
1847 if (!ShiftImm)
1848 return std::nullopt;
1849 // Check the immediate is in range for a SHL.
1850 int64_t Imm = *ShiftImm;
1851 if (Imm < 0)
1852 return std::nullopt;
1853 switch (SrcTy.getElementType().getSizeInBits()) {
1854 default:
1855 LLVM_DEBUG(dbgs() << "Unhandled element type for vector shift");
1856 return std::nullopt;
1857 case 8:
1858 if (Imm > 7)
1859 return std::nullopt;
1860 break;
1861 case 16:
1862 if (Imm > 15)
1863 return std::nullopt;
1864 break;
1865 case 32:
1866 if (Imm > 31)
1867 return std::nullopt;
1868 break;
1869 case 64:
1870 if (Imm > 63)
1871 return std::nullopt;
1872 break;
1873 }
1874 return Imm;
1875}
1876
1877bool AArch64InstructionSelector::selectVectorSHL(MachineInstr &I,
1878 MachineRegisterInfo &MRI) {
1879 assert(I.getOpcode() == TargetOpcode::G_SHL);
1880 Register DstReg = I.getOperand(0).getReg();
1881 const LLT Ty = MRI.getType(DstReg);
1882 Register Src1Reg = I.getOperand(1).getReg();
1883 Register Src2Reg = I.getOperand(2).getReg();
1884
1885 if (!Ty.isVector())
1886 return false;
1887
1888 // Check if we have a vector of constants on RHS that we can select as the
1889 // immediate form.
1890 std::optional<int64_t> ImmVal = getVectorSHLImm(Ty, Src2Reg, MRI);
1891
1892 unsigned Opc = 0;
1893 if (Ty == LLT::fixed_vector(2, 64)) {
1894 Opc = ImmVal ? AArch64::SHLv2i64_shift : AArch64::USHLv2i64;
1895 } else if (Ty == LLT::fixed_vector(4, 32)) {
1896 Opc = ImmVal ? AArch64::SHLv4i32_shift : AArch64::USHLv4i32;
1897 } else if (Ty == LLT::fixed_vector(2, 32)) {
1898 Opc = ImmVal ? AArch64::SHLv2i32_shift : AArch64::USHLv2i32;
1899 } else if (Ty == LLT::fixed_vector(4, 16)) {
1900 Opc = ImmVal ? AArch64::SHLv4i16_shift : AArch64::USHLv4i16;
1901 } else if (Ty == LLT::fixed_vector(8, 16)) {
1902 Opc = ImmVal ? AArch64::SHLv8i16_shift : AArch64::USHLv8i16;
1903 } else if (Ty == LLT::fixed_vector(16, 8)) {
1904 Opc = ImmVal ? AArch64::SHLv16i8_shift : AArch64::USHLv16i8;
1905 } else if (Ty == LLT::fixed_vector(8, 8)) {
1906 Opc = ImmVal ? AArch64::SHLv8i8_shift : AArch64::USHLv8i8;
1907 } else {
1908 LLVM_DEBUG(dbgs() << "Unhandled G_SHL type");
1909 return false;
1910 }
1911
1912 auto Shl = MIB.buildInstr(Opc, {DstReg}, {Src1Reg});
1913 if (ImmVal)
1914 Shl.addImm(*ImmVal);
1915 else
1916 Shl.addUse(Src2Reg);
1917 constrainSelectedInstRegOperands(*Shl, TII, TRI, RBI);
1918 I.eraseFromParent();
1919 return true;
1920}
1921
1922bool AArch64InstructionSelector::selectVectorAshrLshr(
1923 MachineInstr &I, MachineRegisterInfo &MRI) {
1924 assert(I.getOpcode() == TargetOpcode::G_ASHR ||
1925 I.getOpcode() == TargetOpcode::G_LSHR);
1926 Register DstReg = I.getOperand(0).getReg();
1927 const LLT Ty = MRI.getType(DstReg);
1928 Register Src1Reg = I.getOperand(1).getReg();
1929 Register Src2Reg = I.getOperand(2).getReg();
1930
1931 if (!Ty.isVector())
1932 return false;
1933
1934 bool IsASHR = I.getOpcode() == TargetOpcode::G_ASHR;
1935
1936 // We expect the immediate case to be lowered in the PostLegalCombiner to
1937 // AArch64ISD::VASHR or AArch64ISD::VLSHR equivalents.
1938
1939 // There is no shift right by register instruction, but the shift left by
1940 // register instruction takes a signed shift amount, where negative amounts
1941 // specify a right shift.
1942
1943 unsigned Opc = 0;
1944 unsigned NegOpc = 0;
1945 const TargetRegisterClass *RC =
1946 getRegClassForTypeOnBank(Ty, RBI.getRegBank(AArch64::FPRRegBankID));
1947 if (Ty == LLT::fixed_vector(2, 64)) {
1948 Opc = IsASHR ? AArch64::SSHLv2i64 : AArch64::USHLv2i64;
1949 NegOpc = AArch64::NEGv2i64;
1950 } else if (Ty == LLT::fixed_vector(4, 32)) {
1951 Opc = IsASHR ? AArch64::SSHLv4i32 : AArch64::USHLv4i32;
1952 NegOpc = AArch64::NEGv4i32;
1953 } else if (Ty == LLT::fixed_vector(2, 32)) {
1954 Opc = IsASHR ? AArch64::SSHLv2i32 : AArch64::USHLv2i32;
1955 NegOpc = AArch64::NEGv2i32;
1956 } else if (Ty == LLT::fixed_vector(4, 16)) {
1957 Opc = IsASHR ? AArch64::SSHLv4i16 : AArch64::USHLv4i16;
1958 NegOpc = AArch64::NEGv4i16;
1959 } else if (Ty == LLT::fixed_vector(8, 16)) {
1960 Opc = IsASHR ? AArch64::SSHLv8i16 : AArch64::USHLv8i16;
1961 NegOpc = AArch64::NEGv8i16;
1962 } else if (Ty == LLT::fixed_vector(16, 8)) {
1963 Opc = IsASHR ? AArch64::SSHLv16i8 : AArch64::USHLv16i8;
1964 NegOpc = AArch64::NEGv16i8;
1965 } else if (Ty == LLT::fixed_vector(8, 8)) {
1966 Opc = IsASHR ? AArch64::SSHLv8i8 : AArch64::USHLv8i8;
1967 NegOpc = AArch64::NEGv8i8;
1968 } else {
1969 LLVM_DEBUG(dbgs() << "Unhandled G_ASHR type");
1970 return false;
1971 }
1972
1973 auto Neg = MIB.buildInstr(NegOpc, {RC}, {Src2Reg});
1974 constrainSelectedInstRegOperands(*Neg, TII, TRI, RBI);
1975 auto SShl = MIB.buildInstr(Opc, {DstReg}, {Src1Reg, Neg});
1976 constrainSelectedInstRegOperands(*SShl, TII, TRI, RBI);
1977 I.eraseFromParent();
1978 return true;
1979}
1980
1981bool AArch64InstructionSelector::selectVaStartAAPCS(
1982 MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const {
1983 return false;
1984}
1985
1986bool AArch64InstructionSelector::selectVaStartDarwin(
1987 MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const {
1988 AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
1989 Register ListReg = I.getOperand(0).getReg();
1990
1991 Register ArgsAddrReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
1992
1993 int FrameIdx = FuncInfo->getVarArgsStackIndex();
1994 if (MF.getSubtarget<AArch64Subtarget>().isCallingConvWin64(
1995 MF.getFunction().getCallingConv())) {
1996 FrameIdx = FuncInfo->getVarArgsGPRSize() > 0
1997 ? FuncInfo->getVarArgsGPRIndex()
1998 : FuncInfo->getVarArgsStackIndex();
1999 }
2000
2001 auto MIB =
2002 BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::ADDXri))
2003 .addDef(ArgsAddrReg)
2004 .addFrameIndex(FrameIdx)
2005 .addImm(0)
2006 .addImm(0);
2007
2008 constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
2009
2010 MIB = BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::STRXui))
2011 .addUse(ArgsAddrReg)
2012 .addUse(ListReg)
2013 .addImm(0)
2014 .addMemOperand(*I.memoperands_begin());
2015
2016 constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
2017 I.eraseFromParent();
2018 return true;
2019}
2020
2021void AArch64InstructionSelector::materializeLargeCMVal(
2022 MachineInstr &I, const Value *V, unsigned OpFlags) {
2023 MachineBasicBlock &MBB = *I.getParent();
2024 MachineFunction &MF = *MBB.getParent();
2025 MachineRegisterInfo &MRI = MF.getRegInfo();
2026
2027 auto MovZ = MIB.buildInstr(AArch64::MOVZXi, {&AArch64::GPR64RegClass}, {});
2028 MovZ->addOperand(MF, I.getOperand(1));
2029 MovZ->getOperand(1).setTargetFlags(OpFlags | AArch64II::MO_G0 |
2030 AArch64II::MO_NC);
2031 MovZ->addOperand(MF, MachineOperand::CreateImm(0));
2032 constrainSelectedInstRegOperands(*MovZ, TII, TRI, RBI);
2033
2034 auto BuildMovK = [&](Register SrcReg, unsigned char Flags, unsigned Offset,
2035 Register ForceDstReg) {
2036 Register DstReg = ForceDstReg
2037 ? ForceDstReg
2038 : MRI.createVirtualRegister(&AArch64::GPR64RegClass);
2039 auto MovI = MIB.buildInstr(AArch64::MOVKXi).addDef(DstReg).addUse(SrcReg);
2040 if (auto *GV = dyn_cast<GlobalValue>(V)) {
2041 MovI->addOperand(MF, MachineOperand::CreateGA(
2042 GV, MovZ->getOperand(1).getOffset(), Flags));
2043 } else {
2044 MovI->addOperand(
2045 MF, MachineOperand::CreateBA(cast<BlockAddress>(V),
2046 MovZ->getOperand(1).getOffset(), Flags));
2047 }
2048 MovI->addOperand(MF, MachineOperand::CreateImm(Offset));
2049 constrainSelectedInstRegOperands(*MovI, TII, TRI, RBI);
2050 return DstReg;
2051 };
2052 Register DstReg = BuildMovK(MovZ.getReg(0),
2053 AArch64II::MO_G1 | AArch64II::MO_NC, 16, 0);
2054 DstReg = BuildMovK(DstReg, AArch64II::MO_G2 | AArch64II::MO_NC, 32, 0);
2055 BuildMovK(DstReg, AArch64II::MO_G3, 48, I.getOperand(0).getReg());
2056}
2057
2058bool AArch64InstructionSelector::preISelLower(MachineInstr &I) {
2059 MachineBasicBlock &MBB = *I.getParent();
2060 MachineFunction &MF = *MBB.getParent();
2061 MachineRegisterInfo &MRI = MF.getRegInfo();
2062
2063 switch (I.getOpcode()) {
2064 case TargetOpcode::G_STORE: {
2065 bool Changed = contractCrossBankCopyIntoStore(I, MRI);
2066 MachineOperand &SrcOp = I.getOperand(0);
2067 if (MRI.getType(SrcOp.getReg()).isPointer()) {
2068 // Allow matching with imported patterns for stores of pointers. Unlike
2069 // G_LOAD/G_PTR_ADD, we may not have selected all users. So, emit a copy
2070 // and constrain.
2071 auto Copy = MIB.buildCopy(LLT::scalar(64), SrcOp);
2072 Register NewSrc = Copy.getReg(0);
2073 SrcOp.setReg(NewSrc);
2074 RBI.constrainGenericRegister(NewSrc, AArch64::GPR64RegClass, MRI);
2075 Changed = true;
2076 }
2077 return Changed;
2078 }
2079 case TargetOpcode::G_PTR_ADD:
2080 return convertPtrAddToAdd(I, MRI);
2081 case TargetOpcode::G_LOAD: {
2082 // For scalar loads of pointers, we try to convert the dest type from p0
2083 // to s64 so that our imported patterns can match. Like with the G_PTR_ADD
2084 // conversion, this should be ok because all users should have been
2085 // selected already, so the type doesn't matter for them.
2086 Register DstReg = I.getOperand(0).getReg();
2087 const LLT DstTy = MRI.getType(DstReg);
2088 if (!DstTy.isPointer())
2089 return false;
2090 MRI.setType(DstReg, LLT::scalar(64));
2091 return true;
2092 }
2093 case AArch64::G_DUP: {
2094 // Convert the type from p0 to s64 to help selection.
2095 LLT DstTy = MRI.getType(I.getOperand(0).getReg());
2096 if (!DstTy.isPointerVector())
2097 return false;
2098 auto NewSrc = MIB.buildCopy(LLT::scalar(64), I.getOperand(1).getReg());
2099 MRI.setType(I.getOperand(0).getReg(),
2100 DstTy.changeElementType(LLT::scalar(64)));
2101 MRI.setRegClass(NewSrc.getReg(0), &AArch64::GPR64RegClass);
2102 I.getOperand(1).setReg(NewSrc.getReg(0));
2103 return true;
2104 }
2105 case TargetOpcode::G_UITOFP:
2106 case TargetOpcode::G_SITOFP: {
2107 // If both source and destination regbanks are FPR, then convert the opcode
2108 // to G_SITOF so that the importer can select it to an fpr variant.
2109 // Otherwise, it ends up matching an fpr/gpr variant and adding a cross-bank
2110 // copy.
2111 Register SrcReg = I.getOperand(1).getReg();
2112 LLT SrcTy = MRI.getType(SrcReg);
2113 LLT DstTy = MRI.getType(I.getOperand(0).getReg());
2114 if (SrcTy.isVector() || SrcTy.getSizeInBits() != DstTy.getSizeInBits())
2115 return false;
2116
2117 if (RBI.getRegBank(SrcReg, MRI, TRI)->getID() == AArch64::FPRRegBankID) {
2118 if (I.getOpcode() == TargetOpcode::G_SITOFP)
2119 I.setDesc(TII.get(AArch64::G_SITOF));
2120 else
2121 I.setDesc(TII.get(AArch64::G_UITOF));
2122 return true;
2123 }
2124 return false;
2125 }
2126 default:
2127 return false;
2128 }
2129}
2130
2131/// This lowering tries to look for G_PTR_ADD instructions and then converts
2132/// them to a standard G_ADD with a COPY on the source.
2133///
2134/// The motivation behind this is to expose the add semantics to the imported
2135/// tablegen patterns. We shouldn't need to check for uses being loads/stores,
2136/// because the selector works bottom up, uses before defs. By the time we
2137 /// end up trying to select a G_PTR_ADD, we should already have attempted to
2138 /// fold it into an addressing mode and been unsuccessful.
2139bool AArch64InstructionSelector::convertPtrAddToAdd(
2140 MachineInstr &I, MachineRegisterInfo &MRI) {
2141 assert(I.getOpcode() == TargetOpcode::G_PTR_ADD && "Expected G_PTR_ADD");
2142 Register DstReg = I.getOperand(0).getReg();
2143 Register AddOp1Reg = I.getOperand(1).getReg();
2144 const LLT PtrTy = MRI.getType(DstReg);
2145 if (PtrTy.getAddressSpace() != 0)
2146 return false;
2147
2148 const LLT CastPtrTy =
2149 PtrTy.isVector() ? LLT::fixed_vector(2, 64) : LLT::scalar(64);
2150 auto PtrToInt = MIB.buildPtrToInt(CastPtrTy, AddOp1Reg);
2151 // Set regbanks on the registers.
2152 if (PtrTy.isVector())
2153 MRI.setRegBank(PtrToInt.getReg(0), RBI.getRegBank(AArch64::FPRRegBankID));
2154 else
2155 MRI.setRegBank(PtrToInt.getReg(0), RBI.getRegBank(AArch64::GPRRegBankID));
2156
2157 // Now turn the %dst(p0) = G_PTR_ADD %base, off into:
2158 // %dst(intty) = G_ADD %intbase, off
2159 I.setDesc(TII.get(TargetOpcode::G_ADD));
2160 MRI.setType(DstReg, CastPtrTy);
2161 I.getOperand(1).setReg(PtrToInt.getReg(0));
2162 if (!select(*PtrToInt)) {
2163 LLVM_DEBUG(dbgs() << "Failed to select G_PTRTOINT in convertPtrAddToAdd");
2164 return false;
2165 }
2166
2167 // Also take the opportunity here to try to do some optimization.
2168 // Try to convert this into a G_SUB if the offset is a 0-x negate idiom.
2169 Register NegatedReg;
2170 if (!mi_match(I.getOperand(2).getReg(), MRI, m_Neg(m_Reg(NegatedReg))))
2171 return true;
2172 I.getOperand(2).setReg(NegatedReg);
2173 I.setDesc(TII.get(TargetOpcode::G_SUB));
2174 return true;
2175}
2176
2177bool AArch64InstructionSelector::earlySelectSHL(MachineInstr &I,
2178 MachineRegisterInfo &MRI) {
2179 // We try to match the immediate variant of LSL, which is actually an alias
2180 // for a special case of UBFM. Otherwise, we fall back to the imported
2181 // selector which will match the register variant.
2182 assert(I.getOpcode() == TargetOpcode::G_SHL && "unexpected op");
2183 const auto &MO = I.getOperand(2);
2184 auto VRegAndVal = getIConstantVRegVal(MO.getReg(), MRI);
2185 if (!VRegAndVal)
2186 return false;
2187
2188 const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
2189 if (DstTy.isVector())
2190 return false;
2191 bool Is64Bit = DstTy.getSizeInBits() == 64;
2192 auto Imm1Fn = Is64Bit ? selectShiftA_64(MO) : selectShiftA_32(MO);
2193 auto Imm2Fn = Is64Bit ? selectShiftB_64(MO) : selectShiftB_32(MO);
2194
2195 if (!Imm1Fn || !Imm2Fn)
2196 return false;
2197
2198 auto NewI =
2199 MIB.buildInstr(Is64Bit ? AArch64::UBFMXri : AArch64::UBFMWri,
2200 {I.getOperand(0).getReg()}, {I.getOperand(1).getReg()});
2201
2202 for (auto &RenderFn : *Imm1Fn)
2203 RenderFn(NewI);
2204 for (auto &RenderFn : *Imm2Fn)
2205 RenderFn(NewI);
2206
2207 I.eraseFromParent();
2208 return constrainSelectedInstRegOperands(*NewI, TII, TRI, RBI);
2209}
2210
2211bool AArch64InstructionSelector::contractCrossBankCopyIntoStore(
2212 MachineInstr &I, MachineRegisterInfo &MRI) {
2213 assert(I.getOpcode() == TargetOpcode::G_STORE && "Expected G_STORE");
2214 // If we're storing a scalar, it doesn't matter what register bank that
2215 // scalar is on. All that matters is the size.
2216 //
2217 // So, if we see something like this (with a 32-bit scalar as an example):
2218 //
2219 // %x:gpr(s32) = ... something ...
2220 // %y:fpr(s32) = COPY %x:gpr(s32)
2221 // G_STORE %y:fpr(s32)
2222 //
2223 // We can fix this up into something like this:
2224 //
2225 // G_STORE %x:gpr(s32)
2226 //
2227 // And then continue the selection process normally.
2228 Register DefDstReg = getSrcRegIgnoringCopies(I.getOperand(0).getReg(), MRI);
2229 if (!DefDstReg.isValid())
2230 return false;
2231 LLT DefDstTy = MRI.getType(DefDstReg);
2232 Register StoreSrcReg = I.getOperand(0).getReg();
2233 LLT StoreSrcTy = MRI.getType(StoreSrcReg);
2234
2235 // If we get something strange like a physical register, then we shouldn't
2236 // go any further.
2237 if (!DefDstTy.isValid())
2238 return false;
2239
2240 // Are the source and dst types the same size?
2241 if (DefDstTy.getSizeInBits() != StoreSrcTy.getSizeInBits())
2242 return false;
2243
2244 if (RBI.getRegBank(StoreSrcReg, MRI, TRI) ==
2245 RBI.getRegBank(DefDstReg, MRI, TRI))
2246 return false;
2247
2248 // We have a cross-bank copy, which is entering a store. Let's fold it.
2249 I.getOperand(0).setReg(DefDstReg);
2250 return true;
2251}
2252
2253bool AArch64InstructionSelector::earlySelect(MachineInstr &I) {
2254 assert(I.getParent() && "Instruction should be in a basic block!");
2255 assert(I.getParent()->getParent() && "Instruction should be in a function!");
2256
2257 MachineBasicBlock &MBB = *I.getParent();
2258 MachineFunction &MF = *MBB.getParent();
2259 MachineRegisterInfo &MRI = MF.getRegInfo();
2260
2261 switch (I.getOpcode()) {
2262 case AArch64::G_DUP: {
2263 // Before selecting a DUP instruction, check if it is better selected as a
2264 // MOV or load from a constant pool.
2265 Register Src = I.getOperand(1).getReg();
2266 auto ValAndVReg = getAnyConstantVRegValWithLookThrough(Src, MRI);
2267 if (!ValAndVReg)
2268 return false;
2269 LLVMContext &Ctx = MF.getFunction().getContext();
2270 Register Dst = I.getOperand(0).getReg();
2271 auto *CV = ConstantDataVector::getSplat(
2272 MRI.getType(Dst).getNumElements(),
2273 ConstantInt::get(Type::getIntNTy(Ctx, MRI.getType(Src).getSizeInBits()),
2274 ValAndVReg->Value));
2275 if (!emitConstantVector(Dst, CV, MIB, MRI))
2276 return false;
2277 I.eraseFromParent();
2278 return true;
2279 }
2280 case TargetOpcode::G_SEXT:
2281 // Check for i64 sext(i32 vector_extract) prior to tablegen to select SMOV
2282 // over a normal extend.
2283 if (selectUSMovFromExtend(I, MRI))
2284 return true;
2285 return false;
2286 case TargetOpcode::G_BR:
2287 return false;
2288 case TargetOpcode::G_SHL:
2289 return earlySelectSHL(I, MRI);
2290 case TargetOpcode::G_CONSTANT: {
2291 bool IsZero = false;
2292 if (I.getOperand(1).isCImm())
2293 IsZero = I.getOperand(1).getCImm()->isZero();
2294 else if (I.getOperand(1).isImm())
2295 IsZero = I.getOperand(1).getImm() == 0;
2296
2297 if (!IsZero)
2298 return false;
2299
2300 Register DefReg = I.getOperand(0).getReg();
2301 LLT Ty = MRI.getType(DefReg);
2302 if (Ty.getSizeInBits() == 64) {
2303 I.getOperand(1).ChangeToRegister(AArch64::XZR, false);
2304 RBI.constrainGenericRegister(DefReg, AArch64::GPR64RegClass, MRI);
2305 } else if (Ty.getSizeInBits() == 32) {
2306 I.getOperand(1).ChangeToRegister(AArch64::WZR, false);
2307 RBI.constrainGenericRegister(DefReg, AArch64::GPR32RegClass, MRI);
2308 } else
2309 return false;
2310
2311 I.setDesc(TII.get(TargetOpcode::COPY));
2312 return true;
2313 }
2314
2315 case TargetOpcode::G_ADD: {
2316 // Check if this is being fed by a G_ICMP on either side.
2317 //
2318 // (cmp pred, x, y) + z
2319 //
2320 // In the above case, when the cmp is true, we increment z by 1. So, we can
2321 // fold the add into the cset for the cmp by using cinc.
2322 //
2323 // FIXME: This would probably be a lot nicer in PostLegalizerLowering.
2324 Register AddDst = I.getOperand(0).getReg();
2325 Register AddLHS = I.getOperand(1).getReg();
2326 Register AddRHS = I.getOperand(2).getReg();
2327 // Only handle scalars.
2328 LLT Ty = MRI.getType(AddLHS);
2329 if (Ty.isVector())
2330 return false;
2331 // Since G_ICMP is modeled as ADDS/SUBS/ANDS, we can handle 32 bits or 64
2332 // bits.
2333 unsigned Size = Ty.getSizeInBits();
2334 if (Size != 32 && Size != 64)
2335 return false;
2336 auto MatchCmp = [&](Register Reg) -> MachineInstr * {
2337 if (!MRI.hasOneNonDBGUse(Reg))
2338 return nullptr;
2339 // If the LHS of the add is 32 bits, then we want to fold a 32-bit
2340 // compare.
2341 if (Size == 32)
2342 return getOpcodeDef(TargetOpcode::G_ICMP, Reg, MRI);
2343 // We model scalar compares using 32-bit destinations right now.
2344 // If it's a 64-bit compare, it'll have 64-bit sources.
2345 Register ZExt;
2346 if (!mi_match(Reg, MRI,
2347 m_OneNonDBGUse(m_GZExt(m_OneNonDBGUse(m_Reg(ZExt))))))
2348 return nullptr;
2349 auto *Cmp = getOpcodeDef(TargetOpcode::G_ICMP, ZExt, MRI);
2350 if (!Cmp ||
2351 MRI.getType(Cmp->getOperand(2).getReg()).getSizeInBits() != 64)
2352 return nullptr;
2353 return Cmp;
2354 };
2355 // Try to match
2356 // z + (cmp pred, x, y)
2357 MachineInstr *Cmp = MatchCmp(AddRHS);
2358 if (!Cmp) {
2359 // (cmp pred, x, y) + z
2360 std::swap(AddLHS, AddRHS);
2361 Cmp = MatchCmp(AddRHS);
2362 if (!Cmp)
2363 return false;
2364 }
2365 auto &PredOp = Cmp->getOperand(1);
2366 auto Pred = static_cast<CmpInst::Predicate>(PredOp.getPredicate());
2367 const AArch64CC::CondCode InvCC =
2368 changeICMPPredToAArch64CC(
2369 CmpInst::getInversePredicate(Pred));
2370 emitIntegerCompare(/*LHS=*/Cmp->getOperand(2),
2371 /*RHS=*/Cmp->getOperand(3), PredOp, MIB);
2372 emitCSINC(/*Dst=*/AddDst, /*Src =*/AddLHS, /*Src2=*/AddLHS, InvCC, MIB);
2373 I.eraseFromParent();
2374 return true;
2375 }
2376 case TargetOpcode::G_OR: {
2377 // Look for operations that take the lower `Width=Size-ShiftImm` bits of
2378 // `ShiftSrc` and insert them into the upper `Width` bits of `MaskSrc` via
2379 // shifting and masking that we can replace with a BFI (encoded as a BFM).
2380 Register Dst = I.getOperand(0).getReg();
2381 LLT Ty = MRI.getType(Dst);
2382
2383 if (!Ty.isScalar())
2384 return false;
2385
2386 unsigned Size = Ty.getSizeInBits();
2387 if (Size != 32 && Size != 64)
2388 return false;
2389
2390 Register ShiftSrc;
2391 int64_t ShiftImm;
2392 Register MaskSrc;
2393 int64_t MaskImm;
2394 if (!mi_match(
2395 Dst, MRI,
2396 m_GOr(m_OneNonDBGUse(m_GShl(m_Reg(ShiftSrc), m_ICst(ShiftImm))),
2397 m_OneNonDBGUse(m_GAnd(m_Reg(MaskSrc), m_ICst(MaskImm))))))
2398 return false;
2399
2400 if (ShiftImm > Size || ((1ULL << ShiftImm) - 1ULL) != uint64_t(MaskImm))
2401 return false;
2402
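// The OR is a BFI of the low (Size - ShiftImm) bits of ShiftSrc into MaskSrc
// at bit ShiftImm. In BFM terms that is immr = Size - ShiftImm (the rotate
// that moves the field into place) and imms = Size - ShiftImm - 1 (the field
// width minus one). For example, with Size = 32 and ShiftImm = 8 this emits
// BFMWri %dst, %masksrc, %shiftsrc, 24, 23, i.e. BFI %dst, %shiftsrc, #8, #24.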
2403 int64_t Immr = Size - ShiftImm;
2404 int64_t Imms = Size - ShiftImm - 1;
2405 unsigned Opc = Size == 32 ? AArch64::BFMWri : AArch64::BFMXri;
2406 emitInstr(Opc, {Dst}, {MaskSrc, ShiftSrc, Immr, Imms}, MIB);
2407 I.eraseFromParent();
2408 return true;
2409 }
2410 case TargetOpcode::G_FENCE: {
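// A fence on the single-thread sync scope (operand 1 == 0) only needs a
// compiler barrier (MEMBARRIER). Otherwise emit a DMB: ISHLD (0x9) for
// acquire fences (ordering == 4) and ISH (0xb) for anything stronger.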
2411 if (I.getOperand(1).getImm() == 0)
2412 BuildMI(MBB, I, MIMetadata(I), TII.get(TargetOpcode::MEMBARRIER));
2413 else
2414 BuildMI(MBB, I, MIMetadata(I), TII.get(AArch64::DMB))
2415 .addImm(I.getOperand(0).getImm() == 4 ? 0x9 : 0xb);
2416 I.eraseFromParent();
2417 return true;
2418 }
2419 default:
2420 return false;
2421 }
2422}
2423
2424bool AArch64InstructionSelector::select(MachineInstr &I) {
2425 assert(I.getParent() && "Instruction should be in a basic block!");
2426 assert(I.getParent()->getParent() && "Instruction should be in a function!");
2427
2428 MachineBasicBlock &MBB = *I.getParent();
2429 MachineFunction &MF = *MBB.getParent();
2430 MachineRegisterInfo &MRI = MF.getRegInfo();
2431
2432 const AArch64Subtarget *Subtarget = &MF.getSubtarget<AArch64Subtarget>();
2433 if (Subtarget->requiresStrictAlign()) {
2434 // We don't support this feature yet.
2435 LLVM_DEBUG(dbgs() << "AArch64 GISel does not support strict-align yet\n");
2436 return false;
2437 }
2438
2439 MIB.setInstrAndDebugLoc(I);
2440
2441 unsigned Opcode = I.getOpcode();
2442 // G_PHI requires same handling as PHI
2443 if (!I.isPreISelOpcode() || Opcode == TargetOpcode::G_PHI) {
2444 // Certain non-generic instructions also need some special handling.
2445
2446 if (Opcode == TargetOpcode::LOAD_STACK_GUARD)
2447 return selectImpl(I, *CoverageInfo);
2448
2449 if (Opcode == TargetOpcode::PHI || Opcode == TargetOpcode::G_PHI) {
2450 const Register DefReg = I.getOperand(0).getReg();
2451 const LLT DefTy = MRI.getType(DefReg);
2452
2453 const RegClassOrRegBank &RegClassOrBank =
2454 MRI.getRegClassOrRegBank(DefReg);
2455
2456 const TargetRegisterClass *DefRC
2457 = RegClassOrBank.dyn_cast<const TargetRegisterClass *>();
2458 if (!DefRC) {
2459 if (!DefTy.isValid()) {
2460 LLVM_DEBUG(dbgs() << "PHI operand has no type, not a gvreg?\n");
2461 return false;
2462 }
2463 const RegisterBank &RB = *RegClassOrBank.get<const RegisterBank *>();
2464 DefRC = getRegClassForTypeOnBank(DefTy, RB);
2465 if (!DefRC) {
2466 LLVM_DEBUG(dbgs() << "PHI operand has unexpected size/bank\n");
2467 return false;
2468 }
2469 }
2470
2471 I.setDesc(TII.get(TargetOpcode::PHI));
2472
2473 return RBI.constrainGenericRegister(DefReg, *DefRC, MRI);
2474 }
2475
2476 if (I.isCopy())
2477 return selectCopy(I, TII, MRI, TRI, RBI);
2478
2479 if (I.isDebugInstr())
2480 return selectDebugInstr(I, MRI, RBI);
2481
2482 return true;
2483 }
2484
2485
2486 if (I.getNumOperands() != I.getNumExplicitOperands()) {
2487 LLVM_DEBUG(
2488 dbgs() << "Generic instruction has unexpected implicit operands\n");
2489 return false;
2490 }
2491
2492 // Try to do some lowering before we start instruction selecting. These
2493 // lowerings are purely transformations on the input G_MIR and so selection
2494 // must continue after any modification of the instruction.
2495 if (preISelLower(I)) {
2496 Opcode = I.getOpcode(); // The opcode may have been modified, refresh it.
2497 }
2498
2499 // There may be patterns that the importer can't handle optimally, but still
2500 // selects into a suboptimal sequence, so our custom C++ selection code later
2501 // never has a chance to work on them. Therefore, we have an early selection
2502 // attempt here to give priority to certain selection routines over the
2503 // imported ones.
2504 if (earlySelect(I))
2505 return true;
2506
2507 if (selectImpl(I, *CoverageInfo))
2508 return true;
2509
2510 LLT Ty =
2511 I.getOperand(0).isReg() ? MRI.getType(I.getOperand(0).getReg()) : LLT{};
2512
2513 switch (Opcode) {
2514 case TargetOpcode::G_SBFX:
2515 case TargetOpcode::G_UBFX: {
2516 static const unsigned OpcTable[2][2] = {
2517 {AArch64::UBFMWri, AArch64::UBFMXri},
2518 {AArch64::SBFMWri, AArch64::SBFMXri}};
2519 bool IsSigned = Opcode == TargetOpcode::G_SBFX;
2520 unsigned Size = Ty.getSizeInBits();
2521 unsigned Opc = OpcTable[IsSigned][Size == 64];
2522 auto Cst1 =
2523 getIConstantVRegValWithLookThrough(I.getOperand(2).getReg(), MRI);
2524 assert(Cst1 && "Should have gotten a constant for src 1?");
2525 auto Cst2 =
2526 getIConstantVRegValWithLookThrough(I.getOperand(3).getReg(), MRI);
2527 assert(Cst2 && "Should have gotten a constant for src 2?");
2528 auto LSB = Cst1->Value.getZExtValue();
2529 auto Width = Cst2->Value.getZExtValue();
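// G_SBFX/G_UBFX extract Width bits starting at bit LSB; S/UBFM encodes that
// field as immr = LSB and imms = LSB + Width - 1.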
2530 auto BitfieldInst =
2531 MIB.buildInstr(Opc, {I.getOperand(0)}, {I.getOperand(1)})
2532 .addImm(LSB)
2533 .addImm(LSB + Width - 1);
2534 I.eraseFromParent();
2535 return constrainSelectedInstRegOperands(*BitfieldInst, TII, TRI, RBI);
2536 }
2537 case TargetOpcode::G_BRCOND:
2538 return selectCompareBranch(I, MF, MRI);
2539
2540 case TargetOpcode::G_BRINDIRECT: {
2541 I.setDesc(TII.get(AArch64::BR));
2542 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2543 }
2544
2545 case TargetOpcode::G_BRJT:
2546 return selectBrJT(I, MRI);
2547
2548 case AArch64::G_ADD_LOW: {
2549 // This op may have been separated from its ADRP companion by the localizer
2550 // or some other code motion pass. Given that many CPUs will try to
2551 // macro fuse these operations anyway, select this into a MOVaddr pseudo
2552 // which will later be expanded into an ADRP+ADD pair after scheduling.
2553 MachineInstr *BaseMI = MRI.getVRegDef(I.getOperand(1).getReg());
2554 if (BaseMI->getOpcode() != AArch64::ADRP) {
2555 I.setDesc(TII.get(AArch64::ADDXri));
2556 I.addOperand(MachineOperand::CreateImm(0));
2557 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2558 }
2559 assert(TM.getCodeModel() == CodeModel::Small &&
2560 "Expected small code model");
2561 auto Op1 = BaseMI->getOperand(1);
2562 auto Op2 = I.getOperand(2);
2563 auto MovAddr = MIB.buildInstr(AArch64::MOVaddr, {I.getOperand(0)}, {})
2564 .addGlobalAddress(Op1.getGlobal(), Op1.getOffset(),
2565 Op1.getTargetFlags())
2566 .addGlobalAddress(Op2.getGlobal(), Op2.getOffset(),
2567 Op2.getTargetFlags());
2568 I.eraseFromParent();
2569 return constrainSelectedInstRegOperands(*MovAddr, TII, TRI, RBI);
2570 }
2571
2572 case TargetOpcode::G_FCONSTANT:
2573 case TargetOpcode::G_CONSTANT: {
2574 const bool isFP = Opcode == TargetOpcode::G_FCONSTANT;
2575
2576 const LLT s8 = LLT::scalar(8);
2577 const LLT s16 = LLT::scalar(16);
2578 const LLT s32 = LLT::scalar(32);
2579 const LLT s64 = LLT::scalar(64);
2580 const LLT s128 = LLT::scalar(128);
2581 const LLT p0 = LLT::pointer(0, 64);
2582
2583 const Register DefReg = I.getOperand(0).getReg();
2584 const LLT DefTy = MRI.getType(DefReg);
2585 const unsigned DefSize = DefTy.getSizeInBits();
2586 const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI);
2587
2588 // FIXME: Redundant check, but even less readable when factored out.
2589 if (isFP) {
2590 if (Ty != s16 && Ty != s32 && Ty != s64 && Ty != s128) {
2591 LLVM_DEBUG(dbgs() << "Unable to materialize FP " << Ty
2592 << " constant, expected: " << s16 << " or " << s32
2593 << " or " << s64 << " or " << s128 << '\n');
2594 return false;
2595 }
2596
2597 if (RB.getID() != AArch64::FPRRegBankID) {
2598 LLVM_DEBUG(dbgs() << "Unable to materialize FP " << Ty
2599 << " constant on bank: " << RB
2600 << ", expected: FPR\n");
2601 return false;
2602 }
2603
2604 // The case when we have 0.0 is covered by tablegen. Reject it here so we
2605 // can be sure tablegen works correctly and isn't rescued by this code.
2606 // 0.0 is not covered by tablegen for FP128. So we will handle this
2607 // scenario in the code here.
2608 if (DefSize != 128 && I.getOperand(1).getFPImm()->isExactlyValue(0.0))
2609 return false;
2610 } else {
2611 // s32 and s64 are covered by tablegen.
2612 if (Ty != p0 && Ty != s8 && Ty != s16) {
2613 LLVM_DEBUG(dbgs() << "Unable to materialize integer " << Ty
2614 << " constant, expected: " << s32 << ", " << s64
2615 << ", or " << p0 << '\n');
2616 return false;
2617 }
2618
2619 if (RB.getID() != AArch64::GPRRegBankID) {
2620 LLVM_DEBUG(dbgs() << "Unable to materialize integer " << Ty
2621 << " constant on bank: " << RB
2622 << ", expected: GPR\n");
2623 return false;
2624 }
2625 }
2626
2627 if (isFP) {
2628 const TargetRegisterClass &FPRRC = *getRegClassForTypeOnBank(DefTy, RB);
2629 // For 16, 64, and 128b values, emit a constant pool load.
2630 switch (DefSize) {
2631 default:
2632 llvm_unreachable("Unexpected destination size for G_FCONSTANT?");
2633 case 32:
2634 case 64: {
2635 bool OptForSize = shouldOptForSize(&MF);
2636 const auto &TLI = MF.getSubtarget().getTargetLowering();
2637 // If TLI says that this fpimm is illegal, then we'll expand to a
2638 // constant pool load.
2639 if (TLI->isFPImmLegal(I.getOperand(1).getFPImm()->getValueAPF(),
2640 EVT::getFloatingPointVT(DefSize), OptForSize))
2641 break;
2642 [[fallthrough]];
2643 }
2644 case 16:
2645 case 128: {
2646 auto *FPImm = I.getOperand(1).getFPImm();
2647 auto *LoadMI = emitLoadFromConstantPool(FPImm, MIB);
2648 if (!LoadMI) {
2649 LLVM_DEBUG(dbgs() << "Failed to load double constant pool entry\n");
2650 return false;
2651 }
2652 MIB.buildCopy({DefReg}, {LoadMI->getOperand(0).getReg()});
2653 I.eraseFromParent();
2654 return RBI.constrainGenericRegister(DefReg, FPRRC, MRI);
2655 }
2656 }
2657
2658 assert((DefSize == 32 || DefSize == 64) && "Unexpected const def size");
2659 // Either emit a FMOV, or emit a copy to emit a normal mov.
2660 const Register DefGPRReg = MRI.createVirtualRegister(
2661 DefSize == 32 ? &AArch64::GPR32RegClass : &AArch64::GPR64RegClass);
2662 MachineOperand &RegOp = I.getOperand(0);
2663 RegOp.setReg(DefGPRReg);
2664 MIB.setInsertPt(MIB.getMBB(), std::next(I.getIterator()));
2665 MIB.buildCopy({DefReg}, {DefGPRReg});
2666
2667 if (!RBI.constrainGenericRegister(DefReg, FPRRC, MRI)) {
2668 LLVM_DEBUG(dbgs() << "Failed to constrain G_FCONSTANT def operand\n");
2669 return false;
2670 }
2671
2672 MachineOperand &ImmOp = I.getOperand(1);
2673 // FIXME: Is going through int64_t always correct?
2674 ImmOp.ChangeToImmediate(
2675 ImmOp.getFPImm()->getValueAPF().bitcastToAPInt().getZExtValue());
2676 } else if (I.getOperand(1).isCImm()) {
2677 uint64_t Val = I.getOperand(1).getCImm()->getZExtValue();
2678 I.getOperand(1).ChangeToImmediate(Val);
2679 } else if (I.getOperand(1).isImm()) {
2680 uint64_t Val = I.getOperand(1).getImm();
2681 I.getOperand(1).ChangeToImmediate(Val);
2682 }
2683
2684 const unsigned MovOpc =
2685 DefSize == 64 ? AArch64::MOVi64imm : AArch64::MOVi32imm;
2686 I.setDesc(TII.get(MovOpc));
2687 constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2688 return true;
2689 }
2690 case TargetOpcode::G_EXTRACT: {
2691 Register DstReg = I.getOperand(0).getReg();
2692 Register SrcReg = I.getOperand(1).getReg();
2693 LLT SrcTy = MRI.getType(SrcReg);
2694 LLT DstTy = MRI.getType(DstReg);
2695 (void)DstTy;
2696 unsigned SrcSize = SrcTy.getSizeInBits();
2697
2698 if (SrcTy.getSizeInBits() > 64) {
2699 // This should be an extract of an s128, which is like a vector extract.
2700 if (SrcTy.getSizeInBits() != 128)
2701 return false;
2702 // Only support extracting 64 bits from an s128 at the moment.
2703 if (DstTy.getSizeInBits() != 64)
2704 return false;
2705
2706 unsigned Offset = I.getOperand(2).getImm();
2707 if (Offset % 64 != 0)
2708 return false;
2709
2710 // Check we have the right regbank always.
2711 const RegisterBank &SrcRB = *RBI.getRegBank(SrcReg, MRI, TRI);
2712 const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
2713 assert(SrcRB.getID() == DstRB.getID() && "Wrong extract regbank!");
2714
2715 if (SrcRB.getID() == AArch64::GPRRegBankID) {
2716 auto NewI =
2717 MIB.buildInstr(TargetOpcode::COPY, {DstReg}, {})
2718 .addUse(SrcReg, 0,
2719 Offset == 0 ? AArch64::sube64 : AArch64::subo64);
2720 constrainOperandRegClass(MF, TRI, MRI, TII, RBI, *NewI,
2721 AArch64::GPR64RegClass, NewI->getOperand(0));
2722 I.eraseFromParent();
2723 return true;
2724 }
2725
2726 // Emit the same code as a vector extract.
2727 // Offset must be a multiple of 64.
2728 unsigned LaneIdx = Offset / 64;
2729 MachineInstr *Extract = emitExtractVectorElt(
2730 DstReg, DstRB, LLT::scalar(64), SrcReg, LaneIdx, MIB);
2731 if (!Extract)
2732 return false;
2733 I.eraseFromParent();
2734 return true;
2735 }
2736
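// Sub-64-bit extracts become a UBFM: the existing offset operand is reused as
// immr, and imms = offset + destination width - 1 marks the top of the field.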
2737 I.setDesc(TII.get(SrcSize == 64 ? AArch64::UBFMXri : AArch64::UBFMWri));
2738 MachineInstrBuilder(MF, I).addImm(I.getOperand(2).getImm() +
2739 Ty.getSizeInBits() - 1);
2740
2741 if (SrcSize < 64) {
2742 assert(SrcSize == 32 && DstTy.getSizeInBits() == 16 &&
2743 "unexpected G_EXTRACT types");
2744 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2745 }
2746
2747 DstReg = MRI.createGenericVirtualRegister(LLT::scalar(64));
2748 MIB.setInsertPt(MIB.getMBB(), std::next(I.getIterator()));
2749 MIB.buildInstr(TargetOpcode::COPY, {I.getOperand(0).getReg()}, {})
2750 .addReg(DstReg, 0, AArch64::sub_32);
2751 RBI.constrainGenericRegister(I.getOperand(0).getReg(),
2752 AArch64::GPR32RegClass, MRI);
2753 I.getOperand(0).setReg(DstReg);
2754
2755 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2756 }
2757
2758 case TargetOpcode::G_INSERT: {
2759 LLT SrcTy = MRI.getType(I.getOperand(2).getReg());
2760 LLT DstTy = MRI.getType(I.getOperand(0).getReg());
2761 unsigned DstSize = DstTy.getSizeInBits();
2762 // Larger inserts are vectors; same-size ones should be something else by
2763 // now (split up or turned into COPYs).
2764 if (Ty.getSizeInBits() > 64 || SrcTy.getSizeInBits() > 32)
2765 return false;
2766
2767 I.setDesc(TII.get(DstSize == 64 ? AArch64::BFMXri : AArch64::BFMWri));
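// In BFM terms, immr = (DstSize - LSB) % DstSize rotates the source field up
// to bit LSB, and imms = Width - 1 selects the low Width bits to insert.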
2768 unsigned LSB = I.getOperand(3).getImm();
2769 unsigned Width = MRI.getType(I.getOperand(2).getReg()).getSizeInBits();
2770 I.getOperand(3).setImm((DstSize - LSB) % DstSize);
2771 MachineInstrBuilder(MF, I).addImm(Width - 1);
2772
2773 if (DstSize < 64) {
2774 assert(DstSize == 32 && SrcTy.getSizeInBits() == 16 &&
2775 "unexpected G_INSERT types");
2776 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2777 }
2778
2779 Register SrcReg = MRI.createGenericVirtualRegister(LLT::scalar(64));
2780 BuildMI(MBB, I.getIterator(), I.getDebugLoc(),
2781 TII.get(AArch64::SUBREG_TO_REG))
2782 .addDef(SrcReg)
2783 .addImm(0)
2784 .addUse(I.getOperand(2).getReg())
2785 .addImm(AArch64::sub_32);
2786 RBI.constrainGenericRegister(I.getOperand(2).getReg(),
2787 AArch64::GPR32RegClass, MRI);
2788 I.getOperand(2).setReg(SrcReg);
2789
2790 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2791 }
2792 case TargetOpcode::G_FRAME_INDEX: {
2793 // allocas and G_FRAME_INDEX are only supported in addrspace(0).
2794 if (Ty != LLT::pointer(0, 64)) {
2795 LLVM_DEBUG(dbgs() << "G_FRAME_INDEX pointer has type: " << Ty
2796 << ", expected: " << LLT::pointer(0, 64) << '\n');
2797 return false;
2798 }
2799 I.setDesc(TII.get(AArch64::ADDXri));
2800
2801 // MOs for a #0 shifted immediate.
2802 I.addOperand(MachineOperand::CreateImm(0));
2803 I.addOperand(MachineOperand::CreateImm(0));
2804
2805 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2806 }
2807
2808 case TargetOpcode::G_GLOBAL_VALUE: {
2809 const GlobalValue *GV = nullptr;
2810 unsigned OpFlags;
2811 if (I.getOperand(1).isSymbol()) {
2812 OpFlags = I.getOperand(1).getTargetFlags();
2813 // Currently only used by "RtLibUseGOT".
2814 assert(OpFlags == AArch64II::MO_GOT);
2815 } else {
2816 GV = I.getOperand(1).getGlobal();
2817 if (GV->isThreadLocal())
2818 return selectTLSGlobalValue(I, MRI);
2819 OpFlags = STI.ClassifyGlobalReference(GV, TM);
2820 }
2821
2822 if (OpFlags & AArch64II::MO_GOT) {
2823 I.setDesc(TII.get(AArch64::LOADgot));
2824 I.getOperand(1).setTargetFlags(OpFlags);
2825 } else if (TM.getCodeModel() == CodeModel::Large &&
2826 !TM.isPositionIndependent()) {
2827 // Materialize the global using movz/movk instructions.
2828 materializeLargeCMVal(I, GV, OpFlags);
2829 I.eraseFromParent();
2830 return true;
2831 } else if (TM.getCodeModel() == CodeModel::Tiny) {
2832 I.setDesc(TII.get(AArch64::ADR));
2833 I.getOperand(1).setTargetFlags(OpFlags);
2834 } else {
2835 I.setDesc(TII.get(AArch64::MOVaddr));
2836 I.getOperand(1).setTargetFlags(OpFlags | AArch64II::MO_PAGE);
2837 MachineInstrBuilder MIB(MF, I);
2838 MIB.addGlobalAddress(GV, I.getOperand(1).getOffset(),
2839 OpFlags | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
2840 }
2841 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2842 }
2843
2844 case TargetOpcode::G_ZEXTLOAD:
2845 case TargetOpcode::G_LOAD:
2846 case TargetOpcode::G_STORE: {
2847 GLoadStore &LdSt = cast<GLoadStore>(I);
2848 bool IsZExtLoad = I.getOpcode() == TargetOpcode::G_ZEXTLOAD;
2849 LLT PtrTy = MRI.getType(LdSt.getPointerReg());
2850
2851 if (PtrTy != LLT::pointer(0, 64)) {
2852 LLVM_DEBUG(dbgs() << "Load/Store pointer has type: " << PtrTy
2853 << ", expected: " << LLT::pointer(0, 64) << '\n');
2854 return false;
2855 }
2856
2857 uint64_t MemSizeInBytes = LdSt.getMemSize().getValue();
2858 unsigned MemSizeInBits = LdSt.getMemSizeInBits().getValue();
2859 AtomicOrdering Order = LdSt.getMMO().getSuccessOrdering();
2860
2861 // Need special instructions for atomics that affect ordering.
2862 if (Order != AtomicOrdering::NotAtomic &&
2863 Order != AtomicOrdering::Unordered &&
2864 Order != AtomicOrdering::Monotonic) {
2865 assert(!isa<GZExtLoad>(LdSt));
2866 if (MemSizeInBytes > 64)
2867 return false;
2868
2869 if (isa<GLoad>(LdSt)) {
2870 static constexpr unsigned LDAPROpcodes[] = {
2871 AArch64::LDAPRB, AArch64::LDAPRH, AArch64::LDAPRW, AArch64::LDAPRX};
2872 static constexpr unsigned LDAROpcodes[] = {
2873 AArch64::LDARB, AArch64::LDARH, AArch64::LDARW, AArch64::LDARX};
2874 ArrayRef<unsigned> Opcodes =
2875 STI.hasRCPC() && Order != AtomicOrdering::SequentiallyConsistent
2876 ? LDAPROpcodes
2877 : LDAROpcodes;
2878 I.setDesc(TII.get(Opcodes[Log2_32(MemSizeInBytes)]));
2879 } else {
2880 static constexpr unsigned Opcodes[] = {AArch64::STLRB, AArch64::STLRH,
2881 AArch64::STLRW, AArch64::STLRX};
2882 Register ValReg = LdSt.getReg(0);
2883 if (MRI.getType(ValReg).getSizeInBits() == 64 && MemSizeInBits != 64) {
2884 // Emit a subreg copy of 32 bits.
2885 Register NewVal = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
2886 MIB.buildInstr(TargetOpcode::COPY, {NewVal}, {})
2887 .addReg(I.getOperand(0).getReg(), 0, AArch64::sub_32);
2888 I.getOperand(0).setReg(NewVal);
2889 }
2890 I.setDesc(TII.get(Opcodes[Log2_32(MemSizeInBytes)]));
2891 }
2892 constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2893 return true;
2894 }
2895
2896#ifndef NDEBUG
2897 const Register PtrReg = LdSt.getPointerReg();
2898 const RegisterBank &PtrRB = *RBI.getRegBank(PtrReg, MRI, TRI);
2899 // Check that the pointer register is valid.
2900 assert(PtrRB.getID() == AArch64::GPRRegBankID &&
2901 "Load/Store pointer operand isn't a GPR");
2902 assert(MRI.getType(PtrReg).isPointer() &&
2903 "Load/Store pointer operand isn't a pointer");
2904#endif
2905
2906 const Register ValReg = LdSt.getReg(0);
2907 const LLT ValTy = MRI.getType(ValReg);
2908 const RegisterBank &RB = *RBI.getRegBank(ValReg, MRI, TRI);
2909
2910 // The code below doesn't support truncating stores, so we need to split it
2911 // again.
2912 if (isa<GStore>(LdSt) && ValTy.getSizeInBits() > MemSizeInBits) {
2913 unsigned SubReg;
2914 LLT MemTy = LdSt.getMMO().getMemoryType();
2915 auto *RC = getRegClassForTypeOnBank(MemTy, RB);
2916 if (!getSubRegForClass(RC, TRI, SubReg))
2917 return false;
2918
2919 // Generate a subreg copy.
2920 auto Copy = MIB.buildInstr(TargetOpcode::COPY, {MemTy}, {})
2921 .addReg(ValReg, 0, SubReg)
2922 .getReg(0);
2923 RBI.constrainGenericRegister(Copy, *RC, MRI);
2924 LdSt.getOperand(0).setReg(Copy);
2925 } else if (isa<GLoad>(LdSt) && ValTy.getSizeInBits() > MemSizeInBits) {
2926 // If this is an any-extending load from the FPR bank, split it into a regular
2927 // load + extend.
2928 if (RB.getID() == AArch64::FPRRegBankID) {
2929 unsigned SubReg;
2930 LLT MemTy = LdSt.getMMO().getMemoryType();
2931 auto *RC = getRegClassForTypeOnBank(MemTy, RB);
2932 if (!getSubRegForClass(RC, TRI, SubReg))
2933 return false;
2934 Register OldDst = LdSt.getReg(0);
2935 Register NewDst =
2936 MRI.createGenericVirtualRegister(LdSt.getMMO().getMemoryType());
2937 LdSt.getOperand(0).setReg(NewDst);
2938 MRI.setRegBank(NewDst, RB);
2939 // Generate a SUBREG_TO_REG to extend it.
2940 MIB.setInsertPt(MIB.getMBB(), std::next(LdSt.getIterator()));
2941 MIB.buildInstr(AArch64::SUBREG_TO_REG, {OldDst}, {})
2942 .addImm(0)
2943 .addUse(NewDst)
2944 .addImm(SubReg);
2945 auto SubRegRC = getRegClassForTypeOnBank(MRI.getType(OldDst), RB);
2946 RBI.constrainGenericRegister(OldDst, *SubRegRC, MRI);
2947 MIB.setInstr(LdSt);
2948 }
2949 }
2950
2951 // Helper lambda for partially selecting I. Either returns the original
2952 // instruction with an updated opcode, or a new instruction.
2953 auto SelectLoadStoreAddressingMode = [&]() -> MachineInstr * {
2954 bool IsStore = isa<GStore>(I);
2955 const unsigned NewOpc =
2956 selectLoadStoreUIOp(I.getOpcode(), RB.getID(), MemSizeInBits);
2957 if (NewOpc == I.getOpcode())
2958 return nullptr;
2959 // Check if we can fold anything into the addressing mode.
2960 auto AddrModeFns =
2961 selectAddrModeIndexed(I.getOperand(1), MemSizeInBytes);
2962 if (!AddrModeFns) {
2963 // Can't fold anything. Use the original instruction.
2964 I.setDesc(TII.get(NewOpc));
2965 I.addOperand(MachineOperand::CreateImm(0));
2966 return &I;
2967 }
2968
2969 // Folded something. Create a new instruction and return it.
2970 auto NewInst = MIB.buildInstr(NewOpc, {}, {}, I.getFlags());
2971 Register CurValReg = I.getOperand(0).getReg();
2972 IsStore ? NewInst.addUse(CurValReg) : NewInst.addDef(CurValReg);
2973 NewInst.cloneMemRefs(I);
2974 for (auto &Fn : *AddrModeFns)
2975 Fn(NewInst);
2976 I.eraseFromParent();
2977 return &*NewInst;
2978 };
2979
2980 MachineInstr *LoadStore = SelectLoadStoreAddressingMode();
2981 if (!LoadStore)
2982 return false;
2983
2984 // If we're storing a 0, use WZR/XZR.
2985 if (Opcode == TargetOpcode::G_STORE) {
2986 auto CVal = getIConstantVRegValWithLookThrough(
2987 LoadStore->getOperand(0).getReg(), MRI);
2988 if (CVal && CVal->Value == 0) {
2989 switch (LoadStore->getOpcode()) {
2990 case AArch64::STRWui:
2991 case AArch64::STRHHui:
2992 case AArch64::STRBBui:
2993 LoadStore->getOperand(0).setReg(AArch64::WZR);
2994 break;
2995 case AArch64::STRXui:
2996 LoadStore->getOperand(0).setReg(AArch64::XZR);
2997 break;
2998 }
2999 }
3000 }
3001
3002 if (IsZExtLoad || (Opcode == TargetOpcode::G_LOAD &&
3003 ValTy == LLT::scalar(64) && MemSizeInBits == 32)) {
3004 // The any/zextload from a smaller type to i32 should be handled by the
3005 // importer.
3006 if (MRI.getType(LoadStore->getOperand(0).getReg()).getSizeInBits() != 64)
3007 return false;
3008 // If we have an extending load then change the load's type to be a
3009 // narrower reg and zero_extend with SUBREG_TO_REG.
3010 Register LdReg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
3011 Register DstReg = LoadStore->getOperand(0).getReg();
3012 LoadStore->getOperand(0).setReg(LdReg);
3013
3014 MIB.setInsertPt(MIB.getMBB(), std::next(LoadStore->getIterator()));
3015 MIB.buildInstr(AArch64::SUBREG_TO_REG, {DstReg}, {})
3016 .addImm(0)
3017 .addUse(LdReg)
3018 .addImm(AArch64::sub_32);
3019 constrainSelectedInstRegOperands(*LoadStore, TII, TRI, RBI);
3020 return RBI.constrainGenericRegister(DstReg, AArch64::GPR64allRegClass,
3021 MRI);
3022 }
3023 return constrainSelectedInstRegOperands(*LoadStore, TII, TRI, RBI);
3024 }
3025
3026 case TargetOpcode::G_INDEXED_ZEXTLOAD:
3027 case TargetOpcode::G_INDEXED_SEXTLOAD:
3028 return selectIndexedExtLoad(I, MRI);
3029 case TargetOpcode::G_INDEXED_LOAD:
3030 return selectIndexedLoad(I, MRI);
3031 case TargetOpcode::G_INDEXED_STORE:
3032 return selectIndexedStore(cast<GIndexedStore>(I), MRI);
3033
3034 case TargetOpcode::G_LSHR:
3035 case TargetOpcode::G_ASHR:
3036 if (MRI.getType(I.getOperand(0).getReg()).isVector())
3037 return selectVectorAshrLshr(I, MRI);
3038 [[fallthrough]];
3039 case TargetOpcode::G_SHL:
3040 if (Opcode == TargetOpcode::G_SHL &&
3041 MRI.getType(I.getOperand(0).getReg()).isVector())
3042 return selectVectorSHL(I, MRI);
3043
3044 // These shifts were legalized to have 64-bit shift amounts because we
3045 // want to take advantage of the selection patterns that assume the
3046 // immediates are s64s. However, selectBinaryOp will assume both operands
3047 // have the same bit size.
3048 {
3049 Register SrcReg = I.getOperand(1).getReg();
3050 Register ShiftReg = I.getOperand(2).getReg();
3051 const LLT ShiftTy = MRI.getType(ShiftReg);
3052 const LLT SrcTy = MRI.getType(SrcReg);
3053 if (!SrcTy.isVector() && SrcTy.getSizeInBits() == 32 &&
3054 ShiftTy.getSizeInBits() == 64) {
3055 assert(!ShiftTy.isVector() && "unexpected vector shift ty");
3056 // Insert a subregister copy to implement a 64->32 trunc
3057 auto Trunc = MIB.buildInstr(TargetOpcode::COPY, {SrcTy}, {})
3058 .addReg(ShiftReg, 0, AArch64::sub_32);
3059 MRI.setRegBank(Trunc.getReg(0), RBI.getRegBank(AArch64::GPRRegBankID));
3060 I.getOperand(2).setReg(Trunc.getReg(0));
3061 }
3062 }
3063 [[fallthrough]];
3064 case TargetOpcode::G_OR: {
3065 // Reject the various things we don't support yet.
3066 if (unsupportedBinOp(I, RBI, MRI, TRI))
3067 return false;
3068
3069 const unsigned OpSize = Ty.getSizeInBits();
3070
3071 const Register DefReg = I.getOperand(0).getReg();
3072 const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI);
3073
3074 const unsigned NewOpc = selectBinaryOp(I.getOpcode(), RB.getID(), OpSize);
3075 if (NewOpc == I.getOpcode())
3076 return false;
3077
3078 I.setDesc(TII.get(NewOpc));
3079 // FIXME: Should the type be always reset in setDesc?
3080
3081 // Now that we selected an opcode, we need to constrain the register
3082 // operands to use appropriate classes.
3083 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3084 }
3085
3086 case TargetOpcode::G_PTR_ADD: {
3087 emitADD(I.getOperand(0).getReg(), I.getOperand(1), I.getOperand(2), MIB);
3088 I.eraseFromParent();
3089 return true;
3090 }
3091
3092 case TargetOpcode::G_SADDE:
3093 case TargetOpcode::G_UADDE:
3094 case TargetOpcode::G_SSUBE:
3095 case TargetOpcode::G_USUBE:
3096 case TargetOpcode::G_SADDO:
3097 case TargetOpcode::G_UADDO:
3098 case TargetOpcode::G_SSUBO:
3099 case TargetOpcode::G_USUBO:
3100 return selectOverflowOp(I, MRI);
3101
3102 case TargetOpcode::G_PTRMASK: {
3103 Register MaskReg = I.getOperand(2).getReg();
3104 std::optional<int64_t> MaskVal = getIConstantVRegSExtVal(MaskReg, MRI);
3105 // TODO: Implement arbitrary cases
3106 if (!MaskVal || !isShiftedMask_64(*MaskVal))
3107 return false;
3108
3109 uint64_t Mask = *MaskVal;
3110 I.setDesc(TII.get(AArch64::ANDXri));
3111 I.getOperand(2).ChangeToImmediate(
3112 AArch64_AM::encodeLogicalImmediate(Mask, 64));
3113
3114 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3115 }
3116 case TargetOpcode::G_PTRTOINT:
3117 case TargetOpcode::G_TRUNC: {
3118 const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
3119 const LLT SrcTy = MRI.getType(I.getOperand(1).getReg());
3120
3121 const Register DstReg = I.getOperand(0).getReg();
3122 const Register SrcReg = I.getOperand(1).getReg();
3123
3124 const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
3125 const RegisterBank &SrcRB = *RBI.getRegBank(SrcReg, MRI, TRI);
3126
3127 if (DstRB.getID() != SrcRB.getID()) {
3128 LLVM_DEBUG(
3129 dbgs() << "G_TRUNC/G_PTRTOINT input/output on different banks\n");
3130 return false;
3131 }
3132
3133 if (DstRB.getID() == AArch64::GPRRegBankID) {
3134 const TargetRegisterClass *DstRC = getRegClassForTypeOnBank(DstTy, DstRB);
3135 if (!DstRC)
3136 return false;
3137
3138 const TargetRegisterClass *SrcRC = getRegClassForTypeOnBank(SrcTy, SrcRB);
3139 if (!SrcRC)
3140 return false;
3141
3142 if (!RBI.constrainGenericRegister(SrcReg, *SrcRC, MRI) ||
3143 !RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) {
3144 LLVM_DEBUG(dbgs() << "Failed to constrain G_TRUNC/G_PTRTOINT\n");
3145 return false;
3146 }
3147
3148 if (DstRC == SrcRC) {
3149 // Nothing to be done
3150 } else if (Opcode == TargetOpcode::G_TRUNC && DstTy == LLT::scalar(32) &&
3151 SrcTy == LLT::scalar(64)) {
3152 llvm_unreachable("TableGen can import this case");
3153 return false;
3154 } else if (DstRC == &AArch64::GPR32RegClass &&
3155 SrcRC == &AArch64::GPR64RegClass) {
3156 I.getOperand(1).setSubReg(AArch64::sub_32);
3157 } else {
3158 LLVM_DEBUG(
3159 dbgs() << "Unhandled mismatched classes in G_TRUNC/G_PTRTOINT\n");
3160 return false;
3161 }
3162
3163 I.setDesc(TII.get(TargetOpcode::COPY));
3164 return true;
3165 } else if (DstRB.getID() == AArch64::FPRRegBankID) {
3166 if (DstTy == LLT::fixed_vector(4, 16) &&
3167 SrcTy == LLT::fixed_vector(4, 32)) {
3168 I.setDesc(TII.get(AArch64::XTNv4i16));
3169 constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3170 return true;
3171 }
3172
3173 if (!SrcTy.isVector() && SrcTy.getSizeInBits() == 128) {
3174 MachineInstr *Extract = emitExtractVectorElt(
3175 DstReg, DstRB, LLT::scalar(DstTy.getSizeInBits()), SrcReg, 0, MIB);
3176 if (!Extract)
3177 return false;
3178 I.eraseFromParent();
3179 return true;
3180 }
3181
3182 // We might have a vector G_PTRTOINT, in which case just emit a COPY.
3183 if (Opcode == TargetOpcode::G_PTRTOINT) {
3184 assert(DstTy.isVector() && "Expected an FPR ptrtoint to be a vector");
3185 I.setDesc(TII.get(TargetOpcode::COPY));
3186 return selectCopy(I, TII, MRI, TRI, RBI);
3187 }
3188 }
3189
3190 return false;
3191 }
3192
3193 case TargetOpcode::G_ANYEXT: {
3194 if (selectUSMovFromExtend(I, MRI))
3195 return true;
3196
3197 const Register DstReg = I.getOperand(0).getReg();
3198 const Register SrcReg = I.getOperand(1).getReg();
3199
3200 const RegisterBank &RBDst = *RBI.getRegBank(DstReg, MRI, TRI);
3201 if (RBDst.getID() != AArch64::GPRRegBankID) {
3202 LLVM_DEBUG(dbgs() << "G_ANYEXT on bank: " << RBDst
3203 << ", expected: GPR\n");
3204 return false;
3205 }
3206
3207 const RegisterBank &RBSrc = *RBI.getRegBank(SrcReg, MRI, TRI);
3208 if (RBSrc.getID() != AArch64::GPRRegBankID) {
3209 LLVM_DEBUG(dbgs() << "G_ANYEXT on bank: " << RBSrc
3210 << ", expected: GPR\n");
3211 return false;
3212 }
3213
3214 const unsigned DstSize = MRI.getType(DstReg).getSizeInBits();
3215
3216 if (DstSize == 0) {
3217 LLVM_DEBUG(dbgs() << "G_ANYEXT operand has no size, not a gvreg?\n");
3218 return false;
3219 }
3220
3221 if (DstSize != 64 && DstSize > 32) {
3222 LLVM_DEBUG(dbgs() << "G_ANYEXT to size: " << DstSize
3223 << ", expected: 32 or 64\n");
3224 return false;
3225 }
3226 // At this point G_ANYEXT is just like a plain COPY, but we need
3227 // to explicitly form the 64-bit value if any.
3228 if (DstSize > 32) {
3229 Register ExtSrc = MRI.createVirtualRegister(&AArch64::GPR64allRegClass);
3230 BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::SUBREG_TO_REG))
3231 .addDef(ExtSrc)
3232 .addImm(0)
3233 .addUse(SrcReg)
3234 .addImm(AArch64::sub_32);
3235 I.getOperand(1).setReg(ExtSrc);
3236 }
3237 return selectCopy(I, TII, MRI, TRI, RBI);
3238 }
3239
3240 case TargetOpcode::G_ZEXT:
3241 case TargetOpcode::G_SEXT_INREG:
3242 case TargetOpcode::G_SEXT: {
3243 if (selectUSMovFromExtend(I, MRI))
3244 return true;
3245
3246 unsigned Opcode = I.getOpcode();
3247 const bool IsSigned = Opcode != TargetOpcode::G_ZEXT;
3248 const Register DefReg = I.getOperand(0).getReg();
3249 Register SrcReg = I.getOperand(1).getReg();
3250 const LLT DstTy = MRI.getType(DefReg);
3251 const LLT SrcTy = MRI.getType(SrcReg);
3252 unsigned DstSize = DstTy.getSizeInBits();
3253 unsigned SrcSize = SrcTy.getSizeInBits();
3254
3255 // SEXT_INREG has the same src reg size as dst; the size of the value to be
3256 // extended is encoded in the imm.
3257 if (Opcode == TargetOpcode::G_SEXT_INREG)
3258 SrcSize = I.getOperand(2).getImm();
3259
3260 if (DstTy.isVector())
3261 return false; // Should be handled by imported patterns.
3262
3263 assert((*RBI.getRegBank(DefReg, MRI, TRI)).getID() ==
3264 AArch64::GPRRegBankID &&
3265 "Unexpected ext regbank");
3266
3267 MachineInstr *ExtI;
3268
3269 // First, check whether we're extending the result of a load whose destination
3270 // type is smaller than 32 bits; if so, this zext is redundant. GPR32 is the
3271 // smallest GPR register on AArch64, and all smaller loads automatically
3272 // zero-extend the upper bits. E.g.
3273 // %v(s8) = G_LOAD %p, :: (load 1)
3274 // %v2(s32) = G_ZEXT %v(s8)
3275 if (!IsSigned) {
3276 auto *LoadMI = getOpcodeDef(TargetOpcode::G_LOAD, SrcReg, MRI);
3277 bool IsGPR =
3278 RBI.getRegBank(SrcReg, MRI, TRI)->getID() == AArch64::GPRRegBankID;
3279 if (LoadMI && IsGPR) {
3280 const MachineMemOperand *MemOp = *LoadMI->memoperands_begin();
3281 unsigned BytesLoaded = MemOp->getSize().getValue();
3282 if (BytesLoaded < 4 && SrcTy.getSizeInBytes() == BytesLoaded)
3283 return selectCopy(I, TII, MRI, TRI, RBI);
3284 }
3285
3286 // For the 32-bit -> 64-bit case, we can emit a mov (ORRWrs)
3287 // + SUBREG_TO_REG.
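// Editor's note (illustrative sketch, register names invented): for
//   %dst:gpr(s64) = G_ZEXT %src:gpr(s32)
// the code below emits
//   %t:gpr32 = ORRWrs $wzr, %src, 0   ; 32-bit mov; writing a W register
//                                     ; implicitly zeroes bits 63:32
//   %dst:gpr64 = SUBREG_TO_REG 0, %t, %subreg.sub_32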
3288 if (IsGPR && SrcSize == 32 && DstSize == 64) {
3289 Register SubregToRegSrc =
3290 MRI.createVirtualRegister(&AArch64::GPR32RegClass);
3291 const Register ZReg = AArch64::WZR;
3292 MIB.buildInstr(AArch64::ORRWrs, {SubregToRegSrc}, {ZReg, SrcReg})
3293 .addImm(0);
3294
3295 MIB.buildInstr(AArch64::SUBREG_TO_REG, {DefReg}, {})
3296 .addImm(0)
3297 .addUse(SubregToRegSrc)
3298 .addImm(AArch64::sub_32);
3299
3300 if (!RBI.constrainGenericRegister(DefReg, AArch64::GPR64RegClass,
3301 MRI)) {
3302 LLVM_DEBUG(dbgs() << "Failed to constrain G_ZEXT destination\n");
3303 return false;
3304 }
3305
3306 if (!RBI.constrainGenericRegister(SrcReg, AArch64::GPR32RegClass,
3307 MRI)) {
3308 LLVM_DEBUG(dbgs() << "Failed to constrain G_ZEXT source\n");
3309 return false;
3310 }
3311
3312 I.eraseFromParent();
3313 return true;
3314 }
3315 }
3316
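// Editor's note: the UBFM/SBFM instructions built below with immr = 0 and
// imms = SrcSize - 1 are the canonical encodings of the UXTB/UXTH and
// SXTB/SXTH/SXTW extension aliases.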
3317 if (DstSize == 64) {
3318 if (Opcode != TargetOpcode::G_SEXT_INREG) {
3319 // FIXME: Can we avoid manually doing this?
3320 if (!RBI.constrainGenericRegister(SrcReg, AArch64::GPR32RegClass,
3321 MRI)) {
3322 LLVM_DEBUG(dbgs() << "Failed to constrain " << TII.getName(Opcode)
3323 << " operand\n");
3324 return false;
3325 }
3326 SrcReg = MIB.buildInstr(AArch64::SUBREG_TO_REG,
3327 {&AArch64::GPR64RegClass}, {})
3328 .addImm(0)
3329 .addUse(SrcReg)
3330 .addImm(AArch64::sub_32)
3331 .getReg(0);
3332 }
3333
3334 ExtI = MIB.buildInstr(IsSigned ? AArch64::SBFMXri : AArch64::UBFMXri,
3335 {DefReg}, {SrcReg})
3336 .addImm(0)
3337 .addImm(SrcSize - 1);
3338 } else if (DstSize <= 32) {
3339 ExtI = MIB.buildInstr(IsSigned ? AArch64::SBFMWri : AArch64::UBFMWri,
3340 {DefReg}, {SrcReg})
3341 .addImm(0)
3342 .addImm(SrcSize - 1);
3343 } else {
3344 return false;
3345 }
3346
3347 constrainSelectedInstRegOperands(*ExtI, TII, TRI, RBI);
3348 I.eraseFromParent();
3349 return true;
3350 }
3351
3352 case TargetOpcode::G_SITOFP:
3353 case TargetOpcode::G_UITOFP:
3354 case TargetOpcode::G_FPTOSI:
3355 case TargetOpcode::G_FPTOUI: {
3356 const LLT DstTy = MRI.getType(I.getOperand(0).getReg()),
3357 SrcTy = MRI.getType(I.getOperand(1).getReg());
3358 const unsigned NewOpc = selectFPConvOpc(Opcode, DstTy, SrcTy);
3359 if (NewOpc == Opcode)
3360 return false;
3361
3362 I.setDesc(TII.get(NewOpc));
3363 constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3364 I.setFlags(MachineInstr::NoFPExcept);
3365
3366 return true;
3367 }
3368
3369 case TargetOpcode::G_FREEZE:
3370 return selectCopy(I, TII, MRI, TRI, RBI);
3371
3372 case TargetOpcode::G_INTTOPTR:
3373 // The importer is currently unable to import pointer types since they
3374 // didn't exist in SelectionDAG.
3375 return selectCopy(I, TII, MRI, TRI, RBI);
3376
3377 case TargetOpcode::G_BITCAST:
3378 // Imported SelectionDAG rules can handle every bitcast except those that
3379 // bitcast from a type to the same type. Ideally, these shouldn't occur
3380 // but we might not run an optimizer that deletes them. The other exception
3381 // is bitcasts involving pointer types, as SelectionDAG has no knowledge
3382 // of them.
3383 return selectCopy(I, TII, MRI, TRI, RBI);
3384
3385 case TargetOpcode::G_SELECT: {
3386 auto &Sel = cast<GSelect>(I);
3387 const Register CondReg = Sel.getCondReg();
3388 const Register TReg = Sel.getTrueReg();
3389 const Register FReg = Sel.getFalseReg();
3390
3391 if (tryOptSelect(Sel))
3392 return true;
3393
3394 // Make sure to use an unused vreg instead of wzr, so that the peephole
3395 // optimizations will be able to optimize these.
3396 Register DeadVReg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
3397 auto TstMI = MIB.buildInstr(AArch64::ANDSWri, {DeadVReg}, {CondReg})
3398 .addImm(AArch64_AM::encodeLogicalImmediate(1, 32));
3399 constrainSelectedInstRegOperands(*TstMI, TII, TRI, RBI);
3400 if (!emitSelect(Sel.getReg(0), TReg, FReg, AArch64CC::NE, MIB))
3401 return false;
3402 Sel.eraseFromParent();
3403 return true;
3404 }
3405 case TargetOpcode::G_ICMP: {
3406 if (Ty.isVector())
3407 return selectVectorICmp(I, MRI);
3408
3409 if (Ty != LLT::scalar(32)) {
3410 LLVM_DEBUG(dbgs() << "G_ICMP result has type: " << Ty
3411 << ", expected: " << LLT::scalar(32) << '\n');
3412 return false;
3413 }
3414
3415 auto Pred = static_cast<CmpInst::Predicate>(I.getOperand(1).getPredicate());
3416 const AArch64CC::CondCode InvCC =
3417 changeICMPPredToAArch64CC(CmpInst::getInversePredicate(Pred));
3418 emitIntegerCompare(I.getOperand(2), I.getOperand(3), I.getOperand(1), MIB);
3419 emitCSINC(/*Dst=*/I.getOperand(0).getReg(), /*Src1=*/AArch64::WZR,
3420 /*Src2=*/AArch64::WZR, InvCC, MIB);
3421 I.eraseFromParent();
3422 return true;
3423 }
3424
3425 case TargetOpcode::G_FCMP: {
3426 CmpInst::Predicate Pred =
3427 static_cast<CmpInst::Predicate>(I.getOperand(1).getPredicate());
3428 if (!emitFPCompare(I.getOperand(2).getReg(), I.getOperand(3).getReg(), MIB,
3429 Pred) ||
3430 !emitCSetForFCmp(I.getOperand(0).getReg(), Pred, MIB))
3431 return false;
3432 I.eraseFromParent();
3433 return true;
3434 }
3435 case TargetOpcode::G_VASTART:
3436 return STI.isTargetDarwin() ? selectVaStartDarwin(I, MF, MRI)
3437 : selectVaStartAAPCS(I, MF, MRI);
3438 case TargetOpcode::G_INTRINSIC:
3439 return selectIntrinsic(I, MRI);
3440 case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS:
3441 return selectIntrinsicWithSideEffects(I, MRI);
3442 case TargetOpcode::G_IMPLICIT_DEF: {
3443 I.setDesc(TII.get(TargetOpcode::IMPLICIT_DEF));
3444 const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
3445 const Register DstReg = I.getOperand(0).getReg();
3446 const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
3447 const TargetRegisterClass *DstRC = getRegClassForTypeOnBank(DstTy, DstRB);
3448 RBI.constrainGenericRegister(DstReg, *DstRC, MRI);
3449 return true;
3450 }
3451 case TargetOpcode::G_BLOCK_ADDR: {
3452 if (TM.getCodeModel() == CodeModel::Large && !TM.isPositionIndependent()) {
3453 materializeLargeCMVal(I, I.getOperand(1).getBlockAddress(), 0);
3454 I.eraseFromParent();
3455 return true;
3456 } else {
3457 I.setDesc(TII.get(AArch64::MOVaddrBA));
3458 auto MovMI = BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::MOVaddrBA),
3459 I.getOperand(0).getReg())
3460 .addBlockAddress(I.getOperand(1).getBlockAddress(),
3461 /* Offset */ 0, AArch64II::MO_PAGE)
3462 .addBlockAddress(
3463 I.getOperand(1).getBlockAddress(), /* Offset */ 0,
3464 AArch64II::MO_NC | AArch64II::MO_PAGEOFF);
3465 I.eraseFromParent();
3466 return constrainSelectedInstRegOperands(*MovMI, TII, TRI, RBI);
3467 }
3468 }
3469 case AArch64::G_DUP: {
3470 // When the scalar of G_DUP is an s8/s16 GPR, it can't be selected by the
3471 // imported patterns, so do it manually here. Avoiding generating s16 GPRs is
3472 // difficult because at regbank selection we may end up pessimizing the FPR
3473 // case if we decide to add an anyextend to fix this. Manual selection is the
3474 // most robust solution for now.
3475 if (RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI)->getID() !=
3476 AArch64::GPRRegBankID)
3477 return false; // We expect the fpr regbank case to be imported.
3478 LLT VecTy = MRI.getType(I.getOperand(0).getReg());
3479 if (VecTy == LLT::fixed_vector(8, 8))
3480 I.setDesc(TII.get(AArch64::DUPv8i8gpr));
3481 else if (VecTy == LLT::fixed_vector(16, 8))
3482 I.setDesc(TII.get(AArch64::DUPv16i8gpr));
3483 else if (VecTy == LLT::fixed_vector(4, 16))
3484 I.setDesc(TII.get(AArch64::DUPv4i16gpr));
3485 else if (VecTy == LLT::fixed_vector(8, 16))
3486 I.setDesc(TII.get(AArch64::DUPv8i16gpr));
3487 else
3488 return false;
3489 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3490 }
3491 case TargetOpcode::G_BUILD_VECTOR:
3492 return selectBuildVector(I, MRI);
3493 case TargetOpcode::G_MERGE_VALUES:
3494 return selectMergeValues(I, MRI);
3495 case TargetOpcode::G_UNMERGE_VALUES:
3496 return selectUnmergeValues(I, MRI);
3497 case TargetOpcode::G_SHUFFLE_VECTOR:
3498 return selectShuffleVector(I, MRI);
3499 case TargetOpcode::G_EXTRACT_VECTOR_ELT:
3500 return selectExtractElt(I, MRI);
3501 case TargetOpcode::G_INSERT_VECTOR_ELT:
3502 return selectInsertElt(I, MRI);
3503 case TargetOpcode::G_CONCAT_VECTORS:
3504 return selectConcatVectors(I, MRI);
3505 case TargetOpcode::G_JUMP_TABLE:
3506 return selectJumpTable(I, MRI);
3507 case TargetOpcode::G_MEMCPY:
3508 case TargetOpcode::G_MEMCPY_INLINE:
3509 case TargetOpcode::G_MEMMOVE:
3510 case TargetOpcode::G_MEMSET:
3511 assert(STI.hasMOPS() && "Shouldn't get here without +mops feature");
3512 return selectMOPS(I, MRI);
3513 }
3514
3515 return false;
3516}
3517
3518bool AArch64InstructionSelector::selectAndRestoreState(MachineInstr &I) {
3519 MachineIRBuilderState OldMIBState = MIB.getState();
3520 bool Success = select(I);
3521 MIB.setState(OldMIBState);
3522 return Success;
3523}
3524
3525 bool AArch64InstructionSelector::selectMOPS(MachineInstr &GI,
3526 MachineRegisterInfo &MRI) {
3527 unsigned Mopcode;
3528 switch (GI.getOpcode()) {
3529 case TargetOpcode::G_MEMCPY:
3530 case TargetOpcode::G_MEMCPY_INLINE:
3531 Mopcode = AArch64::MOPSMemoryCopyPseudo;
3532 break;
3533 case TargetOpcode::G_MEMMOVE:
3534 Mopcode = AArch64::MOPSMemoryMovePseudo;
3535 break;
3536 case TargetOpcode::G_MEMSET:
3537 // For tagged memset see llvm.aarch64.mops.memset.tag
3538 Mopcode = AArch64::MOPSMemorySetPseudo;
3539 break;
3540 }
3541
3542 auto &DstPtr = GI.getOperand(0);
3543 auto &SrcOrVal = GI.getOperand(1);
3544 auto &Size = GI.getOperand(2);
3545
3546 // Create copies of the registers that can be clobbered.
3547 const Register DstPtrCopy = MRI.cloneVirtualRegister(DstPtr.getReg());
3548 const Register SrcValCopy = MRI.cloneVirtualRegister(SrcOrVal.getReg());
3549 const Register SizeCopy = MRI.cloneVirtualRegister(Size.getReg());
3550
3551 const bool IsSet = Mopcode == AArch64::MOPSMemorySetPseudo;
3552 const auto &SrcValRegClass =
3553 IsSet ? AArch64::GPR64RegClass : AArch64::GPR64commonRegClass;
3554
3555 // Constrain to specific registers
3556 RBI.constrainGenericRegister(DstPtrCopy, AArch64::GPR64commonRegClass, MRI);
3557 RBI.constrainGenericRegister(SrcValCopy, SrcValRegClass, MRI);
3558 RBI.constrainGenericRegister(SizeCopy, AArch64::GPR64RegClass, MRI);
3559
3560 MIB.buildCopy(DstPtrCopy, DstPtr);
3561 MIB.buildCopy(SrcValCopy, SrcOrVal);
3562 MIB.buildCopy(SizeCopy, Size);
3563
3564 // New instruction uses the copied registers because it must update them.
3565 // The defs are not used since they don't exist in G_MEM*. They are still
3566 // tied.
3567 // Note: order of operands is different from G_MEMSET, G_MEMCPY, G_MEMMOVE
3568 Register DefDstPtr = MRI.createVirtualRegister(&AArch64::GPR64commonRegClass);
3569 Register DefSize = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
3570 if (IsSet) {
3571 MIB.buildInstr(Mopcode, {DefDstPtr, DefSize},
3572 {DstPtrCopy, SizeCopy, SrcValCopy});
3573 } else {
3574 Register DefSrcPtr = MRI.createVirtualRegister(&SrcValRegClass);
3575 MIB.buildInstr(Mopcode, {DefDstPtr, DefSrcPtr, DefSize},
3576 {DstPtrCopy, SrcValCopy, SizeCopy});
3577 }
3578
3579 GI.eraseFromParent();
3580 return true;
3581}
3582
3583 bool AArch64InstructionSelector::selectBrJT(MachineInstr &I,
3584 MachineRegisterInfo &MRI) {
3585 assert(I.getOpcode() == TargetOpcode::G_BRJT && "Expected G_BRJT");
3586 Register JTAddr = I.getOperand(0).getReg();
3587 unsigned JTI = I.getOperand(1).getIndex();
3588 Register Index = I.getOperand(2).getReg();
3589
3590 Register TargetReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
3591 Register ScratchReg = MRI.createVirtualRegister(&AArch64::GPR64spRegClass);
3592
3593 MF->getInfo<AArch64FunctionInfo>()->setJumpTableEntryInfo(JTI, 4, nullptr);
3594 auto JumpTableInst = MIB.buildInstr(AArch64::JumpTableDest32,
3595 {TargetReg, ScratchReg}, {JTAddr, Index})
3596 .addJumpTableIndex(JTI);
3597 // Save the jump table info.
3598 MIB.buildInstr(TargetOpcode::JUMP_TABLE_DEBUG_INFO, {},
3599 {static_cast<int64_t>(JTI)});
3600 // Build the indirect branch.
3601 MIB.buildInstr(AArch64::BR, {}, {TargetReg});
3602 I.eraseFromParent();
3603 return constrainSelectedInstRegOperands(*JumpTableInst, TII, TRI, RBI);
3604}
3605
3606 bool AArch64InstructionSelector::selectJumpTable(MachineInstr &I,
3607 MachineRegisterInfo &MRI) {
3608 assert(I.getOpcode() == TargetOpcode::G_JUMP_TABLE && "Expected jump table");
3609 assert(I.getOperand(1).isJTI() && "Jump table op should have a JTI!");
3610
3611 Register DstReg = I.getOperand(0).getReg();
3612 unsigned JTI = I.getOperand(1).getIndex();
3613 // We generate a MOVaddrJT which will get expanded to an ADRP + ADD later.
3614 auto MovMI =
3615 MIB.buildInstr(AArch64::MOVaddrJT, {DstReg}, {})
3616 .addJumpTableIndex(JTI, AArch64II::MO_PAGE)
3617 .addJumpTableIndex(JTI, AArch64II::MO_NC | AArch64II::MO_PAGEOFF);
3618 I.eraseFromParent();
3619 return constrainSelectedInstRegOperands(*MovMI, TII, TRI, RBI);
3620}
3621
3622 bool AArch64InstructionSelector::selectTLSGlobalValue(
3623 MachineInstr &I, MachineRegisterInfo &MRI) {
3624 if (!STI.isTargetMachO())
3625 return false;
3626 MachineFunction &MF = *I.getParent()->getParent();
3627 MF.getFrameInfo().setAdjustsStack(true);
3628
3629 const auto &GlobalOp = I.getOperand(1);
3630 assert(GlobalOp.getOffset() == 0 &&
3631 "Shouldn't have an offset on TLS globals!");
3632 const GlobalValue &GV = *GlobalOp.getGlobal();
3633
3634 auto LoadGOT =
3635 MIB.buildInstr(AArch64::LOADgot, {&AArch64::GPR64commonRegClass}, {})
3636 .addGlobalAddress(&GV, 0, AArch64II::MO_TLS);
3637
3638 auto Load = MIB.buildInstr(AArch64::LDRXui, {&AArch64::GPR64commonRegClass},
3639 {LoadGOT.getReg(0)})
3640 .addImm(0);
3641
3642 MIB.buildCopy(Register(AArch64::X0), LoadGOT.getReg(0));
3643 // TLS calls preserve all registers except those that absolutely must be
3644 // trashed: X0 (it takes an argument), LR (it's a call) and NZCV (let's not be
3645 // silly).
3646 MIB.buildInstr(getBLRCallOpcode(MF), {}, {Load})
3647 .addUse(AArch64::X0, RegState::Implicit)
3648 .addDef(AArch64::X0, RegState::Implicit)
3649 .addRegMask(TRI.getTLSCallPreservedMask());
3650
3651 MIB.buildCopy(I.getOperand(0).getReg(), Register(AArch64::X0));
3652 RBI.constrainGenericRegister(I.getOperand(0).getReg(), AArch64::GPR64RegClass,
3653 MRI);
3654 I.eraseFromParent();
3655 return true;
3656}
3657
3658 bool AArch64InstructionSelector::selectVectorICmp(
3659 MachineInstr &I, MachineRegisterInfo &MRI) {
3660 Register DstReg = I.getOperand(0).getReg();
3661 LLT DstTy = MRI.getType(DstReg);
3662 Register SrcReg = I.getOperand(2).getReg();
3663 Register Src2Reg = I.getOperand(3).getReg();
3664 LLT SrcTy = MRI.getType(SrcReg);
3665
3666 unsigned SrcEltSize = SrcTy.getElementType().getSizeInBits();
3667 unsigned NumElts = DstTy.getNumElements();
3668
3669 // First index is element size, 0 == 8b, 1 == 16b, 2 == 32b, 3 == 64b
3670 // Second index is num elts, 0 == v2, 1 == v4, 2 == v8, 3 == v16
3671 // Third index is cc opcode:
3672 // 0 == eq
3673 // 1 == ugt
3674 // 2 == uge
3675 // 3 == ult
3676 // 4 == ule
3677 // 5 == sgt
3678 // 6 == sge
3679 // 7 == slt
3680 // 8 == sle
3681 // ne is done by negating 'eq' result.
3682
3683 // The table below assumes that for some comparisons the operands will be
3684 // commuted:
3685 // ult op == commute + ugt op
3686 // ule op == commute + uge op
3687 // slt op == commute + sgt op
3688 // sle op == commute + sge op
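// Editor's note: for example, 'x ult y' is selected as the 'ugt' opcode
// (CMHI) with its operands swapped, i.e. CMHI y, x.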
3689 unsigned PredIdx = 0;
3690 bool SwapOperands = false;
3691 CmpInst::Predicate Pred = (CmpInst::Predicate)I.getOperand(1).getPredicate();
3692 switch (Pred) {
3693 case CmpInst::ICMP_NE:
3694 case CmpInst::ICMP_EQ:
3695 PredIdx = 0;
3696 break;
3697 case CmpInst::ICMP_UGT:
3698 PredIdx = 1;
3699 break;
3700 case CmpInst::ICMP_UGE:
3701 PredIdx = 2;
3702 break;
3703 case CmpInst::ICMP_ULT:
3704 PredIdx = 3;
3705 SwapOperands = true;
3706 break;
3707 case CmpInst::ICMP_ULE:
3708 PredIdx = 4;
3709 SwapOperands = true;
3710 break;
3711 case CmpInst::ICMP_SGT:
3712 PredIdx = 5;
3713 break;
3714 case CmpInst::ICMP_SGE:
3715 PredIdx = 6;
3716 break;
3717 case CmpInst::ICMP_SLT:
3718 PredIdx = 7;
3719 SwapOperands = true;
3720 break;
3721 case CmpInst::ICMP_SLE:
3722 PredIdx = 8;
3723 SwapOperands = true;
3724 break;
3725 default:
3726 llvm_unreachable("Unhandled icmp predicate");
3727 return false;
3728 }
3729
3730 // This table obviously should be tablegen'd when we have our GISel native
3731 // tablegen selector.
3732
3733 static const unsigned OpcTable[4][4][9] = {
3734 {
3735 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3736 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3737 0 /* invalid */},
3738 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3739 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3740 0 /* invalid */},
3741 {AArch64::CMEQv8i8, AArch64::CMHIv8i8, AArch64::CMHSv8i8,
3742 AArch64::CMHIv8i8, AArch64::CMHSv8i8, AArch64::CMGTv8i8,
3743 AArch64::CMGEv8i8, AArch64::CMGTv8i8, AArch64::CMGEv8i8},
3744 {AArch64::CMEQv16i8, AArch64::CMHIv16i8, AArch64::CMHSv16i8,
3745 AArch64::CMHIv16i8, AArch64::CMHSv16i8, AArch64::CMGTv16i8,
3746 AArch64::CMGEv16i8, AArch64::CMGTv16i8, AArch64::CMGEv16i8}
3747 },
3748 {
3749 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3750 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3751 0 /* invalid */},
3752 {AArch64::CMEQv4i16, AArch64::CMHIv4i16, AArch64::CMHSv4i16,
3753 AArch64::CMHIv4i16, AArch64::CMHSv4i16, AArch64::CMGTv4i16,
3754 AArch64::CMGEv4i16, AArch64::CMGTv4i16, AArch64::CMGEv4i16},
3755 {AArch64::CMEQv8i16, AArch64::CMHIv8i16, AArch64::CMHSv8i16,
3756 AArch64::CMHIv8i16, AArch64::CMHSv8i16, AArch64::CMGTv8i16,
3757 AArch64::CMGEv8i16, AArch64::CMGTv8i16, AArch64::CMGEv8i16},
3758 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3759 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3760 0 /* invalid */}
3761 },
3762 {
3763 {AArch64::CMEQv2i32, AArch64::CMHIv2i32, AArch64::CMHSv2i32,
3764 AArch64::CMHIv2i32, AArch64::CMHSv2i32, AArch64::CMGTv2i32,
3765 AArch64::CMGEv2i32, AArch64::CMGTv2i32, AArch64::CMGEv2i32},
3766 {AArch64::CMEQv4i32, AArch64::CMHIv4i32, AArch64::CMHSv4i32,
3767 AArch64::CMHIv4i32, AArch64::CMHSv4i32, AArch64::CMGTv4i32,
3768 AArch64::CMGEv4i32, AArch64::CMGTv4i32, AArch64::CMGEv4i32},
3769 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3770 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3771 0 /* invalid */},
3772 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3773 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3774 0 /* invalid */}
3775 },
3776 {
3777 {AArch64::CMEQv2i64, AArch64::CMHIv2i64, AArch64::CMHSv2i64,
3778 AArch64::CMHIv2i64, AArch64::CMHSv2i64, AArch64::CMGTv2i64,
3779 AArch64::CMGEv2i64, AArch64::CMGTv2i64, AArch64::CMGEv2i64},
3780 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3781 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3782 0 /* invalid */},
3783 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3784 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3785 0 /* invalid */},
3786 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3787 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3788 0 /* invalid */}
3789 },
3790 };
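// Editor's note (illustrative lookup): a <4 x s32> 'ugt' compare uses
// EltIdx = Log2(32 / 8) = 2, NumEltsIdx = Log2(4 / 2) = 1 and PredIdx = 1,
// i.e. OpcTable[2][1][1] == AArch64::CMHIv4i32.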
3791 unsigned EltIdx = Log2_32(SrcEltSize / 8);
3792 unsigned NumEltsIdx = Log2_32(NumElts / 2);
3793 unsigned Opc = OpcTable[EltIdx][NumEltsIdx][PredIdx];
3794 if (!Opc) {
3795 LLVM_DEBUG(dbgs() << "Could not map G_ICMP to cmp opcode");
3796 return false;
3797 }
3798
3799 const RegisterBank &VecRB = *RBI.getRegBank(SrcReg, MRI, TRI);
3800 const TargetRegisterClass *SrcRC =
3801 getRegClassForTypeOnBank(SrcTy, VecRB, true);
3802 if (!SrcRC) {
3803 LLVM_DEBUG(dbgs() << "Could not determine source register class.\n");
3804 return false;
3805 }
3806
3807 unsigned NotOpc = Pred == ICmpInst::ICMP_NE ? AArch64::NOTv8i8 : 0;
3808 if (SrcTy.getSizeInBits() == 128)
3809 NotOpc = NotOpc ? AArch64::NOTv16i8 : 0;
3810
3811 if (SwapOperands)
3812 std::swap(SrcReg, Src2Reg);
3813
3814 auto Cmp = MIB.buildInstr(Opc, {SrcRC}, {SrcReg, Src2Reg});
3815 constrainSelectedInstRegOperands(*Cmp, TII, TRI, RBI);
3816
3817 // Invert if we had a 'ne' cc.
3818 if (NotOpc) {
3819 Cmp = MIB.buildInstr(NotOpc, {DstReg}, {Cmp});
3820 constrainSelectedInstRegOperands(*Cmp, TII, TRI, RBI);
3821 } else {
3822 MIB.buildCopy(DstReg, Cmp.getReg(0));
3823 }
3824 RBI.constrainGenericRegister(DstReg, *SrcRC, MRI);
3825 I.eraseFromParent();
3826 return true;
3827}
3828
3829MachineInstr *AArch64InstructionSelector::emitScalarToVector(
3830 unsigned EltSize, const TargetRegisterClass *DstRC, Register Scalar,
3831 MachineIRBuilder &MIRBuilder) const {
3832 auto Undef = MIRBuilder.buildInstr(TargetOpcode::IMPLICIT_DEF, {DstRC}, {});
3833
3834 auto BuildFn = [&](unsigned SubregIndex) {
3835 auto Ins =
3836 MIRBuilder
3837 .buildInstr(TargetOpcode::INSERT_SUBREG, {DstRC}, {Undef, Scalar})
3838 .addImm(SubregIndex);
3839 constrainSelectedInstRegOperands(*Undef, TII, TRI, RBI);
3840 constrainSelectedInstRegOperands(*Ins, TII, TRI, RBI);
3841 return &*Ins;
3842 };
3843
3844 switch (EltSize) {
3845 case 8:
3846 return BuildFn(AArch64::bsub);
3847 case 16:
3848 return BuildFn(AArch64::hsub);
3849 case 32:
3850 return BuildFn(AArch64::ssub);
3851 case 64:
3852 return BuildFn(AArch64::dsub);
3853 default:
3854 return nullptr;
3855 }
3856}
3857
3858 MachineInstr *
3859 AArch64InstructionSelector::emitNarrowVector(Register DstReg, Register SrcReg,
3860 MachineIRBuilder &MIB,
3861 MachineRegisterInfo &MRI) const {
3862 LLT DstTy = MRI.getType(DstReg);
3863 const TargetRegisterClass *RC =
3864 getRegClassForTypeOnBank(DstTy, *RBI.getRegBank(SrcReg, MRI, TRI));
3865 if (RC != &AArch64::FPR32RegClass && RC != &AArch64::FPR64RegClass) {
3866 LLVM_DEBUG(dbgs() << "Unsupported register class!\n");
3867 return nullptr;
3868 }
3869 unsigned SubReg = 0;
3870 if (!getSubRegForClass(RC, TRI, SubReg))
3871 return nullptr;
3872 if (SubReg != AArch64::ssub && SubReg != AArch64::dsub) {
3873 LLVM_DEBUG(dbgs() << "Unsupported destination size! ("
3874 << DstTy.getSizeInBits() << ")\n");
3875 return nullptr;
3876 }
3877 auto Copy = MIB.buildInstr(TargetOpcode::COPY, {DstReg}, {})
3878 .addReg(SrcReg, 0, SubReg);
3879 RBI.constrainGenericRegister(DstReg, *RC, MRI);
3880 return Copy;
3881}
3882
3883 bool AArch64InstructionSelector::selectMergeValues(
3884 MachineInstr &I, MachineRegisterInfo &MRI) {
3885 assert(I.getOpcode() == TargetOpcode::G_MERGE_VALUES && "unexpected opcode");
3886 const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
3887 const LLT SrcTy = MRI.getType(I.getOperand(1).getReg());
3888 assert(!DstTy.isVector() && !SrcTy.isVector() && "invalid merge operation");
3889 const RegisterBank &RB = *RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI);
3890
3891 if (I.getNumOperands() != 3)
3892 return false;
3893
3894 // Merging 2 s64s into an s128.
3895 if (DstTy == LLT::scalar(128)) {
3896 if (SrcTy.getSizeInBits() != 64)
3897 return false;
3898 Register DstReg = I.getOperand(0).getReg();
3899 Register Src1Reg = I.getOperand(1).getReg();
3900 Register Src2Reg = I.getOperand(2).getReg();
3901 auto Tmp = MIB.buildInstr(TargetOpcode::IMPLICIT_DEF, {DstTy}, {});
3902 MachineInstr *InsMI = emitLaneInsert(std::nullopt, Tmp.getReg(0), Src1Reg,
3903 /* LaneIdx */ 0, RB, MIB);
3904 if (!InsMI)
3905 return false;
3906 MachineInstr *Ins2MI = emitLaneInsert(DstReg, InsMI->getOperand(0).getReg(),
3907 Src2Reg, /* LaneIdx */ 1, RB, MIB);
3908 if (!Ins2MI)
3909 return false;
3910 constrainSelectedInstRegOperands(*InsMI, TII, TRI, RBI);
3911 constrainSelectedInstRegOperands(*Ins2MI, TII, TRI, RBI);
3912 I.eraseFromParent();
3913 return true;
3914 }
3915
3916 if (RB.getID() != AArch64::GPRRegBankID)
3917 return false;
3918
3919 if (DstTy.getSizeInBits() != 64 || SrcTy.getSizeInBits() != 32)
3920 return false;
3921
3922 auto *DstRC = &AArch64::GPR64RegClass;
3923 Register SubToRegDef = MRI.createVirtualRegister(DstRC);
3924 MachineInstr &SubRegMI = *BuildMI(*I.getParent(), I, I.getDebugLoc(),
3925 TII.get(TargetOpcode::SUBREG_TO_REG))
3926 .addDef(SubToRegDef)
3927 .addImm(0)
3928 .addUse(I.getOperand(1).getReg())
3929 .addImm(AArch64::sub_32);
3930 Register SubToRegDef2 = MRI.createVirtualRegister(DstRC);
3931 // Need to anyext the second scalar before we can use bfm
3932 MachineInstr &SubRegMI2 = *BuildMI(*I.getParent(), I, I.getDebugLoc(),
3933 TII.get(TargetOpcode::SUBREG_TO_REG))
3934 .addDef(SubToRegDef2)
3935 .addImm(0)
3936 .addUse(I.getOperand(2).getReg())
3937 .addImm(AArch64::sub_32);
3938 MachineInstr &BFM =
3939 *BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::BFMXri))
3940 .addDef(I.getOperand(0).getReg())
3941 .addUse(SubToRegDef)
3942 .addUse(SubToRegDef2)
3943 .addImm(32)
3944 .addImm(31);
3945 constrainSelectedInstRegOperands(SubRegMI, TII, TRI, RBI);
3946 constrainSelectedInstRegOperands(SubRegMI2, TII, TRI, RBI);
3947 constrainSelectedInstRegOperands(BFM, TII, TRI, RBI);
3948 I.eraseFromParent();
3949 return true;
3950}
3951
3952static bool getLaneCopyOpcode(unsigned &CopyOpc, unsigned &ExtractSubReg,
3953 const unsigned EltSize) {
3954 // Choose a lane copy opcode and subregister based off of the size of the
3955 // vector's elements.
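// Editor's note: e.g. 32-bit elements use DUPi32 for lane copies and the
// ssub subregister for the lane-0 subregister copy.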
3956 switch (EltSize) {
3957 case 8:
3958 CopyOpc = AArch64::DUPi8;
3959 ExtractSubReg = AArch64::bsub;
3960 break;
3961 case 16:
3962 CopyOpc = AArch64::DUPi16;
3963 ExtractSubReg = AArch64::hsub;
3964 break;
3965 case 32:
3966 CopyOpc = AArch64::DUPi32;
3967 ExtractSubReg = AArch64::ssub;
3968 break;
3969 case 64:
3970 CopyOpc = AArch64::DUPi64;
3971 ExtractSubReg = AArch64::dsub;
3972 break;
3973 default:
3974 // Unknown size, bail out.
3975 LLVM_DEBUG(dbgs() << "Elt size '" << EltSize << "' unsupported.\n");
3976 return false;
3977 }
3978 return true;
3979}
3980
3981MachineInstr *AArch64InstructionSelector::emitExtractVectorElt(
3982 std::optional<Register> DstReg, const RegisterBank &DstRB, LLT ScalarTy,
3983 Register VecReg, unsigned LaneIdx, MachineIRBuilder &MIRBuilder) const {
3984 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
3985 unsigned CopyOpc = 0;
3986 unsigned ExtractSubReg = 0;
3987 if (!getLaneCopyOpcode(CopyOpc, ExtractSubReg, ScalarTy.getSizeInBits())) {
3988 LLVM_DEBUG(
3989 dbgs() << "Couldn't determine lane copy opcode for instruction.\n");
3990 return nullptr;
3991 }
3992
3993 const TargetRegisterClass *DstRC =
3994 getRegClassForTypeOnBank(ScalarTy, DstRB, true);
3995 if (!DstRC) {
3996 LLVM_DEBUG(dbgs() << "Could not determine destination register class.\n");
3997 return nullptr;
3998 }
3999
4000 const RegisterBank &VecRB = *RBI.getRegBank(VecReg, MRI, TRI);
4001 const LLT &VecTy = MRI.getType(VecReg);
4002 const TargetRegisterClass *VecRC =
4003 getRegClassForTypeOnBank(VecTy, VecRB, true);
4004 if (!VecRC) {
4005 LLVM_DEBUG(dbgs() << "Could not determine source register class.\n");
4006 return nullptr;
4007 }
4008
4009 // The register that we're going to copy into.
4010 Register InsertReg = VecReg;
4011 if (!DstReg)
4012 DstReg = MRI.createVirtualRegister(DstRC);
4013 // If the lane index is 0, we just use a subregister COPY.
4014 if (LaneIdx == 0) {
4015 auto Copy = MIRBuilder.buildInstr(TargetOpcode::COPY, {*DstReg}, {})
4016 .addReg(VecReg, 0, ExtractSubReg);
4017 RBI.constrainGenericRegister(*DstReg, *DstRC, MRI);
4018 return &*Copy;
4019 }
4020
4021 // Lane copies require 128-bit wide registers. If we're dealing with an
4022 // unpacked vector, then we need to move up to that width. Insert an implicit
4023 // def and a subregister insert to get us there.
4024 if (VecTy.getSizeInBits() != 128) {
4025 MachineInstr *ScalarToVector = emitScalarToVector(
4026 VecTy.getSizeInBits(), &AArch64::FPR128RegClass, VecReg, MIRBuilder);
4027 if (!ScalarToVector)
4028 return nullptr;
4029 InsertReg = ScalarToVector->getOperand(0).getReg();
4030 }
4031
4032 MachineInstr *LaneCopyMI =
4033 MIRBuilder.buildInstr(CopyOpc, {*DstReg}, {InsertReg}).addImm(LaneIdx);
4034 constrainSelectedInstRegOperands(*LaneCopyMI, TII, TRI, RBI);
4035
4036 // Make sure that we actually constrain the initial copy.
4037 RBI.constrainGenericRegister(*DstReg, *DstRC, MRI);
4038 return LaneCopyMI;
4039}
4040
4041 bool AArch64InstructionSelector::selectExtractElt(
4042 MachineInstr &I, MachineRegisterInfo &MRI) {
4043 assert(I.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT &&
4044 "unexpected opcode!");
4045 Register DstReg = I.getOperand(0).getReg();
4046 const LLT NarrowTy = MRI.getType(DstReg);
4047 const Register SrcReg = I.getOperand(1).getReg();
4048 const LLT WideTy = MRI.getType(SrcReg);
4049 (void)WideTy;
4050 assert(WideTy.getSizeInBits() >= NarrowTy.getSizeInBits() &&
4051 "source register size too small!");
4052 assert(!NarrowTy.isVector() && "cannot extract vector into vector!");
4053
4054 // Need the lane index to determine the correct copy opcode.
4055 MachineOperand &LaneIdxOp = I.getOperand(2);
4056 assert(LaneIdxOp.isReg() && "Lane index operand was not a register?");
4057
4058 if (RBI.getRegBank(DstReg, MRI, TRI)->getID() != AArch64::FPRRegBankID) {
4059 LLVM_DEBUG(dbgs() << "Cannot extract into GPR.\n");
4060 return false;
4061 }
4062
4063 // Find the index to extract from.
4064 auto VRegAndVal = getIConstantVRegValWithLookThrough(LaneIdxOp.getReg(), MRI);
4065 if (!VRegAndVal)
4066 return false;
4067 unsigned LaneIdx = VRegAndVal->Value.getSExtValue();
4068
4069
4070 const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
4071 MachineInstr *Extract = emitExtractVectorElt(DstReg, DstRB, NarrowTy, SrcReg,
4072 LaneIdx, MIB);
4073 if (!Extract)
4074 return false;
4075
4076 I.eraseFromParent();
4077 return true;
4078}
4079
4080 bool AArch64InstructionSelector::selectSplitVectorUnmerge(
4081 MachineInstr &I, MachineRegisterInfo &MRI) {
4082 unsigned NumElts = I.getNumOperands() - 1;
4083 Register SrcReg = I.getOperand(NumElts).getReg();
4084 const LLT NarrowTy = MRI.getType(I.getOperand(0).getReg());
4085 const LLT SrcTy = MRI.getType(SrcReg);
4086
4087 assert(NarrowTy.isVector() && "Expected an unmerge into vectors");
4088 if (SrcTy.getSizeInBits() > 128) {
4089 LLVM_DEBUG(dbgs() << "Unexpected vector type for vec split unmerge");
4090 return false;
4091 }
4092
4093 // We implement a split vector operation by treating the sub-vectors as
4094 // scalars and extracting them.
4095 const RegisterBank &DstRB =
4096 *RBI.getRegBank(I.getOperand(0).getReg(), MRI, TRI);
4097 for (unsigned OpIdx = 0; OpIdx < NumElts; ++OpIdx) {
4098 Register Dst = I.getOperand(OpIdx).getReg();
4099 MachineInstr *Extract =
4100 emitExtractVectorElt(Dst, DstRB, NarrowTy, SrcReg, OpIdx, MIB);
4101 if (!Extract)
4102 return false;
4103 }
4104 I.eraseFromParent();
4105 return true;
4106}
4107
4108 bool AArch64InstructionSelector::selectUnmergeValues(MachineInstr &I,
4109 MachineRegisterInfo &MRI) {
4110 assert(I.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
4111 "unexpected opcode");
4112
4113 // TODO: Handle unmerging into GPRs and from scalars to scalars.
4114 if (RBI.getRegBank(I.getOperand(0).getReg(), MRI, TRI)->getID() !=
4115 AArch64::FPRRegBankID ||
4116 RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI)->getID() !=
4117 AArch64::FPRRegBankID) {
4118 LLVM_DEBUG(dbgs() << "Unmerging vector-to-gpr and scalar-to-scalar "
4119 "currently unsupported.\n");
4120 return false;
4121 }
4122
4123 // The last operand is the vector source register, and every other operand is
4124 // a register to unpack into.
4125 unsigned NumElts = I.getNumOperands() - 1;
4126 Register SrcReg = I.getOperand(NumElts).getReg();
4127 const LLT NarrowTy = MRI.getType(I.getOperand(0).getReg());
4128 const LLT WideTy = MRI.getType(SrcReg);
4129 (void)WideTy;
4130 assert((WideTy.isVector() || WideTy.getSizeInBits() == 128) &&
4131 "can only unmerge from vector or s128 types!");
4132 assert(WideTy.getSizeInBits() > NarrowTy.getSizeInBits() &&
4133 "source register size too small!");
4134
4135 if (!NarrowTy.isScalar())
4136 return selectSplitVectorUnmerge(I, MRI);
4137
4138 // Choose a lane copy opcode and subregister based off of the size of the
4139 // vector's elements.
4140 unsigned CopyOpc = 0;
4141 unsigned ExtractSubReg = 0;
4142 if (!getLaneCopyOpcode(CopyOpc, ExtractSubReg, NarrowTy.getSizeInBits()))
4143 return false;
4144
4145 // Set up for the lane copies.
4146 MachineBasicBlock &MBB = *I.getParent();
4147
4148 // Stores the registers we'll be copying from.
4149 SmallVector<Register, 4> InsertRegs;
4150
4151 // We'll use the first register twice, so we only need NumElts-1 registers.
4152 unsigned NumInsertRegs = NumElts - 1;
4153
4154 // If our elements fit into exactly 128 bits, then we can copy from the source
4155 // directly. Otherwise, we need to do a bit of setup with some subregister
4156 // inserts.
4157 if (NarrowTy.getSizeInBits() * NumElts == 128) {
4158 InsertRegs = SmallVector<Register, 4>(NumInsertRegs, SrcReg);
4159 } else {
4160 // No. We have to perform subregister inserts. For each insert, create an
4161 // implicit def and a subregister insert, and save the register we create.
4162 const TargetRegisterClass *RC = getRegClassForTypeOnBank(
4163 LLT::fixed_vector(NumElts, WideTy.getScalarSizeInBits()),
4164 *RBI.getRegBank(SrcReg, MRI, TRI));
4165 unsigned SubReg = 0;
4166 bool Found = getSubRegForClass(RC, TRI, SubReg);
4167 (void)Found;
4168 assert(Found && "expected to find last operand's subreg idx");
4169 for (unsigned Idx = 0; Idx < NumInsertRegs; ++Idx) {
4170 Register ImpDefReg = MRI.createVirtualRegister(&AArch64::FPR128RegClass);
4171 MachineInstr &ImpDefMI =
4172 *BuildMI(MBB, I, I.getDebugLoc(), TII.get(TargetOpcode::IMPLICIT_DEF),
4173 ImpDefReg);
4174
4175 // Now, create the subregister insert from SrcReg.
4176 Register InsertReg = MRI.createVirtualRegister(&AArch64::FPR128RegClass);
4177 MachineInstr &InsMI =
4178 *BuildMI(MBB, I, I.getDebugLoc(),
4179 TII.get(TargetOpcode::INSERT_SUBREG), InsertReg)
4180 .addUse(ImpDefReg)
4181 .addUse(SrcReg)
4182 .addImm(SubReg);
4183
4184 constrainSelectedInstRegOperands(ImpDefMI, TII, TRI, RBI);
4185 constrainSelectedInstRegOperands(InsMI, TII, TRI, RBI);
4186
4187 // Save the register so that we can copy from it after.
4188 InsertRegs.push_back(InsertReg);
4189 }
4190 }
4191
4192 // Now that we've created any necessary subregister inserts, we can
4193 // create the copies.
4194 //
4195 // Perform the first copy separately as a subregister copy.
4196 Register CopyTo = I.getOperand(0).getReg();
4197 auto FirstCopy = MIB.buildInstr(TargetOpcode::COPY, {CopyTo}, {})
4198 .addReg(InsertRegs[0], 0, ExtractSubReg);
4199 constrainSelectedInstRegOperands(*FirstCopy, TII, TRI, RBI);
4200
4201 // Now, perform the remaining copies as vector lane copies.
4202 unsigned LaneIdx = 1;
4203 for (Register InsReg : InsertRegs) {
4204 Register CopyTo = I.getOperand(LaneIdx).getReg();
4205 MachineInstr &CopyInst =
4206 *BuildMI(MBB, I, I.getDebugLoc(), TII.get(CopyOpc), CopyTo)
4207 .addUse(InsReg)
4208 .addImm(LaneIdx);
4209 constrainSelectedInstRegOperands(CopyInst, TII, TRI, RBI);
4210 ++LaneIdx;
4211 }
4212
4213 // Separately constrain the first copy's destination. Because of the
4214 // limitation in constrainOperandRegClass, we can't guarantee that this will
4215 // actually be constrained. So, do it ourselves using the second operand.
4216 const TargetRegisterClass *RC =
4217 MRI.getRegClassOrNull(I.getOperand(1).getReg());
4218 if (!RC) {
4219 LLVM_DEBUG(dbgs() << "Couldn't constrain copy destination.\n");
4220 return false;
4221 }
4222
4223 RBI.constrainGenericRegister(CopyTo, *RC, MRI);
4224 I.eraseFromParent();
4225 return true;
4226}
4227
4228 bool AArch64InstructionSelector::selectConcatVectors(
4229 MachineInstr &I, MachineRegisterInfo &MRI) {
4230 assert(I.getOpcode() == TargetOpcode::G_CONCAT_VECTORS &&
4231 "Unexpected opcode");
4232 Register Dst = I.getOperand(0).getReg();
4233 Register Op1 = I.getOperand(1).getReg();
4234 Register Op2 = I.getOperand(2).getReg();
4235 MachineInstr *ConcatMI = emitVectorConcat(Dst, Op1, Op2, MIB);
4236 if (!ConcatMI)
4237 return false;
4238 I.eraseFromParent();
4239 return true;
4240}
4241
4242unsigned
4243AArch64InstructionSelector::emitConstantPoolEntry(const Constant *CPVal,
4244 MachineFunction &MF) const {
4245 Type *CPTy = CPVal->getType();
4246 Align Alignment = MF.getDataLayout().getPrefTypeAlign(CPTy);
4247
4248 MachineConstantPool *MCP = MF.getConstantPool();
4249 return MCP->getConstantPoolIndex(CPVal, Alignment);
4250}
4251
4252MachineInstr *AArch64InstructionSelector::emitLoadFromConstantPool(
4253 const Constant *CPVal, MachineIRBuilder &MIRBuilder) const {
4254 const TargetRegisterClass *RC;
4255 unsigned Opc;
4256 bool IsTiny = TM.getCodeModel() == CodeModel::Tiny;
4257 unsigned Size = MIRBuilder.getDataLayout().getTypeStoreSize(CPVal->getType());
4258 switch (Size) {
4259 case 16:
4260 RC = &AArch64::FPR128RegClass;
4261 Opc = IsTiny ? AArch64::LDRQl : AArch64::LDRQui;
4262 break;
4263 case 8:
4264 RC = &AArch64::FPR64RegClass;
4265 Opc = IsTiny ? AArch64::LDRDl : AArch64::LDRDui;
4266 break;
4267 case 4:
4268 RC = &AArch64::FPR32RegClass;
4269 Opc = IsTiny ? AArch64::LDRSl : AArch64::LDRSui;
4270 break;
4271 case 2:
4272 RC = &AArch64::FPR16RegClass;
4273 Opc = AArch64::LDRHui;
4274 break;
4275 default:
4276 LLVM_DEBUG(dbgs() << "Could not load from constant pool of type "
4277 << *CPVal->getType());
4278 return nullptr;
4279 }
4280
4281 MachineInstr *LoadMI = nullptr;
4282 auto &MF = MIRBuilder.getMF();
4283 unsigned CPIdx = emitConstantPoolEntry(CPVal, MF);
4284 if (IsTiny && (Size == 16 || Size == 8 || Size == 4)) {
4285 // Use load(literal) for tiny code model.
4286 LoadMI = &*MIRBuilder.buildInstr(Opc, {RC}, {}).addConstantPoolIndex(CPIdx);
4287 } else {
4288 auto Adrp =
4289 MIRBuilder.buildInstr(AArch64::ADRP, {&AArch64::GPR64RegClass}, {})
4290 .addConstantPoolIndex(CPIdx, 0, AArch64II::MO_PAGE);
4291
4292 LoadMI = &*MIRBuilder.buildInstr(Opc, {RC}, {Adrp})
4293 .addConstantPoolIndex(
4294 CPIdx, 0, AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
4295
4296 constrainSelectedInstRegOperands(*Adrp, TII, TRI, RBI);
4297 }
4298
4299 MachinePointerInfo PtrInfo = MachinePointerInfo::getConstantPool(MF);
4300 LoadMI->addMemOperand(MF, MF.getMachineMemOperand(PtrInfo,
4301 MachineMemOperand::MOLoad,
4302 Size, Align(Size)));
4303 constrainSelectedInstRegOperands(*LoadMI, TII, TRI, RBI);
4304 return LoadMI;
4305}
4306
4307 /// Return an <Opcode, SubregIndex> pair to do a vector elt insert of a given
4308/// size and RB.
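// Editor's note: e.g. inserting a 32-bit element from a GPR yields
// {INSvi32gpr, ssub}, while inserting it from another vector's lane yields
// {INSvi32lane, ssub}.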
4309static std::pair<unsigned, unsigned>
4310getInsertVecEltOpInfo(const RegisterBank &RB, unsigned EltSize) {
4311 unsigned Opc, SubregIdx;
4312 if (RB.getID() == AArch64::GPRRegBankID) {
4313 if (EltSize == 8) {
4314 Opc = AArch64::INSvi8gpr;
4315 SubregIdx = AArch64::bsub;
4316 } else if (EltSize == 16) {
4317 Opc = AArch64::INSvi16gpr;
4318 SubregIdx = AArch64::ssub;
4319 } else if (EltSize == 32) {
4320 Opc = AArch64::INSvi32gpr;
4321 SubregIdx = AArch64::ssub;
4322 } else if (EltSize == 64) {
4323 Opc = AArch64::INSvi64gpr;
4324 SubregIdx = AArch64::dsub;
4325 } else {
4326 llvm_unreachable("invalid elt size!");
4327 }
4328 } else {
4329 if (EltSize == 8) {
4330 Opc = AArch64::INSvi8lane;
4331 SubregIdx = AArch64::bsub;
4332 } else if (EltSize == 16) {
4333 Opc = AArch64::INSvi16lane;
4334 SubregIdx = AArch64::hsub;
4335 } else if (EltSize == 32) {
4336 Opc = AArch64::INSvi32lane;
4337 SubregIdx = AArch64::ssub;
4338 } else if (EltSize == 64) {
4339 Opc = AArch64::INSvi64lane;
4340 SubregIdx = AArch64::dsub;
4341 } else {
4342 llvm_unreachable("invalid elt size!");
4343 }
4344 }
4345 return std::make_pair(Opc, SubregIdx);
4346}
4347
4348MachineInstr *AArch64InstructionSelector::emitInstr(
4349 unsigned Opcode, std::initializer_list<llvm::DstOp> DstOps,
4350 std::initializer_list<llvm::SrcOp> SrcOps, MachineIRBuilder &MIRBuilder,
4351 const ComplexRendererFns &RenderFns) const {
4352 assert(Opcode && "Expected an opcode?");
4353 assert(!isPreISelGenericOpcode(Opcode) &&
4354 "Function should only be used to produce selected instructions!");
4355 auto MI = MIRBuilder.buildInstr(Opcode, DstOps, SrcOps);
4356 if (RenderFns)
4357 for (auto &Fn : *RenderFns)
4358 Fn(MI);
4359 constrainSelectedInstRegOperands(*MI, TII, TRI, RBI);
4360 return &*MI;
4361}
4362
4363MachineInstr *AArch64InstructionSelector::emitAddSub(
4364 const std::array<std::array<unsigned, 2>, 5> &AddrModeAndSizeToOpcode,
4365 Register Dst, MachineOperand &LHS, MachineOperand &RHS,
4366 MachineIRBuilder &MIRBuilder) const {
4367 MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
4368 assert(LHS.isReg() && RHS.isReg() && "Expected register operands?");
4369 auto Ty = MRI.getType(LHS.getReg());
4370 assert(!Ty.isVector() && "Expected a scalar or pointer?");
4371 unsigned Size = Ty.getSizeInBits();
4372 assert((Size == 32 || Size == 64) && "Expected a 32-bit or 64-bit type only");
4373 bool Is32Bit = Size == 32;
4374
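// Editor's note on the table layout shared by emitADD/emitADDS/emitSUBS:
// row [0] is the ri form (positive immediate), [1] the rs form (shifted
// register), [2] the plain rr form, [3] the ri form with the immediate
// negated (which flips ADD <-> SUB), and [4] the rx form (extended register);
// column [Is32Bit] selects the W-form (index 1) or X-form (index 0) opcode.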
4375 // INSTRri form with positive arithmetic immediate.
4376 if (auto Fns = selectArithImmed(RHS))
4377 return emitInstr(AddrModeAndSizeToOpcode[0][Is32Bit], {Dst}, {LHS},
4378 MIRBuilder, Fns);
4379
4380 // INSTRri form with negative arithmetic immediate.
4381 if (auto Fns = selectNegArithImmed(RHS))
4382 return emitInstr(AddrModeAndSizeToOpcode[3][Is32Bit], {Dst}, {LHS},
4383 MIRBuilder, Fns);
4384
4385 // INSTRrx form.
4386 if (auto Fns = selectArithExtendedRegister(RHS))
4387 return emitInstr(AddrModeAndSizeToOpcode[4][Is32Bit], {Dst}, {LHS},
4388 MIRBuilder, Fns);
4389
4390 // INSTRrs form.
4391 if (auto Fns = selectShiftedRegister(RHS))
4392 return emitInstr(AddrModeAndSizeToOpcode[1][Is32Bit], {Dst}, {LHS},
4393 MIRBuilder, Fns);
4394 return emitInstr(AddrModeAndSizeToOpcode[2][Is32Bit], {Dst}, {LHS, RHS},
4395 MIRBuilder);
4396}
4397
4398 MachineInstr *
4399 AArch64InstructionSelector::emitADD(Register DefReg, MachineOperand &LHS,
4400 MachineOperand &RHS,
4401 MachineIRBuilder &MIRBuilder) const {
4402 const std::array<std::array<unsigned, 2>, 5> OpcTable{
4403 {{AArch64::ADDXri, AArch64::ADDWri},
4404 {AArch64::ADDXrs, AArch64::ADDWrs},
4405 {AArch64::ADDXrr, AArch64::ADDWrr},
4406 {AArch64::SUBXri, AArch64::SUBWri},
4407 {AArch64::ADDXrx, AArch64::ADDWrx}}};
4408 return emitAddSub(OpcTable, DefReg, LHS, RHS, MIRBuilder);
4409}
4410
4411 MachineInstr *
4412 AArch64InstructionSelector::emitADDS(Register Dst, MachineOperand &LHS,
4413 MachineOperand &RHS,
4414 MachineIRBuilder &MIRBuilder) const {
4415 const std::array<std::array<unsigned, 2>, 5> OpcTable{
4416 {{AArch64::ADDSXri, AArch64::ADDSWri},
4417 {AArch64::ADDSXrs, AArch64::ADDSWrs},
4418 {AArch64::ADDSXrr, AArch64::ADDSWrr},
4419 {AArch64::SUBSXri, AArch64::SUBSWri},
4420 {AArch64::ADDSXrx, AArch64::ADDSWrx}}};
4421 return emitAddSub(OpcTable, Dst, LHS, RHS, MIRBuilder);
4422}
4423
4424 MachineInstr *
4425 AArch64InstructionSelector::emitSUBS(Register Dst, MachineOperand &LHS,
4426 MachineOperand &RHS,
4427 MachineIRBuilder &MIRBuilder) const {
4428 const std::array<std::array<unsigned, 2>, 5> OpcTable{
4429 {{AArch64::SUBSXri, AArch64::SUBSWri},
4430 {AArch64::SUBSXrs, AArch64::SUBSWrs},
4431 {AArch64::SUBSXrr, AArch64::SUBSWrr},
4432 {AArch64::ADDSXri, AArch64::ADDSWri},
4433 {AArch64::SUBSXrx, AArch64::SUBSWrx}}};
4434 return emitAddSub(OpcTable, Dst, LHS, RHS, MIRBuilder);
4435}
4436
4437 MachineInstr *
4438 AArch64InstructionSelector::emitADCS(Register Dst, MachineOperand &LHS,
4439 MachineOperand &RHS,
4440 MachineIRBuilder &MIRBuilder) const {
4441 assert(LHS.isReg() && RHS.isReg() && "Expected register operands?");
4442 MachineRegisterInfo *MRI = MIRBuilder.getMRI();
4443 bool Is32Bit = (MRI->getType(LHS.getReg()).getSizeInBits() == 32);
4444 static const unsigned OpcTable[2] = {AArch64::ADCSXr, AArch64::ADCSWr};
4445 return emitInstr(OpcTable[Is32Bit], {Dst}, {LHS, RHS}, MIRBuilder);
4446}
4447
4448 MachineInstr *
4449 AArch64InstructionSelector::emitSBCS(Register Dst, MachineOperand &LHS,
4450 MachineOperand &RHS,
4451 MachineIRBuilder &MIRBuilder) const {
4452 assert(LHS.isReg() && RHS.isReg() && "Expected register operands?");
4453 MachineRegisterInfo *MRI = MIRBuilder.getMRI();
4454 bool Is32Bit = (MRI->getType(LHS.getReg()).getSizeInBits() == 32);
4455 static const unsigned OpcTable[2] = {AArch64::SBCSXr, AArch64::SBCSWr};
4456 return emitInstr(OpcTable[Is32Bit], {Dst}, {LHS, RHS}, MIRBuilder);
4457}
4458
4459 MachineInstr *
4460 AArch64InstructionSelector::emitCMN(MachineOperand &LHS, MachineOperand &RHS,
4461 MachineIRBuilder &MIRBuilder) const {
4462 MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
4463 bool Is32Bit = (MRI.getType(LHS.getReg()).getSizeInBits() == 32);
4464 auto RC = Is32Bit ? &AArch64::GPR32RegClass : &AArch64::GPR64RegClass;
4465 return emitADDS(MRI.createVirtualRegister(RC), LHS, RHS, MIRBuilder);
4466}
4467
4468 MachineInstr *
4469 AArch64InstructionSelector::emitTST(MachineOperand &LHS, MachineOperand &RHS,
4470 MachineIRBuilder &MIRBuilder) const {
4471 assert(LHS.isReg() && RHS.isReg() && "Expected register operands?");
4472 MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
4473 LLT Ty = MRI.getType(LHS.getReg());
4474 unsigned RegSize = Ty.getSizeInBits();
4475 bool Is32Bit = (RegSize == 32);
4476 const unsigned OpcTable[3][2] = {{AArch64::ANDSXri, AArch64::ANDSWri},
4477 {AArch64::ANDSXrs, AArch64::ANDSWrs},
4478 {AArch64::ANDSXrr, AArch64::ANDSWrr}};
4479 // ANDS needs a logical immediate for its immediate form. Check if we can
4480 // fold one in.
4481 if (auto ValAndVReg = getIConstantVRegValWithLookThrough(RHS.getReg(), MRI)) {
4482 int64_t Imm = ValAndVReg->Value.getSExtValue();
4483
4484 if (AArch64_AM::isLogicalImmediate(Imm, RegSize)) {
4485 auto TstMI = MIRBuilder.buildInstr(OpcTable[0][Is32Bit], {Ty}, {LHS});
4486 TstMI.addImm(AArch64_AM::encodeLogicalImmediate(Imm, RegSize));
4487 constrainSelectedInstRegOperands(*TstMI, TII, TRI, RBI);
4488 return &*TstMI;
4489 }
4490 }
4491
4492 if (auto Fns = selectLogicalShiftedRegister(RHS))
4493 return emitInstr(OpcTable[1][Is32Bit], {Ty}, {LHS}, MIRBuilder, Fns);
4494 return emitInstr(OpcTable[2][Is32Bit], {Ty}, {LHS, RHS}, MIRBuilder);
4495}
4496
4497MachineInstr *AArch64InstructionSelector::emitIntegerCompare(
4498 MachineOperand &LHS, MachineOperand &RHS, MachineOperand &Predicate,
4499 MachineIRBuilder &MIRBuilder) const {
4500 assert(LHS.isReg() && RHS.isReg() && "Expected LHS and RHS to be registers!");
4501 assert(Predicate.isPredicate() && "Expected predicate?");
4502 MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
4503 LLT CmpTy = MRI.getType(LHS.getReg());
4504 assert(!CmpTy.isVector() && "Expected scalar or pointer");
4505 unsigned Size = CmpTy.getSizeInBits();
4506 (void)Size;
4507 assert((Size == 32 || Size == 64) && "Expected a 32-bit or 64-bit LHS/RHS?");
4508 // Fold the compare into a cmn or tst if possible.
4509 if (auto FoldCmp = tryFoldIntegerCompare(LHS, RHS, Predicate, MIRBuilder))
4510 return FoldCmp;
4511 auto Dst = MRI.cloneVirtualRegister(LHS.getReg());
4512 return emitSUBS(Dst, LHS, RHS, MIRBuilder);
4513}
4514
4515MachineInstr *AArch64InstructionSelector::emitCSetForFCmp(
4516 Register Dst, CmpInst::Predicate Pred, MachineIRBuilder &MIRBuilder) const {
4517 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
4518#ifndef NDEBUG
4519 LLT Ty = MRI.getType(Dst);
4520 assert(!Ty.isVector() && Ty.getSizeInBits() == 32 &&
4521 "Expected a 32-bit scalar register?");
4522#endif
4523 const Register ZReg = AArch64::WZR;
4524 AArch64CC::CondCode CC1, CC2;
4525 changeFCMPPredToAArch64CC(Pred, CC1, CC2);
4526 auto InvCC1 = AArch64CC::getInvertedCondCode(CC1);
4527 if (CC2 == AArch64CC::AL)
4528 return emitCSINC(/*Dst=*/Dst, /*Src1=*/ZReg, /*Src2=*/ZReg, InvCC1,
4529 MIRBuilder);
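// Editor's note: two-condition FP predicates (e.g. ONE, UEQ) cannot be
// expressed with a single AArch64 condition code, so each half is
// materialized with a CSINC below and the two results are ORed together.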
4530 const TargetRegisterClass *RC = &AArch64::GPR32RegClass;
4531 Register Def1Reg = MRI.createVirtualRegister(RC);
4532 Register Def2Reg = MRI.createVirtualRegister(RC);
4533 auto InvCC2 = AArch64CC::getInvertedCondCode(CC2);
4534 emitCSINC(/*Dst=*/Def1Reg, /*Src1=*/ZReg, /*Src2=*/ZReg, InvCC1, MIRBuilder);
4535 emitCSINC(/*Dst=*/Def2Reg, /*Src1=*/ZReg, /*Src2=*/ZReg, InvCC2, MIRBuilder);
4536 auto OrMI = MIRBuilder.buildInstr(AArch64::ORRWrr, {Dst}, {Def1Reg, Def2Reg});
4537 constrainSelectedInstRegOperands(*OrMI, TII, TRI, RBI);
4538 return &*OrMI;
4539}
4540
4541MachineInstr *AArch64InstructionSelector::emitFPCompare(
4542 Register LHS, Register RHS, MachineIRBuilder &MIRBuilder,
4543 std::optional<CmpInst::Predicate> Pred) const {
4544 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
4545 LLT Ty = MRI.getType(LHS);
4546 if (Ty.isVector())
4547 return nullptr;
4548 unsigned OpSize = Ty.getSizeInBits();
4549 assert(OpSize == 16 || OpSize == 32 || OpSize == 64);
4550
4551 // If this is a compare against +0.0, then we don't have
4552 // to explicitly materialize a constant.
4553 const ConstantFP *FPImm = getConstantFPVRegVal(RHS, MRI);
4554 bool ShouldUseImm = FPImm && (FPImm->isZero() && !FPImm->isNegative());
4555
4556 auto IsEqualityPred = [](CmpInst::Predicate P) {
4557 return P == CmpInst::FCMP_OEQ || P == CmpInst::FCMP_ONE ||
4558 P == CmpInst::FCMP_UEQ || P == CmpInst::FCMP_UNE;
4559 };
4560 if (!ShouldUseImm && Pred && IsEqualityPred(*Pred)) {
4561 // Try commutating the operands.
4562 const ConstantFP *LHSImm = getConstantFPVRegVal(LHS, MRI);
4563 if (LHSImm && (LHSImm->isZero() && !LHSImm->isNegative())) {
4564 ShouldUseImm = true;
4565 std::swap(LHS, RHS);
4566 }
4567 }
4568 unsigned CmpOpcTbl[2][3] = {
4569 {AArch64::FCMPHrr, AArch64::FCMPSrr, AArch64::FCMPDrr},
4570 {AArch64::FCMPHri, AArch64::FCMPSri, AArch64::FCMPDri}};
4571 unsigned CmpOpc =
4572 CmpOpcTbl[ShouldUseImm][OpSize == 16 ? 0 : (OpSize == 32 ? 1 : 2)];
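// Editor's note: for instance, an f32 compare against +0.0 selects FCMPSri,
// the compare-with-zero form that takes no second register operand.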
4573
4574 // Partially build the compare. Decide if we need to add a use for the
4575 // third operand based off whether or not we're comparing against 0.0.
4576 auto CmpMI = MIRBuilder.buildInstr(CmpOpc).addUse(LHS);
4577 CmpMI.setMIFlags(MachineInstr::NoFPExcept);
4578 if (!ShouldUseImm)
4579 CmpMI.addUse(RHS);
4580 constrainSelectedInstRegOperands(*CmpMI, TII, TRI, RBI);
4581 return &*CmpMI;
4582}
4583
4584MachineInstr *AArch64InstructionSelector::emitVectorConcat(
4585 std::optional<Register> Dst, Register Op1, Register Op2,
4586 MachineIRBuilder &MIRBuilder) const {
4587 // We implement a vector concat by:
4588 // 1. Use scalar_to_vector to insert the lower vector into the larger dest
4589 // 2. Insert the upper vector into the destination's upper element
4590 // TODO: some of this code is common with G_BUILD_VECTOR handling.
4591 MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
4592
4593 const LLT Op1Ty = MRI.getType(Op1);
4594 const LLT Op2Ty = MRI.getType(Op2);
4595
4596 if (Op1Ty != Op2Ty) {
4597 LLVM_DEBUG(dbgs() << "Could not do vector concat of differing vector tys");
4598 return nullptr;
4599 }
4600 assert(Op1Ty.isVector() && "Expected a vector for vector concat");
4601
4602 if (Op1Ty.getSizeInBits() >= 128) {
4603 LLVM_DEBUG(dbgs() << "Vector concat not supported for full size vectors");
4604 return nullptr;
4605 }
4606
4607 // At the moment we just support 64 bit vector concats.
4608 if (Op1Ty.getSizeInBits() != 64) {
4609 LLVM_DEBUG(dbgs() << "Vector concat only supported for 64b vectors");
4610 return nullptr;
4611 }
4612
4613 const LLT ScalarTy = LLT::scalar(Op1Ty.getSizeInBits());
4614 const RegisterBank &FPRBank = *RBI.getRegBank(Op1, MRI, TRI);
4615 const TargetRegisterClass *DstRC =
4616 getRegClassForTypeOnBank(Op1Ty.multiplyElements(2), FPRBank);
4617
4618 MachineInstr *WidenedOp1 =
4619 emitScalarToVector(ScalarTy.getSizeInBits(), DstRC, Op1, MIRBuilder);
4620 MachineInstr *WidenedOp2 =
4621 emitScalarToVector(ScalarTy.getSizeInBits(), DstRC, Op2, MIRBuilder);
4622 if (!WidenedOp1 || !WidenedOp2) {
4623 LLVM_DEBUG(dbgs() << "Could not emit a vector from scalar value");
4624 return nullptr;
4625 }
4626
4627 // Now do the insert of the upper element.
4628 unsigned InsertOpc, InsSubRegIdx;
4629 std::tie(InsertOpc, InsSubRegIdx) =
4630 getInsertVecEltOpInfo(FPRBank, ScalarTy.getSizeInBits());
4631
4632 if (!Dst)
4633 Dst = MRI.createVirtualRegister(DstRC);
4634 auto InsElt =
4635 MIRBuilder
4636 .buildInstr(InsertOpc, {*Dst}, {WidenedOp1->getOperand(0).getReg()})
4637 .addImm(1) /* Lane index */
4638 .addUse(WidenedOp2->getOperand(0).getReg())
4639 .addImm(0);
4640 constrainSelectedInstRegOperands(*InsElt, TII, TRI, RBI);
4641 return &*InsElt;
4642}
4643
4644 MachineInstr *
4645 AArch64InstructionSelector::emitCSINC(Register Dst, Register Src1,
4646 Register Src2, AArch64CC::CondCode Pred,
4647 MachineIRBuilder &MIRBuilder) const {
4648 auto &MRI = *MIRBuilder.getMRI();
4649 const RegClassOrRegBank &RegClassOrBank = MRI.getRegClassOrRegBank(Dst);
4650 // If we used a register class, then this won't necessarily have an LLT.
4651 // Compute the size based off whether or not we have a class or bank.
4652 unsigned Size;
4653 if (const auto *RC = RegClassOrBank.dyn_cast<const TargetRegisterClass *>())
4654 Size = TRI.getRegSizeInBits(*RC);
4655 else
4656 Size = MRI.getType(Dst).getSizeInBits();
4657 // Some opcodes use s1.
4658 assert(Size <= 64 && "Expected 64 bits or less only!");
4659 static const unsigned OpcTable[2] = {AArch64::CSINCWr, AArch64::CSINCXr};
4660 unsigned Opc = OpcTable[Size == 64];
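// Editor's note: CSINC Dst, ZR, ZR, invert(cc) is the canonical 'cset cc'
// idiom; callers such as emitCSetForFCmp and selectOverflowOp rely on it to
// materialize a 0/1 flag value.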
4661 auto CSINC = MIRBuilder.buildInstr(Opc, {Dst}, {Src1, Src2}).addImm(Pred);
4662 constrainSelectedInstRegOperands(*CSINC, TII, TRI, RBI);
4663 return &*CSINC;
4664}
4665
4666MachineInstr *AArch64InstructionSelector::emitCarryIn(MachineInstr &I,
4667 Register CarryReg) {
4668 MachineRegisterInfo *MRI = MIB.getMRI();
4669 unsigned Opcode = I.getOpcode();
4670
4671 // If the instruction is a SUB, we need to negate the carry,
4672 // because borrowing is indicated by carry-flag == 0.
4673 bool NeedsNegatedCarry =
4674 (Opcode == TargetOpcode::G_USUBE || Opcode == TargetOpcode::G_SSUBE);
4675
4676 // If the previous instruction will already produce the correct carry, do not
4677 // emit a carry generating instruction. E.g. for G_UADDE/G_USUBE sequences
4678 // generated during legalization of wide add/sub. This optimization depends on
4679 // these sequences not being interrupted by other instructions.
4680 // We have to select the previous instruction before the carry-using
4681 // instruction is deleted by the calling function, otherwise the previous
4682 // instruction might become dead and would get deleted.
4683 MachineInstr *SrcMI = MRI->getVRegDef(CarryReg);
4684 if (SrcMI == I.getPrevNode()) {
4685 if (auto *CarrySrcMI = dyn_cast<GAddSubCarryOut>(SrcMI)) {
4686 bool ProducesNegatedCarry = CarrySrcMI->isSub();
4687 if (NeedsNegatedCarry == ProducesNegatedCarry &&
4688 CarrySrcMI->isUnsigned() &&
4689 CarrySrcMI->getCarryOutReg() == CarryReg &&
4690 selectAndRestoreState(*SrcMI))
4691 return nullptr;
4692 }
4693 }
4694
4695 Register DeadReg = MRI->createVirtualRegister(&AArch64::GPR32RegClass);
4696
4697 if (NeedsNegatedCarry) {
4698 // (0 - Carry) sets !C in NZCV when Carry == 1
4699 Register ZReg = AArch64::WZR;
4700 return emitInstr(AArch64::SUBSWrr, {DeadReg}, {ZReg, CarryReg}, MIB);
4701 }
4702
4703 // (Carry - 1) sets !C in NZCV when Carry == 0
4704 auto Fns = select12BitValueWithLeftShift(1);
4705 return emitInstr(AArch64::SUBSWri, {DeadReg}, {CarryReg}, MIB, Fns);
4706}
4707
4708 bool AArch64InstructionSelector::selectOverflowOp(MachineInstr &I,
4709 MachineRegisterInfo &MRI) {
4710 auto &CarryMI = cast<GAddSubCarryOut>(I);
4711
4712 if (auto *CarryInMI = dyn_cast<GAddSubCarryInOut>(&I)) {
4713 // Set NZCV carry according to carry-in VReg
4714 emitCarryIn(I, CarryInMI->getCarryInReg());
4715 }
4716
4717 // Emit the operation and get the correct condition code.
4718 auto OpAndCC = emitOverflowOp(I.getOpcode(), CarryMI.getDstReg(),
4719 CarryMI.getLHS(), CarryMI.getRHS(), MIB);
4720
4721 Register CarryOutReg = CarryMI.getCarryOutReg();
4722
4723 // Don't convert carry-out to VReg if it is never used
4724 if (!MRI.use_nodbg_empty(CarryOutReg)) {
4725 // Now, put the overflow result in the register given by the first operand
4726 // to the overflow op. CSINC increments the result when the predicate is
4727 // false, so to get the increment when it's true, we need to use the
4728 // inverse. In this case, we want to increment when carry is set.
4729 Register ZReg = AArch64::WZR;
4730 emitCSINC(/*Dst=*/CarryOutReg, /*Src1=*/ZReg, /*Src2=*/ZReg,
4731 getInvertedCondCode(OpAndCC.second), MIB);
4732 }
4733
4734 I.eraseFromParent();
4735 return true;
4736}
4737
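// Editor's summary of the mapping below: unsigned add/sub overflow is read
// from the carry flag (HS for adds, LO for subs), while signed overflow uses
// the V flag (VS); the caller then materializes the chosen condition as a
// 0/1 value with a CSINC.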
4738std::pair<MachineInstr *, AArch64CC::CondCode>
4739AArch64InstructionSelector::emitOverflowOp(unsigned Opcode, Register Dst,
4740 MachineOperand &LHS,
4741 MachineOperand &RHS,
4742 MachineIRBuilder &MIRBuilder) const {
4743 switch (Opcode) {
4744 default:
4745 llvm_unreachable("Unexpected opcode!");
4746 case TargetOpcode::G_SADDO:
4747 return std::make_pair(emitADDS(Dst, LHS, RHS, MIRBuilder), AArch64CC::VS);
4748 case TargetOpcode::G_UADDO:
4749 return std::make_pair(emitADDS(Dst, LHS, RHS, MIRBuilder), AArch64CC::HS);
4750 case TargetOpcode::G_SSUBO:
4751 return std::make_pair(emitSUBS(Dst, LHS, RHS, MIRBuilder), AArch64CC::VS);
4752 case TargetOpcode::G_USUBO:
4753 return std::make_pair(emitSUBS(Dst, LHS, RHS, MIRBuilder), AArch64CC::LO);
4754 case TargetOpcode::G_SADDE:
4755 return std::make_pair(emitADCS(Dst, LHS, RHS, MIRBuilder), AArch64CC::VS);
4756 case TargetOpcode::G_UADDE:
4757 return std::make_pair(emitADCS(Dst, LHS, RHS, MIRBuilder), AArch64CC::HS);
4758 case TargetOpcode::G_SSUBE:
4759 return std::make_pair(emitSBCS(Dst, LHS, RHS, MIRBuilder), AArch64CC::VS);
4760 case TargetOpcode::G_USUBE:
4761 return std::make_pair(emitSBCS(Dst, LHS, RHS, MIRBuilder), AArch64CC::LO);
4762 }
4763}
4764
4765/// Returns true if @p Val is a tree of AND/OR/CMP operations that can be
4766/// expressed as a conjunction.
4767/// \param CanNegate Set to true if we can negate the whole sub-tree just by
4768/// changing the conditions on the CMP tests.
4769/// (this means we can call emitConjunctionRec() with
4770/// Negate==true on this sub-tree)
4771/// \param MustBeFirst Set to true if this subtree needs to be negated and we
4772/// cannot do the negation naturally. We are required to
4773/// emit the subtree first in this case.
4774 /// \param WillNegate Is true if we are called when the result of this
4775/// subexpression must be negated. This happens when the
4776/// outer expression is an OR. We can use this fact to know
4777/// that we have a double negation (or (or ...) ...) that
4778/// can be implemented for free.
4779static bool canEmitConjunction(Register Val, bool &CanNegate, bool &MustBeFirst,
4780 bool WillNegate, MachineRegisterInfo &MRI,
4781 unsigned Depth = 0) {
4782 if (!MRI.hasOneNonDBGUse(Val))
4783 return false;
4784 MachineInstr *ValDef = MRI.getVRegDef(Val);
4785 unsigned Opcode = ValDef->getOpcode();
4786 if (isa<GAnyCmp>(ValDef)) {
4787 CanNegate = true;
4788 MustBeFirst = false;
4789 return true;
4790 }
4791 // Protect against exponential runtime and stack overflow.
4792 if (Depth > 6)
4793 return false;
4794 if (Opcode == TargetOpcode::G_AND || Opcode == TargetOpcode::G_OR) {
4795 bool IsOR = Opcode == TargetOpcode::G_OR;
4796 Register O0 = ValDef->getOperand(1).getReg();
4797 Register O1 = ValDef->getOperand(2).getReg();
4798 bool CanNegateL;
4799 bool MustBeFirstL;
4800 if (!canEmitConjunction(O0, CanNegateL, MustBeFirstL, IsOR, MRI, Depth + 1))
4801 return false;
4802 bool CanNegateR;
4803 bool MustBeFirstR;
4804 if (!canEmitConjunction(O1, CanNegateR, MustBeFirstR, IsOR, MRI, Depth + 1))
4805 return false;
4806
4807 if (MustBeFirstL && MustBeFirstR)
4808 return false;
4809
4810 if (IsOR) {
4811 // For an OR expression we need to be able to naturally negate at least
4812 // one side or we cannot do the transformation at all.
4813 if (!CanNegateL && !CanNegateR)
4814 return false;
4815 // If the result of the OR will be negated and we can naturally negate
4816 // the leaves, then this sub-tree as a whole negates naturally.
4817 CanNegate = WillNegate && CanNegateL && CanNegateR;
4818 // If we cannot naturally negate the whole sub-tree, then this must be
4819 // emitted first.
4820 MustBeFirst = !CanNegate;
4821 } else {
4822 assert(Opcode == TargetOpcode::G_AND && "Must be G_AND");
4823 // We cannot naturally negate an AND operation.
4824 CanNegate = false;
4825 MustBeFirst = MustBeFirstL || MustBeFirstR;
4826 }
4827 return true;
4828 }
4829 return false;
4830}
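// A small example of the kind of tree this accepts (operand names are made
// up for illustration):
//   %c0:gpr(s32) = G_ICMP slt, %a, %b
//   %c1:gpr(s32) = G_FCMP olt, %x, %y
//   %cond:gpr(s32) = G_AND %c0, %c1
// Both leaves are single-use compares, so the whole tree is a conjunction and
// can later be emitted as a cmp followed by a conditional compare. An OR of
// two compares is also accepted, because each ICMP/FCMP leaf can be negated
// simply by flipping its predicate.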
4831
4832MachineInstr *AArch64InstructionSelector::emitConditionalComparison(
4833 Register LHS, Register RHS, CmpInst::Predicate CC,
4834 AArch64CC::CondCode Predicate, AArch64CC::CondCode OutCC,
4835 MachineIRBuilder &MIB) const {
4836 // TODO: emit CMN as an optimization.
4837 auto &MRI = *MIB.getMRI();
4838 LLT OpTy = MRI.getType(LHS);
4839 unsigned CCmpOpc;
4840 std::optional<ValueAndVReg> C;
4841 if (CmpInst::isIntPredicate(CC)) {
4842 assert(OpTy.getSizeInBits() == 32 || OpTy.getSizeInBits() == 64);
4843 C = getIConstantVRegValWithLookThrough(RHS, MRI);
4844 if (C && C->Value.ult(32))
4845 CCmpOpc = OpTy.getSizeInBits() == 32 ? AArch64::CCMPWi : AArch64::CCMPXi;
4846 else
4847 CCmpOpc = OpTy.getSizeInBits() == 32 ? AArch64::CCMPWr : AArch64::CCMPXr;
4848 } else {
4849 assert(OpTy.getSizeInBits() == 16 || OpTy.getSizeInBits() == 32 ||
4850 OpTy.getSizeInBits() == 64);
4851 switch (OpTy.getSizeInBits()) {
4852 case 16:
4853 assert(STI.hasFullFP16() && "Expected Full FP16 for fp16 comparisons");
4854 CCmpOpc = AArch64::FCCMPHrr;
4855 break;
4856 case 32:
4857 CCmpOpc = AArch64::FCCMPSrr;
4858 break;
4859 case 64:
4860 CCmpOpc = AArch64::FCCMPDrr;
4861 break;
4862 default:
4863 return nullptr;
4864 }
4865 }
4866 AArch64CC::CondCode InvOutCC = AArch64CC::getInvertedCondCode(OutCC);
4867 unsigned NZCV = AArch64CC::getNZCVToSatisfyCondCode(InvOutCC);
4868 auto CCmp =
4869 MIB.buildInstr(CCmpOpc, {}, {LHS});
4870 if (CCmpOpc == AArch64::CCMPWi || CCmpOpc == AArch64::CCMPXi)
4871 CCmp.addImm(C->Value.getZExtValue());
4872 else
4873 CCmp.addReg(RHS);
4874 CCmp.addImm(NZCV).addImm(Predicate);
4876 return &*CCmp;
4877}
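// Rough reminder of the CCMP semantics relied on here (see the Arm ARM for
// the authoritative definition): ccmp <lhs>, <rhs|#imm>, #<nzcv>, <cond>
// performs the compare and updates NZCV only when <cond> currently holds;
// otherwise it loads the #<nzcv> literal. The literal above comes from
// getNZCVToSatisfyCondCode on the inverted OutCC, so when the predicate does
// not hold the resulting flags deliberately fail the OutCC test.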
4878
4879MachineInstr *AArch64InstructionSelector::emitConjunctionRec(
4880 Register Val, AArch64CC::CondCode &OutCC, bool Negate, Register CCOp,
4881 AArch64CC::CondCode Predicate, MachineIRBuilder &MIB) const {
4882 // We're at a tree leaf, produce a conditional comparison operation.
4883 auto &MRI = *MIB.getMRI();
4884 MachineInstr *ValDef = MRI.getVRegDef(Val);
4885 unsigned Opcode = ValDef->getOpcode();
4886 if (auto *Cmp = dyn_cast<GAnyCmp>(ValDef)) {
4887 Register LHS = Cmp->getLHSReg();
4888 Register RHS = Cmp->getRHSReg();
4889 CmpInst::Predicate CC = Cmp->getCond();
4890 if (Negate)
4891 CC = CmpInst::getInversePredicate(CC);
4892 if (isa<GICmp>(Cmp)) {
4893 OutCC = changeICMPPredToAArch64CC(CC);
4894 } else {
4895 // Handle special FP cases.
4896 AArch64CC::CondCode ExtraCC;
4897 changeFPCCToANDAArch64CC(CC, OutCC, ExtraCC);
4898 // Some floating point conditions can't be tested with a single condition
4899 // code. Construct an additional comparison in this case.
4900 if (ExtraCC != AArch64CC::AL) {
4901 MachineInstr *ExtraCmp;
4902 if (!CCOp)
4903 ExtraCmp = emitFPCompare(LHS, RHS, MIB, CC);
4904 else
4905 ExtraCmp =
4906 emitConditionalComparison(LHS, RHS, CC, Predicate, ExtraCC, MIB);
4907 CCOp = ExtraCmp->getOperand(0).getReg();
4908 Predicate = ExtraCC;
4909 }
4910 }
4911
4912 // Produce a normal comparison if we are first in the chain
4913 if (!CCOp) {
4914 auto Dst = MRI.cloneVirtualRegister(LHS);
4915 if (isa<GICmp>(Cmp))
4916 return emitSUBS(Dst, Cmp->getOperand(2), Cmp->getOperand(3), MIB);
4917 return emitFPCompare(Cmp->getOperand(2).getReg(),
4918 Cmp->getOperand(3).getReg(), MIB);
4919 }
4920 // Otherwise produce a ccmp.
4921 return emitConditionalComparison(LHS, RHS, CC, Predicate, OutCC, MIB);
4922 }
4923 assert(MRI.hasOneNonDBGUse(Val) && "Valid conjunction/disjunction tree");
4924
4925 bool IsOR = Opcode == TargetOpcode::G_OR;
4926
4927 Register LHS = ValDef->getOperand(1).getReg();
4928 bool CanNegateL;
4929 bool MustBeFirstL;
4930 bool ValidL = canEmitConjunction(LHS, CanNegateL, MustBeFirstL, IsOR, MRI);
4931 assert(ValidL && "Valid conjunction/disjunction tree");
4932 (void)ValidL;
4933
4934 Register RHS = ValDef->getOperand(2).getReg();
4935 bool CanNegateR;
4936 bool MustBeFirstR;
4937 bool ValidR = canEmitConjunction(RHS, CanNegateR, MustBeFirstR, IsOR, MRI);
4938 assert(ValidR && "Valid conjunction/disjunction tree");
4939 (void)ValidR;
4940
4941 // Swap sub-tree that must come first to the right side.
4942 if (MustBeFirstL) {
4943 assert(!MustBeFirstR && "Valid conjunction/disjunction tree");
4944 std::swap(LHS, RHS);
4945 std::swap(CanNegateL, CanNegateR);
4946 std::swap(MustBeFirstL, MustBeFirstR);
4947 }
4948
4949 bool NegateR;
4950 bool NegateAfterR;
4951 bool NegateL;
4952 bool NegateAfterAll;
4953 if (Opcode == TargetOpcode::G_OR) {
4954 // Swap the sub-tree that we can negate naturally to the left.
4955 if (!CanNegateL) {
4956 assert(CanNegateR && "at least one side must be negatable");
4957 assert(!MustBeFirstR && "invalid conjunction/disjunction tree");
4958 assert(!Negate);
4959 std::swap(LHS, RHS);
4960 NegateR = false;
4961 NegateAfterR = true;
4962 } else {
4963 // Negate the left sub-tree if possible, otherwise negate the result.
4964 NegateR = CanNegateR;
4965 NegateAfterR = !CanNegateR;
4966 }
4967 NegateL = true;
4968 NegateAfterAll = !Negate;
4969 } else {
4970 assert(Opcode == TargetOpcode::G_AND &&
4971 "Valid conjunction/disjunction tree");
4972 assert(!Negate && "Valid conjunction/disjunction tree");
4973
4974 NegateL = false;
4975 NegateR = false;
4976 NegateAfterR = false;
4977 NegateAfterAll = false;
4978 }
4979
4980 // Emit sub-trees.
4981 AArch64CC::CondCode RHSCC;
4982 MachineInstr *CmpR =
4983 emitConjunctionRec(RHS, RHSCC, NegateR, CCOp, Predicate, MIB);
4984 if (NegateAfterR)
4985 RHSCC = AArch64CC::getInvertedCondCode(RHSCC);
4986 MachineInstr *CmpL = emitConjunctionRec(
4987 LHS, OutCC, NegateL, CmpR->getOperand(0).getReg(), RHSCC, MIB);
4988 if (NegateAfterAll)
4989 OutCC = AArch64CC::getInvertedCondCode(OutCC);
4990 return CmpL;
4991}
4992
4993MachineInstr *AArch64InstructionSelector::emitConjunction(
4994 Register Val, AArch64CC::CondCode &OutCC, MachineIRBuilder &MIB) const {
4995 bool DummyCanNegate;
4996 bool DummyMustBeFirst;
4997 if (!canEmitConjunction(Val, DummyCanNegate, DummyMustBeFirst, false,
4998 *MIB.getMRI()))
4999 return nullptr;
5000 return emitConjunctionRec(Val, OutCC, false, Register(), AArch64CC::AL, MIB);
5001}
5002
5003bool AArch64InstructionSelector::tryOptSelectConjunction(GSelect &SelI,
5004 MachineInstr &CondMI) {
5005 AArch64CC::CondCode AArch64CC;
5006 MachineInstr *ConjMI = emitConjunction(SelI.getCondReg(), AArch64CC, MIB);
5007 if (!ConjMI)
5008 return false;
5009
5010 emitSelect(SelI.getReg(0), SelI.getTrueReg(), SelI.getFalseReg(), AArch64CC, MIB);
5011 SelI.eraseFromParent();
5012 return true;
5013}
5014
5015bool AArch64InstructionSelector::tryOptSelect(GSelect &I) {
5016 MachineRegisterInfo &MRI = *MIB.getMRI();
5017 // We want to recognize this pattern:
5018 //
5019 // $z = G_FCMP pred, $x, $y
5020 // ...
5021 // $w = G_SELECT $z, $a, $b
5022 //
5023 // Where the value of $z is *only* ever used by the G_SELECT (possibly with
5024 // some copies/truncs in between).
5025 //
5026 // If we see this, then we can emit something like this:
5027 //
5028 // fcmp $x, $y
5029 // fcsel $w, $a, $b, pred
5030 //
5031 // Rather than emitting both of the rather long sequences in the standard
5032 // G_FCMP/G_SELECT select methods.
5033
5034 // First, check if the condition is defined by a compare.
5035 MachineInstr *CondDef = MRI.getVRegDef(I.getOperand(1).getReg());
5036
5037 // We can only fold if all of the defs have one use.
5038 Register CondDefReg = CondDef->getOperand(0).getReg();
5039 if (!MRI.hasOneNonDBGUse(CondDefReg)) {
5040 // Unless it's another select.
5041 for (const MachineInstr &UI : MRI.use_nodbg_instructions(CondDefReg)) {
5042 if (CondDef == &UI)
5043 continue;
5044 if (UI.getOpcode() != TargetOpcode::G_SELECT)
5045 return false;
5046 }
5047 }
5048
5049 // Is the condition defined by a compare?
5050 unsigned CondOpc = CondDef->getOpcode();
5051 if (CondOpc != TargetOpcode::G_ICMP && CondOpc != TargetOpcode::G_FCMP) {
5052 if (tryOptSelectConjunction(I, *CondDef))
5053 return true;
5054 return false;
5055 }
5056
5057 AArch64CC::CondCode CondCode;
5058 if (CondOpc == TargetOpcode::G_ICMP) {
5059 auto Pred =
5060 static_cast<CmpInst::Predicate>(CondDef->getOperand(1).getPredicate());
5061 CondCode = changeICMPPredToAArch64CC(Pred);
5062 emitIntegerCompare(CondDef->getOperand(2), CondDef->getOperand(3),
5063 CondDef->getOperand(1), MIB);
5064 } else {
5065 // Get the condition code for the select.
5066 auto Pred =
5067 static_cast<CmpInst::Predicate>(CondDef->getOperand(1).getPredicate());
5068 AArch64CC::CondCode CondCode2;
5069 changeFCMPPredToAArch64CC(Pred, CondCode, CondCode2);
5070
5071 // changeFCMPPredToAArch64CC sets CondCode2 to AL when we require two
5072 // instructions to emit the comparison.
5073 // TODO: Handle FCMP_UEQ and FCMP_ONE. After that, this check will be
5074 // unnecessary.
5075 if (CondCode2 != AArch64CC::AL)
5076 return false;
5077
5078 if (!emitFPCompare(CondDef->getOperand(2).getReg(),
5079 CondDef->getOperand(3).getReg(), MIB)) {
5080 LLVM_DEBUG(dbgs() << "Couldn't emit compare for select!\n");
5081 return false;
5082 }
5083 }
5084
5085 // Emit the select.
5086 emitSelect(I.getOperand(0).getReg(), I.getOperand(2).getReg(),
5087 I.getOperand(3).getReg(), CondCode, MIB);
5088 I.eraseFromParent();
5089 return true;
5090}
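// A sketch of the integer flavour of the same fold (register names invented):
//   %c:gpr(s32) = G_ICMP slt, %x, %y
//   %w:gpr(s32) = G_SELECT %c, %a, %b
// ideally becomes
//   cmp  x, y
//   csel w, a, b, lt
// via emitIntegerCompare followed by emitSelect above, instead of
// materializing %c into a register first.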
5091
5092MachineInstr *AArch64InstructionSelector::tryFoldIntegerCompare(
5093 MachineOperand &LHS, MachineOperand &RHS, MachineOperand &Predicate,
5094 MachineIRBuilder &MIRBuilder) const {
5095 assert(LHS.isReg() && RHS.isReg() && Predicate.isPredicate() &&
5096 "Unexpected MachineOperand");
5097 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
5098 // We want to find this sort of thing:
5099 // x = G_SUB 0, y
5100 // G_ICMP z, x
5101 //
5102 // In this case, we can fold the G_SUB into the G_ICMP using a CMN instead.
5103 // e.g:
5104 //
5105 // cmn z, y
5106
5107 // Check if the RHS or LHS of the G_ICMP is defined by a SUB
5108 MachineInstr *LHSDef = getDefIgnoringCopies(LHS.getReg(), MRI);
5109 MachineInstr *RHSDef = getDefIgnoringCopies(RHS.getReg(), MRI);
5110 auto P = static_cast<CmpInst::Predicate>(Predicate.getPredicate());
5111 // Given this:
5112 //
5113 // x = G_SUB 0, y
5114 // G_ICMP x, z
5115 //
5116 // Produce this:
5117 //
5118 // cmn y, z
5119 if (isCMN(LHSDef, P, MRI))
5120 return emitCMN(LHSDef->getOperand(2), RHS, MIRBuilder);
5121
5122 // Same idea here, but with the RHS of the compare instead:
5123 //
5124 // Given this:
5125 //
5126 // x = G_SUB 0, y
5127 // G_ICMP z, x
5128 //
5129 // Produce this:
5130 //
5131 // cmn z, y
5132 if (isCMN(RHSDef, P, MRI))
5133 return emitCMN(LHS, RHSDef->getOperand(2), MIRBuilder);
5134
5135 // Given this:
5136 //
5137 // z = G_AND x, y
5138 // G_ICMP z, 0
5139 //
5140 // Produce this if the compare is signed:
5141 //
5142 // tst x, y
5143 if (!CmpInst::isUnsigned(P) && LHSDef &&
5144 LHSDef->getOpcode() == TargetOpcode::G_AND) {
5145 // Make sure that the RHS is 0.
5146 auto ValAndVReg = getIConstantVRegValWithLookThrough(RHS.getReg(), MRI);
5147 if (!ValAndVReg || ValAndVReg->Value != 0)
5148 return nullptr;
5149
5150 return emitTST(LHSDef->getOperand(1),
5151 LHSDef->getOperand(2), MIRBuilder);
5152 }
5153
5154 return nullptr;
5155}
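// For reference, these folds lean on two standard AArch64 aliases (not
// specific to this file):
//   cmn x, y  is  adds xzr, x, y   ; comparing against a negated value can
//                                  ; reuse the addition's flags
//   tst x, y  is  ands xzr, x, y   ; an AND feeding a compare-with-zero needs
//                                  ; no separate AND result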
5156
5157bool AArch64InstructionSelector::selectShuffleVector(
5158 MachineInstr &I, MachineRegisterInfo &MRI) {
5159 const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
5160 Register Src1Reg = I.getOperand(1).getReg();
5161 const LLT Src1Ty = MRI.getType(Src1Reg);
5162 Register Src2Reg = I.getOperand(2).getReg();
5163 const LLT Src2Ty = MRI.getType(Src2Reg);
5164 ArrayRef<int> Mask = I.getOperand(3).getShuffleMask();
5165
5166 MachineBasicBlock &MBB = *I.getParent();
5167 MachineFunction &MF = *MBB.getParent();
5168 LLVMContext &Ctx = MF.getFunction().getContext();
5169
5170 // G_SHUFFLE_VECTOR is weird in that the source operands can be scalars, if
5171 // it originated from a <1 x T> type. Those should have been lowered into
5172 // G_BUILD_VECTOR earlier.
5173 if (!Src1Ty.isVector() || !Src2Ty.isVector()) {
5174 LLVM_DEBUG(dbgs() << "Could not select a \"scalar\" G_SHUFFLE_VECTOR\n");
5175 return false;
5176 }
5177
5178 unsigned BytesPerElt = DstTy.getElementType().getSizeInBits() / 8;
5179
5180 SmallVector<Constant *, 16> CstIdxs;
5181 for (int Val : Mask) {
5182 // For now, we'll just assume any undef indexes are 0. This should be
5183 // optimized in the future, e.g. to select DUP etc.
5184 Val = Val < 0 ? 0 : Val;
5185 for (unsigned Byte = 0; Byte < BytesPerElt; ++Byte) {
5186 unsigned Offset = Byte + Val * BytesPerElt;
5187 CstIdxs.emplace_back(ConstantInt::get(Type::getInt8Ty(Ctx), Offset));
5188 }
5189 }
5190
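// Worked example with a made-up mask: shuffling two <4 x s32> sources with
// mask <0, 4, 1, 5> gives BytesPerElt == 4, so the loop above builds the TBL
// byte-index vector
//   [0,1,2,3, 16,17,18,19, 4,5,6,7, 20,21,22,23]
// i.e. each mask element expands to the BytesPerElt consecutive byte offsets
// of that element within the concatenated 32-byte source pair.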
5191 // Use a constant pool to load the index vector for TBL.
5192 Constant *CPVal = ConstantVector::get(CstIdxs);
5193 MachineInstr *IndexLoad = emitLoadFromConstantPool(CPVal, MIB);
5194 if (!IndexLoad) {
5195 LLVM_DEBUG(dbgs() << "Could not load from a constant pool\n");
5196 return false;
5197 }
5198
5199 if (DstTy.getSizeInBits() != 128) {
5200 assert(DstTy.getSizeInBits() == 64 && "Unexpected shuffle result ty");
5201 // This case can be done with TBL1.
5202 MachineInstr *Concat =
5203 emitVectorConcat(std::nullopt, Src1Reg, Src2Reg, MIB);
5204 if (!Concat) {
5205 LLVM_DEBUG(dbgs() << "Could not do vector concat for tbl1\n");
5206 return false;
5207 }
5208
5209 // The constant pool load will be 64 bits, so we need to convert it to an FPR128 reg.
5210 IndexLoad = emitScalarToVector(64, &AArch64::FPR128RegClass,
5211 IndexLoad->getOperand(0).getReg(), MIB);
5212
5213 auto TBL1 = MIB.buildInstr(
5214 AArch64::TBLv16i8One, {&AArch64::FPR128RegClass},
5215 {Concat->getOperand(0).getReg(), IndexLoad->getOperand(0).getReg()});
5217
5218 auto Copy =
5219 MIB.buildInstr(TargetOpcode::COPY, {I.getOperand(0).getReg()}, {})
5220 .addReg(TBL1.getReg(0), 0, AArch64::dsub);
5221 RBI.constrainGenericRegister(Copy.getReg(0), AArch64::FPR64RegClass, MRI);
5222 I.eraseFromParent();
5223 return true;
5224 }
5225
5226 // For TBL2 we need to emit a REG_SEQUENCE to tie together two consecutive
5227 // Q registers for regalloc.
5228 SmallVector<Register, 2> Regs = {Src1Reg, Src2Reg};
5229 auto RegSeq = createQTuple(Regs, MIB);
5230 auto TBL2 = MIB.buildInstr(AArch64::TBLv16i8Two, {I.getOperand(0)},
5231 {RegSeq, IndexLoad->getOperand(0)});
5233 I.eraseFromParent();
5234 return true;
5235}
5236
5237MachineInstr *AArch64InstructionSelector::emitLaneInsert(
5238 std::optional<Register> DstReg, Register SrcReg, Register EltReg,
5239 unsigned LaneIdx, const RegisterBank &RB,
5240 MachineIRBuilder &MIRBuilder) const {
5241 MachineInstr *InsElt = nullptr;
5242 const TargetRegisterClass *DstRC = &AArch64::FPR128RegClass;
5243 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
5244
5245 // Create a register to define with the insert if one wasn't passed in.
5246 if (!DstReg)
5247 DstReg = MRI.createVirtualRegister(DstRC);
5248
5249 unsigned EltSize = MRI.getType(EltReg).getSizeInBits();
5250 unsigned Opc = getInsertVecEltOpInfo(RB, EltSize).first;
5251
5252 if (RB.getID() == AArch64::FPRRegBankID) {
5253 auto InsSub = emitScalarToVector(EltSize, DstRC, EltReg, MIRBuilder);
5254 InsElt = MIRBuilder.buildInstr(Opc, {*DstReg}, {SrcReg})
5255 .addImm(LaneIdx)
5256 .addUse(InsSub->getOperand(0).getReg())
5257 .addImm(0);
5258 } else {
5259 InsElt = MIRBuilder.buildInstr(Opc, {*DstReg}, {SrcReg})
5260 .addImm(LaneIdx)
5261 .addUse(EltReg);
5262 }
5263
5265 return InsElt;
5266}
5267
5268bool AArch64InstructionSelector::selectUSMovFromExtend(
5269 MachineInstr &MI, MachineRegisterInfo &MRI) {
5270 if (MI.getOpcode() != TargetOpcode::G_SEXT &&
5271 MI.getOpcode() != TargetOpcode::G_ZEXT &&
5272 MI.getOpcode() != TargetOpcode::G_ANYEXT)
5273 return false;
5274 bool IsSigned = MI.getOpcode() == TargetOpcode::G_SEXT;
5275 const Register DefReg = MI.getOperand(0).getReg();
5276 const LLT DstTy = MRI.getType(DefReg);
5277 unsigned DstSize = DstTy.getSizeInBits();
5278
5279 if (DstSize != 32 && DstSize != 64)
5280 return false;
5281
5282 MachineInstr *Extract = getOpcodeDef(TargetOpcode::G_EXTRACT_VECTOR_ELT,
5283 MI.getOperand(1).getReg(), MRI);
5284 int64_t Lane;
5285 if (!Extract || !mi_match(Extract->getOperand(2).getReg(), MRI, m_ICst(Lane)))
5286 return false;
5287 Register Src0 = Extract->getOperand(1).getReg();
5288
5289 const LLT &VecTy = MRI.getType(Src0);
5290
5291 if (VecTy.getSizeInBits() != 128) {
5292 const MachineInstr *ScalarToVector = emitScalarToVector(
5293 VecTy.getSizeInBits(), &AArch64::FPR128RegClass, Src0, MIB);
5294 assert(ScalarToVector && "Didn't expect emitScalarToVector to fail!");
5295 Src0 = ScalarToVector->getOperand(0).getReg();
5296 }
5297
5298 unsigned Opcode;
5299 if (DstSize == 64 && VecTy.getScalarSizeInBits() == 32)
5300 Opcode = IsSigned ? AArch64::SMOVvi32to64 : AArch64::UMOVvi32;
5301 else if (DstSize == 64 && VecTy.getScalarSizeInBits() == 16)
5302 Opcode = IsSigned ? AArch64::SMOVvi16to64 : AArch64::UMOVvi16;
5303 else if (DstSize == 64 && VecTy.getScalarSizeInBits() == 8)
5304 Opcode = IsSigned ? AArch64::SMOVvi8to64 : AArch64::UMOVvi8;
5305 else if (DstSize == 32 && VecTy.getScalarSizeInBits() == 16)
5306 Opcode = IsSigned ? AArch64::SMOVvi16to32 : AArch64::UMOVvi16;
5307 else if (DstSize == 32 && VecTy.getScalarSizeInBits() == 8)
5308 Opcode = IsSigned ? AArch64::SMOVvi8to32 : AArch64::UMOVvi8;
5309 else
5310 llvm_unreachable("Unexpected type combo for S/UMov!");
5311
5312 // We may need to generate one of these, depending on the type and sign of the
5313 // input:
5314 // DstReg = SMOV Src0, Lane;
5315 // NewReg = UMOV Src0, Lane; DstReg = SUBREG_TO_REG NewReg, sub_32;
5316 MachineInstr *ExtI = nullptr;
5317 if (DstSize == 64 && !IsSigned) {
5318 Register NewReg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
5319 MIB.buildInstr(Opcode, {NewReg}, {Src0}).addImm(Lane);
5320 ExtI = MIB.buildInstr(AArch64::SUBREG_TO_REG, {DefReg}, {})
5321 .addImm(0)
5322 .addUse(NewReg)
5323 .addImm(AArch64::sub_32);
5324 RBI.constrainGenericRegister(DefReg, AArch64::GPR64RegClass, MRI);
5325 } else
5326 ExtI = MIB.buildInstr(Opcode, {DefReg}, {Src0}).addImm(Lane);
5327
5329 MI.eraseFromParent();
5330 return true;
5331}
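// Example of the intended selection (lane and register numbers are purely
// illustrative):
//   %e:gpr(s16) = G_EXTRACT_VECTOR_ELT %v:fpr(<8 x s16>), 1
//   %z:gpr(s32) = G_ZEXT %e      ->  umov w0, v0.h[1]
//   %s:gpr(s64) = G_SEXT %e      ->  smov x0, v0.h[1]
// with the 64-bit unsigned case additionally wrapped in a SUBREG_TO_REG,
// since the 32-bit umov already zeroes the upper half of the X register.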
5332
5333bool AArch64InstructionSelector::selectInsertElt(MachineInstr &I,
5334 MachineRegisterInfo &MRI) {
5335 assert(I.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT);
5336
5337 // Get information on the destination.
5338 Register DstReg = I.getOperand(0).getReg();
5339 const LLT DstTy = MRI.getType(DstReg);
5340 unsigned VecSize = DstTy.getSizeInBits();
5341
5342 // Get information on the element we want to insert into the destination.
5343 Register EltReg = I.getOperand(2).getReg();
5344 const LLT EltTy = MRI.getType(EltReg);
5345 unsigned EltSize = EltTy.getSizeInBits();
5346 if (EltSize < 8 || EltSize > 64)
5347 return false;
5348
5349 // Find the definition of the index. Bail out if it's not defined by a
5350 // G_CONSTANT.
5351 Register IdxReg = I.getOperand(3).getReg();
5352 auto VRegAndVal = getIConstantVRegValWithLookThrough(IdxReg, MRI);
5353 if (!VRegAndVal)
5354 return false;
5355 unsigned LaneIdx = VRegAndVal->Value.getSExtValue();
5356
5357 // Perform the lane insert.
5358 Register SrcReg = I.getOperand(1).getReg();
5359 const RegisterBank &EltRB = *RBI.getRegBank(EltReg, MRI, TRI);
5360
5361 if (VecSize < 128) {
5362 // If the vector we're inserting into is smaller than 128 bits, widen it
5363 // to 128 to do the insert.
5364 MachineInstr *ScalarToVec =
5365 emitScalarToVector(VecSize, &AArch64::FPR128RegClass, SrcReg, MIB);
5366 if (!ScalarToVec)
5367 return false;
5368 SrcReg = ScalarToVec->getOperand(0).getReg();
5369 }
5370
5371 // Create an insert into a new FPR128 register.
5372 // Note that if our vector is already 128 bits, we end up emitting an extra
5373 // register.
5374 MachineInstr *InsMI =
5375 emitLaneInsert(std::nullopt, SrcReg, EltReg, LaneIdx, EltRB, MIB);
5376
5377 if (VecSize < 128) {
5378 // If we had to widen to perform the insert, then we have to demote back to
5379 // the original size to get the result we want.
5380 if (!emitNarrowVector(DstReg, InsMI->getOperand(0).getReg(), MIB, MRI))
5381 return false;
5382 } else {
5383 // No widening needed.
5384 InsMI->getOperand(0).setReg(DstReg);
5386 }
5387
5388 I.eraseFromParent();
5389 return true;
5390}
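// Sketch of the narrow-vector path (types and registers are illustrative):
// inserting %x:gpr(s32) into lane 1 of a <2 x s32> value first widens the
// source into an FPR128 register, performs the lane insert, then narrows:
//   mov v0.s[1], w1          ; the INS built by emitLaneInsert
//   copy of the dsub subregister back out for the 64-bit result
// so sub-128-bit vectors pay an extra widen/narrow copy pair around the INS.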
5391
5392MachineInstr *AArch64InstructionSelector::tryAdvSIMDModImm8(
5393 Register Dst, unsigned DstSize, APInt Bits, MachineIRBuilder &Builder) {
5394 unsigned int Op;
5395 if (DstSize == 128) {
5396 if (Bits.getHiBits(64) != Bits.getLoBits(64))
5397 return nullptr;
5398 Op = AArch64::MOVIv16b_ns;
5399 } else {
5400 Op = AArch64::MOVIv8b_ns;
5401 }
5402
5403 uint64_t Val = Bits.zextOrTrunc(64).getZExtValue();
5404
5405 if (AArch64_AM::isAdvSIMDModImmType9(Val)) {
5406 Val = AArch64_AM::encodeAdvSIMDModImmType9(Val);
5407 auto Mov = Builder.buildInstr(Op, {Dst}, {}).addImm(Val);
5408 constrainSelectedInstRegOperands(*Mov, TII, TRI, RBI);
5409 return &*Mov;
5410 }
5411 return nullptr;
5412}
5413
5414MachineInstr *AArch64InstructionSelector::tryAdvSIMDModImm16(
5415 Register Dst, unsigned DstSize, APInt Bits, MachineIRBuilder &Builder,
5416 bool Inv) {
5417
5418 unsigned int Op;
5419 if (DstSize == 128) {
5420 if (Bits.getHiBits(64) != Bits.getLoBits(64))
5421 return nullptr;
5422 Op = Inv ? AArch64::MVNIv8i16 : AArch64::MOVIv8i16;
5423 } else {
5424 Op = Inv ? AArch64::MVNIv4i16 : AArch64::MOVIv4i16;
5425 }
5426
5427 uint64_t Val = Bits.zextOrTrunc(64).getZExtValue();
5428 uint64_t Shift;
5429
5430 if (AArch64_AM::isAdvSIMDModImmType5(Val)) {
5431 Val = AArch64_AM::encodeAdvSIMDModImmType5(Val);
5432 Shift = 0;
5433 } else if (AArch64_AM::isAdvSIMDModImmType6(Val)) {
5434 Val = AArch64_AM::encodeAdvSIMDModImmType6(Val);
5435 Shift = 8;
5436 } else
5437 return nullptr;
5438
5439 auto Mov = Builder.buildInstr(Op, {Dst}, {}).addImm(Val).addImm(Shift);
5441 return &*Mov;
5442}
5443
5444MachineInstr *AArch64InstructionSelector::tryAdvSIMDModImm32(
5445 Register Dst, unsigned DstSize, APInt Bits, MachineIRBuilder &Builder,
5446 bool Inv) {
5447
5448 unsigned int Op;
5449 if (DstSize == 128) {
5450 if (Bits.getHiBits(64) != Bits.getLoBits(64))
5451 return nullptr;
5452 Op = Inv ? AArch64::MVNIv4i32 : AArch64::MOVIv4i32;
5453 } else {
5454 Op = Inv ? AArch64::MVNIv2i32 : AArch64::MOVIv2i32;
5455 }
5456
5457 uint64_t Val = Bits.zextOrTrunc(64).getZExtValue();
5458 uint64_t Shift;
5459
5460 if ((AArch64_AM::isAdvSIMDModImmType1(Val))) {
5461 Val = AArch64_AM::encodeAdvSIMDModImmType1(Val);
5462 Shift = 0;
5463 } else if ((AArch64_AM::isAdvSIMDModImmType2(Val))) {
5464 Val = AArch64_AM::encodeAdvSIMDModImmType2(Val);
5465 Shift = 8;
5466 } else if ((AArch64_AM::isAdvSIMDModImmType3(Val))) {
5467 Val = AArch64_AM::encodeAdvSIMDModImmType3(Val);
5468 Shift = 16;
5469 } else if ((AArch64_AM::isAdvSIMDModImmType4(Val))) {
5470 Val = AArch64_AM::encodeAdvSIMDModImmType4(Val);
5471 Shift = 24;
5472 } else
5473 return nullptr;
5474
5475 auto Mov = Builder.buildInstr(Op, {Dst}, {}).addImm(Val).addImm(Shift);
5477 return &*Mov;
5478}
5479
5480MachineInstr *AArch64InstructionSelector::tryAdvSIMDModImm64(
5481 Register Dst, unsigned DstSize, APInt Bits, MachineIRBuilder &Builder) {
5482
5483 unsigned int Op;
5484 if (DstSize == 128) {
5485 if (Bits.getHiBits(64) != Bits.getLoBits(64))
5486 return nullptr;
5487 Op = AArch64::MOVIv2d_ns;
5488 } else {
5489 Op = AArch64::MOVID;
5490 }
5491
5492 uint64_t Val = Bits.zextOrTrunc(64).getZExtValue();
5493 if (AArch64_AM::isAdvSIMDModImmType10(Val)) {
5494 Val = AArch64_AM::encodeAdvSIMDModImmType10(Val);
5495 auto Mov = Builder.buildInstr(Op, {Dst}, {}).addImm(Val);
5496 constrainSelectedInstRegOperands(*Mov, TII, TRI, RBI);
5497 return &*Mov;
5498 }
5499 return nullptr;
5500}
5501
5502MachineInstr *AArch64InstructionSelector::tryAdvSIMDModImm321s(
5503 Register Dst, unsigned DstSize, APInt Bits, MachineIRBuilder &Builder,
5504 bool Inv) {
5505
5506 unsigned int Op;
5507 if (DstSize == 128) {
5508 if (Bits.getHiBits(64) != Bits.getLoBits(64))
5509 return nullptr;
5510 Op = Inv ? AArch64::MVNIv4s_msl : AArch64::MOVIv4s_msl;
5511 } else {
5512 Op = Inv ? AArch64::MVNIv2s_msl : AArch64::MOVIv2s_msl;
5513 }
5514
5515 uint64_t Val = Bits.zextOrTrunc(64).getZExtValue();
5516 uint64_t Shift;
5517
5518 if (AArch64_AM::isAdvSIMDModImmType7(Val)) {
5519 Val = AArch64_AM::encodeAdvSIMDModImmType7(Val);
5520 Shift = 264;
5521 } else if (AArch64_AM::isAdvSIMDModImmType8(Val)) {
5522 Val = AArch64_AM::encodeAdvSIMDModImmType8(Val);
5523 Shift = 272;
5524 } else
5525 return nullptr;
5526
5527 auto Mov = Builder.buildInstr(Op, {Dst}, {}).addImm(Val).addImm(Shift);
5529 return &*Mov;
5530}
5531
5532MachineInstr *AArch64InstructionSelector::tryAdvSIMDModImmFP(
5533 Register Dst, unsigned DstSize, APInt Bits, MachineIRBuilder &Builder) {
5534
5535 unsigned int Op;
5536 bool IsWide = false;
5537 if (DstSize == 128) {
5538 if (Bits.getHiBits(64) != Bits.getLoBits(64))
5539 return nullptr;
5540 Op = AArch64::FMOVv4f32_ns;
5541 IsWide = true;
5542 } else {
5543 Op = AArch64::FMOVv2f32_ns;
5544 }
5545
5546 uint64_t Val = Bits.zextOrTrunc(64).getZExtValue();
5547
5548 if (AArch64_AM::isAdvSIMDModImmType11(Val)) {
5549 Val = AArch64_AM::encodeAdvSIMDModImmType11(Val);
5550 } else if (IsWide && AArch64_AM::isAdvSIMDModImmType12(Val)) {
5551 Val = AArch64_AM::encodeAdvSIMDModImmType12(Val);
5552 Op = AArch64::FMOVv2f64_ns;
5553 } else
5554 return nullptr;
5555
5556 auto Mov = Builder.buildInstr(Op, {Dst}, {}).addImm(Val);
5558 return &*Mov;
5559}
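// A few hand-picked splat constants and the encodings the helpers above are
// expected to choose (illustrative, not exhaustive):
//   <8 x s16> splat of 0x4200      -> movi v0.8h, #0x42, lsl #8
//   <4 x s32> splat of 0xffffff00  -> mvni v0.4s, #0xff        (inverted form)
//   all-zero 128-bit vector        -> movi v0.2d, #0
//   <4 x s32> splat of 1.0f        -> fmov v0.4s, #1.0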
5560
5561bool AArch64InstructionSelector::selectIndexedExtLoad(
5562 MachineInstr &MI, MachineRegisterInfo &MRI) {
5563 auto &ExtLd = cast<GIndexedAnyExtLoad>(MI);
5564 Register Dst = ExtLd.getDstReg();
5565 Register WriteBack = ExtLd.getWritebackReg();
5566 Register Base = ExtLd.getBaseReg();
5567 Register Offset = ExtLd.getOffsetReg();
5568 LLT Ty = MRI.getType(Dst);
5569 assert(Ty.getSizeInBits() <= 64); // Only for scalar GPRs.
5570 unsigned MemSizeBits = ExtLd.getMMO().getMemoryType().getSizeInBits();
5571 bool IsPre = ExtLd.isPre();
5572 bool IsSExt = isa<GIndexedSExtLoad>(ExtLd);
5573 bool InsertIntoXReg = false;
5574 bool IsDst64 = Ty.getSizeInBits() == 64;
5575
5576 unsigned Opc = 0;
5577 LLT NewLdDstTy;
5578 LLT s32 = LLT::scalar(32);
5579 LLT s64 = LLT::scalar(64);
5580
5581 if (MemSizeBits == 8) {
5582 if (IsSExt) {
5583 if (IsDst64)
5584 Opc = IsPre ? AArch64::LDRSBXpre : AArch64::LDRSBXpost;
5585 else
5586 Opc = IsPre ? AArch64::LDRSBWpre : AArch64::LDRSBWpost;
5587 NewLdDstTy = IsDst64 ? s64 : s32;
5588 } else {
5589 Opc = IsPre ? AArch64::LDRBBpre : AArch64::LDRBBpost;
5590 InsertIntoXReg = IsDst64;
5591 NewLdDstTy = s32;
5592 }
5593 } else if (MemSizeBits == 16) {
5594 if (IsSExt) {
5595 if (IsDst64)
5596 Opc = IsPre ? AArch64::LDRSHXpre : AArch64::LDRSHXpost;
5597 else
5598 Opc = IsPre ? AArch64::LDRSHWpre : AArch64::LDRSHWpost;
5599 NewLdDstTy = IsDst64 ? s64 : s32;
5600 } else {
5601 Opc = IsPre ? AArch64::LDRHHpre : AArch64::LDRHHpost;
5602 InsertIntoXReg = IsDst64;
5603 NewLdDstTy = s32;
5604 }
5605 } else if (MemSizeBits == 32) {
5606 if (IsSExt) {
5607 Opc = IsPre ? AArch64::LDRSWpre : AArch64::LDRSWpost;
5608 NewLdDstTy = s64;
5609 } else {
5610 Opc = IsPre ? AArch64::LDRWpre : AArch64::LDRWpost;
5611 InsertIntoXReg = IsDst64;
5612 NewLdDstTy = s32;
5613 }
5614 } else {
5615 llvm_unreachable("Unexpected size for indexed load");
5616 }
5617
5618 if (RBI.getRegBank(Dst, MRI, TRI)->getID() == AArch64::FPRRegBankID)
5619 return false; // We should be on gpr.
5620
5621 auto Cst = getIConstantVRegVal(Offset, MRI);
5622 if (!Cst)
5623 return false; // Shouldn't happen, but just in case.
5624
5625 auto LdMI = MIB.buildInstr(Opc, {WriteBack, NewLdDstTy}, {Base})
5626 .addImm(Cst->getSExtValue());
5627 LdMI.cloneMemRefs(ExtLd);
5629 // Make sure to select the load with the MemTy as the dest type, and then
5630 // insert into X reg if needed.
5631 if (InsertIntoXReg) {
5632 // Generate a SUBREG_TO_REG.
5633 auto SubToReg = MIB.buildInstr(TargetOpcode::SUBREG_TO_REG, {Dst}, {})
5634 .addImm(0)
5635 .addUse(LdMI.getReg(1))
5636 .addImm(AArch64::sub_32);
5637 RBI.constrainGenericRegister(SubToReg.getReg(0), AArch64::GPR64RegClass,
5638 MRI);
5639 } else {
5640 auto Copy = MIB.buildCopy(Dst, LdMI.getReg(1));
5641 selectCopy(*Copy, TII, MRI, TRI, RBI);
5642 }
5643 MI.eraseFromParent();
5644
5645 return true;
5646}
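// Hypothetical example of what this selects (offset invented): a pre-indexed
//   %wb:gpr(p0), %val:gpr(s32) = G_INDEXED_SEXTLOAD %base, 4   ; s8 in memory
// becomes roughly
//   ldrsb w0, [x1, #4]!
// while the zero/any-extending forms use ldrb/ldrh and, for a 64-bit
// destination, a SUBREG_TO_REG of the 32-bit load instead of a wider load.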
5647
5648bool AArch64InstructionSelector::selectIndexedLoad(MachineInstr &MI,
5649 MachineRegisterInfo &MRI) {
5650 auto &Ld = cast<GIndexedLoad>(MI);
5651 Register Dst = Ld.getDstReg();
5652 Register WriteBack = Ld.getWritebackReg();
5653 Register Base = Ld.getBaseReg();
5654 Register Offset = Ld.getOffsetReg();
5655 assert(MRI.getType(Dst).getSizeInBits() <= 128 &&
5656 "Unexpected type for indexed load");
5657 unsigned MemSize = Ld.getMMO().getMemoryType().getSizeInBytes();
5658
5659 if (MemSize < MRI.getType(Dst).getSizeInBytes())
5660 return selectIndexedExtLoad(MI, MRI);
5661
5662 unsigned Opc = 0;
5663 if (Ld.isPre()) {
5664 static constexpr unsigned GPROpcodes[] = {
5665 AArch64::LDRBBpre, AArch64::LDRHHpre, AArch64::LDRWpre,
5666 AArch64::LDRXpre};
5667 static constexpr unsigned FPROpcodes[] = {
5668 AArch64::LDRBpre, AArch64::LDRHpre, AArch64::LDRSpre, AArch64::LDRDpre,
5669 AArch64::LDRQpre};
5670 if (RBI.getRegBank(Dst, MRI, TRI)->getID() == AArch64::FPRRegBankID)
5671 Opc = FPROpcodes[Log2_32(MemSize)];
5672 else
5673 Opc = GPROpcodes[Log2_32(MemSize)];
5674 } else {
5675 static constexpr unsigned GPROpcodes[] = {
5676 AArch64::LDRBBpost, AArch64::LDRHHpost, AArch64::LDRWpost,
5677 AArch64::LDRXpost};
5678 static constexpr unsigned FPROpcodes[] = {
5679 AArch64::LDRBpost, AArch64::LDRHpost, AArch64::LDRSpost,
5680 AArch64::LDRDpost, AArch64::LDRQpost};
5681 if (RBI.getRegBank(Dst, MRI, TRI)->getID() == AArch64::FPRRegBankID)
5682 Opc = FPROpcodes[Log2_32(MemSize)];
5683 else
5684 Opc = GPROpcodes[Log2_32(MemSize)];
5685 }
5686 auto Cst = getIConstantVRegVal(Offset, MRI);
5687 if (!Cst)
5688 return false; // Shouldn't happen, but just in case.
5689 auto LdMI =
5690 MIB.buildInstr(Opc, {WriteBack, Dst}, {Base}).addImm(Cst->getSExtValue());
5691 LdMI.cloneMemRefs(Ld);
5693 MI.eraseFromParent();
5694 return true;
5695}
5696
5697bool AArch64InstructionSelector::selectIndexedStore(GIndexedStore &I,
5698 MachineRegisterInfo &MRI) {
5699 Register Dst = I.getWritebackReg();
5700 Register Val = I.getValueReg();
5701 Register Base = I.getBaseReg();
5702 Register Offset = I.getOffsetReg();
5703 LLT ValTy = MRI.getType(Val);
5704 assert(ValTy.getSizeInBits() <= 128 && "Unexpected type for indexed store");
5705
5706 unsigned Opc = 0;
5707 if (I.isPre()) {
5708 static constexpr unsigned GPROpcodes[] = {
5709 AArch64::STRBBpre, AArch64::STRHHpre, AArch64::STRWpre,
5710 AArch64::STRXpre};
5711 static constexpr unsigned FPROpcodes[] = {
5712 AArch64::STRBpre, AArch64::STRHpre, AArch64::STRSpre, AArch64::STRDpre,
5713 AArch64::STRQpre};
5714
5715 if (RBI.getRegBank(Val, MRI, TRI)->getID() == AArch64::FPRRegBankID)
5716 Opc = FPROpcodes[Log2_32(ValTy.getSizeInBytes())];
5717 else
5718 Opc = GPROpcodes[Log2_32(ValTy.getSizeInBytes())];
5719 } else {
5720 static constexpr unsigned GPROpcodes[] = {
5721 AArch64::STRBBpost, AArch64::STRHHpost, AArch64::STRWpost,
5722 AArch64::STRXpost};
5723 static constexpr unsigned FPROpcodes[] = {
5724 AArch64::STRBpost, AArch64::STRHpost, AArch64::STRSpost,
5725 AArch64::STRDpost, AArch64::STRQpost};
5726
5727 if (RBI.getRegBank(Val, MRI, TRI)->getID() == AArch64::FPRRegBankID)
5728 Opc = FPROpcodes[Log2_32(ValTy.getSizeInBytes())];
5729 else
5730 Opc = GPROpcodes[Log2_32(ValTy.getSizeInBytes())];
5731 }
5732
5733 auto Cst = getIConstantVRegVal(Offset, MRI);
5734 if (!Cst)
5735 return false; // Shouldn't happen, but just in case.
5736 auto Str =
5737 MIB.buildInstr(Opc, {Dst}, {Val, Base}).addImm(Cst->getSExtValue());
5738 Str.cloneMemRefs(I);
5740 I.eraseFromParent();
5741 return true;
5742}
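// For example (immediates invented), a post-indexed G_INDEXED_STORE of an s32
// value is expected to come out as
//   str w0, [x1], #4
// and the pre-indexed form as
//   str w0, [x1, #4]!
// with the writeback register holding the updated base address.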
5743
5744MachineInstr *
5745AArch64InstructionSelector::emitConstantVector(Register Dst, Constant *CV,
5746 MachineIRBuilder &MIRBuilder,
5747 MachineRegisterInfo &MRI) {
5748 LLT DstTy = MRI.getType(Dst);
5749 unsigned DstSize = DstTy.getSizeInBits();
5750 if (CV->isNullValue()) {
5751 if (DstSize == 128) {
5752 auto Mov =
5753 MIRBuilder.buildInstr(AArch64::MOVIv2d_ns, {Dst}, {}).addImm(0);
5755 return &*Mov;
5756 }
5757
5758 if (DstSize == 64) {
5759 auto Mov =
5760 MIRBuilder
5761 .buildInstr(AArch64::MOVIv2d_ns, {&AArch64::FPR128RegClass}, {})
5762 .addImm(0);
5763 auto Copy = MIRBuilder.buildInstr(TargetOpcode::COPY, {Dst}, {})
5764 .addReg(Mov.getReg(0), 0, AArch64::dsub);
5765 RBI.constrainGenericRegister(Dst, AArch64::FPR64RegClass, MRI);
5766 return &*Copy;
5767 }
5768 }
5769
5770 if (CV->getSplatValue()) {
5771 APInt DefBits = APInt::getSplat(DstSize, CV->getUniqueInteger());
5772 auto TryMOVIWithBits = [&](APInt DefBits) -> MachineInstr * {
5773 MachineInstr *NewOp;
5774 bool Inv = false;
5775 if ((NewOp = tryAdvSIMDModImm64(Dst, DstSize, DefBits, MIRBuilder)) ||
5776 (NewOp =
5777 tryAdvSIMDModImm32(Dst, DstSize, DefBits, MIRBuilder, Inv)) ||
5778 (NewOp =
5779 tryAdvSIMDModImm321s(Dst, DstSize, DefBits, MIRBuilder, Inv)) ||
5780 (NewOp =
5781 tryAdvSIMDModImm16(Dst, DstSize, DefBits, MIRBuilder, Inv)) ||
5782 (NewOp = tryAdvSIMDModImm8(Dst, DstSize, DefBits, MIRBuilder)) ||
5783 (NewOp = tryAdvSIMDModImmFP(Dst, DstSize, DefBits, MIRBuilder)))
5784 return NewOp;
5785
5786 DefBits = ~DefBits;
5787 Inv = true;
5788 if ((NewOp =
5789 tryAdvSIMDModImm32(Dst, DstSize, DefBits, MIRBuilder, Inv)) ||
5790 (NewOp =
5791 tryAdvSIMDModImm321s(Dst, DstSize, DefBits, MIRBuilder, Inv)) ||
5792 (NewOp = tryAdvSIMDModImm16(Dst, DstSize, DefBits, MIRBuilder, Inv)))
5793 return NewOp;
5794 return nullptr;
5795 };
5796
5797 if (auto *NewOp = TryMOVIWithBits(DefBits))
5798 return NewOp;
5799
5800 // See if a fneg of the constant can be materialized with a MOVI, etc
5801 auto TryWithFNeg = [&](APInt DefBits, int NumBits,
5802 unsigned NegOpc) -> MachineInstr * {
5803 // FNegate each sub-element of the constant
5804 APInt Neg = APInt::getHighBitsSet(NumBits, 1).zext(DstSize);
5805 APInt NegBits(DstSize, 0);
5806 unsigned NumElts = DstSize / NumBits;
5807 for (unsigned i = 0; i < NumElts; i++)
5808 NegBits |= Neg << (NumBits * i);
5809 NegBits = DefBits ^ NegBits;
5810
5811 // Try to create the new constants with MOVI, and if so generate a fneg
5812 // for it.
5813 if (auto *NewOp = TryMOVIWithBits(NegBits)) {
5814 Register NewDst = MRI.createVirtualRegister(&AArch64::FPR128RegClass);
5815 NewOp->getOperand(0).setReg(NewDst);
5816 return MIRBuilder.buildInstr(NegOpc, {Dst}, {NewDst});
5817 }
5818 return nullptr;
5819 };
5820 MachineInstr *R;
5821 if ((R = TryWithFNeg(DefBits, 32, AArch64::FNEGv4f32)) ||
5822 (R = TryWithFNeg(DefBits, 64, AArch64::FNEGv2f64)) ||
5823 (STI.hasFullFP16() &&
5824 (R = TryWithFNeg(DefBits, 16, AArch64::FNEGv8f16))))
5825 return R;
5826 }
5827
5828 auto *CPLoad = emitLoadFromConstantPool(CV, MIRBuilder);
5829 if (!CPLoad) {
5830 LLVM_DEBUG(dbgs() << "Could not generate cp load for constant vector!\n");
5831 return nullptr;
5832 }
5833
5834 auto Copy = MIRBuilder.buildCopy(Dst, CPLoad->getOperand(0));
5835 RBI.constrainGenericRegister(
5836 Dst, *MRI.getRegClass(CPLoad->getOperand(0).getReg()), MRI);
5837 return &*Copy;
5838}
5839
5840bool AArch64InstructionSelector::tryOptConstantBuildVec(
5841 MachineInstr &I, LLT DstTy, MachineRegisterInfo &MRI) {
5842 assert(I.getOpcode() == TargetOpcode::G_BUILD_VECTOR);
5843 unsigned DstSize = DstTy.getSizeInBits();
5844 assert(DstSize <= 128 && "Unexpected build_vec type!");
5845 if (DstSize < 32)
5846 return false;
5847 // Check if we're building a constant vector, in which case we want to
5848 // generate a constant pool load instead of a vector insert sequence.
5849 SmallVector<Constant *, 16> Csts;
5850 for (unsigned Idx = 1; Idx < I.getNumOperands(); ++Idx) {
5851 // Try to find G_CONSTANT or G_FCONSTANT
5852 auto *OpMI =
5853 getOpcodeDef(TargetOpcode::G_CONSTANT, I.getOperand(Idx).getReg(), MRI);
5854 if (OpMI)
5855 Csts.emplace_back(
5856 const_cast<ConstantInt *>(OpMI->getOperand(1).getCImm()));
5857 else if ((OpMI = getOpcodeDef(TargetOpcode::G_FCONSTANT,
5858 I.getOperand(Idx).getReg(), MRI)))
5859 Csts.emplace_back(
5860 const_cast<ConstantFP *>(OpMI->getOperand(1).getFPImm()));
5861 else
5862 return false;
5863 }
5864 Constant *CV = ConstantVector::get(Csts);
5865 if (!emitConstantVector(I.getOperand(0).getReg(), CV, MIB, MRI))
5866 return false;
5867 I.eraseFromParent();
5868 return true;
5869}
5870
5871bool AArch64InstructionSelector::tryOptBuildVecToSubregToReg(
5872 MachineInstr &I, MachineRegisterInfo &MRI) {
5873 // Given:
5874 // %vec = G_BUILD_VECTOR %elt, %undef, %undef, ... %undef
5875 //
5876 // Select the G_BUILD_VECTOR as a SUBREG_TO_REG from %elt.
5877 Register Dst = I.getOperand(0).getReg();
5878 Register EltReg = I.getOperand(1).getReg();
5879 LLT EltTy = MRI.getType(EltReg);
5880 // If the index isn't on the same bank as its elements, then this can't be a
5881 // SUBREG_TO_REG.
5882 const RegisterBank &EltRB = *RBI.getRegBank(EltReg, MRI, TRI);
5883 const RegisterBank &DstRB = *RBI.getRegBank(Dst, MRI, TRI);
5884 if (EltRB != DstRB)
5885 return false;
5886 if (any_of(drop_begin(I.operands(), 2), [&MRI](const MachineOperand &Op) {
5887 return !getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, Op.getReg(), MRI);
5888 }))
5889 return false;
5890 unsigned SubReg;
5891 const TargetRegisterClass *EltRC = getRegClassForTypeOnBank(EltTy, EltRB);
5892 if (!EltRC)
5893 return false;
5894 const TargetRegisterClass *DstRC =
5895 getRegClassForTypeOnBank(MRI.getType(Dst), DstRB);
5896 if (!DstRC)
5897 return false;
5898 if (!getSubRegForClass(EltRC, TRI, SubReg))
5899 return false;
5900 auto SubregToReg = MIB.buildInstr(AArch64::SUBREG_TO_REG, {Dst}, {})
5901 .addImm(0)
5902 .addUse(EltReg)
5903 .addImm(SubReg);
5904 I.eraseFromParent();
5905 constrainSelectedInstRegOperands(*SubregToReg, TII, TRI, RBI);
5906 return RBI.constrainGenericRegister(Dst, *DstRC, MRI);
5907}
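// Minimal sketch of the pattern this matches (banks and types illustrative):
//   %v:fpr(<4 x s32>) = G_BUILD_VECTOR %x:fpr(s32), %u, %u, %u   ; %u undef
// becomes roughly
//   %v = SUBREG_TO_REG 0, %x, %subreg.ssub
// i.e. the scalar is just placed in the low lane and the remaining lanes of
// the wider register are left undefined.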
5908
5909bool AArch64InstructionSelector::selectBuildVector(MachineInstr &I,
5910 MachineRegisterInfo &MRI) {
5911 assert(I.getOpcode() == TargetOpcode::G_BUILD_VECTOR);
5912 // Until we port more of the optimized selections, for now just use a vector
5913 // insert sequence.
5914 const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
5915 const LLT EltTy = MRI.getType(I.getOperand(1).getReg());
5916 unsigned EltSize = EltTy.getSizeInBits();
5917
5918 if (tryOptConstantBuildVec(I, DstTy, MRI))
5919 return true;
5920 if (tryOptBuildVecToSubregToReg(I, MRI))
5921 return true;
5922
5923 if (EltSize != 8 && EltSize != 16 && EltSize != 32 && EltSize != 64)
5924 return false; // Don't support all element types yet.
5925 const RegisterBank &RB = *RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI);
5926
5927 const TargetRegisterClass *DstRC = &AArch64::FPR128RegClass;
5928 MachineInstr *ScalarToVec =
5929 emitScalarToVector(DstTy.getElementType().getSizeInBits(), DstRC,
5930 I.getOperand(1).getReg(), MIB);
5931 if (!ScalarToVec)
5932 return false;
5933
5934 Register DstVec = ScalarToVec->getOperand(0).getReg();
5935 unsigned DstSize = DstTy.getSizeInBits();
5936
5937 // Keep track of the last MI we inserted. Later on, we might be able to save
5938 // a copy using it.
5939 MachineInstr *PrevMI = ScalarToVec;
5940 for (unsigned i = 2, e = DstSize / EltSize + 1; i < e; ++i) {
5941 // Note that if we don't do a subregister copy, we can end up making an
5942 // extra register.
5943 Register OpReg = I.getOperand(i).getReg();
5944 // Do not emit inserts for undefs
5945 if (!getOpcodeDef<GImplicitDef>(OpReg, MRI)) {
5946 PrevMI = &*emitLaneInsert(std::nullopt, DstVec, OpReg, i - 1, RB, MIB);
5947 DstVec = PrevMI->getOperand(0).getReg();
5948 }
5949 }
5950
5951 // If DstTy's size in bits is less than 128, then emit a subregister copy
5952 // from DstVec to the last register we've defined.
5953 if (DstSize < 128) {
5954 // Force this to be FPR using the destination vector.
5955 const TargetRegisterClass *RC =
5956 getRegClassForTypeOnBank(DstTy, *RBI.getRegBank(DstVec, MRI, TRI));
5957 if (!RC)
5958 return false;
5959 if (RC != &AArch64::FPR32RegClass && RC != &AArch64::FPR64RegClass) {
5960 LLVM_DEBUG(dbgs() << "Unsupported register class!\n");
5961 return false;
5962 }
5963
5964 unsigned SubReg = 0;
5965 if (!getSubRegForClass(RC, TRI, SubReg))
5966 return false;
5967 if (SubReg != AArch64::ssub && SubReg != AArch64::dsub) {
5968 LLVM_DEBUG(dbgs() << "Unsupported destination size! (" << DstSize
5969 << ")\n");
5970 return false;
5971 }
5972
5973 Register Reg = MRI.createVirtualRegister(RC);
5974 Register DstReg = I.getOperand(0).getReg();
5975
5976 MIB.buildInstr(TargetOpcode::COPY, {DstReg}, {}).addReg(DstVec, 0, SubReg);
5977 MachineOperand &RegOp = I.getOperand(1);
5978 RegOp.setReg(Reg);
5979 RBI.constrainGenericRegister(DstReg, *RC, MRI);
5980 } else {
5981 // We either have a vector with all elements (except the first one) undef or
5982 // at least one non-undef non-first element. In the first case, we need to
5983 // constrain the output register ourselves as we may have generated an
5984 // INSERT_SUBREG operation which is a generic operation for which the
5985 // output regclass cannot be automatically chosen.
5986 //
5987 // In the second case, there is no need to do this as it may generate an
5988 // instruction like INSvi32gpr where the regclass can be automatically
5989 // chosen.
5990 //
5991 // Also, we save a copy by re-using the destination register on the final
5992 // insert.
5993 PrevMI->getOperand(0).setReg(I.getOperand(0).getReg());
5995
5996 Register DstReg = PrevMI->getOperand(0).getReg();
5997 if (PrevMI == ScalarToVec && DstReg.isVirtual()) {
5998 const TargetRegisterClass *RC =
5999 getRegClassForTypeOnBank(DstTy, *RBI.getRegBank(DstVec, MRI, TRI));
6000 RBI.constrainGenericRegister(DstReg, *RC, MRI);
6001 }
6002 }
6003
6004 I.eraseFromParent();
6005 return true;
6006}
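// Rough shape of the generic path for a non-constant <4 x s32> build vector
// (register names invented): operand 1 is placed in the low lane of a Q
// register by emitScalarToVector, then each further non-undef operand is
// inserted with an INS, e.g.
//   mov v0.s[1], w1
//   mov v0.s[2], w2
//   mov v0.s[3], w3
// and sub-128-bit results finish with a subregister copy back down to D/S.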
6007
6008bool AArch64InstructionSelector::selectVectorLoadIntrinsic(unsigned Opc,
6009 unsigned NumVecs,
6010 MachineInstr &I) {
6011 assert(I.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS);
6012 assert(Opc && "Expected an opcode?");
6013 assert(NumVecs > 1 && NumVecs < 5 && "Only support 2, 3, or 4 vectors");
6014 auto &MRI = *MIB.getMRI();
6015 LLT Ty = MRI.getType(I.getOperand(0).getReg());
6016 unsigned Size = Ty.getSizeInBits();
6017 assert((Size == 64 || Size == 128) &&
6018 "Destination must be 64 bits or 128 bits?");
6019 unsigned SubReg = Size == 64 ? AArch64::dsub0 : AArch64::qsub0;
6020 auto Ptr = I.getOperand(I.getNumOperands() - 1).getReg();
6021 assert(MRI.getType(Ptr).isPointer() && "Expected a pointer type?");
6022 auto Load = MIB.buildInstr(Opc, {Ty}, {Ptr});
6023 Load.cloneMemRefs(I);
6025 Register SelectedLoadDst = Load->getOperand(0).getReg();
6026 for (unsigned Idx = 0; Idx < NumVecs; ++Idx) {
6027 auto Vec = MIB.buildInstr(TargetOpcode::COPY, {I.getOperand(Idx)}, {})
6028 .addReg(SelectedLoadDst, 0, SubReg + Idx);
6029 // Emit the subreg copies and immediately select them.
6030 // FIXME: We should refactor our copy code into an emitCopy helper and
6031 // clean up uses of this pattern elsewhere in the selector.
6032 selectCopy(*Vec, TII, MRI, TRI, RBI);
6033 }
6034 return true;
6035}
6036
6037bool AArch64InstructionSelector::selectVectorLoadLaneIntrinsic(
6038 unsigned Opc, unsigned NumVecs, MachineInstr &I) {
6039 assert(I.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS);
6040 assert(Opc && "Expected an opcode?");
6041 assert(NumVecs > 1 && NumVecs < 5 && "Only support 2, 3, or 4 vectors");
6042 auto &MRI = *MIB.getMRI();
6043 LLT Ty = MRI.getType(I.getOperand(0).getReg());
6044 bool Narrow = Ty.getSizeInBits() == 64;
6045
6046 auto FirstSrcRegIt = I.operands_begin() + NumVecs + 1;
6047 SmallVector<Register, 4> Regs(NumVecs);
6048 std::transform(FirstSrcRegIt, FirstSrcRegIt + NumVecs, Regs.begin(),
6049 [](auto MO) { return MO.getReg(); });
6050
6051 if (Narrow) {
6052 transform(Regs, Regs.begin(), [this](Register Reg) {
6053 return emitScalarToVector(64, &AArch64::FPR128RegClass, Reg, MIB)
6054 ->getOperand(0)
6055 .getReg();
6056 });
6057 Ty = Ty.multiplyElements(2);
6058 }
6059
6060 Register Tuple = createQTuple(Regs, MIB);
6061 auto LaneNo = getIConstantVRegVal((FirstSrcRegIt + NumVecs)->getReg(), MRI);
6062 if (!LaneNo)
6063 return false;
6064
6065 Register Ptr = (FirstSrcRegIt + NumVecs + 1)->getReg();
6066 auto Load = MIB.buildInstr(Opc, {Ty}, {})
6067 .addReg(Tuple)
6068 .addImm(LaneNo->getZExtValue())
6069 .addReg(Ptr);
6070 Load.cloneMemRefs(I);
6072 Register SelectedLoadDst = Load->getOperand(0).getReg();
6073 unsigned SubReg = AArch64::qsub0;
6074 for (unsigned Idx = 0; Idx < NumVecs; ++Idx) {
6075 auto Vec = MIB.buildInstr(TargetOpcode::COPY,
6076 {Narrow ? DstOp(&AArch64::FPR128RegClass)
6077 : DstOp(I.getOperand(Idx).getReg())},
6078 {})
6079 .addReg(SelectedLoadDst, 0, SubReg + Idx);
6080 Register WideReg = Vec.getReg(0);
6081 // Emit the subreg copies and immediately select them.
6082 selectCopy(*Vec, TII, MRI, TRI, RBI);
6083 if (Narrow &&
6084 !emitNarrowVector(I.getOperand(Idx).getReg(), WideReg, MIB, MRI))
6085 return false;
6086 }
6087 return true;
6088}
6089
6090void AArch64InstructionSelector::selectVectorStoreIntrinsic(MachineInstr &I,
6091 unsigned NumVecs,
6092 unsigned Opc) {
6093 MachineRegisterInfo &MRI = I.getParent()->getParent()->getRegInfo();
6094 LLT Ty = MRI.getType(I.getOperand(1).getReg());
6095 Register Ptr = I.getOperand(1 + NumVecs).getReg();
6096
6097 SmallVector<Register, 2> Regs(NumVecs);
6098 std::transform(I.operands_begin() + 1, I.operands_begin() + 1 + NumVecs,
6099 Regs.begin(), [](auto MO) { return MO.getReg(); });
6100
6101 Register Tuple = Ty.getSizeInBits() == 128 ? createQTuple(Regs, MIB)
6102 : createDTuple(Regs, MIB);
6103 auto Store = MIB.buildInstr(Opc, {}, {Tuple, Ptr});
6104 Store.cloneMemRefs(I);
6106}
6107
6108bool AArch64InstructionSelector::selectVectorStoreLaneIntrinsic(
6109 MachineInstr &I, unsigned NumVecs, unsigned Opc) {
6110 MachineRegisterInfo &MRI = I.getParent()->getParent()->getRegInfo();
6111 LLT Ty = MRI.getType(I.getOperand(1).getReg());
6112 bool Narrow = Ty.getSizeInBits() == 64;
6113
6114 SmallVector<Register, 2> Regs(NumVecs);
6115 std::transform(I.operands_begin() + 1, I.operands_begin() + 1 + NumVecs,
6116 Regs.begin(), [](auto MO) { return MO.getReg(); });
6117
6118 if (Narrow)
6119 transform(Regs, Regs.begin(), [this](Register Reg) {
6120 return emitScalarToVector(64, &AArch64::FPR128RegClass, Reg, MIB)
6121 ->getOperand(0)
6122 .getReg();
6123 });
6124
6125 Register Tuple = createQTuple(Regs, MIB);
6126
6127 auto LaneNo = getIConstantVRegVal(I.getOperand(1 + NumVecs).getReg(), MRI);
6128 if (!LaneNo)
6129 return false;
6130 Register Ptr = I.getOperand(1 + NumVecs + 1).getReg();
6131 auto Store = MIB.buildInstr(Opc, {}, {})
6132 .addReg(Tuple)
6133 .addImm(LaneNo->getZExtValue())
6134 .addReg(Ptr);
6135 Store.cloneMemRefs(I);
6137 return true;
6138}
6139
6140bool AArch64InstructionSelector::selectIntrinsicWithSideEffects(
6141 MachineInstr &I, MachineRegisterInfo &MRI) {
6142 // Find the intrinsic ID.
6143 unsigned IntrinID = cast<GIntrinsic>(I).getIntrinsicID();
6144
6145 const LLT S8 = LLT::scalar(8);
6146 const LLT S16 = LLT::scalar(16);
6147 const LLT S32 = LLT::scalar(32);
6148 const LLT S64 = LLT::scalar(64);
6149 const LLT P0 = LLT::pointer(0, 64);
6150 // Select the instruction.
6151 switch (IntrinID) {
6152 default:
6153 return false;
6154 case Intrinsic::aarch64_ldxp:
6155 case Intrinsic::aarch64_ldaxp: {
6156 auto NewI = MIB.buildInstr(
6157 IntrinID == Intrinsic::aarch64_ldxp ? AArch64::LDXPX : AArch64::LDAXPX,
6158 {I.getOperand(0).getReg(), I.getOperand(1).getReg()},
6159 {I.getOperand(3)});
6160 NewI.cloneMemRefs(I);
6162 break;
6163 }
6164 case Intrinsic::aarch64_neon_ld1x2: {
6165 LLT Ty = MRI.getType(I.getOperand(0).getReg());
6166 unsigned Opc = 0;
6167 if (Ty == LLT::fixed_vector(8, S8))
6168 Opc = AArch64::LD1Twov8b;
6169 else if (Ty == LLT::fixed_vector(16, S8))
6170 Opc = AArch64::LD1Twov16b;
6171 else if (Ty == LLT::fixed_vector(4, S16))
6172 Opc = AArch64::LD1Twov4h;
6173 else if (Ty == LLT::fixed_vector(8, S16))
6174 Opc = AArch64::LD1Twov8h;
6175 else if (Ty == LLT::fixed_vector(2, S32))
6176 Opc = AArch64::LD1Twov2s;
6177 else if (Ty == LLT::fixed_vector(4, S32))
6178 Opc = AArch64::LD1Twov4s;
6179 else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
6180 Opc = AArch64::LD1Twov2d;
6181 else if (Ty == S64 || Ty == P0)
6182 Opc = AArch64::LD1Twov1d;
6183 else
6184 llvm_unreachable("Unexpected type for ld1x2!");
6185 selectVectorLoadIntrinsic(Opc, 2, I);
6186 break;
6187 }
6188 case Intrinsic::aarch64_neon_ld1x3: {
6189 LLT Ty = MRI.getType(I.getOperand(0).getReg());
6190 unsigned Opc = 0;
6191 if (Ty == LLT::fixed_vector(8, S8))
6192 Opc = AArch64::LD1Threev8b;
6193 else if (Ty == LLT::fixed_vector(16, S8))
6194 Opc = AArch64::LD1Threev16b;
6195 else if (Ty == LLT::fixed_vector(4, S16))
6196 Opc = AArch64::LD1Threev4h;
6197 else if (Ty == LLT::fixed_vector(8, S16))
6198 Opc = AArch64::LD1Threev8h;
6199 else if (Ty == LLT::fixed_vector(2, S32))
6200 Opc = AArch64::LD1Threev2s;
6201 else if (Ty == LLT::fixed_vector(4, S32))
6202 Opc = AArch64::LD1Threev4s;
6203 else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
6204 Opc = AArch64::LD1Threev2d;
6205 else if (Ty == S64 || Ty == P0)
6206 Opc = AArch64::LD1Threev1d;
6207 else
6208 llvm_unreachable("Unexpected type for ld1x3!");
6209 selectVectorLoadIntrinsic(Opc, 3, I);
6210 break;
6211 }
6212 case Intrinsic::aarch64_neon_ld1x4: {
6213 LLT Ty = MRI.getType(I.getOperand(0).getReg());
6214 unsigned Opc = 0;
6215 if (Ty == LLT::fixed_vector(8, S8))
6216 Opc = AArch64::LD1Fourv8b;
6217 else if (Ty == LLT::fixed_vector(16, S8))
6218 Opc = AArch64::LD1Fourv16b;
6219 else if (Ty == LLT::fixed_vector(4, S16))
6220 Opc = AArch64::LD1Fourv4h;
6221 else if (Ty == LLT::fixed_vector(8, S16))
6222 Opc = AArch64::LD1Fourv8h;
6223 else if (Ty == LLT::fixed_vector(2, S32))
6224 Opc = AArch64::LD1Fourv2s;
6225 else if (Ty == LLT::fixed_vector(4, S32))
6226 Opc = AArch64::LD1Fourv4s;
6227 else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
6228 Opc = AArch64::LD1Fourv2d;
6229 else if (Ty == S64 || Ty == P0)
6230 Opc = AArch64::LD1Fourv1d;
6231 else
6232 llvm_unreachable("Unexpected type for ld1x4!");
6233 selectVectorLoadIntrinsic(Opc, 4, I);
6234 break;
6235 }
6236 case Intrinsic::aarch64_neon_ld2: {
6237 LLT Ty = MRI.getType(I.getOperand(0).getReg());
6238 unsigned Opc = 0;
6239 if (Ty == LLT::fixed_vector(8, S8))
6240 Opc = AArch64::LD2Twov8b;
6241 else if (Ty == LLT::fixed_vector(16, S8))
6242 Opc = AArch64::LD2Twov16b;
6243 else if (Ty == LLT::fixed_vector(4, S16))
6244 Opc = AArch64::LD2Twov4h;
6245 else if (Ty == LLT::fixed_vector(8, S16))
6246 Opc = AArch64::LD2Twov8h;
6247 else if (Ty == LLT::fixed_vector(2, S32))
6248 Opc = AArch64::LD2Twov2s;
6249 else if (Ty == LLT::fixed_vector(4, S32))
6250 Opc = AArch64::LD2Twov4s;
6251 else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
6252 Opc = AArch64::LD2Twov2d;
6253 else if (Ty == S64 || Ty == P0)
6254 Opc = AArch64::LD1Twov1d;
6255 else
6256 llvm_unreachable("Unexpected type for ld2!");
6257 selectVectorLoadIntrinsic(Opc, 2, I);
6258 break;
6259 }
6260 case Intrinsic::aarch64_neon_ld2lane: {
6261 LLT Ty = MRI.getType(I.getOperand(0).getReg());
6262 unsigned Opc;
6263 if (Ty == LLT::fixed_vector(8, S8) || Ty == LLT::fixed_vector(16, S8))
6264 Opc = AArch64::LD2i8;
6265 else if (Ty == LLT::fixed_vector(4, S16) || Ty == LLT::fixed_vector(8, S16))
6266 Opc = AArch64::LD2i16;
6267 else if (Ty == LLT::fixed_vector(2, S32) || Ty == LLT::fixed_vector(4, S32))
6268 Opc = AArch64::LD2i32;
6269 else if (Ty == LLT::fixed_vector(2, S64) ||
6270 Ty == LLT::fixed_vector(2, P0) || Ty == S64 || Ty == P0)
6271 Opc = AArch64::LD2i64;
6272 else
6273 llvm_unreachable("Unexpected type for ld2lane!");
6274 if (!selectVectorLoadLaneIntrinsic(Opc, 2, I))
6275 return false;
6276 break;
6277 }
6278 case Intrinsic::aarch64_neon_ld2r: {
6279 LLT Ty = MRI.getType(I.getOperand(0).getReg());
6280 unsigned Opc = 0;
6281 if (Ty == LLT::fixed_vector(8, S8))
6282 Opc = AArch64::LD2Rv8b;
6283 else if (Ty == LLT::fixed_vector(16, S8))
6284 Opc = AArch64::LD2Rv16b;
6285 else if (Ty == LLT::fixed_vector(4, S16))
6286 Opc = AArch64::LD2Rv4h;
6287 else if (Ty == LLT::fixed_vector(8, S16))
6288 Opc = AArch64::LD2Rv8h;
6289 else if (Ty == LLT::fixed_vector(2, S32))
6290 Opc = AArch64::LD2Rv2s;
6291 else if (Ty == LLT::fixed_vector(4, S32))
6292 Opc = AArch64::LD2Rv4s;
6293 else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
6294 Opc = AArch64::LD2Rv2d;
6295 else if (Ty == S64 || Ty == P0)
6296 Opc = AArch64::LD2Rv1d;
6297 else
6298 llvm_unreachable("Unexpected type for ld2r!");
6299 selectVectorLoadIntrinsic(Opc, 2, I);
6300 break;
6301 }
6302 case Intrinsic::aarch64_neon_ld3: {
6303 LLT Ty = MRI.getType(I.getOperand(0).getReg());
6304 unsigned Opc = 0;
6305 if (Ty == LLT::fixed_vector(8, S8))
6306 Opc = AArch64::LD3Threev8b;
6307 else if (Ty == LLT::fixed_vector(16, S8))
6308 Opc = AArch64::LD3Threev16b;
6309 else if (Ty == LLT::fixed_vector(4, S16))
6310 Opc = AArch64::LD3Threev4h;
6311 else if (Ty == LLT::fixed_vector(8, S16))
6312 Opc = AArch64::LD3Threev8h;
6313 else if (Ty == LLT::fixed_vector(2, S32))
6314 Opc = AArch64::LD3Threev2s;
6315 else if (Ty == LLT::fixed_vector(4, S32))
6316 Opc = AArch64::LD3Threev4s;
6317 else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
6318 Opc = AArch64::LD3Threev2d;
6319 else if (Ty == S64 || Ty == P0)
6320 Opc = AArch64::LD1Threev1d;
6321 else
6322 llvm_unreachable("Unexpected type for ld3!");
6323 selectVectorLoadIntrinsic(Opc, 3, I);
6324 break;
6325 }
6326 case Intrinsic::aarch64_neon_ld3lane: {
6327 LLT Ty = MRI.getType(I.getOperand(0).getReg());
6328 unsigned Opc;
6329 if (Ty == LLT::fixed_vector(8, S8) || Ty == LLT::fixed_vector(16, S8))
6330 Opc = AArch64::LD3i8;
6331 else if (Ty == LLT::fixed_vector(4, S16) || Ty == LLT::fixed_vector(8, S16))
6332 Opc = AArch64::LD3i16;
6333 else if (Ty == LLT::fixed_vector(2, S32) || Ty == LLT::fixed_vector(4, S32))
6334 Opc = AArch64::LD3i32;
6335 else if (Ty == LLT::fixed_vector(2, S64) ||
6336 Ty == LLT::fixed_vector(2, P0) || Ty == S64 || Ty == P0)
6337 Opc = AArch64::LD3i64;
6338 else
6339 llvm_unreachable("Unexpected type for ld3lane!");
6340 if (!selectVectorLoadLaneIntrinsic(Opc, 3, I))
6341 return false;
6342 break;
6343 }
6344 case Intrinsic::aarch64_neon_ld3r: {
6345 LLT Ty = MRI.getType(I.getOperand(0).getReg());
6346 unsigned Opc = 0;
6347 if (Ty == LLT::fixed_vector(8, S8))
6348 Opc = AArch64::LD3Rv8b;
6349 else if (Ty == LLT::fixed_vector(16, S8))
6350 Opc = AArch64::LD3Rv16b;
6351 else if (Ty == LLT::fixed_vector(4, S16))
6352 Opc = AArch64::LD3Rv4h;
6353 else if (Ty == LLT::fixed_vector(8, S16))
6354 Opc = AArch64::LD3Rv8h;
6355 else if (Ty == LLT::fixed_vector(2, S32))
6356 Opc = AArch64::LD3Rv2s;
6357 else if (Ty == LLT::fixed_vector(4, S32))
6358 Opc = AArch64::LD3Rv4s;
6359 else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
6360 Opc = AArch64::LD3Rv2d;
6361 else if (Ty == S64 || Ty == P0)
6362 Opc = AArch64::LD3Rv1d;
6363 else
6364 llvm_unreachable("Unexpected type for ld3r!");
6365 selectVectorLoadIntrinsic(Opc, 3, I);
6366 break;
6367 }
6368 case Intrinsic::aarch64_neon_ld4: {
6369 LLT Ty = MRI.getType(I.getOperand(0).getReg());
6370 unsigned Opc = 0;
6371 if (Ty == LLT::fixed_vector(8, S8))
6372 Opc = AArch64::LD4Fourv8b;
6373 else if (Ty == LLT::fixed_vector(16, S8))
6374 Opc = AArch64::LD4Fourv16b;
6375 else if (Ty == LLT::fixed_vector(4, S16))
6376 Opc = AArch64::LD4Fourv4h;
6377 else if (Ty == LLT::fixed_vector(8, S16))
6378 Opc = AArch64::LD4Fourv8h;
6379 else if (Ty == LLT::fixed_vector(2, S32))
6380 Opc = AArch64::LD4Fourv2s;
6381 else if (Ty == LLT::fixed_vector(4, S32))
6382 Opc = AArch64::LD4Fourv4s;
6383 else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
6384 Opc = AArch64::LD4Fourv2d;
6385 else if (Ty == S64 || Ty == P0)
6386 Opc = AArch64::LD1Fourv1d;
6387 else
6388 llvm_unreachable("Unexpected type for ld4!");
6389 selectVectorLoadIntrinsic(Opc, 4, I);
6390 break;
6391 }
6392 case Intrinsic::aarch64_neon_ld4lane: {
6393 LLT Ty = MRI.getType(I.getOperand(0).getReg());
6394 unsigned Opc;
6395 if (Ty == LLT::fixed_vector(8, S8) || Ty == LLT::fixed_vector(16, S8))
6396 Opc = AArch64::LD4i8;
6397 else if (Ty == LLT::fixed_vector(4, S16) || Ty == LLT::fixed_vector(8, S16))
6398 Opc = AArch64::LD4i16;
6399 else if (Ty == LLT::fixed_vector(2, S32) || Ty == LLT::fixed_vector(4, S32))
6400 Opc = AArch64::LD4i32;
6401 else if (Ty == LLT::fixed_vector(2, S64) ||
6402 Ty == LLT::fixed_vector(2, P0) || Ty == S64 || Ty == P0)
6403 Opc = AArch64::LD4i64;
6404 else
6405 llvm_unreachable("Unexpected type for ld4lane!");
6406 if (!selectVectorLoadLaneIntrinsic(Opc, 4, I))
6407 return false;
6408 break;
6409 }
6410 case Intrinsic::aarch64_neon_ld4r: {
6411 LLT Ty = MRI.getType(I.getOperand(0).getReg());
6412 unsigned Opc = 0;
6413 if (Ty == LLT::fixed_vector(8, S8))
6414 Opc = AArch64::LD4Rv8b;
6415 else if (Ty == LLT::fixed_vector(16, S8))
6416 Opc = AArch64::LD4Rv16b;
6417 else if (Ty == LLT::fixed_vector(4, S16))
6418 Opc = AArch64::LD4Rv4h;
6419 else if (Ty == LLT::fixed_vector(8, S16))
6420 Opc = AArch64::LD4Rv8h;
6421 else if (Ty == LLT::fixed_vector(2, S32))
6422 Opc = AArch64::LD4Rv2s;
6423 else if (Ty == LLT::fixed_vector(4, S32))
6424 Opc = AArch64::LD4Rv4s;
6425 else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
6426 Opc = AArch64::LD4Rv2d;
6427 else if (Ty == S64 || Ty == P0)
6428 Opc = AArch64::LD4Rv1d;
6429 else
6430 llvm_unreachable("Unexpected type for ld4r!");
6431 selectVectorLoadIntrinsic(Opc, 4, I);
6432 break;
6433 }
6434 case Intrinsic::aarch64_neon_st1x2: {
6435 LLT Ty = MRI.getType(I.getOperand(1).getReg());
6436 unsigned Opc;
6437 if (Ty == LLT::fixed_vector(8, S8))
6438 Opc = AArch64::ST1Twov8b;
6439 else if (Ty == LLT::fixed_vector(16, S8))
6440 Opc = AArch64::ST1Twov16b;
6441 else if (Ty == LLT::fixed_vector(4, S16))
6442 Opc = AArch64::ST1Twov4h;
6443 else if (Ty == LLT::fixed_vector(8, S16))
6444 Opc = AArch64::ST1Twov8h;
6445 else if (Ty == LLT::fixed_vector(2, S32))
6446 Opc = AArch64::ST1Twov2s;
6447 else if (Ty == LLT::fixed_vector(4, S32))
6448 Opc = AArch64::ST1Twov4s;
6449 else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
6450 Opc = AArch64::ST1Twov2d;
6451 else if (Ty == S64 || Ty == P0)
6452 Opc = AArch64::ST1Twov1d;
6453 else
6454 llvm_unreachable("Unexpected type for st1x2!");
6455 selectVectorStoreIntrinsic(I, 2, Opc);
6456 break;
6457 }
6458 case Intrinsic::aarch64_neon_st1x3: {
6459 LLT Ty = MRI.getType(I.getOperand(1).getReg());
6460 unsigned Opc;
6461 if (Ty == LLT::fixed_vector(8, S8))
6462 Opc = AArch64::ST1Threev8b;
6463 else if (Ty == LLT::fixed_vector(16, S8))
6464 Opc = AArch64::ST1Threev16b;
6465 else if (Ty == LLT::fixed_vector(4, S16))
6466 Opc = AArch64::ST1Threev4h;
6467 else if (Ty == LLT::fixed_vector(8, S16))
6468 Opc = AArch64::ST1Threev8h;
6469 else if (Ty == LLT::fixed_vector(2, S32))
6470 Opc = AArch64::ST1Threev2s;
6471 else if (Ty == LLT::fixed_vector(4, S32))
6472 Opc = AArch64::ST1Threev4s;
6473 else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
6474 Opc = AArch64::ST1Threev2d;
6475 else if (Ty == S64 || Ty == P0)
6476 Opc = AArch64::ST1Threev1d;
6477 else
6478 llvm_unreachable("Unexpected type for st1x3!");
6479 selectVectorStoreIntrinsic(I, 3, Opc);
6480 break;
6481 }
6482 case Intrinsic::aarch64_neon_st1x4: {
6483 LLT Ty = MRI.getType(I.getOperand(1).getReg());
6484 unsigned Opc;
6485 if (Ty == LLT::fixed_vector(8, S8))
6486 Opc = AArch64::ST1Fourv8b;
6487 else if (Ty == LLT::fixed_vector(16, S8))
6488 Opc = AArch64::ST1Fourv16b;
6489 else if (Ty == LLT::fixed_vector(4, S16))
6490 Opc = AArch64::ST1Fourv4h;
6491 else if (Ty == LLT::fixed_vector(8, S16))
6492 Opc = AArch64::ST1Fourv8h;
6493 else if (Ty == LLT::fixed_vector(2, S32))
6494 Opc = AArch64::ST1Fourv2s;
6495 else if (Ty == LLT::fixed_vector(4, S32))
6496 Opc = AArch64::ST1Fourv4s;
6497 else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
6498 Opc = AArch64::ST1Fourv2d;
6499 else if (Ty == S64 || Ty == P0)
6500 Opc = AArch64::ST1Fourv1d;
6501 else
6502 llvm_unreachable("Unexpected type for st1x4!");
6503 selectVectorStoreIntrinsic(I, 4, Opc);
6504 break;
6505 }
6506 case Intrinsic::aarch64_neon_st2: {
6507 LLT Ty = MRI.getType(I.getOperand(1).getReg());
6508 unsigned Opc;
6509 if (Ty == LLT::fixed_vector(8, S8))
6510 Opc = AArch64::ST2Twov8b;
6511 else if (Ty == LLT::fixed_vector(16, S8))
6512 Opc = AArch64::ST2Twov16b;
6513 else if (Ty == LLT::fixed_vector(4, S16))
6514 Opc = AArch64::ST2Twov4h;
6515 else if (Ty == LLT::fixed_vector(8, S16))
6516 Opc = AArch64::ST2Twov8h;
6517 else if (Ty == LLT::fixed_vector(2, S32))
6518 Opc = AArch64::ST2Twov2s;
6519 else if (Ty == LLT::fixed_vector(4, S32))
6520 Opc = AArch64::ST2Twov4s;
6521 else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
6522 Opc = AArch64::ST2Twov2d;
6523 else if (Ty == S64 || Ty == P0)
6524 Opc = AArch64::ST1Twov1d;
6525 else
6526 llvm_unreachable("Unexpected type for st2!");
6527 selectVectorStoreIntrinsic(I, 2, Opc);
6528 break;
6529 }
6530 case Intrinsic::aarch64_neon_st3: {
6531 LLT Ty = MRI.getType(I.getOperand(1).getReg());
6532 unsigned Opc;
6533 if (Ty == LLT::fixed_vector(8, S8))
6534 Opc = AArch64::ST3Threev8b;
6535 else if (Ty == LLT::fixed_vector(16, S8))
6536 Opc = AArch64::ST3Threev16b;
6537 else if (Ty == LLT::fixed_vector(4, S16))
6538 Opc = AArch64::ST3Threev4h;
6539 else if (Ty == LLT::fixed_vector(8, S16))
6540 Opc = AArch64::ST3Threev8h;
6541 else if (Ty == LLT::fixed_vector(2, S32))
6542 Opc = AArch64::ST3Threev2s;
6543 else if (Ty == LLT::fixed_vector(4, S32))
6544 Opc = AArch64::ST3Threev4s;
6545 else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
6546 Opc = AArch64::ST3Threev2d;
6547 else if (Ty == S64 || Ty == P0)
6548 Opc = AArch64::ST1Threev1d;
6549 else
6550 llvm_unreachable("Unexpected type for st3!");
6551 selectVectorStoreIntrinsic(I, 3, Opc);
6552 break;
6553 }
6554 case Intrinsic::aarch64_neon_st4: {
6555 LLT Ty = MRI.getType(I.getOperand(1).getReg());
6556 unsigned Opc;
6557 if (Ty == LLT::fixed_vector(8, S8))
6558 Opc = AArch64::ST4Fourv8b;
6559 else if (Ty == LLT::fixed_vector(16, S8))
6560 Opc = AArch64::ST4Fourv16b;
6561 else if (Ty == LLT::fixed_vector(4, S16))
6562 Opc = AArch64::ST4Fourv4h;
6563 else if (Ty == LLT::fixed_vector(8, S16))
6564 Opc = AArch64::ST4Fourv8h;
6565 else if (Ty == LLT::fixed_vector(2, S32))
6566 Opc = AArch64::ST4Fourv2s;
6567 else if (Ty == LLT::fixed_vector(4, S32))
6568 Opc = AArch64::ST4Fourv4s;
6569 else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
6570 Opc = AArch64::ST4Fourv2d;
6571 else if (Ty == S64 || Ty == P0)
6572 Opc = AArch64::ST1Fourv1d;
6573 else
6574 llvm_unreachable("Unexpected type for st4!");
6575 selectVectorStoreIntrinsic(I, 4, Opc);
6576 break;
6577 }
6578 case Intrinsic::aarch64_neon_st2lane: {
6579 LLT Ty = MRI.getType(I.getOperand(1).getReg());
6580 unsigned Opc;
6581 if (Ty == LLT::fixed_vector(8, S8) || Ty == LLT::fixed_vector(16, S8))
6582 Opc = AArch64::ST2i8;
6583 else if (Ty == LLT::fixed_vector(4, S16) || Ty == LLT::fixed_vector(8, S16))
6584 Opc = AArch64::ST2i16;
6585 else if (Ty == LLT::fixed_vector(2, S32) || Ty == LLT::fixed_vector(4, S32))
6586 Opc = AArch64::ST2i32;
6587 else if (Ty == LLT::fixed_vector(2, S64) ||
6588 Ty == LLT::fixed_vector(2, P0) || Ty == S64 || Ty == P0)
6589 Opc = AArch64::ST2i64;
6590 else
6591 llvm_unreachable("Unexpected type for st2lane!");
6592 if (!selectVectorStoreLaneIntrinsic(I, 2, Opc))
6593 return false;
6594 break;
6595 }
6596 case Intrinsic::aarch64_neon_st3lane: {
6597 LLT Ty = MRI.getType(I.getOperand(1).getReg());
6598 unsigned Opc;
6599 if (Ty == LLT::fixed_vector(8, S8) || Ty == LLT::fixed_vector(16, S8))
6600 Opc = AArch64::ST3i8;
6601 else if (Ty == LLT::fixed_vector(4, S16) || Ty == LLT::fixed_vector(8, S16))
6602 Opc = AArch64::ST3i16;
6603 else if (Ty == LLT::fixed_vector(2, S32) || Ty == LLT::fixed_vector(4, S32))
6604 Opc = AArch64::ST3i32;
6605 else if (Ty == LLT::fixed_vector(2, S64) ||
6606 Ty == LLT::fixed_vector(2, P0) || Ty == S64 || Ty == P0)
6607 Opc = AArch64::ST3i64;
6608 else
6609 llvm_unreachable("Unexpected type for st3lane!");
6610 if (!selectVectorStoreLaneIntrinsic(I, 3, Opc))
6611 return false;
6612 break;
6613 }
6614 case Intrinsic::aarch64_neon_st4lane: {
6615 LLT Ty = MRI.getType(I.getOperand(1).getReg());
6616 unsigned Opc;
6617 if (Ty == LLT::fixed_vector(8, S8) || Ty == LLT::fixed_vector(16, S8))
6618 Opc = AArch64::ST4i8;
6619 else if (Ty == LLT::fixed_vector(4, S16) || Ty == LLT::fixed_vector(8, S16))
6620 Opc = AArch64::ST4i16;
6621 else if (Ty == LLT::fixed_vector(2, S32) || Ty == LLT::fixed_vector(4, S32))
6622 Opc = AArch64::ST4i32;
6623 else if (Ty == LLT::fixed_vector(2, S64) ||
6624 Ty == LLT::fixed_vector(2, P0) || Ty == S64 || Ty == P0)
6625 Opc = AArch64::ST4i64;
6626 else
6627 llvm_unreachable("Unexpected type for st4lane!");
6628 if (!selectVectorStoreLaneIntrinsic(I, 4, Opc))
6629 return false;
6630 break;
6631 }
6632 case Intrinsic::aarch64_mops_memset_tag: {
6633 // Transform
6634 // %dst:gpr(p0) =
6635 // G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aarch64.mops.memset.tag),
6636 // %dst:gpr(p0), %val:gpr(s64), %n:gpr(s64)
6637 // where %dst is updated, into
6638 // %Rd:GPR64common, %Rn:GPR64 =
6639 // MOPSMemorySetTaggingPseudo
6640 // %Rd:GPR64common, %Rn:GPR64, %Rm:GPR64
6641 // where Rd and Rn are tied.
6642 // It is expected that %val has been extended to s64 in legalization.
6643 // Note that the order of the size/value operands is swapped.
6644
6645 Register DstDef = I.getOperand(0).getReg();
6646 // I.getOperand(1) is the intrinsic function
6647 Register DstUse = I.getOperand(2).getReg();
6648 Register ValUse = I.getOperand(3).getReg();
6649 Register SizeUse = I.getOperand(4).getReg();
6650
6651 // MOPSMemorySetTaggingPseudo has two defs; the intrinsic call has only one.
6652 // Therefore an additional virtual register is required for the updated size
6653 // operand. This value is not accessible via the semantics of the intrinsic.
6654 Register SizeDef = MRI.createGenericVirtualRegister(LLT::scalar(64));
6655
6656 auto Memset = MIB.buildInstr(AArch64::MOPSMemorySetTaggingPseudo,
6657 {DstDef, SizeDef}, {DstUse, SizeUse, ValUse});
6658 Memset.cloneMemRefs(I);
6660 break;
6661 }
6662 }
6663
6664 I.eraseFromParent();
6665 return true;
6666}
6667
6668bool AArch64InstructionSelector::selectIntrinsic(MachineInstr &I,
6670 unsigned IntrinID = cast<GIntrinsic>(I).getIntrinsicID();
6671
6672 switch (IntrinID) {
6673 default:
6674 break;
6675 case Intrinsic::aarch64_crypto_sha1h: {
6676 Register DstReg = I.getOperand(0).getReg();
6677 Register SrcReg = I.getOperand(2).getReg();
6678
6679 // FIXME: Should this be an assert?
6680 if (MRI.getType(DstReg).getSizeInBits() != 32 ||
6681 MRI.getType(SrcReg).getSizeInBits() != 32)
6682 return false;
6683
6684 // The operation has to happen on FPRs. Set up some new FPR registers for
6685 // the source and destination if they are on GPRs.
6686 if (RBI.getRegBank(SrcReg, MRI, TRI)->getID() != AArch64::FPRRegBankID) {
6687 SrcReg = MRI.createVirtualRegister(&AArch64::FPR32RegClass);
6688 MIB.buildCopy({SrcReg}, {I.getOperand(2)});
6689
6690 // Make sure the copy ends up getting constrained properly.
6691 RBI.constrainGenericRegister(I.getOperand(2).getReg(),
6692 AArch64::GPR32RegClass, MRI);
6693 }
6694
6695 if (RBI.getRegBank(DstReg, MRI, TRI)->getID() != AArch64::FPRRegBankID)
6696 DstReg = MRI.createVirtualRegister(&AArch64::FPR32RegClass);
6697
6698 // Actually insert the instruction.
6699 auto SHA1Inst = MIB.buildInstr(AArch64::SHA1Hrr, {DstReg}, {SrcReg});
6700 constrainSelectedInstRegOperands(*SHA1Inst, TII, TRI, RBI);
6701
6702 // Did we create a new register for the destination?
6703 if (DstReg != I.getOperand(0).getReg()) {
6704 // Yep. Copy the result of the instruction back into the original
6705 // destination.
6706 MIB.buildCopy({I.getOperand(0)}, {DstReg});
6707 RBI.constrainGenericRegister(I.getOperand(0).getReg(),
6708 AArch64::GPR32RegClass, MRI);
6709 }
6710
6711 I.eraseFromParent();
6712 return true;
6713 }
6714 case Intrinsic::frameaddress:
6715 case Intrinsic::returnaddress: {
6716 MachineFunction &MF = *I.getParent()->getParent();
6717 MachineFrameInfo &MFI = MF.getFrameInfo();
6718
6719 unsigned Depth = I.getOperand(2).getImm();
6720 Register DstReg = I.getOperand(0).getReg();
6721 RBI.constrainGenericRegister(DstReg, AArch64::GPR64RegClass, MRI);
6722
6723 if (Depth == 0 && IntrinID == Intrinsic::returnaddress) {
6724 if (!MFReturnAddr) {
6725 // Insert the copy from LR/X30 into the entry block, before it can be
6726 // clobbered by anything.
6727 MFI.setReturnAddressIsTaken(true);
6728 MFReturnAddr = getFunctionLiveInPhysReg(
6729 MF, TII, AArch64::LR, AArch64::GPR64RegClass, I.getDebugLoc());
6730 }
6731
6732 if (STI.hasPAuth()) {
6733 MIB.buildInstr(AArch64::XPACI, {DstReg}, {MFReturnAddr});
6734 } else {
6735 MIB.buildCopy({Register(AArch64::LR)}, {MFReturnAddr});
6736 MIB.buildInstr(AArch64::XPACLRI);
6737 MIB.buildCopy({DstReg}, {Register(AArch64::LR)});
6738 }
6739
6740 I.eraseFromParent();
6741 return true;
6742 }
6743
6744 MFI.setFrameAddressIsTaken(true);
6745 Register FrameAddr(AArch64::FP);
6746 while (Depth--) {
6747 Register NextFrame = MRI.createVirtualRegister(&AArch64::GPR64spRegClass);
6748 auto Ldr =
6749 MIB.buildInstr(AArch64::LDRXui, {NextFrame}, {FrameAddr}).addImm(0);
6751 FrameAddr = NextFrame;
6752 }
6753
6754 if (IntrinID == Intrinsic::frameaddress)
6755 MIB.buildCopy({DstReg}, {FrameAddr});
6756 else {
6757 MFI.setReturnAddressIsTaken(true);
6758
6759 if (STI.hasPAuth()) {
6760 Register TmpReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
6761 MIB.buildInstr(AArch64::LDRXui, {TmpReg}, {FrameAddr}).addImm(1);
6762 MIB.buildInstr(AArch64::XPACI, {DstReg}, {TmpReg});
6763 } else {
6764 MIB.buildInstr(AArch64::LDRXui, {Register(AArch64::LR)}, {FrameAddr})
6765 .addImm(1);
6766 MIB.buildInstr(AArch64::XPACLRI);
6767 MIB.buildCopy({DstReg}, {Register(AArch64::LR)});
6768 }
6769 }
6770
6771 I.eraseFromParent();
6772 return true;
6773 }
6774 case Intrinsic::swift_async_context_addr:
6775 auto Sub = MIB.buildInstr(AArch64::SUBXri, {I.getOperand(0).getReg()},
6776 {Register(AArch64::FP)})
6777 .addImm(8)
6778 .addImm(0);
6780
6782 MF->getInfo<AArch64FunctionInfo>()->setHasSwiftAsyncContext(true);
6783 I.eraseFromParent();
6784 return true;
6785 }
6786 return false;
6787}
6788
6790AArch64InstructionSelector::selectShiftA_32(const MachineOperand &Root) const {
6791 auto MaybeImmed = getImmedFromMO(Root);
6792 if (MaybeImmed == std::nullopt || *MaybeImmed > 31)
6793 return std::nullopt;
6794 uint64_t Enc = (32 - *MaybeImmed) & 0x1f;
6795 return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Enc); }}};
6796}
6797
6799AArch64InstructionSelector::selectShiftB_32(const MachineOperand &Root) const {
6800 auto MaybeImmed = getImmedFromMO(Root);
6801 if (MaybeImmed == std::nullopt || *MaybeImmed > 31)
6802 return std::nullopt;
6803 uint64_t Enc = 31 - *MaybeImmed;
6804 return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Enc); }}};
6805}
6806
6808AArch64InstructionSelector::selectShiftA_64(const MachineOperand &Root) const {
6809 auto MaybeImmed = getImmedFromMO(Root);
6810 if (MaybeImmed == std::nullopt || *MaybeImmed > 63)
6811 return std::nullopt;
6812 uint64_t Enc = (64 - *MaybeImmed) & 0x3f;
6813 return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Enc); }}};
6814}
6815
6817AArch64InstructionSelector::selectShiftB_64(const MachineOperand &Root) const {
6818 auto MaybeImmed = getImmedFromMO(Root);
6819 if (MaybeImmed == std::nullopt || *MaybeImmed > 63)
6820 return std::nullopt;
6821 uint64_t Enc = 63 - *MaybeImmed;
6822 return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Enc); }}};
6823}
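// Illustrative example (added for exposition, not from the upstream file):
// these renderers compute immediates that match the bitfield-move (UBFM/SBFM)
// encoding of an immediate left shift. For a 32-bit shift left by 3,
// selectShiftA_32 yields (32 - 3) & 0x1f = 29 and selectShiftB_32 yields
// 31 - 3 = 28, matching the standard encoding
// "lsl w0, w1, #3" == "ubfm w0, w1, #29, #28".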
6824
6825/// Helper to select an immediate value that can be represented as a 12-bit
6826/// value shifted left by either 0 or 12. If it is possible to do so, return
6827/// the immediate and shift value. If not, return std::nullopt.
6828///
6829/// Used by selectArithImmed and selectNegArithImmed.
6831AArch64InstructionSelector::select12BitValueWithLeftShift(
6832 uint64_t Immed) const {
6833 unsigned ShiftAmt;
6834 if (Immed >> 12 == 0) {
6835 ShiftAmt = 0;
6836 } else if ((Immed & 0xfff) == 0 && Immed >> 24 == 0) {
6837 ShiftAmt = 12;
6838 Immed = Immed >> 12;
6839 } else
6840 return std::nullopt;
6841
6842 unsigned ShVal = AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftAmt);
6843 return {{
6844 [=](MachineInstrBuilder &MIB) { MIB.addImm(Immed); },
6845 [=](MachineInstrBuilder &MIB) { MIB.addImm(ShVal); },
6846 }};
6847}
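// Worked examples (illustrative): 0x123 is returned as (0x123, LSL #0);
// 0x123000 has its low 12 bits clear and fits after shifting, so it becomes
// (0x123, LSL #12); 0x1234 straddles both halves and is rejected. Below is a
// minimal standalone sketch of the same legality check, assuming <cstdint>
// for uint64_t; the helper name is invented for illustration and is not part
// of the selector:
static bool sketchFits12BitShiftedImm(uint64_t Immed, uint64_t &Imm12,
                                      unsigned &ShiftAmt) {
  if (Immed >> 12 == 0) {
    Imm12 = Immed; // Fits directly, no shift needed.
    ShiftAmt = 0;
    return true;
  }
  if ((Immed & 0xfff) == 0 && Immed >> 24 == 0) {
    Imm12 = Immed >> 12; // Low 12 bits clear: encode with LSL #12.
    ShiftAmt = 12;
    return true;
  }
  return false; // e.g. 0x1234 needs bits in both halves and cannot be encoded.
}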
6848
6849/// SelectArithImmed - Select an immediate value that can be represented as
6850/// a 12-bit value shifted left by either 0 or 12. If so, return true with
6851/// Val set to the 12-bit value and Shift set to the shifter operand.
6853AArch64InstructionSelector::selectArithImmed(MachineOperand &Root) const {
6854 // This function is called from the addsub_shifted_imm ComplexPattern,
6855 // which lists [imm] as the list of opcodes it's interested in; however,
6856 // we still need to check whether the operand is actually an immediate
6857 // here because the ComplexPattern opcode list is only used in
6858 // root-level opcode matching.
6859 auto MaybeImmed = getImmedFromMO(Root);
6860 if (MaybeImmed == std::nullopt)
6861 return std::nullopt;
6862 return select12BitValueWithLeftShift(*MaybeImmed);
6863}
6864
6865/// SelectNegArithImmed - As above, but negates the value before trying to
6866/// select it.
6868AArch64InstructionSelector::selectNegArithImmed(MachineOperand &Root) const {
6869 // We need a register here, because we need to know whether we have a 64-bit
6870 // or a 32-bit immediate.
6871 if (!Root.isReg())
6872 return std::nullopt;
6873 auto MaybeImmed = getImmedFromMO(Root);
6874 if (MaybeImmed == std::nullopt)
6875 return std::nullopt;
6876 uint64_t Immed = *MaybeImmed;
6877
6878 // This negation is almost always valid, but "cmp wN, #0" and "cmn wN, #0"
6879 // have the opposite effect on the C flag, so this pattern mustn't match under
6880 // those circumstances.
6881 if (Immed == 0)
6882 return std::nullopt;
6883
6884 // Check whether the root operand has a 32-bit or a 64-bit type, since the
6885 // negation must be done at the matching width.
6887 if (MRI.getType(Root.getReg()).getSizeInBits() == 32)
6888 Immed = ~((uint32_t)Immed) + 1;
6889 else
6890 Immed = ~Immed + 1ULL;
6891
6892 if (Immed & 0xFFFFFFFFFF000000ULL)
6893 return std::nullopt;
6894
6895 Immed &= 0xFFFFFFULL;
6896 return select12BitValueWithLeftShift(Immed);
6897}
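// Illustrative example: for a 64-bit compare against -5, the negated value 5
// fits the 12-bit form, so this renders (5, LSL #0) and the compare can be
// selected as the CMN/ADDS flavour instead of materializing -5 in a register.
// A compare against 0 is deliberately rejected above, because "cmp x0, #0"
// and "cmn x0, #0" set the C flag differently.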
6898
6899/// Return true if it is worth folding MI into an extended register. That is,
6900/// if it's safe to pull it into the addressing mode of a load or store as a
6901/// shift.
6902bool AArch64InstructionSelector::isWorthFoldingIntoExtendedReg(
6903 MachineInstr &MI, const MachineRegisterInfo &MRI) const {
6904 // Always fold if there is one use, or if we're optimizing for size.
6905 Register DefReg = MI.getOperand(0).getReg();
6906 if (MRI.hasOneNonDBGUse(DefReg) ||
6907 MI.getParent()->getParent()->getFunction().hasOptSize())
6908 return true;
6909
6910 // It's better to avoid folding and recomputing shifts when we don't have a
6911 // fastpath.
6912 if (!STI.hasAddrLSLFast())
6913 return false;
6914
6915 // We have a fastpath, so folding a shift in and potentially computing it
6916 // many times may be beneficial. Check if this is only used in memory ops.
6917 // If it is, then we should fold.
6918 return all_of(MRI.use_nodbg_instructions(DefReg),
6919 [](MachineInstr &Use) { return Use.mayLoadOrStore(); });
6920}
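// Illustrative example: a G_SHL whose only user is a single G_LOAD is always
// folded into the load's addressing mode. If it has several users, it is only
// folded when the subtarget reports cheap shifted addressing (hasAddrLSLFast)
// and every user is itself a load or store, since the shift is then redone
// cheaply inside each memory operation instead of being computed once.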
6921
6923 switch (Type) {
6924 case AArch64_AM::SXTB:
6925 case AArch64_AM::SXTH:
6926 case AArch64_AM::SXTW:
6927 return true;
6928 default:
6929 return false;
6930 }
6931}
6932
6934AArch64InstructionSelector::selectExtendedSHL(
6936 unsigned SizeInBytes, bool WantsExt) const {
6937 assert(Base.isReg() && "Expected base to be a register operand");
6938 assert(Offset.isReg() && "Expected offset to be a register operand");
6939
6941 MachineInstr *OffsetInst = MRI.getVRegDef(Offset.getReg());
6942
6943 unsigned OffsetOpc = OffsetInst->getOpcode();
6944 bool LookedThroughZExt = false;
6945 if (OffsetOpc != TargetOpcode::G_SHL && OffsetOpc != TargetOpcode::G_MUL) {
6946 // Try to look through a ZEXT.
6947 if (OffsetOpc != TargetOpcode::G_ZEXT || !WantsExt)
6948 return std::nullopt;
6949
6950 OffsetInst = MRI.getVRegDef(OffsetInst->getOperand(1).getReg());
6951 OffsetOpc = OffsetInst->getOpcode();
6952 LookedThroughZExt = true;
6953
6954 if (OffsetOpc != TargetOpcode::G_SHL && OffsetOpc != TargetOpcode::G_MUL)
6955 return std::nullopt;
6956 }
6957 // Make sure that the memory op is a valid size.
6958 int64_t LegalShiftVal = Log2_32(SizeInBytes);
6959 if (LegalShiftVal == 0)
6960 return std::nullopt;
6961 if (!isWorthFoldingIntoExtendedReg(*OffsetInst, MRI))
6962 return std::nullopt;
6963
6964 // Now, try to find the specific G_CONSTANT. Start by assuming that the
6965 // register we will offset is the LHS, and the register containing the
6966 // constant is the RHS.
6967 Register OffsetReg = OffsetInst->getOperand(1).getReg();
6968 Register ConstantReg = OffsetInst->getOperand(2).getReg();
6969 auto ValAndVReg = getIConstantVRegValWithLookThrough(ConstantReg, MRI);
6970 if (!ValAndVReg) {
6971 // We didn't get a constant on the RHS. If the opcode is a shift, then
6972 // we're done.
6973 if (OffsetOpc == TargetOpcode::G_SHL)
6974 return std::nullopt;
6975
6976 // If we have a G_MUL, we can use either register. Try looking at the RHS.
6977 std::swap(OffsetReg, ConstantReg);
6978 ValAndVReg = getIConstantVRegValWithLookThrough(ConstantReg, MRI);
6979 if (!ValAndVReg)
6980 return std::nullopt;
6981 }
6982
6983 // The value must fit into 3 bits, and must be positive. Make sure that is
6984 // true.
6985 int64_t ImmVal = ValAndVReg->Value.getSExtValue();
6986
6987 // Since we're going to pull this into a shift, the constant value must be
6988 // a power of 2. If we got a multiply, then we need to check this.
6989 if (OffsetOpc == TargetOpcode::G_MUL) {
6990 if (!llvm::has_single_bit<uint32_t>(ImmVal))
6991 return std::nullopt;
6992
6993 // Got a power of 2. So, the amount we'll shift is the log base-2 of that.
6994 ImmVal = Log2_32(ImmVal);
6995 }
6996
6997 if ((ImmVal & 0x7) != ImmVal)
6998 return std::nullopt;
6999
7000 // We are only allowed to shift by LegalShiftVal. This shift value is built
7001 // into the instruction, so we can't just use whatever we want.
7002 if (ImmVal != LegalShiftVal)
7003 return std::nullopt;
7004
7005 unsigned SignExtend = 0;
7006 if (WantsExt) {
7007 // Check if the offset is defined by an extend, unless we looked through a
7008 // G_ZEXT earlier.
7009 if (!LookedThroughZExt) {
7010 MachineInstr *ExtInst = getDefIgnoringCopies(OffsetReg, MRI);
7011 auto Ext = getExtendTypeForInst(*ExtInst, MRI, true);
7013 return std::nullopt;
7014
7015 SignExtend = isSignExtendShiftType(Ext) ? 1 : 0;
7016 // We only support SXTW for signed extension here.
7017 if (SignExtend && Ext != AArch64_AM::SXTW)
7018 return std::nullopt;
7019 OffsetReg = ExtInst->getOperand(1).getReg();
7020 }
7021
7022 // Need a 32-bit wide register here.
7023 MachineIRBuilder MIB(*MRI.getVRegDef(Root.getReg()));
7024 OffsetReg = moveScalarRegClass(OffsetReg, AArch64::GPR32RegClass, MIB);
7025 }
7026
7027 // We can use the LHS of the GEP as the base, and the LHS of the shift as an
7028 // offset. Signify that we are shifting by setting the shift flag to 1.
7029 return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(Base.getReg()); },
7030 [=](MachineInstrBuilder &MIB) { MIB.addUse(OffsetReg); },
7031 [=](MachineInstrBuilder &MIB) {
7032 // Need to add both immediates here to make sure that they are both
7033 // added to the instruction.
7034 MIB.addImm(SignExtend);
7035 MIB.addImm(1);
7036 }}};
7037}
7038
7039/// This is used for computing addresses like this:
7040///
7041/// ldr x1, [x2, x3, lsl #3]
7042///
7043/// Where x2 is the base register, and x3 is an offset register. The shift-left
7044/// is a constant value specific to this load instruction. That is, we'll never
7045/// see anything other than a 3 here for a 64-bit load (the shift amount is the
7046/// log2 of the size of the element being loaded).
7048AArch64InstructionSelector::selectAddrModeShiftedExtendXReg(
7049 MachineOperand &Root, unsigned SizeInBytes) const {
7050 if (!Root.isReg())
7051 return std::nullopt;
7053
7054 // We want to find something like this:
7055 //
7056 // val = G_CONSTANT LegalShiftVal
7057 // shift = G_SHL off_reg val
7058 // ptr = G_PTR_ADD base_reg shift
7059 // x = G_LOAD ptr
7060 //
7061 // And fold it into this addressing mode:
7062 //
7063 // ldr x, [base_reg, off_reg, lsl #LegalShiftVal]
7064
7065 // Check if we can find the G_PTR_ADD.
7066 MachineInstr *PtrAdd =
7067 getOpcodeDef(TargetOpcode::G_PTR_ADD, Root.getReg(), MRI);
7068 if (!PtrAdd || !isWorthFoldingIntoExtendedReg(*PtrAdd, MRI))
7069 return std::nullopt;
7070
7071 // Now, try to match an opcode which will match our specific offset.
7072 // We want a G_SHL or a G_MUL.
7073 MachineInstr *OffsetInst =
7075 return selectExtendedSHL(Root, PtrAdd->getOperand(1),
7076 OffsetInst->getOperand(0), SizeInBytes,
7077 /*WantsExt=*/false);
7078}
7079
7080/// This is used for computing addresses like this:
7081///
7082/// ldr x1, [x2, x3]
7083///
7084/// Where x2 is the base register, and x3 is an offset register.
7085///
7086/// When possible (or profitable) to fold a G_PTR_ADD into the address
7087/// calculation, this will do so. Otherwise, it will return std::nullopt.
7089AArch64InstructionSelector::selectAddrModeRegisterOffset(
7090 MachineOperand &Root) const {
7092
7093 // We need a GEP.
7094 MachineInstr *Gep = MRI.getVRegDef(Root.getReg());
7095 if (Gep->getOpcode() != TargetOpcode::G_PTR_ADD)
7096 return std::nullopt;
7097
7098 // If this is used more than once, let's not bother folding.
7099 // TODO: Check if they are memory ops. If they are, then we can still fold
7100 // without having to recompute anything.
7101 if (!MRI.hasOneNonDBGUse(Gep->getOperand(0).getReg()))
7102 return std::nullopt;
7103
7104 // Base is the GEP's LHS, offset is its RHS.
7105 return {{[=](MachineInstrBuilder &MIB) {
7106 MIB.addUse(Gep->getOperand(1).getReg());
7107 },
7108 [=](MachineInstrBuilder &MIB) {
7109 MIB.addUse(Gep->getOperand(2).getReg());
7110 },
7111 [=](MachineInstrBuilder &MIB) {
7112 // Need to add both immediates here to make sure that they are both
7113 // added to the instruction.
7114 MIB.addImm(0);
7115 MIB.addImm(0);
7116 }}};
7117}
7118
7119/// This is intended to be equivalent to selectAddrModeXRO in
7120/// AArch64ISelDAGToDAG. It's used for selecting X register offset loads.
7122AArch64InstructionSelector::selectAddrModeXRO(MachineOperand &Root,
7123 unsigned SizeInBytes) const {
7125 if (!Root.isReg())
7126 return std::nullopt;
7127 MachineInstr *PtrAdd =
7128 getOpcodeDef(TargetOpcode::G_PTR_ADD, Root.getReg(), MRI);
7129 if (!PtrAdd)
7130 return std::nullopt;
7131
7132 // Check for an immediate which cannot be encoded in the [base + imm]
7133 // addressing mode, and can't be encoded in an add/sub. If this happens, we'll
7134 // end up with code like:
7135 //
7136 // mov x0, wide
7137 // add x1 base, x0
7138 // ldr x2, [x1, x0]
7139 //
7140 // In this situation, we can use the [base, xreg] addressing mode to save an
7141 // add/sub:
7142 //
7143 // mov x0, wide
7144 // ldr x2, [base, x0]
7145 auto ValAndVReg =
7147 if (ValAndVReg) {
7148 unsigned Scale = Log2_32(SizeInBytes);
7149 int64_t ImmOff = ValAndVReg->Value.getSExtValue();
7150
7151 // Skip immediates that can be selected in the load/store addressing
7152 // mode.
7153 if (ImmOff % SizeInBytes == 0 && ImmOff >= 0 &&
7154 ImmOff < (0x1000 << Scale))
7155 return std::nullopt;
7156
7157 // Helper lambda to decide whether or not it is preferable to emit an add.
7158 auto isPreferredADD = [](int64_t ImmOff) {
7159 // Constants in [0x0, 0xfff] can be encoded in an add.
7160 if ((ImmOff & 0xfffffffffffff000LL) == 0x0LL)
7161 return true;
7162
7163 // Can it be encoded in an add lsl #12?
7164 if ((ImmOff & 0xffffffffff000fffLL) != 0x0LL)
7165 return false;
7166
7167 // It can be encoded in an add lsl #12, but we may not want to. If it is
7168 // possible to select this as a single movz, then prefer that. A single
7169 // movz is faster than an add with a shift.
7170 return (ImmOff & 0xffffffffff00ffffLL) != 0x0LL &&
7171 (ImmOff & 0xffffffffffff0fffLL) != 0x0LL;
7172 };
7173
7174 // If the immediate can be encoded in a single add/sub, then bail out.
7175 if (isPreferredADD(ImmOff) || isPreferredADD(-ImmOff))
7176 return std::nullopt;
7177 }
7178
7179 // Try to fold shifts into the addressing mode.
7180 auto AddrModeFns = selectAddrModeShiftedExtendXReg(Root, SizeInBytes);
7181 if (AddrModeFns)
7182 return AddrModeFns;
7183
7184 // If that doesn't work, see if it's possible to fold in registers from
7185 // a GEP.
7186 return selectAddrModeRegisterOffset(Root);
7187}
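// Illustrative example: for an 8-byte load at base + 32760 the offset fits
// the scaled [base, #imm] form (32760 / 8 = 4095), so this returns
// std::nullopt and the immediate form wins. For something like
// base + 0x1234568 the offset is neither a legal scaled/unscaled immediate
// nor a single add/sub immediate, so the constant is materialized once and
// the access uses the [base, xN] register-offset form instead.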
7188
7189/// This is used for computing addresses like this:
7190///
7191/// ldr x0, [xBase, wOffset, sxtw #LegalShiftVal]
7192///
7193/// Where we have a 64-bit base register, a 32-bit offset register, and an
7194/// extend (which may or may not be signed).
7196AArch64InstructionSelector::selectAddrModeWRO(MachineOperand &Root,
7197 unsigned SizeInBytes) const {
7199
7200 MachineInstr *PtrAdd =
7201 getOpcodeDef(TargetOpcode::G_PTR_ADD, Root.getReg(), MRI);
7202 if (!PtrAdd || !isWorthFoldingIntoExtendedReg(*PtrAdd, MRI))
7203 return std::nullopt;
7204
7205 MachineOperand &LHS = PtrAdd->getOperand(1);
7206 MachineOperand &RHS = PtrAdd->getOperand(2);
7207 MachineInstr *OffsetInst = getDefIgnoringCopies(RHS.getReg(), MRI);
7208
7209 // The first case is the same as selectAddrModeXRO, except we need an extend.
7210 // In this case, we try to find a shift and extend, and fold them into the
7211 // addressing mode.
7212 //
7213 // E.g.
7214 //
7215 // off_reg = G_Z/S/ANYEXT ext_reg
7216 // val = G_CONSTANT LegalShiftVal
7217 // shift = G_SHL off_reg val
7218 // ptr = G_PTR_ADD base_reg shift
7219 // x = G_LOAD ptr
7220 //
7221 // In this case we can get a load like this:
7222 //
7223 // ldr x0, [base_reg, ext_reg, sxtw #LegalShiftVal]
7224 auto ExtendedShl = selectExtendedSHL(Root, LHS, OffsetInst->getOperand(0),
7225 SizeInBytes, /*WantsExt=*/true);
7226 if (ExtendedShl)
7227 return ExtendedShl;
7228
7229 // There was no shift. We can try to fold in a G_Z/S/ANYEXT on its own though.
7230 //
7231 // e.g.
7232 // ldr something, [base_reg, ext_reg, sxtw]
7233 if (!isWorthFoldingIntoExtendedReg(*OffsetInst, MRI))
7234 return std::nullopt;
7235
7236 // Check if this is an extend. We'll get an extend type if it is.
7238 getExtendTypeForInst(*OffsetInst, MRI, /*IsLoadStore=*/true);
7240 return std::nullopt;
7241
7242 // Need a 32-bit wide register.
7243 MachineIRBuilder MIB(*PtrAdd);
7244 Register ExtReg = moveScalarRegClass(OffsetInst->getOperand(1).getReg(),
7245 AArch64::GPR32RegClass, MIB);
7246 unsigned SignExtend = Ext == AArch64_AM::SXTW;
7247
7248 // Base is LHS, offset is ExtReg.
7249 return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(LHS.getReg()); },
7250 [=](MachineInstrBuilder &MIB) { MIB.addUse(ExtReg); },
7251 [=](MachineInstrBuilder &MIB) {
7252 MIB.addImm(SignExtend);
7253 MIB.addImm(0);
7254 }}};
7255}
7256
7257/// Select a "register plus unscaled signed 9-bit immediate" address. This
7258/// should only match when there is an offset that is not valid for a scaled
7259/// immediate addressing mode. The "Size" argument is the size in bytes of the
7260/// memory reference, which is needed here to know what is valid for a scaled
7261/// immediate.
7263AArch64InstructionSelector::selectAddrModeUnscaled(MachineOperand &Root,
7264 unsigned Size) const {
7266 Root.getParent()->getParent()->getParent()->getRegInfo();
7267
7268 if (!Root.isReg())
7269 return std::nullopt;
7270
7271 if (!isBaseWithConstantOffset(Root, MRI))
7272 return std::nullopt;
7273
7274 MachineInstr *RootDef = MRI.getVRegDef(Root.getReg());
7275
7276 MachineOperand &OffImm = RootDef->getOperand(2);
7277 if (!OffImm.isReg())
7278 return std::nullopt;
7279 MachineInstr *RHS = MRI.getVRegDef(OffImm.getReg());
7280 if (RHS->getOpcode() != TargetOpcode::G_CONSTANT)
7281 return std::nullopt;
7282 int64_t RHSC;
7283 MachineOperand &RHSOp1 = RHS->getOperand(1);
7284 if (!RHSOp1.isCImm() || RHSOp1.getCImm()->getBitWidth() > 64)
7285 return std::nullopt;
7286 RHSC = RHSOp1.getCImm()->getSExtValue();
7287
7288 if (RHSC >= -256 && RHSC < 256) {
7289 MachineOperand &Base = RootDef->getOperand(1);
7290 return {{
7291 [=](MachineInstrBuilder &MIB) { MIB.add(Base); },
7292 [=](MachineInstrBuilder &MIB) { MIB.addImm(RHSC); },
7293 }};
7294 }
7295 return std::nullopt;
7296}
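// Illustrative example: a 4-byte load at base - 8 cannot use the scaled
// LDRWui form (its immediate is unsigned), but -8 lies within [-256, 255],
// so this returns the base plus a raw offset of -8, which ends up as the
// unscaled "ldur w0, [x1, #-8]" form.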
7297
7299AArch64InstructionSelector::tryFoldAddLowIntoImm(MachineInstr &RootDef,
7300 unsigned Size,
7301 MachineRegisterInfo &MRI) const {
7302 if (RootDef.getOpcode() != AArch64::G_ADD_LOW)
7303 return std::nullopt;
7304 MachineInstr &Adrp = *MRI.getVRegDef(RootDef.getOperand(1).getReg());
7305 if (Adrp.getOpcode() != AArch64::ADRP)
7306 return std::nullopt;
7307
7308 // TODO: add heuristics like isWorthFoldingADDlow() from SelectionDAG.
7309 auto Offset = Adrp.getOperand(1).getOffset();
7310 if (Offset % Size != 0)
7311 return std::nullopt;
7312
7313 auto GV = Adrp.getOperand(1).getGlobal();
7314 if (GV->isThreadLocal())
7315 return std::nullopt;
7316
7317 auto &MF = *RootDef.getParent()->getParent();
7318 if (GV->getPointerAlignment(MF.getDataLayout()) < Size)
7319 return std::nullopt;
7320
7321 unsigned OpFlags = STI.ClassifyGlobalReference(GV, MF.getTarget());
7322 MachineIRBuilder MIRBuilder(RootDef);
7323 Register AdrpReg = Adrp.getOperand(0).getReg();
7324 return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(AdrpReg); },
7325 [=](MachineInstrBuilder &MIB) {
7326 MIB.addGlobalAddress(GV, Offset,
7327 OpFlags | AArch64II::MO_PAGEOFF |
7329 }}};
7330}
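// Illustrative example: given "adrp x8, sym" feeding a G_ADD_LOW whose result
// is loaded as 8 bytes, the low-bits part can be folded into the load as
// "ldr x0, [x8, :lo12:sym]". This is only done when the offset is a multiple
// of the access size and the global is at least that aligned, because the
// scaled immediate field of the load cannot represent a misaligned low part.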
7331
7332/// Select a "register plus scaled unsigned 12-bit immediate" address. The
7333/// "Size" argument is the size in bytes of the memory reference, which
7334/// determines the scale.
7336AArch64InstructionSelector::selectAddrModeIndexed(MachineOperand &Root,
7337 unsigned Size) const {
7338 MachineFunction &MF = *Root.getParent()->getParent()->getParent();
7340
7341 if (!Root.isReg())
7342 return std::nullopt;
7343
7344 MachineInstr *RootDef = MRI.getVRegDef(Root.getReg());
7345 if (RootDef->getOpcode() == TargetOpcode::G_FRAME_INDEX) {
7346 return {{
7347 [=](MachineInstrBuilder &MIB) { MIB.add(RootDef->getOperand(1)); },
7348 [=](MachineInstrBuilder &MIB) { MIB.addImm(0); },
7349 }};
7350 }
7351
7353 // Check if we can fold in the ADD of a small-code-model ADRP + ADD address.
7354 if (CM == CodeModel::Small) {
7355 auto OpFns = tryFoldAddLowIntoImm(*RootDef, Size, MRI);
7356 if (OpFns)
7357 return OpFns;
7358 }
7359
7360 if (isBaseWithConstantOffset(Root, MRI)) {
7361 MachineOperand &LHS = RootDef->getOperand(1);
7362 MachineOperand &RHS = RootDef->getOperand(2);
7363 MachineInstr *LHSDef = MRI.getVRegDef(LHS.getReg());
7364 MachineInstr *RHSDef = MRI.getVRegDef(RHS.getReg());
7365
7366 int64_t RHSC = (int64_t)RHSDef->getOperand(1).getCImm()->getZExtValue();
7367 unsigned Scale = Log2_32(Size);
7368 if ((RHSC & (Size - 1)) == 0 && RHSC >= 0 && RHSC < (0x1000 << Scale)) {
7369 if (LHSDef->getOpcode() == TargetOpcode::G_FRAME_INDEX)
7370 return {{
7371 [=](MachineInstrBuilder &MIB) { MIB.add(LHSDef->getOperand(1)); },
7372 [=](MachineInstrBuilder &MIB) { MIB.addImm(RHSC >> Scale); },
7373 }};
7374
7375 return {{
7376 [=](MachineInstrBuilder &MIB) { MIB.add(LHS); },
7377 [=](MachineInstrBuilder &MIB) { MIB.addImm(RHSC >> Scale); },
7378 }};
7379 }
7380 }
7381
7382 // Before falling back to our general case, check if the unscaled
7383 // instructions can handle this. If so, that's preferable.
7384 if (selectAddrModeUnscaled(Root, Size))
7385 return std::nullopt;
7386
7387 return {{
7388 [=](MachineInstrBuilder &MIB) { MIB.add(Root); },
7389 [=](MachineInstrBuilder &MIB) { MIB.addImm(0); },
7390 }};
7391}
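// Illustrative example: for an 8-byte load of base + 48, the offset is a
// multiple of 8 and 48 >> 3 = 6 fits the unsigned 12-bit field, so this
// returns (base, 6) and the access is selected as "ldr x0, [x1, #48]". A
// frame-index base is passed through directly as the base operand.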
7392
7393/// Given a shift instruction, return the correct shift type for that
7394/// instruction.
7396 switch (MI.getOpcode()) {
7397 default:
7399 case TargetOpcode::G_SHL:
7400 return AArch64_AM::LSL;
7401 case TargetOpcode::G_LSHR:
7402 return AArch64_AM::LSR;
7403 case TargetOpcode::G_ASHR:
7404 return AArch64_AM::ASR;
7405 case TargetOpcode::G_ROTR:
7406 return AArch64_AM::ROR;
7407 }
7408}
7409
7410/// Select a "shifted register" operand. If the value is not shifted, set the
7411/// shift operand to a default value of "lsl 0".
7413AArch64InstructionSelector::selectShiftedRegister(MachineOperand &Root,
7414 bool AllowROR) const {
7415 if (!Root.isReg())
7416 return std::nullopt;
7418 Root.getParent()->getParent()->getParent()->getRegInfo();
7419
7420 // Check if the operand is defined by an instruction which corresponds to
7421 // a ShiftExtendType. E.g. a G_SHL, G_LSHR, etc.
7422 MachineInstr *ShiftInst = MRI.getVRegDef(Root.getReg());
7424 if (ShType == AArch64_AM::InvalidShiftExtend)
7425 return std::nullopt;
7426 if (ShType == AArch64_AM::ROR && !AllowROR)
7427 return std::nullopt;
7428 if (!isWorthFoldingIntoExtendedReg(*ShiftInst, MRI))
7429 return std::nullopt;
7430
7431 // Need an immediate on the RHS.
7432 MachineOperand &ShiftRHS = ShiftInst->getOperand(2);
7433 auto Immed = getImmedFromMO(ShiftRHS);
7434 if (!Immed)
7435 return std::nullopt;
7436
7437 // We have something that we can fold. Fold in the shift's LHS and RHS into
7438 // the instruction.
7439 MachineOperand &ShiftLHS = ShiftInst->getOperand(1);
7440 Register ShiftReg = ShiftLHS.getReg();
7441
7442 unsigned NumBits = MRI.getType(ShiftReg).getSizeInBits();
7443 unsigned Val = *Immed & (NumBits - 1);
7444 unsigned ShiftVal = AArch64_AM::getShifterImm(ShType, Val);
7445
7446 return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(ShiftReg); },
7447 [=](MachineInstrBuilder &MIB) { MIB.addImm(ShiftVal); }}};
7448}
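// Illustrative example: if the root operand is defined by "%s = G_SHL %x, 4"
// and folding is worthwhile, this returns %x together with a shifter operand
// encoding (LSL, 4), so e.g. a 64-bit G_AND of %s can be selected as
// "and x0, x1, x2, lsl #4" without a separate shift instruction.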
7449
7450AArch64_AM::ShiftExtendType AArch64InstructionSelector::getExtendTypeForInst(
7451 MachineInstr &MI, MachineRegisterInfo &MRI, bool IsLoadStore) const {
7452 unsigned Opc = MI.getOpcode();
7453
7454 // Handle explicit extend instructions first.
7455 if (Opc == TargetOpcode::G_SEXT || Opc == TargetOpcode::G_SEXT_INREG) {
7456 unsigned Size;
7457 if (Opc == TargetOpcode::G_SEXT)
7458 Size = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
7459 else
7460 Size = MI.getOperand(2).getImm();
7461 assert(Size != 64 && "Extend from 64 bits?");
7462 switch (Size) {
7463 case 8:
7464 return IsLoadStore ? AArch64_AM::InvalidShiftExtend : AArch64_AM::SXTB;
7465 case 16:
7466 return IsLoadStore ? AArch64_AM::InvalidShiftExtend : AArch64_AM::SXTH;
7467 case 32:
7468 return AArch64_AM::SXTW;
7469 default:
7471 }
7472 }
7473
7474 if (Opc == TargetOpcode::G_ZEXT || Opc == TargetOpcode::G_ANYEXT) {
7475 unsigned Size = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
7476 assert(Size != 64 && "Extend from 64 bits?");
7477 switch (Size) {
7478 case 8:
7479 return IsLoadStore ? AArch64_AM::InvalidShiftExtend : AArch64_AM::UXTB;
7480 case 16:
7481 return IsLoadStore ? AArch64_AM::InvalidShiftExtend : AArch64_AM::UXTH;
7482 case 32:
7483 return AArch64_AM::UXTW;
7484 default:
7486 }
7487 }
7488
7489 // Don't have an explicit extend. Try to handle a G_AND with a constant mask
7490 // on the RHS.
7491 if (Opc != TargetOpcode::G_AND)
7493
7494 std::optional<uint64_t> MaybeAndMask = getImmedFromMO(MI.getOperand(2));
7495 if (!MaybeAndMask)
7497 uint64_t AndMask = *MaybeAndMask;
7498 switch (AndMask) {
7499 default:
7501 case 0xFF:
7502 return !IsLoadStore ? AArch64_AM::UXTB : AArch64_AM::InvalidShiftExtend;
7503 case 0xFFFF:
7504 return !IsLoadStore ? AArch64_AM::UXTH : AArch64_AM::InvalidShiftExtend;
7505 case 0xFFFFFFFF:
7506 return AArch64_AM::UXTW;
7507 }
7508}
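// Illustrative examples: "%m = G_AND %x, 0xff" is treated as a UXTB extend
// for arithmetic operands (e.g. "add x0, x1, w2, uxtb") but as invalid when
// folding into a load/store addressing mode, which has no 8- or 16-bit extend
// forms; a G_SEXT from s32 maps to SXTW in both contexts.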
7509
7510Register AArch64InstructionSelector::moveScalarRegClass(
7511 Register Reg, const TargetRegisterClass &RC, MachineIRBuilder &MIB) const {
7512 MachineRegisterInfo &MRI = *MIB.getMRI();
7513 auto Ty = MRI.getType(Reg);
7514 assert(!Ty.isVector() && "Expected scalars only!");
7515 if (Ty.getSizeInBits() == TRI.getRegSizeInBits(RC))
7516 return Reg;
7517
7518 // Create a copy and immediately select it.
7519 // FIXME: We should have an emitCopy function?
7520 auto Copy = MIB.buildCopy({&RC}, {Reg});
7521 selectCopy(*Copy, TII, MRI, TRI, RBI);
7522 return Copy.getReg(0);
7523}
7524
7525/// Select an "extended register" operand. This operand folds in an extend
7526/// followed by an optional left shift.
7528AArch64InstructionSelector::selectArithExtendedRegister(
7529 MachineOperand &Root) const {
7530 if (!Root.isReg())
7531 return std::nullopt;
7533 Root.getParent()->getParent()->getParent()->getRegInfo();
7534
7535 uint64_t ShiftVal = 0;
7536 Register ExtReg;
7538 MachineInstr *RootDef = getDefIgnoringCopies(Root.getReg(), MRI);
7539 if (!RootDef)
7540 return std::nullopt;
7541
7542 if (!isWorthFoldingIntoExtendedReg(*RootDef, MRI))
7543 return std::nullopt;
7544
7545 // Check if we can fold a shift and an extend.
7546 if (RootDef->getOpcode() == TargetOpcode::G_SHL) {
7547 // Look for a constant on the RHS of the shift.
7548 MachineOperand &RHS = RootDef->getOperand(2);
7549 std::optional<uint64_t> MaybeShiftVal = getImmedFromMO(RHS);
7550 if (!MaybeShiftVal)
7551 return std::nullopt;
7552 ShiftVal = *MaybeShiftVal;
7553 if (ShiftVal > 4)
7554 return std::nullopt;
7555 // Look for a valid extend instruction on the LHS of the shift.
7556 MachineOperand &LHS = RootDef->getOperand(1);
7557 MachineInstr *ExtDef = getDefIgnoringCopies(LHS.getReg(), MRI);
7558 if (!ExtDef)
7559 return std::nullopt;
7560 Ext = getExtendTypeForInst(*ExtDef, MRI);
7562 return std::nullopt;
7563 ExtReg = ExtDef->getOperand(1).getReg();
7564 } else {
7565 // Didn't get a shift. Try just folding an extend.
7566 Ext = getExtendTypeForInst(*RootDef, MRI);
7568 return std::nullopt;
7569 ExtReg = RootDef->getOperand(1).getReg();
7570
7571 // If we have a 32 bit instruction which zeroes out the high half of a
7572 // register, we get an implicit zero extend for free. Check if we have one.
7573 // FIXME: We actually emit the extend right now even though we don't have
7574 // to.
7575 if (Ext == AArch64_AM::UXTW && MRI.getType(ExtReg).getSizeInBits() == 32) {
7576 MachineInstr *ExtInst = MRI.getVRegDef(ExtReg);
7577 if (isDef32(*ExtInst))
7578 return std::nullopt;
7579 }
7580 }
7581
7582 // We require a GPR32 here. Narrow the ExtReg if needed using a subregister
7583 // copy.
7584 MachineIRBuilder MIB(*RootDef);
7585 ExtReg = moveScalarRegClass(ExtReg, AArch64::GPR32RegClass, MIB);
7586
7587 return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(ExtReg); },
7588 [=](MachineInstrBuilder &MIB) {
7589 MIB.addImm(getArithExtendImm(Ext, ShiftVal));
7590 }}};
7591}
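// Illustrative example: with "%e = G_SEXT %w(s32); %s = G_SHL %e, 2" as the
// root operand of an add, both the extend and the shift are folded and the
// instruction can be selected as "add x0, x1, w2, sxtw #2". Shift amounts
// above 4 are rejected because the extended-register form only encodes
// left shifts of 0-4.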
7592
7594AArch64InstructionSelector::selectExtractHigh(MachineOperand &Root) const {
7595 if (!Root.isReg())
7596 return std::nullopt;
7598 Root.getParent()->getParent()->getParent()->getRegInfo();
7599
7600 auto Extract = getDefSrcRegIgnoringCopies(Root.getReg(), MRI);
7601 while (Extract && Extract->MI->getOpcode() == TargetOpcode::G_BITCAST &&
7602 STI.isLittleEndian())
7603 Extract =
7604 getDefSrcRegIgnoringCopies(Extract->MI->getOperand(1).getReg(), MRI);
7605 if (!Extract)
7606 return std::nullopt;
7607
7608 if (Extract->MI->getOpcode() == TargetOpcode::G_UNMERGE_VALUES) {
7609 if (Extract->Reg == Extract->MI->getOperand(1).getReg()) {
7610 Register ExtReg = Extract->MI->getOperand(2).getReg();
7611 return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(ExtReg); }}};
7612 }
7613 }
7614 if (Extract->MI->getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT) {
7615 LLT SrcTy = MRI.getType(Extract->MI->getOperand(1).getReg());
7617 Extract->MI->getOperand(2).getReg(), MRI);
7618 if (LaneIdx && SrcTy == LLT::fixed_vector(2, 64) &&
7619 LaneIdx->Value.getSExtValue() == 1) {
7620 Register ExtReg = Extract->MI->getOperand(1).getReg();
7621 return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(ExtReg); }}};
7622 }
7623 }
7624
7625 return std::nullopt;
7626}
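// Illustrative example: if the root is the second (high-half) result of
// "%lo, %hi = G_UNMERGE_VALUES %v(<4 x s32>)", this renders the full %v so a
// pattern can use a high-half ("...2") instruction on it; similarly for a
// G_EXTRACT_VECTOR_ELT of lane 1 from a <2 x s64> vector.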
7627
7628void AArch64InstructionSelector::renderTruncImm(MachineInstrBuilder &MIB,
7629 const MachineInstr &MI,
7630 int OpIdx) const {
7631 const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
7632 assert(MI.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 &&
7633 "Expected G_CONSTANT");
7634 std::optional<int64_t> CstVal =
7635 getIConstantVRegSExtVal(MI.getOperand(0).getReg(), MRI);
7636 assert(CstVal && "Expected constant value");
7637 MIB.addImm(*CstVal);
7638}
7639
7640void AArch64InstructionSelector::renderLogicalImm32(
7641 MachineInstrBuilder &MIB, const MachineInstr &I, int OpIdx) const {
7642 assert(I.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 &&
7643 "Expected G_CONSTANT");
7644 uint64_t CstVal = I.getOperand(1).getCImm()->getZExtValue();
7646 MIB.addImm(Enc);
7647}
7648
7649void AArch64InstructionSelector::renderLogicalImm64(
7650 MachineInstrBuilder &MIB, const MachineInstr &I, int OpIdx) const {
7651 assert(I.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 &&
7652 "Expected G_CONSTANT");
7653 uint64_t CstVal = I.getOperand(1).getCImm()->getZExtValue();
7655 MIB.addImm(Enc);
7656}
7657
7658void AArch64InstructionSelector::renderUbsanTrap(MachineInstrBuilder &MIB,
7659 const MachineInstr &MI,
7660 int OpIdx) const {
7661 assert(MI.getOpcode() == TargetOpcode::G_UBSANTRAP && OpIdx == 0 &&
7662 "Expected G_UBSANTRAP");
7663 MIB.addImm(MI.getOperand(0).getImm() | ('U' << 8));
7664}
7665
7666void AArch64InstructionSelector::renderFPImm16(MachineInstrBuilder &MIB,
7667 const MachineInstr &MI,
7668 int OpIdx) const {
7669 assert(MI.getOpcode() == TargetOpcode::G_FCONSTANT && OpIdx == -1 &&
7670 "Expected G_FCONSTANT");
7671 MIB.addImm(
7672 AArch64_AM::getFP16Imm(MI.getOperand(1).getFPImm()->getValueAPF()));
7673}
7674
7675void AArch64InstructionSelector::renderFPImm32(MachineInstrBuilder &MIB,
7676 const MachineInstr &MI,
7677 int OpIdx) const {
7678 assert(MI.getOpcode() == TargetOpcode::G_FCONSTANT && OpIdx == -1 &&
7679 "Expected G_FCONSTANT");
7680 MIB.addImm(
7681 AArch64_AM::getFP32Imm(MI.getOperand(1).getFPImm()->getValueAPF()));
7682}
7683
7684void AArch64InstructionSelector::renderFPImm64(MachineInstrBuilder &MIB,
7685 const MachineInstr &MI,
7686 int OpIdx) const {
7687 assert(MI.getOpcode() == TargetOpcode::G_FCONSTANT && OpIdx == -1 &&
7688 "Expected G_FCONSTANT");
7689 MIB.addImm(
7690 AArch64_AM::getFP64Imm(MI.getOperand(1).getFPImm()->getValueAPF()));
7691}
7692
7693void AArch64InstructionSelector::renderFPImm32SIMDModImmType4(
7694 MachineInstrBuilder &MIB, const MachineInstr &MI, int OpIdx) const {
7695 assert(MI.getOpcode() == TargetOpcode::G_FCONSTANT && OpIdx == -1 &&
7696 "Expected G_FCONSTANT");
7698 .getFPImm()
7699 ->getValueAPF()
7700 .bitcastToAPInt()
7701 .getZExtValue()));
7702}
7703
7704bool AArch64InstructionSelector::isLoadStoreOfNumBytes(
7705 const MachineInstr &MI, unsigned NumBytes) const {
7706 if (!MI.mayLoadOrStore())
7707 return false;
7708 assert(MI.hasOneMemOperand() &&
7709 "Expected load/store to have only one mem op!");
7710 return (*MI.memoperands_begin())->getSize() == NumBytes;
7711}
7712
7713bool AArch64InstructionSelector::isDef32(const MachineInstr &MI) const {
7714 const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
7715 if (MRI.getType(MI.getOperand(0).getReg()).getSizeInBits() != 32)
7716 return false;
7717
7718 // Only return true if we know the operation will zero-out the high half of
7719 // the 64-bit register. Truncates can be subregister copies, which don't
7720 // zero out the high bits. Copies and other copy-like instructions can be
7721 // fed by truncates, or could be lowered as subregister copies.
7722 switch (MI.getOpcode()) {
7723 default:
7724 return true;
7725 case TargetOpcode::COPY:
7726 case TargetOpcode::G_BITCAST:
7727 case TargetOpcode::G_TRUNC:
7728 case TargetOpcode::G_PHI:
7729 return false;
7730 }
7731}
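// Illustrative example: a 32-bit G_ADD qualifies because writing a W register
// implicitly zeroes the top 32 bits of the corresponding X register, so a
// later zero-extend of its result can be dropped. A G_TRUNC or plain COPY
// does not qualify, since those may be lowered to subregister copies that
// leave the high bits untouched.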
7732
7733
7734// Perform fixups on the given PHI instruction's operands to force them all
7735// to be the same as the destination regbank.
7737 const AArch64RegisterBankInfo &RBI) {
7738 assert(MI.getOpcode() == TargetOpcode::G_PHI && "Expected a G_PHI");
7739 Register DstReg = MI.getOperand(0).getReg();
7740 const RegisterBank *DstRB = MRI.getRegBankOrNull(DstReg);
7741 assert(DstRB && "Expected PHI dst to have regbank assigned");
7742 MachineIRBuilder MIB(MI);
7743
7744 // Go through each operand and ensure it has the same regbank.
7745 for (MachineOperand &MO : llvm::drop_begin(MI.operands())) {
7746 if (!MO.isReg())
7747 continue;
7748 Register OpReg = MO.getReg();
7749 const RegisterBank *RB = MRI.getRegBankOrNull(OpReg);
7750 if (RB != DstRB) {
7751 // Insert a cross-bank copy.
7752 auto *OpDef = MRI.getVRegDef(OpReg);
7753 const LLT &Ty = MRI.getType(OpReg);
7754 MachineBasicBlock &OpDefBB = *OpDef->getParent();
7755
7756 // Any instruction we insert must appear after all PHIs in the block
7757 // for the block to be valid MIR.
7758 MachineBasicBlock::iterator InsertPt = std::next(OpDef->getIterator());
7759 if (InsertPt != OpDefBB.end() && InsertPt->isPHI())
7760 InsertPt = OpDefBB.getFirstNonPHI();
7761 MIB.setInsertPt(*OpDef->getParent(), InsertPt);
7762 auto Copy = MIB.buildCopy(Ty, OpReg);
7763 MRI.setRegBank(Copy.getReg(0), *DstRB);
7764 MO.setReg(Copy.getReg(0));
7765 }
7766 }
7767}
7768
7769void AArch64InstructionSelector::processPHIs(MachineFunction &MF) {
7770 // We're looking for PHIs; build a list first so we don't invalidate iterators.
7773 for (auto &BB : MF) {
7774 for (auto &MI : BB) {
7775 if (MI.getOpcode() == TargetOpcode::G_PHI)
7776 Phis.emplace_back(&MI);
7777 }
7778 }
7779
7780 for (auto *MI : Phis) {
7781 // We need to do some work here if the operand types are < 16 bit and they
7782 // are split across fpr/gpr banks. Since all types <32b on gpr
7783 // end up being assigned gpr32 regclasses, we can end up with PHIs here
7784 // which try to select between a gpr32 and an fpr16. Ideally RBS shouldn't
7785 // be selecting heterogeneous regbanks for operands if possible, but we
7786 // still need to be able to deal with it here.
7787 //
7788 // To fix this, if we have a gpr-bank operand < 32b in size and at least
7789 // one other operand is on the fpr bank, then we add cross-bank copies
7790 // to homogenize the operand banks. For simplicity the bank that we choose
7791 // to settle on is whatever bank the def operand has. For example:
7792 //
7793 // %endbb:
7794 // %dst:gpr(s16) = G_PHI %in1:gpr(s16), %bb1, %in2:fpr(s16), %bb2
7795 // =>
7796 // %bb2:
7797 // ...
7798 // %in2_copy:gpr(s16) = COPY %in2:fpr(s16)
7799 // ...
7800 // %endbb:
7801 // %dst:gpr(s16) = G_PHI %in1:gpr(s16), %bb1, %in2_copy:gpr(s16), %bb2
7802 bool HasGPROp = false, HasFPROp = false;
7803 for (const MachineOperand &MO : llvm::drop_begin(MI->operands())) {
7804 if (!MO.isReg())
7805 continue;
7806 const LLT &Ty = MRI.getType(MO.getReg());
7807 if (!Ty.isValid() || !Ty.isScalar())
7808 break;
7809 if (Ty.getSizeInBits() >= 32)
7810 break;
7811 const RegisterBank *RB = MRI.getRegBankOrNull(MO.getReg());
7812 // If for some reason we don't have a regbank yet, don't try anything.
7813 if (!RB)
7814 break;
7815
7816 if (RB->getID() == AArch64::GPRRegBankID)
7817 HasGPROp = true;
7818 else
7819 HasFPROp = true;
7820 }
7821 // We have heterogeneous regbanks; we need to fix them up.
7822 if (HasGPROp && HasFPROp)
7823 fixupPHIOpBanks(*MI, MRI, RBI);
7824 }
7825}
7826
7827namespace llvm {
7830 AArch64Subtarget &Subtarget,
7832 return new AArch64InstructionSelector(TM, Subtarget, RBI);
7833}
7834}
unsigned SubReg
unsigned const MachineRegisterInfo * MRI
#define Success
static bool isPreferredADD(int64_t ImmOff)
static SDValue emitConditionalComparison(SDValue LHS, SDValue RHS, ISD::CondCode CC, SDValue CCOp, AArch64CC::CondCode Predicate, AArch64CC::CondCode OutCC, const SDLoc &DL, SelectionDAG &DAG)
can be transformed to: not (and (not (and (setCC (cmp C)) (setCD (cmp D)))) (and (not (setCA (cmp A))...
static SDValue tryAdvSIMDModImm16(unsigned NewOp, SDValue Op, SelectionDAG &DAG, const APInt &Bits, const SDValue *LHS=nullptr)
static SDValue tryAdvSIMDModImmFP(unsigned NewOp, SDValue Op, SelectionDAG &DAG, const APInt &Bits)
static SDValue tryAdvSIMDModImm64(unsigned NewOp, SDValue Op, SelectionDAG &DAG, const APInt &Bits)
static SDValue tryAdvSIMDModImm8(unsigned NewOp, SDValue Op, SelectionDAG &DAG, const APInt &Bits)
static SDValue emitConjunctionRec(SelectionDAG &DAG, SDValue Val, AArch64CC::CondCode &OutCC, bool Negate, SDValue CCOp, AArch64CC::CondCode Predicate)
Emit conjunction or disjunction tree with the CMP/FCMP followed by a chain of CCMP/CFCMP ops.
static SDValue tryAdvSIMDModImm321s(unsigned NewOp, SDValue Op, SelectionDAG &DAG, const APInt &Bits)
static bool isCMN(SDValue Op, ISD::CondCode CC)
static SDValue tryAdvSIMDModImm32(unsigned NewOp, SDValue Op, SelectionDAG &DAG, const APInt &Bits, const SDValue *LHS=nullptr)
static SDValue emitConjunction(SelectionDAG &DAG, SDValue Val, AArch64CC::CondCode &OutCC)
Emit expression as a conjunction (a series of CCMP/CFCMP ops).
static unsigned selectFPConvOpc(unsigned GenericOpc, LLT DstTy, LLT SrcTy)
#define GET_GLOBALISEL_PREDICATES_INIT
static std::pair< const TargetRegisterClass *, const TargetRegisterClass * > getRegClassesForCopy(MachineInstr &I, const TargetInstrInfo &TII, MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI, const RegisterBankInfo &RBI)
Helper function to get the source and destination register classes for a copy.
#define GET_GLOBALISEL_TEMPORARIES_INIT
static void changeFPCCToANDAArch64CC(CmpInst::Predicate CC, AArch64CC::CondCode &CondCode, AArch64CC::CondCode &CondCode2)
Convert an IR fp condition code to an AArch64 CC.
static Register getTestBitReg(Register Reg, uint64_t &Bit, bool &Invert, MachineRegisterInfo &MRI)
Return a register which can be used as a bit to test in a TB(N)Z.
static unsigned getMinSizeForRegBank(const RegisterBank &RB)
Returns the minimum size the given register bank can hold.
static std::optional< int64_t > getVectorShiftImm(Register Reg, MachineRegisterInfo &MRI)
Returns the element immediate value of a vector shift operand if found.
static unsigned selectLoadStoreUIOp(unsigned GenericOpc, unsigned RegBankID, unsigned OpSize)
Select the AArch64 opcode for the G_LOAD or G_STORE operation GenericOpc, appropriate for the (value)...
static AArch64CC::CondCode changeICMPPredToAArch64CC(CmpInst::Predicate P)
static unsigned selectBinaryOp(unsigned GenericOpc, unsigned RegBankID, unsigned OpSize)
Select the AArch64 opcode for the basic binary operation GenericOpc (such as G_OR or G_SDIV),...
static bool getSubRegForClass(const TargetRegisterClass *RC, const TargetRegisterInfo &TRI, unsigned &SubReg)
Returns the correct subregister to use for a given register class.
static bool selectCopy(MachineInstr &I, const TargetInstrInfo &TII, MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI, const RegisterBankInfo &RBI)
static bool copySubReg(MachineInstr &I, MachineRegisterInfo &MRI, const RegisterBankInfo &RBI, Register SrcReg, const TargetRegisterClass *To, unsigned SubReg)
Helper function for selectCopy.
static const TargetRegisterClass * getMinClassForRegBank(const RegisterBank &RB, unsigned SizeInBits, bool GetAllRegSet=false)
Given a register bank, and size in bits, return the smallest register class that can represent that c...
static Register createDTuple(ArrayRef< Register > Regs, MachineIRBuilder &MIB)
Create a tuple of D-registers using the registers in Regs.
static void fixupPHIOpBanks(MachineInstr &MI, MachineRegisterInfo &MRI, const AArch64RegisterBankInfo &RBI)
static bool selectDebugInstr(MachineInstr &I, MachineRegisterInfo &MRI, const RegisterBankInfo &RBI)
static bool isSignExtendShiftType(AArch64_AM::ShiftExtendType Type)
static AArch64_AM::ShiftExtendType getShiftTypeForInst(MachineInstr &MI)
Given a shift instruction, return the correct shift type for that instruction.
static bool unsupportedBinOp(const MachineInstr &I, const AArch64RegisterBankInfo &RBI, const MachineRegisterInfo &MRI, const AArch64RegisterInfo &TRI)
Check whether I is a currently unsupported binary operation:
static bool canEmitConjunction(Register Val, bool &CanNegate, bool &MustBeFirst, bool WillNegate, MachineRegisterInfo &MRI, unsigned Depth=0)
Returns true if Val is a tree of AND/OR/CMP operations that can be expressed as a conjunction.
static bool getLaneCopyOpcode(unsigned &CopyOpc, unsigned &ExtractSubReg, const unsigned EltSize)
static Register createQTuple(ArrayRef< Register > Regs, MachineIRBuilder &MIB)
Create a tuple of Q-registers using the registers in Regs.
static std::optional< uint64_t > getImmedFromMO(const MachineOperand &Root)
static std::pair< unsigned, unsigned > getInsertVecEltOpInfo(const RegisterBank &RB, unsigned EltSize)
Return an <Opcode, SubregIndex> pair to do an vector elt insert of a given size and RB.
static Register createTuple(ArrayRef< Register > Regs, const unsigned RegClassIDs[], const unsigned SubRegs[], MachineIRBuilder &MIB)
Create a REG_SEQUENCE instruction using the registers in Regs.
static std::optional< int64_t > getVectorSHLImm(LLT SrcTy, Register Reg, MachineRegisterInfo &MRI)
Matches and returns the shift immediate value for a SHL instruction given a shift operand.
static void changeFPCCToORAArch64CC(CmpInst::Predicate CC, AArch64CC::CondCode &CondCode, AArch64CC::CondCode &CondCode2)
changeFPCCToORAArch64CC - Convert an IR fp condition code to an AArch64 CC.
unsigned RegSize
This file declares the targeting of the RegisterBankInfo class for AArch64.
MachineBasicBlock & MBB
static const LLT S64
static const LLT S32
static const LLT S16
static const LLT S8
static bool isStore(int Opcode)
static bool selectMergeValues(MachineInstrBuilder &MIB, const ARMBaseInstrInfo &TII, MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI, const RegisterBankInfo &RBI)
static bool selectUnmergeValues(MachineInstrBuilder &MIB, const ARMBaseInstrInfo &TII, MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI, const RegisterBankInfo &RBI)
This file contains the declarations for the subclasses of Constant, which represent the different fla...
Returns the sub type a function will return at a given Idx; should correspond to the result type of an ExtractValue instruction executed with just that one Idx.
#define LLVM_DEBUG(X)
Definition: Debug.h:101
This file contains constants used for implementing Dwarf debug support.
uint64_t Size
#define DEBUG_TYPE
Declares convenience wrapper classes for interpreting MachineInstr instances as specific generic oper...
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
static void emitLoadFromConstantPool(Register DstReg, const Constant *ConstVal, MachineIRBuilder &MIRBuilder)
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
Contains matchers for matching SSA Machine Instructions.
This file declares the MachineConstantPool class which is an abstract constant pool to keep track of ...
This file declares the MachineIRBuilder class.
unsigned const TargetRegisterInfo * TRI
unsigned Reg
static unsigned getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
#define P(N)
const char LLVMTargetMachineRef TM
static StringRef getName(Value *V)
assert(ImpDefSCC.getReg() == AMDGPU::SCC && ImpDefSCC.isDef())
static constexpr int Concat[]
Value * RHS
Value * LHS
AArch64FunctionInfo - This class is derived from MachineFunctionInfo and contains private AArch64-spe...
This class provides the information for the target register banks.
bool isCallingConvWin64(CallingConv::ID CC) const
APInt bitcastToAPInt() const
Definition: APFloat.h:1210
Class for arbitrary precision integers.
Definition: APInt.h:76
APInt zext(unsigned width) const
Zero extend to a new width.
Definition: APInt.cpp:981
uint64_t getZExtValue() const
Get zero extended value.
Definition: APInt.h:1491
static APInt getSplat(unsigned NewLen, const APInt &V)
Return a value containing V broadcasted over NewLen bits.
Definition: APInt.cpp:620
static APInt getHighBitsSet(unsigned numBits, unsigned hiBitsSet)
Constructs an APInt value that has the top hiBitsSet bits set.
Definition: APInt.h:274
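A minimal usage sketch of the APInt helpers listed above (illustrative only, not code from this file; the function name and values are hypothetical):

#include "llvm/ADT/APInt.h"
using namespace llvm;

static void apintSketch() {
  APInt Byte(8, 0xAB);                          // 8-bit constant 0xAB
  APInt Splat = APInt::getSplat(64, Byte);      // 0xABABABABABABABAB
  APInt HiMask = APInt::getHighBitsSet(64, 16); // top 16 of 64 bits set
  APInt Wide = Byte.zext(32);                   // zero-extend to 32 bits
  uint64_t Raw = Splat.getZExtValue();          // raw zero-extended value
  (void)HiMask; (void)Wide; (void)Raw;
}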
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:165
BlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate IR basic block frequen...
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition: InstrTypes.h:960
@ FCMP_OEQ
0 0 0 1 True if ordered and equal
Definition: InstrTypes.h:963
@ ICMP_SLT
signed less than
Definition: InstrTypes.h:989
@ ICMP_SLE
signed less or equal
Definition: InstrTypes.h:990
@ FCMP_OLT
0 1 0 0 True if ordered and less than
Definition: InstrTypes.h:966
@ FCMP_ULE
1 1 0 1 True if unordered, less than, or equal
Definition: InstrTypes.h:975
@ FCMP_OGT
0 0 1 0 True if ordered and greater than
Definition: InstrTypes.h:964
@ FCMP_OGE
0 0 1 1 True if ordered and greater than or equal
Definition: InstrTypes.h:965
@ ICMP_UGE
unsigned greater or equal
Definition: InstrTypes.h:984
@ ICMP_UGT
unsigned greater than
Definition: InstrTypes.h:983
@ ICMP_SGT
signed greater than
Definition: InstrTypes.h:987
@ FCMP_ULT
1 1 0 0 True if unordered or less than
Definition: InstrTypes.h:974
@ FCMP_ONE
0 1 1 0 True if ordered and operands are unequal
Definition: InstrTypes.h:968
@ FCMP_UEQ
1 0 0 1 True if unordered or equal
Definition: InstrTypes.h:971
@ ICMP_ULT
unsigned less than
Definition: InstrTypes.h:985
@ FCMP_UGT
1 0 1 0 True if unordered or greater than
Definition: InstrTypes.h:972
@ FCMP_OLE
0 1 0 1 True if ordered and less than or equal
Definition: InstrTypes.h:967
@ FCMP_ORD
0 1 1 1 True if ordered (no nans)
Definition: InstrTypes.h:969
@ ICMP_EQ
equal
Definition: InstrTypes.h:981
@ ICMP_NE
not equal
Definition: InstrTypes.h:982
@ ICMP_SGE
signed greater or equal
Definition: InstrTypes.h:988
@ FCMP_UNE
1 1 1 0 True if unordered or not equal
Definition: InstrTypes.h:976
@ ICMP_ULE
unsigned less or equal
Definition: InstrTypes.h:986
@ FCMP_UGE
1 0 1 1 True if unordered, greater than, or equal
Definition: InstrTypes.h:973
@ FCMP_UNO
1 0 0 0 True if unordered: isnan(X) | isnan(Y)
Definition: InstrTypes.h:970
Predicate getInversePredicate() const
For example, EQ -> NE, UGT -> ULE, SLT -> SGE, OEQ -> UNE, UGT -> OLE, OLT -> UGE,...
Definition: InstrTypes.h:1096
bool isIntPredicate() const
Definition: InstrTypes.h:1090
bool isUnsigned() const
Definition: InstrTypes.h:1238
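A small illustrative sketch (not from this file) of how the CmpInst predicate queries above are typically used; the function name is hypothetical:

#include "llvm/IR/InstrTypes.h"
using namespace llvm;

static void predicateSketch() {
  CmpInst::Predicate P = CmpInst::ICMP_SLT;                 // signed less than
  CmpInst::Predicate Inv = CmpInst::getInversePredicate(P); // ICMP_SGE
  bool IsInt = CmpInst::isIntPredicate(P);                  // true
  bool IsUns = CmpInst::isUnsigned(CmpInst::ICMP_ULT);      // true
  (void)Inv; (void)IsInt; (void)IsUns;
}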
static Constant * getSplat(unsigned NumElts, Constant *Elt)
Return a ConstantVector with the specified constant in each element.
Definition: Constants.cpp:2958
ConstantFP - Floating Point Values [float, double].
Definition: Constants.h:268
const APFloat & getValueAPF() const
Definition: Constants.h:311
bool isNegative() const
Return true if the sign bit is set.
Definition: Constants.h:318
bool isZero() const
Return true if the value is positive or negative zero.
Definition: Constants.h:315
This is the shared class of boolean and integer constants.
Definition: Constants.h:80
int64_t getSExtValue() const
Return the constant as a 64-bit integer value after it has been sign extended as appropriate for the ...
Definition: Constants.h:160
unsigned getBitWidth() const
getBitWidth - Return the scalar bitwidth of this constant.
Definition: Constants.h:148
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
Definition: Constants.h:154
static Constant * get(ArrayRef< Constant * > V)
Definition: Constants.cpp:1398
This is an important base class in LLVM.
Definition: Constant.h:41
Constant * getSplatValue(bool AllowUndefs=false) const
If all elements of the vector constant have the same value, return that value.
Definition: Constants.cpp:1699
const APInt & getUniqueInteger() const
If C is a constant integer then return its value, otherwise C must be a vector of constant integers,...
Definition: Constants.cpp:1758
bool isNullValue() const
Return true if this is the value that would be returned by getNullValue.
Definition: Constants.cpp:90
This class represents an Operation in the Expression.
TypeSize getTypeStoreSize(Type *Ty) const
Returns the maximum number of bytes that may be overwritten by storing the specified type.
Definition: DataLayout.h:472
Align getPrefTypeAlign(Type *Ty) const
Returns the preferred stack/global alignment for the specified type.
Definition: DataLayout.cpp:874
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this functio...
Definition: Function.h:262
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
Definition: Function.cpp:350
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition: Function.cpp:677
std::optional< SmallVector< std::function< void(MachineInstrBuilder &)>, 4 > > ComplexRendererFns
Represents indexed stores.
Represents any type of generic load or store.
Register getPointerReg() const
Get the source register of the pointer value.
MachineMemOperand & getMMO() const
Get the MachineMemOperand on this instruction.
LocationSize getMemSize() const
Returns the size in bytes of the memory access.
LocationSize getMemSizeInBits() const
Returns the size in bits of the memory access.
Represents a G_SELECT.
Register getCondReg() const
Register getFalseReg() const
Register getTrueReg() const
Register getReg(unsigned Idx) const
Access the Idx'th operand as a register and return it.
bool isThreadLocal() const
If the value is "Thread Local", its value isn't shared by the threads.
Definition: GlobalValue.h:263
bool isEquality() const
Return true if this predicate is either EQ or NE.
constexpr unsigned getScalarSizeInBits() const
Definition: LowLevelType.h:267
constexpr bool isScalar() const
Definition: LowLevelType.h:146
constexpr LLT changeElementType(LLT NewEltTy) const
If this type is a vector, return a vector with the same number of elements but the new element type.
Definition: LowLevelType.h:214
constexpr LLT multiplyElements(int Factor) const
Produce a vector type that is Factor times bigger, preserving the element type.
Definition: LowLevelType.h:254
constexpr bool isPointerVector() const
Definition: LowLevelType.h:152
static constexpr LLT scalar(unsigned SizeInBits)
Get a low-level scalar or aggregate "bag of bits".
Definition: LowLevelType.h:42
constexpr bool isValid() const
Definition: LowLevelType.h:145
constexpr uint16_t getNumElements() const
Returns the number of elements in a vector LLT.
Definition: LowLevelType.h:159
constexpr bool isVector() const
Definition: LowLevelType.h:148
static constexpr LLT pointer(unsigned AddressSpace, unsigned SizeInBits)
Get a low-level pointer in the given address space.
Definition: LowLevelType.h:57
constexpr TypeSize getSizeInBits() const
Returns the total size of the type. Must only be called on sized types.
Definition: LowLevelType.h:193
constexpr bool isPointer() const
Definition: LowLevelType.h:149
constexpr LLT getElementType() const
Returns the vector's element type. Only valid for vector types.
Definition: LowLevelType.h:290
constexpr unsigned getAddressSpace() const
Definition: LowLevelType.h:280
static constexpr LLT fixed_vector(unsigned NumElements, unsigned ScalarSizeInBits)
Get a low-level fixed-width vector of some number of elements and element width.
Definition: LowLevelType.h:100
constexpr TypeSize getSizeInBytes() const
Returns the total size of the type in bytes, i.e.
Definition: LowLevelType.h:203
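An illustrative sketch of the LLT constructors and queries listed above (not code from this file; the header path is an assumption and varies across LLVM versions):

#include "llvm/CodeGenTypes/LowLevelType.h" // header location differs in older LLVM
using namespace llvm;

static void lltSketch() {
  LLT S64 = LLT::scalar(64);                 // 64-bit scalar
  LLT V4S32 = LLT::fixed_vector(4, 32);      // <4 x s32>
  LLT P0 = LLT::pointer(0, 64);              // 64-bit pointer in address space 0

  bool IsVec = V4S32.isVector();             // true
  unsigned NumElts = V4S32.getNumElements(); // 4
  LLT Elt = V4S32.getElementType();          // s32
  LLT V4S64 = V4S32.changeElementType(S64);  // <4 x s64>
  (void)P0; (void)IsVec; (void)NumElts; (void)Elt; (void)V4S64;
}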
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:67
TypeSize getValue() const
Set of metadata that should be preserved when using BuildMI().
iterator getFirstNonPHI()
Returns a pointer to the first instruction in this block that is not a PHINode instruction.
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
The MachineConstantPool class keeps track of constants referenced by a function which must be spilled...
unsigned getConstantPoolIndex(const Constant *C, Align Alignment)
getConstantPoolIndex - Create a new entry in the constant pool or return an existing one.
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
void setFrameAddressIsTaken(bool T)
void setReturnAddressIsTaken(bool s)
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
const DataLayout & getDataLayout() const
Return the DataLayout attached to the Module associated to this MF.
Function & getFunction()
Return the LLVM function that this machine code represents.
const LLVMTargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
MachineConstantPool * getConstantPool()
getConstantPool - Return the constant pool object for the current function.
Helper class to build MachineInstr.
void setInsertPt(MachineBasicBlock &MBB, MachineBasicBlock::iterator II)
Set the insertion point before the specified position.
void setInstr(MachineInstr &MI)
Set the insertion point to before MI.
MachineInstrBuilder buildInstr(unsigned Opcode)
Build and insert <empty> = Opcode <empty>.
MachineFunction & getMF()
Getter for the function we currently build.
void setInstrAndDebugLoc(MachineInstr &MI)
Set the insertion point to before MI, and set the debug loc to MI's loc.
const MachineBasicBlock & getMBB() const
Getter for the basic block we currently build.
MachineRegisterInfo * getMRI()
Getter for MRI.
MachineIRBuilderState & getState()
Getter for the State.
MachineInstrBuilder buildCopy(const DstOp &Res, const SrcOp &Op)
Build and insert Res = COPY Op.
const DataLayout & getDataLayout() const
void setState(const MachineIRBuilderState &NewState)
Setter for the State.
MachineInstrBuilder buildPtrToInt(const DstOp &Dst, const SrcOp &Src)
Build and insert a G_PTRTOINT instruction.
Register getReg(unsigned Idx) const
Get the register for the operand index.
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & addBlockAddress(const BlockAddress *BA, int64_t Offset=0, unsigned TargetFlags=0) const
const MachineInstrBuilder & addFrameIndex(int Idx) const
const MachineInstrBuilder & addRegMask(const uint32_t *Mask) const
const MachineInstrBuilder & addGlobalAddress(const GlobalValue *GV, int64_t Offset=0, unsigned TargetFlags=0) const
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addJumpTableIndex(unsigned Idx, unsigned TargetFlags=0) const
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
const MachineInstrBuilder & cloneMemRefs(const MachineInstr &OtherMI) const
const MachineInstrBuilder & addUse(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register use operand.
const MachineInstrBuilder & setMIFlags(unsigned Flags) const
const MachineInstrBuilder & addMemOperand(MachineMemOperand *MMO) const
const MachineInstrBuilder & addDef(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register definition operand.
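A minimal sketch of the MachineIRBuilder / MachineInstrBuilder pattern the selector relies on (illustrative only; Dst/Src are placeholder virtual registers, and the AArch64 opcode assumes the target instruction enum already included by this file):

#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
using namespace llvm;

static void builderSketch(MachineIRBuilder &MIB, MachineInstr &MI,
                          Register Dst, Register Src) {
  // Insert new instructions before MI and reuse its debug location.
  MIB.setInstrAndDebugLoc(MI);

  // Dst = COPY Src
  MIB.buildCopy(Dst, Src);

  // Chained MachineInstrBuilder form for a target instruction,
  // e.g. Dst = ADDXri Src, #1, lsl #0.
  MIB.buildInstr(AArch64::ADDXri)
      .addDef(Dst)
      .addUse(Src)
      .addImm(1)   // 12-bit unsigned immediate
      .addImm(0);  // shift amount (0 or 12)
}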
Representation of each machine instruction.
Definition: MachineInstr.h:69
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
Definition: MachineInstr.h:546
const MachineBasicBlock * getParent() const
Definition: MachineInstr.h:329
void addOperand(MachineFunction &MF, const MachineOperand &Op)
Add the specified operand to the instruction.
const MachineFunction * getMF() const
Return the function that contains the basic block that this instruction belongs to.
void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:556
void addMemOperand(MachineFunction &MF, MachineMemOperand *MO)
Add a MachineMemOperand to the machine instruction.
A description of a memory reference used in the backend.
LLT getMemoryType() const
Return the memory type of the memory reference.
@ MOLoad
The memory access reads data.
AtomicOrdering getSuccessOrdering() const
Return the atomic ordering requirements for this memory operation.
MachineOperand class - Representation of each machine instruction operand.
const GlobalValue * getGlobal() const
const ConstantInt * getCImm() const
bool isCImm() const
isCImm - Test if this is a MO_CImmediate operand.
int64_t getImm() const
bool isReg() const
isReg - Tests if this is a MO_Register operand.
void setReg(Register Reg)
Change the register this operand corresponds to.
bool isImm() const
isImm - Tests if this is a MO_Immediate operand.
void ChangeToImmediate(int64_t ImmVal, unsigned TargetFlags=0)
ChangeToImmediate - Replace this operand with a new immediate operand of the specified value.
MachineInstr * getParent()
getParent - Return the instruction that this operand belongs to.
static MachineOperand CreateImm(int64_t Val)
Register getReg() const
getReg - Returns the register number.
static MachineOperand CreateGA(const GlobalValue *GV, int64_t Offset, unsigned TargetFlags=0)
static MachineOperand CreateBA(const BlockAddress *BA, int64_t Offset, unsigned TargetFlags=0)
const ConstantFP * getFPImm() const
unsigned getPredicate() const
int64_t getOffset() const
Return the offset from the symbol in this operand.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
T get() const
Returns the value of the specified pointer type.
Definition: PointerUnion.h:155
T dyn_cast() const
Returns the current pointer if it is of the specified pointer type, otherwise returns null.
Definition: PointerUnion.h:162
Analysis providing profile information.
Holds all the information related to register banks.
static const TargetRegisterClass * constrainGenericRegister(Register Reg, const TargetRegisterClass &RC, MachineRegisterInfo &MRI)
Constrain the (possibly generic) virtual register Reg to RC.
const RegisterBank & getRegBank(unsigned ID)
Get the register bank identified by ID.
TypeSize getSizeInBits(Register Reg, const MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI) const
Get the size in bits of Reg.
This class implements the register bank concept.
Definition: RegisterBank.h:28
unsigned getID() const
Get the identifier of this register bank.
Definition: RegisterBank.h:45
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
constexpr bool isValid() const
Definition: Register.h:116
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
Definition: Register.h:91
constexpr bool isPhysical() const
Return true if the specified register number is in the physical register namespace.
Definition: Register.h:95
This class represents the LLVM 'select' instruction.
reference emplace_back(ArgTypes &&... Args)
Definition: SmallVector.h:950
void push_back(const T &Elt)
Definition: SmallVector.h:426
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1209
Register getReg() const
TargetInstrInfo - Interface to description of machine instruction set.
CodeModel::Model getCodeModel() const
Returns the code model.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
virtual const TargetRegisterInfo * getRegisterInfo() const
getRegisterInfo - If register information is available, return it.
virtual const TargetLowering * getTargetLowering() const
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
static IntegerType * getIntNTy(LLVMContext &C, unsigned N)
static IntegerType * getInt8Ty(LLVMContext &C)
A Use represents the edge between a Value definition and its users.
Definition: Use.h:43
LLVM Value Representation.
Definition: Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
Align getPointerAlignment(const DataLayout &DL) const
Returns an alignment of the pointer value.
Definition: Value.cpp:926
self_iterator getIterator()
Definition: ilist_node.h:109
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
static CondCode getInvertedCondCode(CondCode Code)
static unsigned getNZCVToSatisfyCondCode(CondCode Code)
Given a condition code, return NZCV flags that would satisfy that condition.
void changeFCMPPredToAArch64CC(const CmpInst::Predicate P, AArch64CC::CondCode &CondCode, AArch64CC::CondCode &CondCode2)
Find the AArch64 condition codes necessary to represent P for a scalar floating point comparison.
std::optional< int64_t > getAArch64VectorSplatScalar(const MachineInstr &MI, const MachineRegisterInfo &MRI)
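An illustrative sketch of the AArch64 condition-code helpers above (assumes the AArch64 target headers already included by this file; the function name and the chosen predicate are hypothetical):

using namespace llvm;

static void fcmpCondCodeSketch() {
  AArch64CC::CondCode CC1 = AArch64CC::AL, CC2 = AArch64CC::AL;
  // An unordered-or-equal fp compare typically expands to two AArch64
  // conditions (EQ or VS); predicates needing only one report AL in CC2.
  changeFCMPPredToAArch64CC(CmpInst::FCMP_UEQ, CC1, CC2);

  AArch64CC::CondCode Inv = AArch64CC::getInvertedCondCode(CC1);
  unsigned NZCV = AArch64CC::getNZCVToSatisfyCondCode(Inv);
  (void)CC2; (void)NZCV;
}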
@ MO_NC
MO_NC - Indicates whether the linker is expected to check the symbol reference for overflow.
@ MO_G1
MO_G1 - A symbol operand with this flag (granule 1) represents the bits 16-31 of a 64-bit address,...
@ MO_PAGEOFF
MO_PAGEOFF - A symbol operand with this flag represents the offset of that symbol within a 4K page.
@ MO_GOT
MO_GOT - This flag indicates that a symbol operand represents the address of the GOT entry for the sy...
@ MO_G0
MO_G0 - A symbol operand with this flag (granule 0) represents the bits 0-15 of a 64-bit address,...
@ MO_PAGE
MO_PAGE - A symbol operand with this flag represents the pc-relative offset of the 4K page containing...
@ MO_TLS
MO_TLS - Indicates that the operand being accessed is some kind of thread-local symbol.
@ MO_G2
MO_G2 - A symbol operand with this flag (granule 2) represents the bits 32-47 of a 64-bit address,...
@ MO_G3
MO_G3 - A symbol operand with this flag (granule 3) represents the high 16-bits of a 64-bit address,...
static bool isLogicalImmediate(uint64_t imm, unsigned regSize)
isLogicalImmediate - Return true if the immediate is valid for a logical immediate instruction of the...
static uint8_t encodeAdvSIMDModImmType2(uint64_t Imm)
static bool isAdvSIMDModImmType9(uint64_t Imm)
static bool isAdvSIMDModImmType4(uint64_t Imm)
static bool isAdvSIMDModImmType5(uint64_t Imm)
static int getFP32Imm(const APInt &Imm)
getFP32Imm - Return an 8-bit floating-point version of the 32-bit floating-point value.
static uint8_t encodeAdvSIMDModImmType7(uint64_t Imm)
static uint8_t encodeAdvSIMDModImmType12(uint64_t Imm)
static uint8_t encodeAdvSIMDModImmType10(uint64_t Imm)
static uint8_t encodeAdvSIMDModImmType9(uint64_t Imm)
static uint64_t encodeLogicalImmediate(uint64_t imm, unsigned regSize)
encodeLogicalImmediate - Return the encoded immediate value for a logical immediate instruction of th...
static bool isAdvSIMDModImmType7(uint64_t Imm)
static uint8_t encodeAdvSIMDModImmType5(uint64_t Imm)
static int getFP64Imm(const APInt &Imm)
getFP64Imm - Return an 8-bit floating-point version of the 64-bit floating-point value.
static bool isAdvSIMDModImmType10(uint64_t Imm)
static int getFP16Imm(const APInt &Imm)
getFP16Imm - Return an 8-bit floating-point version of the 16-bit floating-point value.
static uint8_t encodeAdvSIMDModImmType8(uint64_t Imm)
static bool isAdvSIMDModImmType12(uint64_t Imm)
static uint8_t encodeAdvSIMDModImmType11(uint64_t Imm)
static bool isAdvSIMDModImmType11(uint64_t Imm)
static uint8_t encodeAdvSIMDModImmType6(uint64_t Imm)
static bool isAdvSIMDModImmType8(uint64_t Imm)
static uint8_t encodeAdvSIMDModImmType4(uint64_t Imm)
static unsigned getShifterImm(AArch64_AM::ShiftExtendType ST, unsigned Imm)
getShifterImm - Encode the shift type and amount: imm: 6-bit shift amount shifter: 000 ==> lsl 001 ==...
static bool isAdvSIMDModImmType6(uint64_t Imm)
static uint8_t encodeAdvSIMDModImmType1(uint64_t Imm)
static uint8_t encodeAdvSIMDModImmType3(uint64_t Imm)
static bool isAdvSIMDModImmType2(uint64_t Imm)
static bool isAdvSIMDModImmType3(uint64_t Imm)
static bool isAdvSIMDModImmType1(uint64_t Imm)
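An illustrative sketch of the AArch64_AM immediate-encoding helpers above (assumes AArch64AddressingModes.h, pulled in by this file's target includes; the constant is merely an example of a valid repeating bit pattern):

using namespace llvm;

static void immEncodingSketch() {
  uint64_t Imm = 0x00FF00FF00FF00FFULL; // repeating 16-bit element 0x00FF
  if (AArch64_AM::isLogicalImmediate(Imm, 64)) {
    // N:immr:imms encoding accepted by AND/ORR/EOR (immediate) instructions.
    uint64_t Enc = AArch64_AM::encodeLogicalImmediate(Imm, 64);
    (void)Enc;
  }

  // Shift type + amount encoding for shifted-register operands, e.g. LSL #12.
  unsigned ShiftImm = AArch64_AM::getShifterImm(AArch64_AM::LSL, 12);
  (void)ShiftImm;
}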
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:121
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out,...
Definition: ISDOpcodes.h:1523
operand_type_match m_Reg()
SpecificConstantMatch m_SpecificICst(int64_t RequestedValue)
Matches a constant equal to RequestedValue.
UnaryOp_match< SrcTy, TargetOpcode::G_ZEXT > m_GZExt(const SrcTy &Src)
ConstantMatch< APInt > m_ICst(APInt &Cst)
BinaryOp_match< LHS, RHS, TargetOpcode::G_ADD, true > m_GAdd(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, TargetOpcode::G_OR, true > m_GOr(const LHS &L, const RHS &R)
BinaryOp_match< SpecificConstantMatch, SrcTy, TargetOpcode::G_SUB > m_Neg(const SrcTy &&Src)
Matches a register negated by a G_SUB.
OneNonDBGUse_match< SubPat > m_OneNonDBGUse(const SubPat &SP)
BinaryOp_match< SrcTy, SpecificConstantMatch, TargetOpcode::G_XOR, true > m_Not(const SrcTy &&Src)
Matches a register not-ed by a G_XOR.
bool mi_match(Reg R, const MachineRegisterInfo &MRI, Pattern &&P)
BinaryOp_match< LHS, RHS, TargetOpcode::G_PTR_ADD, false > m_GPtrAdd(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, TargetOpcode::G_SHL, false > m_GShl(const LHS &L, const RHS &R)
Or< Preds... > m_any_of(Preds &&... preds)
BinaryOp_match< LHS, RHS, TargetOpcode::G_AND, true > m_GAnd(const LHS &L, const RHS &R)
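A small sketch of the MIPatternMatch combinators listed above (illustrative; the matched shape and function name are hypothetical, but mi_match and the m_* matchers are used in exactly this way throughout the selector):

using namespace llvm;
using namespace MIPatternMatch;

static bool matchAddOfShiftByConst(Register Reg,
                                   const MachineRegisterInfo &MRI) {
  Register Base;
  APInt ShAmt;
  // Match: Reg = G_ADD Base, (G_SHL x, Cst), where the shift has a single
  // non-debug use and a constant shift amount. m_GAdd handles commutativity.
  return mi_match(Reg, MRI,
                  m_GAdd(m_Reg(Base),
                         m_OneNonDBGUse(m_GShl(m_Reg(), m_ICst(ShAmt)))));
}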
Predicate
Predicate - These are "(BI << 5) | BO" for various predicates.
Definition: PPCPredicates.h:26
@ Implicit
Not emitted register (e.g. carry, or temporary result).
@ Undef
Value of the register doesn't matter.
Reg
All possible values of the reg field in the ModR/M byte.
constexpr double e
Definition: MathExtras.h:31
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
Register getFunctionLiveInPhysReg(MachineFunction &MF, const TargetInstrInfo &TII, MCRegister PhysReg, const TargetRegisterClass &RC, const DebugLoc &DL, LLT RegTy=LLT())
Return a virtual register corresponding to the incoming argument register PhysReg.
Definition: Utils.cpp:882
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition: STLExtras.h:329
@ Offset
Definition: DWP.cpp:456
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1731
Register constrainOperandRegClass(const MachineFunction &MF, const TargetRegisterInfo &TRI, MachineRegisterInfo &MRI, const TargetInstrInfo &TII, const RegisterBankInfo &RBI, MachineInstr &InsertPt, const TargetRegisterClass &RegClass, MachineOperand &RegMO)
Constrain the Register operand OpIdx, so that it is now constrained to the TargetRegisterClass passed...
Definition: Utils.cpp:54
MachineInstr * getOpcodeDef(unsigned Opcode, Register Reg, const MachineRegisterInfo &MRI)
See if Reg is defined by a single def instruction that is Opcode.
Definition: Utils.cpp:625
const ConstantFP * getConstantFPVRegVal(Register VReg, const MachineRegisterInfo &MRI)
Definition: Utils.cpp:438
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
std::optional< APInt > getIConstantVRegVal(Register VReg, const MachineRegisterInfo &MRI)
If VReg is defined by a G_CONSTANT, return the corresponding value.
Definition: Utils.cpp:293
bool constrainSelectedInstRegOperands(MachineInstr &I, const TargetInstrInfo &TII, const TargetRegisterInfo &TRI, const RegisterBankInfo &RBI)
Mutate the newly-selected instruction I to constrain its (possibly generic) virtual register operands...
Definition: Utils.cpp:153
bool isPreISelGenericOpcode(unsigned Opcode)
Check whether the given Opcode is a generic opcode that is not supposed to appear after ISel.
Definition: TargetOpcodes.h:30
unsigned getBLRCallOpcode(const MachineFunction &MF)
Return opcode to be used for indirect calls.
MachineInstr * getDefIgnoringCopies(Register Reg, const MachineRegisterInfo &MRI)
Find the def instruction for Reg, folding away any trivial copies.
Definition: Utils.cpp:465
std::optional< int64_t > getIConstantVRegSExtVal(Register VReg, const MachineRegisterInfo &MRI)
If VReg is defined by a G_CONSTANT that fits in int64_t, returns it.
Definition: Utils.cpp:305
constexpr bool isShiftedMask_64(uint64_t Value)
Return true if the argument contains a non-empty sequence of ones with the remainder zero (64 bit ver...
Definition: MathExtras.h:258
bool shouldOptForSize(const MachineBasicBlock &MBB, ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI)
Returns true if the given block should be optimized for size.
Definition: Utils.cpp:1541
OutputIt transform(R &&Range, OutputIt d_first, UnaryFunction F)
Wrapper function around std::transform to apply a function to a range and store the result elsewhere.
Definition: STLExtras.h:1937
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1738
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition: MathExtras.h:313
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
std::optional< ValueAndVReg > getAnyConstantVRegValWithLookThrough(Register VReg, const MachineRegisterInfo &MRI, bool LookThroughInstrs=true, bool LookThroughAnyExt=false)
If VReg is defined by a statically evaluable chain of instructions rooted on a G_CONSTANT or G_FCONST...
Definition: Utils.cpp:419
AtomicOrdering
Atomic ordering for LLVM's memory model.
InstructionSelector * createAArch64InstructionSelector(const AArch64TargetMachine &, AArch64Subtarget &, AArch64RegisterBankInfo &)
DWARFExpression::Operation Op
std::optional< ValueAndVReg > getIConstantVRegValWithLookThrough(Register VReg, const MachineRegisterInfo &MRI, bool LookThroughInstrs=true)
If VReg is defined by a statically evaluable chain of instructions rooted on a G_CONSTANT returns its...
Definition: Utils.cpp:413
std::optional< DefinitionAndSourceRegister > getDefSrcRegIgnoringCopies(Register Reg, const MachineRegisterInfo &MRI)
Find the def instruction for Reg, and underlying value Register folding away any copies.
Definition: Utils.cpp:446
Register getSrcRegIgnoringCopies(Register Reg, const MachineRegisterInfo &MRI)
Find the source register for Reg, folding away any trivial copies.
Definition: Utils.cpp:472
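An illustrative sketch of the GlobalISel utility helpers above, from llvm/CodeGen/GlobalISel/Utils.h (Reg, MRI, and the function name are placeholders):

#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/CodeGen/TargetOpcodes.h"
#include <optional>
using namespace llvm;

static void giselUtilsSketch(Register Reg, const MachineRegisterInfo &MRI) {
  // Fold away trivial COPYs to reach the real def / source vreg.
  MachineInstr *Def = getDefIgnoringCopies(Reg, MRI);
  Register Src = getSrcRegIgnoringCopies(Reg, MRI);

  // Constant lookups: a direct G_CONSTANT def vs. looking through copies
  // and simple extensions.
  std::optional<APInt> Direct = getIConstantVRegVal(Reg, MRI);
  std::optional<ValueAndVReg> Looked =
      getIConstantVRegValWithLookThrough(Reg, MRI);

  // Check whether Reg is (through copies) defined by a specific generic opcode.
  MachineInstr *Shl = getOpcodeDef(TargetOpcode::G_SHL, Reg, MRI);
  (void)Def; (void)Src; (void)Direct; (void)Looked; (void)Shl;
}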
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:860
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
static EVT getFloatingPointVT(unsigned BitWidth)
Returns the EVT that represents a floating-point type with the given number of bits.
Definition: ValueTypes.h:58
Class which stores all the state required in a MachineIRBuilder.
This class contains a discriminated union of information about pointers in memory operands,...
static MachinePointerInfo getConstantPool(MachineFunction &MF)
Return a MachinePointerInfo record that refers to the constant pool.