1//===- AArch64InstructionSelector.cpp ----------------------------*- C++ -*-==//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8/// \file
9/// This file implements the targeting of the InstructionSelector class for
10/// AArch64.
11/// \todo This should be generated by TableGen.
12//===----------------------------------------------------------------------===//
13
15#include "AArch64InstrInfo.h"
18#include "AArch64RegisterInfo.h"
19#include "AArch64Subtarget.h"
41#include "llvm/IR/Constants.h"
44#include "llvm/IR/IntrinsicsAArch64.h"
46#include "llvm/IR/Type.h"
47#include "llvm/Pass.h"
48#include "llvm/Support/Debug.h"
50#include <optional>
51
52#define DEBUG_TYPE "aarch64-isel"
53
54using namespace llvm;
55using namespace MIPatternMatch;
56using namespace AArch64GISelUtils;
57
58namespace llvm {
59class BlockFrequencyInfo;
60class ProfileSummaryInfo;
61}
62
63namespace {
64
65#define GET_GLOBALISEL_PREDICATE_BITSET
66#include "AArch64GenGlobalISel.inc"
67#undef GET_GLOBALISEL_PREDICATE_BITSET
68
69
70class AArch64InstructionSelector : public InstructionSelector {
71public:
72 AArch64InstructionSelector(const AArch64TargetMachine &TM,
73 const AArch64Subtarget &STI,
74 const AArch64RegisterBankInfo &RBI);
75
76 bool select(MachineInstr &I) override;
77 static const char *getName() { return DEBUG_TYPE; }
78
79 void setupMF(MachineFunction &MF, GISelKnownBits *KB,
80 CodeGenCoverage *CoverageInfo, ProfileSummaryInfo *PSI,
81 BlockFrequencyInfo *BFI) override {
82 InstructionSelector::setupMF(MF, KB, CoverageInfo, PSI, BFI);
83 MIB.setMF(MF);
84
85 // hasFnAttribute() is expensive to call on every BRCOND selection, so
86 // cache it here for each run of the selector.
87 ProduceNonFlagSettingCondBr =
88 !MF.getFunction().hasFnAttribute(Attribute::SpeculativeLoadHardening);
89 MFReturnAddr = Register();
90
91 processPHIs(MF);
92 }
93
94private:
95 /// tblgen-erated 'select' implementation, used as the initial selector for
96 /// the patterns that don't require complex C++.
97 bool selectImpl(MachineInstr &I, CodeGenCoverage &CoverageInfo) const;
98
99 // A lowering phase that runs before any selection attempts.
100 // Returns true if the instruction was modified.
101 bool preISelLower(MachineInstr &I);
102
103 // An early selection function that runs before the selectImpl() call.
104 bool earlySelect(MachineInstr &I);
105
106 /// Save state that is shared between select calls, call select on \p I and
107 /// then restore the saved state. This can be used to recursively call select
108 /// within a select call.
109 bool selectAndRestoreState(MachineInstr &I);
110
111 // Do some preprocessing of G_PHIs before we begin selection.
112 void processPHIs(MachineFunction &MF);
113
114 bool earlySelectSHL(MachineInstr &I, MachineRegisterInfo &MRI);
115
116 /// Eliminate same-sized cross-bank copies into stores before selectImpl().
117 bool contractCrossBankCopyIntoStore(MachineInstr &I,
118                                      MachineRegisterInfo &MRI);
119
120 bool convertPtrAddToAdd(MachineInstr &I, MachineRegisterInfo &MRI);
121
122 bool selectVaStartAAPCS(MachineInstr &I, MachineFunction &MF,
123 MachineRegisterInfo &MRI) const;
124 bool selectVaStartDarwin(MachineInstr &I, MachineFunction &MF,
125 MachineRegisterInfo &MRI) const;
126
127 ///@{
128 /// Helper functions for selectCompareBranch.
129 bool selectCompareBranchFedByFCmp(MachineInstr &I, MachineInstr &FCmp,
130 MachineIRBuilder &MIB) const;
131 bool selectCompareBranchFedByICmp(MachineInstr &I, MachineInstr &ICmp,
132 MachineIRBuilder &MIB) const;
133 bool tryOptCompareBranchFedByICmp(MachineInstr &I, MachineInstr &ICmp,
134 MachineIRBuilder &MIB) const;
135 bool tryOptAndIntoCompareBranch(MachineInstr &AndInst, bool Invert,
136 MachineBasicBlock *DstMBB,
137 MachineIRBuilder &MIB) const;
138 ///@}
139
140 bool selectCompareBranch(MachineInstr &I, MachineFunction &MF,
141                            MachineRegisterInfo &MRI);
142
143 bool selectVectorAshrLshr(MachineInstr &I, MachineRegisterInfo &MRI);
144 bool selectVectorSHL(MachineInstr &I, MachineRegisterInfo &MRI);
145
146 // Helper to generate an equivalent of scalar_to_vector into a new register,
147 // returned via 'Dst'.
148 MachineInstr *emitScalarToVector(unsigned EltSize,
149 const TargetRegisterClass *DstRC,
150 Register Scalar,
151 MachineIRBuilder &MIRBuilder) const;
152 /// Helper to narrow vector that was widened by emitScalarToVector.
153 /// Copy lowest part of 128-bit or 64-bit vector to 64-bit or 32-bit
154 /// vector, respectively.
155 MachineInstr *emitNarrowVector(Register DstReg, Register SrcReg,
156 MachineIRBuilder &MIRBuilder,
157 MachineRegisterInfo &MRI) const;
158
159 /// Emit a lane insert into \p DstReg, or a new vector register if
160 /// std::nullopt is provided.
161 ///
162 /// The lane inserted into is defined by \p LaneIdx. The vector source
163 /// register is given by \p SrcReg. The register containing the element is
164 /// given by \p EltReg.
165 MachineInstr *emitLaneInsert(std::optional<Register> DstReg, Register SrcReg,
166 Register EltReg, unsigned LaneIdx,
167 const RegisterBank &RB,
168 MachineIRBuilder &MIRBuilder) const;
169
170 /// Emit a sequence of instructions representing a constant \p CV for a
171 /// vector register \p Dst. (E.g. a MOV, or a load from a constant pool.)
172 ///
173 /// \returns the last instruction in the sequence on success, and nullptr
174 /// otherwise.
175 MachineInstr *emitConstantVector(Register Dst, Constant *CV,
176 MachineIRBuilder &MIRBuilder,
177                                    MachineRegisterInfo &MRI);
178
179 MachineInstr *tryAdvSIMDModImm8(Register Dst, unsigned DstSize, APInt Bits,
180 MachineIRBuilder &MIRBuilder);
181
182 MachineInstr *tryAdvSIMDModImm16(Register Dst, unsigned DstSize, APInt Bits,
183 MachineIRBuilder &MIRBuilder, bool Inv);
184
185 MachineInstr *tryAdvSIMDModImm32(Register Dst, unsigned DstSize, APInt Bits,
186 MachineIRBuilder &MIRBuilder, bool Inv);
187 MachineInstr *tryAdvSIMDModImm64(Register Dst, unsigned DstSize, APInt Bits,
188 MachineIRBuilder &MIRBuilder);
189 MachineInstr *tryAdvSIMDModImm321s(Register Dst, unsigned DstSize, APInt Bits,
190 MachineIRBuilder &MIRBuilder, bool Inv);
191 MachineInstr *tryAdvSIMDModImmFP(Register Dst, unsigned DstSize, APInt Bits,
192 MachineIRBuilder &MIRBuilder);
193
194 bool tryOptConstantBuildVec(MachineInstr &MI, LLT DstTy,
195                             MachineRegisterInfo &MRI);
196 /// \returns true if a G_BUILD_VECTOR instruction \p MI can be selected as a
197 /// SUBREG_TO_REG.
198 bool tryOptBuildVecToSubregToReg(MachineInstr &MI, MachineRegisterInfo &MRI);
199 bool selectBuildVector(MachineInstr &I, MachineRegisterInfo &MRI);
202
203 bool selectShuffleVector(MachineInstr &I, MachineRegisterInfo &MRI);
204 bool selectExtractElt(MachineInstr &I, MachineRegisterInfo &MRI);
205 bool selectConcatVectors(MachineInstr &I, MachineRegisterInfo &MRI);
206 bool selectSplitVectorUnmerge(MachineInstr &I, MachineRegisterInfo &MRI);
207
208 /// Helper function to select vector load intrinsics like
209 /// @llvm.aarch64.neon.ld2.*, @llvm.aarch64.neon.ld4.*, etc.
210 /// \p Opc is the opcode that the selected instruction should use.
211 /// \p NumVecs is the number of vector destinations for the instruction.
212 /// \p I is the original G_INTRINSIC_W_SIDE_EFFECTS instruction.
213 bool selectVectorLoadIntrinsic(unsigned Opc, unsigned NumVecs,
214 MachineInstr &I);
215 bool selectVectorLoadLaneIntrinsic(unsigned Opc, unsigned NumVecs,
216 MachineInstr &I);
217 void selectVectorStoreIntrinsic(MachineInstr &I, unsigned NumVecs,
218 unsigned Opc);
219 bool selectVectorStoreLaneIntrinsic(MachineInstr &I, unsigned NumVecs,
220 unsigned Opc);
221 bool selectIntrinsicWithSideEffects(MachineInstr &I,
222                                     MachineRegisterInfo &MRI);
223 bool selectIntrinsic(MachineInstr &I, MachineRegisterInfo &MRI);
224 bool selectJumpTable(MachineInstr &I, MachineRegisterInfo &MRI);
225 bool selectBrJT(MachineInstr &I, MachineRegisterInfo &MRI);
226 bool selectTLSGlobalValue(MachineInstr &I, MachineRegisterInfo &MRI);
227 bool selectPtrAuthGlobalValue(MachineInstr &I,
228 MachineRegisterInfo &MRI) const;
229 bool selectReduction(MachineInstr &I, MachineRegisterInfo &MRI);
230 bool selectMOPS(MachineInstr &I, MachineRegisterInfo &MRI);
231 bool selectUSMovFromExtend(MachineInstr &I, MachineRegisterInfo &MRI);
232 void SelectTable(MachineInstr &I, MachineRegisterInfo &MRI, unsigned NumVecs,
233 unsigned Opc1, unsigned Opc2, bool isExt);
234
235 bool selectIndexedExtLoad(MachineInstr &I, MachineRegisterInfo &MRI);
236 bool selectIndexedLoad(MachineInstr &I, MachineRegisterInfo &MRI);
237 bool selectIndexedStore(GIndexedStore &I, MachineRegisterInfo &MRI);
238
239 unsigned emitConstantPoolEntry(const Constant *CPVal,
240 MachineFunction &MF) const;
241 MachineInstr *emitLoadFromConstantPool(const Constant *CPVal,
242                                        MachineIRBuilder &MIRBuilder) const;
243
244 // Emit a vector concat operation.
245 MachineInstr *emitVectorConcat(std::optional<Register> Dst, Register Op1,
246 Register Op2,
247 MachineIRBuilder &MIRBuilder) const;
248
249 // Emit an integer compare between LHS and RHS, which checks for Predicate.
250 MachineInstr *emitIntegerCompare(MachineOperand &LHS, MachineOperand &RHS,
251 MachineOperand &Predicate,
252 MachineIRBuilder &MIRBuilder) const;
253
254 /// Emit a floating point comparison between \p LHS and \p RHS.
255 /// \p Pred, if given, is the intended predicate to use.
256 MachineInstr *
257 emitFPCompare(Register LHS, Register RHS, MachineIRBuilder &MIRBuilder,
258 std::optional<CmpInst::Predicate> = std::nullopt) const;
259
260 MachineInstr *
261 emitInstr(unsigned Opcode, std::initializer_list<llvm::DstOp> DstOps,
262 std::initializer_list<llvm::SrcOp> SrcOps,
263 MachineIRBuilder &MIRBuilder,
264 const ComplexRendererFns &RenderFns = std::nullopt) const;
265 /// Helper function to emit an add or sub instruction.
266 ///
267 /// \p AddrModeAndSizeToOpcode must contain each of the opcode variants above
268 /// in a specific order.
269 ///
270 /// Below is an example of the expected input to \p AddrModeAndSizeToOpcode.
271 ///
272 /// \code
273 /// const std::array<std::array<unsigned, 2>, 5> Table {
274 /// {{AArch64::ADDXri, AArch64::ADDWri},
275 /// {AArch64::ADDXrs, AArch64::ADDWrs},
276 /// {AArch64::ADDXrr, AArch64::ADDWrr},
277 /// {AArch64::SUBXri, AArch64::SUBWri},
278 /// {AArch64::ADDXrx, AArch64::ADDWrx}}};
279 /// \endcode
280 ///
281 /// Each row in the table corresponds to a different addressing mode. Each
282 /// column corresponds to a different register size.
283 ///
284 /// \attention Rows must be structured as follows:
285 /// - Row 0: The ri opcode variants
286 /// - Row 1: The rs opcode variants
287 /// - Row 2: The rr opcode variants
288 /// - Row 3: The ri opcode variants for negative immediates
289 /// - Row 4: The rx opcode variants
290 ///
291 /// \attention Columns must be structured as follows:
292 /// - Column 0: The 64-bit opcode variants
293 /// - Column 1: The 32-bit opcode variants
294 ///
295 /// \p Dst is the destination register of the binop to emit.
296 /// \p LHS is the left-hand operand of the binop to emit.
297 /// \p RHS is the right-hand operand of the binop to emit.
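 ///
 /// For example, selecting a 64-bit register-register add uses row 2,
 /// column 0 of the table above. A minimal, purely illustrative lookup
 /// (the indices below are an example, not a helper defined in this class):
 ///
 /// \code
 /// // Row 2 = rr variants; column 0 = 64-bit, column 1 = 32-bit.
 /// unsigned Opc = AddrModeAndSizeToOpcode[2][Is32Bit ? 1 : 0];
 /// // With the example table, Opc == AArch64::ADDXrr when !Is32Bit.
 /// \endcode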
298 MachineInstr *emitAddSub(
299 const std::array<std::array<unsigned, 2>, 5> &AddrModeAndSizeToOpcode,
300                        Register Dst, MachineOperand &LHS, MachineOperand &RHS,
301                        MachineIRBuilder &MIRBuilder) const;
302 MachineInstr *emitADD(Register DefReg, MachineOperand &LHS,
303                       MachineOperand &RHS,
304                       MachineIRBuilder &MIRBuilder) const;
305 MachineInstr *emitADDS(Register Dst, MachineOperand &LHS, MachineOperand &RHS,
306                        MachineIRBuilder &MIRBuilder) const;
307 MachineInstr *emitSUBS(Register Dst, MachineOperand &LHS, MachineOperand &RHS,
308                        MachineIRBuilder &MIRBuilder) const;
309 MachineInstr *emitADCS(Register Dst, MachineOperand &LHS, MachineOperand &RHS,
310                        MachineIRBuilder &MIRBuilder) const;
311 MachineInstr *emitSBCS(Register Dst, MachineOperand &LHS, MachineOperand &RHS,
312                        MachineIRBuilder &MIRBuilder) const;
313 MachineInstr *emitCMN(MachineOperand &LHS, MachineOperand &RHS,
314                       MachineIRBuilder &MIRBuilder) const;
315 MachineInstr *emitTST(MachineOperand &LHS, MachineOperand &RHS,
316                       MachineIRBuilder &MIRBuilder) const;
317 MachineInstr *emitSelect(Register Dst, Register LHS, Register RHS,
318                            AArch64CC::CondCode CC,
319                            MachineIRBuilder &MIRBuilder) const;
320 MachineInstr *emitExtractVectorElt(std::optional<Register> DstReg,
321 const RegisterBank &DstRB, LLT ScalarTy,
322 Register VecReg, unsigned LaneIdx,
323 MachineIRBuilder &MIRBuilder) const;
324 MachineInstr *emitCSINC(Register Dst, Register Src1, Register Src2,
325                           AArch64CC::CondCode Pred,
326                           MachineIRBuilder &MIRBuilder) const;
327 /// Emit a CSet for a FP compare.
328 ///
329 /// \p Dst is expected to be a 32-bit scalar register.
330 MachineInstr *emitCSetForFCmp(Register Dst, CmpInst::Predicate Pred,
331 MachineIRBuilder &MIRBuilder) const;
332
333 /// Emit an instruction that sets NZCV to the carry-in expected by \p I.
334 /// Might elide the instruction if the previous instruction already sets NZCV
335 /// correctly.
336 MachineInstr *emitCarryIn(MachineInstr &I, Register CarryReg);
337
338 /// Emit the overflow op for \p Opcode.
339 ///
340 /// \p Opcode is expected to be an overflow op's opcode, e.g. G_UADDO,
341 /// G_USUBO, etc.
342 std::pair<MachineInstr *, AArch64CC::CondCode>
343 emitOverflowOp(unsigned Opcode, Register Dst, MachineOperand &LHS,
344 MachineOperand &RHS, MachineIRBuilder &MIRBuilder) const;
345
346 bool selectOverflowOp(MachineInstr &I, MachineRegisterInfo &MRI);
347
348 /// Emit expression as a conjunction (a series of CCMP/CFCMP ops).
349 /// In some cases this is even possible with OR operations in the expression.
350 MachineInstr *emitConjunction(Register Val, AArch64CC::CondCode &OutCC,
351                               MachineIRBuilder &MIB) const;
352 MachineInstr *emitConditionalComparison(Register LHS, Register RHS,
353                                         CmpInst::Predicate CC,
354                                         AArch64CC::CondCode Predicate,
355                                         AArch64CC::CondCode OutCC,
356                                         MachineIRBuilder &MIB) const;
357 MachineInstr *emitConjunctionRec(Register Val, AArch64CC::CondCode &OutCC,
358 bool Negate, Register CCOp,
359 AArch64CC::CondCode Predicate,
360 MachineIRBuilder &MIB) const;
361
362 /// Emit a TB(N)Z instruction which tests \p Bit in \p TestReg.
363 /// \p IsNegative is true if the test should be "not zero".
364 /// This will also optimize the test bit instruction when possible.
365 MachineInstr *emitTestBit(Register TestReg, uint64_t Bit, bool IsNegative,
366 MachineBasicBlock *DstMBB,
367 MachineIRBuilder &MIB) const;
368
369 /// Emit a CB(N)Z instruction which branches to \p DestMBB.
370 MachineInstr *emitCBZ(Register CompareReg, bool IsNegative,
371 MachineBasicBlock *DestMBB,
372 MachineIRBuilder &MIB) const;
373
374 // Equivalent to the i32shift_a and friends from AArch64InstrInfo.td.
375 // We use these manually instead of using the importer since it doesn't
376 // support SDNodeXForm.
377 ComplexRendererFns selectShiftA_32(const MachineOperand &Root) const;
378 ComplexRendererFns selectShiftB_32(const MachineOperand &Root) const;
379 ComplexRendererFns selectShiftA_64(const MachineOperand &Root) const;
380 ComplexRendererFns selectShiftB_64(const MachineOperand &Root) const;
381
382 ComplexRendererFns select12BitValueWithLeftShift(uint64_t Immed) const;
383 ComplexRendererFns selectArithImmed(MachineOperand &Root) const;
384 ComplexRendererFns selectNegArithImmed(MachineOperand &Root) const;
385
386 ComplexRendererFns selectAddrModeUnscaled(MachineOperand &Root,
387 unsigned Size) const;
388
389 ComplexRendererFns selectAddrModeUnscaled8(MachineOperand &Root) const {
390 return selectAddrModeUnscaled(Root, 1);
391 }
392 ComplexRendererFns selectAddrModeUnscaled16(MachineOperand &Root) const {
393 return selectAddrModeUnscaled(Root, 2);
394 }
395 ComplexRendererFns selectAddrModeUnscaled32(MachineOperand &Root) const {
396 return selectAddrModeUnscaled(Root, 4);
397 }
398 ComplexRendererFns selectAddrModeUnscaled64(MachineOperand &Root) const {
399 return selectAddrModeUnscaled(Root, 8);
400 }
401 ComplexRendererFns selectAddrModeUnscaled128(MachineOperand &Root) const {
402 return selectAddrModeUnscaled(Root, 16);
403 }
404
405 /// Helper to try to fold in a GISEL_ADD_LOW into an immediate, to be used
406 /// from complex pattern matchers like selectAddrModeIndexed().
407 ComplexRendererFns tryFoldAddLowIntoImm(MachineInstr &RootDef, unsigned Size,
408 MachineRegisterInfo &MRI) const;
409
410 ComplexRendererFns selectAddrModeIndexed(MachineOperand &Root,
411 unsigned Size) const;
412 template <int Width>
413 ComplexRendererFns selectAddrModeIndexed(MachineOperand &Root) const {
414 return selectAddrModeIndexed(Root, Width / 8);
415 }
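 // Illustrative note: the Width template argument is in bits, so e.g.
 // selectAddrModeIndexed<64>(Root) forwards to selectAddrModeIndexed(Root, 8)
 // and matches a base plus an unsigned, 8-byte-scaled immediate offset (the
 // LDRXui/STRXui style addressing mode) for the tblgen-erated patterns.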
416
417 bool isWorthFoldingIntoExtendedReg(MachineInstr &MI,
418 const MachineRegisterInfo &MRI) const;
419 ComplexRendererFns
420 selectAddrModeShiftedExtendXReg(MachineOperand &Root,
421 unsigned SizeInBytes) const;
422
423 /// Returns a \p ComplexRendererFns which contains a base, offset, and whether
424 /// or not a shift + extend should be folded into an addressing mode. Returns
425 /// None when this is not profitable or possible.
426 ComplexRendererFns
427 selectExtendedSHL(MachineOperand &Root, MachineOperand &Base,
428 MachineOperand &Offset, unsigned SizeInBytes,
429 bool WantsExt) const;
430 ComplexRendererFns selectAddrModeRegisterOffset(MachineOperand &Root) const;
431 ComplexRendererFns selectAddrModeXRO(MachineOperand &Root,
432 unsigned SizeInBytes) const;
433 template <int Width>
434 ComplexRendererFns selectAddrModeXRO(MachineOperand &Root) const {
435 return selectAddrModeXRO(Root, Width / 8);
436 }
437
438 ComplexRendererFns selectAddrModeWRO(MachineOperand &Root,
439 unsigned SizeInBytes) const;
440 template <int Width>
441 ComplexRendererFns selectAddrModeWRO(MachineOperand &Root) const {
442 return selectAddrModeWRO(Root, Width / 8);
443 }
444
445 ComplexRendererFns selectShiftedRegister(MachineOperand &Root,
446 bool AllowROR = false) const;
447
448 ComplexRendererFns selectArithShiftedRegister(MachineOperand &Root) const {
449 return selectShiftedRegister(Root);
450 }
451
452 ComplexRendererFns selectLogicalShiftedRegister(MachineOperand &Root) const {
453 return selectShiftedRegister(Root, true);
454 }
455
456 /// Given an extend instruction, determine the correct shift-extend type for
457 /// that instruction.
458 ///
459 /// If the instruction is going to be used in a load or store, pass
460 /// \p IsLoadStore = true.
461 AArch64_AM::ShiftExtendType
462 getExtendTypeForInst(MachineInstr &MI, MachineRegisterInfo &MRI,
463 bool IsLoadStore = false) const;
464
465 /// Move \p Reg to \p RC if \p Reg is not already on \p RC.
466 ///
467 /// \returns Either \p Reg if no change was necessary, or the new register
468 /// created by moving \p Reg.
469 ///
470 /// Note: This uses emitCopy right now.
471 Register moveScalarRegClass(Register Reg, const TargetRegisterClass &RC,
472 MachineIRBuilder &MIB) const;
473
474 ComplexRendererFns selectArithExtendedRegister(MachineOperand &Root) const;
475
476 ComplexRendererFns selectExtractHigh(MachineOperand &Root) const;
477
478 void renderTruncImm(MachineInstrBuilder &MIB, const MachineInstr &MI,
479 int OpIdx = -1) const;
480 void renderLogicalImm32(MachineInstrBuilder &MIB, const MachineInstr &I,
481 int OpIdx = -1) const;
482 void renderLogicalImm64(MachineInstrBuilder &MIB, const MachineInstr &I,
483 int OpIdx = -1) const;
484 void renderUbsanTrap(MachineInstrBuilder &MIB, const MachineInstr &MI,
485 int OpIdx) const;
486 void renderFPImm16(MachineInstrBuilder &MIB, const MachineInstr &MI,
487 int OpIdx = -1) const;
488 void renderFPImm32(MachineInstrBuilder &MIB, const MachineInstr &MI,
489 int OpIdx = -1) const;
490 void renderFPImm64(MachineInstrBuilder &MIB, const MachineInstr &MI,
491 int OpIdx = -1) const;
492 void renderFPImm32SIMDModImmType4(MachineInstrBuilder &MIB,
493 const MachineInstr &MI,
494 int OpIdx = -1) const;
495
496 // Materialize a GlobalValue or BlockAddress using a movz+movk sequence.
497 void materializeLargeCMVal(MachineInstr &I, const Value *V, unsigned OpFlags);
498
499 // Optimization methods.
500 bool tryOptSelect(GSelect &Sel);
501 bool tryOptSelectConjunction(GSelect &Sel, MachineInstr &CondMI);
502 MachineInstr *tryFoldIntegerCompare(MachineOperand &LHS, MachineOperand &RHS,
503 MachineOperand &Predicate,
504 MachineIRBuilder &MIRBuilder) const;
505
506 /// Return true if \p MI is a load or store of \p NumBytes bytes.
507 bool isLoadStoreOfNumBytes(const MachineInstr &MI, unsigned NumBytes) const;
508
509 /// Returns true if \p MI is guaranteed to have the high-half of a 64-bit
510 /// register zeroed out. In other words, the result of MI has been explicitly
511 /// zero extended.
512 bool isDef32(const MachineInstr &MI) const;
513
514 const AArch64TargetMachine &TM;
515 const AArch64Subtarget &STI;
516 const AArch64InstrInfo &TII;
517 const AArch64RegisterInfo &TRI;
518 const AArch64RegisterBankInfo &RBI;
519
520 bool ProduceNonFlagSettingCondBr = false;
521
522 // Some cached values used during selection.
523 // We use LR as a live-in register, and we keep track of it here as it can be
524 // clobbered by calls.
525 Register MFReturnAddr;
526
527 MachineIRBuilder MIB;
528
529#define GET_GLOBALISEL_PREDICATES_DECL
530#include "AArch64GenGlobalISel.inc"
531#undef GET_GLOBALISEL_PREDICATES_DECL
532
533// We declare the temporaries used by selectImpl() in the class to minimize the
534// cost of constructing placeholder values.
535#define GET_GLOBALISEL_TEMPORARIES_DECL
536#include "AArch64GenGlobalISel.inc"
537#undef GET_GLOBALISEL_TEMPORARIES_DECL
538};
539
540} // end anonymous namespace
541
542#define GET_GLOBALISEL_IMPL
543#include "AArch64GenGlobalISel.inc"
544#undef GET_GLOBALISEL_IMPL
545
546AArch64InstructionSelector::AArch64InstructionSelector(
547 const AArch64TargetMachine &TM, const AArch64Subtarget &STI,
548 const AArch64RegisterBankInfo &RBI)
549 : TM(TM), STI(STI), TII(*STI.getInstrInfo()), TRI(*STI.getRegisterInfo()),
550 RBI(RBI),
551#define GET_GLOBALISEL_PREDICATES_INIT
552#include "AArch64GenGlobalISel.inc"
553#undef GET_GLOBALISEL_PREDICATES_INIT
554#define GET_GLOBALISEL_TEMPORARIES_INIT
555#include "AArch64GenGlobalISel.inc"
556#undef GET_GLOBALISEL_TEMPORARIES_INIT
557{
558}
559
560// FIXME: This should be target-independent, inferred from the types declared
561// for each class in the bank.
562//
563/// Given a register bank, and a type, return the smallest register class that
564/// can represent that combination.
565static const TargetRegisterClass *
566getRegClassForTypeOnBank(LLT Ty, const RegisterBank &RB,
567 bool GetAllRegSet = false) {
568 if (RB.getID() == AArch64::GPRRegBankID) {
569 if (Ty.getSizeInBits() <= 32)
570 return GetAllRegSet ? &AArch64::GPR32allRegClass
571 : &AArch64::GPR32RegClass;
572 if (Ty.getSizeInBits() == 64)
573 return GetAllRegSet ? &AArch64::GPR64allRegClass
574 : &AArch64::GPR64RegClass;
575 if (Ty.getSizeInBits() == 128)
576 return &AArch64::XSeqPairsClassRegClass;
577 return nullptr;
578 }
579
580 if (RB.getID() == AArch64::FPRRegBankID) {
581 switch (Ty.getSizeInBits()) {
582 case 8:
583 return &AArch64::FPR8RegClass;
584 case 16:
585 return &AArch64::FPR16RegClass;
586 case 32:
587 return &AArch64::FPR32RegClass;
588 case 64:
589 return &AArch64::FPR64RegClass;
590 case 128:
591 return &AArch64::FPR128RegClass;
592 }
593 return nullptr;
594 }
595
596 return nullptr;
597}
598
599/// Given a register bank, and size in bits, return the smallest register class
600/// that can represent that combination.
601static const TargetRegisterClass *
602getMinClassForRegBank(const RegisterBank &RB, TypeSize SizeInBits,
603                      bool GetAllRegSet = false) {
604 if (SizeInBits.isScalable()) {
605 assert(RB.getID() == AArch64::FPRRegBankID &&
606 "Expected FPR regbank for scalable type size");
607 return &AArch64::ZPRRegClass;
608 }
609
610 unsigned RegBankID = RB.getID();
611
612 if (RegBankID == AArch64::GPRRegBankID) {
613 if (SizeInBits <= 32)
614 return GetAllRegSet ? &AArch64::GPR32allRegClass
615 : &AArch64::GPR32RegClass;
616 if (SizeInBits == 64)
617 return GetAllRegSet ? &AArch64::GPR64allRegClass
618 : &AArch64::GPR64RegClass;
619 if (SizeInBits == 128)
620 return &AArch64::XSeqPairsClassRegClass;
621 }
622
623 if (RegBankID == AArch64::FPRRegBankID) {
624 switch (SizeInBits) {
625 default:
626 return nullptr;
627 case 8:
628 return &AArch64::FPR8RegClass;
629 case 16:
630 return &AArch64::FPR16RegClass;
631 case 32:
632 return &AArch64::FPR32RegClass;
633 case 64:
634 return &AArch64::FPR64RegClass;
635 case 128:
636 return &AArch64::FPR128RegClass;
637 }
638 }
639
640 return nullptr;
641}
642
643/// Returns the correct subregister to use for a given register class.
644static bool getSubRegForClass(const TargetRegisterClass *RC,
645                              const TargetRegisterInfo &TRI, unsigned &SubReg) {
646 switch (TRI.getRegSizeInBits(*RC)) {
647 case 8:
648 SubReg = AArch64::bsub;
649 break;
650 case 16:
651 SubReg = AArch64::hsub;
652 break;
653 case 32:
654 if (RC != &AArch64::FPR32RegClass)
655 SubReg = AArch64::sub_32;
656 else
657 SubReg = AArch64::ssub;
658 break;
659 case 64:
660 SubReg = AArch64::dsub;
661 break;
662 default:
663     LLVM_DEBUG(
664         dbgs() << "Couldn't find appropriate subregister for register class.");
665 return false;
666 }
667
668 return true;
669}
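// A minimal illustrative use of getSubRegForClass (mirroring how selectCopy
// below consumes it); the surrounding variables are assumed, not defined here:
//
//   unsigned SubReg;
//   if (getSubRegForClass(&AArch64::FPR32RegClass, TRI, SubReg))
//     ; // SubReg == AArch64::ssub, i.e. the index used in "COPY Src:ssub".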
670
671/// Returns the minimum size the given register bank can hold.
672static unsigned getMinSizeForRegBank(const RegisterBank &RB) {
673 switch (RB.getID()) {
674 case AArch64::GPRRegBankID:
675 return 32;
676 case AArch64::FPRRegBankID:
677 return 8;
678 default:
679 llvm_unreachable("Tried to get minimum size for unknown register bank.");
680 }
681}
682
683/// Create a REG_SEQUENCE instruction using the registers in \p Regs.
684/// Helper function for functions like createDTuple and createQTuple.
685///
686/// \p RegClassIDs - The list of register class IDs available for some tuple of
687/// a scalar class. E.g. QQRegClassID, QQQRegClassID, QQQQRegClassID. This is
688/// expected to contain between 2 and 4 tuple classes.
689///
690/// \p SubRegs - The list of subregister classes associated with each register
691/// class ID in \p RegClassIDs. E.g., QQRegClassID should use the qsub0
692/// subregister class. The index of each subregister class is expected to
693/// correspond with the index of each register class.
694///
695/// \returns Either the destination register of REG_SEQUENCE instruction that
696/// was created, or the 0th element of \p Regs if \p Regs contains a single
697/// element.
698static Register createTuple(ArrayRef<Register> Regs,
699                            const unsigned RegClassIDs[],
700 const unsigned SubRegs[], MachineIRBuilder &MIB) {
701 unsigned NumRegs = Regs.size();
702 if (NumRegs == 1)
703 return Regs[0];
704 assert(NumRegs >= 2 && NumRegs <= 4 &&
705 "Only support between two and 4 registers in a tuple!");
706  const TargetRegisterInfo *TRI = MIB.getMF().getSubtarget().getRegisterInfo();
707  auto *DesiredClass = TRI->getRegClass(RegClassIDs[NumRegs - 2]);
708 auto RegSequence =
709 MIB.buildInstr(TargetOpcode::REG_SEQUENCE, {DesiredClass}, {});
710 for (unsigned I = 0, E = Regs.size(); I < E; ++I) {
711 RegSequence.addUse(Regs[I]);
712 RegSequence.addImm(SubRegs[I]);
713 }
714 return RegSequence.getReg(0);
715}
716
717/// Create a tuple of D-registers using the registers in \p Regs.
718static Register createDTuple(ArrayRef<Register> Regs, MachineIRBuilder &MIB) {
719  static const unsigned RegClassIDs[] = {
720 AArch64::DDRegClassID, AArch64::DDDRegClassID, AArch64::DDDDRegClassID};
721 static const unsigned SubRegs[] = {AArch64::dsub0, AArch64::dsub1,
722 AArch64::dsub2, AArch64::dsub3};
723 return createTuple(Regs, RegClassIDs, SubRegs, MIB);
724}
725
726/// Create a tuple of Q-registers using the registers in \p Regs.
727static Register createQTuple(ArrayRef<Register> Regs, MachineIRBuilder &MIB) {
728  static const unsigned RegClassIDs[] = {
729 AArch64::QQRegClassID, AArch64::QQQRegClassID, AArch64::QQQQRegClassID};
730 static const unsigned SubRegs[] = {AArch64::qsub0, AArch64::qsub1,
731 AArch64::qsub2, AArch64::qsub3};
732 return createTuple(Regs, RegClassIDs, SubRegs, MIB);
733}
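// Illustrative example: for Regs = {%q0, %q1} (two FPR128 virtual registers),
// createQTuple emits roughly
//
//   %tuple:qq = REG_SEQUENCE %q0, %subreg.qsub0, %q1, %subreg.qsub1
//
// and returns %tuple, which the LD2/ST2-style intrinsic selection below can
// feed directly to a multi-vector instruction.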
734
735static std::optional<uint64_t> getImmedFromMO(const MachineOperand &Root) {
736 auto &MI = *Root.getParent();
737 auto &MBB = *MI.getParent();
738 auto &MF = *MBB.getParent();
739 auto &MRI = MF.getRegInfo();
740 uint64_t Immed;
741 if (Root.isImm())
742 Immed = Root.getImm();
743 else if (Root.isCImm())
744 Immed = Root.getCImm()->getZExtValue();
745 else if (Root.isReg()) {
746 auto ValAndVReg =
747        getIConstantVRegValWithLookThrough(Root.getReg(), MRI);
748    if (!ValAndVReg)
749 return std::nullopt;
750 Immed = ValAndVReg->Value.getSExtValue();
751 } else
752 return std::nullopt;
753 return Immed;
754}
755
756/// Check whether \p I is a currently unsupported binary operation:
757/// - it has an unsized type
758/// - an operand is not a vreg
759/// - all operands are not in the same bank
760/// These are checks that should someday live in the verifier, but right now,
761/// these are mostly limitations of the aarch64 selector.
762static bool unsupportedBinOp(const MachineInstr &I,
763 const AArch64RegisterBankInfo &RBI,
764                              const MachineRegisterInfo &MRI,
765                              const AArch64RegisterInfo &TRI) {
766 LLT Ty = MRI.getType(I.getOperand(0).getReg());
767 if (!Ty.isValid()) {
768 LLVM_DEBUG(dbgs() << "Generic binop register should be typed\n");
769 return true;
770 }
771
772 const RegisterBank *PrevOpBank = nullptr;
773 for (auto &MO : I.operands()) {
774 // FIXME: Support non-register operands.
775 if (!MO.isReg()) {
776 LLVM_DEBUG(dbgs() << "Generic inst non-reg operands are unsupported\n");
777 return true;
778 }
779
780 // FIXME: Can generic operations have physical registers operands? If
781 // so, this will need to be taught about that, and we'll need to get the
782 // bank out of the minimal class for the register.
783 // Either way, this needs to be documented (and possibly verified).
784 if (!MO.getReg().isVirtual()) {
785 LLVM_DEBUG(dbgs() << "Generic inst has physical register operand\n");
786 return true;
787 }
788
789 const RegisterBank *OpBank = RBI.getRegBank(MO.getReg(), MRI, TRI);
790 if (!OpBank) {
791 LLVM_DEBUG(dbgs() << "Generic register has no bank or class\n");
792 return true;
793 }
794
795 if (PrevOpBank && OpBank != PrevOpBank) {
796 LLVM_DEBUG(dbgs() << "Generic inst operands have different banks\n");
797 return true;
798 }
799 PrevOpBank = OpBank;
800 }
801 return false;
802}
803
804/// Select the AArch64 opcode for the basic binary operation \p GenericOpc
805/// (such as G_OR or G_SDIV), appropriate for the register bank \p RegBankID
806/// and of size \p OpSize.
807/// \returns \p GenericOpc if the combination is unsupported.
808static unsigned selectBinaryOp(unsigned GenericOpc, unsigned RegBankID,
809 unsigned OpSize) {
810 switch (RegBankID) {
811 case AArch64::GPRRegBankID:
812 if (OpSize == 32) {
813 switch (GenericOpc) {
814 case TargetOpcode::G_SHL:
815 return AArch64::LSLVWr;
816 case TargetOpcode::G_LSHR:
817 return AArch64::LSRVWr;
818 case TargetOpcode::G_ASHR:
819 return AArch64::ASRVWr;
820 default:
821 return GenericOpc;
822 }
823 } else if (OpSize == 64) {
824 switch (GenericOpc) {
825 case TargetOpcode::G_PTR_ADD:
826 return AArch64::ADDXrr;
827 case TargetOpcode::G_SHL:
828 return AArch64::LSLVXr;
829 case TargetOpcode::G_LSHR:
830 return AArch64::LSRVXr;
831 case TargetOpcode::G_ASHR:
832 return AArch64::ASRVXr;
833 default:
834 return GenericOpc;
835 }
836 }
837 break;
838 case AArch64::FPRRegBankID:
839 switch (OpSize) {
840 case 32:
841 switch (GenericOpc) {
842 case TargetOpcode::G_FADD:
843 return AArch64::FADDSrr;
844 case TargetOpcode::G_FSUB:
845 return AArch64::FSUBSrr;
846 case TargetOpcode::G_FMUL:
847 return AArch64::FMULSrr;
848 case TargetOpcode::G_FDIV:
849 return AArch64::FDIVSrr;
850 default:
851 return GenericOpc;
852 }
853 case 64:
854 switch (GenericOpc) {
855 case TargetOpcode::G_FADD:
856 return AArch64::FADDDrr;
857 case TargetOpcode::G_FSUB:
858 return AArch64::FSUBDrr;
859 case TargetOpcode::G_FMUL:
860 return AArch64::FMULDrr;
861 case TargetOpcode::G_FDIV:
862 return AArch64::FDIVDrr;
863 case TargetOpcode::G_OR:
864 return AArch64::ORRv8i8;
865 default:
866 return GenericOpc;
867 }
868 }
869 break;
870 }
871 return GenericOpc;
872}
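// Illustrative results of selectBinaryOp, taken directly from the switch
// above:
//
//   selectBinaryOp(TargetOpcode::G_SHL, AArch64::GPRRegBankID, 64)
//     == AArch64::LSLVXr
//   selectBinaryOp(TargetOpcode::G_FADD, AArch64::FPRRegBankID, 32)
//     == AArch64::FADDSrr
//   // Any unsupported combination returns GenericOpc unchanged.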
873
874/// Select the AArch64 opcode for the G_LOAD or G_STORE operation \p GenericOpc,
875/// appropriate for the (value) register bank \p RegBankID and of memory access
876/// size \p OpSize. This returns the variant with the base+unsigned-immediate
877/// addressing mode (e.g., LDRXui).
878/// \returns \p GenericOpc if the combination is unsupported.
879static unsigned selectLoadStoreUIOp(unsigned GenericOpc, unsigned RegBankID,
880 unsigned OpSize) {
881 const bool isStore = GenericOpc == TargetOpcode::G_STORE;
882 switch (RegBankID) {
883 case AArch64::GPRRegBankID:
884 switch (OpSize) {
885 case 8:
886 return isStore ? AArch64::STRBBui : AArch64::LDRBBui;
887 case 16:
888 return isStore ? AArch64::STRHHui : AArch64::LDRHHui;
889 case 32:
890 return isStore ? AArch64::STRWui : AArch64::LDRWui;
891 case 64:
892 return isStore ? AArch64::STRXui : AArch64::LDRXui;
893 }
894 break;
895 case AArch64::FPRRegBankID:
896 switch (OpSize) {
897 case 8:
898 return isStore ? AArch64::STRBui : AArch64::LDRBui;
899 case 16:
900 return isStore ? AArch64::STRHui : AArch64::LDRHui;
901 case 32:
902 return isStore ? AArch64::STRSui : AArch64::LDRSui;
903 case 64:
904 return isStore ? AArch64::STRDui : AArch64::LDRDui;
905 case 128:
906 return isStore ? AArch64::STRQui : AArch64::LDRQui;
907 }
908 break;
909 }
910 return GenericOpc;
911}
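// Illustrative results of selectLoadStoreUIOp, taken directly from the switch
// above:
//
//   selectLoadStoreUIOp(TargetOpcode::G_LOAD, AArch64::GPRRegBankID, 32)
//     == AArch64::LDRWui
//   selectLoadStoreUIOp(TargetOpcode::G_STORE, AArch64::FPRRegBankID, 128)
//     == AArch64::STRQui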
912
913/// Helper function for selectCopy. Inserts a subregister copy from \p SrcReg
914/// to \p *To.
915///
916/// E.g. "To = COPY SrcReg:SubReg"
917static bool copySubReg(MachineInstr &I, MachineRegisterInfo &MRI,
918                       const RegisterBankInfo &RBI, Register SrcReg,
919 const TargetRegisterClass *To, unsigned SubReg) {
920 assert(SrcReg.isValid() && "Expected a valid source register?");
921 assert(To && "Destination register class cannot be null");
922 assert(SubReg && "Expected a valid subregister");
923
924 MachineIRBuilder MIB(I);
925 auto SubRegCopy =
926 MIB.buildInstr(TargetOpcode::COPY, {To}, {}).addReg(SrcReg, 0, SubReg);
927 MachineOperand &RegOp = I.getOperand(1);
928 RegOp.setReg(SubRegCopy.getReg(0));
929
930 // It's possible that the destination register won't be constrained. Make
931 // sure that happens.
932 if (!I.getOperand(0).getReg().isPhysical())
933 RBI.constrainGenericRegister(I.getOperand(0).getReg(), *To, MRI);
934
935 return true;
936}
937
938/// Helper function to get the source and destination register classes for a
939/// copy. Returns a std::pair containing the source register class for the
940/// copy, and the destination register class for the copy. If a register class
941/// cannot be determined, then it will be nullptr.
942static std::pair<const TargetRegisterClass *, const TargetRegisterClass *>
943getRegClassesForCopy(MachineInstr &I, const TargetInstrInfo &TII,
944                     MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI,
945                     const RegisterBankInfo &RBI) {
946 Register DstReg = I.getOperand(0).getReg();
947 Register SrcReg = I.getOperand(1).getReg();
948 const RegisterBank &DstRegBank = *RBI.getRegBank(DstReg, MRI, TRI);
949 const RegisterBank &SrcRegBank = *RBI.getRegBank(SrcReg, MRI, TRI);
950
951 TypeSize DstSize = RBI.getSizeInBits(DstReg, MRI, TRI);
952 TypeSize SrcSize = RBI.getSizeInBits(SrcReg, MRI, TRI);
953
954 // Special casing for cross-bank copies of s1s. We can technically represent
955 // a 1-bit value with any size of register. The minimum size for a GPR is 32
956 // bits. So, we need to put the FPR on 32 bits as well.
957 //
958 // FIXME: I'm not sure if this case holds true outside of copies. If it does,
959 // then we can pull it into the helpers that get the appropriate class for a
960 // register bank. Or make a new helper that carries along some constraint
961 // information.
962 if (SrcRegBank != DstRegBank && (DstSize == 1 && SrcSize == 1))
963 SrcSize = DstSize = TypeSize::getFixed(32);
964
965 return {getMinClassForRegBank(SrcRegBank, SrcSize, true),
966 getMinClassForRegBank(DstRegBank, DstSize, true)};
967}
968
969// FIXME: We need some sort of API in RBI/TRI to allow generic code to
970// constrain operands of simple instructions given a TargetRegisterClass
971// and LLT
972static bool selectDebugInstr(MachineInstr &I, MachineRegisterInfo &MRI,
973                             const RegisterBankInfo &RBI) {
974 for (MachineOperand &MO : I.operands()) {
975 if (!MO.isReg())
976 continue;
977 Register Reg = MO.getReg();
978 if (!Reg)
979 continue;
980 if (Reg.isPhysical())
981 continue;
982 LLT Ty = MRI.getType(Reg);
983 const RegClassOrRegBank &RegClassOrBank = MRI.getRegClassOrRegBank(Reg);
984 const TargetRegisterClass *RC =
985 RegClassOrBank.dyn_cast<const TargetRegisterClass *>();
986 if (!RC) {
987 const RegisterBank &RB = *RegClassOrBank.get<const RegisterBank *>();
988 RC = getRegClassForTypeOnBank(Ty, RB);
989 if (!RC) {
990      LLVM_DEBUG(
991          dbgs() << "Warning: DBG_VALUE operand has unexpected size/bank\n");
992 break;
993 }
994 }
995 RBI.constrainGenericRegister(Reg, *RC, MRI);
996 }
997
998 return true;
999}
1000
1001static bool selectCopy(MachineInstr &I, const TargetInstrInfo &TII,
1002                       MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI,
1003                       const RegisterBankInfo &RBI) {
1004 Register DstReg = I.getOperand(0).getReg();
1005 Register SrcReg = I.getOperand(1).getReg();
1006 const RegisterBank &DstRegBank = *RBI.getRegBank(DstReg, MRI, TRI);
1007 const RegisterBank &SrcRegBank = *RBI.getRegBank(SrcReg, MRI, TRI);
1008
1009 // Find the correct register classes for the source and destination registers.
1010 const TargetRegisterClass *SrcRC;
1011 const TargetRegisterClass *DstRC;
1012 std::tie(SrcRC, DstRC) = getRegClassesForCopy(I, TII, MRI, TRI, RBI);
1013
1014 if (!DstRC) {
1015 LLVM_DEBUG(dbgs() << "Unexpected dest size "
1016 << RBI.getSizeInBits(DstReg, MRI, TRI) << '\n');
1017 return false;
1018 }
1019
1020 // Is this a copy? If so, then we may need to insert a subregister copy.
1021 if (I.isCopy()) {
1022 // Yes. Check if there's anything to fix up.
1023 if (!SrcRC) {
1024 LLVM_DEBUG(dbgs() << "Couldn't determine source register class\n");
1025 return false;
1026 }
1027
1028 const TypeSize SrcSize = TRI.getRegSizeInBits(*SrcRC);
1029 const TypeSize DstSize = TRI.getRegSizeInBits(*DstRC);
1030 unsigned SubReg;
1031
1032 // If the source bank doesn't support a subregister copy small enough,
1033 // then we first need to copy to the destination bank.
1034 if (getMinSizeForRegBank(SrcRegBank) > DstSize) {
1035 const TargetRegisterClass *DstTempRC =
1036 getMinClassForRegBank(DstRegBank, SrcSize, /* GetAllRegSet */ true);
1037 getSubRegForClass(DstRC, TRI, SubReg);
1038
1039 MachineIRBuilder MIB(I);
1040 auto Copy = MIB.buildCopy({DstTempRC}, {SrcReg});
1041 copySubReg(I, MRI, RBI, Copy.getReg(0), DstRC, SubReg);
1042 } else if (SrcSize > DstSize) {
1043 // If the source register is bigger than the destination we need to
1044 // perform a subregister copy.
1045 const TargetRegisterClass *SubRegRC =
1046 getMinClassForRegBank(SrcRegBank, DstSize, /* GetAllRegSet */ true);
1047 getSubRegForClass(SubRegRC, TRI, SubReg);
1048 copySubReg(I, MRI, RBI, SrcReg, DstRC, SubReg);
1049 } else if (DstSize > SrcSize) {
1050 // If the destination register is bigger than the source we need to do
1051 // a promotion using SUBREG_TO_REG.
1052 const TargetRegisterClass *PromotionRC =
1053 getMinClassForRegBank(SrcRegBank, DstSize, /* GetAllRegSet */ true);
1054 getSubRegForClass(SrcRC, TRI, SubReg);
1055
1056 Register PromoteReg = MRI.createVirtualRegister(PromotionRC);
1057 BuildMI(*I.getParent(), I, I.getDebugLoc(),
1058 TII.get(AArch64::SUBREG_TO_REG), PromoteReg)
1059 .addImm(0)
1060 .addUse(SrcReg)
1061 .addImm(SubReg);
1062 MachineOperand &RegOp = I.getOperand(1);
1063 RegOp.setReg(PromoteReg);
1064 }
1065
1066 // If the destination is a physical register, then there's nothing to
1067 // change, so we're done.
1068 if (DstReg.isPhysical())
1069 return true;
1070 }
1071
1072 // No need to constrain SrcReg. It will get constrained when we hit another
1073 // of its use or its defs. Copies do not have constraints.
1074 if (!RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) {
1075 LLVM_DEBUG(dbgs() << "Failed to constrain " << TII.getName(I.getOpcode())
1076 << " operand\n");
1077 return false;
1078 }
1079
1080  // If this is a GPR ZEXT, reduce it down into a copy.
1081 // The sizes will be mismatched with the source < 32b but that's ok.
1082 if (I.getOpcode() == TargetOpcode::G_ZEXT) {
1083 I.setDesc(TII.get(AArch64::COPY));
1084 assert(SrcRegBank.getID() == AArch64::GPRRegBankID);
1085 return selectCopy(I, TII, MRI, TRI, RBI);
1086 }
1087
1088 I.setDesc(TII.get(AArch64::COPY));
1089 return true;
1090}
1091
1092static unsigned selectFPConvOpc(unsigned GenericOpc, LLT DstTy, LLT SrcTy) {
1093 if (!DstTy.isScalar() || !SrcTy.isScalar())
1094 return GenericOpc;
1095
1096 const unsigned DstSize = DstTy.getSizeInBits();
1097 const unsigned SrcSize = SrcTy.getSizeInBits();
1098
1099 switch (DstSize) {
1100 case 32:
1101 switch (SrcSize) {
1102 case 32:
1103 switch (GenericOpc) {
1104 case TargetOpcode::G_SITOFP:
1105 return AArch64::SCVTFUWSri;
1106 case TargetOpcode::G_UITOFP:
1107 return AArch64::UCVTFUWSri;
1108 case TargetOpcode::G_FPTOSI:
1109 return AArch64::FCVTZSUWSr;
1110 case TargetOpcode::G_FPTOUI:
1111 return AArch64::FCVTZUUWSr;
1112 default:
1113 return GenericOpc;
1114 }
1115 case 64:
1116 switch (GenericOpc) {
1117 case TargetOpcode::G_SITOFP:
1118 return AArch64::SCVTFUXSri;
1119 case TargetOpcode::G_UITOFP:
1120 return AArch64::UCVTFUXSri;
1121 case TargetOpcode::G_FPTOSI:
1122 return AArch64::FCVTZSUWDr;
1123 case TargetOpcode::G_FPTOUI:
1124 return AArch64::FCVTZUUWDr;
1125 default:
1126 return GenericOpc;
1127 }
1128 default:
1129 return GenericOpc;
1130 }
1131 case 64:
1132 switch (SrcSize) {
1133 case 32:
1134 switch (GenericOpc) {
1135 case TargetOpcode::G_SITOFP:
1136 return AArch64::SCVTFUWDri;
1137 case TargetOpcode::G_UITOFP:
1138 return AArch64::UCVTFUWDri;
1139 case TargetOpcode::G_FPTOSI:
1140 return AArch64::FCVTZSUXSr;
1141 case TargetOpcode::G_FPTOUI:
1142 return AArch64::FCVTZUUXSr;
1143 default:
1144 return GenericOpc;
1145 }
1146 case 64:
1147 switch (GenericOpc) {
1148 case TargetOpcode::G_SITOFP:
1149 return AArch64::SCVTFUXDri;
1150 case TargetOpcode::G_UITOFP:
1151 return AArch64::UCVTFUXDri;
1152 case TargetOpcode::G_FPTOSI:
1153 return AArch64::FCVTZSUXDr;
1154 case TargetOpcode::G_FPTOUI:
1155 return AArch64::FCVTZUUXDr;
1156 default:
1157 return GenericOpc;
1158 }
1159 default:
1160 return GenericOpc;
1161 }
1162 default:
1163 return GenericOpc;
1164 };
1165 return GenericOpc;
1166}
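// Illustrative results of selectFPConvOpc, keyed on (destination size,
// source size) as in the nested switch above:
//
//   selectFPConvOpc(TargetOpcode::G_SITOFP, LLT::scalar(64), LLT::scalar(32))
//     == AArch64::SCVTFUWDri   // signed i32 -> f64 convert
//   selectFPConvOpc(TargetOpcode::G_FPTOUI, LLT::scalar(32), LLT::scalar(64))
//     == AArch64::FCVTZUUWDr   // f64 -> unsigned i32 convert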
1167
1168MachineInstr *
1169AArch64InstructionSelector::emitSelect(Register Dst, Register True,
1170                                       Register False, AArch64CC::CondCode CC,
1171                                       MachineIRBuilder &MIB) const {
1172 MachineRegisterInfo &MRI = *MIB.getMRI();
1173 assert(RBI.getRegBank(False, MRI, TRI)->getID() ==
1174 RBI.getRegBank(True, MRI, TRI)->getID() &&
1175 "Expected both select operands to have the same regbank?");
1176 LLT Ty = MRI.getType(True);
1177 if (Ty.isVector())
1178 return nullptr;
1179 const unsigned Size = Ty.getSizeInBits();
1180 assert((Size == 32 || Size == 64) &&
1181 "Expected 32 bit or 64 bit select only?");
1182 const bool Is32Bit = Size == 32;
1183 if (RBI.getRegBank(True, MRI, TRI)->getID() != AArch64::GPRRegBankID) {
1184 unsigned Opc = Is32Bit ? AArch64::FCSELSrrr : AArch64::FCSELDrrr;
1185 auto FCSel = MIB.buildInstr(Opc, {Dst}, {True, False}).addImm(CC);
1186    constrainSelectedInstRegOperands(*FCSel, TII, TRI, RBI);
1187    return &*FCSel;
1188 }
1189
1190 // By default, we'll try and emit a CSEL.
1191 unsigned Opc = Is32Bit ? AArch64::CSELWr : AArch64::CSELXr;
1192 bool Optimized = false;
1193 auto TryFoldBinOpIntoSelect = [&Opc, Is32Bit, &CC, &MRI,
1194 &Optimized](Register &Reg, Register &OtherReg,
1195 bool Invert) {
1196 if (Optimized)
1197 return false;
1198
1199 // Attempt to fold:
1200 //
1201 // %sub = G_SUB 0, %x
1202 // %select = G_SELECT cc, %reg, %sub
1203 //
1204 // Into:
1205 // %select = CSNEG %reg, %x, cc
1206 Register MatchReg;
1207 if (mi_match(Reg, MRI, m_Neg(m_Reg(MatchReg)))) {
1208 Opc = Is32Bit ? AArch64::CSNEGWr : AArch64::CSNEGXr;
1209 Reg = MatchReg;
1210 if (Invert) {
1211      CC = AArch64CC::getInvertedCondCode(CC);
1212      std::swap(Reg, OtherReg);
1213 }
1214 return true;
1215 }
1216
1217 // Attempt to fold:
1218 //
1219 // %xor = G_XOR %x, -1
1220 // %select = G_SELECT cc, %reg, %xor
1221 //
1222 // Into:
1223 // %select = CSINV %reg, %x, cc
1224 if (mi_match(Reg, MRI, m_Not(m_Reg(MatchReg)))) {
1225 Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;
1226 Reg = MatchReg;
1227 if (Invert) {
1228      CC = AArch64CC::getInvertedCondCode(CC);
1229      std::swap(Reg, OtherReg);
1230 }
1231 return true;
1232 }
1233
1234 // Attempt to fold:
1235 //
1236 // %add = G_ADD %x, 1
1237 // %select = G_SELECT cc, %reg, %add
1238 //
1239 // Into:
1240 // %select = CSINC %reg, %x, cc
1241 if (mi_match(Reg, MRI,
1242 m_any_of(m_GAdd(m_Reg(MatchReg), m_SpecificICst(1)),
1243 m_GPtrAdd(m_Reg(MatchReg), m_SpecificICst(1))))) {
1244 Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;
1245 Reg = MatchReg;
1246 if (Invert) {
1247      CC = AArch64CC::getInvertedCondCode(CC);
1248      std::swap(Reg, OtherReg);
1249 }
1250 return true;
1251 }
1252
1253 return false;
1254 };
1255
1256 // Helper lambda which tries to use CSINC/CSINV for the instruction when its
1257 // true/false values are constants.
1258 // FIXME: All of these patterns already exist in tablegen. We should be
1259 // able to import these.
1260 auto TryOptSelectCst = [&Opc, &True, &False, &CC, Is32Bit, &MRI,
1261 &Optimized]() {
1262 if (Optimized)
1263 return false;
1264 auto TrueCst = getIConstantVRegValWithLookThrough(True, MRI);
1265 auto FalseCst = getIConstantVRegValWithLookThrough(False, MRI);
1266 if (!TrueCst && !FalseCst)
1267 return false;
1268
1269 Register ZReg = Is32Bit ? AArch64::WZR : AArch64::XZR;
1270 if (TrueCst && FalseCst) {
1271 int64_t T = TrueCst->Value.getSExtValue();
1272 int64_t F = FalseCst->Value.getSExtValue();
1273
1274 if (T == 0 && F == 1) {
1275 // G_SELECT cc, 0, 1 -> CSINC zreg, zreg, cc
1276 Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;
1277 True = ZReg;
1278 False = ZReg;
1279 return true;
1280 }
1281
1282 if (T == 0 && F == -1) {
1283 // G_SELECT cc 0, -1 -> CSINV zreg, zreg cc
1284 Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;
1285 True = ZReg;
1286 False = ZReg;
1287 return true;
1288 }
1289 }
1290
1291 if (TrueCst) {
1292 int64_t T = TrueCst->Value.getSExtValue();
1293 if (T == 1) {
1294 // G_SELECT cc, 1, f -> CSINC f, zreg, inv_cc
1295 Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;
1296 True = False;
1297 False = ZReg;
1298        CC = AArch64CC::getInvertedCondCode(CC);
1299        return true;
1300 }
1301
1302 if (T == -1) {
1303 // G_SELECT cc, -1, f -> CSINV f, zreg, inv_cc
1304 Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;
1305 True = False;
1306 False = ZReg;
1307        CC = AArch64CC::getInvertedCondCode(CC);
1308        return true;
1309 }
1310 }
1311
1312 if (FalseCst) {
1313 int64_t F = FalseCst->Value.getSExtValue();
1314 if (F == 1) {
1315 // G_SELECT cc, t, 1 -> CSINC t, zreg, cc
1316 Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;
1317 False = ZReg;
1318 return true;
1319 }
1320
1321 if (F == -1) {
1322        // G_SELECT cc, t, -1 -> CSINV t, zreg, cc
1323 Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;
1324 False = ZReg;
1325 return true;
1326 }
1327 }
1328 return false;
1329 };
1330
1331 Optimized |= TryFoldBinOpIntoSelect(False, True, /*Invert = */ false);
1332 Optimized |= TryFoldBinOpIntoSelect(True, False, /*Invert = */ true);
1333 Optimized |= TryOptSelectCst();
1334 auto SelectInst = MIB.buildInstr(Opc, {Dst}, {True, False}).addImm(CC);
1335  constrainSelectedInstRegOperands(*SelectInst, TII, TRI, RBI);
1336  return &*SelectInst;
1337}
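// Illustrative summary of the folds emitSelect attempts above:
//
//   G_SELECT cc, %t, (G_SUB 0, %x)  --> CSNEG %t, %x, cc
//   G_SELECT cc, %t, (G_XOR %x, -1) --> CSINV %t, %x, cc
//   G_SELECT cc, %t, (G_ADD %x, 1)  --> CSINC %t, %x, cc
//   G_SELECT cc, 0, 1               --> CSINC zreg, zreg, cc
//
// When no fold applies, a plain CSEL (or FCSEL for FPR operands) is emitted.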
1338
1339static AArch64CC::CondCode changeICMPPredToAArch64CC(CmpInst::Predicate P) {
1340  switch (P) {
1341 default:
1342 llvm_unreachable("Unknown condition code!");
1343 case CmpInst::ICMP_NE:
1344 return AArch64CC::NE;
1345 case CmpInst::ICMP_EQ:
1346 return AArch64CC::EQ;
1347 case CmpInst::ICMP_SGT:
1348 return AArch64CC::GT;
1349 case CmpInst::ICMP_SGE:
1350 return AArch64CC::GE;
1351 case CmpInst::ICMP_SLT:
1352 return AArch64CC::LT;
1353 case CmpInst::ICMP_SLE:
1354 return AArch64CC::LE;
1355 case CmpInst::ICMP_UGT:
1356 return AArch64CC::HI;
1357 case CmpInst::ICMP_UGE:
1358 return AArch64CC::HS;
1359 case CmpInst::ICMP_ULT:
1360 return AArch64CC::LO;
1361 case CmpInst::ICMP_ULE:
1362 return AArch64CC::LS;
1363 }
1364}
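// Illustrative use: an unsigned less-than compare maps onto the AArch64
// "lower" condition, so a G_ICMP ult that feeds a conditional branch ends up
// as B.LO, e.g.
//
//   AArch64CC::CondCode CC = changeICMPPredToAArch64CC(CmpInst::ICMP_ULT);
//   // CC == AArch64CC::LO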
1365
1366/// changeFPCCToORAArch64CC - Convert an IR fp condition code to an AArch64 CC.
1367static void changeFPCCToORAArch64CC(CmpInst::Predicate CC,
1368                                    AArch64CC::CondCode &CondCode,
1369 AArch64CC::CondCode &CondCode2) {
1370 CondCode2 = AArch64CC::AL;
1371 switch (CC) {
1372 default:
1373 llvm_unreachable("Unknown FP condition!");
1374 case CmpInst::FCMP_OEQ:
1375 CondCode = AArch64CC::EQ;
1376 break;
1377 case CmpInst::FCMP_OGT:
1378 CondCode = AArch64CC::GT;
1379 break;
1380 case CmpInst::FCMP_OGE:
1381 CondCode = AArch64CC::GE;
1382 break;
1383 case CmpInst::FCMP_OLT:
1384 CondCode = AArch64CC::MI;
1385 break;
1386 case CmpInst::FCMP_OLE:
1387 CondCode = AArch64CC::LS;
1388 break;
1389 case CmpInst::FCMP_ONE:
1390 CondCode = AArch64CC::MI;
1391 CondCode2 = AArch64CC::GT;
1392 break;
1393 case CmpInst::FCMP_ORD:
1394 CondCode = AArch64CC::VC;
1395 break;
1396 case CmpInst::FCMP_UNO:
1397 CondCode = AArch64CC::VS;
1398 break;
1399 case CmpInst::FCMP_UEQ:
1400 CondCode = AArch64CC::EQ;
1401 CondCode2 = AArch64CC::VS;
1402 break;
1403 case CmpInst::FCMP_UGT:
1404 CondCode = AArch64CC::HI;
1405 break;
1406 case CmpInst::FCMP_UGE:
1407 CondCode = AArch64CC::PL;
1408 break;
1409 case CmpInst::FCMP_ULT:
1410 CondCode = AArch64CC::LT;
1411 break;
1412 case CmpInst::FCMP_ULE:
1413 CondCode = AArch64CC::LE;
1414 break;
1415 case CmpInst::FCMP_UNE:
1416 CondCode = AArch64CC::NE;
1417 break;
1418 }
1419}
1420
1421/// Convert an IR fp condition code to an AArch64 CC.
1422/// This differs from changeFPCCToAArch64CC in that it returns cond codes that
1423/// should be AND'ed instead of OR'ed.
1424static void changeFPCCToANDAArch64CC(CmpInst::Predicate CC,
1425                                     AArch64CC::CondCode &CondCode,
1426 AArch64CC::CondCode &CondCode2) {
1427 CondCode2 = AArch64CC::AL;
1428 switch (CC) {
1429 default:
1430 changeFPCCToORAArch64CC(CC, CondCode, CondCode2);
1431 assert(CondCode2 == AArch64CC::AL);
1432 break;
1433 case CmpInst::FCMP_ONE:
1434 // (a one b)
1435 // == ((a olt b) || (a ogt b))
1436 // == ((a ord b) && (a une b))
1437 CondCode = AArch64CC::VC;
1438 CondCode2 = AArch64CC::NE;
1439 break;
1440 case CmpInst::FCMP_UEQ:
1441 // (a ueq b)
1442 // == ((a uno b) || (a oeq b))
1443 // == ((a ule b) && (a uge b))
1444 CondCode = AArch64CC::PL;
1445 CondCode2 = AArch64CC::LE;
1446 break;
1447 }
1448}
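// Illustrative example: for FCMP_ONE the OR form above needs two branches
// ((a olt b) || (a ogt b)), while the AND form computed here lets a
// conjunction emitter check (a ord b) && (a une b) with a flag-setting
// compare followed by a conditional compare, i.e. CondCode == AArch64CC::VC
// and CondCode2 == AArch64CC::NE as set in the FCMP_ONE case above.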
1449
1450/// Return a register which can be used as a bit to test in a TB(N)Z.
1451static Register getTestBitReg(Register Reg, uint64_t &Bit, bool &Invert,
1452                              MachineRegisterInfo &MRI) {
1453  assert(Reg.isValid() && "Expected valid register!");
1454 bool HasZext = false;
1455 while (MachineInstr *MI = getDefIgnoringCopies(Reg, MRI)) {
1456 unsigned Opc = MI->getOpcode();
1457
1458 if (!MI->getOperand(0).isReg() ||
1459 !MRI.hasOneNonDBGUse(MI->getOperand(0).getReg()))
1460 break;
1461
1462 // (tbz (any_ext x), b) -> (tbz x, b) if we don't use the extended bits.
1463 //
1464 // (tbz (trunc x), b) -> (tbz x, b) is always safe, because the bit number
1465 // on the truncated x is the same as the bit number on x.
1466 if (Opc == TargetOpcode::G_ANYEXT || Opc == TargetOpcode::G_ZEXT ||
1467 Opc == TargetOpcode::G_TRUNC) {
1468 if (Opc == TargetOpcode::G_ZEXT)
1469 HasZext = true;
1470
1471 Register NextReg = MI->getOperand(1).getReg();
1472 // Did we find something worth folding?
1473 if (!NextReg.isValid() || !MRI.hasOneNonDBGUse(NextReg))
1474 break;
1475
1476 // NextReg is worth folding. Keep looking.
1477 Reg = NextReg;
1478 continue;
1479 }
1480
1481 // Attempt to find a suitable operation with a constant on one side.
1482 std::optional<uint64_t> C;
1483 Register TestReg;
1484 switch (Opc) {
1485 default:
1486 break;
1487 case TargetOpcode::G_AND:
1488 case TargetOpcode::G_XOR: {
1489 TestReg = MI->getOperand(1).getReg();
1490 Register ConstantReg = MI->getOperand(2).getReg();
1491 auto VRegAndVal = getIConstantVRegValWithLookThrough(ConstantReg, MRI);
1492 if (!VRegAndVal) {
1493 // AND commutes, check the other side for a constant.
1494 // FIXME: Can we canonicalize the constant so that it's always on the
1495 // same side at some point earlier?
1496 std::swap(ConstantReg, TestReg);
1497 VRegAndVal = getIConstantVRegValWithLookThrough(ConstantReg, MRI);
1498 }
1499 if (VRegAndVal) {
1500 if (HasZext)
1501 C = VRegAndVal->Value.getZExtValue();
1502 else
1503 C = VRegAndVal->Value.getSExtValue();
1504 }
1505 break;
1506 }
1507 case TargetOpcode::G_ASHR:
1508 case TargetOpcode::G_LSHR:
1509 case TargetOpcode::G_SHL: {
1510 TestReg = MI->getOperand(1).getReg();
1511 auto VRegAndVal =
1512 getIConstantVRegValWithLookThrough(MI->getOperand(2).getReg(), MRI);
1513 if (VRegAndVal)
1514 C = VRegAndVal->Value.getSExtValue();
1515 break;
1516 }
1517 }
1518
1519 // Didn't find a constant or viable register. Bail out of the loop.
1520 if (!C || !TestReg.isValid())
1521 break;
1522
1523 // We found a suitable instruction with a constant. Check to see if we can
1524 // walk through the instruction.
1525 Register NextReg;
1526 unsigned TestRegSize = MRI.getType(TestReg).getSizeInBits();
1527 switch (Opc) {
1528 default:
1529 break;
1530 case TargetOpcode::G_AND:
1531 // (tbz (and x, m), b) -> (tbz x, b) when the b-th bit of m is set.
1532 if ((*C >> Bit) & 1)
1533 NextReg = TestReg;
1534 break;
1535 case TargetOpcode::G_SHL:
1536 // (tbz (shl x, c), b) -> (tbz x, b-c) when b-c is positive and fits in
1537 // the type of the register.
1538 if (*C <= Bit && (Bit - *C) < TestRegSize) {
1539 NextReg = TestReg;
1540 Bit = Bit - *C;
1541 }
1542 break;
1543 case TargetOpcode::G_ASHR:
1544 // (tbz (ashr x, c), b) -> (tbz x, b+c) or (tbz x, msb) if b+c is > # bits
1545 // in x
1546 NextReg = TestReg;
1547 Bit = Bit + *C;
1548 if (Bit >= TestRegSize)
1549 Bit = TestRegSize - 1;
1550 break;
1551 case TargetOpcode::G_LSHR:
1552 // (tbz (lshr x, c), b) -> (tbz x, b+c) when b + c is < # bits in x
1553 if ((Bit + *C) < TestRegSize) {
1554 NextReg = TestReg;
1555 Bit = Bit + *C;
1556 }
1557 break;
1558 case TargetOpcode::G_XOR:
1559 // We can walk through a G_XOR by inverting whether we use tbz/tbnz when
1560 // appropriate.
1561 //
1562 // e.g. If x' = xor x, c, and the b-th bit is set in c then
1563 //
1564 // tbz x', b -> tbnz x, b
1565 //
1566 // Because x' only has the b-th bit set if x does not.
1567 if ((*C >> Bit) & 1)
1568 Invert = !Invert;
1569 NextReg = TestReg;
1570 break;
1571 }
1572
1573 // Check if we found anything worth folding.
1574 if (!NextReg.isValid())
1575 return Reg;
1576 Reg = NextReg;
1577 }
1578
1579 return Reg;
1580}
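// Illustrative walk performed by getTestBitReg: starting from
//
//   %shl = G_SHL %x, 2
//   ... test bit 3 of %shl ...
//
// the G_SHL case above rewrites the test to bit 1 of %x (3 - 2), so the
// eventual TB(N)Z reads %x directly and the shift becomes dead.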
1581
1582MachineInstr *AArch64InstructionSelector::emitTestBit(
1583 Register TestReg, uint64_t Bit, bool IsNegative, MachineBasicBlock *DstMBB,
1584 MachineIRBuilder &MIB) const {
1585 assert(TestReg.isValid());
1586 assert(ProduceNonFlagSettingCondBr &&
1587 "Cannot emit TB(N)Z with speculation tracking!");
1588 MachineRegisterInfo &MRI = *MIB.getMRI();
1589
1590 // Attempt to optimize the test bit by walking over instructions.
1591 TestReg = getTestBitReg(TestReg, Bit, IsNegative, MRI);
1592 LLT Ty = MRI.getType(TestReg);
1593 unsigned Size = Ty.getSizeInBits();
1594 assert(!Ty.isVector() && "Expected a scalar!");
1595 assert(Bit < 64 && "Bit is too large!");
1596
1597 // When the test register is a 64-bit register, we have to narrow to make
1598 // TBNZW work.
1599 bool UseWReg = Bit < 32;
1600 unsigned NecessarySize = UseWReg ? 32 : 64;
1601 if (Size != NecessarySize)
1602 TestReg = moveScalarRegClass(
1603 TestReg, UseWReg ? AArch64::GPR32RegClass : AArch64::GPR64RegClass,
1604 MIB);
1605
1606 static const unsigned OpcTable[2][2] = {{AArch64::TBZX, AArch64::TBNZX},
1607 {AArch64::TBZW, AArch64::TBNZW}};
1608 unsigned Opc = OpcTable[UseWReg][IsNegative];
1609 auto TestBitMI =
1610 MIB.buildInstr(Opc).addReg(TestReg).addImm(Bit).addMBB(DstMBB);
1611 constrainSelectedInstRegOperands(*TestBitMI, TII, TRI, RBI);
1612 return &*TestBitMI;
1613}
1614
1615bool AArch64InstructionSelector::tryOptAndIntoCompareBranch(
1616 MachineInstr &AndInst, bool Invert, MachineBasicBlock *DstMBB,
1617 MachineIRBuilder &MIB) const {
1618 assert(AndInst.getOpcode() == TargetOpcode::G_AND && "Expected G_AND only?");
1619 // Given something like this:
1620 //
1621 // %x = ...Something...
1622 // %one = G_CONSTANT i64 1
1623 // %zero = G_CONSTANT i64 0
1624 // %and = G_AND %x, %one
1625 // %cmp = G_ICMP intpred(ne), %and, %zero
1626 // %cmp_trunc = G_TRUNC %cmp
1627 // G_BRCOND %cmp_trunc, %bb.3
1628 //
1629 // We want to try and fold the AND into the G_BRCOND and produce either a
1630 // TBNZ (when we have intpred(ne)) or a TBZ (when we have intpred(eq)).
1631 //
1632 // In this case, we'd get
1633 //
1634 // TBNZ %x %bb.3
1635 //
1636
1637 // Check if the AND has a constant on its RHS which we can use as a mask.
1638 // If it's a power of 2, then it's the same as checking a specific bit.
1639 // (e.g, ANDing with 8 == ANDing with 000...100 == testing if bit 3 is set)
1640 auto MaybeBit = getIConstantVRegValWithLookThrough(
1641 AndInst.getOperand(2).getReg(), *MIB.getMRI());
1642 if (!MaybeBit)
1643 return false;
1644
1645 int32_t Bit = MaybeBit->Value.exactLogBase2();
1646 if (Bit < 0)
1647 return false;
1648
1649 Register TestReg = AndInst.getOperand(1).getReg();
1650
1651 // Emit a TB(N)Z.
1652 emitTestBit(TestReg, Bit, Invert, DstMBB, MIB);
1653 return true;
1654}
1655
1656MachineInstr *AArch64InstructionSelector::emitCBZ(Register CompareReg,
1657 bool IsNegative,
1658 MachineBasicBlock *DestMBB,
1659 MachineIRBuilder &MIB) const {
1660 assert(ProduceNonFlagSettingCondBr && "CBZ does not set flags!");
1661 MachineRegisterInfo &MRI = *MIB.getMRI();
1662 assert(RBI.getRegBank(CompareReg, MRI, TRI)->getID() ==
1663 AArch64::GPRRegBankID &&
1664 "Expected GPRs only?");
1665 auto Ty = MRI.getType(CompareReg);
1666 unsigned Width = Ty.getSizeInBits();
1667 assert(!Ty.isVector() && "Expected scalar only?");
1668 assert(Width <= 64 && "Expected width to be at most 64?");
1669 static const unsigned OpcTable[2][2] = {{AArch64::CBZW, AArch64::CBZX},
1670 {AArch64::CBNZW, AArch64::CBNZX}};
1671 unsigned Opc = OpcTable[IsNegative][Width == 64];
1672 auto BranchMI = MIB.buildInstr(Opc, {}, {CompareReg}).addMBB(DestMBB);
1673 constrainSelectedInstRegOperands(*BranchMI, TII, TRI, RBI);
1674 return &*BranchMI;
1675}
1676
1677bool AArch64InstructionSelector::selectCompareBranchFedByFCmp(
1678 MachineInstr &I, MachineInstr &FCmp, MachineIRBuilder &MIB) const {
1679 assert(FCmp.getOpcode() == TargetOpcode::G_FCMP);
1680 assert(I.getOpcode() == TargetOpcode::G_BRCOND);
1681 // Unfortunately, the mapping of LLVM FP CC's onto AArch64 CC's isn't
1682 // totally clean. Some of them require two branches to implement.
1683 auto Pred = (CmpInst::Predicate)FCmp.getOperand(1).getPredicate();
1684 emitFPCompare(FCmp.getOperand(2).getReg(), FCmp.getOperand(3).getReg(), MIB,
1685 Pred);
1686 AArch64CC::CondCode CC1, CC2;
1687 changeFCMPPredToAArch64CC(static_cast<CmpInst::Predicate>(Pred), CC1, CC2);
1688 MachineBasicBlock *DestMBB = I.getOperand(1).getMBB();
1689 MIB.buildInstr(AArch64::Bcc, {}, {}).addImm(CC1).addMBB(DestMBB);
1690 if (CC2 != AArch64CC::AL)
1691 MIB.buildInstr(AArch64::Bcc, {}, {}).addImm(CC2).addMBB(DestMBB);
1692 I.eraseFromParent();
1693 return true;
1694}
1695
1696bool AArch64InstructionSelector::tryOptCompareBranchFedByICmp(
1697 MachineInstr &I, MachineInstr &ICmp, MachineIRBuilder &MIB) const {
1698 assert(ICmp.getOpcode() == TargetOpcode::G_ICMP);
1699 assert(I.getOpcode() == TargetOpcode::G_BRCOND);
1700 // Attempt to optimize the G_BRCOND + G_ICMP into a TB(N)Z/CB(N)Z.
1701 //
1702 // Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z
1703 // instructions will not be produced, as they are conditional branch
1704 // instructions that do not set flags.
1705 if (!ProduceNonFlagSettingCondBr)
1706 return false;
1707
1708 MachineRegisterInfo &MRI = *MIB.getMRI();
1709 MachineBasicBlock *DestMBB = I.getOperand(1).getMBB();
1710 auto Pred =
1711 static_cast<CmpInst::Predicate>(ICmp.getOperand(1).getPredicate());
1712 Register LHS = ICmp.getOperand(2).getReg();
1713 Register RHS = ICmp.getOperand(3).getReg();
1714
1715 // We're allowed to emit a TB(N)Z/CB(N)Z. Try to do that.
1716 auto VRegAndVal = getIConstantVRegValWithLookThrough(RHS, MRI);
1717 MachineInstr *AndInst = getOpcodeDef(TargetOpcode::G_AND, LHS, MRI);
1718
1719 // When we can emit a TB(N)Z, prefer that.
1720 //
1721 // Handle non-commutative condition codes first.
1722 // Note that we don't want to do this when we have a G_AND because it can
1723 // become a tst. The tst will make the test bit in the TB(N)Z redundant.
1724 if (VRegAndVal && !AndInst) {
1725 int64_t C = VRegAndVal->Value.getSExtValue();
1726
1727 // When we have a greater-than comparison, we can just test if the msb is
1728 // zero.
1729 if (C == -1 && Pred == CmpInst::ICMP_SGT) {
1730 uint64_t Bit = MRI.getType(LHS).getSizeInBits() - 1;
1731 emitTestBit(LHS, Bit, /*IsNegative = */ false, DestMBB, MIB);
1732 I.eraseFromParent();
1733 return true;
1734 }
1735
1736 // When we have a less than comparison, we can just test if the msb is not
1737 // zero.
1738 if (C == 0 && Pred == CmpInst::ICMP_SLT) {
1739 uint64_t Bit = MRI.getType(LHS).getSizeInBits() - 1;
1740 emitTestBit(LHS, Bit, /*IsNegative = */ true, DestMBB, MIB);
1741 I.eraseFromParent();
1742 return true;
1743 }
1744
1745 // Inversely, if we have a signed greater-than-or-equal comparison to zero,
1746 // we can test if the msb is zero.
1747 if (C == 0 && Pred == CmpInst::ICMP_SGE) {
1748 uint64_t Bit = MRI.getType(LHS).getSizeInBits() - 1;
1749 emitTestBit(LHS, Bit, /*IsNegative = */ false, DestMBB, MIB);
1750 I.eraseFromParent();
1751 return true;
1752 }
1753 }
1754
1755 // Attempt to handle commutative condition codes. Right now, that's only
1756 // eq/ne.
1757 if (ICmpInst::isEquality(Pred)) {
1758 if (!VRegAndVal) {
1759 std::swap(RHS, LHS);
1760 VRegAndVal = getIConstantVRegValWithLookThrough(RHS, MRI);
1761 AndInst = getOpcodeDef(TargetOpcode::G_AND, LHS, MRI);
1762 }
1763
1764 if (VRegAndVal && VRegAndVal->Value == 0) {
1765 // If there's a G_AND feeding into this branch, try to fold it away by
1766 // emitting a TB(N)Z instead.
1767 //
1768 // Note: If we have LT, then it *is* possible to fold, but it wouldn't be
1769 // beneficial. When we have an AND and LT, we need a TST/ANDS, so folding
1770 // would be redundant.
1771 if (AndInst &&
1772 tryOptAndIntoCompareBranch(
1773 *AndInst, /*Invert = */ Pred == CmpInst::ICMP_NE, DestMBB, MIB)) {
1774 I.eraseFromParent();
1775 return true;
1776 }
1777
1778 // Otherwise, try to emit a CB(N)Z instead.
1779 auto LHSTy = MRI.getType(LHS);
1780 if (!LHSTy.isVector() && LHSTy.getSizeInBits() <= 64) {
1781 emitCBZ(LHS, /*IsNegative = */ Pred == CmpInst::ICMP_NE, DestMBB, MIB);
1782 I.eraseFromParent();
1783 return true;
1784 }
1785 }
1786 }
1787
1788 return false;
1789}
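// A rough sketch of the folds attempted above, using made-up vregs and a
// hypothetical destination block %bb.1 (illustrative only, not taken from any
// test):
//
//   %c = G_ICMP intpred(slt), %x:gpr(s64), 0 ; G_BRCOND %c, %bb.1
//     --> TBNZ %x, 63, %bb.1      (branch if the sign bit is set)
//   %c = G_ICMP intpred(eq), %x:gpr(s64), 0  ; G_BRCOND %c, %bb.1
//     --> CBZ %x, %bb.1           (no feeding G_AND, so fall back to CB(N)Z)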
1790
1791bool AArch64InstructionSelector::selectCompareBranchFedByICmp(
1792 MachineInstr &I, MachineInstr &ICmp, MachineIRBuilder &MIB) const {
1793 assert(ICmp.getOpcode() == TargetOpcode::G_ICMP);
1794 assert(I.getOpcode() == TargetOpcode::G_BRCOND);
1795 if (tryOptCompareBranchFedByICmp(I, ICmp, MIB))
1796 return true;
1797
1798 // Couldn't optimize. Emit a compare + a Bcc.
1799 MachineBasicBlock *DestMBB = I.getOperand(1).getMBB();
1800 auto PredOp = ICmp.getOperand(1);
1801 emitIntegerCompare(ICmp.getOperand(2), ICmp.getOperand(3), PredOp, MIB);
1802 const AArch64CC::CondCode CC = changeICMPPredToAArch64CC(
1803 static_cast<CmpInst::Predicate>(PredOp.getPredicate()));
1804 MIB.buildInstr(AArch64::Bcc, {}, {}).addImm(CC).addMBB(DestMBB);
1805 I.eraseFromParent();
1806 return true;
1807}
1808
1809 bool AArch64InstructionSelector::selectCompareBranch(
1810 MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) {
1811 Register CondReg = I.getOperand(0).getReg();
1812 MachineInstr *CCMI = MRI.getVRegDef(CondReg);
1813 // Try to select the G_BRCOND using whatever is feeding the condition if
1814 // possible.
1815 unsigned CCMIOpc = CCMI->getOpcode();
1816 if (CCMIOpc == TargetOpcode::G_FCMP)
1817 return selectCompareBranchFedByFCmp(I, *CCMI, MIB);
1818 if (CCMIOpc == TargetOpcode::G_ICMP)
1819 return selectCompareBranchFedByICmp(I, *CCMI, MIB);
1820
1821 // Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z
1822 // instructions will not be produced, as they are conditional branch
1823 // instructions that do not set flags.
1824 if (ProduceNonFlagSettingCondBr) {
1825 emitTestBit(CondReg, /*Bit = */ 0, /*IsNegative = */ true,
1826 I.getOperand(1).getMBB(), MIB);
1827 I.eraseFromParent();
1828 return true;
1829 }
1830
1831 // Can't emit TB(N)Z/CB(N)Z. Emit a tst + bcc instead.
1832 auto TstMI =
1833 MIB.buildInstr(AArch64::ANDSWri, {LLT::scalar(32)}, {CondReg}).addImm(1);
1834 constrainSelectedInstRegOperands(*TstMI, TII, TRI, RBI);
1835 auto Bcc = MIB.buildInstr(AArch64::Bcc)
1836 .addImm(AArch64CC::NE)
1837 .addMBB(I.getOperand(1).getMBB());
1838 I.eraseFromParent();
1839 return constrainSelectedInstRegOperands(*Bcc, TII, TRI, RBI);
1840}
1841
1842/// Returns the element immediate value of a vector shift operand if found.
1843/// This needs to detect a splat-like operation, e.g. a G_BUILD_VECTOR.
1844 static std::optional<int64_t> getVectorShiftImm(Register Reg,
1845 MachineRegisterInfo &MRI) {
1846 assert(MRI.getType(Reg).isVector() && "Expected a *vector* shift operand");
1847 MachineInstr *OpMI = MRI.getVRegDef(Reg);
1848 return getAArch64VectorSplatScalar(*OpMI, MRI);
1849}
1850
1851/// Matches and returns the shift immediate value for a SHL instruction given
1852/// a shift operand.
1853 static std::optional<int64_t> getVectorSHLImm(LLT SrcTy, Register Reg,
1854 MachineRegisterInfo &MRI) {
1855 std::optional<int64_t> ShiftImm = getVectorShiftImm(Reg, MRI);
1856 if (!ShiftImm)
1857 return std::nullopt;
1858 // Check the immediate is in range for a SHL.
1859 int64_t Imm = *ShiftImm;
1860 if (Imm < 0)
1861 return std::nullopt;
1862 switch (SrcTy.getElementType().getSizeInBits()) {
1863 default:
1864 LLVM_DEBUG(dbgs() << "Unhandled element type for vector shift");
1865 return std::nullopt;
1866 case 8:
1867 if (Imm > 7)
1868 return std::nullopt;
1869 break;
1870 case 16:
1871 if (Imm > 15)
1872 return std::nullopt;
1873 break;
1874 case 32:
1875 if (Imm > 31)
1876 return std::nullopt;
1877 break;
1878 case 64:
1879 if (Imm > 63)
1880 return std::nullopt;
1881 break;
1882 }
1883 return Imm;
1884}
1885
1886 bool AArch64InstructionSelector::selectVectorSHL(MachineInstr &I,
1887 MachineRegisterInfo &MRI) {
1888 assert(I.getOpcode() == TargetOpcode::G_SHL);
1889 Register DstReg = I.getOperand(0).getReg();
1890 const LLT Ty = MRI.getType(DstReg);
1891 Register Src1Reg = I.getOperand(1).getReg();
1892 Register Src2Reg = I.getOperand(2).getReg();
1893
1894 if (!Ty.isVector())
1895 return false;
1896
1897 // Check if we have a vector of constants on RHS that we can select as the
1898 // immediate form.
1899 std::optional<int64_t> ImmVal = getVectorSHLImm(Ty, Src2Reg, MRI);
1900
1901 unsigned Opc = 0;
1902 if (Ty == LLT::fixed_vector(2, 64)) {
1903 Opc = ImmVal ? AArch64::SHLv2i64_shift : AArch64::USHLv2i64;
1904 } else if (Ty == LLT::fixed_vector(4, 32)) {
1905 Opc = ImmVal ? AArch64::SHLv4i32_shift : AArch64::USHLv4i32;
1906 } else if (Ty == LLT::fixed_vector(2, 32)) {
1907 Opc = ImmVal ? AArch64::SHLv2i32_shift : AArch64::USHLv2i32;
1908 } else if (Ty == LLT::fixed_vector(4, 16)) {
1909 Opc = ImmVal ? AArch64::SHLv4i16_shift : AArch64::USHLv4i16;
1910 } else if (Ty == LLT::fixed_vector(8, 16)) {
1911 Opc = ImmVal ? AArch64::SHLv8i16_shift : AArch64::USHLv8i16;
1912 } else if (Ty == LLT::fixed_vector(16, 8)) {
1913 Opc = ImmVal ? AArch64::SHLv16i8_shift : AArch64::USHLv16i8;
1914 } else if (Ty == LLT::fixed_vector(8, 8)) {
1915 Opc = ImmVal ? AArch64::SHLv8i8_shift : AArch64::USHLv8i8;
1916 } else {
1917 LLVM_DEBUG(dbgs() << "Unhandled G_SHL type");
1918 return false;
1919 }
1920
1921 auto Shl = MIB.buildInstr(Opc, {DstReg}, {Src1Reg});
1922 if (ImmVal)
1923 Shl.addImm(*ImmVal);
1924 else
1925 Shl.addUse(Src2Reg);
1926 constrainSelectedInstRegOperands(*Shl, TII, TRI, RBI);
1927 I.eraseFromParent();
1928 return true;
1929}
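// Rough illustration of the two forms chosen above (made-up vregs, <4 x s32>
// as the example type): a splatted constant shift amount selects the
// immediate form, anything else falls back to the register form.
//
//   %d:fpr(<4 x s32>) = G_SHL %a, %splat_of_3  -->  SHLv4i32_shift %a, 3
//   %d:fpr(<4 x s32>) = G_SHL %a, %b           -->  USHLv4i32 %a, %b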
1930
1931 bool AArch64InstructionSelector::selectVectorAshrLshr(
1932 MachineInstr &I, MachineRegisterInfo &MRI) {
1933 assert(I.getOpcode() == TargetOpcode::G_ASHR ||
1934 I.getOpcode() == TargetOpcode::G_LSHR);
1935 Register DstReg = I.getOperand(0).getReg();
1936 const LLT Ty = MRI.getType(DstReg);
1937 Register Src1Reg = I.getOperand(1).getReg();
1938 Register Src2Reg = I.getOperand(2).getReg();
1939
1940 if (!Ty.isVector())
1941 return false;
1942
1943 bool IsASHR = I.getOpcode() == TargetOpcode::G_ASHR;
1944
1945 // We expect the immediate case to be lowered in the PostLegalCombiner to
1946 // AArch64ISD::VASHR or AArch64ISD::VLSHR equivalents.
1947
1948 // There is no right-shift-by-register instruction, but the left-shift-by-
1949 // register instruction takes a signed shift amount, where a negative value
1950 // specifies a right shift.
1951
1952 unsigned Opc = 0;
1953 unsigned NegOpc = 0;
1954 const TargetRegisterClass *RC =
1955 getRegClassForTypeOnBank(Ty, RBI.getRegBank(AArch64::FPRRegBankID));
1956 if (Ty == LLT::fixed_vector(2, 64)) {
1957 Opc = IsASHR ? AArch64::SSHLv2i64 : AArch64::USHLv2i64;
1958 NegOpc = AArch64::NEGv2i64;
1959 } else if (Ty == LLT::fixed_vector(4, 32)) {
1960 Opc = IsASHR ? AArch64::SSHLv4i32 : AArch64::USHLv4i32;
1961 NegOpc = AArch64::NEGv4i32;
1962 } else if (Ty == LLT::fixed_vector(2, 32)) {
1963 Opc = IsASHR ? AArch64::SSHLv2i32 : AArch64::USHLv2i32;
1964 NegOpc = AArch64::NEGv2i32;
1965 } else if (Ty == LLT::fixed_vector(4, 16)) {
1966 Opc = IsASHR ? AArch64::SSHLv4i16 : AArch64::USHLv4i16;
1967 NegOpc = AArch64::NEGv4i16;
1968 } else if (Ty == LLT::fixed_vector(8, 16)) {
1969 Opc = IsASHR ? AArch64::SSHLv8i16 : AArch64::USHLv8i16;
1970 NegOpc = AArch64::NEGv8i16;
1971 } else if (Ty == LLT::fixed_vector(16, 8)) {
1972 Opc = IsASHR ? AArch64::SSHLv16i8 : AArch64::USHLv16i8;
1973 NegOpc = AArch64::NEGv16i8;
1974 } else if (Ty == LLT::fixed_vector(8, 8)) {
1975 Opc = IsASHR ? AArch64::SSHLv8i8 : AArch64::USHLv8i8;
1976 NegOpc = AArch64::NEGv8i8;
1977 } else {
1978 LLVM_DEBUG(dbgs() << "Unhandled G_ASHR type");
1979 return false;
1980 }
1981
1982 auto Neg = MIB.buildInstr(NegOpc, {RC}, {Src2Reg});
1983 constrainSelectedInstRegOperands(*Neg, TII, TRI, RBI);
1984 auto SShl = MIB.buildInstr(Opc, {DstReg}, {Src1Reg, Neg});
1985 constrainSelectedInstRegOperands(*SShl, TII, TRI, RBI);
1986 I.eraseFromParent();
1987 return true;
1988}
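// Rough illustration (made-up vregs): right shifts by register are emitted as
// a vector negate of the shift amount feeding a signed/unsigned left shift.
//
//   %d:fpr(<4 x s32>) = G_ASHR %a, %b
//     -->  %n = NEGv4i32 %b
//          %d = SSHLv4i32 %a, %n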
1989
1990 bool AArch64InstructionSelector::selectVaStartAAPCS(
1991 MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const {
1992 return false;
1993}
1994
1995 bool AArch64InstructionSelector::selectVaStartDarwin(
1996 MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const {
1997 AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
1998 Register ListReg = I.getOperand(0).getReg();
1999
2000 Register ArgsAddrReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
2001
2002 int FrameIdx = FuncInfo->getVarArgsStackIndex();
2003 if (MF.getSubtarget<AArch64Subtarget>().isCallingConvWin64(
2004 MF.getFunction().getCallingConv())) {
2005 FrameIdx = FuncInfo->getVarArgsGPRSize() > 0
2006 ? FuncInfo->getVarArgsGPRIndex()
2007 : FuncInfo->getVarArgsStackIndex();
2008 }
2009
2010 auto MIB =
2011 BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::ADDXri))
2012 .addDef(ArgsAddrReg)
2013 .addFrameIndex(FrameIdx)
2014 .addImm(0)
2015 .addImm(0);
2016
2017 constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
2018
2019 MIB = BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::STRXui))
2020 .addUse(ArgsAddrReg)
2021 .addUse(ListReg)
2022 .addImm(0)
2023 .addMemOperand(*I.memoperands_begin());
2024
2025 constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
2026 I.eraseFromParent();
2027 return true;
2028}
2029
2030void AArch64InstructionSelector::materializeLargeCMVal(
2031 MachineInstr &I, const Value *V, unsigned OpFlags) {
2032 MachineBasicBlock &MBB = *I.getParent();
2033 MachineFunction &MF = *MBB.getParent();
2034 MachineRegisterInfo &MRI = MF.getRegInfo();
2035
2036 auto MovZ = MIB.buildInstr(AArch64::MOVZXi, {&AArch64::GPR64RegClass}, {});
2037 MovZ->addOperand(MF, I.getOperand(1));
2038 MovZ->getOperand(1).setTargetFlags(OpFlags | AArch64II::MO_G0 |
2039 AArch64II::MO_NC);
2040 MovZ->addOperand(MF, MachineOperand::CreateImm(0));
2041 constrainSelectedInstRegOperands(*MovZ, TII, TRI, RBI);
2042
2043 auto BuildMovK = [&](Register SrcReg, unsigned char Flags, unsigned Offset,
2044 Register ForceDstReg) {
2045 Register DstReg = ForceDstReg
2046 ? ForceDstReg
2047 : MRI.createVirtualRegister(&AArch64::GPR64RegClass);
2048 auto MovI = MIB.buildInstr(AArch64::MOVKXi).addDef(DstReg).addUse(SrcReg);
2049 if (auto *GV = dyn_cast<GlobalValue>(V)) {
2050 MovI->addOperand(MF, MachineOperand::CreateGA(
2051 GV, MovZ->getOperand(1).getOffset(), Flags));
2052 } else {
2053 MovI->addOperand(
2054 MF, MachineOperand::CreateBA(cast<BlockAddress>(V),
2055 MovZ->getOperand(1).getOffset(), Flags));
2056 }
2057 MovI->addOperand(MF, MachineOperand::CreateImm(Offset));
2058 constrainSelectedInstRegOperands(*MovI, TII, TRI, RBI);
2059 return DstReg;
2060 };
2061 Register DstReg = BuildMovK(MovZ.getReg(0),
2062 AArch64II::MO_G1 | AArch64II::MO_NC, 16, 0);
2063 DstReg = BuildMovK(DstReg, AArch64II::MO_G2 | AArch64II::MO_NC, 32, 0);
2064 BuildMovK(DstReg, AArch64II::MO_G3, 48, I.getOperand(0).getReg());
2065}
2066
2067bool AArch64InstructionSelector::preISelLower(MachineInstr &I) {
2068 MachineBasicBlock &MBB = *I.getParent();
2069 MachineFunction &MF = *MBB.getParent();
2070 MachineRegisterInfo &MRI = MF.getRegInfo();
2071
2072 switch (I.getOpcode()) {
2073 case TargetOpcode::G_STORE: {
2074 bool Changed = contractCrossBankCopyIntoStore(I, MRI);
2075 MachineOperand &SrcOp = I.getOperand(0);
2076 if (MRI.getType(SrcOp.getReg()).isPointer()) {
2077 // Allow matching with imported patterns for stores of pointers. Unlike
2078 // G_LOAD/G_PTR_ADD, we may not have selected all users. So, emit a copy
2079 // and constrain.
2080 auto Copy = MIB.buildCopy(LLT::scalar(64), SrcOp);
2081 Register NewSrc = Copy.getReg(0);
2082 SrcOp.setReg(NewSrc);
2083 RBI.constrainGenericRegister(NewSrc, AArch64::GPR64RegClass, MRI);
2084 Changed = true;
2085 }
2086 return Changed;
2087 }
2088 case TargetOpcode::G_PTR_ADD:
2089 return convertPtrAddToAdd(I, MRI);
2090 case TargetOpcode::G_LOAD: {
2091 // For scalar loads of pointers, we try to convert the dest type from p0
2092 // to s64 so that our imported patterns can match. Like with the G_PTR_ADD
2093 // conversion, this should be ok because all users should have been
2094 // selected already, so the type doesn't matter for them.
2095 Register DstReg = I.getOperand(0).getReg();
2096 const LLT DstTy = MRI.getType(DstReg);
2097 if (!DstTy.isPointer())
2098 return false;
2099 MRI.setType(DstReg, LLT::scalar(64));
2100 return true;
2101 }
2102 case AArch64::G_DUP: {
2103 // Convert the type from p0 to s64 to help selection.
2104 LLT DstTy = MRI.getType(I.getOperand(0).getReg());
2105 if (!DstTy.isPointerVector())
2106 return false;
2107 auto NewSrc = MIB.buildCopy(LLT::scalar(64), I.getOperand(1).getReg());
2108 MRI.setType(I.getOperand(0).getReg(),
2109 DstTy.changeElementType(LLT::scalar(64)));
2110 MRI.setRegClass(NewSrc.getReg(0), &AArch64::GPR64RegClass);
2111 I.getOperand(1).setReg(NewSrc.getReg(0));
2112 return true;
2113 }
2114 case TargetOpcode::G_UITOFP:
2115 case TargetOpcode::G_SITOFP: {
2116 // If both source and destination regbanks are FPR, then convert the opcode
2117 // to G_SITOF so that the importer can select it to an fpr variant.
2118 // Otherwise, it ends up matching an fpr/gpr variant and adding a cross-bank
2119 // copy.
2120 Register SrcReg = I.getOperand(1).getReg();
2121 LLT SrcTy = MRI.getType(SrcReg);
2122 LLT DstTy = MRI.getType(I.getOperand(0).getReg());
2123 if (SrcTy.isVector() || SrcTy.getSizeInBits() != DstTy.getSizeInBits())
2124 return false;
2125
2126 if (RBI.getRegBank(SrcReg, MRI, TRI)->getID() == AArch64::FPRRegBankID) {
2127 if (I.getOpcode() == TargetOpcode::G_SITOFP)
2128 I.setDesc(TII.get(AArch64::G_SITOF));
2129 else
2130 I.setDesc(TII.get(AArch64::G_UITOF));
2131 return true;
2132 }
2133 return false;
2134 }
2135 default:
2136 return false;
2137 }
2138}
2139
2140/// This lowering tries to look for G_PTR_ADD instructions and then converts
2141/// them to a standard G_ADD with a COPY on the source.
2142///
2143/// The motivation behind this is to expose the add semantics to the imported
2144/// tablegen patterns. We shouldn't need to check for uses being loads/stores,
2145/// because the selector works bottom up, uses before defs. By the time we
2146/// end up trying to select a G_PTR_ADD, we should have already attempted to
2147 /// fold it into an addressing mode and been unsuccessful.
2148 bool AArch64InstructionSelector::convertPtrAddToAdd(
2149 MachineInstr &I, MachineRegisterInfo &MRI) {
2150 assert(I.getOpcode() == TargetOpcode::G_PTR_ADD && "Expected G_PTR_ADD");
2151 Register DstReg = I.getOperand(0).getReg();
2152 Register AddOp1Reg = I.getOperand(1).getReg();
2153 const LLT PtrTy = MRI.getType(DstReg);
2154 if (PtrTy.getAddressSpace() != 0)
2155 return false;
2156
2157 const LLT CastPtrTy =
2158 PtrTy.isVector() ? LLT::fixed_vector(2, 64) : LLT::scalar(64);
2159 auto PtrToInt = MIB.buildPtrToInt(CastPtrTy, AddOp1Reg);
2160 // Set regbanks on the registers.
2161 if (PtrTy.isVector())
2162 MRI.setRegBank(PtrToInt.getReg(0), RBI.getRegBank(AArch64::FPRRegBankID));
2163 else
2164 MRI.setRegBank(PtrToInt.getReg(0), RBI.getRegBank(AArch64::GPRRegBankID));
2165
2166 // Now turn the %dst(p0) = G_PTR_ADD %base, off into:
2167 // %dst(intty) = G_ADD %intbase, off
2168 I.setDesc(TII.get(TargetOpcode::G_ADD));
2169 MRI.setType(DstReg, CastPtrTy);
2170 I.getOperand(1).setReg(PtrToInt.getReg(0));
2171 if (!select(*PtrToInt)) {
2172 LLVM_DEBUG(dbgs() << "Failed to select G_PTRTOINT in convertPtrAddToAdd");
2173 return false;
2174 }
2175
2176 // Also take the opportunity here to try to do some optimization.
2177 // Try to convert this into a G_SUB if the offset is a 0-x negate idiom.
2178 Register NegatedReg;
2179 if (!mi_match(I.getOperand(2).getReg(), MRI, m_Neg(m_Reg(NegatedReg))))
2180 return true;
2181 I.getOperand(2).setReg(NegatedReg);
2182 I.setDesc(TII.get(TargetOpcode::G_SUB));
2183 return true;
2184}
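// Rough before/after sketch of this lowering (made-up vregs):
//
//   %dst:gpr(p0) = G_PTR_ADD %base:gpr(p0), %off:gpr(s64)
//     -->  %intbase:gpr(s64) = G_PTRTOINT %base
//          %dst:gpr(s64)     = G_ADD %intbase, %off
//
// and if %off was matched as (0 - %neg), the G_ADD above is turned into
//          %dst:gpr(s64)     = G_SUB %intbase, %neg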
2185
2186 bool AArch64InstructionSelector::earlySelectSHL(MachineInstr &I,
2187 MachineRegisterInfo &MRI) {
2188 // We try to match the immediate variant of LSL, which is actually an alias
2189 // for a special case of UBFM. Otherwise, we fall back to the imported
2190 // selector which will match the register variant.
2191 assert(I.getOpcode() == TargetOpcode::G_SHL && "unexpected op");
2192 const auto &MO = I.getOperand(2);
2193 auto VRegAndVal = getIConstantVRegVal(MO.getReg(), MRI);
2194 if (!VRegAndVal)
2195 return false;
2196
2197 const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
2198 if (DstTy.isVector())
2199 return false;
2200 bool Is64Bit = DstTy.getSizeInBits() == 64;
2201 auto Imm1Fn = Is64Bit ? selectShiftA_64(MO) : selectShiftA_32(MO);
2202 auto Imm2Fn = Is64Bit ? selectShiftB_64(MO) : selectShiftB_32(MO);
2203
2204 if (!Imm1Fn || !Imm2Fn)
2205 return false;
2206
2207 auto NewI =
2208 MIB.buildInstr(Is64Bit ? AArch64::UBFMXri : AArch64::UBFMWri,
2209 {I.getOperand(0).getReg()}, {I.getOperand(1).getReg()});
2210
2211 for (auto &RenderFn : *Imm1Fn)
2212 RenderFn(NewI);
2213 for (auto &RenderFn : *Imm2Fn)
2214 RenderFn(NewI);
2215
2216 I.eraseFromParent();
2217 return constrainSelectedInstRegOperands(*NewI, TII, TRI, RBI);
2218}
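// Rough worked example of the LSL-as-UBFM alias used above (made-up vregs):
// for a 64-bit left shift by a constant c, immr = (64 - c) % 64 and
// imms = 63 - c, so e.g.
//
//   %d:gpr(s64) = G_SHL %x, 4   -->   %d = UBFMXri %x, 60, 59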
2219
2220 bool AArch64InstructionSelector::contractCrossBankCopyIntoStore(
2221 MachineInstr &I, MachineRegisterInfo &MRI) {
2222 assert(I.getOpcode() == TargetOpcode::G_STORE && "Expected G_STORE");
2223 // If we're storing a scalar, it doesn't matter what register bank that
2224 // scalar is on. All that matters is the size.
2225 //
2226 // So, if we see something like this (with a 32-bit scalar as an example):
2227 //
2228 // %x:gpr(s32) = ... something ...
2229 // %y:fpr(s32) = COPY %x:gpr(s32)
2230 // G_STORE %y:fpr(s32)
2231 //
2232 // We can fix this up into something like this:
2233 //
2234 // G_STORE %x:gpr(s32)
2235 //
2236 // And then continue the selection process normally.
2237 Register DefDstReg = getSrcRegIgnoringCopies(I.getOperand(0).getReg(), MRI);
2238 if (!DefDstReg.isValid())
2239 return false;
2240 LLT DefDstTy = MRI.getType(DefDstReg);
2241 Register StoreSrcReg = I.getOperand(0).getReg();
2242 LLT StoreSrcTy = MRI.getType(StoreSrcReg);
2243
2244 // If we get something strange like a physical register, then we shouldn't
2245 // go any further.
2246 if (!DefDstTy.isValid())
2247 return false;
2248
2249 // Are the source and dst types the same size?
2250 if (DefDstTy.getSizeInBits() != StoreSrcTy.getSizeInBits())
2251 return false;
2252
2253 if (RBI.getRegBank(StoreSrcReg, MRI, TRI) ==
2254 RBI.getRegBank(DefDstReg, MRI, TRI))
2255 return false;
2256
2257 // We have a cross-bank copy, which is entering a store. Let's fold it.
2258 I.getOperand(0).setReg(DefDstReg);
2259 return true;
2260}
2261
2262bool AArch64InstructionSelector::earlySelect(MachineInstr &I) {
2263 assert(I.getParent() && "Instruction should be in a basic block!");
2264 assert(I.getParent()->getParent() && "Instruction should be in a function!");
2265
2266 MachineBasicBlock &MBB = *I.getParent();
2267 MachineFunction &MF = *MBB.getParent();
2268 MachineRegisterInfo &MRI = MF.getRegInfo();
2269
2270 switch (I.getOpcode()) {
2271 case AArch64::G_DUP: {
2272 // Before selecting a DUP instruction, check if it is better selected as a
2273 // MOV or load from a constant pool.
2274 Register Src = I.getOperand(1).getReg();
2275 auto ValAndVReg = getAnyConstantVRegValWithLookThrough(Src, MRI);
2276 if (!ValAndVReg)
2277 return false;
2278 LLVMContext &Ctx = MF.getFunction().getContext();
2279 Register Dst = I.getOperand(0).getReg();
2280 auto *CV = ConstantDataVector::getSplat(
2281 MRI.getType(Dst).getNumElements(),
2282 ConstantInt::get(Type::getIntNTy(Ctx, MRI.getType(Src).getSizeInBits()),
2283 ValAndVReg->Value));
2284 if (!emitConstantVector(Dst, CV, MIB, MRI))
2285 return false;
2286 I.eraseFromParent();
2287 return true;
2288 }
2289 case TargetOpcode::G_SEXT:
2290 // Check for i64 sext(i32 vector_extract) prior to tablegen to select SMOV
2291 // over a normal extend.
2292 if (selectUSMovFromExtend(I, MRI))
2293 return true;
2294 return false;
2295 case TargetOpcode::G_BR:
2296 return false;
2297 case TargetOpcode::G_SHL:
2298 return earlySelectSHL(I, MRI);
2299 case TargetOpcode::G_CONSTANT: {
2300 bool IsZero = false;
2301 if (I.getOperand(1).isCImm())
2302 IsZero = I.getOperand(1).getCImm()->isZero();
2303 else if (I.getOperand(1).isImm())
2304 IsZero = I.getOperand(1).getImm() == 0;
2305
2306 if (!IsZero)
2307 return false;
2308
2309 Register DefReg = I.getOperand(0).getReg();
2310 LLT Ty = MRI.getType(DefReg);
2311 if (Ty.getSizeInBits() == 64) {
2312 I.getOperand(1).ChangeToRegister(AArch64::XZR, false);
2313 RBI.constrainGenericRegister(DefReg, AArch64::GPR64RegClass, MRI);
2314 } else if (Ty.getSizeInBits() == 32) {
2315 I.getOperand(1).ChangeToRegister(AArch64::WZR, false);
2316 RBI.constrainGenericRegister(DefReg, AArch64::GPR32RegClass, MRI);
2317 } else
2318 return false;
2319
2320 I.setDesc(TII.get(TargetOpcode::COPY));
2321 return true;
2322 }
2323
2324 case TargetOpcode::G_ADD: {
2325 // Check if this is being fed by a G_ICMP on either side.
2326 //
2327 // (cmp pred, x, y) + z
2328 //
2329 // In the above case, when the cmp is true, we increment z by 1. So, we can
2330 // fold the add into the cset for the cmp by using cinc.
2331 //
2332 // FIXME: This would probably be a lot nicer in PostLegalizerLowering.
2333 Register AddDst = I.getOperand(0).getReg();
2334 Register AddLHS = I.getOperand(1).getReg();
2335 Register AddRHS = I.getOperand(2).getReg();
2336 // Only handle scalars.
2337 LLT Ty = MRI.getType(AddLHS);
2338 if (Ty.isVector())
2339 return false;
2340 // Since G_ICMP is modeled as ADDS/SUBS/ANDS, we can handle 32 bits or 64
2341 // bits.
2342 unsigned Size = Ty.getSizeInBits();
2343 if (Size != 32 && Size != 64)
2344 return false;
2345 auto MatchCmp = [&](Register Reg) -> MachineInstr * {
2346 if (!MRI.hasOneNonDBGUse(Reg))
2347 return nullptr;
2348 // If the LHS of the add is 32 bits, then we want to fold a 32-bit
2349 // compare.
2350 if (Size == 32)
2351 return getOpcodeDef(TargetOpcode::G_ICMP, Reg, MRI);
2352 // We model scalar compares using 32-bit destinations right now.
2353 // If it's a 64-bit compare, it'll have 64-bit sources.
2354 Register ZExt;
2355 if (!mi_match(Reg, MRI,
2356 m_OneNonDBGUse(m_GZExt(m_OneNonDBGUse(m_Reg(ZExt))))))
2357 return nullptr;
2358 auto *Cmp = getOpcodeDef(TargetOpcode::G_ICMP, ZExt, MRI);
2359 if (!Cmp ||
2360 MRI.getType(Cmp->getOperand(2).getReg()).getSizeInBits() != 64)
2361 return nullptr;
2362 return Cmp;
2363 };
2364 // Try to match
2365 // z + (cmp pred, x, y)
2366 MachineInstr *Cmp = MatchCmp(AddRHS);
2367 if (!Cmp) {
2368 // (cmp pred, x, y) + z
2369 std::swap(AddLHS, AddRHS);
2370 Cmp = MatchCmp(AddRHS);
2371 if (!Cmp)
2372 return false;
2373 }
2374 auto &PredOp = Cmp->getOperand(1);
2375 auto Pred = static_cast<CmpInst::Predicate>(PredOp.getPredicate());
2376 const AArch64CC::CondCode InvCC =
2377 changeICMPPredToAArch64CC(CmpInst::getInversePredicate(Pred));
2378 MIB.setInstrAndDebugLoc(I);
2379 emitIntegerCompare(/*LHS=*/Cmp->getOperand(2),
2380 /*RHS=*/Cmp->getOperand(3), PredOp, MIB);
2381 emitCSINC(/*Dst=*/AddDst, /*Src =*/AddLHS, /*Src2=*/AddLHS, InvCC, MIB);
2382 I.eraseFromParent();
2383 return true;
2384 }
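// Rough sketch of the G_ADD fold above (made-up vregs): the compare is emitted
// as a flag-setting instruction and the add becomes a conditional increment of
// %z (CSINC with the inverted condition, i.e. an increment when the original
// predicate holds).
//
//   %c:gpr(s32) = G_ICMP intpred(eq), %x, %y
//   %d:gpr(s32) = G_ADD %z, %c
//     -->  <flag-setting compare of %x, %y>
//          %d = CSINCWr %z, %z, ne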
2385 case TargetOpcode::G_OR: {
2386 // Look for operations that take the lower `Width=Size-ShiftImm` bits of
2387 // `ShiftSrc` and insert them into the upper `Width` bits of `MaskSrc` via
2388 // shifting and masking that we can replace with a BFI (encoded as a BFM).
2389 Register Dst = I.getOperand(0).getReg();
2390 LLT Ty = MRI.getType(Dst);
2391
2392 if (!Ty.isScalar())
2393 return false;
2394
2395 unsigned Size = Ty.getSizeInBits();
2396 if (Size != 32 && Size != 64)
2397 return false;
2398
2399 Register ShiftSrc;
2400 int64_t ShiftImm;
2401 Register MaskSrc;
2402 int64_t MaskImm;
2403 if (!mi_match(
2404 Dst, MRI,
2405 m_GOr(m_OneNonDBGUse(m_GShl(m_Reg(ShiftSrc), m_ICst(ShiftImm))),
2406 m_OneNonDBGUse(m_GAnd(m_Reg(MaskSrc), m_ICst(MaskImm))))))
2407 return false;
2408
2409 if (ShiftImm > Size || ((1ULL << ShiftImm) - 1ULL) != uint64_t(MaskImm))
2410 return false;
2411
2412 int64_t Immr = Size - ShiftImm;
2413 int64_t Imms = Size - ShiftImm - 1;
2414 unsigned Opc = Size == 32 ? AArch64::BFMWri : AArch64::BFMXri;
2415 emitInstr(Opc, {Dst}, {MaskSrc, ShiftSrc, Immr, Imms}, MIB);
2416 I.eraseFromParent();
2417 return true;
2418 }
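// Rough worked example for the BFM emitted above (made-up vregs, Size = 32,
// ShiftImm = 16, MaskImm = 0xffff, so Immr = 16 and Imms = 15):
//
//   %d = G_OR (G_SHL %hi, 16), (G_AND %lo, 0xffff)
//     -->  %d = BFMWri %lo, %hi, 16, 15
//              (insert the low 16 bits of %hi into bits [31:16] of %lo)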
2419 case TargetOpcode::G_FENCE: {
2420 if (I.getOperand(1).getImm() == 0)
2421 BuildMI(MBB, I, MIMetadata(I), TII.get(TargetOpcode::MEMBARRIER));
2422 else
2423 BuildMI(MBB, I, MIMetadata(I), TII.get(AArch64::DMB))
2424 .addImm(I.getOperand(0).getImm() == 4 ? 0x9 : 0xb);
2425 I.eraseFromParent();
2426 return true;
2427 }
2428 default:
2429 return false;
2430 }
2431}
2432
2433bool AArch64InstructionSelector::select(MachineInstr &I) {
2434 assert(I.getParent() && "Instruction should be in a basic block!");
2435 assert(I.getParent()->getParent() && "Instruction should be in a function!");
2436
2437 MachineBasicBlock &MBB = *I.getParent();
2438 MachineFunction &MF = *MBB.getParent();
2439 MachineRegisterInfo &MRI = MF.getRegInfo();
2440
2441 const AArch64Subtarget *Subtarget = &MF.getSubtarget<AArch64Subtarget>();
2442 if (Subtarget->requiresStrictAlign()) {
2443 // We don't support this feature yet.
2444 LLVM_DEBUG(dbgs() << "AArch64 GISel does not support strict-align yet\n");
2445 return false;
2446 }
2447
2448 MIB.setInstrAndDebugLoc(I);
2449
2450 unsigned Opcode = I.getOpcode();
2451 // G_PHI requires the same handling as PHI
2452 if (!I.isPreISelOpcode() || Opcode == TargetOpcode::G_PHI) {
2453 // Certain non-generic instructions also need some special handling.
2454
2455 if (Opcode == TargetOpcode::LOAD_STACK_GUARD)
2456 return selectImpl(I, *CoverageInfo);
2457
2458 if (Opcode == TargetOpcode::PHI || Opcode == TargetOpcode::G_PHI) {
2459 const Register DefReg = I.getOperand(0).getReg();
2460 const LLT DefTy = MRI.getType(DefReg);
2461
2462 const RegClassOrRegBank &RegClassOrBank =
2463 MRI.getRegClassOrRegBank(DefReg);
2464
2465 const TargetRegisterClass *DefRC
2466 = RegClassOrBank.dyn_cast<const TargetRegisterClass *>();
2467 if (!DefRC) {
2468 if (!DefTy.isValid()) {
2469 LLVM_DEBUG(dbgs() << "PHI operand has no type, not a gvreg?\n");
2470 return false;
2471 }
2472 const RegisterBank &RB = *RegClassOrBank.get<const RegisterBank *>();
2473 DefRC = getRegClassForTypeOnBank(DefTy, RB);
2474 if (!DefRC) {
2475 LLVM_DEBUG(dbgs() << "PHI operand has unexpected size/bank\n");
2476 return false;
2477 }
2478 }
2479
2480 I.setDesc(TII.get(TargetOpcode::PHI));
2481
2482 return RBI.constrainGenericRegister(DefReg, *DefRC, MRI);
2483 }
2484
2485 if (I.isCopy())
2486 return selectCopy(I, TII, MRI, TRI, RBI);
2487
2488 if (I.isDebugInstr())
2489 return selectDebugInstr(I, MRI, RBI);
2490
2491 return true;
2492 }
2493
2494
2495 if (I.getNumOperands() != I.getNumExplicitOperands()) {
2496 LLVM_DEBUG(
2497 dbgs() << "Generic instruction has unexpected implicit operands\n");
2498 return false;
2499 }
2500
2501 // Try to do some lowering before we start instruction selecting. These
2502 // lowerings are purely transformations on the input G_MIR and so selection
2503 // must continue after any modification of the instruction.
2504 if (preISelLower(I)) {
2505 Opcode = I.getOpcode(); // The opcode may have been modified, refresh it.
2506 }
2507
2508 // There may be patterns where the importer can't deal with them optimally,
2509 // but does select it to a suboptimal sequence so our custom C++ selection
2510 // code later never has a chance to work on it. Therefore, we have an early
2511 // selection attempt here to give priority to certain selection routines
2512 // over the imported ones.
2513 if (earlySelect(I))
2514 return true;
2515
2516 if (selectImpl(I, *CoverageInfo))
2517 return true;
2518
2519 LLT Ty =
2520 I.getOperand(0).isReg() ? MRI.getType(I.getOperand(0).getReg()) : LLT{};
2521
2522 switch (Opcode) {
2523 case TargetOpcode::G_SBFX:
2524 case TargetOpcode::G_UBFX: {
2525 static const unsigned OpcTable[2][2] = {
2526 {AArch64::UBFMWri, AArch64::UBFMXri},
2527 {AArch64::SBFMWri, AArch64::SBFMXri}};
2528 bool IsSigned = Opcode == TargetOpcode::G_SBFX;
2529 unsigned Size = Ty.getSizeInBits();
2530 unsigned Opc = OpcTable[IsSigned][Size == 64];
2531 auto Cst1 =
2532 getIConstantVRegValWithLookThrough(I.getOperand(2).getReg(), MRI);
2533 assert(Cst1 && "Should have gotten a constant for src 1?");
2534 auto Cst2 =
2535 getIConstantVRegValWithLookThrough(I.getOperand(3).getReg(), MRI);
2536 assert(Cst2 && "Should have gotten a constant for src 2?");
2537 auto LSB = Cst1->Value.getZExtValue();
2538 auto Width = Cst2->Value.getZExtValue();
2539 auto BitfieldInst =
2540 MIB.buildInstr(Opc, {I.getOperand(0)}, {I.getOperand(1)})
2541 .addImm(LSB)
2542 .addImm(LSB + Width - 1);
2543 I.eraseFromParent();
2544 return constrainSelectedInstRegOperands(*BitfieldInst, TII, TRI, RBI);
2545 }
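// Rough worked example (made-up vregs): G_UBFX extracts Width bits starting at
// LSB, which the code above encodes as immr = LSB and imms = LSB + Width - 1:
//
//   %d:gpr(s32) = G_UBFX %x, 8, 4   -->   %d = UBFMWri %x, 8, 11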
2546 case TargetOpcode::G_BRCOND:
2547 return selectCompareBranch(I, MF, MRI);
2548
2549 case TargetOpcode::G_BRINDIRECT: {
2550 I.setDesc(TII.get(AArch64::BR));
2551 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2552 }
2553
2554 case TargetOpcode::G_BRJT:
2555 return selectBrJT(I, MRI);
2556
2557 case AArch64::G_ADD_LOW: {
2558 // This op may have been separated from its ADRP companion by the localizer
2559 // or some other code motion pass. Given that many CPUs will try to
2560 // macro fuse these operations anyway, select this into a MOVaddr pseudo
2561 // which will later be expanded into an ADRP+ADD pair after scheduling.
2562 MachineInstr *BaseMI = MRI.getVRegDef(I.getOperand(1).getReg());
2563 if (BaseMI->getOpcode() != AArch64::ADRP) {
2564 I.setDesc(TII.get(AArch64::ADDXri));
2565 I.addOperand(MachineOperand::CreateImm(0));
2566 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2567 }
2568 assert(TM.getCodeModel() == CodeModel::Small &&
2569 "Expected small code model");
2570 auto Op1 = BaseMI->getOperand(1);
2571 auto Op2 = I.getOperand(2);
2572 auto MovAddr = MIB.buildInstr(AArch64::MOVaddr, {I.getOperand(0)}, {})
2573 .addGlobalAddress(Op1.getGlobal(), Op1.getOffset(),
2574 Op1.getTargetFlags())
2575 .addGlobalAddress(Op2.getGlobal(), Op2.getOffset(),
2576 Op2.getTargetFlags());
2577 I.eraseFromParent();
2578 return constrainSelectedInstRegOperands(*MovAddr, TII, TRI, RBI);
2579 }
2580
2581 case TargetOpcode::G_FCONSTANT:
2582 case TargetOpcode::G_CONSTANT: {
2583 const bool isFP = Opcode == TargetOpcode::G_FCONSTANT;
2584
2585 const LLT s8 = LLT::scalar(8);
2586 const LLT s16 = LLT::scalar(16);
2587 const LLT s32 = LLT::scalar(32);
2588 const LLT s64 = LLT::scalar(64);
2589 const LLT s128 = LLT::scalar(128);
2590 const LLT p0 = LLT::pointer(0, 64);
2591
2592 const Register DefReg = I.getOperand(0).getReg();
2593 const LLT DefTy = MRI.getType(DefReg);
2594 const unsigned DefSize = DefTy.getSizeInBits();
2595 const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI);
2596
2597 // FIXME: Redundant check, but even less readable when factored out.
2598 if (isFP) {
2599 if (Ty != s16 && Ty != s32 && Ty != s64 && Ty != s128) {
2600 LLVM_DEBUG(dbgs() << "Unable to materialize FP " << Ty
2601 << " constant, expected: " << s16 << " or " << s32
2602 << " or " << s64 << " or " << s128 << '\n');
2603 return false;
2604 }
2605
2606 if (RB.getID() != AArch64::FPRRegBankID) {
2607 LLVM_DEBUG(dbgs() << "Unable to materialize FP " << Ty
2608 << " constant on bank: " << RB
2609 << ", expected: FPR\n");
2610 return false;
2611 }
2612
2613 // The case when we have 0.0 is covered by tablegen. Reject it here so we
2614 // can be sure tablegen works correctly and isn't rescued by this code.
2615 // 0.0 is not covered by tablegen for FP128. So we will handle this
2616 // scenario in the code here.
2617 if (DefSize != 128 && I.getOperand(1).getFPImm()->isExactlyValue(0.0))
2618 return false;
2619 } else {
2620 // s32 and s64 are covered by tablegen.
2621 if (Ty != p0 && Ty != s8 && Ty != s16) {
2622 LLVM_DEBUG(dbgs() << "Unable to materialize integer " << Ty
2623 << " constant, expected: " << s32 << ", " << s64
2624 << ", or " << p0 << '\n');
2625 return false;
2626 }
2627
2628 if (RB.getID() != AArch64::GPRRegBankID) {
2629 LLVM_DEBUG(dbgs() << "Unable to materialize integer " << Ty
2630 << " constant on bank: " << RB
2631 << ", expected: GPR\n");
2632 return false;
2633 }
2634 }
2635
2636 if (isFP) {
2637 const TargetRegisterClass &FPRRC = *getRegClassForTypeOnBank(DefTy, RB);
2638 // For 16, 64, and 128b values, emit a constant pool load.
2639 switch (DefSize) {
2640 default:
2641 llvm_unreachable("Unexpected destination size for G_FCONSTANT?");
2642 case 32:
2643 case 64: {
2644 bool OptForSize = shouldOptForSize(&MF);
2645 const auto &TLI = MF.getSubtarget().getTargetLowering();
2646 // If TLI says that this fpimm is illegal, then we'll expand to a
2647 // constant pool load.
2648 if (TLI->isFPImmLegal(I.getOperand(1).getFPImm()->getValueAPF(),
2649 EVT::getFloatingPointVT(DefSize), OptForSize))
2650 break;
2651 [[fallthrough]];
2652 }
2653 case 16:
2654 case 128: {
2655 auto *FPImm = I.getOperand(1).getFPImm();
2656 auto *LoadMI = emitLoadFromConstantPool(FPImm, MIB);
2657 if (!LoadMI) {
2658 LLVM_DEBUG(dbgs() << "Failed to load double constant pool entry\n");
2659 return false;
2660 }
2661 MIB.buildCopy({DefReg}, {LoadMI->getOperand(0).getReg()});
2662 I.eraseFromParent();
2663 return RBI.constrainGenericRegister(DefReg, FPRRC, MRI);
2664 }
2665 }
2666
2667 assert((DefSize == 32 || DefSize == 64) && "Unexpected const def size");
2668 // Either emit a FMOV, or emit a copy to emit a normal mov.
2669 const Register DefGPRReg = MRI.createVirtualRegister(
2670 DefSize == 32 ? &AArch64::GPR32RegClass : &AArch64::GPR64RegClass);
2671 MachineOperand &RegOp = I.getOperand(0);
2672 RegOp.setReg(DefGPRReg);
2673 MIB.setInsertPt(MIB.getMBB(), std::next(I.getIterator()));
2674 MIB.buildCopy({DefReg}, {DefGPRReg});
2675
2676 if (!RBI.constrainGenericRegister(DefReg, FPRRC, MRI)) {
2677 LLVM_DEBUG(dbgs() << "Failed to constrain G_FCONSTANT def operand\n");
2678 return false;
2679 }
2680
2681 MachineOperand &ImmOp = I.getOperand(1);
2682 // FIXME: Is going through int64_t always correct?
2683 ImmOp.ChangeToImmediate(
2684 ImmOp.getFPImm()->getValueAPF().bitcastToAPInt().getZExtValue());
2685 } else if (I.getOperand(1).isCImm()) {
2686 uint64_t Val = I.getOperand(1).getCImm()->getZExtValue();
2687 I.getOperand(1).ChangeToImmediate(Val);
2688 } else if (I.getOperand(1).isImm()) {
2689 uint64_t Val = I.getOperand(1).getImm();
2690 I.getOperand(1).ChangeToImmediate(Val);
2691 }
2692
2693 const unsigned MovOpc =
2694 DefSize == 64 ? AArch64::MOVi64imm : AArch64::MOVi32imm;
2695 I.setDesc(TII.get(MovOpc));
2696 constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2697 return true;
2698 }
2699 case TargetOpcode::G_EXTRACT: {
2700 Register DstReg = I.getOperand(0).getReg();
2701 Register SrcReg = I.getOperand(1).getReg();
2702 LLT SrcTy = MRI.getType(SrcReg);
2703 LLT DstTy = MRI.getType(DstReg);
2704 (void)DstTy;
2705 unsigned SrcSize = SrcTy.getSizeInBits();
2706
2707 if (SrcTy.getSizeInBits() > 64) {
2708 // This should be an extract of an s128, which is like a vector extract.
2709 if (SrcTy.getSizeInBits() != 128)
2710 return false;
2711 // Only support extracting 64 bits from an s128 at the moment.
2712 if (DstTy.getSizeInBits() != 64)
2713 return false;
2714
2715 unsigned Offset = I.getOperand(2).getImm();
2716 if (Offset % 64 != 0)
2717 return false;
2718
2719 // Check we have the right regbank always.
2720 const RegisterBank &SrcRB = *RBI.getRegBank(SrcReg, MRI, TRI);
2721 const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
2722 assert(SrcRB.getID() == DstRB.getID() && "Wrong extract regbank!");
2723
2724 if (SrcRB.getID() == AArch64::GPRRegBankID) {
2725 auto NewI =
2726 MIB.buildInstr(TargetOpcode::COPY, {DstReg}, {})
2727 .addUse(SrcReg, 0,
2728 Offset == 0 ? AArch64::sube64 : AArch64::subo64);
2729 constrainOperandRegClass(MF, TRI, MRI, TII, RBI, *NewI,
2730 AArch64::GPR64RegClass, NewI->getOperand(0));
2731 I.eraseFromParent();
2732 return true;
2733 }
2734
2735 // Emit the same code as a vector extract.
2736 // Offset must be a multiple of 64.
2737 unsigned LaneIdx = Offset / 64;
2738 MachineInstr *Extract = emitExtractVectorElt(
2739 DstReg, DstRB, LLT::scalar(64), SrcReg, LaneIdx, MIB);
2740 if (!Extract)
2741 return false;
2742 I.eraseFromParent();
2743 return true;
2744 }
2745
2746 I.setDesc(TII.get(SrcSize == 64 ? AArch64::UBFMXri : AArch64::UBFMWri));
2747 MachineInstrBuilder(MF, I).addImm(I.getOperand(2).getImm() +
2748 Ty.getSizeInBits() - 1);
2749
2750 if (SrcSize < 64) {
2751 assert(SrcSize == 32 && DstTy.getSizeInBits() == 16 &&
2752 "unexpected G_EXTRACT types");
2754 }
2755
2756 DstReg = MRI.createGenericVirtualRegister(LLT::scalar(64));
2757 MIB.setInsertPt(MIB.getMBB(), std::next(I.getIterator()));
2758 MIB.buildInstr(TargetOpcode::COPY, {I.getOperand(0).getReg()}, {})
2759 .addReg(DstReg, 0, AArch64::sub_32);
2760 RBI.constrainGenericRegister(I.getOperand(0).getReg(),
2761 AArch64::GPR32RegClass, MRI);
2762 I.getOperand(0).setReg(DstReg);
2763
2764 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2765 }
2766
2767 case TargetOpcode::G_INSERT: {
2768 LLT SrcTy = MRI.getType(I.getOperand(2).getReg());
2769 LLT DstTy = MRI.getType(I.getOperand(0).getReg());
2770 unsigned DstSize = DstTy.getSizeInBits();
2771 // Larger inserts are vectors, same-size ones should be something else by
2772 // now (split up or turned into COPYs).
2773 if (Ty.getSizeInBits() > 64 || SrcTy.getSizeInBits() > 32)
2774 return false;
2775
2776 I.setDesc(TII.get(DstSize == 64 ? AArch64::BFMXri : AArch64::BFMWri));
2777 unsigned LSB = I.getOperand(3).getImm();
2778 unsigned Width = MRI.getType(I.getOperand(2).getReg()).getSizeInBits();
2779 I.getOperand(3).setImm((DstSize - LSB) % DstSize);
2780 MachineInstrBuilder(MF, I).addImm(Width - 1);
2781
2782 if (DstSize < 64) {
2783 assert(DstSize == 32 && SrcTy.getSizeInBits() == 16 &&
2784 "unexpected G_INSERT types");
2786 }
2787
2788 Register SrcReg = MRI.createGenericVirtualRegister(LLT::scalar(64));
2789 BuildMI(MBB, I.getIterator(), I.getDebugLoc(),
2790 TII.get(AArch64::SUBREG_TO_REG))
2791 .addDef(SrcReg)
2792 .addImm(0)
2793 .addUse(I.getOperand(2).getReg())
2794 .addImm(AArch64::sub_32);
2795 RBI.constrainGenericRegister(I.getOperand(2).getReg(),
2796 AArch64::GPR32RegClass, MRI);
2797 I.getOperand(2).setReg(SrcReg);
2798
2799 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2800 }
2801 case TargetOpcode::G_FRAME_INDEX: {
2802 // allocas and G_FRAME_INDEX are only supported in addrspace(0).
2803 if (Ty != LLT::pointer(0, 64)) {
2804 LLVM_DEBUG(dbgs() << "G_FRAME_INDEX pointer has type: " << Ty
2805 << ", expected: " << LLT::pointer(0, 64) << '\n');
2806 return false;
2807 }
2808 I.setDesc(TII.get(AArch64::ADDXri));
2809
2810 // MOs for a #0 shifted immediate.
2811 I.addOperand(MachineOperand::CreateImm(0));
2812 I.addOperand(MachineOperand::CreateImm(0));
2813
2814 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2815 }
2816
2817 case TargetOpcode::G_GLOBAL_VALUE: {
2818 const GlobalValue *GV = nullptr;
2819 unsigned OpFlags;
2820 if (I.getOperand(1).isSymbol()) {
2821 OpFlags = I.getOperand(1).getTargetFlags();
2822 // Currently only used by "RtLibUseGOT".
2823 assert(OpFlags == AArch64II::MO_GOT);
2824 } else {
2825 GV = I.getOperand(1).getGlobal();
2826 if (GV->isThreadLocal())
2827 return selectTLSGlobalValue(I, MRI);
2828 OpFlags = STI.ClassifyGlobalReference(GV, TM);
2829 }
2830
2831 if (OpFlags & AArch64II::MO_GOT) {
2832 I.setDesc(TII.get(AArch64::LOADgot));
2833 I.getOperand(1).setTargetFlags(OpFlags);
2834 } else if (TM.getCodeModel() == CodeModel::Large &&
2835 !TM.isPositionIndependent()) {
2836 // Materialize the global using movz/movk instructions.
2837 materializeLargeCMVal(I, GV, OpFlags);
2838 I.eraseFromParent();
2839 return true;
2840 } else if (TM.getCodeModel() == CodeModel::Tiny) {
2841 I.setDesc(TII.get(AArch64::ADR));
2842 I.getOperand(1).setTargetFlags(OpFlags);
2843 } else {
2844 I.setDesc(TII.get(AArch64::MOVaddr));
2845 I.getOperand(1).setTargetFlags(OpFlags | AArch64II::MO_PAGE);
2846 MachineInstrBuilder MIB(MF, I);
2847 MIB.addGlobalAddress(GV, I.getOperand(1).getOffset(),
2848 OpFlags | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
2849 }
2850 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2851 }
2852
2853 case TargetOpcode::G_PTRAUTH_GLOBAL_VALUE:
2854 return selectPtrAuthGlobalValue(I, MRI);
2855
2856 case TargetOpcode::G_ZEXTLOAD:
2857 case TargetOpcode::G_LOAD:
2858 case TargetOpcode::G_STORE: {
2859 GLoadStore &LdSt = cast<GLoadStore>(I);
2860 bool IsZExtLoad = I.getOpcode() == TargetOpcode::G_ZEXTLOAD;
2861 LLT PtrTy = MRI.getType(LdSt.getPointerReg());
2862
2863 if (PtrTy != LLT::pointer(0, 64)) {
2864 LLVM_DEBUG(dbgs() << "Load/Store pointer has type: " << PtrTy
2865 << ", expected: " << LLT::pointer(0, 64) << '\n');
2866 return false;
2867 }
2868
2869 uint64_t MemSizeInBytes = LdSt.getMemSize().getValue();
2870 unsigned MemSizeInBits = LdSt.getMemSizeInBits().getValue();
2871 AtomicOrdering Order = LdSt.getMMO().getSuccessOrdering();
2872
2873 // Need special instructions for atomics that affect ordering.
2874 if (Order != AtomicOrdering::NotAtomic &&
2875 Order != AtomicOrdering::Unordered &&
2876 Order != AtomicOrdering::Monotonic) {
2877 assert(!isa<GZExtLoad>(LdSt));
2878 assert(MemSizeInBytes <= 8 &&
2879 "128-bit atomics should already be custom-legalized");
2880
2881 if (isa<GLoad>(LdSt)) {
2882 static constexpr unsigned LDAPROpcodes[] = {
2883 AArch64::LDAPRB, AArch64::LDAPRH, AArch64::LDAPRW, AArch64::LDAPRX};
2884 static constexpr unsigned LDAROpcodes[] = {
2885 AArch64::LDARB, AArch64::LDARH, AArch64::LDARW, AArch64::LDARX};
2886 ArrayRef<unsigned> Opcodes =
2887 STI.hasRCPC() && Order != AtomicOrdering::SequentiallyConsistent
2888 ? LDAPROpcodes
2889 : LDAROpcodes;
2890 I.setDesc(TII.get(Opcodes[Log2_32(MemSizeInBytes)]));
2891 } else {
2892 static constexpr unsigned Opcodes[] = {AArch64::STLRB, AArch64::STLRH,
2893 AArch64::STLRW, AArch64::STLRX};
2894 Register ValReg = LdSt.getReg(0);
2895 if (MRI.getType(ValReg).getSizeInBits() == 64 && MemSizeInBits != 64) {
2896 // Emit a subreg copy of 32 bits.
2897 Register NewVal = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
2898 MIB.buildInstr(TargetOpcode::COPY, {NewVal}, {})
2899 .addReg(I.getOperand(0).getReg(), 0, AArch64::sub_32);
2900 I.getOperand(0).setReg(NewVal);
2901 }
2902 I.setDesc(TII.get(Opcodes[Log2_32(MemSizeInBytes)]));
2903 }
2904 constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2905 return true;
2906 }
2907
2908#ifndef NDEBUG
2909 const Register PtrReg = LdSt.getPointerReg();
2910 const RegisterBank &PtrRB = *RBI.getRegBank(PtrReg, MRI, TRI);
2911 // Check that the pointer register is valid.
2912 assert(PtrRB.getID() == AArch64::GPRRegBankID &&
2913 "Load/Store pointer operand isn't a GPR");
2914 assert(MRI.getType(PtrReg).isPointer() &&
2915 "Load/Store pointer operand isn't a pointer");
2916#endif
2917
2918 const Register ValReg = LdSt.getReg(0);
2919 const LLT ValTy = MRI.getType(ValReg);
2920 const RegisterBank &RB = *RBI.getRegBank(ValReg, MRI, TRI);
2921
2922 // The code below doesn't support truncating stores, so we need to split it
2923 // again.
2924 if (isa<GStore>(LdSt) && ValTy.getSizeInBits() > MemSizeInBits) {
2925 unsigned SubReg;
2926 LLT MemTy = LdSt.getMMO().getMemoryType();
2927 auto *RC = getRegClassForTypeOnBank(MemTy, RB);
2928 if (!getSubRegForClass(RC, TRI, SubReg))
2929 return false;
2930
2931 // Generate a subreg copy.
2932 auto Copy = MIB.buildInstr(TargetOpcode::COPY, {MemTy}, {})
2933 .addReg(ValReg, 0, SubReg)
2934 .getReg(0);
2935 RBI.constrainGenericRegister(Copy, *RC, MRI);
2936 LdSt.getOperand(0).setReg(Copy);
2937 } else if (isa<GLoad>(LdSt) && ValTy.getSizeInBits() > MemSizeInBits) {
2938 // If this is an any-extending load from the FPR bank, split it into a regular
2939 // load + extend.
2940 if (RB.getID() == AArch64::FPRRegBankID) {
2941 unsigned SubReg;
2942 LLT MemTy = LdSt.getMMO().getMemoryType();
2943 auto *RC = getRegClassForTypeOnBank(MemTy, RB);
2944 if (!getSubRegForClass(RC, TRI, SubReg))
2945 return false;
2946 Register OldDst = LdSt.getReg(0);
2947 Register NewDst =
2948 MRI.createGenericVirtualRegister(LdSt.getMMO().getMemoryType());
2949 LdSt.getOperand(0).setReg(NewDst);
2950 MRI.setRegBank(NewDst, RB);
2951 // Generate a SUBREG_TO_REG to extend it.
2952 MIB.setInsertPt(MIB.getMBB(), std::next(LdSt.getIterator()));
2953 MIB.buildInstr(AArch64::SUBREG_TO_REG, {OldDst}, {})
2954 .addImm(0)
2955 .addUse(NewDst)
2956 .addImm(SubReg);
2957 auto SubRegRC = getRegClassForTypeOnBank(MRI.getType(OldDst), RB);
2958 RBI.constrainGenericRegister(OldDst, *SubRegRC, MRI);
2959 MIB.setInstr(LdSt);
2960 }
2961 }
2962
2963 // Helper lambda for partially selecting I. Either returns the original
2964 // instruction with an updated opcode, or a new instruction.
2965 auto SelectLoadStoreAddressingMode = [&]() -> MachineInstr * {
2966 bool IsStore = isa<GStore>(I);
2967 const unsigned NewOpc =
2968 selectLoadStoreUIOp(I.getOpcode(), RB.getID(), MemSizeInBits);
2969 if (NewOpc == I.getOpcode())
2970 return nullptr;
2971 // Check if we can fold anything into the addressing mode.
2972 auto AddrModeFns =
2973 selectAddrModeIndexed(I.getOperand(1), MemSizeInBytes);
2974 if (!AddrModeFns) {
2975 // Can't fold anything. Use the original instruction.
2976 I.setDesc(TII.get(NewOpc));
2977 I.addOperand(MachineOperand::CreateImm(0));
2978 return &I;
2979 }
2980
2981 // Folded something. Create a new instruction and return it.
2982 auto NewInst = MIB.buildInstr(NewOpc, {}, {}, I.getFlags());
2983 Register CurValReg = I.getOperand(0).getReg();
2984 IsStore ? NewInst.addUse(CurValReg) : NewInst.addDef(CurValReg);
2985 NewInst.cloneMemRefs(I);
2986 for (auto &Fn : *AddrModeFns)
2987 Fn(NewInst);
2988 I.eraseFromParent();
2989 return &*NewInst;
2990 };
2991
2992 MachineInstr *LoadStore = SelectLoadStoreAddressingMode();
2993 if (!LoadStore)
2994 return false;
2995
2996 // If we're storing a 0, use WZR/XZR.
2997 if (Opcode == TargetOpcode::G_STORE) {
2998 auto CVal = getIConstantVRegValWithLookThrough(
2999 LoadStore->getOperand(0).getReg(), MRI);
3000 if (CVal && CVal->Value == 0) {
3001 switch (LoadStore->getOpcode()) {
3002 case AArch64::STRWui:
3003 case AArch64::STRHHui:
3004 case AArch64::STRBBui:
3005 LoadStore->getOperand(0).setReg(AArch64::WZR);
3006 break;
3007 case AArch64::STRXui:
3008 LoadStore->getOperand(0).setReg(AArch64::XZR);
3009 break;
3010 }
3011 }
3012 }
3013
3014 if (IsZExtLoad || (Opcode == TargetOpcode::G_LOAD &&
3015 ValTy == LLT::scalar(64) && MemSizeInBits == 32)) {
3016 // The any/zextload from a smaller type to i32 should be handled by the
3017 // importer.
3018 if (MRI.getType(LoadStore->getOperand(0).getReg()).getSizeInBits() != 64)
3019 return false;
3020 // If we have an extending load then change the load's type to be a
3021 // narrower reg and zero_extend with SUBREG_TO_REG.
3022 Register LdReg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
3023 Register DstReg = LoadStore->getOperand(0).getReg();
3024 LoadStore->getOperand(0).setReg(LdReg);
3025
3026 MIB.setInsertPt(MIB.getMBB(), std::next(LoadStore->getIterator()));
3027 MIB.buildInstr(AArch64::SUBREG_TO_REG, {DstReg}, {})
3028 .addImm(0)
3029 .addUse(LdReg)
3030 .addImm(AArch64::sub_32);
3031 constrainSelectedInstRegOperands(*LoadStore, TII, TRI, RBI);
3032 return RBI.constrainGenericRegister(DstReg, AArch64::GPR64allRegClass,
3033 MRI);
3034 }
3035 return constrainSelectedInstRegOperands(*LoadStore, TII, TRI, RBI);
3036 }
3037
3038 case TargetOpcode::G_INDEXED_ZEXTLOAD:
3039 case TargetOpcode::G_INDEXED_SEXTLOAD:
3040 return selectIndexedExtLoad(I, MRI);
3041 case TargetOpcode::G_INDEXED_LOAD:
3042 return selectIndexedLoad(I, MRI);
3043 case TargetOpcode::G_INDEXED_STORE:
3044 return selectIndexedStore(cast<GIndexedStore>(I), MRI);
3045
3046 case TargetOpcode::G_LSHR:
3047 case TargetOpcode::G_ASHR:
3048 if (MRI.getType(I.getOperand(0).getReg()).isVector())
3049 return selectVectorAshrLshr(I, MRI);
3050 [[fallthrough]];
3051 case TargetOpcode::G_SHL:
3052 if (Opcode == TargetOpcode::G_SHL &&
3053 MRI.getType(I.getOperand(0).getReg()).isVector())
3054 return selectVectorSHL(I, MRI);
3055
3056 // These shifts were legalized to have 64-bit shift amounts because we
3057 // want to take advantage of the selection patterns that assume the
3058 // immediates are s64s; however, selectBinaryOp assumes both operands
3059 // have the same bit size.
3060 {
3061 Register SrcReg = I.getOperand(1).getReg();
3062 Register ShiftReg = I.getOperand(2).getReg();
3063 const LLT ShiftTy = MRI.getType(ShiftReg);
3064 const LLT SrcTy = MRI.getType(SrcReg);
3065 if (!SrcTy.isVector() && SrcTy.getSizeInBits() == 32 &&
3066 ShiftTy.getSizeInBits() == 64) {
3067 assert(!ShiftTy.isVector() && "unexpected vector shift ty");
3068 // Insert a subregister copy to implement a 64->32 trunc
3069 auto Trunc = MIB.buildInstr(TargetOpcode::COPY, {SrcTy}, {})
3070 .addReg(ShiftReg, 0, AArch64::sub_32);
3071 MRI.setRegBank(Trunc.getReg(0), RBI.getRegBank(AArch64::GPRRegBankID));
3072 I.getOperand(2).setReg(Trunc.getReg(0));
3073 }
3074 }
3075 [[fallthrough]];
3076 case TargetOpcode::G_OR: {
3077 // Reject the various things we don't support yet.
3078 if (unsupportedBinOp(I, RBI, MRI, TRI))
3079 return false;
3080
3081 const unsigned OpSize = Ty.getSizeInBits();
3082
3083 const Register DefReg = I.getOperand(0).getReg();
3084 const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI);
3085
3086 const unsigned NewOpc = selectBinaryOp(I.getOpcode(), RB.getID(), OpSize);
3087 if (NewOpc == I.getOpcode())
3088 return false;
3089
3090 I.setDesc(TII.get(NewOpc));
3091 // FIXME: Should the type be always reset in setDesc?
3092
3093 // Now that we selected an opcode, we need to constrain the register
3094 // operands to use appropriate classes.
3095 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3096 }
3097
3098 case TargetOpcode::G_PTR_ADD: {
3099 emitADD(I.getOperand(0).getReg(), I.getOperand(1), I.getOperand(2), MIB);
3100 I.eraseFromParent();
3101 return true;
3102 }
3103
3104 case TargetOpcode::G_SADDE:
3105 case TargetOpcode::G_UADDE:
3106 case TargetOpcode::G_SSUBE:
3107 case TargetOpcode::G_USUBE:
3108 case TargetOpcode::G_SADDO:
3109 case TargetOpcode::G_UADDO:
3110 case TargetOpcode::G_SSUBO:
3111 case TargetOpcode::G_USUBO:
3112 return selectOverflowOp(I, MRI);
3113
3114 case TargetOpcode::G_PTRMASK: {
3115 Register MaskReg = I.getOperand(2).getReg();
3116 std::optional<int64_t> MaskVal = getIConstantVRegSExtVal(MaskReg, MRI);
3117 // TODO: Implement arbitrary cases
3118 if (!MaskVal || !isShiftedMask_64(*MaskVal))
3119 return false;
3120
3121 uint64_t Mask = *MaskVal;
3122 I.setDesc(TII.get(AArch64::ANDXri));
3123 I.getOperand(2).ChangeToImmediate(
3124 AArch64_AM::encodeLogicalImmediate(Mask, 64));
3125
3126 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3127 }
3128 case TargetOpcode::G_PTRTOINT:
3129 case TargetOpcode::G_TRUNC: {
3130 const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
3131 const LLT SrcTy = MRI.getType(I.getOperand(1).getReg());
3132
3133 const Register DstReg = I.getOperand(0).getReg();
3134 const Register SrcReg = I.getOperand(1).getReg();
3135
3136 const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
3137 const RegisterBank &SrcRB = *RBI.getRegBank(SrcReg, MRI, TRI);
3138
3139 if (DstRB.getID() != SrcRB.getID()) {
3140 LLVM_DEBUG(
3141 dbgs() << "G_TRUNC/G_PTRTOINT input/output on different banks\n");
3142 return false;
3143 }
3144
3145 if (DstRB.getID() == AArch64::GPRRegBankID) {
3146 const TargetRegisterClass *DstRC = getRegClassForTypeOnBank(DstTy, DstRB);
3147 if (!DstRC)
3148 return false;
3149
3150 const TargetRegisterClass *SrcRC = getRegClassForTypeOnBank(SrcTy, SrcRB);
3151 if (!SrcRC)
3152 return false;
3153
3154 if (!RBI.constrainGenericRegister(SrcReg, *SrcRC, MRI) ||
3155 !RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) {
3156 LLVM_DEBUG(dbgs() << "Failed to constrain G_TRUNC/G_PTRTOINT\n");
3157 return false;
3158 }
3159
3160 if (DstRC == SrcRC) {
3161 // Nothing to be done
3162 } else if (Opcode == TargetOpcode::G_TRUNC && DstTy == LLT::scalar(32) &&
3163 SrcTy == LLT::scalar(64)) {
3164 llvm_unreachable("TableGen can import this case");
3165 return false;
3166 } else if (DstRC == &AArch64::GPR32RegClass &&
3167 SrcRC == &AArch64::GPR64RegClass) {
3168 I.getOperand(1).setSubReg(AArch64::sub_32);
3169 } else {
3170 LLVM_DEBUG(
3171 dbgs() << "Unhandled mismatched classes in G_TRUNC/G_PTRTOINT\n");
3172 return false;
3173 }
3174
3175 I.setDesc(TII.get(TargetOpcode::COPY));
3176 return true;
3177 } else if (DstRB.getID() == AArch64::FPRRegBankID) {
3178 if (DstTy == LLT::fixed_vector(4, 16) &&
3179 SrcTy == LLT::fixed_vector(4, 32)) {
3180 I.setDesc(TII.get(AArch64::XTNv4i16));
3181 constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3182 return true;
3183 }
3184
3185 if (!SrcTy.isVector() && SrcTy.getSizeInBits() == 128) {
3186 MachineInstr *Extract = emitExtractVectorElt(
3187 DstReg, DstRB, LLT::scalar(DstTy.getSizeInBits()), SrcReg, 0, MIB);
3188 if (!Extract)
3189 return false;
3190 I.eraseFromParent();
3191 return true;
3192 }
3193
3194 // We might have a vector G_PTRTOINT, in which case just emit a COPY.
3195 if (Opcode == TargetOpcode::G_PTRTOINT) {
3196 assert(DstTy.isVector() && "Expected an FPR ptrtoint to be a vector");
3197 I.setDesc(TII.get(TargetOpcode::COPY));
3198 return selectCopy(I, TII, MRI, TRI, RBI);
3199 }
3200 }
3201
3202 return false;
3203 }
3204
3205 case TargetOpcode::G_ANYEXT: {
3206 if (selectUSMovFromExtend(I, MRI))
3207 return true;
3208
3209 const Register DstReg = I.getOperand(0).getReg();
3210 const Register SrcReg = I.getOperand(1).getReg();
3211
3212 const RegisterBank &RBDst = *RBI.getRegBank(DstReg, MRI, TRI);
3213 if (RBDst.getID() != AArch64::GPRRegBankID) {
3214 LLVM_DEBUG(dbgs() << "G_ANYEXT on bank: " << RBDst
3215 << ", expected: GPR\n");
3216 return false;
3217 }
3218
3219 const RegisterBank &RBSrc = *RBI.getRegBank(SrcReg, MRI, TRI);
3220 if (RBSrc.getID() != AArch64::GPRRegBankID) {
3221 LLVM_DEBUG(dbgs() << "G_ANYEXT on bank: " << RBSrc
3222 << ", expected: GPR\n");
3223 return false;
3224 }
3225
3226 const unsigned DstSize = MRI.getType(DstReg).getSizeInBits();
3227
3228 if (DstSize == 0) {
3229 LLVM_DEBUG(dbgs() << "G_ANYEXT operand has no size, not a gvreg?\n");
3230 return false;
3231 }
3232
3233 if (DstSize != 64 && DstSize > 32) {
3234 LLVM_DEBUG(dbgs() << "G_ANYEXT to size: " << DstSize
3235 << ", expected: 32 or 64\n");
3236 return false;
3237 }
3238 // At this point G_ANYEXT is just like a plain COPY, but we need
3239 // to explicitly form the 64-bit value if any.
3240 if (DstSize > 32) {
3241 Register ExtSrc = MRI.createVirtualRegister(&AArch64::GPR64allRegClass);
3242 BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::SUBREG_TO_REG))
3243 .addDef(ExtSrc)
3244 .addImm(0)
3245 .addUse(SrcReg)
3246 .addImm(AArch64::sub_32);
3247 I.getOperand(1).setReg(ExtSrc);
3248 }
3249 return selectCopy(I, TII, MRI, TRI, RBI);
3250 }
3251
3252 case TargetOpcode::G_ZEXT:
3253 case TargetOpcode::G_SEXT_INREG:
3254 case TargetOpcode::G_SEXT: {
3255 if (selectUSMovFromExtend(I, MRI))
3256 return true;
3257
3258 unsigned Opcode = I.getOpcode();
3259 const bool IsSigned = Opcode != TargetOpcode::G_ZEXT;
3260 const Register DefReg = I.getOperand(0).getReg();
3261 Register SrcReg = I.getOperand(1).getReg();
3262 const LLT DstTy = MRI.getType(DefReg);
3263 const LLT SrcTy = MRI.getType(SrcReg);
3264 unsigned DstSize = DstTy.getSizeInBits();
3265 unsigned SrcSize = SrcTy.getSizeInBits();
3266
3267 // SEXT_INREG has the same src reg size as dst; the size of the value to be
3268 // extended is encoded in the imm.
3269 if (Opcode == TargetOpcode::G_SEXT_INREG)
3270 SrcSize = I.getOperand(2).getImm();
3271
3272 if (DstTy.isVector())
3273 return false; // Should be handled by imported patterns.
3274
3275 assert((*RBI.getRegBank(DefReg, MRI, TRI)).getID() ==
3276 AArch64::GPRRegBankID &&
3277 "Unexpected ext regbank");
3278
3279 MachineInstr *ExtI;
3280
3281 // First, check whether we're extending the result of a load whose destination
3282 // type is smaller than 32 bits; if so, this zext is redundant. GPR32 is the smallest
3283 // GPR register on AArch64 and all loads which are smaller automatically
3284 // zero-extend the upper bits. E.g.
3285 // %v(s8) = G_LOAD %p, :: (load 1)
3286 // %v2(s32) = G_ZEXT %v(s8)
3287 if (!IsSigned) {
3288 auto *LoadMI = getOpcodeDef(TargetOpcode::G_LOAD, SrcReg, MRI);
3289 bool IsGPR =
3290 RBI.getRegBank(SrcReg, MRI, TRI)->getID() == AArch64::GPRRegBankID;
3291 if (LoadMI && IsGPR) {
3292 const MachineMemOperand *MemOp = *LoadMI->memoperands_begin();
3293 unsigned BytesLoaded = MemOp->getSize().getValue();
3294 if (BytesLoaded < 4 && SrcTy.getSizeInBytes() == BytesLoaded)
3295 return selectCopy(I, TII, MRI, TRI, RBI);
3296 }
3297
3298 // For the 32-bit -> 64-bit case, we can emit a mov (ORRWrs)
3299 // + SUBREG_TO_REG.
3300 if (IsGPR && SrcSize == 32 && DstSize == 64) {
3301 Register SubregToRegSrc =
3302 MRI.createVirtualRegister(&AArch64::GPR32RegClass);
3303 const Register ZReg = AArch64::WZR;
3304 MIB.buildInstr(AArch64::ORRWrs, {SubregToRegSrc}, {ZReg, SrcReg})
3305 .addImm(0);
3306
3307 MIB.buildInstr(AArch64::SUBREG_TO_REG, {DefReg}, {})
3308 .addImm(0)
3309 .addUse(SubregToRegSrc)
3310 .addImm(AArch64::sub_32);
3311
3312 if (!RBI.constrainGenericRegister(DefReg, AArch64::GPR64RegClass,
3313 MRI)) {
3314 LLVM_DEBUG(dbgs() << "Failed to constrain G_ZEXT destination\n");
3315 return false;
3316 }
3317
3318 if (!RBI.constrainGenericRegister(SrcReg, AArch64::GPR32RegClass,
3319 MRI)) {
3320 LLVM_DEBUG(dbgs() << "Failed to constrain G_ZEXT source\n");
3321 return false;
3322 }
3323
3324 I.eraseFromParent();
3325 return true;
3326 }
3327 }
3328
3329 if (DstSize == 64) {
3330 if (Opcode != TargetOpcode::G_SEXT_INREG) {
3331 // FIXME: Can we avoid manually doing this?
3332 if (!RBI.constrainGenericRegister(SrcReg, AArch64::GPR32RegClass,
3333 MRI)) {
3334 LLVM_DEBUG(dbgs() << "Failed to constrain " << TII.getName(Opcode)
3335 << " operand\n");
3336 return false;
3337 }
3338 SrcReg = MIB.buildInstr(AArch64::SUBREG_TO_REG,
3339 {&AArch64::GPR64RegClass}, {})
3340 .addImm(0)
3341 .addUse(SrcReg)
3342 .addImm(AArch64::sub_32)
3343 .getReg(0);
3344 }
3345
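// A bitfield move with immr = 0 and imms = SrcSize - 1 extracts and
// sign-/zero-extends the low SrcSize bits, which is exactly the extension
// we need here (SBFM for signed, UBFM for unsigned).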
3346 ExtI = MIB.buildInstr(IsSigned ? AArch64::SBFMXri : AArch64::UBFMXri,
3347 {DefReg}, {SrcReg})
3348 .addImm(0)
3349 .addImm(SrcSize - 1);
3350 } else if (DstSize <= 32) {
3351 ExtI = MIB.buildInstr(IsSigned ? AArch64::SBFMWri : AArch64::UBFMWri,
3352 {DefReg}, {SrcReg})
3353 .addImm(0)
3354 .addImm(SrcSize - 1);
3355 } else {
3356 return false;
3357 }
3358
3359 constrainSelectedInstRegOperands(*ExtI, TII, TRI, RBI);
3360 I.eraseFromParent();
3361 return true;
3362 }
3363
3364 case TargetOpcode::G_SITOFP:
3365 case TargetOpcode::G_UITOFP:
3366 case TargetOpcode::G_FPTOSI:
3367 case TargetOpcode::G_FPTOUI: {
3368 const LLT DstTy = MRI.getType(I.getOperand(0).getReg()),
3369 SrcTy = MRI.getType(I.getOperand(1).getReg());
3370 const unsigned NewOpc = selectFPConvOpc(Opcode, DstTy, SrcTy);
3371 if (NewOpc == Opcode)
3372 return false;
3373
3374 I.setDesc(TII.get(NewOpc));
3375 constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3376 I.setFlags(MachineInstr::NoFPExcept);
3377
3378 return true;
3379 }
3380
3381 case TargetOpcode::G_FREEZE:
3382 return selectCopy(I, TII, MRI, TRI, RBI);
3383
3384 case TargetOpcode::G_INTTOPTR:
3385 // The importer is currently unable to import pointer types since they
3386 // didn't exist in SelectionDAG.
3387 return selectCopy(I, TII, MRI, TRI, RBI);
3388
3389 case TargetOpcode::G_BITCAST:
3390 // Imported SelectionDAG rules can handle every bitcast except those that
3391 // bitcast from a type to the same type. Ideally, these shouldn't occur
3392 // but we might not run an optimizer that deletes them. The other exception
3393 // is bitcasts involving pointer types, as SelectionDAG has no knowledge
3394 // of them.
3395 return selectCopy(I, TII, MRI, TRI, RBI);
3396
3397 case TargetOpcode::G_SELECT: {
3398 auto &Sel = cast<GSelect>(I);
3399 const Register CondReg = Sel.getCondReg();
3400 const Register TReg = Sel.getTrueReg();
3401 const Register FReg = Sel.getFalseReg();
3402
3403 if (tryOptSelect(Sel))
3404 return true;
3405
3406 // Make sure to use an unused vreg instead of wzr, so that the peephole
3407 // optimizations will be able to optimize these.
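// ANDS against the logical immediate 1 tests bit 0 of the condition and sets
// NZCV, so the select emitted below can simply use the NE condition.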
3408 Register DeadVReg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
3409 auto TstMI = MIB.buildInstr(AArch64::ANDSWri, {DeadVReg}, {CondReg})
3410 .addImm(AArch64_AM::encodeLogicalImmediate(1, 32));
3411 constrainSelectedInstRegOperands(*TstMI, TII, TRI, RBI);
3412 if (!emitSelect(Sel.getReg(0), TReg, FReg, AArch64CC::NE, MIB))
3413 return false;
3414 Sel.eraseFromParent();
3415 return true;
3416 }
3417 case TargetOpcode::G_ICMP: {
3418 if (Ty.isVector())
3419 return false;
3420
3421 if (Ty != LLT::scalar(32)) {
3422 LLVM_DEBUG(dbgs() << "G_ICMP result has type: " << Ty
3423 << ", expected: " << LLT::scalar(32) << '\n');
3424 return false;
3425 }
3426
3427 auto Pred = static_cast<CmpInst::Predicate>(I.getOperand(1).getPredicate());
3428 const AArch64CC::CondCode InvCC =
3429 changeICMPPredToAArch64CC(CmpInst::getInversePredicate(Pred));
3430 emitIntegerCompare(I.getOperand(2), I.getOperand(3), I.getOperand(1), MIB);
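// CSINC WZR, WZR, InvCC is a CSET: it yields 1 when the original predicate
// holds and 0 otherwise.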
3431 emitCSINC(/*Dst=*/I.getOperand(0).getReg(), /*Src1=*/AArch64::WZR,
3432 /*Src2=*/AArch64::WZR, InvCC, MIB);
3433 I.eraseFromParent();
3434 return true;
3435 }
3436
3437 case TargetOpcode::G_FCMP: {
3438 CmpInst::Predicate Pred =
3439 static_cast<CmpInst::Predicate>(I.getOperand(1).getPredicate());
3440 if (!emitFPCompare(I.getOperand(2).getReg(), I.getOperand(3).getReg(), MIB,
3441 Pred) ||
3442 !emitCSetForFCmp(I.getOperand(0).getReg(), Pred, MIB))
3443 return false;
3444 I.eraseFromParent();
3445 return true;
3446 }
3447 case TargetOpcode::G_VASTART:
3448 return STI.isTargetDarwin() ? selectVaStartDarwin(I, MF, MRI)
3449 : selectVaStartAAPCS(I, MF, MRI);
3450 case TargetOpcode::G_INTRINSIC:
3451 return selectIntrinsic(I, MRI);
3452 case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS:
3453 return selectIntrinsicWithSideEffects(I, MRI);
3454 case TargetOpcode::G_IMPLICIT_DEF: {
3455 I.setDesc(TII.get(TargetOpcode::IMPLICIT_DEF));
3456 const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
3457 const Register DstReg = I.getOperand(0).getReg();
3458 const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
3459 const TargetRegisterClass *DstRC = getRegClassForTypeOnBank(DstTy, DstRB);
3460 RBI.constrainGenericRegister(DstReg, *DstRC, MRI);
3461 return true;
3462 }
3463 case TargetOpcode::G_BLOCK_ADDR: {
3464 if (TM.getCodeModel() == CodeModel::Large && !TM.isPositionIndependent()) {
3465 materializeLargeCMVal(I, I.getOperand(1).getBlockAddress(), 0);
3466 I.eraseFromParent();
3467 return true;
3468 } else {
3469 I.setDesc(TII.get(AArch64::MOVaddrBA));
3470 auto MovMI = BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::MOVaddrBA),
3471 I.getOperand(0).getReg())
3472 .addBlockAddress(I.getOperand(1).getBlockAddress(),
3473 /* Offset */ 0, AArch64II::MO_PAGE)
3474 .addBlockAddress(
3475 I.getOperand(1).getBlockAddress(), /* Offset */ 0,
3476 AArch64II::MO_NC | AArch64II::MO_PAGEOFF);
3477 I.eraseFromParent();
3478 return constrainSelectedInstRegOperands(*MovMI, TII, TRI, RBI);
3479 }
3480 }
3481 case AArch64::G_DUP: {
3482 // When the scalar of G_DUP is an s8/s16 gpr, it can't be selected by the
3483 // imported patterns, so do it manually here. Avoiding the generation of s16
3484 // gprs is difficult because at register bank selection we may end up
3485 // pessimizing the fpr case if we decide to add an anyextend to fix this.
3486 // Manual selection is the most robust solution for now.
3487 if (RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI)->getID() !=
3488 AArch64::GPRRegBankID)
3489 return false; // We expect the fpr regbank case to be imported.
3490 LLT VecTy = MRI.getType(I.getOperand(0).getReg());
3491 if (VecTy == LLT::fixed_vector(8, 8))
3492 I.setDesc(TII.get(AArch64::DUPv8i8gpr));
3493 else if (VecTy == LLT::fixed_vector(16, 8))
3494 I.setDesc(TII.get(AArch64::DUPv16i8gpr));
3495 else if (VecTy == LLT::fixed_vector(4, 16))
3496 I.setDesc(TII.get(AArch64::DUPv4i16gpr));
3497 else if (VecTy == LLT::fixed_vector(8, 16))
3498 I.setDesc(TII.get(AArch64::DUPv8i16gpr));
3499 else
3500 return false;
3501 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3502 }
3503 case TargetOpcode::G_BUILD_VECTOR:
3504 return selectBuildVector(I, MRI);
3505 case TargetOpcode::G_MERGE_VALUES:
3506 return selectMergeValues(I, MRI);
3507 case TargetOpcode::G_UNMERGE_VALUES:
3508 return selectUnmergeValues(I, MRI);
3509 case TargetOpcode::G_SHUFFLE_VECTOR:
3510 return selectShuffleVector(I, MRI);
3511 case TargetOpcode::G_EXTRACT_VECTOR_ELT:
3512 return selectExtractElt(I, MRI);
3513 case TargetOpcode::G_CONCAT_VECTORS:
3514 return selectConcatVectors(I, MRI);
3515 case TargetOpcode::G_JUMP_TABLE:
3516 return selectJumpTable(I, MRI);
3517 case TargetOpcode::G_MEMCPY:
3518 case TargetOpcode::G_MEMCPY_INLINE:
3519 case TargetOpcode::G_MEMMOVE:
3520 case TargetOpcode::G_MEMSET:
3521 assert(STI.hasMOPS() && "Shouldn't get here without +mops feature");
3522 return selectMOPS(I, MRI);
3523 }
3524
3525 return false;
3526}
3527
3528bool AArch64InstructionSelector::selectAndRestoreState(MachineInstr &I) {
3529 MachineIRBuilderState OldMIBState = MIB.getState();
3530 bool Success = select(I);
3531 MIB.setState(OldMIBState);
3532 return Success;
3533}
3534
3535 bool AArch64InstructionSelector::selectMOPS(MachineInstr &GI,
3536 MachineRegisterInfo &MRI) {
3537 unsigned Mopcode;
3538 switch (GI.getOpcode()) {
3539 case TargetOpcode::G_MEMCPY:
3540 case TargetOpcode::G_MEMCPY_INLINE:
3541 Mopcode = AArch64::MOPSMemoryCopyPseudo;
3542 break;
3543 case TargetOpcode::G_MEMMOVE:
3544 Mopcode = AArch64::MOPSMemoryMovePseudo;
3545 break;
3546 case TargetOpcode::G_MEMSET:
3547 // For tagged memset see llvm.aarch64.mops.memset.tag
3548 Mopcode = AArch64::MOPSMemorySetPseudo;
3549 break;
3550 }
3551
3552 auto &DstPtr = GI.getOperand(0);
3553 auto &SrcOrVal = GI.getOperand(1);
3554 auto &Size = GI.getOperand(2);
3555
3556 // Create copies of the registers that can be clobbered.
3557 const Register DstPtrCopy = MRI.cloneVirtualRegister(DstPtr.getReg());
3558 const Register SrcValCopy = MRI.cloneVirtualRegister(SrcOrVal.getReg());
3559 const Register SizeCopy = MRI.cloneVirtualRegister(Size.getReg());
3560
3561 const bool IsSet = Mopcode == AArch64::MOPSMemorySetPseudo;
3562 const auto &SrcValRegClass =
3563 IsSet ? AArch64::GPR64RegClass : AArch64::GPR64commonRegClass;
3564
3565 // Constrain to specific registers
3566 RBI.constrainGenericRegister(DstPtrCopy, AArch64::GPR64commonRegClass, MRI);
3567 RBI.constrainGenericRegister(SrcValCopy, SrcValRegClass, MRI);
3568 RBI.constrainGenericRegister(SizeCopy, AArch64::GPR64RegClass, MRI);
3569
3570 MIB.buildCopy(DstPtrCopy, DstPtr);
3571 MIB.buildCopy(SrcValCopy, SrcOrVal);
3572 MIB.buildCopy(SizeCopy, Size);
3573
3574 // New instruction uses the copied registers because it must update them.
3575 // The defs are not used since they don't exist in G_MEM*. They are still
3576 // tied.
3577 // Note: order of operands is different from G_MEMSET, G_MEMCPY, G_MEMMOVE
3578 Register DefDstPtr = MRI.createVirtualRegister(&AArch64::GPR64commonRegClass);
3579 Register DefSize = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
3580 if (IsSet) {
3581 MIB.buildInstr(Mopcode, {DefDstPtr, DefSize},
3582 {DstPtrCopy, SizeCopy, SrcValCopy});
3583 } else {
3584 Register DefSrcPtr = MRI.createVirtualRegister(&SrcValRegClass);
3585 MIB.buildInstr(Mopcode, {DefDstPtr, DefSrcPtr, DefSize},
3586 {DstPtrCopy, SrcValCopy, SizeCopy});
3587 }
3588
3589 GI.eraseFromParent();
3590 return true;
3591}
3592
3593 bool AArch64InstructionSelector::selectBrJT(MachineInstr &I,
3594 MachineRegisterInfo &MRI) {
3595 assert(I.getOpcode() == TargetOpcode::G_BRJT && "Expected G_BRJT");
3596 Register JTAddr = I.getOperand(0).getReg();
3597 unsigned JTI = I.getOperand(1).getIndex();
3598 Register Index = I.getOperand(2).getReg();
3599
3600 Register TargetReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
3601 Register ScratchReg = MRI.createVirtualRegister(&AArch64::GPR64spRegClass);
3602
3603 MF->getInfo<AArch64FunctionInfo>()->setJumpTableEntryInfo(JTI, 4, nullptr);
3604 auto JumpTableInst = MIB.buildInstr(AArch64::JumpTableDest32,
3605 {TargetReg, ScratchReg}, {JTAddr, Index})
3606 .addJumpTableIndex(JTI);
3607 // Save the jump table info.
3608 MIB.buildInstr(TargetOpcode::JUMP_TABLE_DEBUG_INFO, {},
3609 {static_cast<int64_t>(JTI)});
3610 // Build the indirect branch.
3611 MIB.buildInstr(AArch64::BR, {}, {TargetReg});
3612 I.eraseFromParent();
3613 return constrainSelectedInstRegOperands(*JumpTableInst, TII, TRI, RBI);
3614}
3615
3616 bool AArch64InstructionSelector::selectJumpTable(MachineInstr &I,
3617 MachineRegisterInfo &MRI) {
3618 assert(I.getOpcode() == TargetOpcode::G_JUMP_TABLE && "Expected jump table");
3619 assert(I.getOperand(1).isJTI() && "Jump table op should have a JTI!");
3620
3621 Register DstReg = I.getOperand(0).getReg();
3622 unsigned JTI = I.getOperand(1).getIndex();
3623 // We generate a MOVaddrJT which will get expanded to an ADRP + ADD later.
3624 auto MovMI =
3625 MIB.buildInstr(AArch64::MOVaddrJT, {DstReg}, {})
3626 .addJumpTableIndex(JTI, AArch64II::MO_PAGE)
3627 .addJumpTableIndex(JTI, AArch64II::MO_NC | AArch64II::MO_PAGEOFF);
3628 I.eraseFromParent();
3629 return constrainSelectedInstRegOperands(*MovMI, TII, TRI, RBI);
3630}
3631
3632bool AArch64InstructionSelector::selectTLSGlobalValue(
3633 MachineInstr &I, MachineRegisterInfo &MRI) {
3634 if (!STI.isTargetMachO())
3635 return false;
3636 MachineFunction &MF = *I.getParent()->getParent();
3637 MF.getFrameInfo().setAdjustsStack(true);
3638
3639 const auto &GlobalOp = I.getOperand(1);
3640 assert(GlobalOp.getOffset() == 0 &&
3641 "Shouldn't have an offset on TLS globals!");
3642 const GlobalValue &GV = *GlobalOp.getGlobal();
3643
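// MachO TLS sequence: load the TLV descriptor address from the GOT, load the
// resolver function pointer stored at offset 0 of the descriptor, then call it
// with X0 pointing at the descriptor; the resolved address comes back in X0.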
3644 auto LoadGOT =
3645 MIB.buildInstr(AArch64::LOADgot, {&AArch64::GPR64commonRegClass}, {})
3646 .addGlobalAddress(&GV, 0, AArch64II::MO_TLS);
3647
3648 auto Load = MIB.buildInstr(AArch64::LDRXui, {&AArch64::GPR64commonRegClass},
3649 {LoadGOT.getReg(0)})
3650 .addImm(0);
3651
3652 MIB.buildCopy(Register(AArch64::X0), LoadGOT.getReg(0));
3653 // TLS calls preserve all registers except those that absolutely must be
3654 // trashed: X0 (it takes an argument), LR (it's a call) and NZCV (let's not be
3655 // silly).
3656 MIB.buildInstr(getBLRCallOpcode(MF), {}, {Load})
3657 .addUse(AArch64::X0, RegState::Implicit)
3658 .addDef(AArch64::X0, RegState::Implicit)
3659 .addRegMask(TRI.getTLSCallPreservedMask());
3660
3661 MIB.buildCopy(I.getOperand(0).getReg(), Register(AArch64::X0));
3662 RBI.constrainGenericRegister(I.getOperand(0).getReg(), AArch64::GPR64RegClass,
3663 MRI);
3664 I.eraseFromParent();
3665 return true;
3666}
3667
3668MachineInstr *AArch64InstructionSelector::emitScalarToVector(
3669 unsigned EltSize, const TargetRegisterClass *DstRC, Register Scalar,
3670 MachineIRBuilder &MIRBuilder) const {
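// Build an IMPLICIT_DEF of the destination class and INSERT_SUBREG the scalar
// into its low lane; the subregister index is chosen from the element size.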
3671 auto Undef = MIRBuilder.buildInstr(TargetOpcode::IMPLICIT_DEF, {DstRC}, {});
3672
3673 auto BuildFn = [&](unsigned SubregIndex) {
3674 auto Ins =
3675 MIRBuilder
3676 .buildInstr(TargetOpcode::INSERT_SUBREG, {DstRC}, {Undef, Scalar})
3677 .addImm(SubregIndex);
3678 constrainSelectedInstRegOperands(*Undef, TII, TRI, RBI);
3679 constrainSelectedInstRegOperands(*Ins, TII, TRI, RBI);
3680 return &*Ins;
3681 };
3682
3683 switch (EltSize) {
3684 case 8:
3685 return BuildFn(AArch64::bsub);
3686 case 16:
3687 return BuildFn(AArch64::hsub);
3688 case 32:
3689 return BuildFn(AArch64::ssub);
3690 case 64:
3691 return BuildFn(AArch64::dsub);
3692 default:
3693 return nullptr;
3694 }
3695}
3696
3697 MachineInstr *
3698 AArch64InstructionSelector::emitNarrowVector(Register DstReg, Register SrcReg,
3699 MachineIRBuilder &MIB,
3700 MachineRegisterInfo &MRI) const {
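// Narrowing is done with a subregister COPY of the low 32/64 bits
// (ssub/dsub) of the wide FPR source.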
3701 LLT DstTy = MRI.getType(DstReg);
3702 const TargetRegisterClass *RC =
3703 getRegClassForTypeOnBank(DstTy, *RBI.getRegBank(SrcReg, MRI, TRI));
3704 if (RC != &AArch64::FPR32RegClass && RC != &AArch64::FPR64RegClass) {
3705 LLVM_DEBUG(dbgs() << "Unsupported register class!\n");
3706 return nullptr;
3707 }
3708 unsigned SubReg = 0;
3709 if (!getSubRegForClass(RC, TRI, SubReg))
3710 return nullptr;
3711 if (SubReg != AArch64::ssub && SubReg != AArch64::dsub) {
3712 LLVM_DEBUG(dbgs() << "Unsupported destination size! ("
3713 << DstTy.getSizeInBits() << ")\n");
3714 return nullptr;
3715 }
3716 auto Copy = MIB.buildInstr(TargetOpcode::COPY, {DstReg}, {})
3717 .addReg(SrcReg, 0, SubReg);
3718 RBI.constrainGenericRegister(DstReg, *RC, MRI);
3719 return Copy;
3720}
3721
3722 bool AArch64InstructionSelector::selectMergeValues(
3723 MachineInstr &I, MachineRegisterInfo &MRI) {
3724 assert(I.getOpcode() == TargetOpcode::G_MERGE_VALUES && "unexpected opcode");
3725 const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
3726 const LLT SrcTy = MRI.getType(I.getOperand(1).getReg());
3727 assert(!DstTy.isVector() && !SrcTy.isVector() && "invalid merge operation");
3728 const RegisterBank &RB = *RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI);
3729
3730 if (I.getNumOperands() != 3)
3731 return false;
3732
3733 // Merging 2 s64s into an s128.
3734 if (DstTy == LLT::scalar(128)) {
3735 if (SrcTy.getSizeInBits() != 64)
3736 return false;
3737 Register DstReg = I.getOperand(0).getReg();
3738 Register Src1Reg = I.getOperand(1).getReg();
3739 Register Src2Reg = I.getOperand(2).getReg();
3740 auto Tmp = MIB.buildInstr(TargetOpcode::IMPLICIT_DEF, {DstTy}, {});
3741 MachineInstr *InsMI = emitLaneInsert(std::nullopt, Tmp.getReg(0), Src1Reg,
3742 /* LaneIdx */ 0, RB, MIB);
3743 if (!InsMI)
3744 return false;
3745 MachineInstr *Ins2MI = emitLaneInsert(DstReg, InsMI->getOperand(0).getReg(),
3746 Src2Reg, /* LaneIdx */ 1, RB, MIB);
3747 if (!Ins2MI)
3748 return false;
3749 constrainSelectedInstRegOperands(*InsMI, TII, TRI, RBI);
3750 constrainSelectedInstRegOperands(*Ins2MI, TII, TRI, RBI);
3751 I.eraseFromParent();
3752 return true;
3753 }
3754
3755 if (RB.getID() != AArch64::GPRRegBankID)
3756 return false;
3757
3758 if (DstTy.getSizeInBits() != 64 || SrcTy.getSizeInBits() != 32)
3759 return false;
3760
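// GPR path: merge two s32 values into an s64. Widen each source with
// SUBREG_TO_REG, then use BFM to insert the second value into bits [63:32]
// of the first.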
3761 auto *DstRC = &AArch64::GPR64RegClass;
3762 Register SubToRegDef = MRI.createVirtualRegister(DstRC);
3763 MachineInstr &SubRegMI = *BuildMI(*I.getParent(), I, I.getDebugLoc(),
3764 TII.get(TargetOpcode::SUBREG_TO_REG))
3765 .addDef(SubToRegDef)
3766 .addImm(0)
3767 .addUse(I.getOperand(1).getReg())
3768 .addImm(AArch64::sub_32);
3769 Register SubToRegDef2 = MRI.createVirtualRegister(DstRC);
3770 // Need to anyext the second scalar before we can use bfm
3771 MachineInstr &SubRegMI2 = *BuildMI(*I.getParent(), I, I.getDebugLoc(),
3772 TII.get(TargetOpcode::SUBREG_TO_REG))
3773 .addDef(SubToRegDef2)
3774 .addImm(0)
3775 .addUse(I.getOperand(2).getReg())
3776 .addImm(AArch64::sub_32);
3777 MachineInstr &BFM =
3778 *BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::BFMXri))
3779 .addDef(I.getOperand(0).getReg())
3780 .addUse(SubToRegDef)
3781 .addUse(SubToRegDef2)
3782 .addImm(32)
3783 .addImm(31);
3784 constrainSelectedInstRegOperands(SubRegMI, TII, TRI, RBI);
3785 constrainSelectedInstRegOperands(SubRegMI2, TII, TRI, RBI);
3786 constrainSelectedInstRegOperands(BFM, TII, TRI, RBI);
3787 I.eraseFromParent();
3788 return true;
3789}
3790
3791static bool getLaneCopyOpcode(unsigned &CopyOpc, unsigned &ExtractSubReg,
3792 const unsigned EltSize) {
3793 // Choose a lane copy opcode and subregister based off of the size of the
3794 // vector's elements.
3795 switch (EltSize) {
3796 case 8:
3797 CopyOpc = AArch64::DUPi8;
3798 ExtractSubReg = AArch64::bsub;
3799 break;
3800 case 16:
3801 CopyOpc = AArch64::DUPi16;
3802 ExtractSubReg = AArch64::hsub;
3803 break;
3804 case 32:
3805 CopyOpc = AArch64::DUPi32;
3806 ExtractSubReg = AArch64::ssub;
3807 break;
3808 case 64:
3809 CopyOpc = AArch64::DUPi64;
3810 ExtractSubReg = AArch64::dsub;
3811 break;
3812 default:
3813 // Unknown size, bail out.
3814 LLVM_DEBUG(dbgs() << "Elt size '" << EltSize << "' unsupported.\n");
3815 return false;
3816 }
3817 return true;
3818}
3819
3820MachineInstr *AArch64InstructionSelector::emitExtractVectorElt(
3821 std::optional<Register> DstReg, const RegisterBank &DstRB, LLT ScalarTy,
3822 Register VecReg, unsigned LaneIdx, MachineIRBuilder &MIRBuilder) const {
3823 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
3824 unsigned CopyOpc = 0;
3825 unsigned ExtractSubReg = 0;
3826 if (!getLaneCopyOpcode(CopyOpc, ExtractSubReg, ScalarTy.getSizeInBits())) {
3827 LLVM_DEBUG(
3828 dbgs() << "Couldn't determine lane copy opcode for instruction.\n");
3829 return nullptr;
3830 }
3831
3832 const TargetRegisterClass *DstRC =
3833 getRegClassForTypeOnBank(ScalarTy, DstRB, true);
3834 if (!DstRC) {
3835 LLVM_DEBUG(dbgs() << "Could not determine destination register class.\n");
3836 return nullptr;
3837 }
3838
3839 const RegisterBank &VecRB = *RBI.getRegBank(VecReg, MRI, TRI);
3840 const LLT &VecTy = MRI.getType(VecReg);
3841 const TargetRegisterClass *VecRC =
3842 getRegClassForTypeOnBank(VecTy, VecRB, true);
3843 if (!VecRC) {
3844 LLVM_DEBUG(dbgs() << "Could not determine source register class.\n");
3845 return nullptr;
3846 }
3847
3848 // The register that we're going to copy into.
3849 Register InsertReg = VecReg;
3850 if (!DstReg)
3851 DstReg = MRI.createVirtualRegister(DstRC);
3852 // If the lane index is 0, we just use a subregister COPY.
3853 if (LaneIdx == 0) {
3854 auto Copy = MIRBuilder.buildInstr(TargetOpcode::COPY, {*DstReg}, {})
3855 .addReg(VecReg, 0, ExtractSubReg);
3856 RBI.constrainGenericRegister(*DstReg, *DstRC, MRI);
3857 return &*Copy;
3858 }
3859
3860 // Lane copies require 128-bit wide registers. If we're dealing with an
3861 // unpacked vector, then we need to move up to that width. Insert an implicit
3862 // def and a subregister insert to get us there.
3863 if (VecTy.getSizeInBits() != 128) {
3864 MachineInstr *ScalarToVector = emitScalarToVector(
3865 VecTy.getSizeInBits(), &AArch64::FPR128RegClass, VecReg, MIRBuilder);
3866 if (!ScalarToVector)
3867 return nullptr;
3868 InsertReg = ScalarToVector->getOperand(0).getReg();
3869 }
3870
3871 MachineInstr *LaneCopyMI =
3872 MIRBuilder.buildInstr(CopyOpc, {*DstReg}, {InsertReg}).addImm(LaneIdx);
3873 constrainSelectedInstRegOperands(*LaneCopyMI, TII, TRI, RBI);
3874
3875 // Make sure that we actually constrain the initial copy.
3876 RBI.constrainGenericRegister(*DstReg, *DstRC, MRI);
3877 return LaneCopyMI;
3878}
3879
3880 bool AArch64InstructionSelector::selectExtractElt(
3881 MachineInstr &I, MachineRegisterInfo &MRI) {
3882 assert(I.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT &&
3883 "unexpected opcode!");
3884 Register DstReg = I.getOperand(0).getReg();
3885 const LLT NarrowTy = MRI.getType(DstReg);
3886 const Register SrcReg = I.getOperand(1).getReg();
3887 const LLT WideTy = MRI.getType(SrcReg);
3888 (void)WideTy;
3889 assert(WideTy.getSizeInBits() >= NarrowTy.getSizeInBits() &&
3890 "source register size too small!");
3891 assert(!NarrowTy.isVector() && "cannot extract vector into vector!");
3892
3893 // Need the lane index to determine the correct copy opcode.
3894 MachineOperand &LaneIdxOp = I.getOperand(2);
3895 assert(LaneIdxOp.isReg() && "Lane index operand was not a register?");
3896
3897 if (RBI.getRegBank(DstReg, MRI, TRI)->getID() != AArch64::FPRRegBankID) {
3898 LLVM_DEBUG(dbgs() << "Cannot extract into GPR.\n");
3899 return false;
3900 }
3901
3902 // Find the index to extract from.
3903 auto VRegAndVal = getIConstantVRegValWithLookThrough(LaneIdxOp.getReg(), MRI);
3904 if (!VRegAndVal)
3905 return false;
3906 unsigned LaneIdx = VRegAndVal->Value.getSExtValue();
3907
3908
3909 const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
3910 MachineInstr *Extract = emitExtractVectorElt(DstReg, DstRB, NarrowTy, SrcReg,
3911 LaneIdx, MIB);
3912 if (!Extract)
3913 return false;
3914
3915 I.eraseFromParent();
3916 return true;
3917}
3918
3919 bool AArch64InstructionSelector::selectSplitVectorUnmerge(
3920 MachineInstr &I, MachineRegisterInfo &MRI) {
3921 unsigned NumElts = I.getNumOperands() - 1;
3922 Register SrcReg = I.getOperand(NumElts).getReg();
3923 const LLT NarrowTy = MRI.getType(I.getOperand(0).getReg());
3924 const LLT SrcTy = MRI.getType(SrcReg);
3925
3926 assert(NarrowTy.isVector() && "Expected an unmerge into vectors");
3927 if (SrcTy.getSizeInBits() > 128) {
3928 LLVM_DEBUG(dbgs() << "Unexpected vector type for vec split unmerge");
3929 return false;
3930 }
3931
3932 // We implement a split vector operation by treating the sub-vectors as
3933 // scalars and extracting them.
3934 const RegisterBank &DstRB =
3935 *RBI.getRegBank(I.getOperand(0).getReg(), MRI, TRI);
3936 for (unsigned OpIdx = 0; OpIdx < NumElts; ++OpIdx) {
3937 Register Dst = I.getOperand(OpIdx).getReg();
3938 MachineInstr *Extract =
3939 emitExtractVectorElt(Dst, DstRB, NarrowTy, SrcReg, OpIdx, MIB);
3940 if (!Extract)
3941 return false;
3942 }
3943 I.eraseFromParent();
3944 return true;
3945}
3946
3947 bool AArch64InstructionSelector::selectUnmergeValues(MachineInstr &I,
3948 MachineRegisterInfo &MRI) {
3949 assert(I.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
3950 "unexpected opcode");
3951
3952 // TODO: Handle unmerging into GPRs and from scalars to scalars.
3953 if (RBI.getRegBank(I.getOperand(0).getReg(), MRI, TRI)->getID() !=
3954 AArch64::FPRRegBankID ||
3955 RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI)->getID() !=
3956 AArch64::FPRRegBankID) {
3957 LLVM_DEBUG(dbgs() << "Unmerging vector-to-gpr and scalar-to-scalar "
3958 "currently unsupported.\n");
3959 return false;
3960 }
3961
3962 // The last operand is the vector source register, and every other operand is
3963 // a register to unpack into.
3964 unsigned NumElts = I.getNumOperands() - 1;
3965 Register SrcReg = I.getOperand(NumElts).getReg();
3966 const LLT NarrowTy = MRI.getType(I.getOperand(0).getReg());
3967 const LLT WideTy = MRI.getType(SrcReg);
3968 (void)WideTy;
3969 assert((WideTy.isVector() || WideTy.getSizeInBits() == 128) &&
3970 "can only unmerge from vector or s128 types!");
3971 assert(WideTy.getSizeInBits() > NarrowTy.getSizeInBits() &&
3972 "source register size too small!");
3973
3974 if (!NarrowTy.isScalar())
3975 return selectSplitVectorUnmerge(I, MRI);
3976
3977 // Choose a lane copy opcode and subregister based off of the size of the
3978 // vector's elements.
3979 unsigned CopyOpc = 0;
3980 unsigned ExtractSubReg = 0;
3981 if (!getLaneCopyOpcode(CopyOpc, ExtractSubReg, NarrowTy.getSizeInBits()))
3982 return false;
3983
3984 // Set up for the lane copies.
3985 MachineBasicBlock &MBB = *I.getParent();
3986
3987 // Stores the registers we'll be copying from.
3988 SmallVector<Register, 4> InsertRegs;
3989
3990 // We'll use the first register twice, so we only need NumElts-1 registers.
3991 unsigned NumInsertRegs = NumElts - 1;
3992
3993 // If our elements fit into exactly 128 bits, then we can copy from the source
3994 // directly. Otherwise, we need to do a bit of setup with some subregister
3995 // inserts.
3996 if (NarrowTy.getSizeInBits() * NumElts == 128) {
3997 InsertRegs = SmallVector<Register, 4>(NumInsertRegs, SrcReg);
3998 } else {
3999 // No. We have to perform subregister inserts. For each insert, create an
4000 // implicit def and a subregister insert, and save the register we create.
4001 const TargetRegisterClass *RC = getRegClassForTypeOnBank(
4002 LLT::fixed_vector(NumElts, WideTy.getScalarSizeInBits()),
4003 *RBI.getRegBank(SrcReg, MRI, TRI));
4004 unsigned SubReg = 0;
4005 bool Found = getSubRegForClass(RC, TRI, SubReg);
4006 (void)Found;
4007 assert(Found && "expected to find last operand's subreg idx");
4008 for (unsigned Idx = 0; Idx < NumInsertRegs; ++Idx) {
4009 Register ImpDefReg = MRI.createVirtualRegister(&AArch64::FPR128RegClass);
4010 MachineInstr &ImpDefMI =
4011 *BuildMI(MBB, I, I.getDebugLoc(), TII.get(TargetOpcode::IMPLICIT_DEF),
4012 ImpDefReg);
4013
4014 // Now, create the subregister insert from SrcReg.
4015 Register InsertReg = MRI.createVirtualRegister(&AArch64::FPR128RegClass);
4016 MachineInstr &InsMI =
4017 *BuildMI(MBB, I, I.getDebugLoc(),
4018 TII.get(TargetOpcode::INSERT_SUBREG), InsertReg)
4019 .addUse(ImpDefReg)
4020 .addUse(SrcReg)
4021 .addImm(SubReg);
4022
4023 constrainSelectedInstRegOperands(ImpDefMI, TII, TRI, RBI);
4024 constrainSelectedInstRegOperands(InsMI, TII, TRI, RBI);
4025
4026 // Save the register so that we can copy from it after.
4027 InsertRegs.push_back(InsertReg);
4028 }
4029 }
4030
4031 // Now that we've created any necessary subregister inserts, we can
4032 // create the copies.
4033 //
4034 // Perform the first copy separately as a subregister copy.
4035 Register CopyTo = I.getOperand(0).getReg();
4036 auto FirstCopy = MIB.buildInstr(TargetOpcode::COPY, {CopyTo}, {})
4037 .addReg(InsertRegs[0], 0, ExtractSubReg);
4038 constrainSelectedInstRegOperands(*FirstCopy, TII, TRI, RBI);
4039
4040 // Now, perform the remaining copies as vector lane copies.
4041 unsigned LaneIdx = 1;
4042 for (Register InsReg : InsertRegs) {
4043 Register CopyTo = I.getOperand(LaneIdx).getReg();
4044 MachineInstr &CopyInst =
4045 *BuildMI(MBB, I, I.getDebugLoc(), TII.get(CopyOpc), CopyTo)
4046 .addUse(InsReg)
4047 .addImm(LaneIdx);
4048 constrainSelectedInstRegOperands(CopyInst, TII, TRI, RBI);
4049 ++LaneIdx;
4050 }
4051
4052 // Separately constrain the first copy's destination. Because of the
4053 // limitation in constrainOperandRegClass, we can't guarantee that this will
4054 // actually be constrained. So, do it ourselves using the second operand.
4055 const TargetRegisterClass *RC =
4056 MRI.getRegClassOrNull(I.getOperand(1).getReg());
4057 if (!RC) {
4058 LLVM_DEBUG(dbgs() << "Couldn't constrain copy destination.\n");
4059 return false;
4060 }
4061
4062 RBI.constrainGenericRegister(CopyTo, *RC, MRI);
4063 I.eraseFromParent();
4064 return true;
4065}
4066
4067 bool AArch64InstructionSelector::selectConcatVectors(
4068 MachineInstr &I, MachineRegisterInfo &MRI) {
4069 assert(I.getOpcode() == TargetOpcode::G_CONCAT_VECTORS &&
4070 "Unexpected opcode");
4071 Register Dst = I.getOperand(0).getReg();
4072 Register Op1 = I.getOperand(1).getReg();
4073 Register Op2 = I.getOperand(2).getReg();
4074 MachineInstr *ConcatMI = emitVectorConcat(Dst, Op1, Op2, MIB);
4075 if (!ConcatMI)
4076 return false;
4077 I.eraseFromParent();
4078 return true;
4079}
4080
4081unsigned
4082AArch64InstructionSelector::emitConstantPoolEntry(const Constant *CPVal,
4083 MachineFunction &MF) const {
4084 Type *CPTy = CPVal->getType();
4085 Align Alignment = MF.getDataLayout().getPrefTypeAlign(CPTy);
4086
4087 MachineConstantPool *MCP = MF.getConstantPool();
4088 return MCP->getConstantPoolIndex(CPVal, Alignment);
4089}
4090
4091MachineInstr *AArch64InstructionSelector::emitLoadFromConstantPool(
4092 const Constant *CPVal, MachineIRBuilder &MIRBuilder) const {
4093 const TargetRegisterClass *RC;
4094 unsigned Opc;
4095 bool IsTiny = TM.getCodeModel() == CodeModel::Tiny;
4096 unsigned Size = MIRBuilder.getDataLayout().getTypeStoreSize(CPVal->getType());
4097 switch (Size) {
4098 case 16:
4099 RC = &AArch64::FPR128RegClass;
4100 Opc = IsTiny ? AArch64::LDRQl : AArch64::LDRQui;
4101 break;
4102 case 8:
4103 RC = &AArch64::FPR64RegClass;
4104 Opc = IsTiny ? AArch64::LDRDl : AArch64::LDRDui;
4105 break;
4106 case 4:
4107 RC = &AArch64::FPR32RegClass;
4108 Opc = IsTiny ? AArch64::LDRSl : AArch64::LDRSui;
4109 break;
4110 case 2:
4111 RC = &AArch64::FPR16RegClass;
4112 Opc = AArch64::LDRHui;
4113 break;
4114 default:
4115 LLVM_DEBUG(dbgs() << "Could not load from constant pool of type "
4116 << *CPVal->getType());
4117 return nullptr;
4118 }
4119
4120 MachineInstr *LoadMI = nullptr;
4121 auto &MF = MIRBuilder.getMF();
4122 unsigned CPIdx = emitConstantPoolEntry(CPVal, MF);
4123 if (IsTiny && (Size == 16 || Size == 8 || Size == 4)) {
4124 // Use load(literal) for tiny code model.
4125 LoadMI = &*MIRBuilder.buildInstr(Opc, {RC}, {}).addConstantPoolIndex(CPIdx);
4126 } else {
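// Otherwise form the address as ADRP (page address) and fold the page offset
// into the load itself.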
4127 auto Adrp =
4128 MIRBuilder.buildInstr(AArch64::ADRP, {&AArch64::GPR64RegClass}, {})
4129 .addConstantPoolIndex(CPIdx, 0, AArch64II::MO_PAGE);
4130
4131 LoadMI = &*MIRBuilder.buildInstr(Opc, {RC}, {Adrp})
4132 .addConstantPoolIndex(
4133 CPIdx, 0, AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
4134
4136 }
4137
4138 MachinePointerInfo PtrInfo = MachinePointerInfo::getConstantPool(MF);
4139 LoadMI->addMemOperand(MF, MF.getMachineMemOperand(PtrInfo,
4140 MachineMemOperand::MOLoad,
4141 Size, Align(Size)));
4142 constrainSelectedInstRegOperands(*LoadMI, TII, TRI, RBI);
4143 return LoadMI;
4144}
4145
4146/// Return an <Opcode, SubregIndex> pair to do a vector elt insert of a given
4147/// size and RB.
4148static std::pair<unsigned, unsigned>
4149getInsertVecEltOpInfo(const RegisterBank &RB, unsigned EltSize) {
4150 unsigned Opc, SubregIdx;
4151 if (RB.getID() == AArch64::GPRRegBankID) {
4152 if (EltSize == 8) {
4153 Opc = AArch64::INSvi8gpr;
4154 SubregIdx = AArch64::bsub;
4155 } else if (EltSize == 16) {
4156 Opc = AArch64::INSvi16gpr;
4157 SubregIdx = AArch64::ssub;
4158 } else if (EltSize == 32) {
4159 Opc = AArch64::INSvi32gpr;
4160 SubregIdx = AArch64::ssub;
4161 } else if (EltSize == 64) {
4162 Opc = AArch64::INSvi64gpr;
4163 SubregIdx = AArch64::dsub;
4164 } else {
4165 llvm_unreachable("invalid elt size!");
4166 }
4167 } else {
4168 if (EltSize == 8) {
4169 Opc = AArch64::INSvi8lane;
4170 SubregIdx = AArch64::bsub;
4171 } else if (EltSize == 16) {
4172 Opc = AArch64::INSvi16lane;
4173 SubregIdx = AArch64::hsub;
4174 } else if (EltSize == 32) {
4175 Opc = AArch64::INSvi32lane;
4176 SubregIdx = AArch64::ssub;
4177 } else if (EltSize == 64) {
4178 Opc = AArch64::INSvi64lane;
4179 SubregIdx = AArch64::dsub;
4180 } else {
4181 llvm_unreachable("invalid elt size!");
4182 }
4183 }
4184 return std::make_pair(Opc, SubregIdx);
4185}
4186
4187MachineInstr *AArch64InstructionSelector::emitInstr(
4188 unsigned Opcode, std::initializer_list<llvm::DstOp> DstOps,
4189 std::initializer_list<llvm::SrcOp> SrcOps, MachineIRBuilder &MIRBuilder,
4190 const ComplexRendererFns &RenderFns) const {
4191 assert(Opcode && "Expected an opcode?");
4192 assert(!isPreISelGenericOpcode(Opcode) &&
4193 "Function should only be used to produce selected instructions!");
4194 auto MI = MIRBuilder.buildInstr(Opcode, DstOps, SrcOps);
4195 if (RenderFns)
4196 for (auto &Fn : *RenderFns)
4197 Fn(MI);
4198 constrainSelectedInstRegOperands(*MI, TII, TRI, RBI);
4199 return &*MI;
4200}
4201
4202MachineInstr *AArch64InstructionSelector::emitAddSub(
4203 const std::array<std::array<unsigned, 2>, 5> &AddrModeAndSizeToOpcode,
4204 Register Dst, MachineOperand &LHS, MachineOperand &RHS,
4205 MachineIRBuilder &MIRBuilder) const {
4206 MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
4207 assert(LHS.isReg() && RHS.isReg() && "Expected register operands?");
4208 auto Ty = MRI.getType(LHS.getReg());
4209 assert(!Ty.isVector() && "Expected a scalar or pointer?");
4210 unsigned Size = Ty.getSizeInBits();
4211 assert((Size == 32 || Size == 64) && "Expected a 32-bit or 64-bit type only");
4212 bool Is32Bit = Size == 32;
4213
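// Row layout of AddrModeAndSizeToOpcode: [0] ri, [1] rs (shifted register),
// [2] rr, [3] ri with the immediate negated, [4] rx (extended register);
// column 1 of each row holds the 32-bit opcode.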
4214 // INSTRri form with positive arithmetic immediate.
4215 if (auto Fns = selectArithImmed(RHS))
4216 return emitInstr(AddrModeAndSizeToOpcode[0][Is32Bit], {Dst}, {LHS},
4217 MIRBuilder, Fns);
4218
4219 // INSTRri form with negative arithmetic immediate.
4220 if (auto Fns = selectNegArithImmed(RHS))
4221 return emitInstr(AddrModeAndSizeToOpcode[3][Is32Bit], {Dst}, {LHS},
4222 MIRBuilder, Fns);
4223
4224 // INSTRrx form.
4225 if (auto Fns = selectArithExtendedRegister(RHS))
4226 return emitInstr(AddrModeAndSizeToOpcode[4][Is32Bit], {Dst}, {LHS},
4227 MIRBuilder, Fns);
4228
4229 // INSTRrs form.
4230 if (auto Fns = selectShiftedRegister(RHS))
4231 return emitInstr(AddrModeAndSizeToOpcode[1][Is32Bit], {Dst}, {LHS},
4232 MIRBuilder, Fns);
4233 return emitInstr(AddrModeAndSizeToOpcode[2][Is32Bit], {Dst}, {LHS, RHS},
4234 MIRBuilder);
4235}
4236
4237 MachineInstr *
4238 AArch64InstructionSelector::emitADD(Register DefReg, MachineOperand &LHS,
4239 MachineOperand &RHS,
4240 MachineIRBuilder &MIRBuilder) const {
4241 const std::array<std::array<unsigned, 2>, 5> OpcTable{
4242 {{AArch64::ADDXri, AArch64::ADDWri},
4243 {AArch64::ADDXrs, AArch64::ADDWrs},
4244 {AArch64::ADDXrr, AArch64::ADDWrr},
4245 {AArch64::SUBXri, AArch64::SUBWri},
4246 {AArch64::ADDXrx, AArch64::ADDWrx}}};
4247 return emitAddSub(OpcTable, DefReg, LHS, RHS, MIRBuilder);
4248}
4249
4250 MachineInstr *
4251 AArch64InstructionSelector::emitADDS(Register Dst, MachineOperand &LHS,
4252 MachineOperand &RHS,
4253 MachineIRBuilder &MIRBuilder) const {
4254 const std::array<std::array<unsigned, 2>, 5> OpcTable{
4255 {{AArch64::ADDSXri, AArch64::ADDSWri},
4256 {AArch64::ADDSXrs, AArch64::ADDSWrs},
4257 {AArch64::ADDSXrr, AArch64::ADDSWrr},
4258 {AArch64::SUBSXri, AArch64::SUBSWri},
4259 {AArch64::ADDSXrx, AArch64::ADDSWrx}}};
4260 return emitAddSub(OpcTable, Dst, LHS, RHS, MIRBuilder);
4261}
4262
4263 MachineInstr *
4264 AArch64InstructionSelector::emitSUBS(Register Dst, MachineOperand &LHS,
4265 MachineOperand &RHS,
4266 MachineIRBuilder &MIRBuilder) const {
4267 const std::array<std::array<unsigned, 2>, 5> OpcTable{
4268 {{AArch64::SUBSXri, AArch64::SUBSWri},
4269 {AArch64::SUBSXrs, AArch64::SUBSWrs},
4270 {AArch64::SUBSXrr, AArch64::SUBSWrr},
4271 {AArch64::ADDSXri, AArch64::ADDSWri},
4272 {AArch64::SUBSXrx, AArch64::SUBSWrx}}};
4273 return emitAddSub(OpcTable, Dst, LHS, RHS, MIRBuilder);
4274}
4275
4276 MachineInstr *
4277 AArch64InstructionSelector::emitADCS(Register Dst, MachineOperand &LHS,
4278 MachineOperand &RHS,
4279 MachineIRBuilder &MIRBuilder) const {
4280 assert(LHS.isReg() && RHS.isReg() && "Expected register operands?");
4281 MachineRegisterInfo *MRI = MIRBuilder.getMRI();
4282 bool Is32Bit = (MRI->getType(LHS.getReg()).getSizeInBits() == 32);
4283 static const unsigned OpcTable[2] = {AArch64::ADCSXr, AArch64::ADCSWr};
4284 return emitInstr(OpcTable[Is32Bit], {Dst}, {LHS, RHS}, MIRBuilder);
4285}
4286
4287 MachineInstr *
4288 AArch64InstructionSelector::emitSBCS(Register Dst, MachineOperand &LHS,
4289 MachineOperand &RHS,
4290 MachineIRBuilder &MIRBuilder) const {
4291 assert(LHS.isReg() && RHS.isReg() && "Expected register operands?");
4292 MachineRegisterInfo *MRI = MIRBuilder.getMRI();
4293 bool Is32Bit = (MRI->getType(LHS.getReg()).getSizeInBits() == 32);
4294 static const unsigned OpcTable[2] = {AArch64::SBCSXr, AArch64::SBCSWr};
4295 return emitInstr(OpcTable[Is32Bit], {Dst}, {LHS, RHS}, MIRBuilder);
4296}
4297
4298 MachineInstr *
4299 AArch64InstructionSelector::emitCMN(MachineOperand &LHS, MachineOperand &RHS,
4300 MachineIRBuilder &MIRBuilder) const {
4301 MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
4302 bool Is32Bit = (MRI.getType(LHS.getReg()).getSizeInBits() == 32);
4303 auto RC = Is32Bit ? &AArch64::GPR32RegClass : &AArch64::GPR64RegClass;
4304 return emitADDS(MRI.createVirtualRegister(RC), LHS, RHS, MIRBuilder);
4305}
4306
4307 MachineInstr *
4308 AArch64InstructionSelector::emitTST(MachineOperand &LHS, MachineOperand &RHS,
4309 MachineIRBuilder &MIRBuilder) const {
4310 assert(LHS.isReg() && RHS.isReg() && "Expected register operands?");
4311 MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
4312 LLT Ty = MRI.getType(LHS.getReg());
4313 unsigned RegSize = Ty.getSizeInBits();
4314 bool Is32Bit = (RegSize == 32);
4315 const unsigned OpcTable[3][2] = {{AArch64::ANDSXri, AArch64::ANDSWri},
4316 {AArch64::ANDSXrs, AArch64::ANDSWrs},
4317 {AArch64::ANDSXrr, AArch64::ANDSWrr}};
4318 // ANDS needs a logical immediate for its immediate form. Check if we can
4319 // fold one in.
4320 if (auto ValAndVReg = getIConstantVRegValWithLookThrough(RHS.getReg(), MRI)) {
4321 int64_t Imm = ValAndVReg->Value.getSExtValue();
4322
4323 if (AArch64_AM::isLogicalImmediate(Imm, RegSize)) {
4324 auto TstMI = MIRBuilder.buildInstr(OpcTable[0][Is32Bit], {Ty}, {LHS});
4325 TstMI.addImm(AArch64_AM::encodeLogicalImmediate(Imm, RegSize));
4326 constrainSelectedInstRegOperands(*TstMI, TII, TRI, RBI);
4327 return &*TstMI;
4328 }
4329 }
4330
4331 if (auto Fns = selectLogicalShiftedRegister(RHS))
4332 return emitInstr(OpcTable[1][Is32Bit], {Ty}, {LHS}, MIRBuilder, Fns);
4333 return emitInstr(OpcTable[2][Is32Bit], {Ty}, {LHS, RHS}, MIRBuilder);
4334}
4335
4336MachineInstr *AArch64InstructionSelector::emitIntegerCompare(
4337 MachineOperand &LHS, MachineOperand &RHS, MachineOperand &Predicate,
4338 MachineIRBuilder &MIRBuilder) const {
4339 assert(LHS.isReg() && RHS.isReg() && "Expected LHS and RHS to be registers!");
4340 assert(Predicate.isPredicate() && "Expected predicate?");
4341 MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
4342 LLT CmpTy = MRI.getType(LHS.getReg());
4343 assert(!CmpTy.isVector() && "Expected scalar or pointer");
4344 unsigned Size = CmpTy.getSizeInBits();
4345 (void)Size;
4346 assert((Size == 32 || Size == 64) && "Expected a 32-bit or 64-bit LHS/RHS?");
4347 // Fold the compare into a cmn or tst if possible.
4348 if (auto FoldCmp = tryFoldIntegerCompare(LHS, RHS, Predicate, MIRBuilder))
4349 return FoldCmp;
4350 auto Dst = MRI.cloneVirtualRegister(LHS.getReg());
4351 return emitSUBS(Dst, LHS, RHS, MIRBuilder);
4352}
4353
4354MachineInstr *AArch64InstructionSelector::emitCSetForFCmp(
4355 Register Dst, CmpInst::Predicate Pred, MachineIRBuilder &MIRBuilder) const {
4356 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
4357#ifndef NDEBUG
4358 LLT Ty = MRI.getType(Dst);
4359 assert(!Ty.isVector() && Ty.getSizeInBits() == 32 &&
4360 "Expected a 32-bit scalar register?");
4361#endif
4362 const Register ZReg = AArch64::WZR;
4363 AArch64CC::CondCode CC1, CC2;
4364 changeFCMPPredToAArch64CC(Pred, CC1, CC2);
4365 auto InvCC1 = AArch64CC::getInvertedCondCode(CC1);
4366 if (CC2 == AArch64CC::AL)
4367 return emitCSINC(/*Dst=*/Dst, /*Src1=*/ZReg, /*Src2=*/ZReg, InvCC1,
4368 MIRBuilder);
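// Predicates that need two condition codes (e.g. ONE, UEQ) are materialized
// as two CSINC-based csets whose results are ORed together.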
4369 const TargetRegisterClass *RC = &AArch64::GPR32RegClass;
4370 Register Def1Reg = MRI.createVirtualRegister(RC);
4371 Register Def2Reg = MRI.createVirtualRegister(RC);
4372 auto InvCC2 = AArch64CC::getInvertedCondCode(CC2);
4373 emitCSINC(/*Dst=*/Def1Reg, /*Src1=*/ZReg, /*Src2=*/ZReg, InvCC1, MIRBuilder);
4374 emitCSINC(/*Dst=*/Def2Reg, /*Src1=*/ZReg, /*Src2=*/ZReg, InvCC2, MIRBuilder);
4375 auto OrMI = MIRBuilder.buildInstr(AArch64::ORRWrr, {Dst}, {Def1Reg, Def2Reg});
4376 constrainSelectedInstRegOperands(*OrMI, TII, TRI, RBI);
4377 return &*OrMI;
4378}
4379
4380MachineInstr *AArch64InstructionSelector::emitFPCompare(
4381 Register LHS, Register RHS, MachineIRBuilder &MIRBuilder,
4382 std::optional<CmpInst::Predicate> Pred) const {
4383 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
4384 LLT Ty = MRI.getType(LHS);
4385 if (Ty.isVector())
4386 return nullptr;
4387 unsigned OpSize = Ty.getSizeInBits();
4388 assert(OpSize == 16 || OpSize == 32 || OpSize == 64);
4389
4390 // If this is a compare against +0.0, then we don't have
4391 // to explicitly materialize a constant.
4392 const ConstantFP *FPImm = getConstantFPVRegVal(RHS, MRI);
4393 bool ShouldUseImm = FPImm && (FPImm->isZero() && !FPImm->isNegative());
4394
4395 auto IsEqualityPred = [](CmpInst::Predicate P) {
4396 return P == CmpInst::FCMP_OEQ || P == CmpInst::FCMP_ONE ||
4397 P == CmpInst::FCMP_UNE || P == CmpInst::FCMP_UEQ;
4398 };
4399 if (!ShouldUseImm && Pred && IsEqualityPred(*Pred)) {
4400 // Try commutating the operands.
4401 const ConstantFP *LHSImm = getConstantFPVRegVal(LHS, MRI);
4402 if (LHSImm && (LHSImm->isZero() && !LHSImm->isNegative())) {
4403 ShouldUseImm = true;
4404 std::swap(LHS, RHS);
4405 }
4406 }
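// Row 0 holds the register-register compares, row 1 the compares against
// +0.0; the column picks the H/S/D form from the operand size.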
4407 unsigned CmpOpcTbl[2][3] = {
4408 {AArch64::FCMPHrr, AArch64::FCMPSrr, AArch64::FCMPDrr},
4409 {AArch64::FCMPHri, AArch64::FCMPSri, AArch64::FCMPDri}};
4410 unsigned CmpOpc =
4411 CmpOpcTbl[ShouldUseImm][OpSize == 16 ? 0 : (OpSize == 32 ? 1 : 2)];
4412
4413 // Partially build the compare. Decide if we need to add a use for the
4414 // third operand based off whether or not we're comparing against 0.0.
4415 auto CmpMI = MIRBuilder.buildInstr(CmpOpc).addUse(LHS);
4416 CmpMI.setMIFlags(MachineInstr::NoFPExcept);
4417 if (!ShouldUseImm)
4418 CmpMI.addUse(RHS);
4419 constrainSelectedInstRegOperands(*CmpMI, TII, TRI, RBI);
4420 return &*CmpMI;
4421}
4422
4423MachineInstr *AArch64InstructionSelector::emitVectorConcat(
4424 std::optional<Register> Dst, Register Op1, Register Op2,
4425 MachineIRBuilder &MIRBuilder) const {
4426 // We implement a vector concat by:
4427 // 1. Use scalar_to_vector to insert the lower vector into the larger dest
4428 // 2. Insert the upper vector into the destination's upper element
4429 // TODO: some of this code is common with G_BUILD_VECTOR handling.
4430 MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
4431
4432 const LLT Op1Ty = MRI.getType(Op1);
4433 const LLT Op2Ty = MRI.getType(Op2);
4434
4435 if (Op1Ty != Op2Ty) {
4436 LLVM_DEBUG(dbgs() << "Could not do vector concat of differing vector tys");
4437 return nullptr;
4438 }
4439 assert(Op1Ty.isVector() && "Expected a vector for vector concat");
4440
4441 if (Op1Ty.getSizeInBits() >= 128) {
4442 LLVM_DEBUG(dbgs() << "Vector concat not supported for full size vectors");
4443 return nullptr;
4444 }
4445
4446 // At the moment we just support 64 bit vector concats.
4447 if (Op1Ty.getSizeInBits() != 64) {
4448 LLVM_DEBUG(dbgs() << "Vector concat only supported for 64b vectors");
4449 return nullptr;
4450 }
4451
4452 const LLT ScalarTy = LLT::scalar(Op1Ty.getSizeInBits());
4453 const RegisterBank &FPRBank = *RBI.getRegBank(Op1, MRI, TRI);
4454 const TargetRegisterClass *DstRC =
4455 getRegClassForTypeOnBank(Op1Ty.multiplyElements(2), FPRBank);
4456
4457 MachineInstr *WidenedOp1 =
4458 emitScalarToVector(ScalarTy.getSizeInBits(), DstRC, Op1, MIRBuilder);
4459 MachineInstr *WidenedOp2 =
4460 emitScalarToVector(ScalarTy.getSizeInBits(), DstRC, Op2, MIRBuilder);
4461 if (!WidenedOp1 || !WidenedOp2) {
4462 LLVM_DEBUG(dbgs() << "Could not emit a vector from scalar value");
4463 return nullptr;
4464 }
4465
4466 // Now do the insert of the upper element.
4467 unsigned InsertOpc, InsSubRegIdx;
4468 std::tie(InsertOpc, InsSubRegIdx) =
4469 getInsertVecEltOpInfo(FPRBank, ScalarTy.getSizeInBits());
4470
4471 if (!Dst)
4472 Dst = MRI.createVirtualRegister(DstRC);
4473 auto InsElt =
4474 MIRBuilder
4475 .buildInstr(InsertOpc, {*Dst}, {WidenedOp1->getOperand(0).getReg()})
4476 .addImm(1) /* Lane index */
4477 .addUse(WidenedOp2->getOperand(0).getReg())
4478 .addImm(0);
4479 constrainSelectedInstRegOperands(*InsElt, TII, TRI, RBI);
4480 return &*InsElt;
4481}
4482
4483 MachineInstr *
4484 AArch64InstructionSelector::emitCSINC(Register Dst, Register Src1,
4485 Register Src2, AArch64CC::CondCode Pred,
4486 MachineIRBuilder &MIRBuilder) const {
4487 auto &MRI = *MIRBuilder.getMRI();
4488 const RegClassOrRegBank &RegClassOrBank = MRI.getRegClassOrRegBank(Dst);
4489 // If we used a register class, then this won't necessarily have an LLT.
4490 // Compute the size based off whether or not we have a class or bank.
4491 unsigned Size;
4492 if (const auto *RC = RegClassOrBank.dyn_cast<const TargetRegisterClass *>())
4493 Size = TRI.getRegSizeInBits(*RC);
4494 else
4495 Size = MRI.getType(Dst).getSizeInBits();
4496 // Some opcodes use s1.
4497 assert(Size <= 64 && "Expected 64 bits or less only!");
4498 static const unsigned OpcTable[2] = {AArch64::CSINCWr, AArch64::CSINCXr};
4499 unsigned Opc = OpcTable[Size == 64];
4500 auto CSINC = MIRBuilder.buildInstr(Opc, {Dst}, {Src1, Src2}).addImm(Pred);
4501 constrainSelectedInstRegOperands(*CSINC, TII, TRI, RBI);
4502 return &*CSINC;
4503}
4504
4505MachineInstr *AArch64InstructionSelector::emitCarryIn(MachineInstr &I,
4506 Register CarryReg) {
4507 MachineRegisterInfo *MRI = MIB.getMRI();
4508 unsigned Opcode = I.getOpcode();
4509
4510 // If the instruction is a SUB, we need to negate the carry,
4511 // because borrowing is indicated by carry-flag == 0.
4512 bool NeedsNegatedCarry =
4513 (Opcode == TargetOpcode::G_USUBE || Opcode == TargetOpcode::G_SSUBE);
4514
4515 // If the previous instruction will already produce the correct carry, do not
4516 // emit a carry generating instruction. E.g. for G_UADDE/G_USUBE sequences
4517 // generated during legalization of wide add/sub. This optimization depends on
4518 // these sequences not being interrupted by other instructions.
4519 // We have to select the previous instruction before the carry-using
4520 // instruction is deleted by the calling function, otherwise the previous
4521 // instruction might become dead and would get deleted.
4522 MachineInstr *SrcMI = MRI->getVRegDef(CarryReg);
4523 if (SrcMI == I.getPrevNode()) {
4524 if (auto *CarrySrcMI = dyn_cast<GAddSubCarryOut>(SrcMI)) {
4525 bool ProducesNegatedCarry = CarrySrcMI->isSub();
4526 if (NeedsNegatedCarry == ProducesNegatedCarry &&
4527 CarrySrcMI->isUnsigned() &&
4528 CarrySrcMI->getCarryOutReg() == CarryReg &&
4529 selectAndRestoreState(*SrcMI))
4530 return nullptr;
4531 }
4532 }
4533
4534 Register DeadReg = MRI->createVirtualRegister(&AArch64::GPR32RegClass);
4535
4536 if (NeedsNegatedCarry) {
4537 // (0 - Carry) sets !C in NZCV when Carry == 1
4538 Register ZReg = AArch64::WZR;
4539 return emitInstr(AArch64::SUBSWrr, {DeadReg}, {ZReg, CarryReg}, MIB);
4540 }
4541
4542 // (Carry - 1) sets !C in NZCV when Carry == 0
4543 auto Fns = select12BitValueWithLeftShift(1);
4544 return emitInstr(AArch64::SUBSWri, {DeadReg}, {CarryReg}, MIB, Fns);
4545}
4546
4547 bool AArch64InstructionSelector::selectOverflowOp(MachineInstr &I,
4548 MachineRegisterInfo &MRI) {
4549 auto &CarryMI = cast<GAddSubCarryOut>(I);
4550
4551 if (auto *CarryInMI = dyn_cast<GAddSubCarryInOut>(&I)) {
4552 // Set NZCV carry according to carry-in VReg
4553 emitCarryIn(I, CarryInMI->getCarryInReg());
4554 }
4555
4556 // Emit the operation and get the correct condition code.
4557 auto OpAndCC = emitOverflowOp(I.getOpcode(), CarryMI.getDstReg(),
4558 CarryMI.getLHS(), CarryMI.getRHS(), MIB);
4559
4560 Register CarryOutReg = CarryMI.getCarryOutReg();
4561
4562 // Don't convert carry-out to VReg if it is never used
4563 if (!MRI.use_nodbg_empty(CarryOutReg)) {
4564 // Now, put the overflow result in the register given by the first operand
4565 // to the overflow op. CSINC increments the result when the predicate is
4566 // false, so to get the increment when it's true, we need to use the
4567 // inverse. In this case, we want to increment when carry is set.
4568 Register ZReg = AArch64::WZR;
4569 emitCSINC(/*Dst=*/CarryOutReg, /*Src1=*/ZReg, /*Src2=*/ZReg,
4570 getInvertedCondCode(OpAndCC.second), MIB);
4571 }
4572
4573 I.eraseFromParent();
4574 return true;
4575}
4576
4577std::pair<MachineInstr *, AArch64CC::CondCode>
4578AArch64InstructionSelector::emitOverflowOp(unsigned Opcode, Register Dst,
4579 MachineOperand &LHS,
4580 MachineOperand &RHS,
4581 MachineIRBuilder &MIRBuilder) const {
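// The returned condition code is the one that is set when the operation
// overflows: VS for signed ops, HS (carry set) for unsigned adds, and
// LO (carry clear) for unsigned subtracts.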
4582 switch (Opcode) {
4583 default:
4584 llvm_unreachable("Unexpected opcode!");
4585 case TargetOpcode::G_SADDO:
4586 return std::make_pair(emitADDS(Dst, LHS, RHS, MIRBuilder), AArch64CC::VS);
4587 case TargetOpcode::G_UADDO:
4588 return std::make_pair(emitADDS(Dst, LHS, RHS, MIRBuilder), AArch64CC::HS);
4589 case TargetOpcode::G_SSUBO:
4590 return std::make_pair(emitSUBS(Dst, LHS, RHS, MIRBuilder), AArch64CC::VS);
4591 case TargetOpcode::G_USUBO:
4592 return std::make_pair(emitSUBS(Dst, LHS, RHS, MIRBuilder), AArch64CC::LO);
4593 case TargetOpcode::G_SADDE:
4594 return std::make_pair(emitADCS(Dst, LHS, RHS, MIRBuilder), AArch64CC::VS);
4595 case TargetOpcode::G_UADDE:
4596 return std::make_pair(emitADCS(Dst, LHS, RHS, MIRBuilder), AArch64CC::HS);
4597 case TargetOpcode::G_SSUBE:
4598 return std::make_pair(emitSBCS(Dst, LHS, RHS, MIRBuilder), AArch64CC::VS);
4599 case TargetOpcode::G_USUBE:
4600 return std::make_pair(emitSBCS(Dst, LHS, RHS, MIRBuilder), AArch64CC::LO);
4601 }
4602}
4603
4604/// Returns true if @p Val is a tree of AND/OR/CMP operations that can be
4605/// expressed as a conjunction.
4606/// \param CanNegate Set to true if we can negate the whole sub-tree just by
4607/// changing the conditions on the CMP tests.
4608/// (this means we can call emitConjunctionRec() with
4609/// Negate==true on this sub-tree)
4610/// \param MustBeFirst Set to true if this subtree needs to be negated and we
4611/// cannot do the negation naturally. We are required to
4612/// emit the subtree first in this case.
4613/// \param WillNegate Is true if we are called when the result of this
4614/// subexpression must be negated. This happens when the
4615/// outer expression is an OR. We can use this fact to know
4616/// that we have a double negation (or (or ...) ...) that
4617/// can be implemented for free.
4618static bool canEmitConjunction(Register Val, bool &CanNegate, bool &MustBeFirst,
4619 bool WillNegate, MachineRegisterInfo &MRI,
4620 unsigned Depth = 0) {
4621 if (!MRI.hasOneNonDBGUse(Val))
4622 return false;
4623 MachineInstr *ValDef = MRI.getVRegDef(Val);
4624 unsigned Opcode = ValDef->getOpcode();
4625 if (isa<GAnyCmp>(ValDef)) {
4626 CanNegate = true;
4627 MustBeFirst = false;
4628 return true;
4629 }
4630 // Protect against exponential runtime and stack overflow.
4631 if (Depth > 6)
4632 return false;
4633 if (Opcode == TargetOpcode::G_AND || Opcode == TargetOpcode::G_OR) {
4634 bool IsOR = Opcode == TargetOpcode::G_OR;
4635 Register O0 = ValDef->getOperand(1).getReg();
4636 Register O1 = ValDef->getOperand(2).getReg();
4637 bool CanNegateL;
4638 bool MustBeFirstL;
4639 if (!canEmitConjunction(O0, CanNegateL, MustBeFirstL, IsOR, MRI, Depth + 1))
4640 return false;
4641 bool CanNegateR;
4642 bool MustBeFirstR;
4643 if (!canEmitConjunction(O1, CanNegateR, MustBeFirstR, IsOR, MRI, Depth + 1))
4644 return false;
4645
4646 if (MustBeFirstL && MustBeFirstR)
4647 return false;
4648
4649 if (IsOR) {
4650 // For an OR expression we need to be able to naturally negate at least
4651 // one side or we cannot do the transformation at all.
4652 if (!CanNegateL && !CanNegateR)
4653 return false;
4654 // If the result of the OR will be negated and we can naturally negate
4655 // the leaves, then this sub-tree as a whole negates naturally.
4656 CanNegate = WillNegate && CanNegateL && CanNegateR;
4657 // If we cannot naturally negate the whole sub-tree, then this must be
4658 // emitted first.
4659 MustBeFirst = !CanNegate;
4660 } else {
4661 assert(Opcode == TargetOpcode::G_AND && "Must be G_AND");
4662 // We cannot naturally negate an AND operation.
4663 CanNegate = false;
4664 MustBeFirst = MustBeFirstL || MustBeFirstR;
4665 }
4666 return true;
4667 }
4668 return false;
4669}
4670
4671 MachineInstr *AArch64InstructionSelector::emitConditionalComparison(
4672 Register LHS, Register RHS, CmpInst::Predicate CC,
4673 AArch64CC::CondCode Predicate, AArch64CC::CondCode OutCC,
4674 MachineIRBuilder &MIB) const {
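// Integer compares use CCMP (register form, or the immediate form for
// constants in [0, 31]; CCMN for small negative constants); floating-point
// compares use FCCMP sized by the operand.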
4675 auto &MRI = *MIB.getMRI();
4676 LLT OpTy = MRI.getType(LHS);
4677 unsigned CCmpOpc;
4678 std::optional<ValueAndVReg> C;
4679 if (CmpInst::isIntPredicate(CC)) {
4680 assert(OpTy.getSizeInBits() == 32 || OpTy.getSizeInBits() == 64);
4681 C = getIConstantVRegValWithLookThrough(RHS, MRI);
4682 if (!C || C->Value.sgt(31) || C->Value.slt(-31))
4683 CCmpOpc = OpTy.getSizeInBits() == 32 ? AArch64::CCMPWr : AArch64::CCMPXr;
4684 else if (C->Value.ule(31))
4685 CCmpOpc = OpTy.getSizeInBits() == 32 ? AArch64::CCMPWi : AArch64::CCMPXi;
4686 else
4687 CCmpOpc = OpTy.getSizeInBits() == 32 ? AArch64::CCMNWi : AArch64::CCMNXi;
4688 } else {
4689 assert(OpTy.getSizeInBits() == 16 || OpTy.getSizeInBits() == 32 ||
4690 OpTy.getSizeInBits() == 64);
4691 switch (OpTy.getSizeInBits()) {
4692 case 16:
4693 assert(STI.hasFullFP16() && "Expected Full FP16 for fp16 comparisons");
4694 CCmpOpc = AArch64::FCCMPHrr;
4695 break;
4696 case 32:
4697 CCmpOpc = AArch64::FCCMPSrr;
4698 break;
4699 case 64:
4700 CCmpOpc = AArch64::FCCMPDrr;
4701 break;
4702 default:
4703 return nullptr;
4704 }
4705 }
4706 AArch64CC::CondCode InvOutCC = AArch64CC::getInvertedCondCode(OutCC);
4707 unsigned NZCV = AArch64CC::getNZCVToSatisfyCondCode(InvOutCC);
4708 auto CCmp =
4709 MIB.buildInstr(CCmpOpc, {}, {LHS});
4710 if (CCmpOpc == AArch64::CCMPWi || CCmpOpc == AArch64::CCMPXi)
4711 CCmp.addImm(C->Value.getZExtValue());
4712 else if (CCmpOpc == AArch64::CCMNWi || CCmpOpc == AArch64::CCMNXi)
4713 CCmp.addImm(C->Value.abs().getZExtValue());
4714 else
4715 CCmp.addReg(RHS);
4716 CCmp.addImm(NZCV).addImm(Predicate);
4717 constrainSelectedInstRegOperands(*CCmp, TII, TRI, RBI);
4718 return &*CCmp;
4719}
4720
4721MachineInstr *AArch64InstructionSelector::emitConjunctionRec(
4722 Register Val, AArch64CC::CondCode &OutCC, bool Negate, Register CCOp,
4723 AArch64CC::CondCode Predicate, MachineIRBuilder &MIB) const {
4724 // We're at a tree leaf, produce a conditional comparison operation.
4725 auto &MRI = *MIB.getMRI();
4726 MachineInstr *ValDef = MRI.getVRegDef(Val);
4727 unsigned Opcode = ValDef->getOpcode();
4728 if (auto *Cmp = dyn_cast<GAnyCmp>(ValDef)) {
4729 Register LHS = Cmp->getLHSReg();
4730 Register RHS = Cmp->getRHSReg();
4731 CmpInst::Predicate CC = Cmp->getCond();
4732 if (Negate)
4733 CC = CmpInst::getInversePredicate(CC);
4734 if (isa<GICmp>(Cmp)) {
4735 OutCC = changeICMPPredToAArch64CC(CC);
4736 } else {
4737 // Handle special FP cases.
4738 AArch64CC::CondCode ExtraCC;
4739 changeFPCCToANDAArch64CC(CC, OutCC, ExtraCC);
4740 // Some floating point conditions can't be tested with a single condition
4741 // code. Construct an additional comparison in this case.
4742 if (ExtraCC != AArch64CC::AL) {
4743 MachineInstr *ExtraCmp;
4744 if (!CCOp)
4745 ExtraCmp = emitFPCompare(LHS, RHS, MIB, CC);
4746 else
4747 ExtraCmp =
4748 emitConditionalComparison(LHS, RHS, CC, Predicate, ExtraCC, MIB);
4749 CCOp = ExtraCmp->getOperand(0).getReg();
4750 Predicate = ExtraCC;
4751 }
4752 }
4753
4754 // Produce a normal comparison if we are first in the chain
4755 if (!CCOp) {
4756 auto Dst = MRI.cloneVirtualRegister(LHS);
4757 if (isa<GICmp>(Cmp))
4758 return emitSUBS(Dst, Cmp->getOperand(2), Cmp->getOperand(3), MIB);
4759 return emitFPCompare(Cmp->getOperand(2).getReg(),
4760 Cmp->getOperand(3).getReg(), MIB);
4761 }
4762 // Otherwise produce a ccmp.
4763 return emitConditionalComparison(LHS, RHS, CC, Predicate, OutCC, MIB);
4764 }
4765 assert(MRI.hasOneNonDBGUse(Val) && "Valid conjunction/disjunction tree");
4766
4767 bool IsOR = Opcode == TargetOpcode::G_OR;
4768
4769 Register LHS = ValDef->getOperand(1).getReg();
4770 bool CanNegateL;
4771 bool MustBeFirstL;
4772 bool ValidL = canEmitConjunction(LHS, CanNegateL, MustBeFirstL, IsOR, MRI);
4773 assert(ValidL && "Valid conjunction/disjunction tree");
4774 (void)ValidL;
4775
4776 Register RHS = ValDef->getOperand(2).getReg();
4777 bool CanNegateR;
4778 bool MustBeFirstR;
4779 bool ValidR = canEmitConjunction(RHS, CanNegateR, MustBeFirstR, IsOR, MRI);
4780 assert(ValidR && "Valid conjunction/disjunction tree");
4781 (void)ValidR;
4782
4783 // Swap sub-tree that must come first to the right side.
4784 if (MustBeFirstL) {
4785 assert(!MustBeFirstR && "Valid conjunction/disjunction tree");
4786 std::swap(LHS, RHS);
4787 std::swap(CanNegateL, CanNegateR);
4788 std::swap(MustBeFirstL, MustBeFirstR);
4789 }
4790
4791 bool NegateR;
4792 bool NegateAfterR;
4793 bool NegateL;
4794 bool NegateAfterAll;
4795 if (Opcode == TargetOpcode::G_OR) {
4796 // Swap the sub-tree that we can negate naturally to the left.
4797 if (!CanNegateL) {
4798 assert(CanNegateR && "at least one side must be negatable");
4799 assert(!MustBeFirstR && "invalid conjunction/disjunction tree");
4800 assert(!Negate);
4801 std::swap(LHS, RHS);
4802 NegateR = false;
4803 NegateAfterR = true;
4804 } else {
4805 // Negate the left sub-tree if possible, otherwise negate the result.
4806 NegateR = CanNegateR;
4807 NegateAfterR = !CanNegateR;
4808 }
4809 NegateL = true;
4810 NegateAfterAll = !Negate;
4811 } else {
4812 assert(Opcode == TargetOpcode::G_AND &&
4813 "Valid conjunction/disjunction tree");
4814 assert(!Negate && "Valid conjunction/disjunction tree");
4815
4816 NegateL = false;
4817 NegateR = false;
4818 NegateAfterR = false;
4819 NegateAfterAll = false;
4820 }
4821
4822 // Emit sub-trees.
4823 AArch64CC::CondCode RHSCC;
4824 MachineInstr *CmpR =
4825 emitConjunctionRec(RHS, RHSCC, NegateR, CCOp, Predicate, MIB);
4826 if (NegateAfterR)
4827 RHSCC = AArch64CC::getInvertedCondCode(RHSCC);
4828 MachineInstr *CmpL = emitConjunctionRec(
4829 LHS, OutCC, NegateL, CmpR->getOperand(0).getReg(), RHSCC, MIB);
4830 if (NegateAfterAll)
4831 OutCC = AArch64CC::getInvertedCondCode(OutCC);
4832 return CmpL;
4833}
4834
4835MachineInstr *AArch64InstructionSelector::emitConjunction(
4836 Register Val, AArch64CC::CondCode &OutCC, MachineIRBuilder &MIB) const {
4837 bool DummyCanNegate;
4838 bool DummyMustBeFirst;
4839 if (!canEmitConjunction(Val, DummyCanNegate, DummyMustBeFirst, false,
4840 *MIB.getMRI()))
4841 return nullptr;
4842 return emitConjunctionRec(Val, OutCC, false, Register(), AArch64CC::AL, MIB);
4843}
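// Rough outline of the recursion above (illustrative only, made-up vregs):
// for a tree such as
//   %c:gpr(s1) = G_AND (G_ICMP eq, %a, 0), (G_ICMP slt, %b, %d)
// one leaf is lowered to a plain SUBS/FCMP and every remaining leaf becomes a
// conditional compare (CCMP/CCMN/FCCMP) predicated on the condition code of
// the previously emitted compare. OutCC is the condition code of the last
// compare in the chain, which callers then feed into a CSEL or conditional
// branch.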
4844
4845bool AArch64InstructionSelector::tryOptSelectConjunction(GSelect &SelI,
4846 MachineInstr &CondMI) {
4847 AArch64CC::CondCode AArch64CC;
4848 MachineInstr *ConjMI = emitConjunction(SelI.getCondReg(), AArch64CC, MIB);
4849 if (!ConjMI)
4850 return false;
4851
4852 emitSelect(SelI.getReg(0), SelI.getTrueReg(), SelI.getFalseReg(), AArch64CC, MIB);
4853 SelI.eraseFromParent();
4854 return true;
4855}
4856
4857bool AArch64InstructionSelector::tryOptSelect(GSelect &I) {
4858 MachineRegisterInfo &MRI = *MIB.getMRI();
4859 // We want to recognize this pattern:
4860 //
4861 // $z = G_FCMP pred, $x, $y
4862 // ...
4863 // $w = G_SELECT $z, $a, $b
4864 //
4865 // Where the value of $z is *only* ever used by the G_SELECT (possibly with
4866 // some copies/truncs in between.)
4867 //
4868 // If we see this, then we can emit something like this:
4869 //
4870 // fcmp $x, $y
4871 // fcsel $w, $a, $b, pred
4872 //
4873 // Rather than emitting both of the rather long sequences in the standard
4874 // G_FCMP/G_SELECT select methods.
4875
4876 // First, check if the condition is defined by a compare.
4877 MachineInstr *CondDef = MRI.getVRegDef(I.getOperand(1).getReg());
4878
4879 // We can only fold if all of the defs have one use.
4880 Register CondDefReg = CondDef->getOperand(0).getReg();
4881 if (!MRI.hasOneNonDBGUse(CondDefReg)) {
4882 // Unless it's another select.
4883 for (const MachineInstr &UI : MRI.use_nodbg_instructions(CondDefReg)) {
4884 if (CondDef == &UI)
4885 continue;
4886 if (UI.getOpcode() != TargetOpcode::G_SELECT)
4887 return false;
4888 }
4889 }
4890
4891 // Is the condition defined by a compare?
4892 unsigned CondOpc = CondDef->getOpcode();
4893 if (CondOpc != TargetOpcode::G_ICMP && CondOpc != TargetOpcode::G_FCMP) {
4894 if (tryOptSelectConjunction(I, *CondDef))
4895 return true;
4896 return false;
4897 }
4898
4899 AArch64CC::CondCode CondCode;
4900 if (CondOpc == TargetOpcode::G_ICMP) {
4901 auto Pred =
4902 static_cast<CmpInst::Predicate>(CondDef->getOperand(1).getPredicate());
4903 CondCode = changeICMPPredToAArch64CC(Pred);
4904 emitIntegerCompare(CondDef->getOperand(2), CondDef->getOperand(3),
4905 CondDef->getOperand(1), MIB);
4906 } else {
4907 // Get the condition code for the select.
4908 auto Pred =
4909 static_cast<CmpInst::Predicate>(CondDef->getOperand(1).getPredicate());
4910 AArch64CC::CondCode CondCode2;
4911 changeFCMPPredToAArch64CC(Pred, CondCode, CondCode2);
4912
4913 // changeFCMPPredToAArch64CC sets CondCode2 to AL when we require two
4914 // instructions to emit the comparison.
4915 // TODO: Handle FCMP_UEQ and FCMP_ONE. After that, this check will be
4916 // unnecessary.
4917 if (CondCode2 != AArch64CC::AL)
4918 return false;
4919
4920 if (!emitFPCompare(CondDef->getOperand(2).getReg(),
4921 CondDef->getOperand(3).getReg(), MIB)) {
4922 LLVM_DEBUG(dbgs() << "Couldn't emit compare for select!\n");
4923 return false;
4924 }
4925 }
4926
4927 // Emit the select.
4928 emitSelect(I.getOperand(0).getReg(), I.getOperand(2).getReg(),
4929 I.getOperand(3).getReg(), CondCode, MIB);
4930 I.eraseFromParent();
4931 return true;
4932}
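// The integer path above is analogous to the FCMP example in the comment
// (sketch, with made-up vregs): for
//   %z:gpr(s1) = G_ICMP intpred(slt), %x, %y
//   %w:gpr(s32) = G_SELECT %z, %a, %b
// emitIntegerCompare produces a SUBS and emitSelect then typically produces a
// CSELWr (or a CSINC/CSINV variant) using the AArch64 condition code derived
// from the predicate.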
4933
4934MachineInstr *AArch64InstructionSelector::tryFoldIntegerCompare(
4935 MachineOperand &LHS, MachineOperand &RHS, MachineOperand &Predicate,
4936 MachineIRBuilder &MIRBuilder) const {
4937 assert(LHS.isReg() && RHS.isReg() && Predicate.isPredicate() &&
4938 "Unexpected MachineOperand");
4939 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
4940 // We want to find this sort of thing:
4941 // x = G_SUB 0, y
4942 // G_ICMP z, x
4943 //
4944 // In this case, we can fold the G_SUB into the G_ICMP using a CMN instead.
4945 // e.g:
4946 //
4947 // cmn z, y
4948
4949 // Check if the RHS or LHS of the G_ICMP is defined by a SUB
4950 MachineInstr *LHSDef = getDefIgnoringCopies(LHS.getReg(), MRI);
4951 MachineInstr *RHSDef = getDefIgnoringCopies(RHS.getReg(), MRI);
4952 auto P = static_cast<CmpInst::Predicate>(Predicate.getPredicate());
4953 // Given this:
4954 //
4955 // x = G_SUB 0, y
4956 // G_ICMP x, z
4957 //
4958 // Produce this:
4959 //
4960 // cmn y, z
4961 if (isCMN(LHSDef, P, MRI))
4962 return emitCMN(LHSDef->getOperand(2), RHS, MIRBuilder);
4963
4964 // Same idea here, but with the RHS of the compare instead:
4965 //
4966 // Given this:
4967 //
4968 // x = G_SUB 0, y
4969 // G_ICMP z, x
4970 //
4971 // Produce this:
4972 //
4973 // cmn z, y
4974 if (isCMN(RHSDef, P, MRI))
4975 return emitCMN(LHS, RHSDef->getOperand(2), MIRBuilder);
4976
4977 // Given this:
4978 //
4979 // z = G_AND x, y
4980 // G_ICMP z, 0
4981 //
4982 // Produce this if the compare is signed:
4983 //
4984 // tst x, y
4985 if (!CmpInst::isUnsigned(P) && LHSDef &&
4986 LHSDef->getOpcode() == TargetOpcode::G_AND) {
4987 // Make sure that the RHS is 0.
4988 auto ValAndVReg = getIConstantVRegValWithLookThrough(RHS.getReg(), MRI);
4989 if (!ValAndVReg || ValAndVReg->Value != 0)
4990 return nullptr;
4991
4992 return emitTST(LHSDef->getOperand(1),
4993 LHSDef->getOperand(2), MIRBuilder);
4994 }
4995
4996 return nullptr;
4997}
4998
4999bool AArch64InstructionSelector::selectShuffleVector(
5000 MachineInstr &I, MachineRegisterInfo &MRI) {
5001 const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
5002 Register Src1Reg = I.getOperand(1).getReg();
5003 const LLT Src1Ty = MRI.getType(Src1Reg);
5004 Register Src2Reg = I.getOperand(2).getReg();
5005 const LLT Src2Ty = MRI.getType(Src2Reg);
5006 ArrayRef<int> Mask = I.getOperand(3).getShuffleMask();
5007
5008 MachineBasicBlock &MBB = *I.getParent();
5009 MachineFunction &MF = *MBB.getParent();
5010 LLVMContext &Ctx = MF.getFunction().getContext();
5011
5012 // G_SHUFFLE_VECTOR is weird in that the source operands can be scalars, if
5013 // it's originated from a <1 x T> type. Those should have been lowered into
5014 // G_BUILD_VECTOR earlier.
5015 if (!Src1Ty.isVector() || !Src2Ty.isVector()) {
5016 LLVM_DEBUG(dbgs() << "Could not select a \"scalar\" G_SHUFFLE_VECTOR\n");
5017 return false;
5018 }
5019
5020 unsigned BytesPerElt = DstTy.getElementType().getSizeInBits() / 8;
5021
5022 SmallVector<Constant *, 64> CstIdxs;
5023 for (int Val : Mask) {
5024 // For now, we'll just treat any undef indexes as 0. This should be
5025 // optimized in the future, e.g. to select DUP etc.
5026 Val = Val < 0 ? 0 : Val;
5027 for (unsigned Byte = 0; Byte < BytesPerElt; ++Byte) {
5028 unsigned Offset = Byte + Val * BytesPerElt;
5029 CstIdxs.emplace_back(ConstantInt::get(Type::getInt8Ty(Ctx), Offset));
5030 }
5031 }
5032
5033 // Use a constant pool to load the index vector for TBL.
5034 Constant *CPVal = ConstantVector::get(CstIdxs);
5035 MachineInstr *IndexLoad = emitLoadFromConstantPool(CPVal, MIB);
5036 if (!IndexLoad) {
5037 LLVM_DEBUG(dbgs() << "Could not load from a constant pool");
5038 return false;
5039 }
5040
5041 if (DstTy.getSizeInBits() != 128) {
5042 assert(DstTy.getSizeInBits() == 64 && "Unexpected shuffle result ty");
5043 // This case can be done with TBL1.
5044 MachineInstr *Concat =
5045 emitVectorConcat(std::nullopt, Src1Reg, Src2Reg, MIB);
5046 if (!Concat) {
5047 LLVM_DEBUG(dbgs() << "Could not do vector concat for tbl1");
5048 return false;
5049 }
5050
5051 // The constant pool load will be 64 bits, so we need to convert it to an FPR128 reg.
5052 IndexLoad = emitScalarToVector(64, &AArch64::FPR128RegClass,
5053 IndexLoad->getOperand(0).getReg(), MIB);
5054
5055 auto TBL1 = MIB.buildInstr(
5056 AArch64::TBLv16i8One, {&AArch64::FPR128RegClass},
5057 {Concat->getOperand(0).getReg(), IndexLoad->getOperand(0).getReg()});
5059
5060 auto Copy =
5061 MIB.buildInstr(TargetOpcode::COPY, {I.getOperand(0).getReg()}, {})
5062 .addReg(TBL1.getReg(0), 0, AArch64::dsub);
5063 RBI.constrainGenericRegister(Copy.getReg(0), AArch64::FPR64RegClass, MRI);
5064 I.eraseFromParent();
5065 return true;
5066 }
5067
5068 // For TBL2 we need to emit a REG_SEQUENCE to tie together two consecutive
5069 // Q registers for regalloc.
5070 SmallVector<Register, 2> Regs = {Src1Reg, Src2Reg};
5071 auto RegSeq = createQTuple(Regs, MIB);
5072 auto TBL2 = MIB.buildInstr(AArch64::TBLv16i8Two, {I.getOperand(0)},
5073 {RegSeq, IndexLoad->getOperand(0)});
5075 I.eraseFromParent();
5076 return true;
5077}
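// Worked example of the index computation above (illustrative): for a
// <2 x s32> shuffle with Mask = {1, 0}, BytesPerElt is 4, so the TBL index
// bytes are {4, 5, 6, 7, 0, 1, 2, 3}. They are loaded from the constant pool
// (and widened to an FPR128 register), the sources are concatenated into a
// single 128-bit register, and one TBLv16i8One picks the bytes, with the low
// 64 bits copied out via the dsub subregister.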
5078
5079MachineInstr *AArch64InstructionSelector::emitLaneInsert(
5080 std::optional<Register> DstReg, Register SrcReg, Register EltReg,
5081 unsigned LaneIdx, const RegisterBank &RB,
5082 MachineIRBuilder &MIRBuilder) const {
5083 MachineInstr *InsElt = nullptr;
5084 const TargetRegisterClass *DstRC = &AArch64::FPR128RegClass;
5085 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
5086
5087 // Create a register to define with the insert if one wasn't passed in.
5088 if (!DstReg)
5089 DstReg = MRI.createVirtualRegister(DstRC);
5090
5091 unsigned EltSize = MRI.getType(EltReg).getSizeInBits();
5092 unsigned Opc = getInsertVecEltOpInfo(RB, EltSize).first;
5093
5094 if (RB.getID() == AArch64::FPRRegBankID) {
5095 auto InsSub = emitScalarToVector(EltSize, DstRC, EltReg, MIRBuilder);
5096 InsElt = MIRBuilder.buildInstr(Opc, {*DstReg}, {SrcReg})
5097 .addImm(LaneIdx)
5098 .addUse(InsSub->getOperand(0).getReg())
5099 .addImm(0);
5100 } else {
5101 InsElt = MIRBuilder.buildInstr(Opc, {*DstReg}, {SrcReg})
5102 .addImm(LaneIdx)
5103 .addUse(EltReg);
5104 }
5105
5107 return InsElt;
5108}
5109
5110bool AArch64InstructionSelector::selectUSMovFromExtend(
5111 MachineInstr &MI, MachineRegisterInfo &MRI) {
5112 if (MI.getOpcode() != TargetOpcode::G_SEXT &&
5113 MI.getOpcode() != TargetOpcode::G_ZEXT &&
5114 MI.getOpcode() != TargetOpcode::G_ANYEXT)
5115 return false;
5116 bool IsSigned = MI.getOpcode() == TargetOpcode::G_SEXT;
5117 const Register DefReg = MI.getOperand(0).getReg();
5118 const LLT DstTy = MRI.getType(DefReg);
5119 unsigned DstSize = DstTy.getSizeInBits();
5120
5121 if (DstSize != 32 && DstSize != 64)
5122 return false;
5123
5124 MachineInstr *Extract = getOpcodeDef(TargetOpcode::G_EXTRACT_VECTOR_ELT,
5125 MI.getOperand(1).getReg(), MRI);
5126 int64_t Lane;
5127 if (!Extract || !mi_match(Extract->getOperand(2).getReg(), MRI, m_ICst(Lane)))
5128 return false;
5129 Register Src0 = Extract->getOperand(1).getReg();
5130
5131 const LLT &VecTy = MRI.getType(Src0);
5132
5133 if (VecTy.getSizeInBits() != 128) {
5134 const MachineInstr *ScalarToVector = emitScalarToVector(
5135 VecTy.getSizeInBits(), &AArch64::FPR128RegClass, Src0, MIB);
5136 assert(ScalarToVector && "Didn't expect emitScalarToVector to fail!");
5137 Src0 = ScalarToVector->getOperand(0).getReg();
5138 }
5139
5140 unsigned Opcode;
5141 if (DstSize == 64 && VecTy.getScalarSizeInBits() == 32)
5142 Opcode = IsSigned ? AArch64::SMOVvi32to64 : AArch64::UMOVvi32;
5143 else if (DstSize == 64 && VecTy.getScalarSizeInBits() == 16)
5144 Opcode = IsSigned ? AArch64::SMOVvi16to64 : AArch64::UMOVvi16;
5145 else if (DstSize == 64 && VecTy.getScalarSizeInBits() == 8)
5146 Opcode = IsSigned ? AArch64::SMOVvi8to64 : AArch64::UMOVvi8;
5147 else if (DstSize == 32 && VecTy.getScalarSizeInBits() == 16)
5148 Opcode = IsSigned ? AArch64::SMOVvi16to32 : AArch64::UMOVvi16;
5149 else if (DstSize == 32 && VecTy.getScalarSizeInBits() == 8)
5150 Opcode = IsSigned ? AArch64::SMOVvi8to32 : AArch64::UMOVvi8;
5151 else
5152 llvm_unreachable("Unexpected type combo for S/UMov!");
5153
5154 // We may need to generate one of these, depending on the type and sign of the
5155 // input:
5156 // DstReg = SMOV Src0, Lane;
5157 // NewReg = UMOV Src0, Lane; DstReg = SUBREG_TO_REG NewReg, sub_32;
5158 MachineInstr *ExtI = nullptr;
5159 if (DstSize == 64 && !IsSigned) {
5160 Register NewReg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
5161 MIB.buildInstr(Opcode, {NewReg}, {Src0}).addImm(Lane);
5162 ExtI = MIB.buildInstr(AArch64::SUBREG_TO_REG, {DefReg}, {})
5163 .addImm(0)
5164 .addUse(NewReg)
5165 .addImm(AArch64::sub_32);
5166 RBI.constrainGenericRegister(DefReg, AArch64::GPR64RegClass, MRI);
5167 } else
5168 ExtI = MIB.buildInstr(Opcode, {DefReg}, {Src0}).addImm(Lane);
5169
5171 MI.eraseFromParent();
5172 return true;
5173}
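// Example of the mapping above (illustrative vregs): for
//   %e:gpr(s32) = G_SEXT (G_EXTRACT_VECTOR_ELT %v:fpr(<8 x s16>), 3)
// this selects SMOVvi16to32 %e, %v, 3. The unsigned/anyext form uses
// UMOVvi16 instead, and a 64-bit unsigned destination additionally wraps the
// 32-bit UMOV result in a SUBREG_TO_REG of sub_32.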
5174
5175MachineInstr *AArch64InstructionSelector::tryAdvSIMDModImm8(
5176 Register Dst, unsigned DstSize, APInt Bits, MachineIRBuilder &Builder) {
5177 unsigned int Op;
5178 if (DstSize == 128) {
5179 if (Bits.getHiBits(64) != Bits.getLoBits(64))
5180 return nullptr;
5181 Op = AArch64::MOVIv16b_ns;
5182 } else {
5183 Op = AArch64::MOVIv8b_ns;
5184 }
5185
5186 uint64_t Val = Bits.zextOrTrunc(64).getZExtValue();
5187
5188 if (AArch64_AM::isAdvSIMDModImmType9(Val)) {
5189 Val = AArch64_AM::encodeAdvSIMDModImmType9(Val);
5190 auto Mov = Builder.buildInstr(Op, {Dst}, {}).addImm(Val);
5192 return &*Mov;
5193 }
5194 return nullptr;
5195}
5196
5197MachineInstr *AArch64InstructionSelector::tryAdvSIMDModImm16(
5198 Register Dst, unsigned DstSize, APInt Bits, MachineIRBuilder &Builder,
5199 bool Inv) {
5200
5201 unsigned int Op;
5202 if (DstSize == 128) {
5203 if (Bits.getHiBits(64) != Bits.getLoBits(64))
5204 return nullptr;
5205 Op = Inv ? AArch64::MVNIv8i16 : AArch64::MOVIv8i16;
5206 } else {
5207 Op = Inv ? AArch64::MVNIv4i16 : AArch64::MOVIv4i16;
5208 }
5209
5210 uint64_t Val = Bits.zextOrTrunc(64).getZExtValue();
5211 uint64_t Shift;
5212
5213 if (AArch64_AM::isAdvSIMDModImmType5(Val)) {
5214 Val = AArch64_AM::encodeAdvSIMDModImmType5(Val);
5215 Shift = 0;
5216 } else if (AArch64_AM::isAdvSIMDModImmType6(Val)) {
5217 Val = AArch64_AM::encodeAdvSIMDModImmType6(Val);
5218 Shift = 8;
5219 } else
5220 return nullptr;
5221
5222 auto Mov = Builder.buildInstr(Op, {Dst}, {}).addImm(Val).addImm(Shift);
5224 return &*Mov;
5225}
5226
5227MachineInstr *AArch64InstructionSelector::tryAdvSIMDModImm32(
5228 Register Dst, unsigned DstSize, APInt Bits, MachineIRBuilder &Builder,
5229 bool Inv) {
5230
5231 unsigned int Op;
5232 if (DstSize == 128) {
5233 if (Bits.getHiBits(64) != Bits.getLoBits(64))
5234 return nullptr;
5235 Op = Inv ? AArch64::MVNIv4i32 : AArch64::MOVIv4i32;
5236 } else {
5237 Op = Inv ? AArch64::MVNIv2i32 : AArch64::MOVIv2i32;
5238 }
5239
5240 uint64_t Val = Bits.zextOrTrunc(64).getZExtValue();
5241 uint64_t Shift;
5242
5243 if ((AArch64_AM::isAdvSIMDModImmType1(Val))) {
5244 Val = AArch64_AM::encodeAdvSIMDModImmType1(Val);
5245 Shift = 0;
5246 } else if ((AArch64_AM::isAdvSIMDModImmType2(Val))) {
5247 Val = AArch64_AM::encodeAdvSIMDModImmType2(Val);
5248 Shift = 8;
5249 } else if ((AArch64_AM::isAdvSIMDModImmType3(Val))) {
5250 Val = AArch64_AM::encodeAdvSIMDModImmType3(Val);
5251 Shift = 16;
5252 } else if ((AArch64_AM::isAdvSIMDModImmType4(Val))) {
5253 Val = AArch64_AM::encodeAdvSIMDModImmType4(Val);
5254 Shift = 24;
5255 } else
5256 return nullptr;
5257
5258 auto Mov = Builder.buildInstr(Op, {Dst}, {}).addImm(Val).addImm(Shift);
5260 return &*Mov;
5261}
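// Worked example (illustrative): a v4i32 splat of 0x0000AB00 matches
// isAdvSIMDModImmType2, so the selected instruction is MOVIv4i32 with the
// encoded 8-bit immediate and Shift = 8, i.e. roughly
// "movi v0.4s, #0xab, lsl #8". Types 1-4 correspond to the byte sitting at
// LSL #0/#8/#16/#24 within each 32-bit element.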
5262
5263MachineInstr *AArch64InstructionSelector::tryAdvSIMDModImm64(
5264 Register Dst, unsigned DstSize, APInt Bits, MachineIRBuilder &Builder) {
5265
5266 unsigned int Op;
5267 if (DstSize == 128) {
5268 if (Bits.getHiBits(64) != Bits.getLoBits(64))
5269 return nullptr;
5270 Op = AArch64::MOVIv2d_ns;
5271 } else {
5272 Op = AArch64::MOVID;
5273 }
5274
5275 uint64_t Val = Bits.zextOrTrunc(64).getZExtValue();
5276 if (AArch64_AM::isAdvSIMDModImmType10(Val)) {
5277 Val = AArch64_AM::encodeAdvSIMDModImmType10(Val);
5278 auto Mov = Builder.buildInstr(Op, {Dst}, {}).addImm(Val);
5280 return &*Mov;
5281 }
5282 return nullptr;
5283}
5284
5285MachineInstr *AArch64InstructionSelector::tryAdvSIMDModImm321s(
5286 Register Dst, unsigned DstSize, APInt Bits, MachineIRBuilder &Builder,
5287 bool Inv) {
5288
5289 unsigned int Op;
5290 if (DstSize == 128) {
5291 if (Bits.getHiBits(64) != Bits.getLoBits(64))
5292 return nullptr;
5293 Op = Inv ? AArch64::MVNIv4s_msl : AArch64::MOVIv4s_msl;
5294 } else {
5295 Op = Inv ? AArch64::MVNIv2s_msl : AArch64::MOVIv2s_msl;
5296 }
5297
5298 uint64_t Val = Bits.zextOrTrunc(64).getZExtValue();
5299 uint64_t Shift;
5300
5301 if (AArch64_AM::isAdvSIMDModImmType7(Val)) {
5302 Val = AArch64_AM::encodeAdvSIMDModImmType7(Val);
5303 Shift = 264;
5304 } else if (AArch64_AM::isAdvSIMDModImmType8(Val)) {
5305 Val = AArch64_AM::encodeAdvSIMDModImmType8(Val);
5306 Shift = 272;
5307 } else
5308 return nullptr;
5309
5310 auto Mov = Builder.buildInstr(Op, {Dst}, {}).addImm(Val).addImm(Shift);
5312 return &*Mov;
5313}
5314
5315MachineInstr *AArch64InstructionSelector::tryAdvSIMDModImmFP(
5316 Register Dst, unsigned DstSize, APInt Bits, MachineIRBuilder &Builder) {
5317
5318 unsigned int Op;
5319 bool IsWide = false;
5320 if (DstSize == 128) {
5321 if (Bits.getHiBits(64) != Bits.getLoBits(64))
5322 return nullptr;
5323 Op = AArch64::FMOVv4f32_ns;
5324 IsWide = true;
5325 } else {
5326 Op = AArch64::FMOVv2f32_ns;
5327 }
5328
5329 uint64_t Val = Bits.zextOrTrunc(64).getZExtValue();
5330
5331 if (AArch64_AM::isAdvSIMDModImmType11(Val)) {
5332 Val = AArch64_AM::encodeAdvSIMDModImmType11(Val);
5333 } else if (IsWide && AArch64_AM::isAdvSIMDModImmType12(Val)) {
5334 Val = AArch64_AM::encodeAdvSIMDModImmType12(Val);
5335 Op = AArch64::FMOVv2f64_ns;
5336 } else
5337 return nullptr;
5338
5339 auto Mov = Builder.buildInstr(Op, {Dst}, {}).addImm(Val);
5341 return &*Mov;
5342}
5343
5344bool AArch64InstructionSelector::selectIndexedExtLoad(
5345 MachineInstr &MI, MachineRegisterInfo &MRI) {
5346 auto &ExtLd = cast<GIndexedAnyExtLoad>(MI);
5347 Register Dst = ExtLd.getDstReg();
5348 Register WriteBack = ExtLd.getWritebackReg();
5349 Register Base = ExtLd.getBaseReg();
5350 Register Offset = ExtLd.getOffsetReg();
5351 LLT Ty = MRI.getType(Dst);
5352 assert(Ty.getSizeInBits() <= 64); // Only for scalar GPRs.
5353 unsigned MemSizeBits = ExtLd.getMMO().getMemoryType().getSizeInBits();
5354 bool IsPre = ExtLd.isPre();
5355 bool IsSExt = isa<GIndexedSExtLoad>(ExtLd);
5356 bool InsertIntoXReg = false;
5357 bool IsDst64 = Ty.getSizeInBits() == 64;
5358
5359 unsigned Opc = 0;
5360 LLT NewLdDstTy;
5361 LLT s32 = LLT::scalar(32);
5362 LLT s64 = LLT::scalar(64);
5363
5364 if (MemSizeBits == 8) {
5365 if (IsSExt) {
5366 if (IsDst64)
5367 Opc = IsPre ? AArch64::LDRSBXpre : AArch64::LDRSBXpost;
5368 else
5369 Opc = IsPre ? AArch64::LDRSBWpre : AArch64::LDRSBWpost;
5370 NewLdDstTy = IsDst64 ? s64 : s32;
5371 } else {
5372 Opc = IsPre ? AArch64::LDRBBpre : AArch64::LDRBBpost;
5373 InsertIntoXReg = IsDst64;
5374 NewLdDstTy = s32;
5375 }
5376 } else if (MemSizeBits == 16) {
5377 if (IsSExt) {
5378 if (IsDst64)
5379 Opc = IsPre ? AArch64::LDRSHXpre : AArch64::LDRSHXpost;
5380 else
5381 Opc = IsPre ? AArch64::LDRSHWpre : AArch64::LDRSHWpost;
5382 NewLdDstTy = IsDst64 ? s64 : s32;
5383 } else {
5384 Opc = IsPre ? AArch64::LDRHHpre : AArch64::LDRHHpost;
5385 InsertIntoXReg = IsDst64;
5386 NewLdDstTy = s32;
5387 }
5388 } else if (MemSizeBits == 32) {
5389 if (IsSExt) {
5390 Opc = IsPre ? AArch64::LDRSWpre : AArch64::LDRSWpost;
5391 NewLdDstTy = s64;
5392 } else {
5393 Opc = IsPre ? AArch64::LDRWpre : AArch64::LDRWpost;
5394 InsertIntoXReg = IsDst64;
5395 NewLdDstTy = s32;
5396 }
5397 } else {
5398 llvm_unreachable("Unexpected size for indexed load");
5399 }
5400
5401 if (RBI.getRegBank(Dst, MRI, TRI)->getID() == AArch64::FPRRegBankID)
5402 return false; // We should be on gpr.
5403
5404 auto Cst = getIConstantVRegVal(Offset, MRI);
5405 if (!Cst)
5406 return false; // Shouldn't happen, but just in case.
5407
5408 auto LdMI = MIB.buildInstr(Opc, {WriteBack, NewLdDstTy}, {Base})
5409 .addImm(Cst->getSExtValue());
5410 LdMI.cloneMemRefs(ExtLd);
5412 // Make sure to select the load with the MemTy as the dest type, and then
5413 // insert into X reg if needed.
5414 if (InsertIntoXReg) {
5415 // Generate a SUBREG_TO_REG.
5416 auto SubToReg = MIB.buildInstr(TargetOpcode::SUBREG_TO_REG, {Dst}, {})
5417 .addImm(0)
5418 .addUse(LdMI.getReg(1))
5419 .addImm(AArch64::sub_32);
5420 RBI.constrainGenericRegister(SubToReg.getReg(0), AArch64::GPR64RegClass,
5421 MRI);
5422 } else {
5423 auto Copy = MIB.buildCopy(Dst, LdMI.getReg(1));
5424 selectCopy(*Copy, TII, MRI, TRI, RBI);
5425 }
5426 MI.eraseFromParent();
5427
5428 return true;
5429}
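// Example of the opcode choice above (illustrative): a post-indexed
// G_INDEXED_SEXTLOAD of 16 bits into an s64 destination selects LDRSHXpost,
// whose two defs are the loaded value and the written-back base register. The
// zero-extending form of the same load instead selects LDRHHpost into a
// 32-bit register and materializes the s64 result with SUBREG_TO_REG.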
5430
5431bool AArch64InstructionSelector::selectIndexedLoad(MachineInstr &MI,
5432 MachineRegisterInfo &MRI) {
5433 auto &Ld = cast<GIndexedLoad>(MI);
5434 Register Dst = Ld.getDstReg();
5435 Register WriteBack = Ld.getWritebackReg();
5436 Register Base = Ld.getBaseReg();
5437 Register Offset = Ld.getOffsetReg();
5438 assert(MRI.getType(Dst).getSizeInBits() <= 128 &&
5439 "Unexpected type for indexed load");
5440 unsigned MemSize = Ld.getMMO().getMemoryType().getSizeInBytes();
5441
5442 if (MemSize < MRI.getType(Dst).getSizeInBytes())
5443 return selectIndexedExtLoad(MI, MRI);
5444
5445 unsigned Opc = 0;
5446 if (Ld.isPre()) {
5447 static constexpr unsigned GPROpcodes[] = {
5448 AArch64::LDRBBpre, AArch64::LDRHHpre, AArch64::LDRWpre,
5449 AArch64::LDRXpre};
5450 static constexpr unsigned FPROpcodes[] = {
5451 AArch64::LDRBpre, AArch64::LDRHpre, AArch64::LDRSpre, AArch64::LDRDpre,
5452 AArch64::LDRQpre};
5453 if (RBI.getRegBank(Dst, MRI, TRI)->getID() == AArch64::FPRRegBankID)
5454 Opc = FPROpcodes[Log2_32(MemSize)];
5455 else
5456 Opc = GPROpcodes[Log2_32(MemSize)];
5457 } else {
5458 static constexpr unsigned GPROpcodes[] = {
5459 AArch64::LDRBBpost, AArch64::LDRHHpost, AArch64::LDRWpost,
5460 AArch64::LDRXpost};
5461 static constexpr unsigned FPROpcodes[] = {
5462 AArch64::LDRBpost, AArch64::LDRHpost, AArch64::LDRSpost,
5463 AArch64::LDRDpost, AArch64::LDRQpost};
5464 if (RBI.getRegBank(Dst, MRI, TRI)->getID() == AArch64::FPRRegBankID)
5465 Opc = FPROpcodes[Log2_32(MemSize)];
5466 else
5467 Opc = GPROpcodes[Log2_32(MemSize)];
5468 }
5469 auto Cst = getIConstantVRegVal(Offset, MRI);
5470 if (!Cst)
5471 return false; // Shouldn't happen, but just in case.
5472 auto LdMI =
5473 MIB.buildInstr(Opc, {WriteBack, Dst}, {Base}).addImm(Cst->getSExtValue());
5474 LdMI.cloneMemRefs(Ld);
5476 MI.eraseFromParent();
5477 return true;
5478}
5479
5480bool AArch64InstructionSelector::selectIndexedStore(GIndexedStore &I,
5481 MachineRegisterInfo &MRI) {
5482 Register Dst = I.getWritebackReg();
5483 Register Val = I.getValueReg();
5484 Register Base = I.getBaseReg();
5485 Register Offset = I.getOffsetReg();
5486 LLT ValTy = MRI.getType(Val);
5487 assert(ValTy.getSizeInBits() <= 128 && "Unexpected type for indexed store");
5488
5489 unsigned Opc = 0;
5490 if (I.isPre()) {
5491 static constexpr unsigned GPROpcodes[] = {
5492 AArch64::STRBBpre, AArch64::STRHHpre, AArch64::STRWpre,
5493 AArch64::STRXpre};
5494 static constexpr unsigned FPROpcodes[] = {
5495 AArch64::STRBpre, AArch64::STRHpre, AArch64::STRSpre, AArch64::STRDpre,
5496 AArch64::STRQpre};
5497
5498 if (RBI.getRegBank(Val, MRI, TRI)->getID() == AArch64::FPRRegBankID)
5499 Opc = FPROpcodes[Log2_32(ValTy.getSizeInBytes())];
5500 else
5501 Opc = GPROpcodes[Log2_32(ValTy.getSizeInBytes())];
5502 } else {
5503 static constexpr unsigned GPROpcodes[] = {
5504 AArch64::STRBBpost, AArch64::STRHHpost, AArch64::STRWpost,
5505 AArch64::STRXpost};
5506 static constexpr unsigned FPROpcodes[] = {
5507 AArch64::STRBpost, AArch64::STRHpost, AArch64::STRSpost,
5508 AArch64::STRDpost, AArch64::STRQpost};
5509
5510 if (RBI.getRegBank(Val, MRI, TRI)->getID() == AArch64::FPRRegBankID)
5511 Opc = FPROpcodes[Log2_32(ValTy.getSizeInBytes())];
5512 else
5513 Opc = GPROpcodes[Log2_32(ValTy.getSizeInBytes())];
5514 }
5515
5516 auto Cst = getIConstantVRegVal(Offset, MRI);
5517 if (!Cst)
5518 return false; // Shouldn't happen, but just in case.
5519 auto Str =
5520 MIB.buildInstr(Opc, {Dst}, {Val, Base}).addImm(Cst->getSExtValue());
5521 Str.cloneMemRefs(I);
5523 I.eraseFromParent();
5524 return true;
5525}
5526
5527 MachineInstr *
5528 AArch64InstructionSelector::emitConstantVector(Register Dst, Constant *CV,
5529 MachineIRBuilder &MIRBuilder,
5530 MachineRegisterInfo &MRI) {
5531 LLT DstTy = MRI.getType(Dst);
5532 unsigned DstSize = DstTy.getSizeInBits();
5533 if (CV->isNullValue()) {
5534 if (DstSize == 128) {
5535 auto Mov =
5536 MIRBuilder.buildInstr(AArch64::MOVIv2d_ns, {Dst}, {}).addImm(0);
5538 return &*Mov;
5539 }
5540
5541 if (DstSize == 64) {
5542 auto Mov =
5543 MIRBuilder
5544 .buildInstr(AArch64::MOVIv2d_ns, {&AArch64::FPR128RegClass}, {})
5545 .addImm(0);
5546 auto Copy = MIRBuilder.buildInstr(TargetOpcode::COPY, {Dst}, {})
5547 .addReg(Mov.getReg(0), 0, AArch64::dsub);
5548 RBI.constrainGenericRegister(Dst, AArch64::FPR64RegClass, MRI);
5549 return &*Copy;
5550 }
5551 }
5552
5553 if (CV->getSplatValue()) {
5554 APInt DefBits = APInt::getSplat(DstSize, CV->getUniqueInteger());
5555 auto TryMOVIWithBits = [&](APInt DefBits) -> MachineInstr * {
5556 MachineInstr *NewOp;
5557 bool Inv = false;
5558 if ((NewOp = tryAdvSIMDModImm64(Dst, DstSize, DefBits, MIRBuilder)) ||
5559 (NewOp =
5560 tryAdvSIMDModImm32(Dst, DstSize, DefBits, MIRBuilder, Inv)) ||
5561 (NewOp =
5562 tryAdvSIMDModImm321s(Dst, DstSize, DefBits, MIRBuilder, Inv)) ||
5563 (NewOp =
5564 tryAdvSIMDModImm16(Dst, DstSize, DefBits, MIRBuilder, Inv)) ||
5565 (NewOp = tryAdvSIMDModImm8(Dst, DstSize, DefBits, MIRBuilder)) ||
5566 (NewOp = tryAdvSIMDModImmFP(Dst, DstSize, DefBits, MIRBuilder)))
5567 return NewOp;
5568
5569 DefBits = ~DefBits;
5570 Inv = true;
5571 if ((NewOp =
5572 tryAdvSIMDModImm32(Dst, DstSize, DefBits, MIRBuilder, Inv)) ||
5573 (NewOp =
5574 tryAdvSIMDModImm321s(Dst, DstSize, DefBits, MIRBuilder, Inv)) ||
5575 (NewOp = tryAdvSIMDModImm16(Dst, DstSize, DefBits, MIRBuilder, Inv)))
5576 return NewOp;
5577 return nullptr;
5578 };
5579
5580 if (auto *NewOp = TryMOVIWithBits(DefBits))
5581 return NewOp;
5582
5583 // See if a fneg of the constant can be materialized with a MOVI, etc
5584 auto TryWithFNeg = [&](APInt DefBits, int NumBits,
5585 unsigned NegOpc) -> MachineInstr * {
5586 // FNegate each sub-element of the constant
5587 APInt Neg = APInt::getHighBitsSet(NumBits, 1).zext(DstSize);
5588 APInt NegBits(DstSize, 0);
5589 unsigned NumElts = DstSize / NumBits;
5590 for (unsigned i = 0; i < NumElts; i++)
5591 NegBits |= Neg << (NumBits * i);
5592 NegBits = DefBits ^ NegBits;
5593
5594 // Try to create the new constants with MOVI, and if so generate a fneg
5595 // for it.
5596 if (auto *NewOp = TryMOVIWithBits(NegBits)) {
5597 Register NewDst = MRI.createVirtualRegister(&AArch64::FPR128RegClass);
5598 NewOp->getOperand(0).setReg(NewDst);
5599 return MIRBuilder.buildInstr(NegOpc, {Dst}, {NewDst});
5600 }
5601 return nullptr;
5602 };
5603 MachineInstr *R;
5604 if ((R = TryWithFNeg(DefBits, 32, AArch64::FNEGv4f32)) ||
5605 (R = TryWithFNeg(DefBits, 64, AArch64::FNEGv2f64)) ||
5606 (STI.hasFullFP16() &&
5607 (R = TryWithFNeg(DefBits, 16, AArch64::FNEGv8f16))))
5608 return R;
5609 }
5610
5611 auto *CPLoad = emitLoadFromConstantPool(CV, MIRBuilder);
5612 if (!CPLoad) {
5613 LLVM_DEBUG(dbgs() << "Could not generate cp load for constant vector!");
5614 return nullptr;
5615 }
5616
5617 auto Copy = MIRBuilder.buildCopy(Dst, CPLoad->getOperand(0));
5618 RBI.constrainGenericRegister(
5619 Dst, *MRI.getRegClass(CPLoad->getOperand(0).getReg()), MRI);
5620 return &*Copy;
5621}
5622
5623bool AArch64InstructionSelector::tryOptConstantBuildVec(
5624 MachineInstr &I, LLT DstTy, MachineRegisterInfo &MRI) {
5625 assert(I.getOpcode() == TargetOpcode::G_BUILD_VECTOR);
5626 unsigned DstSize = DstTy.getSizeInBits();
5627 assert(DstSize <= 128 && "Unexpected build_vec type!");
5628 if (DstSize < 32)
5629 return false;
5630 // Check if we're building a constant vector, in which case we want to
5631 // generate a constant pool load instead of a vector insert sequence.
5632 SmallVector<Constant *, 16> Csts;
5633 for (unsigned Idx = 1; Idx < I.getNumOperands(); ++Idx) {
5634 // Try to find G_CONSTANT or G_FCONSTANT
5635 auto *OpMI =
5636 getOpcodeDef(TargetOpcode::G_CONSTANT, I.getOperand(Idx).getReg(), MRI);
5637 if (OpMI)
5638 Csts.emplace_back(
5639 const_cast<ConstantInt *>(OpMI->getOperand(1).getCImm()));
5640 else if ((OpMI = getOpcodeDef(TargetOpcode::G_FCONSTANT,
5641 I.getOperand(Idx).getReg(), MRI)))
5642 Csts.emplace_back(
5643 const_cast<ConstantFP *>(OpMI->getOperand(1).getFPImm()));
5644 else
5645 return false;
5646 }
5647 Constant *CV = ConstantVector::get(Csts);
5648 if (!emitConstantVector(I.getOperand(0).getReg(), CV, MIB, MRI))
5649 return false;
5650 I.eraseFromParent();
5651 return true;
5652}
5653
5654bool AArch64InstructionSelector::tryOptBuildVecToSubregToReg(
5655 MachineInstr &I, MachineRegisterInfo &MRI) {
5656 // Given:
5657 // %vec = G_BUILD_VECTOR %elt, %undef, %undef, ... %undef
5658 //
5659 // Select the G_BUILD_VECTOR as a SUBREG_TO_REG from %elt.
5660 Register Dst = I.getOperand(0).getReg();
5661 Register EltReg = I.getOperand(1).getReg();
5662 LLT EltTy = MRI.getType(EltReg);
5663 // If the index isn't on the same bank as its elements, then this can't be a
5664 // SUBREG_TO_REG.
5665 const RegisterBank &EltRB = *RBI.getRegBank(EltReg, MRI, TRI);
5666 const RegisterBank &DstRB = *RBI.getRegBank(Dst, MRI, TRI);
5667 if (EltRB != DstRB)
5668 return false;
5669 if (any_of(drop_begin(I.operands(), 2), [&MRI](const MachineOperand &Op) {
5670 return !getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, Op.getReg(), MRI);
5671 }))
5672 return false;
5673 unsigned SubReg;
5674 const TargetRegisterClass *EltRC = getRegClassForTypeOnBank(EltTy, EltRB);
5675 if (!EltRC)
5676 return false;
5677 const TargetRegisterClass *DstRC =
5678 getRegClassForTypeOnBank(MRI.getType(Dst), DstRB);
5679 if (!DstRC)
5680 return false;
5681 if (!getSubRegForClass(EltRC, TRI, SubReg))
5682 return false;
5683 auto SubregToReg = MIB.buildInstr(AArch64::SUBREG_TO_REG, {Dst}, {})
5684 .addImm(0)
5685 .addUse(EltReg)
5686 .addImm(SubReg);
5687 I.eraseFromParent();
5688 constrainSelectedInstRegOperands(*SubregToReg, TII, TRI, RBI);
5689 return RBI.constrainGenericRegister(Dst, *DstRC, MRI);
5690}
5691
5692bool AArch64InstructionSelector::selectBuildVector(MachineInstr &I,
5693 MachineRegisterInfo &MRI) {
5694 assert(I.getOpcode() == TargetOpcode::G_BUILD_VECTOR);
5695 // Until we port more of the optimized selections, for now just use a vector
5696 // insert sequence.
5697 const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
5698 const LLT EltTy = MRI.getType(I.getOperand(1).getReg());
5699 unsigned EltSize = EltTy.getSizeInBits();
5700
5701 if (tryOptConstantBuildVec(I, DstTy, MRI))
5702 return true;
5703 if (tryOptBuildVecToSubregToReg(I, MRI))
5704 return true;
5705
5706 if (EltSize != 8 && EltSize != 16 && EltSize != 32 && EltSize != 64)
5707 return false; // Don't support all element types yet.
5708 const RegisterBank &RB = *RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI);
5709
5710 const TargetRegisterClass *DstRC = &AArch64::FPR128RegClass;
5711 MachineInstr *ScalarToVec =
5712 emitScalarToVector(DstTy.getElementType().getSizeInBits(), DstRC,
5713 I.getOperand(1).getReg(), MIB);
5714 if (!ScalarToVec)
5715 return false;
5716
5717 Register DstVec = ScalarToVec->getOperand(0).getReg();
5718 unsigned DstSize = DstTy.getSizeInBits();
5719
5720 // Keep track of the last MI we inserted. Later on, we might be able to save
5721 // a copy using it.
5722 MachineInstr *PrevMI = ScalarToVec;
5723 for (unsigned i = 2, e = DstSize / EltSize + 1; i < e; ++i) {
5724 // Note that if we don't do a subregister copy, we can end up making an
5725 // extra register.
5726 Register OpReg = I.getOperand(i).getReg();
5727 // Do not emit inserts for undefs
5728 if (!getOpcodeDef<GImplicitDef>(OpReg, MRI)) {
5729 PrevMI = &*emitLaneInsert(std::nullopt, DstVec, OpReg, i - 1, RB, MIB);
5730 DstVec = PrevMI->getOperand(0).getReg();
5731 }
5732 }
5733
5734 // If DstTy's size in bits is less than 128, then emit a subregister copy
5735 // from DstVec to the last register we've defined.
5736 if (DstSize < 128) {
5737 // Force this to be FPR using the destination vector.
5738 const TargetRegisterClass *RC =
5739 getRegClassForTypeOnBank(DstTy, *RBI.getRegBank(DstVec, MRI, TRI));
5740 if (!RC)
5741 return false;
5742 if (RC != &AArch64::FPR32RegClass && RC != &AArch64::FPR64RegClass) {
5743 LLVM_DEBUG(dbgs() << "Unsupported register class!\n");
5744 return false;
5745 }
5746
5747 unsigned SubReg = 0;
5748 if (!getSubRegForClass(RC, TRI, SubReg))
5749 return false;
5750 if (SubReg != AArch64::ssub && SubReg != AArch64::dsub) {
5751 LLVM_DEBUG(dbgs() << "Unsupported destination size! (" << DstSize
5752 << ")\n");
5753 return false;
5754 }
5755
5756 Register Reg = MRI.createVirtualRegister(RC);
5757 Register DstReg = I.getOperand(0).getReg();
5758
5759 MIB.buildInstr(TargetOpcode::COPY, {DstReg}, {}).addReg(DstVec, 0, SubReg);
5760 MachineOperand &RegOp = I.getOperand(1);
5761 RegOp.setReg(Reg);
5762 RBI.constrainGenericRegister(DstReg, *RC, MRI);
5763 } else {
5764 // We either have a vector with all elements (except the first one) undef or
5765 // at least one non-undef non-first element. In the first case, we need to
5766 // constrain the output register ourselves as we may have generated an
5767 // INSERT_SUBREG operation which is a generic operation for which the
5768 // output regclass cannot be automatically chosen.
5769 //
5770 // In the second case, there is no need to do this as it may generate an
5771 // instruction like INSvi32gpr where the regclass can be automatically
5772 // chosen.
5773 //
5774 // Also, we save a copy by re-using the destination register on the final
5775 // insert.
5776 PrevMI->getOperand(0).setReg(I.getOperand(0).getReg());
5778
5779 Register DstReg = PrevMI->getOperand(0).getReg();
5780 if (PrevMI == ScalarToVec && DstReg.isVirtual()) {
5781 const TargetRegisterClass *RC =
5782 getRegClassForTypeOnBank(DstTy, *RBI.getRegBank(DstVec, MRI, TRI));
5783 RBI.constrainGenericRegister(DstReg, *RC, MRI);
5784 }
5785 }
5786
5787 I.eraseFromParent();
5788 return true;
5789}
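// Sketch of the generic path above (illustrative): for
//   %v:fpr(<4 x s32>) = G_BUILD_VECTOR %a, %b, %c, %d
// the first element is widened into a 128-bit register via emitScalarToVector
// and each remaining non-undef element is inserted with an INSvi32* lane
// insert (gpr or lane variant depending on the element's register bank); the
// last insert writes the destination register directly to save a copy.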
5790
5791bool AArch64InstructionSelector::selectVectorLoadIntrinsic(unsigned Opc,
5792 unsigned NumVecs,
5793 MachineInstr &I) {
5794 assert(I.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS);
5795 assert(Opc && "Expected an opcode?");
5796 assert(NumVecs > 1 && NumVecs < 5 && "Only support 2, 3, or 4 vectors");
5797 auto &MRI = *MIB.getMRI();
5798 LLT Ty = MRI.getType(I.getOperand(0).getReg());
5799 unsigned Size = Ty.getSizeInBits();
5800 assert((Size == 64 || Size == 128) &&
5801 "Destination must be 64 bits or 128 bits?");
5802 unsigned SubReg = Size == 64 ? AArch64::dsub0 : AArch64::qsub0;
5803 auto Ptr = I.getOperand(I.getNumOperands() - 1).getReg();
5804 assert(MRI.getType(Ptr).isPointer() && "Expected a pointer type?");
5805 auto Load = MIB.buildInstr(Opc, {Ty}, {Ptr});
5806 Load.cloneMemRefs(I);
5808 Register SelectedLoadDst = Load->getOperand(0).getReg();
5809 for (unsigned Idx = 0; Idx < NumVecs; ++Idx) {
5810 auto Vec = MIB.buildInstr(TargetOpcode::COPY, {I.getOperand(Idx)}, {})
5811 .addReg(SelectedLoadDst, 0, SubReg + Idx);
5812 // Emit the subreg copies and immediately select them.
5813 // FIXME: We should refactor our copy code into an emitCopy helper and
5814 // clean up uses of this pattern elsewhere in the selector.
5815 selectCopy(*Vec, TII, MRI, TRI, RBI);
5816 }
5817 return true;
5818}
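// Example (illustrative): for aarch64_neon_ld2 on <4 x s32> the opcode is
// LD2Twov4s, whose single def is a Q-register tuple; the two user-visible
// results are then peeled off with COPYs of the qsub0 and qsub1 subregisters
// and selected immediately.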
5819
5820bool AArch64InstructionSelector::selectVectorLoadLaneIntrinsic(
5821 unsigned Opc, unsigned NumVecs, MachineInstr &I) {
5822 assert(I.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS);
5823 assert(Opc && "Expected an opcode?");
5824 assert(NumVecs > 1 && NumVecs < 5 && "Only support 2, 3, or 4 vectors");
5825 auto &MRI = *MIB.getMRI();
5826 LLT Ty = MRI.getType(I.getOperand(0).getReg());
5827 bool Narrow = Ty.getSizeInBits() == 64;
5828
5829 auto FirstSrcRegIt = I.operands_begin() + NumVecs + 1;
5830 SmallVector<Register, 4> Regs(NumVecs);
5831 std::transform(FirstSrcRegIt, FirstSrcRegIt + NumVecs, Regs.begin(),
5832 [](auto MO) { return MO.getReg(); });
5833
5834 if (Narrow) {
5835 transform(Regs, Regs.begin(), [this](Register Reg) {
5836 return emitScalarToVector(64, &AArch64::FPR128RegClass, Reg, MIB)
5837 ->getOperand(0)
5838 .getReg();
5839 });
5840 Ty = Ty.multiplyElements(2);
5841 }
5842
5843 Register Tuple = createQTuple(Regs, MIB);
5844 auto LaneNo = getIConstantVRegVal((FirstSrcRegIt + NumVecs)->getReg(), MRI);
5845 if (!LaneNo)
5846 return false;
5847
5848 Register Ptr = (FirstSrcRegIt + NumVecs + 1)->getReg();
5849 auto Load = MIB.buildInstr(Opc, {Ty}, {})
5850 .addReg(Tuple)
5851 .addImm(LaneNo->getZExtValue())
5852 .addReg(Ptr);
5853 Load.cloneMemRefs(I);
5855 Register SelectedLoadDst = Load->getOperand(0).getReg();
5856 unsigned SubReg = AArch64::qsub0;
5857 for (unsigned Idx = 0; Idx < NumVecs; ++Idx) {
5858 auto Vec = MIB.buildInstr(TargetOpcode::COPY,
5859 {Narrow ? DstOp(&AArch64::FPR128RegClass)
5860 : DstOp(I.getOperand(Idx).getReg())},
5861 {})
5862 .addReg(SelectedLoadDst, 0, SubReg + Idx);
5863 Register WideReg = Vec.getReg(0);
5864 // Emit the subreg copies and immediately select them.
5865 selectCopy(*Vec, TII, MRI, TRI, RBI);
5866 if (Narrow &&
5867 !emitNarrowVector(I.getOperand(Idx).getReg(), WideReg, MIB, MRI))
5868 return false;
5869 }
5870 return true;
5871}
5872
5873void AArch64InstructionSelector::selectVectorStoreIntrinsic(MachineInstr &I,
5874 unsigned NumVecs,
5875 unsigned Opc) {
5876 MachineRegisterInfo &MRI = I.getParent()->getParent()->getRegInfo();
5877 LLT Ty = MRI.getType(I.getOperand(1).getReg());
5878 Register Ptr = I.getOperand(1 + NumVecs).getReg();
5879
5880 SmallVector<Register, 2> Regs(NumVecs);
5881 std::transform(I.operands_begin() + 1, I.operands_begin() + 1 + NumVecs,
5882 Regs.begin(), [](auto MO) { return MO.getReg(); });
5883
5884 Register Tuple = Ty.getSizeInBits() == 128 ? createQTuple(Regs, MIB)
5885 : createDTuple(Regs, MIB);
5886 auto Store = MIB.buildInstr(Opc, {}, {Tuple, Ptr});
5887 Store.cloneMemRefs(I);
5889}
5890
5891bool AArch64InstructionSelector::selectVectorStoreLaneIntrinsic(
5892 MachineInstr &I, unsigned NumVecs, unsigned Opc) {
5893 MachineRegisterInfo &MRI = I.getParent()->getParent()->getRegInfo();
5894 LLT Ty = MRI.getType(I.getOperand(1).getReg());
5895 bool Narrow = Ty.getSizeInBits() == 64;
5896
5897 SmallVector<Register, 2> Regs(NumVecs);
5898 std::transform(I.operands_begin() + 1, I.operands_begin() + 1 + NumVecs,
5899 Regs.begin(), [](auto MO) { return MO.getReg(); });
5900
5901 if (Narrow)
5902 transform(Regs, Regs.begin(), [this](Register Reg) {
5903 return emitScalarToVector(64, &AArch64::FPR128RegClass, Reg, MIB)
5904 ->getOperand(0)
5905 .getReg();
5906 });
5907
5908 Register Tuple = createQTuple(Regs, MIB);
5909
5910 auto LaneNo = getIConstantVRegVal(I.getOperand(1 + NumVecs).getReg(), MRI);
5911 if (!LaneNo)
5912 return false;
5913 Register Ptr = I.getOperand(1 + NumVecs + 1).getReg();
5914 auto Store = MIB.buildInstr(Opc, {}, {})
5915 .addReg(Tuple)
5916 .addImm(LaneNo->getZExtValue())
5917 .addReg(Ptr);
5918 Store.cloneMemRefs(I);
5920 return true;
5921}
5922
5923bool AArch64InstructionSelector::selectIntrinsicWithSideEffects(
5924 MachineInstr &I, MachineRegisterInfo &MRI) {
5925 // Find the intrinsic ID.
5926 unsigned IntrinID = cast<GIntrinsic>(I).getIntrinsicID();
5927
5928 const LLT S8 = LLT::scalar(8);
5929 const LLT S16 = LLT::scalar(16);
5930 const LLT S32 = LLT::scalar(32);
5931 const LLT S64 = LLT::scalar(64);
5932 const LLT P0 = LLT::pointer(0, 64);
5933 // Select the instruction.
5934 switch (IntrinID) {
5935 default:
5936 return false;
5937 case Intrinsic::aarch64_ldxp:
5938 case Intrinsic::aarch64_ldaxp: {
5939 auto NewI = MIB.buildInstr(
5940 IntrinID == Intrinsic::aarch64_ldxp ? AArch64::LDXPX : AArch64::LDAXPX,
5941 {I.getOperand(0).getReg(), I.getOperand(1).getReg()},
5942 {I.getOperand(3)});
5943 NewI.cloneMemRefs(I);
5945 break;
5946 }
5947 case Intrinsic::aarch64_neon_ld1x2: {
5948 LLT Ty = MRI.getType(I.getOperand(0).getReg());
5949 unsigned Opc = 0;
5950 if (Ty == LLT::fixed_vector(8, S8))
5951 Opc = AArch64::LD1Twov8b;
5952 else if (Ty == LLT::fixed_vector(16, S8))
5953 Opc = AArch64::LD1Twov16b;
5954 else if (Ty == LLT::fixed_vector(4, S16))
5955 Opc = AArch64::LD1Twov4h;
5956 else if (Ty == LLT::fixed_vector(8, S16))
5957 Opc = AArch64::LD1Twov8h;
5958 else if (Ty == LLT::fixed_vector(2, S32))
5959 Opc = AArch64::LD1Twov2s;
5960 else if (Ty == LLT::fixed_vector(4, S32))
5961 Opc = AArch64::LD1Twov4s;
5962 else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
5963 Opc = AArch64::LD1Twov2d;
5964 else if (Ty == S64 || Ty == P0)
5965 Opc = AArch64::LD1Twov1d;
5966 else
5967 llvm_unreachable("Unexpected type for ld1x2!");
5968 selectVectorLoadIntrinsic(Opc, 2, I);
5969 break;
5970 }
5971 case Intrinsic::aarch64_neon_ld1x3: {
5972 LLT Ty = MRI.getType(I.getOperand(0).getReg());
5973 unsigned Opc = 0;
5974 if (Ty == LLT::fixed_vector(8, S8))
5975 Opc = AArch64::LD1Threev8b;
5976 else if (Ty == LLT::fixed_vector(16, S8))
5977 Opc = AArch64::LD1Threev16b;
5978 else if (Ty == LLT::fixed_vector(4, S16))
5979 Opc = AArch64::LD1Threev4h;
5980 else if (Ty == LLT::fixed_vector(8, S16))
5981 Opc = AArch64::LD1Threev8h;
5982 else if (Ty == LLT::fixed_vector(2, S32))
5983 Opc = AArch64::LD1Threev2s;
5984 else if (Ty == LLT::fixed_vector(4, S32))
5985 Opc = AArch64::LD1Threev4s;
5986 else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
5987 Opc = AArch64::LD1Threev2d;
5988 else if (Ty == S64 || Ty == P0)
5989 Opc = AArch64::LD1Threev1d;
5990 else
5991 llvm_unreachable("Unexpected type for ld1x3!");
5992 selectVectorLoadIntrinsic(Opc, 3, I);
5993 break;
5994 }
5995 case Intrinsic::aarch64_neon_ld1x4: {
5996 LLT Ty = MRI.getType(I.getOperand(0).getReg());
5997 unsigned Opc = 0;
5998 if (Ty == LLT::fixed_vector(8, S8))
5999 Opc = AArch64::LD1Fourv8b;
6000 else if (Ty == LLT::fixed_vector(16, S8))
6001 Opc = AArch64::LD1Fourv16b;
6002 else if (Ty == LLT::fixed_vector(4, S16))
6003 Opc = AArch64::LD1Fourv4h;
6004 else if (Ty == LLT::fixed_vector(8, S16))
6005 Opc = AArch64::LD1Fourv8h;
6006 else if (Ty == LLT::fixed_vector(2, S32))
6007 Opc = AArch64::LD1Fourv2s;
6008 else if (Ty == LLT::fixed_vector(4, S32))
6009 Opc = AArch64::LD1Fourv4s;
6010 else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
6011 Opc = AArch64::LD1Fourv2d;
6012 else if (Ty == S64 || Ty == P0)
6013 Opc = AArch64::LD1Fourv1d;
6014 else
6015 llvm_unreachable("Unexpected type for ld1x4!");
6016 selectVectorLoadIntrinsic(Opc, 4, I);
6017 break;
6018 }
6019 case Intrinsic::aarch64_neon_ld2: {
6020 LLT Ty = MRI.getType(I.getOperand(0).getReg());
6021 unsigned Opc = 0;
6022 if (Ty == LLT::fixed_vector(8, S8))
6023 Opc = AArch64::LD2Twov8b;
6024 else if (Ty == LLT::fixed_vector(16, S8))
6025 Opc = AArch64::LD2Twov16b;
6026 else if (Ty == LLT::fixed_vector(4, S16))
6027 Opc = AArch64::LD2Twov4h;
6028 else if (Ty == LLT::fixed_vector(8, S16))
6029 Opc = AArch64::LD2Twov8h;
6030 else if (Ty == LLT::fixed_vector(2, S32))
6031 Opc = AArch64::LD2Twov2s;
6032 else if (Ty == LLT::fixed_vector(4, S32))
6033 Opc = AArch64::LD2Twov4s;
6034 else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
6035 Opc = AArch64::LD2Twov2d;
6036 else if (Ty == S64 || Ty == P0)
6037 Opc = AArch64::LD1Twov1d;
6038 else
6039 llvm_unreachable("Unexpected type for ld2!");
6040 selectVectorLoadIntrinsic(Opc, 2, I);
6041 break;
6042 }
6043 case Intrinsic::aarch64_neon_ld2lane: {
6044 LLT Ty = MRI.getType(I.getOperand(0).getReg());
6045 unsigned Opc;
6046 if (Ty == LLT::fixed_vector(8, S8) || Ty == LLT::fixed_vector(16, S8))
6047 Opc = AArch64::LD2i8;
6048 else if (Ty == LLT::fixed_vector(4, S16) || Ty == LLT::fixed_vector(8, S16))
6049 Opc = AArch64::LD2i16;
6050 else if (Ty == LLT::fixed_vector(2, S32) || Ty == LLT::fixed_vector(4, S32))
6051 Opc = AArch64::LD2i32;
6052 else if (Ty == LLT::fixed_vector(2, S64) ||
6053 Ty == LLT::fixed_vector(2, P0) || Ty == S64 || Ty == P0)
6054 Opc = AArch64::LD2i64;
6055 else
6056 llvm_unreachable("Unexpected type for ld2lane!");
6057 if (!selectVectorLoadLaneIntrinsic(Opc, 2, I))
6058 return false;
6059 break;
6060 }
6061 case Intrinsic::aarch64_neon_ld2r: {
6062 LLT Ty = MRI.getType(I.getOperand(0).getReg());
6063 unsigned Opc = 0;
6064 if (Ty == LLT::fixed_vector(8, S8))
6065 Opc = AArch64::LD2Rv8b;
6066 else if (Ty == LLT::fixed_vector(16, S8))
6067 Opc = AArch64::LD2Rv16b;
6068 else if (Ty == LLT::fixed_vector(4, S16))
6069 Opc = AArch64::LD2Rv4h;
6070 else if (Ty == LLT::fixed_vector(8, S16))
6071 Opc = AArch64::LD2Rv8h;
6072 else if (Ty == LLT::fixed_vector(2, S32))
6073 Opc = AArch64::LD2Rv2s;
6074 else if (Ty == LLT::fixed_vector(4, S32))
6075 Opc = AArch64::LD2Rv4s;
6076 else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
6077 Opc = AArch64::LD2Rv2d;
6078 else if (Ty == S64 || Ty == P0)
6079 Opc = AArch64::LD2Rv1d;
6080 else
6081 llvm_unreachable("Unexpected type for ld2r!");
6082 selectVectorLoadIntrinsic(Opc, 2, I);
6083 break;
6084 }
6085 case Intrinsic::aarch64_neon_ld3: {
6086 LLT Ty = MRI.getType(I.getOperand(0).getReg());
6087 unsigned Opc = 0;
6088 if (Ty == LLT::fixed_vector(8, S8))
6089 Opc = AArch64::LD3Threev8b;
6090 else if (Ty == LLT::fixed_vector(16, S8))
6091 Opc = AArch64::LD3Threev16b;
6092 else if (Ty == LLT::fixed_vector(4, S16))
6093 Opc = AArch64::LD3Threev4h;
6094 else if (Ty == LLT::fixed_vector(8, S16))
6095 Opc = AArch64::LD3Threev8h;
6096 else if (Ty == LLT::fixed_vector(2, S32))
6097 Opc = AArch64::LD3Threev2s;
6098 else if (Ty == LLT::fixed_vector(4, S32))
6099 Opc = AArch64::LD3Threev4s;
6100 else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
6101 Opc = AArch64::LD3Threev2d;
6102 else if (Ty == S64 || Ty == P0)
6103 Opc = AArch64::LD1Threev1d;
6104 else
6105 llvm_unreachable("Unexpected type for ld3!");
6106 selectVectorLoadIntrinsic(Opc, 3, I);
6107 break;
6108 }
6109 case Intrinsic::aarch64_neon_ld3lane: {
6110 LLT Ty = MRI.getType(I.getOperand(0).getReg());
6111 unsigned Opc;
6112 if (Ty == LLT::fixed_vector(8, S8) || Ty == LLT::fixed_vector(16, S8))
6113 Opc = AArch64::LD3i8;
6114 else if (Ty == LLT::fixed_vector(4, S16) || Ty == LLT::fixed_vector(8, S16))
6115 Opc = AArch64::LD3i16;
6116 else if (Ty == LLT::fixed_vector(2, S32) || Ty == LLT::fixed_vector(4, S32))
6117 Opc = AArch64::LD3i32;
6118 else if (Ty == LLT::fixed_vector(2, S64) ||
6119 Ty == LLT::fixed_vector(2, P0) || Ty == S64 || Ty == P0)
6120 Opc = AArch64::LD3i64;
6121 else
6122 llvm_unreachable("Unexpected type for ld3lane!");
6123 if (!selectVectorLoadLaneIntrinsic(Opc, 3, I))
6124 return false;
6125 break;
6126 }
6127 case Intrinsic::aarch64_neon_ld3r: {
6128 LLT Ty = MRI.getType(I.getOperand(0).getReg());
6129 unsigned Opc = 0;
6130 if (Ty == LLT::fixed_vector(8, S8))
6131 Opc = AArch64::LD3Rv8b;
6132 else if (Ty == LLT::fixed_vector(16, S8))
6133 Opc = AArch64::LD3Rv16b;
6134 else if (Ty == LLT::fixed_vector(4, S16))
6135 Opc = AArch64::LD3Rv4h;
6136 else if (Ty == LLT::fixed_vector(8, S16))
6137 Opc = AArch64::LD3Rv8h;
6138 else if (Ty == LLT::fixed_vector(2, S32))
6139 Opc = AArch64::LD3Rv2s;
6140 else if (Ty == LLT::fixed_vector(4, S32))
6141 Opc = AArch64::LD3Rv4s;
6142 else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
6143 Opc = AArch64::LD3Rv2d;
6144 else if (Ty == S64 || Ty == P0)
6145 Opc = AArch64::LD3Rv1d;
6146 else
6147 llvm_unreachable("Unexpected type for ld3r!");
6148 selectVectorLoadIntrinsic(Opc, 3, I);
6149 break;
6150 }
6151 case Intrinsic::aarch64_neon_ld4: {
6152 LLT Ty = MRI.getType(I.getOperand(0).getReg());
6153 unsigned Opc = 0;
6154 if (Ty == LLT::fixed_vector(8, S8))
6155 Opc = AArch64::LD4Fourv8b;
6156 else if (Ty == LLT::fixed_vector(16, S8))
6157 Opc = AArch64::LD4Fourv16b;
6158 else if (Ty == LLT::fixed_vector(4, S16))
6159 Opc = AArch64::LD4Fourv4h;
6160 else if (Ty == LLT::fixed_vector(8, S16))
6161 Opc = AArch64::LD4Fourv8h;
6162 else if (Ty == LLT::fixed_vector(2, S32))
6163 Opc = AArch64::LD4Fourv2s;
6164 else if (Ty == LLT::fixed_vector(4, S32))
6165 Opc = AArch64::LD4Fourv4s;
6166 else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
6167 Opc = AArch64::LD4Fourv2d;
6168 else if (Ty == S64 || Ty == P0)
6169 Opc = AArch64::LD1Fourv1d;
6170 else
6171 llvm_unreachable("Unexpected type for ld4!");
6172 selectVectorLoadIntrinsic(Opc, 4, I);
6173 break;
6174 }
6175 case Intrinsic::aarch64_neon_ld4lane: {
6176 LLT Ty = MRI.getType(I.getOperand(0).getReg());
6177 unsigned Opc;
6178 if (Ty == LLT::fixed_vector(8, S8) || Ty == LLT::fixed_vector(16, S8))
6179 Opc = AArch64::LD4i8;
6180 else if (Ty == LLT::fixed_vector(4, S16) || Ty == LLT::fixed_vector(8, S16))
6181 Opc = AArch64::LD4i16;
6182 else if (Ty == LLT::fixed_vector(2, S32) || Ty == LLT::fixed_vector(4, S32))
6183 Opc = AArch64::LD4i32;
6184 else if (Ty == LLT::fixed_vector(2, S64) ||
6185 Ty == LLT::fixed_vector(2, P0) || Ty == S64 || Ty == P0)
6186 Opc = AArch64::LD4i64;
6187 else
6188 llvm_unreachable("Unexpected type for ld4lane!");
6189 if (!selectVectorLoadLaneIntrinsic(Opc, 4, I))
6190 return false;
6191 break;
6192 }
6193 case Intrinsic::aarch64_neon_ld4r: {
6194 LLT Ty = MRI.getType(I.getOperand(0).getReg());
6195 unsigned Opc = 0;
6196 if (Ty == LLT::fixed_vector(8, S8))
6197 Opc = AArch64::LD4Rv8b;
6198 else if (Ty == LLT::fixed_vector(16, S8))
6199 Opc = AArch64::LD4Rv16b;
6200 else if (Ty == LLT::fixed_vector(4, S16))
6201 Opc = AArch64::LD4Rv4h;
6202 else if (Ty == LLT::fixed_vector(8, S16))
6203 Opc = AArch64::LD4Rv8h;
6204 else if (Ty == LLT::fixed_vector(2, S32))
6205 Opc = AArch64::LD4Rv2s;
6206 else if (Ty == LLT::fixed_vector(4, S32))
6207 Opc = AArch64::LD4Rv4s;
6208 else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
6209 Opc = AArch64::LD4Rv2d;
6210 else if (Ty == S64 || Ty == P0)
6211 Opc = AArch64::LD4Rv1d;
6212 else
6213 llvm_unreachable("Unexpected type for ld4r!");
6214 selectVectorLoadIntrinsic(Opc, 4, I);
6215 break;
6216 }
6217 case Intrinsic::aarch64_neon_st1x2: {
6218 LLT Ty = MRI.getType(I.getOperand(1).getReg());
6219 unsigned Opc;
6220 if (Ty == LLT::fixed_vector(8, S8))
6221 Opc = AArch64::ST1Twov8b;
6222 else if (Ty == LLT::fixed_vector(16, S8))
6223 Opc = AArch64::ST1Twov16b;
6224 else if (Ty == LLT::fixed_vector(4, S16))
6225 Opc = AArch64::ST1Twov4h;
6226 else if (Ty == LLT::fixed_vector(8, S16))
6227 Opc = AArch64::ST1Twov8h;
6228 else if (Ty == LLT::fixed_vector(2, S32))
6229 Opc = AArch64::ST1Twov2s;
6230 else if (Ty == LLT::fixed_vector(4, S32))
6231 Opc = AArch64::ST1Twov4s;
6232 else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
6233 Opc = AArch64::ST1Twov2d;
6234 else if (Ty == S64 || Ty == P0)
6235 Opc = AArch64::ST1Twov1d;
6236 else
6237 llvm_unreachable("Unexpected type for st1x2!");
6238 selectVectorStoreIntrinsic(I, 2, Opc);
6239 break;
6240 }
6241 case Intrinsic::aarch64_neon_st1x3: {
6242 LLT Ty = MRI.getType(I.getOperand(1).getReg());
6243 unsigned Opc;
6244 if (Ty == LLT::fixed_vector(8, S8))
6245 Opc = AArch64::ST1Threev8b;
6246 else if (Ty == LLT::fixed_vector(16, S8))
6247 Opc = AArch64::ST1Threev16b;
6248 else if (Ty == LLT::fixed_vector(4, S16))
6249 Opc = AArch64::ST1Threev4h;
6250 else if (Ty == LLT::fixed_vector(8, S16))
6251 Opc = AArch64::ST1Threev8h;
6252 else if (Ty == LLT::fixed_vector(2, S32))
6253 Opc = AArch64::ST1Threev2s;
6254 else if (Ty == LLT::fixed_vector(4, S32))
6255 Opc = AArch64::ST1Threev4s;
6256 else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
6257 Opc = AArch64::ST1Threev2d;
6258 else if (Ty == S64 || Ty == P0)
6259 Opc = AArch64::ST1Threev1d;
6260 else
6261 llvm_unreachable("Unexpected type for st1x3!");
6262 selectVectorStoreIntrinsic(I, 3, Opc);
6263 break;
6264 }
6265 case Intrinsic::aarch64_neon_st1x4: {
6266 LLT Ty = MRI.getType(I.getOperand(1).getReg());
6267 unsigned Opc;
6268 if (Ty == LLT::fixed_vector(8, S8))
6269 Opc = AArch64::ST1Fourv8b;
6270 else if (Ty == LLT::fixed_vector(16, S8))
6271 Opc = AArch64::ST1Fourv16b;
6272 else if (Ty == LLT::fixed_vector(4, S16))
6273 Opc = AArch64::ST1Fourv4h;
6274 else if (Ty == LLT::fixed_vector(8, S16))
6275 Opc = AArch64::ST1Fourv8h;
6276 else if (Ty == LLT::fixed_vector(2, S32))
6277 Opc = AArch64::ST1Fourv2s;
6278 else if (Ty == LLT::fixed_vector(4, S32))
6279 Opc = AArch64::ST1Fourv4s;
6280 else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
6281 Opc = AArch64::ST1Fourv2d;
6282 else if (Ty == S64 || Ty == P0)
6283 Opc = AArch64::ST1Fourv1d;
6284 else
6285 llvm_unreachable("Unexpected type for st1x4!");
6286 selectVectorStoreIntrinsic(I, 4, Opc);
6287 break;
6288 }
6289 case Intrinsic::aarch64_neon_st2: {
6290 LLT Ty = MRI.getType(I.getOperand(1).getReg());
6291 unsigned Opc;
6292 if (Ty == LLT::fixed_vector(8, S8))
6293 Opc = AArch64::ST2Twov8b;
6294 else if (Ty == LLT::fixed_vector(16, S8))
6295 Opc = AArch64::ST2Twov16b;
6296 else if (Ty == LLT::fixed_vector(4, S16))
6297 Opc = AArch64::ST2Twov4h;
6298 else if (Ty == LLT::fixed_vector(8, S16))
6299 Opc = AArch64::ST2Twov8h;
6300 else if (Ty == LLT::fixed_vector(2, S32))
6301 Opc = AArch64::ST2Twov2s;
6302 else if (Ty == LLT::fixed_vector(4, S32))
6303 Opc = AArch64::ST2Twov4s;
6304 else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
6305 Opc = AArch64::ST2Twov2d;
6306 else if (Ty == S64 || Ty == P0)
6307 Opc = AArch64::ST1Twov1d;
6308 else
6309 llvm_unreachable("Unexpected type for st2!");
6310 selectVectorStoreIntrinsic(I, 2, Opc);
6311 break;
6312 }
6313 case Intrinsic::aarch64_neon_st3: {
6314 LLT Ty = MRI.getType(I.getOperand(1).getReg());
6315 unsigned Opc;
6316 if (Ty == LLT::fixed_vector(8, S8))
6317 Opc = AArch64::ST3Threev8b;
6318 else if (Ty == LLT::fixed_vector(16, S8))
6319 Opc = AArch64::ST3Threev16b;
6320 else if (Ty == LLT::fixed_vector(4, S16))
6321 Opc = AArch64::ST3Threev4h;
6322 else if (Ty == LLT::fixed_vector(8, S16))
6323 Opc = AArch64::ST3Threev8h;
6324 else if (Ty == LLT::fixed_vector(2, S32))
6325 Opc = AArch64::ST3Threev2s;
6326 else if (Ty == LLT::fixed_vector(4, S32))
6327 Opc = AArch64::ST3Threev4s;
6328 else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
6329 Opc = AArch64::ST3Threev2d;
6330 else if (Ty == S64 || Ty == P0)
6331 Opc = AArch64::ST1Threev1d;
6332 else
6333 llvm_unreachable("Unexpected type for st3!");
6334 selectVectorStoreIntrinsic(I, 3, Opc);
6335 break;
6336 }
6337 case Intrinsic::aarch64_neon_st4: {
6338 LLT Ty = MRI.getType(I.getOperand(1).getReg());
6339 unsigned Opc;
6340 if (Ty == LLT::fixed_vector(8, S8))
6341 Opc = AArch64::ST4Fourv8b;
6342 else if (Ty == LLT::fixed_vector(16, S8))
6343 Opc = AArch64::ST4Fourv16b;
6344 else if (Ty == LLT::fixed_vector(4, S16))
6345 Opc = AArch64::ST4Fourv4h;
6346 else if (Ty == LLT::fixed_vector(8, S16))
6347 Opc = AArch64::ST4Fourv8h;
6348 else if (Ty == LLT::fixed_vector(2, S32))
6349 Opc = AArch64::ST4Fourv2s;
6350 else if (Ty == LLT::fixed_vector(4, S32))
6351 Opc = AArch64::ST4Fourv4s;
6352 else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
6353 Opc = AArch64::ST4Fourv2d;
6354 else if (Ty == S64 || Ty == P0)
6355 Opc = AArch64::ST1Fourv1d;
6356 else
6357 llvm_unreachable("Unexpected type for st4!");
6358 selectVectorStoreIntrinsic(I, 4, Opc);
6359 break;
6360 }
6361 case Intrinsic::aarch64_neon_st2lane: {
6362 LLT Ty = MRI.getType(I.getOperand(1).getReg());
6363 unsigned Opc;
6364 if (Ty == LLT::fixed_vector(8, S8) || Ty == LLT::fixed_vector(16, S8))
6365 Opc = AArch64::ST2i8;
6366 else if (Ty == LLT::fixed_vector(4, S16) || Ty == LLT::fixed_vector(8, S16))
6367 Opc = AArch64::ST2i16;
6368 else if (Ty == LLT::fixed_vector(2, S32) || Ty == LLT::fixed_vector(4, S32))
6369 Opc = AArch64::ST2i32;
6370 else if (Ty == LLT::fixed_vector(2, S64) ||
6371 Ty == LLT::fixed_vector(2, P0) || Ty == S64 || Ty == P0)
6372 Opc = AArch64::ST2i64;
6373 else
6374 llvm_unreachable("Unexpected type for st2lane!");
6375 if (!selectVectorStoreLaneIntrinsic(I, 2, Opc))
6376 return false;
6377 break;
6378 }
6379 case Intrinsic::aarch64_neon_st3lane: {
6380 LLT Ty = MRI.getType(I.getOperand(1).getReg());
6381 unsigned Opc;
6382 if (Ty == LLT::fixed_vector(8, S8) || Ty == LLT::fixed_vector(16, S8))
6383 Opc = AArch64::ST3i8;
6384 else if (Ty == LLT::fixed_vector(4, S16) || Ty == LLT::fixed_vector(8, S16))
6385 Opc = AArch64::ST3i16;
6386 else if (Ty == LLT::fixed_vector(2, S32) || Ty == LLT::fixed_vector(4, S32))
6387 Opc = AArch64::ST3i32;
6388 else if (Ty == LLT::fixed_vector(2, S64) ||
6389 Ty == LLT::fixed_vector(2, P0) || Ty == S64 || Ty == P0)
6390 Opc = AArch64::ST3i64;
6391 else
6392 llvm_unreachable("Unexpected type for st3lane!");
6393 if (!selectVectorStoreLaneIntrinsic(I, 3, Opc))
6394 return false;
6395 break;
6396 }
6397 case Intrinsic::aarch64_neon_st4lane: {
6398 LLT Ty = MRI.getType(I.getOperand(1).getReg());
6399 unsigned Opc;
6400 if (Ty == LLT::fixed_vector(8, S8) || Ty == LLT::fixed_vector(16, S8))
6401 Opc = AArch64::ST4i8;
6402 else if (Ty == LLT::fixed_vector(4, S16) || Ty == LLT::fixed_vector(8, S16))
6403 Opc = AArch64::ST4i16;
6404 else if (Ty == LLT::fixed_vector(2, S32) || Ty == LLT::fixed_vector(4, S32))
6405 Opc = AArch64::ST4i32;
6406 else if (Ty == LLT::fixed_vector(2, S64) ||
6407 Ty == LLT::fixed_vector(2, P0) || Ty == S64 || Ty == P0)
6408 Opc = AArch64::ST4i64;
6409 else
6410 llvm_unreachable("Unexpected type for st4lane!");
6411 if (!selectVectorStoreLaneIntrinsic(I, 4, Opc))
6412 return false;
6413 break;
6414 }
6415 case Intrinsic::aarch64_mops_memset_tag: {
6416 // Transform
6417 // %dst:gpr(p0) = \
6418 // G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aarch64.mops.memset.tag),
6419 // \ %dst:gpr(p0), %val:gpr(s64), %n:gpr(s64)
6420 // where %dst is updated, into
6421 // (%Rd:GPR64common, %Rn:GPR64) = \
6422 // MOPSMemorySetTaggingPseudo \
6423 // %Rd:GPR64common, %Rn:GPR64, %Rm:GPR64
6424 // where Rd and Rn are tied.
6425 // It is expected that %val has been extended to s64 in legalization.
6426 // Note that the order of the size/value operands is swapped.
6427
6428 Register DstDef = I.getOperand(0).getReg();
6429 // I.getOperand(1) is the intrinsic function
6430 Register DstUse = I.getOperand(2).getReg();
6431 Register ValUse = I.getOperand(3).getReg();
6432 Register SizeUse = I.getOperand(4).getReg();
6433
6434 // MOPSMemorySetTaggingPseudo has two defs; the intrinsic call has only one.
6435 // Therefore an additional virtual register is required for the updated size
6436 // operand. This value is not accessible via the semantics of the intrinsic.
6437 Register SizeDef = MRI.createGenericVirtualRegister(LLT::scalar(64));
6438
6439 auto Memset = MIB.buildInstr(AArch64::MOPSMemorySetTaggingPseudo,
6440 {DstDef, SizeDef}, {DstUse, SizeUse, ValUse});
6441 Memset.cloneMemRefs(I);
6443 break;
6444 }
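// Illustrative note (assumption, not taken from this file): the
// MOPSMemorySetTaggingPseudo defines both the updated destination and the
// remaining size, while the intrinsic only exposes the updated destination;
// SizeDef above is the scratch vreg standing in for that second, otherwise
// unused def. On FEAT_MOPS hardware the pseudo is expected to be expanded
// late into the tagged memset prologue/main/epilogue (SETG*) sequence.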
6445 }
6446
6447 I.eraseFromParent();
6448 return true;
6449}
6450
6451bool AArch64InstructionSelector::selectIntrinsic(MachineInstr &I,
6453 unsigned IntrinID = cast<GIntrinsic>(I).getIntrinsicID();
6454
6455 switch (IntrinID) {
6456 default:
6457 break;
6458 case Intrinsic::aarch64_crypto_sha1h: {
6459 Register DstReg = I.getOperand(0).getReg();
6460 Register SrcReg = I.getOperand(2).getReg();
6461
6462 // FIXME: Should this be an assert?
6463 if (MRI.getType(DstReg).getSizeInBits() != 32 ||
6464 MRI.getType(SrcReg).getSizeInBits() != 32)
6465 return false;
6466
6467 // The operation has to happen on FPRs. Set up some new FPR registers for
6468 // the source and destination if they are on GPRs.
6469 if (RBI.getRegBank(SrcReg, MRI, TRI)->getID() != AArch64::FPRRegBankID) {
6470 SrcReg = MRI.createVirtualRegister(&AArch64::FPR32RegClass);
6471 MIB.buildCopy({SrcReg}, {I.getOperand(2)});
6472
6473 // Make sure the copy ends up getting constrained properly.
6474 RBI.constrainGenericRegister(I.getOperand(2).getReg(),
6475 AArch64::GPR32RegClass, MRI);
6476 }
6477
6478 if (RBI.getRegBank(DstReg, MRI, TRI)->getID() != AArch64::FPRRegBankID)
6479 DstReg = MRI.createVirtualRegister(&AArch64::FPR32RegClass);
6480
6481 // Actually insert the instruction.
6482 auto SHA1Inst = MIB.buildInstr(AArch64::SHA1Hrr, {DstReg}, {SrcReg});
6483 constrainSelectedInstRegOperands(*SHA1Inst, TII, TRI, RBI);
6484
6485 // Did we create a new register for the destination?
6486 if (DstReg != I.getOperand(0).getReg()) {
6487 // Yep. Copy the result of the instruction back into the original
6488 // destination.
6489 MIB.buildCopy({I.getOperand(0)}, {DstReg});
6490 RBI.constrainGenericRegister(I.getOperand(0).getReg(),
6491 AArch64::GPR32RegClass, MRI);
6492 }
6493
6494 I.eraseFromParent();
6495 return true;
6496 }
6497 case Intrinsic::frameaddress:
6498 case Intrinsic::returnaddress: {
6499 MachineFunction &MF = *I.getParent()->getParent();
6500 MachineFrameInfo &MFI = MF.getFrameInfo();
6501
6502 unsigned Depth = I.getOperand(2).getImm();
6503 Register DstReg = I.getOperand(0).getReg();
6504 RBI.constrainGenericRegister(DstReg, AArch64::GPR64RegClass, MRI);
6505
6506 if (Depth == 0 && IntrinID == Intrinsic::returnaddress) {
6507 if (!MFReturnAddr) {
6508 // Insert the copy from LR/X30 into the entry block, before it can be
6509 // clobbered by anything.
6510 MFI.setReturnAddressIsTaken(true);
6511 MFReturnAddr = getFunctionLiveInPhysReg(
6512 MF, TII, AArch64::LR, AArch64::GPR64RegClass, I.getDebugLoc());
6513 }
6514
6515 if (STI.hasPAuth()) {
6516 MIB.buildInstr(AArch64::XPACI, {DstReg}, {MFReturnAddr});
6517 } else {
6518 MIB.buildCopy({Register(AArch64::LR)}, {MFReturnAddr});
6519 MIB.buildInstr(AArch64::XPACLRI);
6520 MIB.buildCopy({DstReg}, {Register(AArch64::LR)});
6521 }
6522
6523 I.eraseFromParent();
6524 return true;
6525 }
6526
6527 MFI.setFrameAddressIsTaken(true);
6528 Register FrameAddr(AArch64::FP);
6529 while (Depth--) {
6530 Register NextFrame = MRI.createVirtualRegister(&AArch64::GPR64spRegClass);
6531 auto Ldr =
6532 MIB.buildInstr(AArch64::LDRXui, {NextFrame}, {FrameAddr}).addImm(0);
6534 FrameAddr = NextFrame;
6535 }
6536
6537 if (IntrinID == Intrinsic::frameaddress)
6538 MIB.buildCopy({DstReg}, {FrameAddr});
6539 else {
6540 MFI.setReturnAddressIsTaken(true);
6541
6542 if (STI.hasPAuth()) {
6543 Register TmpReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
6544 MIB.buildInstr(AArch64::LDRXui, {TmpReg}, {FrameAddr}).addImm(1);
6545 MIB.buildInstr(AArch64::XPACI, {DstReg}, {TmpReg});
6546 } else {
6547 MIB.buildInstr(AArch64::LDRXui, {Register(AArch64::LR)}, {FrameAddr})
6548 .addImm(1);
6549 MIB.buildInstr(AArch64::XPACLRI);
6550 MIB.buildCopy({DstReg}, {Register(AArch64::LR)});
6551 }
6552 }
6553
6554 I.eraseFromParent();
6555 return true;
6556 }
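// Illustrative sketch (assumed output, simplified): for @llvm.frameaddress(i32 2)
// the loop above walks the frame chain with two zero-offset loads off x29,
// roughly
//   ldr xN, [x29]
//   ldr xN, [xN]
// and copies the result into DstReg. For @llvm.returnaddress with Depth > 0,
// the final step instead loads [frame, #8] and strips any PAC signature with
// XPACI (or via the XPACLRI/LR shuffle when PAuth is unavailable).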
6557 case Intrinsic::aarch64_neon_tbl2:
6558 SelectTable(I, MRI, 2, AArch64::TBLv8i8Two, AArch64::TBLv16i8Two, false);
6559 return true;
6560 case Intrinsic::aarch64_neon_tbl3:
6561 SelectTable(I, MRI, 3, AArch64::TBLv8i8Three, AArch64::TBLv16i8Three,
6562 false);
6563 return true;
6564 case Intrinsic::aarch64_neon_tbl4:
6565 SelectTable(I, MRI, 4, AArch64::TBLv8i8Four, AArch64::TBLv16i8Four, false);
6566 return true;
6567 case Intrinsic::aarch64_neon_tbx2:
6568 SelectTable(I, MRI, 2, AArch64::TBXv8i8Two, AArch64::TBXv16i8Two, true);
6569 return true;
6570 case Intrinsic::aarch64_neon_tbx3:
6571 SelectTable(I, MRI, 3, AArch64::TBXv8i8Three, AArch64::TBXv16i8Three, true);
6572 return true;
6573 case Intrinsic::aarch64_neon_tbx4:
6574 SelectTable(I, MRI, 4, AArch64::TBXv8i8Four, AArch64::TBXv16i8Four, true);
6575 return true;
6576 case Intrinsic::swift_async_context_addr:
6577 auto Sub = MIB.buildInstr(AArch64::SUBXri, {I.getOperand(0).getReg()},
6578 {Register(AArch64::FP)})
6579 .addImm(8)
6580 .addImm(0);
6582
6584 MF->getInfo<AArch64FunctionInfo>()->setHasSwiftAsyncContext(true);
6585 I.eraseFromParent();
6586 return true;
6587 }
6588 return false;
6589}
6590
6591// G_PTRAUTH_GLOBAL_VALUE lowering
6592//
6593// We have 3 lowering alternatives to choose from:
6594// - MOVaddrPAC: similar to MOVaddr, with added PAC.
6595// If the GV doesn't need a GOT load (i.e., is locally defined)
6596// materialize the pointer using adrp+add+pac. See LowerMOVaddrPAC.
6597//
6598// - LOADgotPAC: similar to LOADgot, with added PAC.
6599// If the GV needs a GOT load, materialize the pointer using the usual
6600 // GOT adrp+ldr, +pac. Pointers in the GOT are assumed to be not signed; the GOT
6601// section is assumed to be read-only (for example, via relro mechanism). See
6602// LowerMOVaddrPAC.
6603//
6604 // - LOADauthptrstatic: similar to LOADgot, but uses a
6605// special stub slot instead of a GOT slot.
6606// Load a signed pointer for symbol 'sym' from a stub slot named
6607 // 'sym$auth_ptr$key$disc' filled by the dynamic linker during relocation
6608// resolving. This usually lowers to adrp+ldr, but also emits an entry into
6609// .data with an
6610// @AUTH relocation. See LowerLOADauthptrstatic.
6611//
6612 // All 3 are pseudos that are expanded late into longer sequences: this lets us
6613// provide integrity guarantees on the to-be-signed intermediate values.
6614//
6615// LOADauthptrstatic is undesirable because it requires a large section filled
6616// with often similarly-signed pointers, making it a good harvesting target.
6617// Thus, it's only used for ptrauth references to extern_weak to avoid null
6618// checks.
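// Illustrative expansions (assumed, simplified; the pseudos are expanded late):
//   MOVaddrPAC        -> adrp+add of 'sym' into x16, then sign with the chosen
//                        key and discriminator (e.g. pacia/pacda on x16)
//   LOADgotPAC        -> adrp+ldr from the GOT entry of 'sym' into x16, then
//                        the same signing step
//   LOADauthptrstatic -> adrp+ldr from the 'sym$auth_ptr$key$disc' stub slot,
//                        which the dynamic linker fills with an already-signed
//                        pointer via the @AUTH relocation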
6619
6620bool AArch64InstructionSelector::selectPtrAuthGlobalValue(
6622 Register DefReg = I.getOperand(0).getReg();
6623 Register Addr = I.getOperand(1).getReg();
6624 uint64_t Key = I.getOperand(2).getImm();
6625 Register AddrDisc = I.getOperand(3).getReg();
6626 uint64_t Disc = I.getOperand(4).getImm();
6627 int64_t Offset = 0;
6628
6629 if (Key > AArch64PACKey::LAST)
6630 report_fatal_error("key in ptrauth global out of range [0, " +
6631 Twine((int)AArch64PACKey::LAST) + "]");
6632
6633 // Blend only works if the integer discriminator is 16-bit wide.
6634 if (!isUInt<16>(Disc))
6636 "constant discriminator in ptrauth global out of range [0, 0xffff]");
6637
6638 // Choosing between 3 lowering alternatives is target-specific.
6639 if (!STI.isTargetELF())
6640 report_fatal_error("ptrauth global lowering is only implemented for ELF");
6641
6642 if (!MRI.hasOneDef(Addr))
6643 return false;
6644
6645 // First match any offset we take from the real global.
6646 const MachineInstr *DefMI = &*MRI.def_instr_begin(Addr);
6647 if (DefMI->getOpcode() == TargetOpcode::G_PTR_ADD) {
6648 Register OffsetReg = DefMI->getOperand(2).getReg();
6649 if (!MRI.hasOneDef(OffsetReg))
6650 return false;
6651 const MachineInstr &OffsetMI = *MRI.def_instr_begin(OffsetReg);
6652 if (OffsetMI.getOpcode() != TargetOpcode::G_CONSTANT)
6653 return false;
6654
6655 Addr = DefMI->getOperand(1).getReg();
6656 if (!MRI.hasOneDef(Addr))
6657 return false;
6658
6659 DefMI = &*MRI.def_instr_begin(Addr);
6660 Offset = OffsetMI.getOperand(1).getCImm()->getSExtValue();
6661 }
6662
6663 // We should be left with a genuine unauthenticated GlobalValue.
6664 const GlobalValue *GV;
6665 if (DefMI->getOpcode() == TargetOpcode::G_GLOBAL_VALUE) {
6666 GV = DefMI->getOperand(1).getGlobal();
6668 } else if (DefMI->getOpcode() == AArch64::G_ADD_LOW) {
6669 GV = DefMI->getOperand(2).getGlobal();
6671 } else {
6672 return false;
6673 }
6674
6675 MachineIRBuilder MIB(I);
6676
6677 // Classify the reference to determine whether it needs a GOT load.
6678 unsigned OpFlags = STI.ClassifyGlobalReference(GV, TM);
6679 const bool NeedsGOTLoad = ((OpFlags & AArch64II::MO_GOT) != 0);
6680 assert(((OpFlags & (~AArch64II::MO_GOT)) == 0) &&
6681 "unsupported non-GOT op flags on ptrauth global reference");
6682 assert((!GV->hasExternalWeakLinkage() || NeedsGOTLoad) &&
6683 "unsupported non-GOT reference to weak ptrauth global");
6684
6685 std::optional<APInt> AddrDiscVal = getIConstantVRegVal(AddrDisc, MRI);
6686 bool HasAddrDisc = !AddrDiscVal || *AddrDiscVal != 0;
6687
6688 // Non-extern_weak:
6689 // - No GOT load needed -> MOVaddrPAC
6690 // - GOT load for non-extern_weak -> LOADgotPAC
6691 // Note that we disallow extern_weak refs to avoid null checks later.
6692 if (!GV->hasExternalWeakLinkage()) {
6693 MIB.buildInstr(TargetOpcode::IMPLICIT_DEF, {AArch64::X16}, {});
6694 MIB.buildInstr(TargetOpcode::IMPLICIT_DEF, {AArch64::X17}, {});
6695 MIB.buildInstr(NeedsGOTLoad ? AArch64::LOADgotPAC : AArch64::MOVaddrPAC)
6697 .addImm(Key)
6698 .addReg(HasAddrDisc ? AddrDisc : AArch64::XZR)
6699 .addImm(Disc)
6700 .constrainAllUses(TII, TRI, RBI);
6701 MIB.buildCopy(DefReg, Register(AArch64::X16));
6702 RBI.constrainGenericRegister(DefReg, AArch64::GPR64RegClass, MRI);
6703 I.eraseFromParent();
6704 return true;
6705 }
6706
6707 // extern_weak -> LOADauthptrstatic
6708
6709 // Offsets and extern_weak don't mix well: ptrauth aside, you'd get the
6710 // offset alone as a pointer if the symbol wasn't available, which would
6711 // probably break null checks in users. Ptrauth complicates things further:
6712 // error out.
6713 if (Offset != 0)
6715 "unsupported non-zero offset in weak ptrauth global reference");
6716
6717 if (HasAddrDisc)
6718 report_fatal_error("unsupported weak addr-div ptrauth global");
6719
6720 MIB.buildInstr(AArch64::LOADauthptrstatic, {DefReg}, {})
6721 .addGlobalAddress(GV, Offset)
6722 .addImm(Key)
6723 .addImm(Disc);
6724 RBI.constrainGenericRegister(DefReg, AArch64::GPR64RegClass, MRI);
6725
6726 I.eraseFromParent();
6727 return true;
6728}
6729
6730void AArch64InstructionSelector::SelectTable(MachineInstr &I,
6732 unsigned NumVec, unsigned Opc1,
6733 unsigned Opc2, bool isExt) {
6734 Register DstReg = I.getOperand(0).getReg();
6735 unsigned Opc = MRI.getType(DstReg) == LLT::fixed_vector(8, 8) ? Opc1 : Opc2;
6736
6737 // Create the REG_SEQUENCE
6739 for (unsigned i = 0; i < NumVec; i++)
6740 Regs.push_back(I.getOperand(i + 2 + isExt).getReg());
6741 Register RegSeq = createQTuple(Regs, MIB);
6742
6743 Register IdxReg = I.getOperand(2 + NumVec + isExt).getReg();
6745 if (isExt) {
6746 Register Reg = I.getOperand(2).getReg();
6747 Instr = MIB.buildInstr(Opc, {DstReg}, {Reg, RegSeq, IdxReg});
6748 } else
6749 Instr = MIB.buildInstr(Opc, {DstReg}, {RegSeq, IdxReg});
6751 I.eraseFromParent();
6752}
6753
6755AArch64InstructionSelector::selectShiftA_32(const MachineOperand &Root) const {
6756 auto MaybeImmed = getImmedFromMO(Root);
6757 if (MaybeImmed == std::nullopt || *MaybeImmed > 31)
6758 return std::nullopt;
6759 uint64_t Enc = (32 - *MaybeImmed) & 0x1f;
6760 return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Enc); }}};
6761}
6762
6764AArch64InstructionSelector::selectShiftB_32(const MachineOperand &Root) const {
6765 auto MaybeImmed = getImmedFromMO(Root);
6766 if (MaybeImmed == std::nullopt || *MaybeImmed > 31)
6767 return std::nullopt;
6768 uint64_t Enc = 31 - *MaybeImmed;
6769 return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Enc); }}};
6770}
6771
6773AArch64InstructionSelector::selectShiftA_64(const MachineOperand &Root) const {
6774 auto MaybeImmed = getImmedFromMO(Root);
6775 if (MaybeImmed == std::nullopt || *MaybeImmed > 63)
6776 return std::nullopt;
6777 uint64_t Enc = (64 - *MaybeImmed) & 0x3f;
6778 return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Enc); }}};
6779}
6780
6782AArch64InstructionSelector::selectShiftB_64(const MachineOperand &Root) const {
6783 auto MaybeImmed = getImmedFromMO(Root);
6784 if (MaybeImmed == std::nullopt || *MaybeImmed > 63)
6785 return std::nullopt;
6786 uint64_t Enc = 63 - *MaybeImmed;
6787 return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Enc); }}};
6788}
6789
6790/// Helper to select an immediate value that can be represented as a 12-bit
6791/// value shifted left by either 0 or 12. If it is possible to do so, return
6792/// the immediate and shift value. If not, return std::nullopt.
6793///
6794/// Used by selectArithImmed and selectNegArithImmed.
6796AArch64InstructionSelector::select12BitValueWithLeftShift(
6797 uint64_t Immed) const {
6798 unsigned ShiftAmt;
6799 if (Immed >> 12 == 0) {
6800 ShiftAmt = 0;
6801 } else if ((Immed & 0xfff) == 0 && Immed >> 24 == 0) {
6802 ShiftAmt = 12;
6803 Immed = Immed >> 12;
6804 } else
6805 return std::nullopt;
6806
6807 unsigned ShVal = AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftAmt);
6808 return {{
6809 [=](MachineInstrBuilder &MIB) { MIB.addImm(Immed); },
6810 [=](MachineInstrBuilder &MIB) { MIB.addImm(ShVal); },
6811 }};
6812}
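// Worked example (illustrative): 0xfff is returned as-is with LSL #0; 0x3000
// has its low 12 bits clear and fits in 24 bits, so it becomes
// {imm = 0x3, LSL #12}; 0x1234 and 0x1234000 both fail the checks above and
// yield std::nullopt.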
6813
6814/// SelectArithImmed - Select an immediate value that can be represented as
6815/// a 12-bit value shifted left by either 0 or 12. If so, return true with
6816/// Val set to the 12-bit value and Shift set to the shifter operand.
6818AArch64InstructionSelector::selectArithImmed(MachineOperand &Root) const {
6819 // This function is called from the addsub_shifted_imm ComplexPattern,
6820 // which lists [imm] as the list of opcodes it's interested in; however,
6821 // we still need to check whether the operand is actually an immediate
6822 // here because the ComplexPattern opcode list is only used in
6823 // root-level opcode matching.
6824 auto MaybeImmed = getImmedFromMO(Root);
6825 if (MaybeImmed == std::nullopt)
6826 return std::nullopt;
6827 return select12BitValueWithLeftShift(*MaybeImmed);
6828}
6829
6830/// SelectNegArithImmed - As above, but negates the value before trying to
6831/// select it.
6833AArch64InstructionSelector::selectNegArithImmed(MachineOperand &Root) const {
6834 // We need a register here, because we need to know if we have a 64 or 32
6835 // bit immediate.
6836 if (!Root.isReg())
6837 return std::nullopt;
6838 auto MaybeImmed = getImmedFromMO(Root);
6839 if (MaybeImmed == std::nullopt)
6840 return std::nullopt;
6841 uint64_t Immed = *MaybeImmed;
6842
6843 // This negation is almost always valid, but "cmp wN, #0" and "cmn wN, #0"
6844 // have the opposite effect on the C flag, so this pattern mustn't match under
6845 // those circumstances.
6846 if (Immed == 0)
6847 return std::nullopt;
6848
6849 // Check whether we're dealing with a 32-bit or a 64-bit type on
6850 // the root.
6852 if (MRI.getType(Root.getReg()).getSizeInBits() == 32)
6853 Immed = ~((uint32_t)Immed) + 1;
6854 else
6855 Immed = ~Immed + 1ULL;
6856
6857 if (Immed & 0xFFFFFFFFFF000000ULL)
6858 return std::nullopt;
6859
6860 Immed &= 0xFFFFFFULL;
6861 return select12BitValueWithLeftShift(Immed);
6862}
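// Worked example (illustrative): comparing a 32-bit value against -4096 gives
// Immed (truncated to 32 bits) = 0xfffff000; negating yields 0x1000, which
// passes the 24-bit check and selects as {0x1, LSL #12}, so the flag-setting
// operation can be emitted with the complementary instruction (e.g. adds/cmn
// with #4096 instead of subs/cmp with an unencodable negative immediate).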
6863
6864/// Return true if it is worth folding MI into an extended register. That is,
6865/// if it's safe to pull it into the addressing mode of a load or store as a
6866/// shift.
6867bool AArch64InstructionSelector::isWorthFoldingIntoExtendedReg(
6868 MachineInstr &MI, const MachineRegisterInfo &MRI) const {
6869 // Always fold if there is one use, or if we're optimizing for size.
6870 Register DefReg = MI.getOperand(0).getReg();
6871 if (MRI.hasOneNonDBGUse(DefReg) ||
6872 MI.getParent()->getParent()->getFunction().hasOptSize())
6873 return true;
6874
6875 // FIXME: Consider checking HasAddrLSLSlow14 and HasALULSLFast as
6876 // appropriate.
6877
6878 // We have a fastpath, so folding a shift in and potentially computing it
6879 // many times may be beneficial. Check if this is only used in memory ops.
6880 // If it is, then we should fold.
6881 return all_of(MRI.use_nodbg_instructions(DefReg),
6882 [](MachineInstr &Use) { return Use.mayLoadOrStore(); });
6883}
6884
6886 switch (Type) {
6887 case AArch64_AM::SXTB:
6888 case AArch64_AM::SXTH:
6889 case AArch64_AM::SXTW:
6890 return true;
6891 default:
6892 return false;
6893 }
6894}
6895
6897AArch64InstructionSelector::selectExtendedSHL(
6899 unsigned SizeInBytes, bool WantsExt) const {
6900 assert(Base.isReg() && "Expected base to be a register operand");
6901 assert(Offset.isReg() && "Expected offset to be a register operand");
6902
6904 MachineInstr *OffsetInst = MRI.getVRegDef(Offset.getReg());
6905
6906 unsigned OffsetOpc = OffsetInst->getOpcode();
6907 bool LookedThroughZExt = false;
6908 if (OffsetOpc != TargetOpcode::G_SHL && OffsetOpc != TargetOpcode::G_MUL) {
6909 // Try to look through a ZEXT.
6910 if (OffsetOpc != TargetOpcode::G_ZEXT || !WantsExt)
6911 return std::nullopt;
6912
6913 OffsetInst = MRI.getVRegDef(OffsetInst->getOperand(1).getReg());
6914 OffsetOpc = OffsetInst->getOpcode();
6915 LookedThroughZExt = true;
6916
6917 if (OffsetOpc != TargetOpcode::G_SHL && OffsetOpc != TargetOpcode::G_MUL)
6918 return std::nullopt;
6919 }
6920 // Make sure that the memory op is a valid size.
6921 int64_t LegalShiftVal = Log2_32(SizeInBytes);
6922 if (LegalShiftVal == 0)
6923 return std::nullopt;
6924 if (!isWorthFoldingIntoExtendedReg(*OffsetInst, MRI))
6925 return std::nullopt;
6926
6927 // Now, try to find the specific G_CONSTANT. Start by assuming that the
6928 // register we will offset is the LHS, and the register containing the
6929 // constant is the RHS.
6930 Register OffsetReg = OffsetInst->getOperand(1).getReg();
6931 Register ConstantReg = OffsetInst->getOperand(2).getReg();
6932 auto ValAndVReg = getIConstantVRegValWithLookThrough(ConstantReg, MRI);
6933 if (!ValAndVReg) {
6934 // We didn't get a constant on the RHS. If the opcode is a shift, then
6935 // we're done.
6936 if (OffsetOpc == TargetOpcode::G_SHL)
6937 return std::nullopt;
6938
6939 // If we have a G_MUL, we can use either register. Try looking at the RHS.
6940 std::swap(OffsetReg, ConstantReg);
6941 ValAndVReg = getIConstantVRegValWithLookThrough(ConstantReg, MRI);
6942 if (!ValAndVReg)
6943 return std::nullopt;
6944 }
6945
6946 // The value must fit into 3 bits, and must be positive. Make sure that is
6947 // true.
6948 int64_t ImmVal = ValAndVReg->Value.getSExtValue();
6949
6950 // Since we're going to pull this into a shift, the constant value must be
6951 // a power of 2. If we got a multiply, then we need to check this.
6952 if (OffsetOpc == TargetOpcode::G_MUL) {
6953 if (!llvm::has_single_bit<uint32_t>(ImmVal))
6954 return std::nullopt;
6955
6956 // Got a power of 2. So, the amount we'll shift is the log base-2 of that.
6957 ImmVal = Log2_32(ImmVal);
6958 }
6959
6960 if ((ImmVal & 0x7) != ImmVal)
6961 return std::nullopt;
6962
6963 // We are only allowed to shift by LegalShiftVal. This shift value is built
6964 // into the instruction, so we can't just use whatever we want.
6965 if (ImmVal != LegalShiftVal)
6966 return std::nullopt;
6967
6968 unsigned SignExtend = 0;
6969 if (WantsExt) {
6970 // Check if the offset is defined by an extend, unless we looked through a
6971 // G_ZEXT earlier.
6972 if (!LookedThroughZExt) {
6973 MachineInstr *ExtInst = getDefIgnoringCopies(OffsetReg, MRI);
6974 auto Ext = getExtendTypeForInst(*ExtInst, MRI, true);
6976 return std::nullopt;
6977
6978 SignExtend = isSignExtendShiftType(Ext) ? 1 : 0;
6979 // We only support SXTW for signed extension here.
6980 if (SignExtend && Ext != AArch64_AM::SXTW)
6981 return std::nullopt;
6982 OffsetReg = ExtInst->getOperand(1).getReg();
6983 }
6984
6985 // Need a 32-bit wide register here.
6986 MachineIRBuilder MIB(*MRI.getVRegDef(Root.getReg()));
6987 OffsetReg = moveScalarRegClass(OffsetReg, AArch64::GPR32RegClass, MIB);
6988 }
6989
6990 // We can use the LHS of the GEP as the base, and the LHS of the shift as an
6991 // offset. Signify that we are shifting by setting the shift flag to 1.
6992 return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(Base.getReg()); },
6993 [=](MachineInstrBuilder &MIB) { MIB.addUse(OffsetReg); },
6994 [=](MachineInstrBuilder &MIB) {
6995 // Need to add both immediates here to make sure that they are both
6996 // added to the instruction.
6997 MIB.addImm(SignExtend);
6998 MIB.addImm(1);
6999 }}};
7000}
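// Illustrative example (assumed MIR): for an 8-byte access where the offset is
//   %c:gpr(s64) = G_CONSTANT i64 8
//   %off:gpr(s64) = G_MUL %idx, %c
// the multiply-by-8 is treated as a shift by Log2_32(8) == 3, which matches
// LegalShiftVal, so the renderers feed an addressing mode of the form
//   ldr xN, [base, idx, lsl #3]
// (or sxtw/uxtw #3 when WantsExt finds a suitable 32-bit extend of idx).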
7001
7002/// This is used for computing addresses like this:
7003///
7004/// ldr x1, [x2, x3, lsl #3]
7005///
7006/// Where x2 is the base register, and x3 is an offset register. The shift-left
7007/// is a constant value specific to this load instruction. That is, we'll never
7008/// see anything other than a 3 here (which corresponds to the size of the
7009 /// element being loaded).
7011AArch64InstructionSelector::selectAddrModeShiftedExtendXReg(
7012 MachineOperand &Root, unsigned SizeInBytes) const {
7013 if (!Root.isReg())
7014 return std::nullopt;
7016
7017 // We want to find something like this:
7018 //
7019 // val = G_CONSTANT LegalShiftVal
7020 // shift = G_SHL off_reg val
7021 // ptr = G_PTR_ADD base_reg shift
7022 // x = G_LOAD ptr
7023 //
7024 // And fold it into this addressing mode:
7025 //
7026 // ldr x, [base_reg, off_reg, lsl #LegalShiftVal]
7027
7028 // Check if we can find the G_PTR_ADD.
7029 MachineInstr *PtrAdd =
7030 getOpcodeDef(TargetOpcode::G_PTR_ADD, Root.getReg(), MRI);
7031 if (!PtrAdd || !isWorthFoldingIntoExtendedReg(*PtrAdd, MRI))
7032 return std::nullopt;
7033
7034 // Now, try to match an opcode which will match our specific offset.
7035 // We want a G_SHL or a G_MUL.
7036 MachineInstr *OffsetInst =
7038 return selectExtendedSHL(Root, PtrAdd->getOperand(1),
7039 OffsetInst->getOperand(0), SizeInBytes,
7040 /*WantsExt=*/false);
7041}
7042
7043/// This is used for computing addresses like this:
7044///
7045/// ldr x1, [x2, x3]
7046///
7047/// Where x2 is the base register, and x3 is an offset register.
7048///
7049/// When possible (or profitable) to fold a G_PTR_ADD into the address
7050/// calculation, this will do so. Otherwise, it will return std::nullopt.
7052AArch64InstructionSelector::selectAddrModeRegisterOffset(
7053 MachineOperand &Root) const {
7055
7056 // We need a GEP.
7057 MachineInstr *Gep = MRI.getVRegDef(Root.getReg());
7058 if (Gep->getOpcode() != TargetOpcode::G_PTR_ADD)
7059 return std::nullopt;
7060
7061 // If this is used more than once, let's not bother folding.
7062 // TODO: Check if they are memory ops. If they are, then we can still fold
7063 // without having to recompute anything.
7064 if (!MRI.hasOneNonDBGUse(Gep->getOperand(0).getReg()))
7065 return std::nullopt;
7066
7067 // Base is the GEP's LHS, offset is its RHS.
7068 return {{[=](MachineInstrBuilder &MIB) {
7069 MIB.addUse(Gep->getOperand(1).getReg());
7070 },
7071 [=](MachineInstrBuilder &MIB) {
7072 MIB.addUse(Gep->getOperand(2).getReg());
7073 },
7074 [=](MachineInstrBuilder &MIB) {
7075 // Need to add both immediates here to make sure that they are both
7076 // added to the instruction.
7077 MIB.addImm(0);
7078 MIB.addImm(0);
7079 }}};
7080}
7081
7082/// This is intended to be equivalent to selectAddrModeXRO in
7083/// AArch64ISelDAGtoDAG. It's used for selecting X register offset loads.
7085AArch64InstructionSelector::selectAddrModeXRO(MachineOperand &Root,
7086 unsigned SizeInBytes) const {
7088 if (!Root.isReg())
7089 return std::nullopt;
7090 MachineInstr *PtrAdd =
7091 getOpcodeDef(TargetOpcode::G_PTR_ADD, Root.getReg(), MRI);
7092 if (!PtrAdd)
7093 return std::nullopt;
7094
7095 // Check for an immediate which cannot be encoded in the [base + imm]
7096 // addressing mode, and can't be encoded in an add/sub. If this happens, we'll
7097 // end up with code like:
7098 //
7099 // mov x0, wide
7100 // add x1, base, x0
7101 // ldr x2, [x1, x0]
7102 //
7103 // In this situation, we can use the [base, xreg] addressing mode to save an
7104 // add/sub:
7105 //
7106 // mov x0, wide
7107 // ldr x2, [base, x0]
7108 auto ValAndVReg =
7110 if (ValAndVReg) {
7111 unsigned Scale = Log2_32(SizeInBytes);
7112 int64_t ImmOff = ValAndVReg->Value.getSExtValue();
7113
7114 // Skip immediates that can be selected in the load/store addressing
7115 // mode.
7116 if (ImmOff % SizeInBytes == 0 && ImmOff >= 0 &&
7117 ImmOff < (0x1000 << Scale))
7118 return std::nullopt;
7119
7120 // Helper lambda to decide whether or not it is preferable to emit an add.
7121 auto isPreferredADD = [](int64_t ImmOff) {
7122 // Constants in [0x0, 0xfff] can be encoded in an add.
7123 if ((ImmOff & 0xfffffffffffff000LL) == 0x0LL)
7124 return true;
7125
7126 // Can it be encoded in an add lsl #12?
7127 if ((ImmOff & 0xffffffffff000fffLL) != 0x0LL)
7128 return false;
7129
7130 // It can be encoded in an add lsl #12, but we may not want to. If it is
7131 // possible to select this as a single movz, then prefer that. A single
7132 // movz is faster than an add with a shift.
7133 return (ImmOff & 0xffffffffff00ffffLL) != 0x0LL &&
7134 (ImmOff & 0xffffffffffff0fffLL) != 0x0LL;
7135 };
7136
7137 // If the immediate can be encoded in a single add/sub, then bail out.
7138 if (isPreferredADD(ImmOff) || isPreferredADD(-ImmOff))
7139 return std::nullopt;
7140 }
7141
7142 // Try to fold shifts into the addressing mode.
7143 auto AddrModeFns = selectAddrModeShiftedExtendXReg(Root, SizeInBytes);
7144 if (AddrModeFns)
7145 return AddrModeFns;
7146
7147 // If that doesn't work, see if it's possible to fold in registers from
7148 // a GEP.
7149 return selectAddrModeRegisterOffset(Root);
7150}
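// Illustrative example: with SizeInBytes == 8 and a pointer formed as
// base + 0x123456, the offset is neither a suitably aligned scaled-u12
// immediate nor encodable as a single add/add-lsl-#12, so rather than
//   movz/movk xN, ... ; add xM, base, xN ; ldr xD, [xM]
// the selector can reuse the already-materialised constant directly as
//   ldr xD, [base, xN]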
7151
7152/// This is used for computing addresses like this:
7153///
7154/// ldr x0, [xBase, wOffset, sxtw #LegalShiftVal]
7155///
7156/// Where we have a 64-bit base register, a 32-bit offset register, and an
7157/// extend (which may or may not be signed).
7159AArch64InstructionSelector::selectAddrModeWRO(MachineOperand &Root,
7160 unsigned SizeInBytes) const {
7162
7163 MachineInstr *PtrAdd =
7164 getOpcodeDef(TargetOpcode::G_PTR_ADD, Root.getReg(), MRI);
7165 if (!PtrAdd || !isWorthFoldingIntoExtendedReg(*PtrAdd, MRI))
7166 return std::nullopt;
7167
7168 MachineOperand &LHS = PtrAdd->getOperand(1);
7169 MachineOperand &RHS = PtrAdd->getOperand(2);
7170 MachineInstr *OffsetInst = getDefIgnoringCopies(RHS.getReg(), MRI);
7171
7172 // The first case is the same as selectAddrModeXRO, except we need an extend.
7173 // In this case, we try to find a shift and extend, and fold them into the
7174 // addressing mode.
7175 //
7176 // E.g.
7177 //
7178 // off_reg = G_Z/S/ANYEXT ext_reg
7179 // val = G_CONSTANT LegalShiftVal
7180 // shift = G_SHL off_reg val
7181 // ptr = G_PTR_ADD base_reg shift
7182 // x = G_LOAD ptr
7183 //
7184 // In this case we can get a load like this:
7185 //
7186 // ldr x0, [base_reg, ext_reg, sxtw #LegalShiftVal]
7187 auto ExtendedShl = selectExtendedSHL(Root, LHS, OffsetInst->getOperand(0),
7188 SizeInBytes, /*WantsExt=*/true);
7189 if (ExtendedShl)
7190 return ExtendedShl;
7191
7192 // There was no shift. We can try and fold a G_Z/S/ANYEXT in alone though.
7193 //
7194 // e.g.
7195 // ldr something, [base_reg, ext_reg, sxtw]
7196 if (!isWorthFoldingIntoExtendedReg(*OffsetInst, MRI))
7197 return std::nullopt;
7198
7199 // Check if this is an extend. We'll get an extend type if it is.
7201 getExtendTypeForInst(*OffsetInst, MRI, /*IsLoadStore=*/true);
7203 return std::nullopt;
7204
7205 // Need a 32-bit wide register.
7206 MachineIRBuilder MIB(*PtrAdd);
7207 Register ExtReg = moveScalarRegClass(OffsetInst->getOperand(1).getReg(),
7208 AArch64::GPR32RegClass, MIB);
7209 unsigned SignExtend = Ext == AArch64_AM::SXTW;
7210
7211 // Base is LHS, offset is ExtReg.
7212 return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(LHS.getReg()); },
7213 [=](MachineInstrBuilder &MIB) { MIB.addUse(ExtReg); },
7214 [=](MachineInstrBuilder &MIB) {
7215 MIB.addImm(SignExtend);
7216 MIB.addImm(0);
7217 }}};
7218}
7219
7220/// Select a "register plus unscaled signed 9-bit immediate" address. This
7221/// should only match when there is an offset that is not valid for a scaled
7222/// immediate addressing mode. The "Size" argument is the size in bytes of the
7223/// memory reference, which is needed here to know what is valid for a scaled
7224/// immediate.
7226AArch64InstructionSelector::selectAddrModeUnscaled(MachineOperand &Root,
7227 unsigned Size) const {
7229 Root.getParent()->getParent()->getParent()->getRegInfo();
7230
7231 if (!Root.isReg())
7232 return std::nullopt;
7233
7234 if (!isBaseWithConstantOffset(Root, MRI))
7235 return std::nullopt;
7236
7237 MachineInstr *RootDef = MRI.getVRegDef(Root.getReg());
7238
7239 MachineOperand &OffImm = RootDef->getOperand(2);
7240 if (!OffImm.isReg())
7241 return std::nullopt;
7242 MachineInstr *RHS = MRI.getVRegDef(OffImm.getReg());
7243 if (RHS->getOpcode() != TargetOpcode::G_CONSTANT)
7244 return std::nullopt;
7245 int64_t RHSC;
7246 MachineOperand &RHSOp1 = RHS->getOperand(1);
7247 if (!RHSOp1.isCImm() || RHSOp1.getCImm()->getBitWidth() > 64)
7248 return std::nullopt;
7249 RHSC = RHSOp1.getCImm()->getSExtValue();
7250
7251 if (RHSC >= -256 && RHSC < 256) {
7252 MachineOperand &Base = RootDef->getOperand(1);
7253 return {{
7254 [=](MachineInstrBuilder &MIB) { MIB.add(Base); },
7255 [=](MachineInstrBuilder &MIB) { MIB.addImm(RHSC); },
7256 }};
7257 }
7258 return std::nullopt;
7259}
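// Illustrative example: for a 4-byte load at base - 12, the scaled [base, #imm]
// form cannot encode a negative offset, but -12 lies in [-256, 256), so this
// returns {base, -12} and the access is selected as an unscaled load, e.g.
//   ldur wN, [base, #-12]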
7260
7262AArch64InstructionSelector::tryFoldAddLowIntoImm(MachineInstr &RootDef,
7263 unsigned Size,
7264 MachineRegisterInfo &MRI) const {
7265 if (RootDef.getOpcode() != AArch64::G_ADD_LOW)
7266 return std::nullopt;
7267 MachineInstr &Adrp = *MRI.getVRegDef(RootDef.getOperand(1).getReg());
7268 if (Adrp.getOpcode() != AArch64::ADRP)
7269 return std::nullopt;
7270
7271 // TODO: add heuristics like isWorthFoldingADDlow() from SelectionDAG.
7272 auto Offset = Adrp.getOperand(1).getOffset();
7273 if (Offset % Size != 0)
7274 return std::nullopt;
7275
7276 auto GV = Adrp.getOperand(1).getGlobal();
7277 if (GV->isThreadLocal())
7278 return std::nullopt;
7279
7280 auto &MF = *RootDef.getParent()->getParent();
7281 if (GV->getPointerAlignment(MF.getDataLayout()) < Size)
7282 return std::nullopt;
7283
7284 unsigned OpFlags = STI.ClassifyGlobalReference(GV, MF.getTarget());
7285 MachineIRBuilder MIRBuilder(RootDef);
7286 Register AdrpReg = Adrp.getOperand(0).getReg();
7287 return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(AdrpReg); },
7288 [=](MachineInstrBuilder &MIB) {
7289 MIB.addGlobalAddress(GV, Offset,
7290 OpFlags | AArch64II::MO_PAGEOFF |
7292 }}};
7293}
7294
7295/// Select a "register plus scaled unsigned 12-bit immediate" address. The
7296/// "Size" argument is the size in bytes of the memory reference, which
7297/// determines the scale.
7299AArch64InstructionSelector::selectAddrModeIndexed(MachineOperand &Root,
7300 unsigned Size) const {
7301 MachineFunction &MF = *Root.getParent()->getParent()->getParent();
7303
7304 if (!Root.isReg())
7305 return std::nullopt;
7306
7307 MachineInstr *RootDef = MRI.getVRegDef(Root.getReg());
7308 if (RootDef->getOpcode() == TargetOpcode::G_FRAME_INDEX) {
7309 return {{
7310 [=](MachineInstrBuilder &MIB) { MIB.add(RootDef->getOperand(1)); },
7311 [=](MachineInstrBuilder &MIB) { MIB.addImm(0); },
7312 }};
7313 }
7314
7316 // Check if we can fold in the ADD of small code model ADRP + ADD address.
7317 if (CM == CodeModel::Small) {
7318 auto OpFns = tryFoldAddLowIntoImm(*RootDef, Size, MRI);
7319 if (OpFns)
7320 return OpFns;
7321 }
7322
7323 if (isBaseWithConstantOffset(Root, MRI)) {
7324 MachineOperand &LHS = RootDef->getOperand(1);
7325 MachineOperand &RHS = RootDef->getOperand(2);
7326 MachineInstr *LHSDef = MRI.getVRegDef(LHS.getReg());
7327 MachineInstr *RHSDef = MRI.getVRegDef(RHS.getReg());
7328
7329 int64_t RHSC = (int64_t)RHSDef->getOperand(1).getCImm()->getZExtValue();
7330 unsigned Scale = Log2_32(Size);
7331 if ((RHSC & (Size - 1)) == 0 && RHSC >= 0 && RHSC < (0x1000 << Scale)) {
7332 if (LHSDef->getOpcode() == TargetOpcode::G_FRAME_INDEX)
7333 return {{
7334 [=](MachineInstrBuilder &MIB) { MIB.add(LHSDef->getOperand(1)); },
7335 [=](MachineInstrBuilder &MIB) { MIB.addImm(RHSC >> Scale); },
7336 }};
7337
7338 return {{
7339 [=](MachineInstrBuilder &MIB) { MIB.add(LHS); },
7340 [=](MachineInstrBuilder &MIB) { MIB.addImm(RHSC >> Scale); },
7341 }};
7342 }
7343 }
7344
7345 // Before falling back to our general case, check if the unscaled
7346 // instructions can handle this. If so, that's preferable.
7347 if (selectAddrModeUnscaled(Root, Size))
7348 return std::nullopt;
7349
7350 return {{
7351 [=](MachineInstrBuilder &MIB) { MIB.add(Root); },
7352 [=](MachineInstrBuilder &MIB) { MIB.addImm(0); },
7353 }};
7354}
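// Illustrative example: for an 8-byte load at base + 4088, Scale is 3, the
// offset is a multiple of 8 and below (0x1000 << 3), so the renderers produce
// {base, 4088 >> 3 == 511}, i.e. "ldr xN, [base, #4088]"; an offset of 4089
// would instead fall through to the unscaled check or the general register form.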
7355
7356/// Given a shift instruction, return the correct shift type for that
7357/// instruction.
7359 switch (MI.getOpcode()) {
7360 default:
7362 case TargetOpcode::G_SHL:
7363 return AArch64_AM::LSL;
7364 case TargetOpcode::G_LSHR:
7365 return AArch64_AM::LSR;
7366 case TargetOpcode::G_ASHR:
7367 return AArch64_AM::ASR;
7368 case TargetOpcode::G_ROTR:
7369 return AArch64_AM::ROR;
7370 }
7371}
7372
7373/// Select a "shifted register" operand. If the value is not shifted, set the
7374/// shift operand to a default value of "lsl 0".
7376AArch64InstructionSelector::selectShiftedRegister(MachineOperand &Root,
7377 bool AllowROR) const {
7378 if (!Root.isReg())
7379 return std::nullopt;
7381 Root.getParent()->getParent()->getParent()->getRegInfo();
7382
7383 // Check if the operand is defined by an instruction which corresponds to
7384 // a ShiftExtendType. E.g. a G_SHL, G_LSHR, etc.
7385 MachineInstr *ShiftInst = MRI.getVRegDef(Root.getReg());
7387 if (ShType == AArch64_AM::InvalidShiftExtend)
7388 return std::nullopt;
7389 if (ShType == AArch64_AM::ROR && !AllowROR)
7390 return std::nullopt;
7391 if (!isWorthFoldingIntoExtendedReg(*ShiftInst, MRI))
7392 return std::nullopt;
7393
7394 // Need an immediate on the RHS.
7395 MachineOperand &ShiftRHS = ShiftInst->getOperand(2);
7396 auto Immed = getImmedFromMO(ShiftRHS);
7397 if (!Immed)
7398 return std::nullopt;
7399
7400 // We have something that we can fold. Fold in the shift's LHS and RHS into
7401 // the instruction.
7402 MachineOperand &ShiftLHS = ShiftInst->getOperand(1);
7403 Register ShiftReg = ShiftLHS.getReg();
7404
7405 unsigned NumBits = MRI.getType(ShiftReg).getSizeInBits();
7406 unsigned Val = *Immed & (NumBits - 1);
7407 unsigned ShiftVal = AArch64_AM::getShifterImm(ShType, Val);
7408
7409 return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(ShiftReg); },
7410 [=](MachineInstrBuilder &MIB) { MIB.addImm(ShiftVal); }}};
7411}
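// Illustrative example: if Root is defined by "%s:gpr(s64) = G_SHL %x, 13",
// this returns {%x, shifter(LSL, 13)}, letting a user such as G_ADD select to
//   add xD, xA, xB, lsl #13
// instead of materialising the shifted value separately.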
7412
7413AArch64_AM::ShiftExtendType AArch64InstructionSelector::getExtendTypeForInst(
7414 MachineInstr &MI, MachineRegisterInfo &MRI, bool IsLoadStore) const {
7415 unsigned Opc = MI.getOpcode();
7416
7417 // Handle explicit extend instructions first.
7418 if (Opc == TargetOpcode::G_SEXT || Opc == TargetOpcode::G_SEXT_INREG) {
7419 unsigned Size;
7420 if (Opc == TargetOpcode::G_SEXT)
7421 Size = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
7422 else
7423 Size = MI.getOperand(2).getImm();
7424 assert(Size != 64 && "Extend from 64 bits?");
7425 switch (Size) {
7426 case 8:
7427 return IsLoadStore ? AArch64_AM::InvalidShiftExtend : AArch64_AM::SXTB;
7428 case 16:
7429 return IsLoadStore ? AArch64_AM::InvalidShiftExtend : AArch64_AM::SXTH;
7430 case 32:
7431 return AArch64_AM::SXTW;
7432 default:
7434 }
7435 }
7436
7437 if (Opc == TargetOpcode::G_ZEXT || Opc == TargetOpcode::G_ANYEXT) {
7438 unsigned Size = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
7439 assert(Size != 64 && "Extend from 64 bits?");
7440 switch (Size) {
7441 case 8:
7442 return IsLoadStore ? AArch64_AM::InvalidShiftExtend : AArch64_AM::UXTB;
7443 case 16:
7444 return IsLoadStore ? AArch64_AM::InvalidShiftExtend : AArch64_AM::UXTH;
7445 case 32:
7446 return AArch64_AM::UXTW;
7447 default:
7449 }
7450 }
7451
7452 // Don't have an explicit extend. Try to handle a G_AND with a constant mask
7453 // on the RHS.
7454 if (Opc != TargetOpcode::G_AND)
7456
7457 std::optional<uint64_t> MaybeAndMask = getImmedFromMO(MI.getOperand(2));
7458 if (!MaybeAndMask)
7460 uint64_t AndMask = *MaybeAndMask;
7461 switch (AndMask) {
7462 default:
7464 case 0xFF:
7465 return !IsLoadStore ? AArch64_AM::UXTB : AArch64_AM::InvalidShiftExtend;
7466 case 0xFFFF:
7467 return !IsLoadStore ? AArch64_AM::UXTH : AArch64_AM::InvalidShiftExtend;
7468 case 0xFFFFFFFF:
7469 return AArch64_AM::UXTW;
7470 }
7471}
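// Illustrative example: "%m:gpr(s64) = G_AND %x, 0xff" is reported as UXTB for
// arithmetic users (extended-register add/sub), but as InvalidShiftExtend when
// IsLoadStore is true, since load/store addressing only accepts the 32-bit
// UXTW/SXTW extends handled above.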
7472
7473Register AArch64InstructionSelector::moveScalarRegClass(
7474 Register Reg, const TargetRegisterClass &RC, MachineIRBuilder &MIB) const {
7475 MachineRegisterInfo &MRI = *MIB.getMRI();
7476 auto Ty = MRI.getType(Reg);
7477 assert(!Ty.isVector() && "Expected scalars only!");
7478 if (Ty.getSizeInBits() == TRI.getRegSizeInBits(RC))
7479 return Reg;
7480
7481 // Create a copy and immediately select it.
7482 // FIXME: We should have an emitCopy function?
7483 auto Copy = MIB.buildCopy({&RC}, {Reg});
7484 selectCopy(*Copy, TII, MRI, TRI, RBI);
7485 return Copy.getReg(0);
7486}
7487
7488/// Select an "extended register" operand. This operand folds in an extend
7489/// followed by an optional left shift.
7491AArch64InstructionSelector::selectArithExtendedRegister(
7492 MachineOperand &Root) const {
7493 if (!Root.isReg())
7494 return std::nullopt;
7496 Root.getParent()->getParent()->getParent()->getRegInfo();
7497
7498 uint64_t ShiftVal = 0;
7499 Register ExtReg;
7501 MachineInstr *RootDef = getDefIgnoringCopies(Root.getReg(), MRI);
7502 if (!RootDef)
7503 return std::nullopt;
7504
7505 if (!isWorthFoldingIntoExtendedReg(*RootDef, MRI))
7506 return std::nullopt;
7507
7508 // Check if we can fold a shift and an extend.
7509 if (RootDef->getOpcode() == TargetOpcode::G_SHL) {
7510 // Look for a constant on the RHS of the shift.
7511 MachineOperand &RHS = RootDef->getOperand(2);
7512 std::optional<uint64_t> MaybeShiftVal = getImmedFromMO(RHS);
7513 if (!MaybeShiftVal)
7514 return std::nullopt;
7515 ShiftVal = *MaybeShiftVal;
7516 if (ShiftVal > 4)
7517 return std::nullopt;
7518 // Look for a valid extend instruction on the LHS of the shift.
7519 MachineOperand &LHS = RootDef->getOperand(1);
7520 MachineInstr *ExtDef = getDefIgnoringCopies(LHS.getReg(), MRI);
7521 if (!ExtDef)
7522 return std::nullopt;
7523 Ext = getExtendTypeForInst(*ExtDef, MRI);
7525 return std::nullopt;
7526 ExtReg = ExtDef->getOperand(1).getReg();
7527 } else {
7528 // Didn't get a shift. Try just folding an extend.
7529 Ext = getExtendTypeForInst(*RootDef, MRI);
7531 return std::nullopt;
7532 ExtReg = RootDef->getOperand(1).getReg();
7533
7534 // If we have a 32 bit instruction which zeroes out the high half of a
7535 // register, we get an implicit zero extend for free. Check if we have one.
7536 // FIXME: We actually emit the extend right now even though we don't have
7537 // to.
7538 if (Ext == AArch64_AM::UXTW && MRI.getType(ExtReg).getSizeInBits() == 32) {
7539 MachineInstr *ExtInst = MRI.getVRegDef(ExtReg);
7540 if (isDef32(*ExtInst))
7541 return std::nullopt;
7542 }
7543 }
7544
7545 // We require a GPR32 here. Narrow the ExtReg if needed using a subregister
7546 // copy.
7547 MachineIRBuilder MIB(*RootDef);
7548 ExtReg = moveScalarRegClass(ExtReg, AArch64::GPR32RegClass, MIB);
7549
7550 return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(ExtReg); },
7551 [=](MachineInstrBuilder &MIB) {
7552 MIB.addImm(getArithExtendImm(Ext, ShiftVal));
7553 }}};
7554}
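// Illustrative example (assumed MIR): for
//   %e:gpr(s64) = G_ZEXT %w:gpr(s32)
//   %r:gpr(s64) = G_SHL %e, 2
// used as an add/sub operand, this folds to the extended-register form
//   add xD, xN, wM, uxtw #2
// with the 32-bit source used directly as ExtReg (no narrowing copy needed).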
7555
7557AArch64InstructionSelector::selectExtractHigh(MachineOperand &Root) const {
7558 if (!Root.isReg())
7559 return std::nullopt;
7561 Root.getParent()->getParent()->getParent()->getRegInfo();
7562
7563 auto Extract = getDefSrcRegIgnoringCopies(Root.getReg(), MRI);
7564 while (Extract && Extract->MI->getOpcode() == TargetOpcode::G_BITCAST &&
7565 STI.isLittleEndian())
7566 Extract =
7567 getDefSrcRegIgnoringCopies(Extract->MI->getOperand(1).getReg(), MRI);
7568 if (!Extract)
7569 return std::nullopt;
7570
7571 if (Extract->MI->getOpcode() == TargetOpcode::G_UNMERGE_VALUES) {
7572 if (Extract->Reg == Extract->MI->getOperand(1).getReg()) {
7573 Register ExtReg = Extract->MI->getOperand(2).getReg();
7574 return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(ExtReg); }}};
7575 }
7576 }
7577 if (Extract->MI->getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT) {
7578 LLT SrcTy = MRI.getType(Extract->MI->getOperand(1).getReg());
7580 Extract->MI->getOperand(2).getReg(), MRI);
7581 if (LaneIdx && SrcTy == LLT::fixed_vector(2, 64) &&
7582 LaneIdx->Value.getSExtValue() == 1) {
7583 Register ExtReg = Extract->MI->getOperand(1).getReg();
7584 return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(ExtReg); }}};
7585 }
7586 }
7587
7588 return std::nullopt;
7589}
7590
7591void AArch64InstructionSelector::renderTruncImm(MachineInstrBuilder &MIB,
7592 const MachineInstr &MI,
7593 int OpIdx) const {
7594 const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
7595 assert(MI.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 &&
7596 "Expected G_CONSTANT");
7597 std::optional<int64_t> CstVal =
7598 getIConstantVRegSExtVal(MI.getOperand(0).getReg(), MRI);
7599 assert(CstVal && "Expected constant value");
7600 MIB.addImm(*CstVal);
7601}
7602
7603void AArch64InstructionSelector::renderLogicalImm32(
7604 MachineInstrBuilder &MIB, const MachineInstr &I, int OpIdx) const {
7605 assert(I.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 &&
7606 "Expected G_CONSTANT");
7607 uint64_t CstVal = I.getOperand(1).getCImm()->getZExtValue();
7609 MIB.addImm(Enc);
7610}
7611
7612void AArch64InstructionSelector::renderLogicalImm64(
7613 MachineInstrBuilder &MIB, const MachineInstr &I, int OpIdx) const {
7614 assert(I.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 &&
7615 "Expected G_CONSTANT");
7616 uint64_t CstVal = I.getOperand(1).getCImm()->getZExtValue();
7618 MIB.addImm(Enc);
7619}
7620
7621void AArch64InstructionSelector::renderUbsanTrap(MachineInstrBuilder &MIB,
7622 const MachineInstr &MI,
7623 int OpIdx) const {
7624 assert(MI.getOpcode() == TargetOpcode::G_UBSANTRAP && OpIdx == 0 &&
7625 "Expected G_UBSANTRAP");
7626 MIB.addImm(MI.getOperand(0).getImm() | ('U' << 8));
7627}
7628
7629void AArch64InstructionSelector::renderFPImm16(MachineInstrBuilder &MIB,
7630 const MachineInstr &MI,
7631 int OpIdx) const {
7632 assert(MI.getOpcode() == TargetOpcode::G_FCONSTANT && OpIdx == -1 &&
7633 "Expected G_FCONSTANT");
7634 MIB.addImm(
7635 AArch64_AM::getFP16Imm(MI.getOperand(1).getFPImm()->getValueAPF()));
7636}
7637
7638void AArch64InstructionSelector::renderFPImm32(MachineInstrBuilder &MIB,
7639 const MachineInstr &MI,
7640 int OpIdx) const {
7641 assert(MI.getOpcode() == TargetOpcode::G_FCONSTANT && OpIdx == -1 &&
7642 "Expected G_FCONSTANT");
7643 MIB.addImm(
7644 AArch64_AM::getFP32Imm(MI.getOperand(1).getFPImm()->getValueAPF()));
7645}
7646
7647void AArch64InstructionSelector::renderFPImm64(MachineInstrBuilder &MIB,
7648 const MachineInstr &MI,
7649 int OpIdx) const {
7650 assert(MI.getOpcode() == TargetOpcode::G_FCONSTANT && OpIdx == -1 &&
7651 "Expected G_FCONSTANT");
7652 MIB.addImm(
7653 AArch64_AM::getFP64Imm(MI.getOperand(1).getFPImm()->getValueAPF()));
7654}
7655
7656void AArch64InstructionSelector::renderFPImm32SIMDModImmType4(
7657 MachineInstrBuilder &MIB, const MachineInstr &MI, int OpIdx) const {
7658 assert(MI.getOpcode() == TargetOpcode::G_FCONSTANT && OpIdx == -1 &&
7659 "Expected G_FCONSTANT");
7661 .getFPImm()
7662 ->getValueAPF()
7663 .bitcastToAPInt()
7664 .getZExtValue()));
7665}
7666
7667bool AArch64InstructionSelector::isLoadStoreOfNumBytes(
7668 const MachineInstr &MI, unsigned NumBytes) const {
7669 if (!MI.mayLoadOrStore())
7670 return false;
7671 assert(MI.hasOneMemOperand() &&
7672 "Expected load/store to have only one mem op!");
7673 return (*MI.memoperands_begin())->getSize() == NumBytes;
7674}
7675
7676bool AArch64InstructionSelector::isDef32(const MachineInstr &MI) const {
7677 const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
7678 if (MRI.getType(MI.getOperand(0).getReg()).getSizeInBits() != 32)
7679 return false;
7680
7681 // Only return true if we know the operation will zero-out the high half of
7682 // the 64-bit register. Truncates can be subregister copies, which don't
7683 // zero out the high bits. Copies and other copy-like instructions can be
7684 // fed by truncates, or could be lowered as subregister copies.
7685 switch (MI.getOpcode()) {
7686 default:
7687 return true;
7688 case TargetOpcode::COPY:
7689 case TargetOpcode::G_BITCAST:
7690 case TargetOpcode::G_TRUNC:
7691 case TargetOpcode::G_PHI:
7692 return false;
7693 }
7694}
7695
7696
7697// Perform fixups on the given PHI instruction's operands to force them all
7698// to be the same as the destination regbank.
7700 const AArch64RegisterBankInfo &RBI) {
7701 assert(MI.getOpcode() == TargetOpcode::G_PHI && "Expected a G_PHI");
7702 Register DstReg = MI.getOperand(0).getReg();
7703 const RegisterBank *DstRB = MRI.getRegBankOrNull(DstReg);
7704 assert(DstRB && "Expected PHI dst to have regbank assigned");
7705 MachineIRBuilder MIB(MI);
7706
7707 // Go through each operand and ensure it has the same regbank.
7708 for (MachineOperand &MO : llvm::drop_begin(MI.operands())) {
7709 if (!MO.isReg())
7710 continue;
7711 Register OpReg = MO.getReg();
7712 const RegisterBank *RB = MRI.getRegBankOrNull(OpReg);
7713 if (RB != DstRB) {
7714 // Insert a cross-bank copy.
7715 auto *OpDef = MRI.getVRegDef(OpReg);
7716 const LLT &Ty = MRI.getType(OpReg);
7717 MachineBasicBlock &OpDefBB = *OpDef->getParent();
7718
7719 // Any instruction we insert must appear after all PHIs in the block
7720 // for the block to be valid MIR.
7721 MachineBasicBlock::iterator InsertPt = std::next(OpDef->getIterator());
7722 if (InsertPt != OpDefBB.end() && InsertPt->isPHI())
7723 InsertPt = OpDefBB.getFirstNonPHI();
7724 MIB.setInsertPt(*OpDef->getParent(), InsertPt);
7725 auto Copy = MIB.buildCopy(Ty, OpReg);
7726 MRI.setRegBank(Copy.getReg(0), *DstRB);
7727 MO.setReg(Copy.getReg(0));
7728 }
7729 }
7730}
7731
7732void AArch64InstructionSelector::processPHIs(MachineFunction &MF) {
7733 // We're looking for PHIs; build a list so we don't invalidate iterators.
7736 for (auto &BB : MF) {
7737 for (auto &MI : BB) {
7738 if (MI.getOpcode() == TargetOpcode::G_PHI)
7739 Phis.emplace_back(&MI);
7740 }
7741 }
7742
7743 for (auto *MI : Phis) {
7744 // We need to do some work here if the operand types are < 16 bit and they
7745 // are split across fpr/gpr banks. Since all types <32b on gpr
7746 // end up being assigned gpr32 regclasses, we can end up with PHIs here
7747 // which try to select between a gpr32 and an fpr16. Ideally RBS shouldn't
7748 // be selecting heterogeneous regbanks for operands if possible, but we
7749 // still need to be able to deal with it here.
7750 //
7751 // To fix this, if we have a gpr-bank operand < 32b in size and at least
7752 // one other operand is on the fpr bank, then we add cross-bank copies
7753 // to homogenize the operand banks. For simplicity the bank that we choose
7754 // to settle on is whatever bank the def operand has. For example:
7755 //
7756 // %endbb:
7757 // %dst:gpr(s16) = G_PHI %in1:gpr(s16), %bb1, %in2:fpr(s16), %bb2
7758 // =>
7759 // %bb2:
7760 // ...
7761 // %in2_copy:gpr(s16) = COPY %in2:fpr(s16)
7762 // ...
7763 // %endbb:
7764 // %dst:gpr(s16) = G_PHI %in1:gpr(s16), %bb1, %in2_copy:gpr(s16), %bb2
7765 bool HasGPROp = false, HasFPROp = false;
7766 for (const MachineOperand &MO : llvm::drop_begin(MI->operands())) {
7767 if (!MO.isReg())
7768 continue;
7769 const LLT &Ty = MRI.getType(MO.getReg());
7770 if (!Ty.isValid() || !Ty.isScalar())
7771 break;
7772 if (Ty.getSizeInBits() >= 32)
7773 break;
7774 const RegisterBank *RB = MRI.getRegBankOrNull(MO.getReg());
7775 // If for some reason we don't have a regbank yet, don't try anything.
7776 if (!RB)
7777 break;
7778
7779 if (RB->getID() == AArch64::GPRRegBankID)
7780 HasGPROp = true;
7781 else
7782 HasFPROp = true;
7783 }
7784 // We have heterogeneous regbanks; we need to fix them up.
7785 if (HasGPROp && HasFPROp)
7786 fixupPHIOpBanks(*MI, MRI, RBI);
7787 }
7788}
7789
7790namespace llvm {
7793 AArch64Subtarget &Subtarget,
7795 return new AArch64InstructionSelector(TM, Subtarget, RBI);
7796}
7797}
unsigned SubReg
unsigned const MachineRegisterInfo * MRI
#define Success
MachineInstrBuilder MachineInstrBuilder & DefMI
static bool isPreferredADD(int64_t ImmOff)
static SDValue emitConditionalComparison(SDValue LHS, SDValue RHS, ISD::CondCode CC, SDValue CCOp, AArch64CC::CondCode Predicate, AArch64CC::CondCode OutCC, const SDLoc &DL, SelectionDAG &DAG)
can be transformed to: not (and (not (and (setCC (cmp C)) (setCD (cmp D)))) (and (not (setCA (cmp A))...
static SDValue tryAdvSIMDModImm16(unsigned NewOp, SDValue Op, SelectionDAG &DAG, const APInt &Bits, const SDValue *LHS=nullptr)
static SDValue tryAdvSIMDModImmFP(unsigned NewOp, SDValue Op, SelectionDAG &DAG, const APInt &Bits)
static SDValue tryAdvSIMDModImm64(unsigned NewOp, SDValue Op, SelectionDAG &DAG, const APInt &Bits)
static SDValue tryAdvSIMDModImm8(unsigned NewOp, SDValue Op, SelectionDAG &DAG, const APInt &Bits)
static SDValue emitConjunctionRec(SelectionDAG &DAG, SDValue Val, AArch64CC::CondCode &OutCC, bool Negate, SDValue CCOp, AArch64CC::CondCode Predicate)
Emit conjunction or disjunction tree with the CMP/FCMP followed by a chain of CCMP/CFCMP ops.
static SDValue tryAdvSIMDModImm321s(unsigned NewOp, SDValue Op, SelectionDAG &DAG, const APInt &Bits)
static bool isCMN(SDValue Op, ISD::CondCode CC)
static SDValue tryAdvSIMDModImm32(unsigned NewOp, SDValue Op, SelectionDAG &DAG, const APInt &Bits, const SDValue *LHS=nullptr)
static SDValue emitConjunction(SelectionDAG &DAG, SDValue Val, AArch64CC::CondCode &OutCC)
Emit expression as a conjunction (a series of CCMP/CFCMP ops).
static unsigned selectFPConvOpc(unsigned GenericOpc, LLT DstTy, LLT SrcTy)
#define GET_GLOBALISEL_PREDICATES_INIT
static std::pair< const TargetRegisterClass *, const TargetRegisterClass * > getRegClassesForCopy(MachineInstr &I, const TargetInstrInfo &TII, MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI, const RegisterBankInfo &RBI)
Helper function to get the source and destination register classes for a copy.
#define GET_GLOBALISEL_TEMPORARIES_INIT
static void changeFPCCToANDAArch64CC(CmpInst::Predicate CC, AArch64CC::CondCode &CondCode, AArch64CC::CondCode &CondCode2)
Convert an IR fp condition code to an AArch64 CC.
static Register getTestBitReg(Register Reg, uint64_t &Bit, bool &Invert, MachineRegisterInfo &MRI)
Return a register which can be used as a bit to test in a TB(N)Z.
static unsigned getMinSizeForRegBank(const RegisterBank &RB)
Returns the minimum size the given register bank can hold.
static std::optional< int64_t > getVectorShiftImm(Register Reg, MachineRegisterInfo &MRI)
Returns the element immediate value of a vector shift operand if found.
static unsigned selectLoadStoreUIOp(unsigned GenericOpc, unsigned RegBankID, unsigned OpSize)
Select the AArch64 opcode for the G_LOAD or G_STORE operation GenericOpc, appropriate for the (value)...
static const TargetRegisterClass * getMinClassForRegBank(const RegisterBank &RB, TypeSize SizeInBits, bool GetAllRegSet=false)
Given a register bank, and size in bits, return the smallest register class that can represent that c...
static AArch64CC::CondCode changeICMPPredToAArch64CC(CmpInst::Predicate P)
static unsigned selectBinaryOp(unsigned GenericOpc, unsigned RegBankID, unsigned OpSize)
Select the AArch64 opcode for the basic binary operation GenericOpc (such as G_OR or G_SDIV),...
static bool getSubRegForClass(const TargetRegisterClass *RC, const TargetRegisterInfo &TRI, unsigned &SubReg)
Returns the correct subregister to use for a given register class.
static bool selectCopy(MachineInstr &I, const TargetInstrInfo &TII, MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI, const RegisterBankInfo &RBI)
static bool copySubReg(MachineInstr &I, MachineRegisterInfo &MRI, const RegisterBankInfo &RBI, Register SrcReg, const TargetRegisterClass *To, unsigned SubReg)
Helper function for selectCopy.
static Register createDTuple(ArrayRef< Register > Regs, MachineIRBuilder &MIB)
Create a tuple of D-registers using the registers in Regs.
static void fixupPHIOpBanks(MachineInstr &MI, MachineRegisterInfo &MRI, const AArch64RegisterBankInfo &RBI)
static bool selectDebugInstr(MachineInstr &I, MachineRegisterInfo &MRI, const RegisterBankInfo &RBI)
static bool isSignExtendShiftType(AArch64_AM::ShiftExtendType Type)
static AArch64_AM::ShiftExtendType getShiftTypeForInst(MachineInstr &MI)
Given a shift instruction, return the correct shift type for that instruction.
static bool unsupportedBinOp(const MachineInstr &I, const AArch64RegisterBankInfo &RBI, const MachineRegisterInfo &MRI, const AArch64RegisterInfo &TRI)
Check whether I is a currently unsupported binary operation:
static bool canEmitConjunction(Register Val, bool &CanNegate, bool &MustBeFirst, bool WillNegate, MachineRegisterInfo &MRI, unsigned Depth=0)
Returns true if Val is a tree of AND/OR/CMP operations that can be expressed as a conjunction.
static bool getLaneCopyOpcode(unsigned &CopyOpc, unsigned &ExtractSubReg, const unsigned EltSize)
static Register createQTuple(ArrayRef< Register > Regs, MachineIRBuilder &MIB)
Create a tuple of Q-registers using the registers in Regs.
static std::optional< uint64_t > getImmedFromMO(const MachineOperand &Root)
static std::pair< unsigned, unsigned > getInsertVecEltOpInfo(const RegisterBank &RB, unsigned EltSize)
Return an <Opcode, SubregIndex> pair to do an vector elt insert of a given size and RB.
static Register createTuple(ArrayRef< Register > Regs, const unsigned RegClassIDs[], const unsigned SubRegs[], MachineIRBuilder &MIB)
Create a REG_SEQUENCE instruction using the registers in Regs.
static std::optional< int64_t > getVectorSHLImm(LLT SrcTy, Register Reg, MachineRegisterInfo &MRI)
Matches and returns the shift immediate value for a SHL instruction given a shift operand.
static void changeFPCCToORAArch64CC(CmpInst::Predicate CC, AArch64CC::CondCode &CondCode, AArch64CC::CondCode &CondCode2)
changeFPCCToORAArch64CC - Convert an IR fp condition code to an AArch64 CC.
unsigned RegSize
This file declares the targeting of the RegisterBankInfo class for AArch64.
MachineBasicBlock & MBB
static const LLT S64
static const LLT S32
static const LLT S16
static const LLT S8
static bool isStore(int Opcode)
static bool selectMergeValues(MachineInstrBuilder &MIB, const ARMBaseInstrInfo &TII, MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI, const RegisterBankInfo &RBI)
static bool selectUnmergeValues(MachineInstrBuilder &MIB, const ARMBaseInstrInfo &TII, MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI, const RegisterBankInfo &RBI)
This file contains the declarations for the subclasses of Constant, which represent the different fla...
Returns the sub type a function will return at a given Idx. Should correspond to the result type of an ExtractValue instruction executed with just that one Idx.
#define LLVM_DEBUG(X)
Definition: Debug.h:101
This file contains constants used for implementing Dwarf debug support.
uint64_t Addr
uint64_t Size
#define DEBUG_TYPE
Declares convenience wrapper classes for interpreting MachineInstr instances as specific generic oper...
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
static void emitLoadFromConstantPool(Register DstReg, const Constant *ConstVal, MachineIRBuilder &MIRBuilder)
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
Contains matchers for matching SSA Machine Instructions.
This file declares the MachineConstantPool class which is an abstract constant pool to keep track of ...
This file declares the MachineIRBuilder class.
unsigned const TargetRegisterInfo * TRI
unsigned Reg
static unsigned getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
#define P(N)
const char LLVMTargetMachineRef TM
static StringRef getName(Value *V)
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
static constexpr int Concat[]
Value * RHS
Value * LHS
AArch64FunctionInfo - This class is derived from MachineFunctionInfo and contains private AArch64-spe...
This class provides the information for the target register banks.
bool isCallingConvWin64(CallingConv::ID CC) const
APInt bitcastToAPInt() const
Definition: APFloat.h:1254
Class for arbitrary precision integers.
Definition: APInt.h:77
APInt zext(unsigned width) const
Zero extend to a new width.
Definition: APInt.cpp:981
uint64_t getZExtValue() const
Get zero extended value.
Definition: APInt.h:1499
static APInt getSplat(unsigned NewLen, const APInt &V)
Return a value containing V broadcasted over NewLen bits.
Definition: APInt.cpp:620
static APInt getHighBitsSet(unsigned numBits, unsigned hiBitsSet)
Constructs an APInt value that has the top hiBitsSet bits set.
Definition: APInt.h:275
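The APInt helpers referenced in the entries above compose naturally. A small self-contained sketch, with values chosen only for illustration:
#include "llvm/ADT/APInt.h"
using namespace llvm;

void apintExamples() {
  APInt Byte(8, 0xAB);                            // 8-bit value 0xAB
  APInt Wide = Byte.zext(32);                     // zero-extend -> 0x000000AB
  APInt Splat = APInt::getSplat(32, Byte);        // broadcast -> 0xABABABAB
  APInt HighMask = APInt::getHighBitsSet(32, 8);  // top 8 bits -> 0xFF000000
  uint64_t Raw = Wide.getZExtValue();             // 0xAB as a uint64_t
  (void)Splat; (void)HighMask; (void)Raw;
}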
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:165
BlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate IR basic block frequen...
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition: InstrTypes.h:757
@ FCMP_OEQ
0 0 0 1 True if ordered and equal
Definition: InstrTypes.h:760
@ ICMP_SLT
signed less than
Definition: InstrTypes.h:786
@ ICMP_SLE
signed less or equal
Definition: InstrTypes.h:787
@ FCMP_OLT
0 1 0 0 True if ordered and less than
Definition: InstrTypes.h:763
@ FCMP_ULE
1 1 0 1 True if unordered, less than, or equal
Definition: InstrTypes.h:772
@ FCMP_OGT
0 0 1 0 True if ordered and greater than
Definition: InstrTypes.h:761
@ FCMP_OGE
0 0 1 1 True if ordered and greater than or equal
Definition: InstrTypes.h:762
@ ICMP_UGE
unsigned greater or equal
Definition: InstrTypes.h:781
@ ICMP_UGT
unsigned greater than
Definition: InstrTypes.h:780
@ ICMP_SGT
signed greater than
Definition: InstrTypes.h:784
@ FCMP_ULT
1 1 0 0 True if unordered or less than
Definition: InstrTypes.h:771
@ FCMP_ONE
0 1 1 0 True if ordered and operands are unequal
Definition: InstrTypes.h:765
@ FCMP_UEQ
1 0 0 1 True if unordered or equal
Definition: InstrTypes.h:768
@ ICMP_ULT
unsigned less than
Definition: InstrTypes.h:782
@ FCMP_UGT
1 0 1 0 True if unordered or greater than
Definition: InstrTypes.h:769
@ FCMP_OLE
0 1 0 1 True if ordered and less than or equal
Definition: InstrTypes.h:764
@ FCMP_ORD
0 1 1 1 True if ordered (no nans)
Definition: InstrTypes.h:766
@ ICMP_EQ
equal
Definition: InstrTypes.h:778
@ ICMP_NE
not equal
Definition: InstrTypes.h:779
@ ICMP_SGE
signed greater or equal
Definition: InstrTypes.h:785
@ FCMP_UNE
1 1 1 0 True if unordered or not equal
Definition: InstrTypes.h:773
@ ICMP_ULE
unsigned less or equal
Definition: InstrTypes.h:783
@ FCMP_UGE
1 0 1 1 True if unordered, greater than, or equal
Definition: InstrTypes.h:770
@ FCMP_UNO
1 0 0 0 True if unordered: isnan(X) | isnan(Y)
Definition: InstrTypes.h:767
Predicate getInversePredicate() const
For example, EQ -> NE, UGT -> ULE, SLT -> SGE, OEQ -> UNE, UGT -> OLE, OLT -> UGE,...
Definition: InstrTypes.h:871
bool isIntPredicate() const
Definition: InstrTypes.h:865
bool isUnsigned() const
Definition: InstrTypes.h:1013
static Constant * getSplat(unsigned NumElts, Constant *Elt)
Return a ConstantVector with the specified constant in each element.
Definition: Constants.cpp:2977
ConstantFP - Floating Point Values [float, double].
Definition: Constants.h:269
const APFloat & getValueAPF() const
Definition: Constants.h:312
bool isNegative() const
Return true if the sign bit is set.
Definition: Constants.h:319
bool isZero() const
Return true if the value is positive or negative zero.
Definition: Constants.h:316
This is the shared class of boolean and integer constants.
Definition: Constants.h:81
int64_t getSExtValue() const
Return the constant as a 64-bit integer value after it has been sign extended as appropriate for the ...
Definition: Constants.h:161
unsigned getBitWidth() const
getBitWidth - Return the scalar bitwidth of this constant.
Definition: Constants.h:149
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
Definition: Constants.h:155
static Constant * get(ArrayRef< Constant * > V)
Definition: Constants.cpp:1399
This is an important base class in LLVM.
Definition: Constant.h:41
Constant * getSplatValue(bool AllowPoison=false) const
If all elements of the vector constant have the same value, return that value.
Definition: Constants.cpp:1686
const APInt & getUniqueInteger() const
If C is a constant integer then return its value, otherwise C must be a vector of constant integers,...
Definition: Constants.cpp:1745
bool isNullValue() const
Return true if this is the value that would be returned by getNullValue.
Definition: Constants.cpp:90
This class represents an Operation in the Expression.
TypeSize getTypeStoreSize(Type *Ty) const
Returns the maximum number of bytes that may be overwritten by storing the specified type.
Definition: DataLayout.h:472
Align getPrefTypeAlign(Type *Ty) const
Returns the preferred stack/global alignment for the specified type.
Definition: DataLayout.cpp:874
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These methods get and set the calling convention of this functio...
Definition: Function.h:274
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
Definition: Function.cpp:358
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition: Function.cpp:690
std::optional< SmallVector< std::function< void(MachineInstrBuilder &)>, 4 > > ComplexRendererFns
Represents indexed stores.
Represents any type of generic load or store.
Register getPointerReg() const
Get the source register of the pointer value.
MachineMemOperand & getMMO() const
Get the MachineMemOperand on this instruction.
LocationSize getMemSize() const
Returns the size in bytes of the memory access.
LocationSize getMemSizeInBits() const
Returns the size in bits of the memory access.
Represents a G_SELECT.
Register getCondReg() const
Register getFalseReg() const
Register getTrueReg() const
Register getReg(unsigned Idx) const
Access the Idx'th operand as a register and return it.
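The generic-instruction wrappers above (GSelect, GLoadStore, and friends) are reached through dyn_cast on a MachineInstr, replacing raw operand indices with named accessors. A hedged sketch, with illustrative names:
#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
using namespace llvm;

static void inspect(MachineInstr &MI) {
  if (auto *Sel = dyn_cast<GSelect>(&MI)) {
    Register Cond = Sel->getCondReg();
    Register TVal = Sel->getTrueReg();
    Register FVal = Sel->getFalseReg();
    (void)Cond; (void)TVal; (void)FVal;
  } else if (auto *LdSt = dyn_cast<GLoadStore>(&MI)) {
    Register Ptr = LdSt->getPointerReg();       // source pointer operand
    auto SizeInBits = LdSt->getMemSizeInBits(); // LocationSize of the access
    (void)Ptr; (void)SizeInBits;
  }
}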
bool isThreadLocal() const
If the value is "Thread Local", its value isn't shared by the threads.
Definition: GlobalValue.h:263
bool hasExternalWeakLinkage() const
Definition: GlobalValue.h:529
bool isEquality() const
Return true if this predicate is either EQ or NE.
constexpr unsigned getScalarSizeInBits() const
Definition: LowLevelType.h:267
constexpr bool isScalar() const
Definition: LowLevelType.h:146
constexpr LLT changeElementType(LLT NewEltTy) const
If this type is a vector, return a vector with the same number of elements but the new element type.
Definition: LowLevelType.h:214
constexpr LLT multiplyElements(int Factor) const
Produce a vector type that is Factor times bigger, preserving the element type.
Definition: LowLevelType.h:254
constexpr bool isPointerVector() const
Definition: LowLevelType.h:152
static constexpr LLT scalar(unsigned SizeInBits)
Get a low-level scalar or aggregate "bag of bits".
Definition: LowLevelType.h:42
constexpr bool isValid() const
Definition: LowLevelType.h:145
constexpr bool isVector() const
Definition: LowLevelType.h:148
static constexpr LLT pointer(unsigned AddressSpace, unsigned SizeInBits)
Get a low-level pointer in the given address space.
Definition: LowLevelType.h:57
constexpr TypeSize getSizeInBits() const
Returns the total size of the type. Must only be called on sized types.
Definition: LowLevelType.h:193
constexpr bool isPointer() const
Definition: LowLevelType.h:149
constexpr LLT getElementType() const
Returns the vector's element type. Only valid for vector types.
Definition: LowLevelType.h:290
constexpr unsigned getAddressSpace() const
Definition: LowLevelType.h:280
static constexpr LLT fixed_vector(unsigned NumElements, unsigned ScalarSizeInBits)
Get a low-level fixed-width vector of some number of elements and element width.
Definition: LowLevelType.h:100
constexpr TypeSize getSizeInBytes() const
Returns the total size of the type in bytes, i.e.
Definition: LowLevelType.h:203
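A self-contained sketch of the LLT constructors and queries listed above. The header path shown is the one used by recent LLVM trees and may differ in older releases:
#include "llvm/CodeGenTypes/LowLevelType.h"
#include <cassert>
using namespace llvm;

static void lltExamples() {
  LLT S64 = LLT::scalar(64);            // 64-bit scalar "bag of bits"
  LLT V4S32 = LLT::fixed_vector(4, 32); // <4 x s32>
  LLT P0 = LLT::pointer(0, 64);         // 64-bit pointer in address space 0

  assert(S64.isScalar() && V4S32.isVector() && P0.isPointer());
  assert(V4S32.getElementType() == LLT::scalar(32));
  assert(V4S32.getSizeInBits().getFixedValue() == 128);

  // Same element count, new element type: <4 x s64>.
  LLT V4S64 = V4S32.changeElementType(S64);
  assert(V4S64.getSizeInBits().getFixedValue() == 256);
}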
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:67
TypeSize getValue() const
Set of metadata that should be preserved when using BuildMI().
iterator getFirstNonPHI()
Returns a pointer to the first instruction in this block that is not a PHINode instruction.
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
The MachineConstantPool class keeps track of constants referenced by a function which must be spilled...
unsigned getConstantPoolIndex(const Constant *C, Align Alignment)
getConstantPoolIndex - Create a new entry in the constant pool or return an existing one.
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
void setFrameAddressIsTaken(bool T)
void setReturnAddressIsTaken(bool s)
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
const DataLayout & getDataLayout() const
Return the DataLayout attached to the Module associated to this MF.
Function & getFunction()
Return the LLVM function that this machine code represents.
const LLVMTargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
MachineConstantPool * getConstantPool()
getConstantPool - Return the constant pool object for the current function.
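These MachineFunction facilities are what a constant-pool load (see emitLoadFromConstantPool above) needs: a pool index plus a MachineMemOperand describing the access. A sketch under the assumption that this file's existing includes are available; the s64 memory type is illustrative only.
static void describeConstantPoolLoad(MachineFunction &MF,
                                     const Constant *ConstVal) {
  Align Alignment = MF.getDataLayout().getPrefTypeAlign(ConstVal->getType());
  unsigned CPIdx =
      MF.getConstantPool()->getConstantPoolIndex(ConstVal, Alignment);
  MachineMemOperand *MMO = MF.getMachineMemOperand(
      MachinePointerInfo::getConstantPool(MF), MachineMemOperand::MOLoad,
      LLT::scalar(64), Alignment);
  (void)CPIdx; (void)MMO;
}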
Helper class to build MachineInstr.
void setInsertPt(MachineBasicBlock &MBB, MachineBasicBlock::iterator II)
Set the insertion point before the specified position.
void setInstr(MachineInstr &MI)
Set the insertion point to before MI.
MachineInstrBuilder buildInstr(unsigned Opcode)
Build and insert <empty> = Opcode <empty>.
MachineFunction & getMF()
Getter for the function we currently build.
void setInstrAndDebugLoc(MachineInstr &MI)
Set the insertion point to before MI, and set the debug loc to MI's loc.
const MachineBasicBlock & getMBB() const
Getter for the basic block we currently build.
MachineRegisterInfo * getMRI()
Getter for MRI.
MachineIRBuilderState & getState()
Getter for the State.
MachineInstrBuilder buildCopy(const DstOp &Res, const SrcOp &Op)
Build and insert Res = COPY Op.
const DataLayout & getDataLayout() const
void setState(const MachineIRBuilderState &NewState)
Setter for the State.
MachineInstrBuilder buildPtrToInt(const DstOp &Dst, const SrcOp &Src)
Build and insert a G_PTRTOINT instruction.
Register getReg(unsigned Idx) const
Get the register for the operand index.
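A minimal sketch of the builder pattern these entries describe, assuming the includes already present in this file; DstReg and PtrReg are placeholder virtual registers:
static void emitPtrToIntCopy(MachineIRBuilder &MIB, MachineInstr &I,
                             Register DstReg, Register PtrReg) {
  MIB.setInstrAndDebugLoc(I);  // insert before I and reuse its debug location
  // s64 = G_PTRTOINT PtrReg; passing an LLT as the DstOp creates a fresh vreg.
  auto PtrToInt = MIB.buildPtrToInt(LLT::scalar(64), PtrReg);
  MIB.buildCopy(DstReg, PtrToInt.getReg(0));  // DstReg = COPY %ptrtoint
}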
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & addBlockAddress(const BlockAddress *BA, int64_t Offset=0, unsigned TargetFlags=0) const
const MachineInstrBuilder & addFrameIndex(int Idx) const
const MachineInstrBuilder & addRegMask(const uint32_t *Mask) const
const MachineInstrBuilder & addGlobalAddress(const GlobalValue *GV, int64_t Offset=0, unsigned TargetFlags=0) const
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
bool constrainAllUses(const TargetInstrInfo &TII, const TargetRegisterInfo &TRI, const RegisterBankInfo &RBI) const
const MachineInstrBuilder & addJumpTableIndex(unsigned Idx, unsigned TargetFlags=0) const
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
const MachineInstrBuilder & cloneMemRefs(const MachineInstr &OtherMI) const
const MachineInstrBuilder & addUse(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register use operand.
const MachineInstrBuilder & setMIFlags(unsigned Flags) const
const MachineInstrBuilder & addMemOperand(MachineMemOperand *MMO) const
const MachineInstrBuilder & addDef(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register definition operand.
Representation of each machine instruction.
Definition: MachineInstr.h:69
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
Definition: MachineInstr.h:569
const MachineBasicBlock * getParent() const
Definition: MachineInstr.h:346
void addOperand(MachineFunction &MF, const MachineOperand &Op)
Add the specified operand to the instruction.
const MachineFunction * getMF() const
Return the function that contains the basic block that this instruction belongs to.
void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:579
void addMemOperand(MachineFunction &MF, MachineMemOperand *MO)
Add a MachineMemOperand to the machine instruction.
A description of a memory reference used in the backend.
LLT getMemoryType() const
Return the memory type of the memory reference.
@ MOLoad
The memory access reads data.
AtomicOrdering getSuccessOrdering() const
Return the atomic ordering requirements for this memory operation.
MachineOperand class - Representation of each machine instruction operand.
const GlobalValue * getGlobal() const
const ConstantInt * getCImm() const
bool isCImm() const
isCImm - Test if this is a MO_CImmediate operand.
int64_t getImm() const
bool isReg() const
isReg - Tests if this is a MO_Register operand.
void setReg(Register Reg)
Change the register this operand corresponds to.
bool isImm() const
isImm - Tests if this is a MO_Immediate operand.
void ChangeToImmediate(int64_t ImmVal, unsigned TargetFlags=0)
ChangeToImmediate - Replace this operand with a new immediate operand of the specified value.
MachineInstr * getParent()
getParent - Return the instruction that this operand belongs to.
static MachineOperand CreateImm(int64_t Val)
Register getReg() const
getReg - Returns the register number.
static MachineOperand CreateGA(const GlobalValue *GV, int64_t Offset, unsigned TargetFlags=0)
static MachineOperand CreateBA(const BlockAddress *BA, int64_t Offset, unsigned TargetFlags=0)
const ConstantFP * getFPImm() const
unsigned getPredicate() const
int64_t getOffset() const
Return the offset from the symbol in this operand.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
T get() const
Returns the value of the specified pointer type.
Definition: PointerUnion.h:155
T dyn_cast() const
Returns the current pointer if it is of the specified pointer type, otherwise returns null.
Definition: PointerUnion.h:162
Analysis providing profile information.
Holds all the information related to register banks.
static const TargetRegisterClass * constrainGenericRegister(Register Reg, const TargetRegisterClass &RC, MachineRegisterInfo &MRI)
Constrain the (possibly generic) virtual register Reg to RC.
const RegisterBank & getRegBank(unsigned ID)
Get the register bank identified by ID.
TypeSize getSizeInBits(Register Reg, const MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI) const
Get the size in bits of Reg.
This class implements the register bank concept.
Definition: RegisterBank.h:28
unsigned getID() const
Get the identifier of this register bank.
Definition: RegisterBank.h:45
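A sketch combining the bank and size queries above with constrainGenericRegister, in the spirit of selectCopy. The GPR64 target class and the three-argument getRegBank overload are assumptions for illustration, and the sketch relies on this file's existing includes:
static bool pinToGPR64(Register Reg, MachineRegisterInfo &MRI,
                       const TargetRegisterInfo &TRI,
                       const RegisterBankInfo &RBI) {
  const RegisterBank *RB = RBI.getRegBank(Reg, MRI, TRI);
  if (!RB || RB->getID() != AArch64::GPRRegBankID)
    return false;
  if (RBI.getSizeInBits(Reg, MRI, TRI).getFixedValue() != 64)
    return false;
  // Replace the generic constraint on Reg with a concrete register class.
  return RBI.constrainGenericRegister(Reg, AArch64::GPR64RegClass, MRI) !=
         nullptr;
}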
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
constexpr bool isValid() const
Definition: Register.h:116
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
Definition: Register.h:91
constexpr bool isPhysical() const
Return true if the specified register number is in the physical register namespace.
Definition: Register.h:95
This class represents the LLVM 'select' instruction.
reference emplace_back(ArgTypes &&... Args)
Definition: SmallVector.h:950
void push_back(const T &Elt)
Definition: SmallVector.h:426
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1209
Register getReg() const
TargetInstrInfo - Interface to description of machine instruction set.
CodeModel::Model getCodeModel() const
Returns the code model.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
virtual const TargetRegisterInfo * getRegisterInfo() const
getRegisterInfo - If register information is available, return it.
virtual const TargetLowering * getTargetLowering() const
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:81
static constexpr TypeSize getFixed(ScalarTy ExactSize)
Definition: TypeSize.h:342
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
static IntegerType * getIntNTy(LLVMContext &C, unsigned N)
static IntegerType * getInt8Ty(LLVMContext &C)
A Use represents the edge between a Value definition and its users.
Definition: Use.h:43
LLVM Value Representation.
Definition: Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
Align getPointerAlignment(const DataLayout &DL) const
Returns an alignment of the pointer value.
Definition: Value.cpp:926
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
Definition: TypeSize.h:171
self_iterator getIterator()
Definition: ilist_node.h:132
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
static CondCode getInvertedCondCode(CondCode Code)
static unsigned getNZCVToSatisfyCondCode(CondCode Code)
Given a condition code, return NZCV flags that would satisfy that condition.
void changeFCMPPredToAArch64CC(const CmpInst::Predicate P, AArch64CC::CondCode &CondCode, AArch64CC::CondCode &CondCode2)
Find the AArch64 condition codes necessary to represent P for a scalar floating point comparison.
std::optional< int64_t > getAArch64VectorSplatScalar(const MachineInstr &MI, const MachineRegisterInfo &MRI)
@ MO_NC
MO_NC - Indicates whether the linker is expected to check the symbol reference for overflow.
@ MO_G1
MO_G1 - A symbol operand with this flag (granule 1) represents the bits 16-31 of a 64-bit address,...
@ MO_PAGEOFF
MO_PAGEOFF - A symbol operand with this flag represents the offset of that symbol within a 4K page.
@ MO_GOT
MO_GOT - This flag indicates that a symbol operand represents the address of the GOT entry for the sy...
@ MO_G0
MO_G0 - A symbol operand with this flag (granule 0) represents the bits 0-15 of a 64-bit address,...
@ MO_PAGE
MO_PAGE - A symbol operand with this flag represents the pc-relative offset of the 4K page containing...
@ MO_TLS
MO_TLS - Indicates that the operand being accessed is some kind of thread-local symbol.
@ MO_G2
MO_G2 - A symbol operand with this flag (granule 2) represents the bits 32-47 of a 64-bit address,...
@ MO_G3
MO_G3 - A symbol operand with this flag (granule 3) represents the high 16-bits of a 64-bit address,...
static bool isLogicalImmediate(uint64_t imm, unsigned regSize)
isLogicalImmediate - Return true if the immediate is valid for a logical immediate instruction of the...
static uint8_t encodeAdvSIMDModImmType2(uint64_t Imm)
static bool isAdvSIMDModImmType9(uint64_t Imm)
static bool isAdvSIMDModImmType4(uint64_t Imm)
static bool isAdvSIMDModImmType5(uint64_t Imm)
static int getFP32Imm(const APInt &Imm)
getFP32Imm - Return an 8-bit floating-point version of the 32-bit floating-point value.
static uint8_t encodeAdvSIMDModImmType7(uint64_t Imm)
static uint8_t encodeAdvSIMDModImmType12(uint64_t Imm)
static uint8_t encodeAdvSIMDModImmType10(uint64_t Imm)
static uint8_t encodeAdvSIMDModImmType9(uint64_t Imm)
static uint64_t encodeLogicalImmediate(uint64_t imm, unsigned regSize)
encodeLogicalImmediate - Return the encoded immediate value for a logical immediate instruction of th...
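A small sketch of the logical-immediate helpers, assuming AArch64AddressingModes.h is in scope as it is in this file; the sample bit pattern below is illustrative:
static std::optional<uint64_t> tryEncodeLogicalImm(uint64_t Imm,
                                                   unsigned RegSize) {
  if (!AArch64_AM::isLogicalImmediate(Imm, RegSize))
    return std::nullopt;
  // The result packs the N:immr:imms fields consumed by AND/ORR/EOR (immediate).
  return AArch64_AM::encodeLogicalImmediate(Imm, RegSize);
}
// e.g. tryEncodeLogicalImm(0x00FF00FF00FF00FFULL, 64) succeeds: eight ones
// replicated across 16-bit elements form a valid logical immediate.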
static bool isAdvSIMDModImmType7(uint64_t Imm)
static uint8_t encodeAdvSIMDModImmType5(uint64_t Imm)
static int getFP64Imm(const APInt &Imm)
getFP64Imm - Return an 8-bit floating-point version of the 64-bit floating-point value.
static bool isAdvSIMDModImmType10(uint64_t Imm)
static int getFP16Imm(const APInt &Imm)
getFP16Imm - Return an 8-bit floating-point version of the 16-bit floating-point value.
static uint8_t encodeAdvSIMDModImmType8(uint64_t Imm)
static bool isAdvSIMDModImmType12(uint64_t Imm)
static uint8_t encodeAdvSIMDModImmType11(uint64_t Imm)
static bool isAdvSIMDModImmType11(uint64_t Imm)
static uint8_t encodeAdvSIMDModImmType6(uint64_t Imm)
static bool isAdvSIMDModImmType8(uint64_t Imm)
static uint8_t encodeAdvSIMDModImmType4(uint64_t Imm)
static unsigned getShifterImm(AArch64_AM::ShiftExtendType ST, unsigned Imm)
getShifterImm - Encode the shift type and amount: imm: 6-bit shift amount shifter: 000 ==> lsl 001 ==...
static bool isAdvSIMDModImmType6(uint64_t Imm)
static uint8_t encodeAdvSIMDModImmType1(uint64_t Imm)
static uint8_t encodeAdvSIMDModImmType3(uint64_t Imm)
static bool isAdvSIMDModImmType2(uint64_t Imm)
static bool isAdvSIMDModImmType3(uint64_t Imm)
static bool isAdvSIMDModImmType1(uint64_t Imm)
Key
PAL metadata keys.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:121
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out,...
Definition: ISDOpcodes.h:1554
operand_type_match m_Reg()
SpecificConstantMatch m_SpecificICst(int64_t RequestedValue)
Matches a constant equal to RequestedValue.
UnaryOp_match< SrcTy, TargetOpcode::G_ZEXT > m_GZExt(const SrcTy &Src)
ConstantMatch< APInt > m_ICst(APInt &Cst)
BinaryOp_match< LHS, RHS, TargetOpcode::G_ADD, true > m_GAdd(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, TargetOpcode::G_OR, true > m_GOr(const LHS &L, const RHS &R)
BinaryOp_match< SpecificConstantMatch, SrcTy, TargetOpcode::G_SUB > m_Neg(const SrcTy &&Src)
Matches a register negated by a G_SUB.
OneNonDBGUse_match< SubPat > m_OneNonDBGUse(const SubPat &SP)
BinaryOp_match< SrcTy, SpecificConstantMatch, TargetOpcode::G_XOR, true > m_Not(const SrcTy &&Src)
Matches a register not-ed by a G_XOR.
bool mi_match(Reg R, const MachineRegisterInfo &MRI, Pattern &&P)
BinaryOp_match< LHS, RHS, TargetOpcode::G_PTR_ADD, false > m_GPtrAdd(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, TargetOpcode::G_SHL, false > m_GShl(const LHS &L, const RHS &R)
Or< Preds... > m_any_of(Preds &&... preds)
BinaryOp_match< LHS, RHS, TargetOpcode::G_AND, true > m_GAnd(const LHS &L, const RHS &R)
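The MIPatternMatch combinators above are driven through mi_match. A self-contained sketch that matches a G_SHL by a constant; the function name is illustrative:
#include "llvm/ADT/APInt.h"
#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
using namespace llvm;
using namespace MIPatternMatch;

// Match "Reg = G_SHL Src, <constant>", capturing the source register and the
// constant shift amount. Returns false if Reg is defined some other way.
static bool matchShlByConstant(Register Reg, const MachineRegisterInfo &MRI,
                               Register &Src, APInt &ShiftAmt) {
  return mi_match(Reg, MRI, m_GShl(m_Reg(Src), m_ICst(ShiftAmt)));
}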
Predicate
Predicate - These are "(BI << 5) | BO" for various predicates.
Definition: PPCPredicates.h:26
@ Implicit
Not emitted register (e.g. carry, or temporary result).
@ Undef
Value of the register doesn't matter.
Reg
All possible values of the reg field in the ModR/M byte.
constexpr double e
Definition: MathExtras.h:31
NodeAddr< InstrNode * > Instr
Definition: RDFGraph.h:389
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
Register getFunctionLiveInPhysReg(MachineFunction &MF, const TargetInstrInfo &TII, MCRegister PhysReg, const TargetRegisterClass &RC, const DebugLoc &DL, LLT RegTy=LLT())
Return a virtual register corresponding to the incoming argument register PhysReg.
Definition: Utils.cpp:897
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition: STLExtras.h:329
@ Offset
Definition: DWP.cpp:480
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1722
Register constrainOperandRegClass(const MachineFunction &MF, const TargetRegisterInfo &TRI, MachineRegisterInfo &MRI, const TargetInstrInfo &TII, const RegisterBankInfo &RBI, MachineInstr &InsertPt, const TargetRegisterClass &RegClass, MachineOperand &RegMO)
Constrain the Register operand OpIdx, so that it is now constrained to the TargetRegisterClass passed...
Definition: Utils.cpp:56
MachineInstr * getOpcodeDef(unsigned Opcode, Register Reg, const MachineRegisterInfo &MRI)
See if Reg is defined by a single def instruction that is Opcode.
Definition: Utils.cpp:639
const ConstantFP * getConstantFPVRegVal(Register VReg, const MachineRegisterInfo &MRI)
Definition: Utils.cpp:452
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
std::optional< APInt > getIConstantVRegVal(Register VReg, const MachineRegisterInfo &MRI)
If VReg is defined by a G_CONSTANT, return the corresponding value.
Definition: Utils.cpp:295
bool constrainSelectedInstRegOperands(MachineInstr &I, const TargetInstrInfo &TII, const TargetRegisterInfo &TRI, const RegisterBankInfo &RBI)
Mutate the newly-selected instruction I to constrain its (possibly generic) virtual register operands...
Definition: Utils.cpp:155
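A hedged sketch of the usual selection epilogue built from the utilities above: emit a concrete AArch64 instruction, erase the generic one, and constrain the new instruction's register operands. The G_ADD operand layout and the ADDWrr opcode choice are assumptions for illustration, and the sketch relies on this file's existing includes:
static bool selectAddAsADDWrr(MachineInstr &I, MachineIRBuilder &MIB,
                              const TargetInstrInfo &TII,
                              const TargetRegisterInfo &TRI,
                              const RegisterBankInfo &RBI) {
  MIB.setInstrAndDebugLoc(I);
  auto Add = MIB.buildInstr(AArch64::ADDWrr, {I.getOperand(0).getReg()},
                            {I.getOperand(1).getReg(),
                             I.getOperand(2).getReg()});
  I.eraseFromParent();
  // Swap the generic vreg constraints for the register classes ADDWrr needs.
  return constrainSelectedInstRegOperands(*Add.getInstr(), TII, TRI, RBI);
}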
bool isPreISelGenericOpcode(unsigned Opcode)
Check whether the given Opcode is a generic opcode that is not supposed to appear after ISel.
Definition: TargetOpcodes.h:30
unsigned getBLRCallOpcode(const MachineFunction &MF)
Return opcode to be used for indirect calls.
MachineInstr * getDefIgnoringCopies(Register Reg, const MachineRegisterInfo &MRI)
Find the def instruction for Reg, folding away any trivial copies.
Definition: Utils.cpp:479
std::optional< int64_t > getIConstantVRegSExtVal(Register VReg, const MachineRegisterInfo &MRI)
If VReg is defined by a G_CONSTANT that fits in an int64_t, returns it.
Definition: Utils.cpp:307
constexpr bool isShiftedMask_64(uint64_t Value)
Return true if the argument contains a non-empty sequence of ones with the remainder zero (64 bit ver...
Definition: MathExtras.h:269
bool shouldOptForSize(const MachineBasicBlock &MBB, ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI)
Returns true if the given block should be optimized for size.
Definition: Utils.cpp:1624
OutputIt transform(R &&Range, OutputIt d_first, UnaryFunction F)
Wrapper function around std::transform to apply a function to a range and store the result elsewhere.
Definition: STLExtras.h:1928
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1729
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition: MathExtras.h:324
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:167
std::optional< ValueAndVReg > getAnyConstantVRegValWithLookThrough(Register VReg, const MachineRegisterInfo &MRI, bool LookThroughInstrs=true, bool LookThroughAnyExt=false)
If VReg is defined by a statically evaluable chain of instructions rooted on a G_CONSTANT or G_FCONST...
Definition: Utils.cpp:432
AtomicOrdering
Atomic ordering for LLVM's memory model.
InstructionSelector * createAArch64InstructionSelector(const AArch64TargetMachine &, AArch64Subtarget &, AArch64RegisterBankInfo &)
DWARFExpression::Operation Op
std::optional< ValueAndVReg > getIConstantVRegValWithLookThrough(Register VReg, const MachineRegisterInfo &MRI, bool LookThroughInstrs=true)
If VReg is defined by a statically evaluable chain of instructions rooted on a G_CONSTANT returns its...
Definition: Utils.cpp:426
std::optional< DefinitionAndSourceRegister > getDefSrcRegIgnoringCopies(Register Reg, const MachineRegisterInfo &MRI)
Find the def instruction for Reg and the underlying value Register, folding away any copies.
Definition: Utils.cpp:460
Register getSrcRegIgnoringCopies(Register Reg, const MachineRegisterInfo &MRI)
Find the source register for Reg, folding away any trivial copies.
Definition: Utils.cpp:486
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:860
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
static EVT getFloatingPointVT(unsigned BitWidth)
Returns the EVT that represents a floating-point type with the given number of bits.
Definition: ValueTypes.h:58
Class which stores all the state required in a MachineIRBuilder.
This class contains a discriminated union of information about pointers in memory operands,...
static MachinePointerInfo getConstantPool(MachineFunction &MF)
Return a MachinePointerInfo record that refers to the constant pool.