Bug Summary

File: llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
Warning: line 5900, column 63
The result of the left shift is undefined due to shifting by '4294967295', which is greater or equal to the width of type 'int'
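
For reference, shifting a 32-bit 'int' by an amount greater than or equal to 32 is undefined behaviour in C++, and 4294967295 is UINT_MAX, the value an unsigned shift amount takes when a computation such as "count - 1" wraps around at count == 0. The flagged expression at line 5900 is not part of the excerpt below, so the following is only a minimal, hypothetical sketch of the pattern the analyzer describes, not the actual LLVM code:

    #include <cassert>

    // Hypothetical illustration of the diagnostic. 'NumBits' stands in for a
    // shift amount derived from other data; if it wraps to 4294967295, the
    // 32-bit shift below is undefined behaviour.
    int lowBitMask(unsigned NumBits) {
      assert(NumBits < 31 && "shift amount must be smaller than the width of int");
      return (1 << NumBits) - 1; // well-defined only while NumBits stays in range
    }

Guarding the shift amount (or widening the left operand and still bounding the shift) is the usual way to address this class of report.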

Annotated Source Code

clang -cc1 -cc1 -triple x86_64-pc-linux-gnu -analyze -disable-free -disable-llvm-verifier -discard-value-names -main-file-name AArch64InstructionSelector.cpp -analyzer-store=region -analyzer-opt-analyze-nested-blocks -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=cplusplus -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -analyzer-config-compatibility-mode=true -mrelocation-model pic -pic-level 2 -mframe-pointer=none -fmath-errno -fno-rounding-math -mconstructor-aliases -munwind-tables -target-cpu x86-64 -tune-cpu generic -debugger-tuning=gdb -ffunction-sections -fdata-sections -fcoverage-compilation-dir=/build/llvm-toolchain-snapshot-14~++20210903100615+fd66b44ec19e/build-llvm/lib/Target/AArch64 -resource-dir /usr/lib/llvm-14/lib/clang/14.0.0 -D _GNU_SOURCE -D __STDC_CONSTANT_MACROS -D __STDC_FORMAT_MACROS -D __STDC_LIMIT_MACROS -I /build/llvm-toolchain-snapshot-14~++20210903100615+fd66b44ec19e/build-llvm/lib/Target/AArch64 -I /build/llvm-toolchain-snapshot-14~++20210903100615+fd66b44ec19e/llvm/lib/Target/AArch64 -I /build/llvm-toolchain-snapshot-14~++20210903100615+fd66b44ec19e/build-llvm/include -I /build/llvm-toolchain-snapshot-14~++20210903100615+fd66b44ec19e/llvm/include -D NDEBUG -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/c++/10 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/x86_64-linux-gnu/c++/10 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/c++/10/backward -internal-isystem /usr/lib/llvm-14/lib/clang/14.0.0/include -internal-isystem /usr/local/include -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../x86_64-linux-gnu/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -O2 -Wno-unused-parameter -Wwrite-strings -Wno-missing-field-initializers -Wno-long-long -Wno-maybe-uninitialized -Wno-class-memaccess -Wno-redundant-move -Wno-pessimizing-move -Wno-noexcept-type -Wno-comment -std=c++14 -fdeprecated-macro -fdebug-compilation-dir=/build/llvm-toolchain-snapshot-14~++20210903100615+fd66b44ec19e/build-llvm/lib/Target/AArch64 -fdebug-prefix-map=/build/llvm-toolchain-snapshot-14~++20210903100615+fd66b44ec19e=. -ferror-limit 19 -fvisibility hidden -fvisibility-inlines-hidden -stack-protector 2 -fgnuc-version=4.2.1 -vectorize-loops -vectorize-slp -analyzer-output=html -analyzer-config stable-report-filename=true -faddrsig -D__GCC_HAVE_DWARF2_CFI_ASM=1 -o /tmp/scan-build-2021-09-04-040900-46481-1 -x c++ /build/llvm-toolchain-snapshot-14~++20210903100615+fd66b44ec19e/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp

/build/llvm-toolchain-snapshot-14~++20210903100615+fd66b44ec19e/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp

1//===- AArch64InstructionSelector.cpp ----------------------------*- C++ -*-==//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8/// \file
9/// This file implements the targeting of the InstructionSelector class for
10/// AArch64.
11/// \todo This should be generated by TableGen.
12//===----------------------------------------------------------------------===//
13
14#include "AArch64GlobalISelUtils.h"
15#include "AArch64InstrInfo.h"
16#include "AArch64MachineFunctionInfo.h"
17#include "AArch64RegisterBankInfo.h"
18#include "AArch64RegisterInfo.h"
19#include "AArch64Subtarget.h"
20#include "AArch64TargetMachine.h"
21#include "AArch64GlobalISelUtils.h"
22#include "MCTargetDesc/AArch64AddressingModes.h"
23#include "MCTargetDesc/AArch64MCTargetDesc.h"
24#include "llvm/ADT/Optional.h"
25#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
26#include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
27#include "llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h"
28#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
29#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
30#include "llvm/CodeGen/MachineBasicBlock.h"
31#include "llvm/CodeGen/MachineConstantPool.h"
32#include "llvm/CodeGen/MachineFunction.h"
33#include "llvm/CodeGen/MachineInstr.h"
34#include "llvm/CodeGen/MachineInstrBuilder.h"
35#include "llvm/CodeGen/MachineMemOperand.h"
36#include "llvm/CodeGen/MachineOperand.h"
37#include "llvm/CodeGen/MachineRegisterInfo.h"
38#include "llvm/CodeGen/TargetOpcodes.h"
39#include "llvm/IR/Constants.h"
40#include "llvm/IR/DerivedTypes.h"
41#include "llvm/IR/Instructions.h"
42#include "llvm/IR/PatternMatch.h"
43#include "llvm/IR/Type.h"
44#include "llvm/IR/IntrinsicsAArch64.h"
45#include "llvm/Pass.h"
46#include "llvm/Support/Debug.h"
47#include "llvm/Support/raw_ostream.h"
48
49#define DEBUG_TYPE "aarch64-isel"
50
51using namespace llvm;
52using namespace MIPatternMatch;
53using namespace AArch64GISelUtils;
54
55namespace llvm {
56class BlockFrequencyInfo;
57class ProfileSummaryInfo;
58}
59
60namespace {
61
62#define GET_GLOBALISEL_PREDICATE_BITSET
63#include "AArch64GenGlobalISel.inc"
64#undef GET_GLOBALISEL_PREDICATE_BITSET
65
66class AArch64InstructionSelector : public InstructionSelector {
67public:
68 AArch64InstructionSelector(const AArch64TargetMachine &TM,
69 const AArch64Subtarget &STI,
70 const AArch64RegisterBankInfo &RBI);
71
72 bool select(MachineInstr &I) override;
73 static const char *getName() { return DEBUG_TYPE; }
74
75 void setupMF(MachineFunction &MF, GISelKnownBits *KB,
76 CodeGenCoverage &CoverageInfo, ProfileSummaryInfo *PSI,
77 BlockFrequencyInfo *BFI) override {
78 InstructionSelector::setupMF(MF, KB, CoverageInfo, PSI, BFI);
79 MIB.setMF(MF);
80
81 // hasFnAttribute() is expensive to call on every BRCOND selection, so
82 // cache it here for each run of the selector.
83 ProduceNonFlagSettingCondBr =
84 !MF.getFunction().hasFnAttribute(Attribute::SpeculativeLoadHardening);
85 MFReturnAddr = Register();
86
87 processPHIs(MF);
88 }
89
90private:
91 /// tblgen-erated 'select' implementation, used as the initial selector for
92 /// the patterns that don't require complex C++.
93 bool selectImpl(MachineInstr &I, CodeGenCoverage &CoverageInfo) const;
94
95 // A lowering phase that runs before any selection attempts.
96 // Returns true if the instruction was modified.
97 bool preISelLower(MachineInstr &I);
98
99 // An early selection function that runs before the selectImpl() call.
100 bool earlySelect(MachineInstr &I);
101
102 // Do some preprocessing of G_PHIs before we begin selection.
103 void processPHIs(MachineFunction &MF);
104
105 bool earlySelectSHL(MachineInstr &I, MachineRegisterInfo &MRI);
106
107 /// Eliminate same-sized cross-bank copies into stores before selectImpl().
108 bool contractCrossBankCopyIntoStore(MachineInstr &I,
109 MachineRegisterInfo &MRI);
110
111 bool convertPtrAddToAdd(MachineInstr &I, MachineRegisterInfo &MRI);
112
113 bool selectVaStartAAPCS(MachineInstr &I, MachineFunction &MF,
114 MachineRegisterInfo &MRI) const;
115 bool selectVaStartDarwin(MachineInstr &I, MachineFunction &MF,
116 MachineRegisterInfo &MRI) const;
117
118 ///@{
119 /// Helper functions for selectCompareBranch.
120 bool selectCompareBranchFedByFCmp(MachineInstr &I, MachineInstr &FCmp,
121 MachineIRBuilder &MIB) const;
122 bool selectCompareBranchFedByICmp(MachineInstr &I, MachineInstr &ICmp,
123 MachineIRBuilder &MIB) const;
124 bool tryOptCompareBranchFedByICmp(MachineInstr &I, MachineInstr &ICmp,
125 MachineIRBuilder &MIB) const;
126 bool tryOptAndIntoCompareBranch(MachineInstr &AndInst, bool Invert,
127 MachineBasicBlock *DstMBB,
128 MachineIRBuilder &MIB) const;
129 ///@}
130
131 bool selectCompareBranch(MachineInstr &I, MachineFunction &MF,
132 MachineRegisterInfo &MRI);
133
134 bool selectVectorAshrLshr(MachineInstr &I, MachineRegisterInfo &MRI);
135 bool selectVectorSHL(MachineInstr &I, MachineRegisterInfo &MRI);
136
137 // Helper to generate an equivalent of scalar_to_vector into a new register,
138 // returned via 'Dst'.
139 MachineInstr *emitScalarToVector(unsigned EltSize,
140 const TargetRegisterClass *DstRC,
141 Register Scalar,
142 MachineIRBuilder &MIRBuilder) const;
143
144 /// Emit a lane insert into \p DstReg, or a new vector register if None is
145 /// provided.
146 ///
147 /// The lane inserted into is defined by \p LaneIdx. The vector source
148 /// register is given by \p SrcReg. The register containing the element is
149 /// given by \p EltReg.
150 MachineInstr *emitLaneInsert(Optional<Register> DstReg, Register SrcReg,
151 Register EltReg, unsigned LaneIdx,
152 const RegisterBank &RB,
153 MachineIRBuilder &MIRBuilder) const;
154
155 /// Emit a sequence of instructions representing a constant \p CV for a
156 /// vector register \p Dst. (E.g. a MOV, or a load from a constant pool.)
157 ///
158 /// \returns the last instruction in the sequence on success, and nullptr
159 /// otherwise.
160 MachineInstr *emitConstantVector(Register Dst, Constant *CV,
161 MachineIRBuilder &MIRBuilder,
162 MachineRegisterInfo &MRI);
163
164 bool selectInsertElt(MachineInstr &I, MachineRegisterInfo &MRI);
165 bool tryOptConstantBuildVec(MachineInstr &MI, LLT DstTy,
166 MachineRegisterInfo &MRI);
167 /// \returns true if a G_BUILD_VECTOR instruction \p MI can be selected as a
168 /// SUBREG_TO_REG.
169 bool tryOptBuildVecToSubregToReg(MachineInstr &MI, MachineRegisterInfo &MRI);
170 bool selectBuildVector(MachineInstr &I, MachineRegisterInfo &MRI);
171 bool selectMergeValues(MachineInstr &I, MachineRegisterInfo &MRI);
172 bool selectUnmergeValues(MachineInstr &I, MachineRegisterInfo &MRI);
173
174 bool selectShuffleVector(MachineInstr &I, MachineRegisterInfo &MRI);
175 bool selectExtractElt(MachineInstr &I, MachineRegisterInfo &MRI);
176 bool selectConcatVectors(MachineInstr &I, MachineRegisterInfo &MRI);
177 bool selectSplitVectorUnmerge(MachineInstr &I, MachineRegisterInfo &MRI);
178
179 /// Helper function to select vector load intrinsics like
180 /// @llvm.aarch64.neon.ld2.*, @llvm.aarch64.neon.ld4.*, etc.
181 /// \p Opc is the opcode that the selected instruction should use.
182 /// \p NumVecs is the number of vector destinations for the instruction.
183 /// \p I is the original G_INTRINSIC_W_SIDE_EFFECTS instruction.
184 bool selectVectorLoadIntrinsic(unsigned Opc, unsigned NumVecs,
185 MachineInstr &I);
186 bool selectIntrinsicWithSideEffects(MachineInstr &I,
187 MachineRegisterInfo &MRI);
188 bool selectIntrinsic(MachineInstr &I, MachineRegisterInfo &MRI);
189 bool selectVectorICmp(MachineInstr &I, MachineRegisterInfo &MRI);
190 bool selectIntrinsicTrunc(MachineInstr &I, MachineRegisterInfo &MRI) const;
191 bool selectIntrinsicRound(MachineInstr &I, MachineRegisterInfo &MRI) const;
192 bool selectJumpTable(MachineInstr &I, MachineRegisterInfo &MRI);
193 bool selectBrJT(MachineInstr &I, MachineRegisterInfo &MRI);
194 bool selectTLSGlobalValue(MachineInstr &I, MachineRegisterInfo &MRI);
195 bool selectReduction(MachineInstr &I, MachineRegisterInfo &MRI);
196
197 unsigned emitConstantPoolEntry(const Constant *CPVal,
198 MachineFunction &MF) const;
199 MachineInstr *emitLoadFromConstantPool(const Constant *CPVal,
200 MachineIRBuilder &MIRBuilder) const;
201
202 // Emit a vector concat operation.
203 MachineInstr *emitVectorConcat(Optional<Register> Dst, Register Op1,
204 Register Op2,
205 MachineIRBuilder &MIRBuilder) const;
206
207 // Emit an integer compare between LHS and RHS, which checks for Predicate.
208 MachineInstr *emitIntegerCompare(MachineOperand &LHS, MachineOperand &RHS,
209 MachineOperand &Predicate,
210 MachineIRBuilder &MIRBuilder) const;
211
212 /// Emit a floating point comparison between \p LHS and \p RHS.
213 /// \p Pred if given is the intended predicate to use.
214 MachineInstr *emitFPCompare(Register LHS, Register RHS,
215 MachineIRBuilder &MIRBuilder,
216 Optional<CmpInst::Predicate> = None) const;
217
218 MachineInstr *emitInstr(unsigned Opcode,
219 std::initializer_list<llvm::DstOp> DstOps,
220 std::initializer_list<llvm::SrcOp> SrcOps,
221 MachineIRBuilder &MIRBuilder,
222 const ComplexRendererFns &RenderFns = None) const;
223 /// Helper function to emit an add or sub instruction.
224 ///
225 /// \p AddrModeAndSizeToOpcode must contain each of the opcode variants above
226 /// in a specific order.
227 ///
228 /// Below is an example of the expected input to \p AddrModeAndSizeToOpcode.
229 ///
230 /// \code
231 /// const std::array<std::array<unsigned, 2>, 4> Table {
232 /// {{AArch64::ADDXri, AArch64::ADDWri},
233 /// {AArch64::ADDXrs, AArch64::ADDWrs},
234 /// {AArch64::ADDXrr, AArch64::ADDWrr},
235 /// {AArch64::SUBXri, AArch64::SUBWri},
236 /// {AArch64::ADDXrx, AArch64::ADDWrx}}};
237 /// \endcode
238 ///
239 /// Each row in the table corresponds to a different addressing mode. Each
240 /// column corresponds to a different register size.
241 ///
242 /// \attention Rows must be structured as follows:
243 /// - Row 0: The ri opcode variants
244 /// - Row 1: The rs opcode variants
245 /// - Row 2: The rr opcode variants
246 /// - Row 3: The ri opcode variants for negative immediates
247 /// - Row 4: The rx opcode variants
248 ///
249 /// \attention Columns must be structured as follows:
250 /// - Column 0: The 64-bit opcode variants
251 /// - Column 1: The 32-bit opcode variants
252 ///
253 /// \p Dst is the destination register of the binop to emit.
254 /// \p LHS is the left-hand operand of the binop to emit.
255 /// \p RHS is the right-hand operand of the binop to emit.
256 MachineInstr *emitAddSub(
257 const std::array<std::array<unsigned, 2>, 5> &AddrModeAndSizeToOpcode,
258 Register Dst, MachineOperand &LHS, MachineOperand &RHS,
259 MachineIRBuilder &MIRBuilder) const;
260 MachineInstr *emitADD(Register DefReg, MachineOperand &LHS,
261 MachineOperand &RHS,
262 MachineIRBuilder &MIRBuilder) const;
263 MachineInstr *emitADDS(Register Dst, MachineOperand &LHS, MachineOperand &RHS,
264 MachineIRBuilder &MIRBuilder) const;
265 MachineInstr *emitSUBS(Register Dst, MachineOperand &LHS, MachineOperand &RHS,
266 MachineIRBuilder &MIRBuilder) const;
267 MachineInstr *emitCMN(MachineOperand &LHS, MachineOperand &RHS,
268 MachineIRBuilder &MIRBuilder) const;
269 MachineInstr *emitTST(MachineOperand &LHS, MachineOperand &RHS,
270 MachineIRBuilder &MIRBuilder) const;
271 MachineInstr *emitSelect(Register Dst, Register LHS, Register RHS,
272 AArch64CC::CondCode CC,
273 MachineIRBuilder &MIRBuilder) const;
274 MachineInstr *emitExtractVectorElt(Optional<Register> DstReg,
275 const RegisterBank &DstRB, LLT ScalarTy,
276 Register VecReg, unsigned LaneIdx,
277 MachineIRBuilder &MIRBuilder) const;
278
279 /// Emit a CSet for an integer compare.
280 ///
281 /// \p DefReg and \p SrcReg are expected to be 32-bit scalar registers.
282 MachineInstr *emitCSetForICMP(Register DefReg, unsigned Pred,
283 MachineIRBuilder &MIRBuilder,
284 Register SrcReg = AArch64::WZR) const;
285 /// Emit a CSet for a FP compare.
286 ///
287 /// \p Dst is expected to be a 32-bit scalar register.
288 MachineInstr *emitCSetForFCmp(Register Dst, CmpInst::Predicate Pred,
289 MachineIRBuilder &MIRBuilder) const;
290
291 /// Emit the overflow op for \p Opcode.
292 ///
293 /// \p Opcode is expected to be an overflow op's opcode, e.g. G_UADDO,
294 /// G_USUBO, etc.
295 std::pair<MachineInstr *, AArch64CC::CondCode>
296 emitOverflowOp(unsigned Opcode, Register Dst, MachineOperand &LHS,
297 MachineOperand &RHS, MachineIRBuilder &MIRBuilder) const;
298
299 /// Emit a TB(N)Z instruction which tests \p Bit in \p TestReg.
300 /// \p IsNegative is true if the test should be "not zero".
301 /// This will also optimize the test bit instruction when possible.
302 MachineInstr *emitTestBit(Register TestReg, uint64_t Bit, bool IsNegative,
303 MachineBasicBlock *DstMBB,
304 MachineIRBuilder &MIB) const;
305
306 /// Emit a CB(N)Z instruction which branches to \p DestMBB.
307 MachineInstr *emitCBZ(Register CompareReg, bool IsNegative,
308 MachineBasicBlock *DestMBB,
309 MachineIRBuilder &MIB) const;
310
311 // Equivalent to the i32shift_a and friends from AArch64InstrInfo.td.
312 // We use these manually instead of using the importer since it doesn't
313 // support SDNodeXForm.
314 ComplexRendererFns selectShiftA_32(const MachineOperand &Root) const;
315 ComplexRendererFns selectShiftB_32(const MachineOperand &Root) const;
316 ComplexRendererFns selectShiftA_64(const MachineOperand &Root) const;
317 ComplexRendererFns selectShiftB_64(const MachineOperand &Root) const;
318
319 ComplexRendererFns select12BitValueWithLeftShift(uint64_t Immed) const;
320 ComplexRendererFns selectArithImmed(MachineOperand &Root) const;
321 ComplexRendererFns selectNegArithImmed(MachineOperand &Root) const;
322
323 ComplexRendererFns selectAddrModeUnscaled(MachineOperand &Root,
324 unsigned Size) const;
325
326 ComplexRendererFns selectAddrModeUnscaled8(MachineOperand &Root) const {
327 return selectAddrModeUnscaled(Root, 1);
328 }
329 ComplexRendererFns selectAddrModeUnscaled16(MachineOperand &Root) const {
330 return selectAddrModeUnscaled(Root, 2);
331 }
332 ComplexRendererFns selectAddrModeUnscaled32(MachineOperand &Root) const {
333 return selectAddrModeUnscaled(Root, 4);
334 }
335 ComplexRendererFns selectAddrModeUnscaled64(MachineOperand &Root) const {
336 return selectAddrModeUnscaled(Root, 8);
337 }
338 ComplexRendererFns selectAddrModeUnscaled128(MachineOperand &Root) const {
339 return selectAddrModeUnscaled(Root, 16);
340 }
341
342 /// Helper to try to fold in a GISEL_ADD_LOW into an immediate, to be used
343 /// from complex pattern matchers like selectAddrModeIndexed().
344 ComplexRendererFns tryFoldAddLowIntoImm(MachineInstr &RootDef, unsigned Size,
345 MachineRegisterInfo &MRI) const;
346
347 ComplexRendererFns selectAddrModeIndexed(MachineOperand &Root,
348 unsigned Size) const;
349 template <int Width>
350 ComplexRendererFns selectAddrModeIndexed(MachineOperand &Root) const {
351 return selectAddrModeIndexed(Root, Width / 8);
352 }
353
354 bool isWorthFoldingIntoExtendedReg(MachineInstr &MI,
355 const MachineRegisterInfo &MRI) const;
356 ComplexRendererFns
357 selectAddrModeShiftedExtendXReg(MachineOperand &Root,
358 unsigned SizeInBytes) const;
359
360 /// Returns a \p ComplexRendererFns which contains a base, offset, and whether
361 /// or not a shift + extend should be folded into an addressing mode. Returns
362 /// None when this is not profitable or possible.
363 ComplexRendererFns
364 selectExtendedSHL(MachineOperand &Root, MachineOperand &Base,
365 MachineOperand &Offset, unsigned SizeInBytes,
366 bool WantsExt) const;
367 ComplexRendererFns selectAddrModeRegisterOffset(MachineOperand &Root) const;
368 ComplexRendererFns selectAddrModeXRO(MachineOperand &Root,
369 unsigned SizeInBytes) const;
370 template <int Width>
371 ComplexRendererFns selectAddrModeXRO(MachineOperand &Root) const {
372 return selectAddrModeXRO(Root, Width / 8);
373 }
374
375 ComplexRendererFns selectAddrModeWRO(MachineOperand &Root,
376 unsigned SizeInBytes) const;
377 template <int Width>
378 ComplexRendererFns selectAddrModeWRO(MachineOperand &Root) const {
379 return selectAddrModeWRO(Root, Width / 8);
380 }
381
382 ComplexRendererFns selectShiftedRegister(MachineOperand &Root,
383 bool AllowROR = false) const;
384
385 ComplexRendererFns selectArithShiftedRegister(MachineOperand &Root) const {
386 return selectShiftedRegister(Root);
387 }
388
389 ComplexRendererFns selectLogicalShiftedRegister(MachineOperand &Root) const {
390 return selectShiftedRegister(Root, true);
391 }
392
393 /// Given an extend instruction, determine the correct shift-extend type for
394 /// that instruction.
395 ///
396 /// If the instruction is going to be used in a load or store, pass
397 /// \p IsLoadStore = true.
398 AArch64_AM::ShiftExtendType
399 getExtendTypeForInst(MachineInstr &MI, MachineRegisterInfo &MRI,
400 bool IsLoadStore = false) const;
401
402 /// Move \p Reg to \p RC if \p Reg is not already on \p RC.
403 ///
404 /// \returns Either \p Reg if no change was necessary, or the new register
405 /// created by moving \p Reg.
406 ///
407 /// Note: This uses emitCopy right now.
408 Register moveScalarRegClass(Register Reg, const TargetRegisterClass &RC,
409 MachineIRBuilder &MIB) const;
410
411 ComplexRendererFns selectArithExtendedRegister(MachineOperand &Root) const;
412
413 void renderTruncImm(MachineInstrBuilder &MIB, const MachineInstr &MI,
414 int OpIdx = -1) const;
415 void renderLogicalImm32(MachineInstrBuilder &MIB, const MachineInstr &I,
416 int OpIdx = -1) const;
417 void renderLogicalImm64(MachineInstrBuilder &MIB, const MachineInstr &I,
418 int OpIdx = -1) const;
419 void renderFPImm16(MachineInstrBuilder &MIB, const MachineInstr &MI,
420 int OpIdx = -1) const;
421 void renderFPImm32(MachineInstrBuilder &MIB, const MachineInstr &MI,
422 int OpIdx = -1) const;
423 void renderFPImm64(MachineInstrBuilder &MIB, const MachineInstr &MI,
424 int OpIdx = -1) const;
425
426 // Materialize a GlobalValue or BlockAddress using a movz+movk sequence.
427 void materializeLargeCMVal(MachineInstr &I, const Value *V, unsigned OpFlags);
428
429 // Optimization methods.
430 bool tryOptSelect(MachineInstr &MI);
431 MachineInstr *tryFoldIntegerCompare(MachineOperand &LHS, MachineOperand &RHS,
432 MachineOperand &Predicate,
433 MachineIRBuilder &MIRBuilder) const;
434
435 /// Return true if \p MI is a load or store of \p NumBytes bytes.
436 bool isLoadStoreOfNumBytes(const MachineInstr &MI, unsigned NumBytes) const;
437
438 /// Returns true if \p MI is guaranteed to have the high-half of a 64-bit
439 /// register zeroed out. In other words, the result of MI has been explicitly
440 /// zero extended.
441 bool isDef32(const MachineInstr &MI) const;
442
443 const AArch64TargetMachine &TM;
444 const AArch64Subtarget &STI;
445 const AArch64InstrInfo &TII;
446 const AArch64RegisterInfo &TRI;
447 const AArch64RegisterBankInfo &RBI;
448
449 bool ProduceNonFlagSettingCondBr = false;
450
451 // Some cached values used during selection.
452 // We use LR as a live-in register, and we keep track of it here as it can be
453 // clobbered by calls.
454 Register MFReturnAddr;
455
456 MachineIRBuilder MIB;
457
458#define GET_GLOBALISEL_PREDICATES_DECL
459#include "AArch64GenGlobalISel.inc"
460#undef GET_GLOBALISEL_PREDICATES_DECL
461
462// We declare the temporaries used by selectImpl() in the class to minimize the
463// cost of constructing placeholder values.
464#define GET_GLOBALISEL_TEMPORARIES_DECL
465#include "AArch64GenGlobalISel.inc"
466#undef GET_GLOBALISEL_TEMPORARIES_DECL
467};
468
469} // end anonymous namespace
470
471#define GET_GLOBALISEL_IMPL
472#include "AArch64GenGlobalISel.inc"
473#undef GET_GLOBALISEL_IMPL
474
475AArch64InstructionSelector::AArch64InstructionSelector(
476 const AArch64TargetMachine &TM, const AArch64Subtarget &STI,
477 const AArch64RegisterBankInfo &RBI)
478 : InstructionSelector(), TM(TM), STI(STI), TII(*STI.getInstrInfo()),
479 TRI(*STI.getRegisterInfo()), RBI(RBI),
480#define GET_GLOBALISEL_PREDICATES_INIT
481#include "AArch64GenGlobalISel.inc"
482#undef GET_GLOBALISEL_PREDICATES_INIT
483#define GET_GLOBALISEL_TEMPORARIES_INIT
484#include "AArch64GenGlobalISel.inc"
485#undef GET_GLOBALISEL_TEMPORARIES_INIT
486{
487}
488
489// FIXME: This should be target-independent, inferred from the types declared
490// for each class in the bank.
491static const TargetRegisterClass *
492getRegClassForTypeOnBank(LLT Ty, const RegisterBank &RB,
493 const RegisterBankInfo &RBI,
494 bool GetAllRegSet = false) {
495 if (RB.getID() == AArch64::GPRRegBankID) {
496 if (Ty.getSizeInBits() <= 32)
497 return GetAllRegSet ? &AArch64::GPR32allRegClass
498 : &AArch64::GPR32RegClass;
499 if (Ty.getSizeInBits() == 64)
500 return GetAllRegSet ? &AArch64::GPR64allRegClass
501 : &AArch64::GPR64RegClass;
502 if (Ty.getSizeInBits() == 128)
503 return &AArch64::XSeqPairsClassRegClass;
504 return nullptr;
505 }
506
507 if (RB.getID() == AArch64::FPRRegBankID) {
508 switch (Ty.getSizeInBits()) {
509 case 8:
510 return &AArch64::FPR8RegClass;
511 case 16:
512 return &AArch64::FPR16RegClass;
513 case 32:
514 return &AArch64::FPR32RegClass;
515 case 64:
516 return &AArch64::FPR64RegClass;
517 case 128:
518 return &AArch64::FPR128RegClass;
519 }
520 return nullptr;
521 }
522
523 return nullptr;
524}
525
526/// Given a register bank, and size in bits, return the smallest register class
527/// that can represent that combination.
528static const TargetRegisterClass *
529getMinClassForRegBank(const RegisterBank &RB, unsigned SizeInBits,
530 bool GetAllRegSet = false) {
531 unsigned RegBankID = RB.getID();
532
533 if (RegBankID == AArch64::GPRRegBankID) {
534 if (SizeInBits <= 32)
535 return GetAllRegSet ? &AArch64::GPR32allRegClass
536 : &AArch64::GPR32RegClass;
537 if (SizeInBits == 64)
538 return GetAllRegSet ? &AArch64::GPR64allRegClass
539 : &AArch64::GPR64RegClass;
540 if (SizeInBits == 128)
541 return &AArch64::XSeqPairsClassRegClass;
542 }
543
544 if (RegBankID == AArch64::FPRRegBankID) {
545 switch (SizeInBits) {
546 default:
547 return nullptr;
548 case 8:
549 return &AArch64::FPR8RegClass;
550 case 16:
551 return &AArch64::FPR16RegClass;
552 case 32:
553 return &AArch64::FPR32RegClass;
554 case 64:
555 return &AArch64::FPR64RegClass;
556 case 128:
557 return &AArch64::FPR128RegClass;
558 }
559 }
560
561 return nullptr;
562}
563
564/// Returns the correct subregister to use for a given register class.
565static bool getSubRegForClass(const TargetRegisterClass *RC,
566 const TargetRegisterInfo &TRI, unsigned &SubReg) {
567 switch (TRI.getRegSizeInBits(*RC)) {
568 case 8:
569 SubReg = AArch64::bsub;
570 break;
571 case 16:
572 SubReg = AArch64::hsub;
573 break;
574 case 32:
575 if (RC != &AArch64::FPR32RegClass)
576 SubReg = AArch64::sub_32;
577 else
578 SubReg = AArch64::ssub;
579 break;
580 case 64:
581 SubReg = AArch64::dsub;
582 break;
583 default:
584 LLVM_DEBUG(
585 dbgs() << "Couldn't find appropriate subregister for register class.");
586 return false;
587 }
588
589 return true;
590}
591
592/// Returns the minimum size the given register bank can hold.
593static unsigned getMinSizeForRegBank(const RegisterBank &RB) {
594 switch (RB.getID()) {
595 case AArch64::GPRRegBankID:
596 return 32;
597 case AArch64::FPRRegBankID:
598 return 8;
599 default:
600 llvm_unreachable("Tried to get minimum size for unknown register bank.");
601 }
602}
603
604/// Create a REG_SEQUENCE instruction using the registers in \p Regs.
605/// Helper function for functions like createDTuple and createQTuple.
606///
607/// \p RegClassIDs - The list of register class IDs available for some tuple of
608/// a scalar class. E.g. QQRegClassID, QQQRegClassID, QQQQRegClassID. This is
609/// expected to contain between 2 and 4 tuple classes.
610///
611/// \p SubRegs - The list of subregister classes associated with each register
612/// class ID in \p RegClassIDs. E.g., QQRegClassID should use the qsub0
613/// subregister class. The index of each subregister class is expected to
614/// correspond with the index of each register class.
615///
616/// \returns Either the destination register of REG_SEQUENCE instruction that
617/// was created, or the 0th element of \p Regs if \p Regs contains a single
618/// element.
619static Register createTuple(ArrayRef<Register> Regs,
620 const unsigned RegClassIDs[],
621 const unsigned SubRegs[], MachineIRBuilder &MIB) {
622 unsigned NumRegs = Regs.size();
623 if (NumRegs == 1)
624 return Regs[0];
625 assert(NumRegs >= 2 && NumRegs <= 4 &&
626 "Only support between two and 4 registers in a tuple!");
627 const TargetRegisterInfo *TRI = MIB.getMF().getSubtarget().getRegisterInfo();
628 auto *DesiredClass = TRI->getRegClass(RegClassIDs[NumRegs - 2]);
629 auto RegSequence =
630 MIB.buildInstr(TargetOpcode::REG_SEQUENCE, {DesiredClass}, {});
631 for (unsigned I = 0, E = Regs.size(); I < E; ++I) {
632 RegSequence.addUse(Regs[I]);
633 RegSequence.addImm(SubRegs[I]);
634 }
635 return RegSequence.getReg(0);
636}
637
638/// Create a tuple of D-registers using the registers in \p Regs.
639static Register createDTuple(ArrayRef<Register> Regs, MachineIRBuilder &MIB) {
640 static const unsigned RegClassIDs[] = {
641 AArch64::DDRegClassID, AArch64::DDDRegClassID, AArch64::DDDDRegClassID};
642 static const unsigned SubRegs[] = {AArch64::dsub0, AArch64::dsub1,
643 AArch64::dsub2, AArch64::dsub3};
644 return createTuple(Regs, RegClassIDs, SubRegs, MIB);
645}
646
647/// Create a tuple of Q-registers using the registers in \p Regs.
648static Register createQTuple(ArrayRef<Register> Regs, MachineIRBuilder &MIB) {
649 static const unsigned RegClassIDs[] = {
650 AArch64::QQRegClassID, AArch64::QQQRegClassID, AArch64::QQQQRegClassID};
651 static const unsigned SubRegs[] = {AArch64::qsub0, AArch64::qsub1,
652 AArch64::qsub2, AArch64::qsub3};
653 return createTuple(Regs, RegClassIDs, SubRegs, MIB);
654}
655
656static Optional<uint64_t> getImmedFromMO(const MachineOperand &Root) {
657 auto &MI = *Root.getParent();
658 auto &MBB = *MI.getParent();
659 auto &MF = *MBB.getParent();
660 auto &MRI = MF.getRegInfo();
661 uint64_t Immed;
662 if (Root.isImm())
663 Immed = Root.getImm();
664 else if (Root.isCImm())
665 Immed = Root.getCImm()->getZExtValue();
666 else if (Root.isReg()) {
667 auto ValAndVReg =
668 getConstantVRegValWithLookThrough(Root.getReg(), MRI, true);
669 if (!ValAndVReg)
670 return None;
671 Immed = ValAndVReg->Value.getSExtValue();
672 } else
673 return None;
674 return Immed;
675}
676
677/// Check whether \p I is a currently unsupported binary operation:
678/// - it has an unsized type
679/// - an operand is not a vreg
680/// - all operands are not in the same bank
681/// These are checks that should someday live in the verifier, but right now,
682/// these are mostly limitations of the aarch64 selector.
683static bool unsupportedBinOp(const MachineInstr &I,
684 const AArch64RegisterBankInfo &RBI,
685 const MachineRegisterInfo &MRI,
686 const AArch64RegisterInfo &TRI) {
687 LLT Ty = MRI.getType(I.getOperand(0).getReg());
688 if (!Ty.isValid()) {
689 LLVM_DEBUG(dbgs() << "Generic binop register should be typed\n");
690 return true;
691 }
692
693 const RegisterBank *PrevOpBank = nullptr;
694 for (auto &MO : I.operands()) {
695 // FIXME: Support non-register operands.
696 if (!MO.isReg()) {
697 LLVM_DEBUG(dbgs() << "Generic inst non-reg operands are unsupported\n");
698 return true;
699 }
700
701 // FIXME: Can generic operations have physical registers operands? If
702 // so, this will need to be taught about that, and we'll need to get the
703 // bank out of the minimal class for the register.
704 // Either way, this needs to be documented (and possibly verified).
705 if (!Register::isVirtualRegister(MO.getReg())) {
706 LLVM_DEBUG(dbgs() << "Generic inst has physical register operand\n");
707 return true;
708 }
709
710 const RegisterBank *OpBank = RBI.getRegBank(MO.getReg(), MRI, TRI);
711 if (!OpBank) {
712 LLVM_DEBUG(dbgs() << "Generic register has no bank or class\n");
713 return true;
714 }
715
716 if (PrevOpBank && OpBank != PrevOpBank) {
717 LLVM_DEBUG(dbgs() << "Generic inst operands have different banks\n");
718 return true;
719 }
720 PrevOpBank = OpBank;
721 }
722 return false;
723}
724
725/// Select the AArch64 opcode for the basic binary operation \p GenericOpc
726/// (such as G_OR or G_SDIV), appropriate for the register bank \p RegBankID
727/// and of size \p OpSize.
728/// \returns \p GenericOpc if the combination is unsupported.
729static unsigned selectBinaryOp(unsigned GenericOpc, unsigned RegBankID,
730 unsigned OpSize) {
731 switch (RegBankID) {
732 case AArch64::GPRRegBankID:
733 if (OpSize == 32) {
734 switch (GenericOpc) {
735 case TargetOpcode::G_SHL:
736 return AArch64::LSLVWr;
737 case TargetOpcode::G_LSHR:
738 return AArch64::LSRVWr;
739 case TargetOpcode::G_ASHR:
740 return AArch64::ASRVWr;
741 default:
742 return GenericOpc;
743 }
744 } else if (OpSize == 64) {
745 switch (GenericOpc) {
746 case TargetOpcode::G_PTR_ADD:
747 return AArch64::ADDXrr;
748 case TargetOpcode::G_SHL:
749 return AArch64::LSLVXr;
750 case TargetOpcode::G_LSHR:
751 return AArch64::LSRVXr;
752 case TargetOpcode::G_ASHR:
753 return AArch64::ASRVXr;
754 default:
755 return GenericOpc;
756 }
757 }
758 break;
759 case AArch64::FPRRegBankID:
760 switch (OpSize) {
761 case 32:
762 switch (GenericOpc) {
763 case TargetOpcode::G_FADD:
764 return AArch64::FADDSrr;
765 case TargetOpcode::G_FSUB:
766 return AArch64::FSUBSrr;
767 case TargetOpcode::G_FMUL:
768 return AArch64::FMULSrr;
769 case TargetOpcode::G_FDIV:
770 return AArch64::FDIVSrr;
771 default:
772 return GenericOpc;
773 }
774 case 64:
775 switch (GenericOpc) {
776 case TargetOpcode::G_FADD:
777 return AArch64::FADDDrr;
778 case TargetOpcode::G_FSUB:
779 return AArch64::FSUBDrr;
780 case TargetOpcode::G_FMUL:
781 return AArch64::FMULDrr;
782 case TargetOpcode::G_FDIV:
783 return AArch64::FDIVDrr;
784 case TargetOpcode::G_OR:
785 return AArch64::ORRv8i8;
786 default:
787 return GenericOpc;
788 }
789 }
790 break;
791 }
792 return GenericOpc;
793}
794
795/// Select the AArch64 opcode for the G_LOAD or G_STORE operation \p GenericOpc,
796/// appropriate for the (value) register bank \p RegBankID and of memory access
797/// size \p OpSize. This returns the variant with the base+unsigned-immediate
798/// addressing mode (e.g., LDRXui).
799/// \returns \p GenericOpc if the combination is unsupported.
800static unsigned selectLoadStoreUIOp(unsigned GenericOpc, unsigned RegBankID,
801 unsigned OpSize) {
802 const bool isStore = GenericOpc == TargetOpcode::G_STORE;
803 switch (RegBankID) {
804 case AArch64::GPRRegBankID:
805 switch (OpSize) {
806 case 8:
807 return isStore ? AArch64::STRBBui : AArch64::LDRBBui;
808 case 16:
809 return isStore ? AArch64::STRHHui : AArch64::LDRHHui;
810 case 32:
811 return isStore ? AArch64::STRWui : AArch64::LDRWui;
812 case 64:
813 return isStore ? AArch64::STRXui : AArch64::LDRXui;
814 }
815 break;
816 case AArch64::FPRRegBankID:
817 switch (OpSize) {
818 case 8:
819 return isStore ? AArch64::STRBui : AArch64::LDRBui;
820 case 16:
821 return isStore ? AArch64::STRHui : AArch64::LDRHui;
822 case 32:
823 return isStore ? AArch64::STRSui : AArch64::LDRSui;
824 case 64:
825 return isStore ? AArch64::STRDui : AArch64::LDRDui;
826 }
827 break;
828 }
829 return GenericOpc;
830}
831
832#ifndef NDEBUG
833/// Helper function that verifies that we have a valid copy at the end of
834/// selectCopy. Verifies that the source and dest have the expected sizes and
835/// then returns true.
836static bool isValidCopy(const MachineInstr &I, const RegisterBank &DstBank,
837 const MachineRegisterInfo &MRI,
838 const TargetRegisterInfo &TRI,
839 const RegisterBankInfo &RBI) {
840 const Register DstReg = I.getOperand(0).getReg();
841 const Register SrcReg = I.getOperand(1).getReg();
842 const unsigned DstSize = RBI.getSizeInBits(DstReg, MRI, TRI);
843 const unsigned SrcSize = RBI.getSizeInBits(SrcReg, MRI, TRI);
844
845 // Make sure the size of the source and dest line up.
846 assert(
847 (DstSize == SrcSize ||
848 // Copies are a mean to setup initial types, the number of
849 // bits may not exactly match.
850 (Register::isPhysicalRegister(SrcReg) && DstSize <= SrcSize) ||
851 // Copies are a mean to copy bits around, as long as we are
852 // on the same register class, that's fine. Otherwise, that
853 // means we need some SUBREG_TO_REG or AND & co.
854 (((DstSize + 31) / 32 == (SrcSize + 31) / 32) && DstSize > SrcSize)) &&
855 "Copy with different width?!");
856
857 // Check the size of the destination.
858 assert((DstSize <= 64 || DstBank.getID() == AArch64::FPRRegBankID) &&
859 "GPRs cannot get more than 64-bit width values");
860
861 return true;
862}
863#endif
864
865/// Helper function for selectCopy. Inserts a subregister copy from \p SrcReg
866/// to \p *To.
867///
868/// E.g "To = COPY SrcReg:SubReg"
869static bool copySubReg(MachineInstr &I, MachineRegisterInfo &MRI,
870 const RegisterBankInfo &RBI, Register SrcReg,
871 const TargetRegisterClass *To, unsigned SubReg) {
872 assert(SrcReg.isValid() && "Expected a valid source register?");
873 assert(To && "Destination register class cannot be null");
874 assert(SubReg && "Expected a valid subregister");
875
876 MachineIRBuilder MIB(I);
877 auto SubRegCopy =
878 MIB.buildInstr(TargetOpcode::COPY, {To}, {}).addReg(SrcReg, 0, SubReg);
879 MachineOperand &RegOp = I.getOperand(1);
880 RegOp.setReg(SubRegCopy.getReg(0));
881
882 // It's possible that the destination register won't be constrained. Make
883 // sure that happens.
884 if (!Register::isPhysicalRegister(I.getOperand(0).getReg()))
885 RBI.constrainGenericRegister(I.getOperand(0).getReg(), *To, MRI);
886
887 return true;
888}
889
890/// Helper function to get the source and destination register classes for a
891/// copy. Returns a std::pair containing the source register class for the
892/// copy, and the destination register class for the copy. If a register class
893/// cannot be determined, then it will be nullptr.
894static std::pair<const TargetRegisterClass *, const TargetRegisterClass *>
895getRegClassesForCopy(MachineInstr &I, const TargetInstrInfo &TII,
896 MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI,
897 const RegisterBankInfo &RBI) {
898 Register DstReg = I.getOperand(0).getReg();
899 Register SrcReg = I.getOperand(1).getReg();
900 const RegisterBank &DstRegBank = *RBI.getRegBank(DstReg, MRI, TRI);
901 const RegisterBank &SrcRegBank = *RBI.getRegBank(SrcReg, MRI, TRI);
902 unsigned DstSize = RBI.getSizeInBits(DstReg, MRI, TRI);
903 unsigned SrcSize = RBI.getSizeInBits(SrcReg, MRI, TRI);
904
905 // Special casing for cross-bank copies of s1s. We can technically represent
906 // a 1-bit value with any size of register. The minimum size for a GPR is 32
907 // bits. So, we need to put the FPR on 32 bits as well.
908 //
909 // FIXME: I'm not sure if this case holds true outside of copies. If it does,
910 // then we can pull it into the helpers that get the appropriate class for a
911 // register bank. Or make a new helper that carries along some constraint
912 // information.
913 if (SrcRegBank != DstRegBank && (DstSize == 1 && SrcSize == 1))
914 SrcSize = DstSize = 32;
915
916 return {getMinClassForRegBank(SrcRegBank, SrcSize, true),
917 getMinClassForRegBank(DstRegBank, DstSize, true)};
918}
919
920static bool selectCopy(MachineInstr &I, const TargetInstrInfo &TII,
921 MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI,
922 const RegisterBankInfo &RBI) {
923 Register DstReg = I.getOperand(0).getReg();
924 Register SrcReg = I.getOperand(1).getReg();
925 const RegisterBank &DstRegBank = *RBI.getRegBank(DstReg, MRI, TRI);
926 const RegisterBank &SrcRegBank = *RBI.getRegBank(SrcReg, MRI, TRI);
927
928 // Find the correct register classes for the source and destination registers.
929 const TargetRegisterClass *SrcRC;
930 const TargetRegisterClass *DstRC;
931 std::tie(SrcRC, DstRC) = getRegClassesForCopy(I, TII, MRI, TRI, RBI);
932
933 if (!DstRC) {
934 LLVM_DEBUG(dbgs() << "Unexpected dest size "
935 << RBI.getSizeInBits(DstReg, MRI, TRI) << '\n');
936 return false;
937 }
938
939 // A couple helpers below, for making sure that the copy we produce is valid.
940
941 // Set to true if we insert a SUBREG_TO_REG. If we do this, then we don't want
942 // to verify that the src and dst are the same size, since that's handled by
943 // the SUBREG_TO_REG.
944 bool KnownValid = false;
945
946 // Returns true, or asserts if something we don't expect happens. Instead of
947 // returning true, we return isValidCopy() to ensure that we verify the
948 // result.
949 auto CheckCopy = [&]() {
950 // If we have a bitcast or something, we can't have physical registers.
951 assert((I.isCopy() ||
952 (!Register::isPhysicalRegister(I.getOperand(0).getReg()) &&
953 !Register::isPhysicalRegister(I.getOperand(1).getReg()))) &&
954 "No phys reg on generic operator!");
955 bool ValidCopy = true;
956#ifndef NDEBUG
957 ValidCopy = KnownValid || isValidCopy(I, DstRegBank, MRI, TRI, RBI);
958 assert(ValidCopy && "Invalid copy.");
959#endif
960 (void)KnownValid;
961 return ValidCopy;
962 };
963
964 // Is this a copy? If so, then we may need to insert a subregister copy.
965 if (I.isCopy()) {
966 // Yes. Check if there's anything to fix up.
967 if (!SrcRC) {
968 LLVM_DEBUG(dbgs() << "Couldn't determine source register class\n");
969 return false;
970 }
971
972 unsigned SrcSize = TRI.getRegSizeInBits(*SrcRC);
973 unsigned DstSize = TRI.getRegSizeInBits(*DstRC);
974 unsigned SubReg;
975
976 // If the source bank doesn't support a subregister copy small enough,
977 // then we first need to copy to the destination bank.
978 if (getMinSizeForRegBank(SrcRegBank) > DstSize) {
979 const TargetRegisterClass *DstTempRC =
980 getMinClassForRegBank(DstRegBank, SrcSize, /* GetAllRegSet */ true);
981 getSubRegForClass(DstRC, TRI, SubReg);
982
983 MachineIRBuilder MIB(I);
984 auto Copy = MIB.buildCopy({DstTempRC}, {SrcReg});
985 copySubReg(I, MRI, RBI, Copy.getReg(0), DstRC, SubReg);
986 } else if (SrcSize > DstSize) {
987 // If the source register is bigger than the destination we need to
988 // perform a subregister copy.
989 const TargetRegisterClass *SubRegRC =
990 getMinClassForRegBank(SrcRegBank, DstSize, /* GetAllRegSet */ true);
991 getSubRegForClass(SubRegRC, TRI, SubReg);
992 copySubReg(I, MRI, RBI, SrcReg, DstRC, SubReg);
993 } else if (DstSize > SrcSize) {
994 // If the destination register is bigger than the source we need to do
995 // a promotion using SUBREG_TO_REG.
996 const TargetRegisterClass *PromotionRC =
997 getMinClassForRegBank(SrcRegBank, DstSize, /* GetAllRegSet */ true);
998 getSubRegForClass(SrcRC, TRI, SubReg);
999
1000 Register PromoteReg = MRI.createVirtualRegister(PromotionRC);
1001 BuildMI(*I.getParent(), I, I.getDebugLoc(),
1002 TII.get(AArch64::SUBREG_TO_REG), PromoteReg)
1003 .addImm(0)
1004 .addUse(SrcReg)
1005 .addImm(SubReg);
1006 MachineOperand &RegOp = I.getOperand(1);
1007 RegOp.setReg(PromoteReg);
1008
1009 // Promise that the copy is implicitly validated by the SUBREG_TO_REG.
1010 KnownValid = true;
1011 }
1012
1013 // If the destination is a physical register, then there's nothing to
1014 // change, so we're done.
1015 if (Register::isPhysicalRegister(DstReg))
1016 return CheckCopy();
1017 }
1018
1019 // No need to constrain SrcReg. It will get constrained when we hit another
1020 // of its use or its defs. Copies do not have constraints.
1021 if (!RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) {
1022 LLVM_DEBUG(dbgs() << "Failed to constrain " << TII.getName(I.getOpcode())
1023 << " operand\n");
1024 return false;
1025 }
1026
1027 // If this a GPR ZEXT that we want to just reduce down into a copy.
1028 // The sizes will be mismatched with the source < 32b but that's ok.
1029 if (I.getOpcode() == TargetOpcode::G_ZEXT) {
1030 I.setDesc(TII.get(AArch64::COPY));
1031 assert(SrcRegBank.getID() == AArch64::GPRRegBankID);
1032 return selectCopy(I, TII, MRI, TRI, RBI);
1033 }
1034
1035 I.setDesc(TII.get(AArch64::COPY));
1036 return CheckCopy();
1037}
1038
1039static unsigned selectFPConvOpc(unsigned GenericOpc, LLT DstTy, LLT SrcTy) {
1040 if (!DstTy.isScalar() || !SrcTy.isScalar())
1041 return GenericOpc;
1042
1043 const unsigned DstSize = DstTy.getSizeInBits();
1044 const unsigned SrcSize = SrcTy.getSizeInBits();
1045
1046 switch (DstSize) {
1047 case 32:
1048 switch (SrcSize) {
1049 case 32:
1050 switch (GenericOpc) {
1051 case TargetOpcode::G_SITOFP:
1052 return AArch64::SCVTFUWSri;
1053 case TargetOpcode::G_UITOFP:
1054 return AArch64::UCVTFUWSri;
1055 case TargetOpcode::G_FPTOSI:
1056 return AArch64::FCVTZSUWSr;
1057 case TargetOpcode::G_FPTOUI:
1058 return AArch64::FCVTZUUWSr;
1059 default:
1060 return GenericOpc;
1061 }
1062 case 64:
1063 switch (GenericOpc) {
1064 case TargetOpcode::G_SITOFP:
1065 return AArch64::SCVTFUXSri;
1066 case TargetOpcode::G_UITOFP:
1067 return AArch64::UCVTFUXSri;
1068 case TargetOpcode::G_FPTOSI:
1069 return AArch64::FCVTZSUWDr;
1070 case TargetOpcode::G_FPTOUI:
1071 return AArch64::FCVTZUUWDr;
1072 default:
1073 return GenericOpc;
1074 }
1075 default:
1076 return GenericOpc;
1077 }
1078 case 64:
1079 switch (SrcSize) {
1080 case 32:
1081 switch (GenericOpc) {
1082 case TargetOpcode::G_SITOFP:
1083 return AArch64::SCVTFUWDri;
1084 case TargetOpcode::G_UITOFP:
1085 return AArch64::UCVTFUWDri;
1086 case TargetOpcode::G_FPTOSI:
1087 return AArch64::FCVTZSUXSr;
1088 case TargetOpcode::G_FPTOUI:
1089 return AArch64::FCVTZUUXSr;
1090 default:
1091 return GenericOpc;
1092 }
1093 case 64:
1094 switch (GenericOpc) {
1095 case TargetOpcode::G_SITOFP:
1096 return AArch64::SCVTFUXDri;
1097 case TargetOpcode::G_UITOFP:
1098 return AArch64::UCVTFUXDri;
1099 case TargetOpcode::G_FPTOSI:
1100 return AArch64::FCVTZSUXDr;
1101 case TargetOpcode::G_FPTOUI:
1102 return AArch64::FCVTZUUXDr;
1103 default:
1104 return GenericOpc;
1105 }
1106 default:
1107 return GenericOpc;
1108 }
1109 default:
1110 return GenericOpc;
1111 };
1112 return GenericOpc;
1113}
1114
1115MachineInstr *
1116AArch64InstructionSelector::emitSelect(Register Dst, Register True,
1117 Register False, AArch64CC::CondCode CC,
1118 MachineIRBuilder &MIB) const {
1119 MachineRegisterInfo &MRI = *MIB.getMRI();
1120 assert(RBI.getRegBank(False, MRI, TRI)->getID() ==
1121 RBI.getRegBank(True, MRI, TRI)->getID() &&
1122 "Expected both select operands to have the same regbank?");
1123 LLT Ty = MRI.getType(True);
1124 if (Ty.isVector())
1125 return nullptr;
1126 const unsigned Size = Ty.getSizeInBits();
1127 assert((Size == 32 || Size == 64) &&
1128 "Expected 32 bit or 64 bit select only?");
1129 const bool Is32Bit = Size == 32;
1130 if (RBI.getRegBank(True, MRI, TRI)->getID() != AArch64::GPRRegBankID) {
1131 unsigned Opc = Is32Bit ? AArch64::FCSELSrrr : AArch64::FCSELDrrr;
1132 auto FCSel = MIB.buildInstr(Opc, {Dst}, {True, False}).addImm(CC);
1133 constrainSelectedInstRegOperands(*FCSel, TII, TRI, RBI);
1134 return &*FCSel;
1135 }
1136
1137 // By default, we'll try and emit a CSEL.
1138 unsigned Opc = Is32Bit ? AArch64::CSELWr : AArch64::CSELXr;
1139 bool Optimized = false;
1140 auto TryFoldBinOpIntoSelect = [&Opc, Is32Bit, &CC, &MRI,
1141 &Optimized](Register &Reg, Register &OtherReg,
1142 bool Invert) {
1143 if (Optimized)
1144 return false;
1145
1146 // Attempt to fold:
1147 //
1148 // %sub = G_SUB 0, %x
1149 // %select = G_SELECT cc, %reg, %sub
1150 //
1151 // Into:
1152 // %select = CSNEG %reg, %x, cc
1153 Register MatchReg;
1154 if (mi_match(Reg, MRI, m_Neg(m_Reg(MatchReg)))) {
1155 Opc = Is32Bit ? AArch64::CSNEGWr : AArch64::CSNEGXr;
1156 Reg = MatchReg;
1157 if (Invert) {
1158 CC = AArch64CC::getInvertedCondCode(CC);
1159 std::swap(Reg, OtherReg);
1160 }
1161 return true;
1162 }
1163
1164 // Attempt to fold:
1165 //
1166 // %xor = G_XOR %x, -1
1167 // %select = G_SELECT cc, %reg, %xor
1168 //
1169 // Into:
1170 // %select = CSINV %reg, %x, cc
1171 if (mi_match(Reg, MRI, m_Not(m_Reg(MatchReg)))) {
1172 Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;
1173 Reg = MatchReg;
1174 if (Invert) {
1175 CC = AArch64CC::getInvertedCondCode(CC);
1176 std::swap(Reg, OtherReg);
1177 }
1178 return true;
1179 }
1180
1181 // Attempt to fold:
1182 //
1183 // %add = G_ADD %x, 1
1184 // %select = G_SELECT cc, %reg, %add
1185 //
1186 // Into:
1187 // %select = CSINC %reg, %x, cc
1188 if (mi_match(Reg, MRI,
1189 m_any_of(m_GAdd(m_Reg(MatchReg), m_SpecificICst(1)),
1190 m_GPtrAdd(m_Reg(MatchReg), m_SpecificICst(1))))) {
1191 Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;
1192 Reg = MatchReg;
1193 if (Invert) {
1194 CC = AArch64CC::getInvertedCondCode(CC);
1195 std::swap(Reg, OtherReg);
1196 }
1197 return true;
1198 }
1199
1200 return false;
1201 };
1202
1203 // Helper lambda which tries to use CSINC/CSINV for the instruction when its
1204 // true/false values are constants.
1205 // FIXME: All of these patterns already exist in tablegen. We should be
1206 // able to import these.
1207 auto TryOptSelectCst = [&Opc, &True, &False, &CC, Is32Bit, &MRI,
1208 &Optimized]() {
1209 if (Optimized)
1210 return false;
1211 auto TrueCst = getConstantVRegValWithLookThrough(True, MRI);
1212 auto FalseCst = getConstantVRegValWithLookThrough(False, MRI);
1213 if (!TrueCst && !FalseCst)
1214 return false;
1215
1216 Register ZReg = Is32Bit ? AArch64::WZR : AArch64::XZR;
1217 if (TrueCst && FalseCst) {
1218 int64_t T = TrueCst->Value.getSExtValue();
1219 int64_t F = FalseCst->Value.getSExtValue();
1220
1221 if (T == 0 && F == 1) {
1222 // G_SELECT cc, 0, 1 -> CSINC zreg, zreg, cc
1223 Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;
1224 True = ZReg;
1225 False = ZReg;
1226 return true;
1227 }
1228
1229 if (T == 0 && F == -1) {
1230 // G_SELECT cc 0, -1 -> CSINV zreg, zreg cc
1231 Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;
1232 True = ZReg;
1233 False = ZReg;
1234 return true;
1235 }
1236 }
1237
1238 if (TrueCst) {
1239 int64_t T = TrueCst->Value.getSExtValue();
1240 if (T == 1) {
1241 // G_SELECT cc, 1, f -> CSINC f, zreg, inv_cc
1242 Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;
1243 True = False;
1244 False = ZReg;
1245 CC = AArch64CC::getInvertedCondCode(CC);
1246 return true;
1247 }
1248
1249 if (T == -1) {
1250 // G_SELECT cc, -1, f -> CSINV f, zreg, inv_cc
1251 Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;
1252 True = False;
1253 False = ZReg;
1254 CC = AArch64CC::getInvertedCondCode(CC);
1255 return true;
1256 }
1257 }
1258
1259 if (FalseCst) {
1260 int64_t F = FalseCst->Value.getSExtValue();
1261 if (F == 1) {
1262 // G_SELECT cc, t, 1 -> CSINC t, zreg, cc
1263 Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;
1264 False = ZReg;
1265 return true;
1266 }
1267
1268 if (F == -1) {
1269 // G_SELECT cc, t, -1 -> CSINC t, zreg, cc
1270 Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;
1271 False = ZReg;
1272 return true;
1273 }
1274 }
1275 return false;
1276 };
1277
1278 Optimized |= TryFoldBinOpIntoSelect(False, True, /*Invert = */ false);
1279 Optimized |= TryFoldBinOpIntoSelect(True, False, /*Invert = */ true);
1280 Optimized |= TryOptSelectCst();
1281 auto SelectInst = MIB.buildInstr(Opc, {Dst}, {True, False}).addImm(CC);
1282 constrainSelectedInstRegOperands(*SelectInst, TII, TRI, RBI);
1283 return &*SelectInst;
1284}
1285
1286static AArch64CC::CondCode changeICMPPredToAArch64CC(CmpInst::Predicate P) {
1287 switch (P) {
1288 default:
1289 llvm_unreachable("Unknown condition code!");
1290 case CmpInst::ICMP_NE:
1291 return AArch64CC::NE;
1292 case CmpInst::ICMP_EQ:
1293 return AArch64CC::EQ;
1294 case CmpInst::ICMP_SGT:
1295 return AArch64CC::GT;
1296 case CmpInst::ICMP_SGE:
1297 return AArch64CC::GE;
1298 case CmpInst::ICMP_SLT:
1299 return AArch64CC::LT;
1300 case CmpInst::ICMP_SLE:
1301 return AArch64CC::LE;
1302 case CmpInst::ICMP_UGT:
1303 return AArch64CC::HI;
1304 case CmpInst::ICMP_UGE:
1305 return AArch64CC::HS;
1306 case CmpInst::ICMP_ULT:
1307 return AArch64CC::LO;
1308 case CmpInst::ICMP_ULE:
1309 return AArch64CC::LS;
1310 }
1311}
1312
1313/// Return a register which can be used as a bit to test in a TB(N)Z.
1314static Register getTestBitReg(Register Reg, uint64_t &Bit, bool &Invert,
1315 MachineRegisterInfo &MRI) {
1316 assert(Reg.isValid() && "Expected valid register!");
1317 while (MachineInstr *MI = getDefIgnoringCopies(Reg, MRI)) {
1318 unsigned Opc = MI->getOpcode();
1319
1320 if (!MI->getOperand(0).isReg() ||
1321 !MRI.hasOneNonDBGUse(MI->getOperand(0).getReg()))
1322 break;
1323
1324 // (tbz (any_ext x), b) -> (tbz x, b) if we don't use the extended bits.
1325 //
1326 // (tbz (trunc x), b) -> (tbz x, b) is always safe, because the bit number
1327 // on the truncated x is the same as the bit number on x.
1328 if (Opc == TargetOpcode::G_ANYEXT || Opc == TargetOpcode::G_ZEXT ||
1329 Opc == TargetOpcode::G_TRUNC) {
1330 Register NextReg = MI->getOperand(1).getReg();
1331 // Did we find something worth folding?
1332 if (!NextReg.isValid() || !MRI.hasOneNonDBGUse(NextReg))
1333 break;
1334
1335 // NextReg is worth folding. Keep looking.
1336 Reg = NextReg;
1337 continue;
1338 }
1339
1340 // Attempt to find a suitable operation with a constant on one side.
1341 Optional<uint64_t> C;
1342 Register TestReg;
1343 switch (Opc) {
1344 default:
1345 break;
1346 case TargetOpcode::G_AND:
1347 case TargetOpcode::G_XOR: {
1348 TestReg = MI->getOperand(1).getReg();
1349 Register ConstantReg = MI->getOperand(2).getReg();
1350 auto VRegAndVal = getConstantVRegValWithLookThrough(ConstantReg, MRI);
1351 if (!VRegAndVal) {
1352 // AND commutes, check the other side for a constant.
1353 // FIXME: Can we canonicalize the constant so that it's always on the
1354 // same side at some point earlier?
1355 std::swap(ConstantReg, TestReg);
1356 VRegAndVal = getConstantVRegValWithLookThrough(ConstantReg, MRI);
1357 }
1358 if (VRegAndVal)
1359 C = VRegAndVal->Value.getSExtValue();
1360 break;
1361 }
1362 case TargetOpcode::G_ASHR:
1363 case TargetOpcode::G_LSHR:
1364 case TargetOpcode::G_SHL: {
1365 TestReg = MI->getOperand(1).getReg();
1366 auto VRegAndVal =
1367 getConstantVRegValWithLookThrough(MI->getOperand(2).getReg(), MRI);
1368 if (VRegAndVal)
1369 C = VRegAndVal->Value.getSExtValue();
1370 break;
1371 }
1372 }
1373
1374 // Didn't find a constant or viable register. Bail out of the loop.
1375 if (!C || !TestReg.isValid())
1376 break;
1377
1378 // We found a suitable instruction with a constant. Check to see if we can
1379 // walk through the instruction.
1380 Register NextReg;
1381 unsigned TestRegSize = MRI.getType(TestReg).getSizeInBits();
1382 switch (Opc) {
1383 default:
1384 break;
1385 case TargetOpcode::G_AND:
1386 // (tbz (and x, m), b) -> (tbz x, b) when the b-th bit of m is set.
1387 if ((*C >> Bit) & 1)
1388 NextReg = TestReg;
1389 break;
1390 case TargetOpcode::G_SHL:
1391 // (tbz (shl x, c), b) -> (tbz x, b-c) when b-c is positive and fits in
1392 // the type of the register.
1393 if (*C <= Bit && (Bit - *C) < TestRegSize) {
1394 NextReg = TestReg;
1395 Bit = Bit - *C;
1396 }
1397 break;
1398 case TargetOpcode::G_ASHR:
1399 // (tbz (ashr x, c), b) -> (tbz x, b+c) or (tbz x, msb) if b+c is > # bits
1400 // in x
1401 NextReg = TestReg;
1402 Bit = Bit + *C;
1403 if (Bit >= TestRegSize)
1404 Bit = TestRegSize - 1;
1405 break;
1406 case TargetOpcode::G_LSHR:
1407 // (tbz (lshr x, c), b) -> (tbz x, b+c) when b + c is < # bits in x
1408 if ((Bit + *C) < TestRegSize) {
1409 NextReg = TestReg;
1410 Bit = Bit + *C;
1411 }
1412 break;
1413 case TargetOpcode::G_XOR:
1414 // We can walk through a G_XOR by inverting whether we use tbz/tbnz when
1415 // appropriate.
1416 //
1417 // e.g. If x' = xor x, c, and the b-th bit is set in c then
1418 //
1419 // tbz x', b -> tbnz x, b
1420 //
1421 // Because x' only has the b-th bit set if x does not.
1422 if ((*C >> Bit) & 1)
1423 Invert = !Invert;
1424 NextReg = TestReg;
1425 break;
1426 }
1427
1428 // Check if we found anything worth folding.
1429 if (!NextReg.isValid())
1430 return Reg;
1431 Reg = NextReg;
1432 }
1433
1434 return Reg;
1435}
1436
1437MachineInstr *AArch64InstructionSelector::emitTestBit(
1438 Register TestReg, uint64_t Bit, bool IsNegative, MachineBasicBlock *DstMBB,
1439 MachineIRBuilder &MIB) const {
1440 assert(TestReg.isValid());
1441 assert(ProduceNonFlagSettingCondBr &&
1442        "Cannot emit TB(N)Z with speculation tracking!");
1443 MachineRegisterInfo &MRI = *MIB.getMRI();
1444
1445 // Attempt to optimize the test bit by walking over instructions.
1446 TestReg = getTestBitReg(TestReg, Bit, IsNegative, MRI);
1447 LLT Ty = MRI.getType(TestReg);
1448 unsigned Size = Ty.getSizeInBits();
1449 assert(!Ty.isVector() && "Expected a scalar!");
1450 assert(Bit < 64 && "Bit is too large!");
1451
1452 // When the test register is a 64-bit register, we have to narrow to make
1453 // TBNZW work.
1454 bool UseWReg = Bit < 32;
1455 unsigned NecessarySize = UseWReg ? 32 : 64;
1456 if (Size != NecessarySize)
1457 TestReg = moveScalarRegClass(
1458 TestReg, UseWReg ? AArch64::GPR32RegClass : AArch64::GPR64RegClass,
1459 MIB);
1460
1461 static const unsigned OpcTable[2][2] = {{AArch64::TBZX, AArch64::TBNZX},
1462 {AArch64::TBZW, AArch64::TBNZW}};
1463 unsigned Opc = OpcTable[UseWReg][IsNegative];
1464 auto TestBitMI =
1465 MIB.buildInstr(Opc).addReg(TestReg).addImm(Bit).addMBB(DstMBB);
1466 constrainSelectedInstRegOperands(*TestBitMI, TII, TRI, RBI);
1467 return &*TestBitMI;
1468}
1469
1470bool AArch64InstructionSelector::tryOptAndIntoCompareBranch(
1471 MachineInstr &AndInst, bool Invert, MachineBasicBlock *DstMBB,
1472 MachineIRBuilder &MIB) const {
1473 assert(AndInst.getOpcode() == TargetOpcode::G_AND && "Expected G_AND only?");
1474 // Given something like this:
1475 //
1476 // %x = ...Something...
1477 // %one = G_CONSTANT i64 1
1478 // %zero = G_CONSTANT i64 0
1479 // %and = G_AND %x, %one
1480 // %cmp = G_ICMP intpred(ne), %and, %zero
1481 // %cmp_trunc = G_TRUNC %cmp
1482 // G_BRCOND %cmp_trunc, %bb.3
1483 //
1484 // We want to try and fold the AND into the G_BRCOND and produce either a
1485 // TBNZ (when we have intpred(ne)) or a TBZ (when we have intpred(eq)).
1486 //
1487 // In this case, we'd get
1488 //
1489 // TBNZ %x %bb.3
1490 //
1491
1492 // Check if the AND has a constant on its RHS which we can use as a mask.
1493 // If it's a power of 2, then it's the same as checking a specific bit.
1494 // (e.g., ANDing with 8 == ANDing with 000...100 == testing if bit 3 is set)
1495 auto MaybeBit = getConstantVRegValWithLookThrough(
1496 AndInst.getOperand(2).getReg(), *MIB.getMRI());
1497 if (!MaybeBit)
1498 return false;
1499
1500 int32_t Bit = MaybeBit->Value.exactLogBase2();
1501 if (Bit < 0)
1502 return false;
1503
1504 Register TestReg = AndInst.getOperand(1).getReg();
1505
1506 // Emit a TB(N)Z.
1507 emitTestBit(TestReg, Bit, Invert, DstMBB, MIB);
1508 return true;
1509}
1510
1511MachineInstr *AArch64InstructionSelector::emitCBZ(Register CompareReg,
1512 bool IsNegative,
1513 MachineBasicBlock *DestMBB,
1514 MachineIRBuilder &MIB) const {
1515 assert(ProduceNonFlagSettingCondBr && "CBZ does not set flags!");
1516 MachineRegisterInfo &MRI = *MIB.getMRI();
1517 assert(RBI.getRegBank(CompareReg, MRI, TRI)->getID() ==
1518            AArch64::GPRRegBankID &&
1519        "Expected GPRs only?");
1520 auto Ty = MRI.getType(CompareReg);
1521 unsigned Width = Ty.getSizeInBits();
1522 assert(!Ty.isVector() && "Expected scalar only?");
1523 assert(Width <= 64 && "Expected width to be at most 64?");
1524 static const unsigned OpcTable[2][2] = {{AArch64::CBZW, AArch64::CBZX},
1525 {AArch64::CBNZW, AArch64::CBNZX}};
1526 unsigned Opc = OpcTable[IsNegative][Width == 64];
1527 auto BranchMI = MIB.buildInstr(Opc, {}, {CompareReg}).addMBB(DestMBB);
1528 constrainSelectedInstRegOperands(*BranchMI, TII, TRI, RBI);
1529 return &*BranchMI;
1530}
1531
1532bool AArch64InstructionSelector::selectCompareBranchFedByFCmp(
1533 MachineInstr &I, MachineInstr &FCmp, MachineIRBuilder &MIB) const {
1534 assert(FCmp.getOpcode() == TargetOpcode::G_FCMP);
1535 assert(I.getOpcode() == TargetOpcode::G_BRCOND);
1536 // Unfortunately, the mapping of LLVM FP CC's onto AArch64 CC's isn't
1537 // totally clean. Some of them require two branches to implement.
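  // For example (illustrative): an ordered-and-not-equal (one) predicate has
  // no single AArch64 condition code, so it ends up as two branches to the
  // same destination, e.g. a b.mi followed by a b.gt.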
1538 auto Pred = (CmpInst::Predicate)FCmp.getOperand(1).getPredicate();
1539 emitFPCompare(FCmp.getOperand(2).getReg(), FCmp.getOperand(3).getReg(), MIB,
1540 Pred);
1541 AArch64CC::CondCode CC1, CC2;
1542 changeFCMPPredToAArch64CC(static_cast<CmpInst::Predicate>(Pred), CC1, CC2);
1543 MachineBasicBlock *DestMBB = I.getOperand(1).getMBB();
1544 MIB.buildInstr(AArch64::Bcc, {}, {}).addImm(CC1).addMBB(DestMBB);
1545 if (CC2 != AArch64CC::AL)
1546 MIB.buildInstr(AArch64::Bcc, {}, {}).addImm(CC2).addMBB(DestMBB);
1547 I.eraseFromParent();
1548 return true;
1549}
1550
1551bool AArch64InstructionSelector::tryOptCompareBranchFedByICmp(
1552 MachineInstr &I, MachineInstr &ICmp, MachineIRBuilder &MIB) const {
1553 assert(ICmp.getOpcode() == TargetOpcode::G_ICMP);
1554 assert(I.getOpcode() == TargetOpcode::G_BRCOND);
1555 // Attempt to optimize the G_BRCOND + G_ICMP into a TB(N)Z/CB(N)Z.
1556 //
1557 // Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z
1558 // instructions will not be produced, as they are conditional branch
1559 // instructions that do not set flags.
1560 if (!ProduceNonFlagSettingCondBr)
1561 return false;
1562
1563 MachineRegisterInfo &MRI = *MIB.getMRI();
1564 MachineBasicBlock *DestMBB = I.getOperand(1).getMBB();
1565 auto Pred =
1566 static_cast<CmpInst::Predicate>(ICmp.getOperand(1).getPredicate());
1567 Register LHS = ICmp.getOperand(2).getReg();
1568 Register RHS = ICmp.getOperand(3).getReg();
1569
1570 // We're allowed to emit a TB(N)Z/CB(N)Z. Try to do that.
1571 auto VRegAndVal = getConstantVRegValWithLookThrough(RHS, MRI);
1572 MachineInstr *AndInst = getOpcodeDef(TargetOpcode::G_AND, LHS, MRI);
1573
1574 // When we can emit a TB(N)Z, prefer that.
1575 //
1576 // Handle non-commutative condition codes first.
1577 // Note that we don't want to do this when we have a G_AND because it can
1578 // become a tst. The tst will make the test bit in the TB(N)Z redundant.
1579 if (VRegAndVal && !AndInst) {
1580 int64_t C = VRegAndVal->Value.getSExtValue();
1581
1582 // When we have a greater-than comparison, we can just test if the msb is
1583 // zero.
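    // For instance (illustrative): branching on "G_ICMP sgt %x, -1" is the
    // same as branching when the sign bit of %x is clear, i.e. a single tbz
    // on bit 63 for an s64 %x.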
1584 if (C == -1 && Pred == CmpInst::ICMP_SGT) {
1585 uint64_t Bit = MRI.getType(LHS).getSizeInBits() - 1;
1586 emitTestBit(LHS, Bit, /*IsNegative = */ false, DestMBB, MIB);
1587 I.eraseFromParent();
1588 return true;
1589 }
1590
1591 // When we have a less than comparison, we can just test if the msb is not
1592 // zero.
1593 if (C == 0 && Pred == CmpInst::ICMP_SLT) {
1594 uint64_t Bit = MRI.getType(LHS).getSizeInBits() - 1;
1595 emitTestBit(LHS, Bit, /*IsNegative = */ true, DestMBB, MIB);
1596 I.eraseFromParent();
1597 return true;
1598 }
1599 }
1600
1601 // Attempt to handle commutative condition codes. Right now, that's only
1602 // eq/ne.
1603 if (ICmpInst::isEquality(Pred)) {
1604 if (!VRegAndVal) {
1605 std::swap(RHS, LHS);
1606 VRegAndVal = getConstantVRegValWithLookThrough(RHS, MRI);
1607 AndInst = getOpcodeDef(TargetOpcode::G_AND, LHS, MRI);
1608 }
1609
1610 if (VRegAndVal && VRegAndVal->Value == 0) {
1611 // If there's a G_AND feeding into this branch, try to fold it away by
1612 // emitting a TB(N)Z instead.
1613 //
1614 // Note: If we have LT, then it *is* possible to fold, but it wouldn't be
1615 // beneficial. When we have an AND and LT, we need a TST/ANDS, so folding
1616 // would be redundant.
1617 if (AndInst &&
1618 tryOptAndIntoCompareBranch(
1619 *AndInst, /*Invert = */ Pred == CmpInst::ICMP_NE, DestMBB, MIB)) {
1620 I.eraseFromParent();
1621 return true;
1622 }
1623
1624 // Otherwise, try to emit a CB(N)Z instead.
1625 auto LHSTy = MRI.getType(LHS);
1626 if (!LHSTy.isVector() && LHSTy.getSizeInBits() <= 64) {
1627 emitCBZ(LHS, /*IsNegative = */ Pred == CmpInst::ICMP_NE, DestMBB, MIB);
1628 I.eraseFromParent();
1629 return true;
1630 }
1631 }
1632 }
1633
1634 return false;
1635}
1636
1637bool AArch64InstructionSelector::selectCompareBranchFedByICmp(
1638 MachineInstr &I, MachineInstr &ICmp, MachineIRBuilder &MIB) const {
1639 assert(ICmp.getOpcode() == TargetOpcode::G_ICMP);
1640 assert(I.getOpcode() == TargetOpcode::G_BRCOND);
1641 if (tryOptCompareBranchFedByICmp(I, ICmp, MIB))
1642 return true;
1643
1644 // Couldn't optimize. Emit a compare + a Bcc.
1645 MachineBasicBlock *DestMBB = I.getOperand(1).getMBB();
1646 auto PredOp = ICmp.getOperand(1);
1647 emitIntegerCompare(ICmp.getOperand(2), ICmp.getOperand(3), PredOp, MIB);
1648 const AArch64CC::CondCode CC = changeICMPPredToAArch64CC(
1649 static_cast<CmpInst::Predicate>(PredOp.getPredicate()));
1650 MIB.buildInstr(AArch64::Bcc, {}, {}).addImm(CC).addMBB(DestMBB);
1651 I.eraseFromParent();
1652 return true;
1653}
1654
1655bool AArch64InstructionSelector::selectCompareBranch(
1656 MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) {
1657 Register CondReg = I.getOperand(0).getReg();
1658 MachineInstr *CCMI = MRI.getVRegDef(CondReg);
1659 if (CCMI->getOpcode() == TargetOpcode::G_TRUNC) {
1660 CondReg = CCMI->getOperand(1).getReg();
1661 CCMI = MRI.getVRegDef(CondReg);
1662 }
1663
1664 // Try to select the G_BRCOND using whatever is feeding the condition if
1665 // possible.
1666 unsigned CCMIOpc = CCMI->getOpcode();
1667 if (CCMIOpc == TargetOpcode::G_FCMP)
1668 return selectCompareBranchFedByFCmp(I, *CCMI, MIB);
1669 if (CCMIOpc == TargetOpcode::G_ICMP)
1670 return selectCompareBranchFedByICmp(I, *CCMI, MIB);
1671
1672 // Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z
1673 // instructions will not be produced, as they are conditional branch
1674 // instructions that do not set flags.
1675 if (ProduceNonFlagSettingCondBr) {
1676 emitTestBit(CondReg, /*Bit = */ 0, /*IsNegative = */ true,
1677 I.getOperand(1).getMBB(), MIB);
1678 I.eraseFromParent();
1679 return true;
1680 }
1681
1682 // Can't emit TB(N)Z/CB(N)Z. Emit a tst + bcc instead.
1683 auto TstMI =
1684 MIB.buildInstr(AArch64::ANDSWri, {LLT::scalar(32)}, {CondReg}).addImm(1);
1685 constrainSelectedInstRegOperands(*TstMI, TII, TRI, RBI);
1686 auto Bcc = MIB.buildInstr(AArch64::Bcc)
1687 .addImm(AArch64CC::EQ)
1688 .addMBB(I.getOperand(1).getMBB());
1689 I.eraseFromParent();
1690 return constrainSelectedInstRegOperands(*Bcc, TII, TRI, RBI);
1691}
1692
1693/// Returns the element immediate value of a vector shift operand if found.
1694/// This needs to detect a splat-like operation, e.g. a G_BUILD_VECTOR.
1695static Optional<int64_t> getVectorShiftImm(Register Reg,
1696 MachineRegisterInfo &MRI) {
1697 assert(MRI.getType(Reg).isVector() && "Expected a *vector* shift operand");
1698 MachineInstr *OpMI = MRI.getVRegDef(Reg);
1699 assert(OpMI && "Expected to find a vreg def for vector shift operand");
1700 return getAArch64VectorSplatScalar(*OpMI, MRI);
1701}
1702
1703/// Matches and returns the shift immediate value for a SHL instruction given
1704/// a shift operand.
1705static Optional<int64_t> getVectorSHLImm(LLT SrcTy, Register Reg, MachineRegisterInfo &MRI) {
1706 Optional<int64_t> ShiftImm = getVectorShiftImm(Reg, MRI);
1707 if (!ShiftImm)
1708 return None;
1709 // Check the immediate is in range for a SHL.
1710 int64_t Imm = *ShiftImm;
1711 if (Imm < 0)
1712 return None;
1713 switch (SrcTy.getElementType().getSizeInBits()) {
1714 default:
1715 LLVM_DEBUG(dbgs() << "Unhandled element type for vector shift");
1716 return None;
1717 case 8:
1718 if (Imm > 7)
1719 return None;
1720 break;
1721 case 16:
1722 if (Imm > 15)
1723 return None;
1724 break;
1725 case 32:
1726 if (Imm > 31)
1727 return None;
1728 break;
1729 case 64:
1730 if (Imm > 63)
1731 return None;
1732 break;
1733 }
1734 return Imm;
1735}
1736
1737bool AArch64InstructionSelector::selectVectorSHL(MachineInstr &I,
1738 MachineRegisterInfo &MRI) {
1739 assert(I.getOpcode() == TargetOpcode::G_SHL);
1740 Register DstReg = I.getOperand(0).getReg();
1741 const LLT Ty = MRI.getType(DstReg);
1742 Register Src1Reg = I.getOperand(1).getReg();
1743 Register Src2Reg = I.getOperand(2).getReg();
1744
1745 if (!Ty.isVector())
1746 return false;
1747
1748 // Check if we have a vector of constants on RHS that we can select as the
1749 // immediate form.
1750 Optional<int64_t> ImmVal = getVectorSHLImm(Ty, Src2Reg, MRI);
1751
1752 unsigned Opc = 0;
1753 if (Ty == LLT::fixed_vector(2, 64)) {
1754 Opc = ImmVal ? AArch64::SHLv2i64_shift : AArch64::USHLv2i64;
1755 } else if (Ty == LLT::fixed_vector(4, 32)) {
1756 Opc = ImmVal ? AArch64::SHLv4i32_shift : AArch64::USHLv4i32;
1757 } else if (Ty == LLT::fixed_vector(2, 32)) {
1758 Opc = ImmVal ? AArch64::SHLv2i32_shift : AArch64::USHLv2i32;
1759 } else if (Ty == LLT::fixed_vector(4, 16)) {
1760 Opc = ImmVal ? AArch64::SHLv4i16_shift : AArch64::USHLv4i16;
1761 } else if (Ty == LLT::fixed_vector(8, 16)) {
1762 Opc = ImmVal ? AArch64::SHLv8i16_shift : AArch64::USHLv8i16;
1763 } else if (Ty == LLT::fixed_vector(16, 8)) {
1764 Opc = ImmVal ? AArch64::SHLv16i8_shift : AArch64::USHLv16i8;
1765 } else if (Ty == LLT::fixed_vector(8, 8)) {
1766 Opc = ImmVal ? AArch64::SHLv8i8_shift : AArch64::USHLv8i8;
1767 } else {
1768 LLVM_DEBUG(dbgs() << "Unhandled G_SHL type");
1769 return false;
1770 }
1771
1772 auto Shl = MIB.buildInstr(Opc, {DstReg}, {Src1Reg});
1773 if (ImmVal)
1774 Shl.addImm(*ImmVal);
1775 else
1776 Shl.addUse(Src2Reg);
1777 constrainSelectedInstRegOperands(*Shl, TII, TRI, RBI);
1778 I.eraseFromParent();
1779 return true;
1780}
1781
1782bool AArch64InstructionSelector::selectVectorAshrLshr(
1783 MachineInstr &I, MachineRegisterInfo &MRI) {
1784 assert(I.getOpcode() == TargetOpcode::G_ASHR ||
1785        I.getOpcode() == TargetOpcode::G_LSHR);
1786 Register DstReg = I.getOperand(0).getReg();
1787 const LLT Ty = MRI.getType(DstReg);
1788 Register Src1Reg = I.getOperand(1).getReg();
1789 Register Src2Reg = I.getOperand(2).getReg();
1790
1791 if (!Ty.isVector())
1792 return false;
1793
1794 bool IsASHR = I.getOpcode() == TargetOpcode::G_ASHR;
1795
1796 // We expect the immediate case to be lowered in the PostLegalCombiner to
1797 // AArch64ISD::VASHR or AArch64ISD::VLSHR equivalents.
1798
1799 // There is no shift-right-by-register instruction, but the
1800 // shift-left-by-register instruction takes a signed shift amount, where
1801 // negative values specify a right shift.
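  // Illustrative shape of what is built below (register names arbitrary):
  //   neg  v_tmp, v_amt
  //   sshl v_dst, v_src, v_tmp    (ushl for the logical-shift case)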
1802
1803 unsigned Opc = 0;
1804 unsigned NegOpc = 0;
1805 const TargetRegisterClass *RC =
1806 getRegClassForTypeOnBank(Ty, RBI.getRegBank(AArch64::FPRRegBankID), RBI);
1807 if (Ty == LLT::fixed_vector(2, 64)) {
1808 Opc = IsASHR ? AArch64::SSHLv2i64 : AArch64::USHLv2i64;
1809 NegOpc = AArch64::NEGv2i64;
1810 } else if (Ty == LLT::fixed_vector(4, 32)) {
1811 Opc = IsASHR ? AArch64::SSHLv4i32 : AArch64::USHLv4i32;
1812 NegOpc = AArch64::NEGv4i32;
1813 } else if (Ty == LLT::fixed_vector(2, 32)) {
1814 Opc = IsASHR ? AArch64::SSHLv2i32 : AArch64::USHLv2i32;
1815 NegOpc = AArch64::NEGv2i32;
1816 } else if (Ty == LLT::fixed_vector(4, 16)) {
1817 Opc = IsASHR ? AArch64::SSHLv4i16 : AArch64::USHLv4i16;
1818 NegOpc = AArch64::NEGv4i16;
1819 } else if (Ty == LLT::fixed_vector(8, 16)) {
1820 Opc = IsASHR ? AArch64::SSHLv8i16 : AArch64::USHLv8i16;
1821 NegOpc = AArch64::NEGv8i16;
1822 } else if (Ty == LLT::fixed_vector(16, 8)) {
1823 Opc = IsASHR ? AArch64::SSHLv16i8 : AArch64::USHLv16i8;
1824 NegOpc = AArch64::NEGv16i8;
1825 } else if (Ty == LLT::fixed_vector(8, 8)) {
1826 Opc = IsASHR ? AArch64::SSHLv8i8 : AArch64::USHLv8i8;
1827 NegOpc = AArch64::NEGv8i8;
1828 } else {
1829 LLVM_DEBUG(dbgs() << "Unhandled G_ASHR type");
1830 return false;
1831 }
1832
1833 auto Neg = MIB.buildInstr(NegOpc, {RC}, {Src2Reg});
1834 constrainSelectedInstRegOperands(*Neg, TII, TRI, RBI);
1835 auto SShl = MIB.buildInstr(Opc, {DstReg}, {Src1Reg, Neg});
1836 constrainSelectedInstRegOperands(*SShl, TII, TRI, RBI);
1837 I.eraseFromParent();
1838 return true;
1839}
1840
1841bool AArch64InstructionSelector::selectVaStartAAPCS(
1842 MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const {
1843 return false;
1844}
1845
1846bool AArch64InstructionSelector::selectVaStartDarwin(
1847 MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const {
1848 AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
1849 Register ListReg = I.getOperand(0).getReg();
1850
1851 Register ArgsAddrReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
1852
1853 auto MIB =
1854 BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::ADDXri))
1855 .addDef(ArgsAddrReg)
1856 .addFrameIndex(FuncInfo->getVarArgsStackIndex())
1857 .addImm(0)
1858 .addImm(0);
1859
1860 constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
1861
1862 MIB = BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::STRXui))
1863 .addUse(ArgsAddrReg)
1864 .addUse(ListReg)
1865 .addImm(0)
1866 .addMemOperand(*I.memoperands_begin());
1867
1868 constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
1869 I.eraseFromParent();
1870 return true;
1871}
1872
1873void AArch64InstructionSelector::materializeLargeCMVal(
1874 MachineInstr &I, const Value *V, unsigned OpFlags) {
1875 MachineBasicBlock &MBB = *I.getParent();
1876 MachineFunction &MF = *MBB.getParent();
1877 MachineRegisterInfo &MRI = MF.getRegInfo();
1878
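  // Rough shape of the materialization built below (illustrative assembly;
  // the exact relocation operators follow from OpFlags):
  //   movz x0, #:abs_g0_nc:sym
  //   movk x0, #:abs_g1_nc:sym
  //   movk x0, #:abs_g2_nc:sym
  //   movk x0, #:abs_g3:sym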
1879 auto MovZ = MIB.buildInstr(AArch64::MOVZXi, {&AArch64::GPR64RegClass}, {});
1880 MovZ->addOperand(MF, I.getOperand(1));
1881 MovZ->getOperand(1).setTargetFlags(OpFlags | AArch64II::MO_G0 |
1882 AArch64II::MO_NC);
1883 MovZ->addOperand(MF, MachineOperand::CreateImm(0));
1884 constrainSelectedInstRegOperands(*MovZ, TII, TRI, RBI);
1885
1886 auto BuildMovK = [&](Register SrcReg, unsigned char Flags, unsigned Offset,
1887 Register ForceDstReg) {
1888 Register DstReg = ForceDstReg
1889 ? ForceDstReg
1890 : MRI.createVirtualRegister(&AArch64::GPR64RegClass);
1891 auto MovI = MIB.buildInstr(AArch64::MOVKXi).addDef(DstReg).addUse(SrcReg);
1892 if (auto *GV = dyn_cast<GlobalValue>(V)) {
1893 MovI->addOperand(MF, MachineOperand::CreateGA(
1894 GV, MovZ->getOperand(1).getOffset(), Flags));
1895 } else {
1896 MovI->addOperand(
1897 MF, MachineOperand::CreateBA(cast<BlockAddress>(V),
1898 MovZ->getOperand(1).getOffset(), Flags));
1899 }
1900 MovI->addOperand(MF, MachineOperand::CreateImm(Offset));
1901 constrainSelectedInstRegOperands(*MovI, TII, TRI, RBI);
1902 return DstReg;
1903 };
1904 Register DstReg = BuildMovK(MovZ.getReg(0),
1905 AArch64II::MO_G1 | AArch64II::MO_NC, 16, 0);
1906 DstReg = BuildMovK(DstReg, AArch64II::MO_G2 | AArch64II::MO_NC, 32, 0);
1907 BuildMovK(DstReg, AArch64II::MO_G3, 48, I.getOperand(0).getReg());
1908}
1909
1910bool AArch64InstructionSelector::preISelLower(MachineInstr &I) {
1911 MachineBasicBlock &MBB = *I.getParent();
1912 MachineFunction &MF = *MBB.getParent();
1913 MachineRegisterInfo &MRI = MF.getRegInfo();
1914
1915 switch (I.getOpcode()) {
1916 case TargetOpcode::G_SHL:
1917 case TargetOpcode::G_ASHR:
1918 case TargetOpcode::G_LSHR: {
1919 // These shifts are legalized to have 64 bit shift amounts because we want
1920 // to take advantage of the existing imported selection patterns that assume
1921 // the immediates are s64s. However, if the shifted type is 32 bits and for
1922 // some reason we receive input GMIR that has an s64 shift amount that's not
1923 // a G_CONSTANT, insert a truncate so that we can still select the s32
1924 // register-register variant.
1925 Register SrcReg = I.getOperand(1).getReg();
1926 Register ShiftReg = I.getOperand(2).getReg();
1927 const LLT ShiftTy = MRI.getType(ShiftReg);
1928 const LLT SrcTy = MRI.getType(SrcReg);
1929 if (SrcTy.isVector())
1930 return false;
1931 assert(!ShiftTy.isVector() && "unexpected vector shift ty");
1932 if (SrcTy.getSizeInBits() != 32 || ShiftTy.getSizeInBits() != 64)
1933 return false;
1934 auto *AmtMI = MRI.getVRegDef(ShiftReg);
1935 assert(AmtMI && "could not find a vreg definition for shift amount");
1936 if (AmtMI->getOpcode() != TargetOpcode::G_CONSTANT) {
1937 // Insert a subregister copy to implement a 64->32 trunc
1938 auto Trunc = MIB.buildInstr(TargetOpcode::COPY, {SrcTy}, {})
1939 .addReg(ShiftReg, 0, AArch64::sub_32);
1940 MRI.setRegBank(Trunc.getReg(0), RBI.getRegBank(AArch64::GPRRegBankID));
1941 I.getOperand(2).setReg(Trunc.getReg(0));
1942 }
1943 return true;
1944 }
1945 case TargetOpcode::G_STORE: {
1946 bool Changed = contractCrossBankCopyIntoStore(I, MRI);
1947 MachineOperand &SrcOp = I.getOperand(0);
1948 if (MRI.getType(SrcOp.getReg()).isPointer()) {
1949 // Allow matching with imported patterns for stores of pointers. Unlike
1950 // G_LOAD/G_PTR_ADD, we may not have selected all users. So, emit a copy
1951 // and constrain.
1952 auto Copy = MIB.buildCopy(LLT::scalar(64), SrcOp);
1953 Register NewSrc = Copy.getReg(0);
1954 SrcOp.setReg(NewSrc);
1955 RBI.constrainGenericRegister(NewSrc, AArch64::GPR64RegClass, MRI);
1956 Changed = true;
1957 }
1958 return Changed;
1959 }
1960 case TargetOpcode::G_PTR_ADD:
1961 return convertPtrAddToAdd(I, MRI);
1962 case TargetOpcode::G_LOAD: {
1963 // For scalar loads of pointers, we try to convert the dest type from p0
1964 // to s64 so that our imported patterns can match. Like with the G_PTR_ADD
1965 // conversion, this should be ok because all users should have been
1966 // selected already, so the type doesn't matter for them.
1967 Register DstReg = I.getOperand(0).getReg();
1968 const LLT DstTy = MRI.getType(DstReg);
1969 if (!DstTy.isPointer())
1970 return false;
1971 MRI.setType(DstReg, LLT::scalar(64));
1972 return true;
1973 }
1974 case AArch64::G_DUP: {
1975 // Convert the type from p0 to s64 to help selection.
1976 LLT DstTy = MRI.getType(I.getOperand(0).getReg());
1977 if (!DstTy.getElementType().isPointer())
1978 return false;
1979 auto NewSrc = MIB.buildCopy(LLT::scalar(64), I.getOperand(1).getReg());
1980 MRI.setType(I.getOperand(0).getReg(),
1981 DstTy.changeElementType(LLT::scalar(64)));
1982 MRI.setRegClass(NewSrc.getReg(0), &AArch64::GPR64RegClass);
1983 I.getOperand(1).setReg(NewSrc.getReg(0));
1984 return true;
1985 }
1986 case TargetOpcode::G_UITOFP:
1987 case TargetOpcode::G_SITOFP: {
1988 // If both source and destination regbanks are FPR, then convert the opcode
1989 // to G_SITOF so that the importer can select it to an fpr variant.
1990 // Otherwise, it ends up matching an fpr/gpr variant and adding a cross-bank
1991 // copy.
1992 Register SrcReg = I.getOperand(1).getReg();
1993 LLT SrcTy = MRI.getType(SrcReg);
1994 LLT DstTy = MRI.getType(I.getOperand(0).getReg());
1995 if (SrcTy.isVector() || SrcTy.getSizeInBits() != DstTy.getSizeInBits())
1996 return false;
1997
1998 if (RBI.getRegBank(SrcReg, MRI, TRI)->getID() == AArch64::FPRRegBankID) {
1999 if (I.getOpcode() == TargetOpcode::G_SITOFP)
2000 I.setDesc(TII.get(AArch64::G_SITOF));
2001 else
2002 I.setDesc(TII.get(AArch64::G_UITOF));
2003 return true;
2004 }
2005 return false;
2006 }
2007 default:
2008 return false;
2009 }
2010}
2011
2012/// This lowering tries to look for G_PTR_ADD instructions and then converts
2013/// them to a standard G_ADD with a COPY on the source.
2014///
2015/// The motivation behind this is to expose the add semantics to the imported
2016/// tablegen patterns. We shouldn't need to check for uses being loads/stores,
2017/// because the selector works bottom up, uses before defs. By the time we
2018/// end up trying to select a G_PTR_ADD, we should have already attempted to
2019/// fold this into addressing modes and were therefore unsuccessful.
2020bool AArch64InstructionSelector::convertPtrAddToAdd(
2021 MachineInstr &I, MachineRegisterInfo &MRI) {
2022 assert(I.getOpcode() == TargetOpcode::G_PTR_ADD && "Expected G_PTR_ADD");
2023 Register DstReg = I.getOperand(0).getReg();
2024 Register AddOp1Reg = I.getOperand(1).getReg();
2025 const LLT PtrTy = MRI.getType(DstReg);
2026 if (PtrTy.getAddressSpace() != 0)
2027 return false;
2028
2029 const LLT CastPtrTy =
2030 PtrTy.isVector() ? LLT::fixed_vector(2, 64) : LLT::scalar(64);
2031 auto PtrToInt = MIB.buildPtrToInt(CastPtrTy, AddOp1Reg);
2032 // Set regbanks on the registers.
2033 if (PtrTy.isVector())
2034 MRI.setRegBank(PtrToInt.getReg(0), RBI.getRegBank(AArch64::FPRRegBankID));
2035 else
2036 MRI.setRegBank(PtrToInt.getReg(0), RBI.getRegBank(AArch64::GPRRegBankID));
2037
2038 // Now turn the %dst(p0) = G_PTR_ADD %base, off into:
2039 // %dst(intty) = G_ADD %intbase, off
2040 I.setDesc(TII.get(TargetOpcode::G_ADD));
2041 MRI.setType(DstReg, CastPtrTy);
2042 I.getOperand(1).setReg(PtrToInt.getReg(0));
2043 if (!select(*PtrToInt)) {
2044 LLVM_DEBUG(dbgs() << "Failed to select G_PTRTOINT in convertPtrAddToAdd");
2045 return false;
2046 }
2047
2048 // Also take the opportunity here to try to do some optimization.
2049 // Try to convert this into a G_SUB if the offset is a 0-x negate idiom.
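  // e.g. (illustrative): if the offset was produced by a "0 - %n" subtraction,
  // the G_ADD above is rewritten as "%dst = G_SUB %intbase, %n".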
2050 Register NegatedReg;
2051 if (!mi_match(I.getOperand(2).getReg(), MRI, m_Neg(m_Reg(NegatedReg))))
2052 return true;
2053 I.getOperand(2).setReg(NegatedReg);
2054 I.setDesc(TII.get(TargetOpcode::G_SUB));
2055 return true;
2056}
2057
2058bool AArch64InstructionSelector::earlySelectSHL(MachineInstr &I,
2059 MachineRegisterInfo &MRI) {
2060 // We try to match the immediate variant of LSL, which is actually an alias
2061 // for a special case of UBFM. Otherwise, we fall back to the imported
2062 // selector which will match the register variant.
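  // For reference (illustrative): "lsl x0, x1, #4" is the alias for
  // "ubfm x0, x1, #60, #59", i.e. immr = (64 - shift) % 64 and
  // imms = 63 - shift in the 64-bit form.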
2063 assert(I.getOpcode() == TargetOpcode::G_SHL && "unexpected op");
2064 const auto &MO = I.getOperand(2);
2065 auto VRegAndVal = getConstantVRegVal(MO.getReg(), MRI);
2066 if (!VRegAndVal)
2067 return false;
2068
2069 const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
2070 if (DstTy.isVector())
2071 return false;
2072 bool Is64Bit = DstTy.getSizeInBits() == 64;
2073 auto Imm1Fn = Is64Bit ? selectShiftA_64(MO) : selectShiftA_32(MO);
2074 auto Imm2Fn = Is64Bit ? selectShiftB_64(MO) : selectShiftB_32(MO);
2075
2076 if (!Imm1Fn || !Imm2Fn)
2077 return false;
2078
2079 auto NewI =
2080 MIB.buildInstr(Is64Bit ? AArch64::UBFMXri : AArch64::UBFMWri,
2081 {I.getOperand(0).getReg()}, {I.getOperand(1).getReg()});
2082
2083 for (auto &RenderFn : *Imm1Fn)
2084 RenderFn(NewI);
2085 for (auto &RenderFn : *Imm2Fn)
2086 RenderFn(NewI);
2087
2088 I.eraseFromParent();
2089 return constrainSelectedInstRegOperands(*NewI, TII, TRI, RBI);
2090}
2091
2092bool AArch64InstructionSelector::contractCrossBankCopyIntoStore(
2093 MachineInstr &I, MachineRegisterInfo &MRI) {
2094 assert(I.getOpcode() == TargetOpcode::G_STORE && "Expected G_STORE");
2095 // If we're storing a scalar, it doesn't matter what register bank that
2096 // scalar is on. All that matters is the size.
2097 //
2098 // So, if we see something like this (with a 32-bit scalar as an example):
2099 //
2100 // %x:gpr(s32) = ... something ...
2101 // %y:fpr(s32) = COPY %x:gpr(s32)
2102 // G_STORE %y:fpr(s32)
2103 //
2104 // We can fix this up into something like this:
2105 //
2106 // G_STORE %x:gpr(s32)
2107 //
2108 // And then continue the selection process normally.
2109 Register DefDstReg = getSrcRegIgnoringCopies(I.getOperand(0).getReg(), MRI);
2110 if (!DefDstReg.isValid())
2111 return false;
2112 LLT DefDstTy = MRI.getType(DefDstReg);
2113 Register StoreSrcReg = I.getOperand(0).getReg();
2114 LLT StoreSrcTy = MRI.getType(StoreSrcReg);
2115
2116 // If we get something strange like a physical register, then we shouldn't
2117 // go any further.
2118 if (!DefDstTy.isValid())
2119 return false;
2120
2121 // Are the source and dst types the same size?
2122 if (DefDstTy.getSizeInBits() != StoreSrcTy.getSizeInBits())
2123 return false;
2124
2125 if (RBI.getRegBank(StoreSrcReg, MRI, TRI) ==
2126 RBI.getRegBank(DefDstReg, MRI, TRI))
2127 return false;
2128
2129 // We have a cross-bank copy, which is entering a store. Let's fold it.
2130 I.getOperand(0).setReg(DefDstReg);
2131 return true;
2132}
2133
2134bool AArch64InstructionSelector::earlySelect(MachineInstr &I) {
2135 assert(I.getParent() && "Instruction should be in a basic block!");
2136 assert(I.getParent()->getParent() && "Instruction should be in a function!");
2137
2138 MachineBasicBlock &MBB = *I.getParent();
2139 MachineFunction &MF = *MBB.getParent();
2140 MachineRegisterInfo &MRI = MF.getRegInfo();
2141
2142 switch (I.getOpcode()) {
2143 case AArch64::G_DUP: {
2144 // Before selecting a DUP instruction, check if it is better selected as a
2145 // MOV or load from a constant pool.
2146 Register Src = I.getOperand(1).getReg();
2147 auto ValAndVReg = getConstantVRegValWithLookThrough(Src, MRI);
2148 if (!ValAndVReg)
2149 return false;
2150 LLVMContext &Ctx = MF.getFunction().getContext();
2151 Register Dst = I.getOperand(0).getReg();
2152 auto *CV = ConstantDataVector::getSplat(
2153 MRI.getType(Dst).getNumElements(),
2154 ConstantInt::get(Type::getIntNTy(Ctx, MRI.getType(Src).getSizeInBits()),
2155 ValAndVReg->Value));
2156 if (!emitConstantVector(Dst, CV, MIB, MRI))
2157 return false;
2158 I.eraseFromParent();
2159 return true;
2160 }
2161 case TargetOpcode::G_BR:
2162 return false;
2163 case TargetOpcode::G_SHL:
2164 return earlySelectSHL(I, MRI);
2165 case TargetOpcode::G_CONSTANT: {
2166 bool IsZero = false;
2167 if (I.getOperand(1).isCImm())
2168 IsZero = I.getOperand(1).getCImm()->getZExtValue() == 0;
2169 else if (I.getOperand(1).isImm())
2170 IsZero = I.getOperand(1).getImm() == 0;
2171
2172 if (!IsZero)
2173 return false;
2174
2175 Register DefReg = I.getOperand(0).getReg();
2176 LLT Ty = MRI.getType(DefReg);
2177 if (Ty.getSizeInBits() == 64) {
2178 I.getOperand(1).ChangeToRegister(AArch64::XZR, false);
2179 RBI.constrainGenericRegister(DefReg, AArch64::GPR64RegClass, MRI);
2180 } else if (Ty.getSizeInBits() == 32) {
2181 I.getOperand(1).ChangeToRegister(AArch64::WZR, false);
2182 RBI.constrainGenericRegister(DefReg, AArch64::GPR32RegClass, MRI);
2183 } else
2184 return false;
2185
2186 I.setDesc(TII.get(TargetOpcode::COPY));
2187 return true;
2188 }
2189
2190 case TargetOpcode::G_ADD: {
2191 // Check if this is being fed by a G_ICMP on either side.
2192 //
2193 // (cmp pred, x, y) + z
2194 //
2195 // In the above case, when the cmp is true, we increment z by 1. So, we can
2196 // fold the add into the cset for the cmp by using cinc.
2197 //
2198 // FIXME: This would probably be a lot nicer in PostLegalizerLowering.
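    // Roughly (illustrative MIR and assembly):
    //   %c:gpr(s32) = G_ICMP intpred(eq), %a, %b
    //   %r:gpr(s32) = G_ADD %z, %c
    // can be selected as:
    //   cmp  w_a, w_b
    //   cinc w_r, w_z, eq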
2199 Register X = I.getOperand(1).getReg();
2200
2201 // Only handle scalars. Scalar G_ICMP is only legal for s32, so bail out
2202 // early if we see it.
2203 LLT Ty = MRI.getType(X);
2204 if (Ty.isVector() || Ty.getSizeInBits() != 32)
2205 return false;
2206
2207 Register CmpReg = I.getOperand(2).getReg();
2208 MachineInstr *Cmp = getOpcodeDef(TargetOpcode::G_ICMP, CmpReg, MRI);
2209 if (!Cmp) {
2210 std::swap(X, CmpReg);
2211 Cmp = getOpcodeDef(TargetOpcode::G_ICMP, CmpReg, MRI);
2212 if (!Cmp)
2213 return false;
2214 }
2215 auto Pred =
2216 static_cast<CmpInst::Predicate>(Cmp->getOperand(1).getPredicate());
2217 emitIntegerCompare(Cmp->getOperand(2), Cmp->getOperand(3),
2218 Cmp->getOperand(1), MIB);
2219 emitCSetForICMP(I.getOperand(0).getReg(), Pred, MIB, X);
2220 I.eraseFromParent();
2221 return true;
2222 }
2223 case TargetOpcode::G_OR: {
2224 // Look for operations that take the lower `Width=Size-ShiftImm` bits of
2225 // `ShiftSrc` and insert them into the upper `Width` bits of `MaskSrc` via
2226 // shifting and masking that we can replace with a BFI (encoded as a BFM).
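    // Illustrative s32 example: for
    //   %or = G_OR (G_SHL %ShiftSrc, 8), (G_AND %MaskSrc, 0xff)
    // the low 24 bits of %ShiftSrc land at bit 8 over %MaskSrc, which a single
    // BFM with immr = 24 and imms = 23 expresses.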
2227 Register Dst = I.getOperand(0).getReg();
2228 LLT Ty = MRI.getType(Dst);
2229
2230 if (!Ty.isScalar())
2231 return false;
2232
2233 unsigned Size = Ty.getSizeInBits();
2234 if (Size != 32 && Size != 64)
2235 return false;
2236
2237 Register ShiftSrc;
2238 int64_t ShiftImm;
2239 Register MaskSrc;
2240 int64_t MaskImm;
2241 if (!mi_match(
2242 Dst, MRI,
2243 m_GOr(m_OneNonDBGUse(m_GShl(m_Reg(ShiftSrc), m_ICst(ShiftImm))),
2244 m_OneNonDBGUse(m_GAnd(m_Reg(MaskSrc), m_ICst(MaskImm))))))
2245 return false;
2246
2247 if (ShiftImm > Size || ((1ULL << ShiftImm) - 1ULL) != uint64_t(MaskImm))
2248 return false;
2249
2250 int64_t Immr = Size - ShiftImm;
2251 int64_t Imms = Size - ShiftImm - 1;
2252 unsigned Opc = Size == 32 ? AArch64::BFMWri : AArch64::BFMXri;
2253 emitInstr(Opc, {Dst}, {MaskSrc, ShiftSrc, Immr, Imms}, MIB);
2254 I.eraseFromParent();
2255 return true;
2256 }
2257 default:
2258 return false;
2259 }
2260}
2261
2262bool AArch64InstructionSelector::select(MachineInstr &I) {
2263 assert(I.getParent() && "Instruction should be in a basic block!");
2264 assert(I.getParent()->getParent() && "Instruction should be in a function!");
2265
2266 MachineBasicBlock &MBB = *I.getParent();
2267 MachineFunction &MF = *MBB.getParent();
2268 MachineRegisterInfo &MRI = MF.getRegInfo();
2269
2270 const AArch64Subtarget *Subtarget =
2271 &static_cast<const AArch64Subtarget &>(MF.getSubtarget());
2272 if (Subtarget->requiresStrictAlign()) {
2273 // We don't support this feature yet.
2274 LLVM_DEBUG(dbgs() << "AArch64 GISel does not support strict-align yet\n");
2275 return false;
2276 }
2277
2278 MIB.setInstrAndDebugLoc(I);
2279
2280 unsigned Opcode = I.getOpcode();
2281 // G_PHI requires same handling as PHI
2282 if (!I.isPreISelOpcode() || Opcode == TargetOpcode::G_PHI) {
2283 // Certain non-generic instructions also need some special handling.
2284
2285 if (Opcode == TargetOpcode::LOAD_STACK_GUARD)
2286 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2287
2288 if (Opcode == TargetOpcode::PHI || Opcode == TargetOpcode::G_PHI) {
2289 const Register DefReg = I.getOperand(0).getReg();
2290 const LLT DefTy = MRI.getType(DefReg);
2291
2292 const RegClassOrRegBank &RegClassOrBank =
2293 MRI.getRegClassOrRegBank(DefReg);
2294
2295 const TargetRegisterClass *DefRC
2296 = RegClassOrBank.dyn_cast<const TargetRegisterClass *>();
2297 if (!DefRC) {
2298 if (!DefTy.isValid()) {
2299 LLVM_DEBUG(dbgs() << "PHI operand has no type, not a gvreg?\n");
2300 return false;
2301 }
2302 const RegisterBank &RB = *RegClassOrBank.get<const RegisterBank *>();
2303 DefRC = getRegClassForTypeOnBank(DefTy, RB, RBI);
2304 if (!DefRC) {
2305 LLVM_DEBUG(dbgs() << "PHI operand has unexpected size/bank\n");
2306 return false;
2307 }
2308 }
2309
2310 I.setDesc(TII.get(TargetOpcode::PHI));
2311
2312 return RBI.constrainGenericRegister(DefReg, *DefRC, MRI);
2313 }
2314
2315 if (I.isCopy())
2316 return selectCopy(I, TII, MRI, TRI, RBI);
2317
2318 return true;
2319 }
2320
2321
2322 if (I.getNumOperands() != I.getNumExplicitOperands()) {
2323 LLVM_DEBUG(
2324     dbgs() << "Generic instruction has unexpected implicit operands\n");
2325 return false;
2326 }
2327
2328 // Try to do some lowering before we start instruction selecting. These
2329 // lowerings are purely transformations on the input G_MIR and so selection
2330 // must continue after any modification of the instruction.
2331 if (preISelLower(I)) {
2332 Opcode = I.getOpcode(); // The opcode may have been modified, refresh it.
2333 }
2334
2335 // There may be patterns where the importer can't deal with them optimally,
2336 // but does select it to a suboptimal sequence so our custom C++ selection
2337 // code later never has a chance to work on it. Therefore, we have an early
2338 // selection attempt here to give priority to certain selection routines
2339 // over the imported ones.
2340 if (earlySelect(I))
2341 return true;
2342
2343 if (selectImpl(I, *CoverageInfo))
2344 return true;
2345
2346 LLT Ty =
2347 I.getOperand(0).isReg() ? MRI.getType(I.getOperand(0).getReg()) : LLT{};
2348
2349 switch (Opcode) {
2350 case TargetOpcode::G_SBFX:
2351 case TargetOpcode::G_UBFX: {
2352 static const unsigned OpcTable[2][2] = {
2353 {AArch64::UBFMWri, AArch64::UBFMXri},
2354 {AArch64::SBFMWri, AArch64::SBFMXri}};
2355 bool IsSigned = Opcode == TargetOpcode::G_SBFX;
2356 unsigned Size = Ty.getSizeInBits();
2357 unsigned Opc = OpcTable[IsSigned][Size == 64];
2358 auto Cst1 =
2359 getConstantVRegValWithLookThrough(I.getOperand(2).getReg(), MRI);
2360 assert(Cst1 && "Should have gotten a constant for src 1?");
2361 auto Cst2 =
2362 getConstantVRegValWithLookThrough(I.getOperand(3).getReg(), MRI);
2363 assert(Cst2 && "Should have gotten a constant for src 2?");
2364 auto LSB = Cst1->Value.getZExtValue();
2365 auto Width = Cst2->Value.getZExtValue();
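    // e.g. (illustrative): "%d = G_UBFX %s, 8, 4" (4 bits starting at bit 8 of
    // an s32) becomes UBFMWri %d, %s, 8, 11, since the second immediate is
    // LSB + Width - 1.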
2366 auto BitfieldInst =
2367 MIB.buildInstr(Opc, {I.getOperand(0)}, {I.getOperand(1)})
2368 .addImm(LSB)
2369 .addImm(LSB + Width - 1);
2370 I.eraseFromParent();
2371 return constrainSelectedInstRegOperands(*BitfieldInst, TII, TRI, RBI);
2372 }
2373 case TargetOpcode::G_BRCOND:
2374 return selectCompareBranch(I, MF, MRI);
2375
2376 case TargetOpcode::G_BRINDIRECT: {
2377 I.setDesc(TII.get(AArch64::BR));
2378 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2379 }
2380
2381 case TargetOpcode::G_BRJT:
2382 return selectBrJT(I, MRI);
2383
2384 case AArch64::G_ADD_LOW: {
2385 // This op may have been separated from its ADRP companion by the localizer
2386 // or some other code motion pass. Given that many CPUs will try to
2387 // macro fuse these operations anyway, select this into a MOVaddr pseudo
2388 // which will later be expanded into an ADRP+ADD pair after scheduling.
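    // i.e. (illustrative): the MOVaddr pseudo eventually becomes something like
    //   adrp x0, sym
    //   add  x0, x0, :lo12:sym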
2389 MachineInstr *BaseMI = MRI.getVRegDef(I.getOperand(1).getReg());
2390 if (BaseMI->getOpcode() != AArch64::ADRP) {
2391 I.setDesc(TII.get(AArch64::ADDXri));
2392 I.addOperand(MachineOperand::CreateImm(0));
2393 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2394 }
2395 assert(TM.getCodeModel() == CodeModel::Small &&
2396        "Expected small code model");
2397 auto Op1 = BaseMI->getOperand(1);
2398 auto Op2 = I.getOperand(2);
2399 auto MovAddr = MIB.buildInstr(AArch64::MOVaddr, {I.getOperand(0)}, {})
2400 .addGlobalAddress(Op1.getGlobal(), Op1.getOffset(),
2401 Op1.getTargetFlags())
2402 .addGlobalAddress(Op2.getGlobal(), Op2.getOffset(),
2403 Op2.getTargetFlags());
2404 I.eraseFromParent();
2405 return constrainSelectedInstRegOperands(*MovAddr, TII, TRI, RBI);
2406 }
2407
2408 case TargetOpcode::G_BSWAP: {
2409 // Handle vector types for G_BSWAP directly.
2410 Register DstReg = I.getOperand(0).getReg();
2411 LLT DstTy = MRI.getType(DstReg);
2412
2413 // We should only get vector types here; everything else is handled by the
2414 // importer right now.
2415 if (!DstTy.isVector() || DstTy.getSizeInBits() > 128) {
2416 LLVM_DEBUG(dbgs() << "Dst type for G_BSWAP currently unsupported.\n");
2417 return false;
2418 }
2419
2420 // Only handle 4 and 2 element vectors for now.
2421 // TODO: 16-bit elements.
2422 unsigned NumElts = DstTy.getNumElements();
2423 if (NumElts != 4 && NumElts != 2) {
2424 LLVM_DEBUG(dbgs() << "Unsupported number of elements for G_BSWAP.\n");
2425 return false;
2426 }
2427
2428 // Choose the correct opcode for the supported types. Right now, that's
2429 // v2s32, v4s32, and v2s64.
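    // e.g. (illustrative): a v4s32 G_BSWAP is selected as REV32 on the
    // byte-vector form, i.e. "rev32 v0.16b, v1.16b".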
2430 unsigned Opc = 0;
2431 unsigned EltSize = DstTy.getElementType().getSizeInBits();
2432 if (EltSize == 32)
2433 Opc = (DstTy.getNumElements() == 2) ? AArch64::REV32v8i8
2434 : AArch64::REV32v16i8;
2435 else if (EltSize == 64)
2436 Opc = AArch64::REV64v16i8;
2437
2438 // We should always get something by the time we get here...
2439 assert(Opc != 0 && "Didn't get an opcode for G_BSWAP?");
2440
2441 I.setDesc(TII.get(Opc));
2442 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2443 }
2444
2445 case TargetOpcode::G_FCONSTANT:
2446 case TargetOpcode::G_CONSTANT: {
2447 const bool isFP = Opcode == TargetOpcode::G_FCONSTANT;
2448
2449 const LLT s8 = LLT::scalar(8);
2450 const LLT s16 = LLT::scalar(16);
2451 const LLT s32 = LLT::scalar(32);
2452 const LLT s64 = LLT::scalar(64);
2453 const LLT s128 = LLT::scalar(128);
2454 const LLT p0 = LLT::pointer(0, 64);
2455
2456 const Register DefReg = I.getOperand(0).getReg();
2457 const LLT DefTy = MRI.getType(DefReg);
2458 const unsigned DefSize = DefTy.getSizeInBits();
2459 const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI);
2460
2461 // FIXME: Redundant check, but even less readable when factored out.
2462 if (isFP) {
2463 if (Ty != s32 && Ty != s64 && Ty != s128) {
2464 LLVM_DEBUG(dbgs() << "Unable to materialize FP " << Ty
2465                   << " constant, expected: " << s32 << " or " << s64
2466                   << " or " << s128 << '\n');
2467 return false;
2468 }
2469
2470 if (RB.getID() != AArch64::FPRRegBankID) {
2471 LLVM_DEBUG(dbgs() << "Unable to materialize FP " << Ty
2472                   << " constant on bank: " << RB
2473                   << ", expected: FPR\n");
2474 return false;
2475 }
2476
2477 // The case when we have 0.0 is covered by tablegen. Reject it here so we
2478 // can be sure tablegen works correctly and isn't rescued by this code.
2479 // 0.0 is not covered by tablegen for FP128. So we will handle this
2480 // scenario in the code here.
2481 if (DefSize != 128 && I.getOperand(1).getFPImm()->isExactlyValue(0.0))
2482 return false;
2483 } else {
2484 // s32 and s64 are covered by tablegen.
2485 if (Ty != p0 && Ty != s8 && Ty != s16) {
2486 LLVM_DEBUG(dbgs() << "Unable to materialize integer " << Ty
2487                   << " constant, expected: " << s32 << ", " << s64
2488                   << ", or " << p0 << '\n');
2489 return false;
2490 }
2491
2492 if (RB.getID() != AArch64::GPRRegBankID) {
2493 LLVM_DEBUG(dbgs() << "Unable to materialize integer " << Ty
2494                   << " constant on bank: " << RB
2495                   << ", expected: GPR\n");
2496 return false;
2497 }
2498 }
2499
2500 // We allow G_CONSTANT of types < 32b.
2501 const unsigned MovOpc =
2502 DefSize == 64 ? AArch64::MOVi64imm : AArch64::MOVi32imm;
2503
2504 if (isFP) {
2505 // Either emit a FMOV, or emit a copy to emit a normal mov.
2506 const TargetRegisterClass &GPRRC =
2507 DefSize == 32 ? AArch64::GPR32RegClass : AArch64::GPR64RegClass;
2508 const TargetRegisterClass &FPRRC =
2509 DefSize == 32 ? AArch64::FPR32RegClass
2510 : (DefSize == 64 ? AArch64::FPR64RegClass
2511 : AArch64::FPR128RegClass);
2512
2513 // For 64b values, emit a constant pool load instead.
2514 // For s32, use a cp load if we have optsize/minsize.
2515 if (DefSize == 64 || DefSize == 128 ||
2516 (DefSize == 32 && shouldOptForSize(&MF))) {
2517 auto *FPImm = I.getOperand(1).getFPImm();
2518 auto *LoadMI = emitLoadFromConstantPool(FPImm, MIB);
2519 if (!LoadMI) {
2520 LLVM_DEBUG(dbgs() << "Failed to load double constant pool entry\n");
2521 return false;
2522 }
2523 MIB.buildCopy({DefReg}, {LoadMI->getOperand(0).getReg()});
2524 I.eraseFromParent();
2525 return RBI.constrainGenericRegister(DefReg, FPRRC, MRI);
2526 }
2527
2528 // Nope. Emit a copy and use a normal mov instead.
2529 const Register DefGPRReg = MRI.createVirtualRegister(&GPRRC);
2530 MachineOperand &RegOp = I.getOperand(0);
2531 RegOp.setReg(DefGPRReg);
2532 MIB.setInsertPt(MIB.getMBB(), std::next(I.getIterator()));
2533 MIB.buildCopy({DefReg}, {DefGPRReg});
2534
2535 if (!RBI.constrainGenericRegister(DefReg, FPRRC, MRI)) {
2536 LLVM_DEBUG(dbgs() << "Failed to constrain G_FCONSTANT def operand\n");
2537 return false;
2538 }
2539
2540 MachineOperand &ImmOp = I.getOperand(1);
2541 // FIXME: Is going through int64_t always correct?
2542 ImmOp.ChangeToImmediate(
2543 ImmOp.getFPImm()->getValueAPF().bitcastToAPInt().getZExtValue());
2544 } else if (I.getOperand(1).isCImm()) {
2545 uint64_t Val = I.getOperand(1).getCImm()->getZExtValue();
2546 I.getOperand(1).ChangeToImmediate(Val);
2547 } else if (I.getOperand(1).isImm()) {
2548 uint64_t Val = I.getOperand(1).getImm();
2549 I.getOperand(1).ChangeToImmediate(Val);
2550 }
2551
2552 I.setDesc(TII.get(MovOpc));
2553 constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2554 return true;
2555 }
2556 case TargetOpcode::G_EXTRACT: {
2557 Register DstReg = I.getOperand(0).getReg();
2558 Register SrcReg = I.getOperand(1).getReg();
2559 LLT SrcTy = MRI.getType(SrcReg);
2560 LLT DstTy = MRI.getType(DstReg);
2561 (void)DstTy;
2562 unsigned SrcSize = SrcTy.getSizeInBits();
2563
2564 if (SrcTy.getSizeInBits() > 64) {
2565 // This should be an extract of an s128, which is like a vector extract.
2566 if (SrcTy.getSizeInBits() != 128)
2567 return false;
2568 // Only support extracting 64 bits from an s128 at the moment.
2569 if (DstTy.getSizeInBits() != 64)
2570 return false;
2571
2572 unsigned Offset = I.getOperand(2).getImm();
2573 if (Offset % 64 != 0)
2574 return false;
2575
2576 // Check we have the right regbank always.
2577 const RegisterBank &SrcRB = *RBI.getRegBank(SrcReg, MRI, TRI);
2578 const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
2579 assert(SrcRB.getID() == DstRB.getID() && "Wrong extract regbank!");
2580
2581 if (SrcRB.getID() == AArch64::GPRRegBankID) {
2582 MIB.buildInstr(TargetOpcode::COPY, {DstReg}, {})
2583 .addUse(SrcReg, 0, Offset == 0 ? AArch64::sube64 : AArch64::subo64);
2584 I.eraseFromParent();
2585 return true;
2586 }
2587
2588 // Emit the same code as a vector extract.
2589 // Offset must be a multiple of 64.
2590 unsigned LaneIdx = Offset / 64;
2591 MachineInstr *Extract = emitExtractVectorElt(
2592 DstReg, DstRB, LLT::scalar(64), SrcReg, LaneIdx, MIB);
2593 if (!Extract)
2594 return false;
2595 I.eraseFromParent();
2596 return true;
2597 }
2598
2599 I.setDesc(TII.get(SrcSize == 64 ? AArch64::UBFMXri : AArch64::UBFMWri));
2600 MachineInstrBuilder(MF, I).addImm(I.getOperand(2).getImm() +
2601 Ty.getSizeInBits() - 1);
2602
2603 if (SrcSize < 64) {
2604 assert(SrcSize == 32 && DstTy.getSizeInBits() == 16 &&
2605        "unexpected G_EXTRACT types");
2606 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2607 }
2608
2609 DstReg = MRI.createGenericVirtualRegister(LLT::scalar(64));
2610 MIB.setInsertPt(MIB.getMBB(), std::next(I.getIterator()));
2611 MIB.buildInstr(TargetOpcode::COPY, {I.getOperand(0).getReg()}, {})
2612 .addReg(DstReg, 0, AArch64::sub_32);
2613 RBI.constrainGenericRegister(I.getOperand(0).getReg(),
2614 AArch64::GPR32RegClass, MRI);
2615 I.getOperand(0).setReg(DstReg);
2616
2617 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2618 }
2619
2620 case TargetOpcode::G_INSERT: {
2621 LLT SrcTy = MRI.getType(I.getOperand(2).getReg());
2622 LLT DstTy = MRI.getType(I.getOperand(0).getReg());
2623 unsigned DstSize = DstTy.getSizeInBits();
2624 // Larger inserts are vectors, same-size ones should be something else by
2625 // now (split up or turned into COPYs).
2626 if (Ty.getSizeInBits() > 64 || SrcTy.getSizeInBits() > 32)
2627 return false;
2628
2629 I.setDesc(TII.get(DstSize == 64 ? AArch64::BFMXri : AArch64::BFMWri));
2630 unsigned LSB = I.getOperand(3).getImm();
2631 unsigned Width = MRI.getType(I.getOperand(2).getReg()).getSizeInBits();
2632 I.getOperand(3).setImm((DstSize - LSB) % DstSize);
2633 MachineInstrBuilder(MF, I).addImm(Width - 1);
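    // e.g. (illustrative): inserting an s16 value at bit 8 of an s32 uses
    // BFMWri with immr = (32 - 8) % 32 = 24 and imms = 16 - 1 = 15.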
2634
2635 if (DstSize < 64) {
2636 assert(DstSize == 32 && SrcTy.getSizeInBits() == 16 &&
2637        "unexpected G_INSERT types");
2638 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2639 }
2640
2641 Register SrcReg = MRI.createGenericVirtualRegister(LLT::scalar(64));
2642 BuildMI(MBB, I.getIterator(), I.getDebugLoc(),
2643 TII.get(AArch64::SUBREG_TO_REG))
2644 .addDef(SrcReg)
2645 .addImm(0)
2646 .addUse(I.getOperand(2).getReg())
2647 .addImm(AArch64::sub_32);
2648 RBI.constrainGenericRegister(I.getOperand(2).getReg(),
2649 AArch64::GPR32RegClass, MRI);
2650 I.getOperand(2).setReg(SrcReg);
2651
2652 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2653 }
2654 case TargetOpcode::G_FRAME_INDEX: {
2655 // allocas and G_FRAME_INDEX are only supported in addrspace(0).
2656 if (Ty != LLT::pointer(0, 64)) {
2657 LLVM_DEBUG(dbgs() << "G_FRAME_INDEX pointer has type: " << Ty
2658                   << ", expected: " << LLT::pointer(0, 64) << '\n');
2659 return false;
2660 }
2661 I.setDesc(TII.get(AArch64::ADDXri));
2662
2663 // MOs for a #0 shifted immediate.
2664 I.addOperand(MachineOperand::CreateImm(0));
2665 I.addOperand(MachineOperand::CreateImm(0));
2666
2667 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2668 }
2669
2670 case TargetOpcode::G_GLOBAL_VALUE: {
2671 auto GV = I.getOperand(1).getGlobal();
2672 if (GV->isThreadLocal())
2673 return selectTLSGlobalValue(I, MRI);
2674
2675 unsigned OpFlags = STI.ClassifyGlobalReference(GV, TM);
2676 if (OpFlags & AArch64II::MO_GOT) {
2677 I.setDesc(TII.get(AArch64::LOADgot));
2678 I.getOperand(1).setTargetFlags(OpFlags);
2679 } else if (TM.getCodeModel() == CodeModel::Large) {
2680 // Materialize the global using movz/movk instructions.
2681 materializeLargeCMVal(I, GV, OpFlags);
2682 I.eraseFromParent();
2683 return true;
2684 } else if (TM.getCodeModel() == CodeModel::Tiny) {
2685 I.setDesc(TII.get(AArch64::ADR));
2686 I.getOperand(1).setTargetFlags(OpFlags);
2687 } else {
2688 I.setDesc(TII.get(AArch64::MOVaddr));
2689 I.getOperand(1).setTargetFlags(OpFlags | AArch64II::MO_PAGE);
2690 MachineInstrBuilder MIB(MF, I);
2691 MIB.addGlobalAddress(GV, I.getOperand(1).getOffset(),
2692 OpFlags | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
2693 }
2694 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2695 }
2696
2697 case TargetOpcode::G_ZEXTLOAD:
2698 case TargetOpcode::G_LOAD:
2699 case TargetOpcode::G_STORE: {
2700 GLoadStore &LdSt = cast<GLoadStore>(I);
2701 bool IsZExtLoad = I.getOpcode() == TargetOpcode::G_ZEXTLOAD;
2702 LLT PtrTy = MRI.getType(LdSt.getPointerReg());
2703
2704 if (PtrTy != LLT::pointer(0, 64)) {
2705 LLVM_DEBUG(dbgs() << "Load/Store pointer has type: " << PtrTy
2706                   << ", expected: " << LLT::pointer(0, 64) << '\n');
2707 return false;
2708 }
2709
2710 uint64_t MemSizeInBytes = LdSt.getMemSize();
2711 unsigned MemSizeInBits = LdSt.getMemSizeInBits();
2712 AtomicOrdering Order = LdSt.getMMO().getSuccessOrdering();
2713
2714 // Need special instructions for atomics that affect ordering.
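    // e.g. (illustrative): an acquire or seq_cst 32-bit load is selected as
    // LDARW, and a release or seq_cst 32-bit store as STLRW, rather than the
    // plain LDR/STR forms.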
2715 if (Order != AtomicOrdering::NotAtomic &&
2716 Order != AtomicOrdering::Unordered &&
2717 Order != AtomicOrdering::Monotonic) {
2718 assert(!isa<GZExtLoad>(LdSt));
2719 if (MemSizeInBytes > 64)
2720 return false;
2721
2722 if (isa<GLoad>(LdSt)) {
2723 static unsigned Opcodes[] = {AArch64::LDARB, AArch64::LDARH,
2724 AArch64::LDARW, AArch64::LDARX};
2725 I.setDesc(TII.get(Opcodes[Log2_32(MemSizeInBytes)]));
2726 } else {
2727 static unsigned Opcodes[] = {AArch64::STLRB, AArch64::STLRH,
2728 AArch64::STLRW, AArch64::STLRX};
2729 I.setDesc(TII.get(Opcodes[Log2_32(MemSizeInBytes)]));
2730 }
2731 constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2732 return true;
2733 }
2734
2735#ifndef NDEBUG
2736 const Register PtrReg = LdSt.getPointerReg();
2737 const RegisterBank &PtrRB = *RBI.getRegBank(PtrReg, MRI, TRI);
2738 // Sanity-check the pointer register.
2739 assert(PtrRB.getID() == AArch64::GPRRegBankID &&
2740        "Load/Store pointer operand isn't a GPR");
2741 assert(MRI.getType(PtrReg).isPointer() &&
2742        "Load/Store pointer operand isn't a pointer");
2743#endif
2744
2745 const Register ValReg = LdSt.getReg(0);
2746 const LLT ValTy = MRI.getType(ValReg);
2747 const RegisterBank &RB = *RBI.getRegBank(ValReg, MRI, TRI);
2748
2749 // The code below doesn't support truncating stores, so we need to split it
2750 // again.
2751 if (isa<GStore>(LdSt) && ValTy.getSizeInBits() > MemSizeInBits) {
2752 unsigned SubReg;
2753 LLT MemTy = LdSt.getMMO().getMemoryType();
2754 auto *RC = getRegClassForTypeOnBank(MemTy, RB, RBI);
2755 if (!getSubRegForClass(RC, TRI, SubReg))
2756 return false;
2757
2758 // Generate a subreg copy.
2759 auto Copy = MIB.buildInstr(TargetOpcode::COPY, {MemTy}, {})
2760 .addReg(ValReg, 0, SubReg)
2761 .getReg(0);
2762 RBI.constrainGenericRegister(Copy, *RC, MRI);
2763 LdSt.getOperand(0).setReg(Copy);
2764 } else if (isa<GLoad>(LdSt) && ValTy.getSizeInBits() > MemSizeInBits) {
2765 // If this is an any-extending load from the FPR bank, split it into a regular
2766 // load + extend.
2767 if (RB.getID() == AArch64::FPRRegBankID) {
2768 unsigned SubReg;
2769 LLT MemTy = LdSt.getMMO().getMemoryType();
2770 auto *RC = getRegClassForTypeOnBank(MemTy, RB, RBI);
2771 if (!getSubRegForClass(RC, TRI, SubReg))
2772 return false;
2773 Register OldDst = LdSt.getReg(0);
2774 Register NewDst =
2775 MRI.createGenericVirtualRegister(LdSt.getMMO().getMemoryType());
2776 LdSt.getOperand(0).setReg(NewDst);
2777 MRI.setRegBank(NewDst, RB);
2778 // Generate a SUBREG_TO_REG to extend it.
2779 MIB.setInsertPt(MIB.getMBB(), std::next(LdSt.getIterator()));
2780 MIB.buildInstr(AArch64::SUBREG_TO_REG, {OldDst}, {})
2781 .addImm(0)
2782 .addUse(NewDst)
2783 .addImm(SubReg);
2784 auto SubRegRC = getRegClassForTypeOnBank(MRI.getType(OldDst), RB, RBI);
2785 RBI.constrainGenericRegister(OldDst, *SubRegRC, MRI);
2786 MIB.setInstr(LdSt);
2787 }
2788 }
2789
2790 // Helper lambda for partially selecting I. Either returns the original
2791 // instruction with an updated opcode, or a new instruction.
2792 auto SelectLoadStoreAddressingMode = [&]() -> MachineInstr * {
2793 bool IsStore = isa<GStore>(I);
1. Assuming 'I' is not a 'GStore'
2794 const unsigned NewOpc =
2795 selectLoadStoreUIOp(I.getOpcode(), RB.getID(), MemSizeInBits);
2796 if (NewOpc == I.getOpcode())
2. Assuming the condition is false
3. Taking false branch
2797 return nullptr;
2798 // Check if we can fold anything into the addressing mode.
2799 auto AddrModeFns =
2800 selectAddrModeIndexed(I.getOperand(1), MemSizeInBytes);
4. Calling 'AArch64InstructionSelector::selectAddrModeIndexed'
2801 if (!AddrModeFns) {
2802 // Can't fold anything. Use the original instruction.
2803 I.setDesc(TII.get(NewOpc));
2804 I.addOperand(MachineOperand::CreateImm(0));
2805 return &I;
2806 }
2807
2808 // Folded something. Create a new instruction and return it.
2809 auto NewInst = MIB.buildInstr(NewOpc, {}, {}, I.getFlags());
2810 Register CurValReg = I.getOperand(0).getReg();
2811 IsStore ? NewInst.addUse(CurValReg) : NewInst.addDef(CurValReg);
2812 NewInst.cloneMemRefs(I);
2813 for (auto &Fn : *AddrModeFns)
2814 Fn(NewInst);
2815 I.eraseFromParent();
2816 return &*NewInst;
2817 };
2818
2819 MachineInstr *LoadStore = SelectLoadStoreAddressingMode();
2820 if (!LoadStore)
2821 return false;
2822
2823 // If we're storing a 0, use WZR/XZR.
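    // e.g. (illustrative): storing a constant zero through %p becomes
    // "str wzr, [x_p]" instead of materializing the zero in a register first.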
2824 if (Opcode == TargetOpcode::G_STORE) {
2825 auto CVal = getConstantVRegValWithLookThrough(
2826 LoadStore->getOperand(0).getReg(), MRI, /*LookThroughInstrs = */ true,
2827 /*HandleFConstants = */ false);
2828 if (CVal && CVal->Value == 0) {
2829 switch (LoadStore->getOpcode()) {
2830 case AArch64::STRWui:
2831 case AArch64::STRHHui:
2832 case AArch64::STRBBui:
2833 LoadStore->getOperand(0).setReg(AArch64::WZR);
2834 break;
2835 case AArch64::STRXui:
2836 LoadStore->getOperand(0).setReg(AArch64::XZR);
2837 break;
2838 }
2839 }
2840 }
2841
2842 if (IsZExtLoad) {
2843 // The zextload from a smaller type to i32 should be handled by the
2844 // importer.
2845 if (MRI.getType(LoadStore->getOperand(0).getReg()).getSizeInBits() != 64)
2846 return false;
2847 // If we have a ZEXTLOAD then change the load's type to be a narrower reg
2848 // and zero_extend with SUBREG_TO_REG.
2849 Register LdReg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
2850 Register DstReg = LoadStore->getOperand(0).getReg();
2851 LoadStore->getOperand(0).setReg(LdReg);
2852
2853 MIB.setInsertPt(MIB.getMBB(), std::next(LoadStore->getIterator()));
2854 MIB.buildInstr(AArch64::SUBREG_TO_REG, {DstReg}, {})
2855 .addImm(0)
2856 .addUse(LdReg)
2857 .addImm(AArch64::sub_32);
2858 constrainSelectedInstRegOperands(*LoadStore, TII, TRI, RBI);
2859 return RBI.constrainGenericRegister(DstReg, AArch64::GPR64allRegClass,
2860 MRI);
2861 }
2862 return constrainSelectedInstRegOperands(*LoadStore, TII, TRI, RBI);
2863 }
2864
2865 case TargetOpcode::G_SMULH:
2866 case TargetOpcode::G_UMULH: {
2867 // Reject the various things we don't support yet.
2868 if (unsupportedBinOp(I, RBI, MRI, TRI))
2869 return false;
2870
2871 const Register DefReg = I.getOperand(0).getReg();
2872 const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI);
2873
2874 if (RB.getID() != AArch64::GPRRegBankID) {
2875 LLVM_DEBUG(dbgs() << "G_[SU]MULH on bank: " << RB << ", expected: GPR\n")do { } while (false);
2876 return false;
2877 }
2878
2879 if (Ty != LLT::scalar(64)) {
2880 LLVM_DEBUG(dbgs() << "G_[SU]MULH has type: " << Tydo { } while (false)
2881 << ", expected: " << LLT::scalar(64) << '\n')do { } while (false);
2882 return false;
2883 }
2884
2885 unsigned NewOpc = I.getOpcode() == TargetOpcode::G_SMULH ? AArch64::SMULHrr
2886 : AArch64::UMULHrr;
2887 I.setDesc(TII.get(NewOpc));
2888
2889 // Now that we selected an opcode, we need to constrain the register
2890 // operands to use appropriate classes.
2891 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2892 }
2893 case TargetOpcode::G_LSHR:
2894 case TargetOpcode::G_ASHR:
2895 if (MRI.getType(I.getOperand(0).getReg()).isVector())
2896 return selectVectorAshrLshr(I, MRI);
2897 LLVM_FALLTHROUGH[[gnu::fallthrough]];
2898 case TargetOpcode::G_SHL:
2899 if (Opcode == TargetOpcode::G_SHL &&
2900 MRI.getType(I.getOperand(0).getReg()).isVector())
2901 return selectVectorSHL(I, MRI);
2902 LLVM_FALLTHROUGH[[gnu::fallthrough]];
2903 case TargetOpcode::G_FADD:
2904 case TargetOpcode::G_FSUB:
2905 case TargetOpcode::G_FMUL:
2906 case TargetOpcode::G_FDIV:
2907 case TargetOpcode::G_OR: {
2908 // Reject the various things we don't support yet.
2909 if (unsupportedBinOp(I, RBI, MRI, TRI))
2910 return false;
2911
2912 const unsigned OpSize = Ty.getSizeInBits();
2913
2914 const Register DefReg = I.getOperand(0).getReg();
2915 const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI);
2916
2917 const unsigned NewOpc = selectBinaryOp(I.getOpcode(), RB.getID(), OpSize);
2918 if (NewOpc == I.getOpcode())
2919 return false;
2920
2921 I.setDesc(TII.get(NewOpc));
2922 // FIXME: Should the type be always reset in setDesc?
2923
2924 // Now that we selected an opcode, we need to constrain the register
2925 // operands to use appropriate classes.
2926 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2927 }
2928
2929 case TargetOpcode::G_PTR_ADD: {
2930 emitADD(I.getOperand(0).getReg(), I.getOperand(1), I.getOperand(2), MIB);
2931 I.eraseFromParent();
2932 return true;
2933 }
2934 case TargetOpcode::G_SADDO:
2935 case TargetOpcode::G_UADDO:
2936 case TargetOpcode::G_SSUBO:
2937 case TargetOpcode::G_USUBO: {
2938 // Emit the operation and get the correct condition code.
2939 auto OpAndCC = emitOverflowOp(Opcode, I.getOperand(0).getReg(),
2940 I.getOperand(2), I.getOperand(3), MIB);
2941
2942 // Now, put the overflow result in the register given by the first operand
2943 // to the overflow op. CSINC increments the result when the predicate is
2944 // false, so to get the increment when it's true, we need to use the
2945 // inverse. In this case, we want to increment when carry is set.
2946 Register ZReg = AArch64::WZR;
2947 auto CsetMI = MIB.buildInstr(AArch64::CSINCWr, {I.getOperand(1).getReg()},
2948 {ZReg, ZReg})
2949 .addImm(getInvertedCondCode(OpAndCC.second));
2950 constrainSelectedInstRegOperands(*CsetMI, TII, TRI, RBI);
2951 I.eraseFromParent();
2952 return true;
2953 }
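  // Rough illustration of the CSINC trick above: for G_UADDO the overflow
  // condition returned by emitOverflowOp is HS (carry set), so the CSINC is
  // emitted with the inverted code LO:
  //   %carry:gpr32 = CSINCWr wzr, wzr, lo
  // which produces 1 exactly when the addition set the carry flag.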
2954
2955 case TargetOpcode::G_PTRMASK: {
2956 Register MaskReg = I.getOperand(2).getReg();
2957 Optional<int64_t> MaskVal = getConstantVRegSExtVal(MaskReg, MRI);
2958 // TODO: Implement arbitrary cases
2959 if (!MaskVal || !isShiftedMask_64(*MaskVal))
2960 return false;
2961
2962 uint64_t Mask = *MaskVal;
2963 I.setDesc(TII.get(AArch64::ANDXri));
2964 I.getOperand(2).ChangeToImmediate(
2965 AArch64_AM::encodeLogicalImmediate(Mask, 64));
2966
2967 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2968 }
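  // For example, aligning a pointer down to 16 bytes uses the mask
  // 0xFFFFFFFFFFFFFFF0, a single contiguous run of set bits, so it passes
  // isShiftedMask_64 and encodes directly as the ANDXri logical immediate.
  // Masks whose set bits are not contiguous are rejected above and left
  // unselected here.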
2969 case TargetOpcode::G_PTRTOINT:
2970 case TargetOpcode::G_TRUNC: {
2971 const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
2972 const LLT SrcTy = MRI.getType(I.getOperand(1).getReg());
2973
2974 const Register DstReg = I.getOperand(0).getReg();
2975 const Register SrcReg = I.getOperand(1).getReg();
2976
2977 const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
2978 const RegisterBank &SrcRB = *RBI.getRegBank(SrcReg, MRI, TRI);
2979
2980 if (DstRB.getID() != SrcRB.getID()) {
2981 LLVM_DEBUG(do { } while (false)
2982 dbgs() << "G_TRUNC/G_PTRTOINT input/output on different banks\n")do { } while (false);
2983 return false;
2984 }
2985
2986 if (DstRB.getID() == AArch64::GPRRegBankID) {
2987 const TargetRegisterClass *DstRC =
2988 getRegClassForTypeOnBank(DstTy, DstRB, RBI);
2989 if (!DstRC)
2990 return false;
2991
2992 const TargetRegisterClass *SrcRC =
2993 getRegClassForTypeOnBank(SrcTy, SrcRB, RBI);
2994 if (!SrcRC)
2995 return false;
2996
2997 if (!RBI.constrainGenericRegister(SrcReg, *SrcRC, MRI) ||
2998 !RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) {
2999 LLVM_DEBUG(dbgs() << "Failed to constrain G_TRUNC/G_PTRTOINT\n")do { } while (false);
3000 return false;
3001 }
3002
3003 if (DstRC == SrcRC) {
3004 // Nothing to be done
3005 } else if (Opcode == TargetOpcode::G_TRUNC && DstTy == LLT::scalar(32) &&
3006 SrcTy == LLT::scalar(64)) {
3007 llvm_unreachable("TableGen can import this case")__builtin_unreachable();
3008 return false;
3009 } else if (DstRC == &AArch64::GPR32RegClass &&
3010 SrcRC == &AArch64::GPR64RegClass) {
3011 I.getOperand(1).setSubReg(AArch64::sub_32);
3012 } else {
3013 LLVM_DEBUG(do { } while (false)
3014 dbgs() << "Unhandled mismatched classes in G_TRUNC/G_PTRTOINT\n")do { } while (false);
3015 return false;
3016 }
3017
3018 I.setDesc(TII.get(TargetOpcode::COPY));
3019 return true;
3020 } else if (DstRB.getID() == AArch64::FPRRegBankID) {
3021 if (DstTy == LLT::fixed_vector(4, 16) &&
3022 SrcTy == LLT::fixed_vector(4, 32)) {
3023 I.setDesc(TII.get(AArch64::XTNv4i16));
3024 constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3025 return true;
3026 }
3027
3028 if (!SrcTy.isVector() && SrcTy.getSizeInBits() == 128) {
3029 MachineInstr *Extract = emitExtractVectorElt(
3030 DstReg, DstRB, LLT::scalar(DstTy.getSizeInBits()), SrcReg, 0, MIB);
3031 if (!Extract)
3032 return false;
3033 I.eraseFromParent();
3034 return true;
3035 }
3036
3037 // We might have a vector G_PTRTOINT, in which case just emit a COPY.
3038 if (Opcode == TargetOpcode::G_PTRTOINT) {
3039 assert(DstTy.isVector() && "Expected an FPR ptrtoint to be a vector")(static_cast<void> (0));
3040 I.setDesc(TII.get(TargetOpcode::COPY));
3041 return selectCopy(I, TII, MRI, TRI, RBI);
3042 }
3043 }
3044
3045 return false;
3046 }
3047
3048 case TargetOpcode::G_ANYEXT: {
3049 const Register DstReg = I.getOperand(0).getReg();
3050 const Register SrcReg = I.getOperand(1).getReg();
3051
3052 const RegisterBank &RBDst = *RBI.getRegBank(DstReg, MRI, TRI);
3053 if (RBDst.getID() != AArch64::GPRRegBankID) {
3054 LLVM_DEBUG(dbgs() << "G_ANYEXT on bank: " << RBDstdo { } while (false)
3055 << ", expected: GPR\n")do { } while (false);
3056 return false;
3057 }
3058
3059 const RegisterBank &RBSrc = *RBI.getRegBank(SrcReg, MRI, TRI);
3060 if (RBSrc.getID() != AArch64::GPRRegBankID) {
3061 LLVM_DEBUG(dbgs() << "G_ANYEXT on bank: " << RBSrcdo { } while (false)
3062 << ", expected: GPR\n")do { } while (false);
3063 return false;
3064 }
3065
3066 const unsigned DstSize = MRI.getType(DstReg).getSizeInBits();
3067
3068 if (DstSize == 0) {
3069 LLVM_DEBUG(dbgs() << "G_ANYEXT operand has no size, not a gvreg?\n")do { } while (false);
3070 return false;
3071 }
3072
3073 if (DstSize != 64 && DstSize > 32) {
3074 LLVM_DEBUG(dbgs() << "G_ANYEXT to size: " << DstSizedo { } while (false)
3075 << ", expected: 32 or 64\n")do { } while (false);
3076 return false;
3077 }
3078 // At this point G_ANYEXT is just like a plain COPY, but we need
3079 // to explicitly form the 64-bit value if any.
3080 if (DstSize > 32) {
3081 Register ExtSrc = MRI.createVirtualRegister(&AArch64::GPR64allRegClass);
3082 BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::SUBREG_TO_REG))
3083 .addDef(ExtSrc)
3084 .addImm(0)
3085 .addUse(SrcReg)
3086 .addImm(AArch64::sub_32);
3087 I.getOperand(1).setReg(ExtSrc);
3088 }
3089 return selectCopy(I, TII, MRI, TRI, RBI);
3090 }
3091
3092 case TargetOpcode::G_ZEXT:
3093 case TargetOpcode::G_SEXT_INREG:
3094 case TargetOpcode::G_SEXT: {
3095 unsigned Opcode = I.getOpcode();
3096 const bool IsSigned = Opcode != TargetOpcode::G_ZEXT;
3097 const Register DefReg = I.getOperand(0).getReg();
3098 Register SrcReg = I.getOperand(1).getReg();
3099 const LLT DstTy = MRI.getType(DefReg);
3100 const LLT SrcTy = MRI.getType(SrcReg);
3101 unsigned DstSize = DstTy.getSizeInBits();
3102 unsigned SrcSize = SrcTy.getSizeInBits();
3103
3104 // SEXT_INREG has the same src reg size as dst, the size of the value to be
3105 // extended is encoded in the imm.
3106 if (Opcode == TargetOpcode::G_SEXT_INREG)
3107 SrcSize = I.getOperand(2).getImm();
3108
3109 if (DstTy.isVector())
3110 return false; // Should be handled by imported patterns.
3111
3112 assert((*RBI.getRegBank(DefReg, MRI, TRI)).getID() ==(static_cast<void> (0))
3113 AArch64::GPRRegBankID &&(static_cast<void> (0))
3114 "Unexpected ext regbank")(static_cast<void> (0));
3115
3116 MachineInstr *ExtI;
3117
3118 // First check if we're extending the result of a load which has a dest type
3119 // smaller than 32 bits, then this zext is redundant. GPR32 is the smallest
3120 // GPR register on AArch64 and all loads which are smaller automatically
3121 // zero-extend the upper bits. E.g.
3122 // %v(s8) = G_LOAD %p, :: (load 1)
3123 // %v2(s32) = G_ZEXT %v(s8)
3124 if (!IsSigned) {
3125 auto *LoadMI = getOpcodeDef(TargetOpcode::G_LOAD, SrcReg, MRI);
3126 bool IsGPR =
3127 RBI.getRegBank(SrcReg, MRI, TRI)->getID() == AArch64::GPRRegBankID;
3128 if (LoadMI && IsGPR) {
3129 const MachineMemOperand *MemOp = *LoadMI->memoperands_begin();
3130 unsigned BytesLoaded = MemOp->getSize();
3131 if (BytesLoaded < 4 && SrcTy.getSizeInBytes() == BytesLoaded)
3132 return selectCopy(I, TII, MRI, TRI, RBI);
3133 }
3134
3135 // For the 32-bit -> 64-bit case, we can emit a mov (ORRWrs)
3136 // + SUBREG_TO_REG.
3137 //
3138 // If we are zero extending from 32 bits to 64 bits, it's possible that
3139 // the instruction implicitly does the zero extend for us. In that case,
3140 // we only need the SUBREG_TO_REG.
3141 if (IsGPR && SrcSize == 32 && DstSize == 64) {
3142 // Unlike with the G_LOAD case, we don't want to look through copies
3143 // here. (See isDef32.)
3144 MachineInstr *Def = MRI.getVRegDef(SrcReg);
3145 Register SubregToRegSrc = SrcReg;
3146
3147 // Does the instruction implicitly zero extend?
3148 if (!Def || !isDef32(*Def)) {
3149 // No. Zero out using an OR.
3150 Register OrDst = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
3151 const Register ZReg = AArch64::WZR;
3152 MIB.buildInstr(AArch64::ORRWrs, {OrDst}, {ZReg, SrcReg}).addImm(0);
3153 SubregToRegSrc = OrDst;
3154 }
3155
3156 MIB.buildInstr(AArch64::SUBREG_TO_REG, {DefReg}, {})
3157 .addImm(0)
3158 .addUse(SubregToRegSrc)
3159 .addImm(AArch64::sub_32);
3160
3161 if (!RBI.constrainGenericRegister(DefReg, AArch64::GPR64RegClass,
3162 MRI)) {
3163 LLVM_DEBUG(dbgs() << "Failed to constrain G_ZEXT destination\n")do { } while (false);
3164 return false;
3165 }
3166
3167 if (!RBI.constrainGenericRegister(SrcReg, AArch64::GPR32RegClass,
3168 MRI)) {
3169 LLVM_DEBUG(dbgs() << "Failed to constrain G_ZEXT source\n")do { } while (false);
3170 return false;
3171 }
3172
3173 I.eraseFromParent();
3174 return true;
3175 }
3176 }
3177
3178 if (DstSize == 64) {
3179 if (Opcode != TargetOpcode::G_SEXT_INREG) {
3180 // FIXME: Can we avoid manually doing this?
3181 if (!RBI.constrainGenericRegister(SrcReg, AArch64::GPR32RegClass,
3182 MRI)) {
3183 LLVM_DEBUG(dbgs() << "Failed to constrain " << TII.getName(Opcode)do { } while (false)
3184 << " operand\n")do { } while (false);
3185 return false;
3186 }
3187 SrcReg = MIB.buildInstr(AArch64::SUBREG_TO_REG,
3188 {&AArch64::GPR64RegClass}, {})
3189 .addImm(0)
3190 .addUse(SrcReg)
3191 .addImm(AArch64::sub_32)
3192 .getReg(0);
3193 }
3194
3195 ExtI = MIB.buildInstr(IsSigned ? AArch64::SBFMXri : AArch64::UBFMXri,
3196 {DefReg}, {SrcReg})
3197 .addImm(0)
3198 .addImm(SrcSize - 1);
3199 } else if (DstSize <= 32) {
3200 ExtI = MIB.buildInstr(IsSigned ? AArch64::SBFMWri : AArch64::UBFMWri,
3201 {DefReg}, {SrcReg})
3202 .addImm(0)
3203 .addImm(SrcSize - 1);
3204 } else {
3205 return false;
3206 }
3207
3208 constrainSelectedInstRegOperands(*ExtI, TII, TRI, RBI);
3209 I.eraseFromParent();
3210 return true;
3211 }
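  // Rough examples of the bitfield moves produced above (immr = 0 and
  // imms = SrcSize - 1 pick out the low SrcSize bits):
  //   s8  -> s32 G_SEXT:  SBFMWri %dst, %src, 0, 7          (same as sxtb)
  //   s16 -> s64 G_ZEXT:  SUBREG_TO_REG into a gpr64, then
  //                       UBFMXri %dst, %tmp, 0, 15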
3212
3213 case TargetOpcode::G_SITOFP:
3214 case TargetOpcode::G_UITOFP:
3215 case TargetOpcode::G_FPTOSI:
3216 case TargetOpcode::G_FPTOUI: {
3217 const LLT DstTy = MRI.getType(I.getOperand(0).getReg()),
3218 SrcTy = MRI.getType(I.getOperand(1).getReg());
3219 const unsigned NewOpc = selectFPConvOpc(Opcode, DstTy, SrcTy);
3220 if (NewOpc == Opcode)
3221 return false;
3222
3223 I.setDesc(TII.get(NewOpc));
3224 constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3225
3226 return true;
3227 }
3228
3229 case TargetOpcode::G_FREEZE:
3230 return selectCopy(I, TII, MRI, TRI, RBI);
3231
3232 case TargetOpcode::G_INTTOPTR:
3233 // The importer is currently unable to import pointer types since they
3234 // didn't exist in SelectionDAG.
3235 return selectCopy(I, TII, MRI, TRI, RBI);
3236
3237 case TargetOpcode::G_BITCAST:
3238 // Imported SelectionDAG rules can handle every bitcast except those that
3239 // bitcast from a type to the same type. Ideally, these shouldn't occur
3240 // but we might not run an optimizer that deletes them. The other exception
3241 // is bitcasts involving pointer types, as SelectionDAG has no knowledge
3242 // of them.
3243 return selectCopy(I, TII, MRI, TRI, RBI);
3244
3245 case TargetOpcode::G_SELECT: {
3246 if (MRI.getType(I.getOperand(1).getReg()) != LLT::scalar(1)) {
3247 LLVM_DEBUG(dbgs() << "G_SELECT cond has type: " << Tydo { } while (false)
3248 << ", expected: " << LLT::scalar(1) << '\n')do { } while (false);
3249 return false;
3250 }
3251
3252 const Register CondReg = I.getOperand(1).getReg();
3253 const Register TReg = I.getOperand(2).getReg();
3254 const Register FReg = I.getOperand(3).getReg();
3255
3256 if (tryOptSelect(I))
3257 return true;
3258
3259 // Make sure to use an unused vreg instead of wzr, so that the peephole
3260 // optimizations will be able to optimize these.
3261 Register DeadVReg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
3262 auto TstMI = MIB.buildInstr(AArch64::ANDSWri, {DeadVReg}, {CondReg})
3263 .addImm(AArch64_AM::encodeLogicalImmediate(1, 32));
3264 constrainSelectedInstRegOperands(*TstMI, TII, TRI, RBI);
3265 if (!emitSelect(I.getOperand(0).getReg(), TReg, FReg, AArch64CC::NE, MIB))
3266 return false;
3267 I.eraseFromParent();
3268 return true;
3269 }
3270 case TargetOpcode::G_ICMP: {
3271 if (Ty.isVector())
3272 return selectVectorICmp(I, MRI);
3273
3274 if (Ty != LLT::scalar(32)) {
3275 LLVM_DEBUG(dbgs() << "G_ICMP result has type: " << Tydo { } while (false)
3276 << ", expected: " << LLT::scalar(32) << '\n')do { } while (false);
3277 return false;
3278 }
3279
3280 auto Pred = static_cast<CmpInst::Predicate>(I.getOperand(1).getPredicate());
3281 emitIntegerCompare(I.getOperand(2), I.getOperand(3), I.getOperand(1),
3282 MIB);
3283 emitCSetForICMP(I.getOperand(0).getReg(), Pred, MIB);
3284 I.eraseFromParent();
3285 return true;
3286 }
3287
3288 case TargetOpcode::G_FCMP: {
3289 CmpInst::Predicate Pred =
3290 static_cast<CmpInst::Predicate>(I.getOperand(1).getPredicate());
3291 if (!emitFPCompare(I.getOperand(2).getReg(), I.getOperand(3).getReg(), MIB,
3292 Pred) ||
3293 !emitCSetForFCmp(I.getOperand(0).getReg(), Pred, MIB))
3294 return false;
3295 I.eraseFromParent();
3296 return true;
3297 }
3298 case TargetOpcode::G_VASTART:
3299 return STI.isTargetDarwin() ? selectVaStartDarwin(I, MF, MRI)
3300 : selectVaStartAAPCS(I, MF, MRI);
3301 case TargetOpcode::G_INTRINSIC:
3302 return selectIntrinsic(I, MRI);
3303 case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS:
3304 return selectIntrinsicWithSideEffects(I, MRI);
3305 case TargetOpcode::G_IMPLICIT_DEF: {
3306 I.setDesc(TII.get(TargetOpcode::IMPLICIT_DEF));
3307 const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
3308 const Register DstReg = I.getOperand(0).getReg();
3309 const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
3310 const TargetRegisterClass *DstRC =
3311 getRegClassForTypeOnBank(DstTy, DstRB, RBI);
3312 RBI.constrainGenericRegister(DstReg, *DstRC, MRI);
3313 return true;
3314 }
3315 case TargetOpcode::G_BLOCK_ADDR: {
3316 if (TM.getCodeModel() == CodeModel::Large) {
3317 materializeLargeCMVal(I, I.getOperand(1).getBlockAddress(), 0);
3318 I.eraseFromParent();
3319 return true;
3320 } else {
3321 I.setDesc(TII.get(AArch64::MOVaddrBA));
3322 auto MovMI = BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::MOVaddrBA),
3323 I.getOperand(0).getReg())
3324 .addBlockAddress(I.getOperand(1).getBlockAddress(),
3325 /* Offset */ 0, AArch64II::MO_PAGE)
3326 .addBlockAddress(
3327 I.getOperand(1).getBlockAddress(), /* Offset */ 0,
3328 AArch64II::MO_NC | AArch64II::MO_PAGEOFF);
3329 I.eraseFromParent();
3330 return constrainSelectedInstRegOperands(*MovMI, TII, TRI, RBI);
3331 }
3332 }
3333 case AArch64::G_DUP: {
3334 // When the scalar of G_DUP is an s8/s16 gpr, they can't be selected by
3335 // imported patterns. Do it manually here. Avoiding generating s16 gpr is
3336 // difficult because at RBS we may end up pessimizing the fpr case if we
3337 // decided to add an anyextend to fix this. Manual selection is the most
3338 // robust solution for now.
3339 if (RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI)->getID() !=
3340 AArch64::GPRRegBankID)
3341 return false; // We expect the fpr regbank case to be imported.
3342 LLT VecTy = MRI.getType(I.getOperand(0).getReg());
3343 if (VecTy == LLT::fixed_vector(8, 8))
3344 I.setDesc(TII.get(AArch64::DUPv8i8gpr));
3345 else if (VecTy == LLT::fixed_vector(16, 8))
3346 I.setDesc(TII.get(AArch64::DUPv16i8gpr));
3347 else if (VecTy == LLT::fixed_vector(4, 16))
3348 I.setDesc(TII.get(AArch64::DUPv4i16gpr));
3349 else if (VecTy == LLT::fixed_vector(8, 16))
3350 I.setDesc(TII.get(AArch64::DUPv8i16gpr));
3351 else
3352 return false;
3353 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3354 }
3355 case TargetOpcode::G_INTRINSIC_TRUNC:
3356 return selectIntrinsicTrunc(I, MRI);
3357 case TargetOpcode::G_INTRINSIC_ROUND:
3358 return selectIntrinsicRound(I, MRI);
3359 case TargetOpcode::G_BUILD_VECTOR:
3360 return selectBuildVector(I, MRI);
3361 case TargetOpcode::G_MERGE_VALUES:
3362 return selectMergeValues(I, MRI);
3363 case TargetOpcode::G_UNMERGE_VALUES:
3364 return selectUnmergeValues(I, MRI);
3365 case TargetOpcode::G_SHUFFLE_VECTOR:
3366 return selectShuffleVector(I, MRI);
3367 case TargetOpcode::G_EXTRACT_VECTOR_ELT:
3368 return selectExtractElt(I, MRI);
3369 case TargetOpcode::G_INSERT_VECTOR_ELT:
3370 return selectInsertElt(I, MRI);
3371 case TargetOpcode::G_CONCAT_VECTORS:
3372 return selectConcatVectors(I, MRI);
3373 case TargetOpcode::G_JUMP_TABLE:
3374 return selectJumpTable(I, MRI);
3375 case TargetOpcode::G_VECREDUCE_FADD:
3376 case TargetOpcode::G_VECREDUCE_ADD:
3377 return selectReduction(I, MRI);
3378 }
3379
3380 return false;
3381}
3382
3383bool AArch64InstructionSelector::selectReduction(MachineInstr &I,
3384 MachineRegisterInfo &MRI) {
3385 Register VecReg = I.getOperand(1).getReg();
3386 LLT VecTy = MRI.getType(VecReg);
3387 if (I.getOpcode() == TargetOpcode::G_VECREDUCE_ADD) {
3388 // For <2 x i32> ADDPv2i32 generates an FPR64 value, so we need to emit
3389 // a subregister copy afterwards.
3390 if (VecTy == LLT::fixed_vector(2, 32)) {
3391 Register DstReg = I.getOperand(0).getReg();
3392 auto AddP = MIB.buildInstr(AArch64::ADDPv2i32, {&AArch64::FPR64RegClass},
3393 {VecReg, VecReg});
3394 auto Copy = MIB.buildInstr(TargetOpcode::COPY, {DstReg}, {})
3395 .addReg(AddP.getReg(0), 0, AArch64::ssub)
3396 .getReg(0);
3397 RBI.constrainGenericRegister(Copy, AArch64::FPR32RegClass, MRI);
3398 I.eraseFromParent();
3399 return constrainSelectedInstRegOperands(*AddP, TII, TRI, RBI);
3400 }
3401
3402 unsigned Opc = 0;
3403 if (VecTy == LLT::fixed_vector(16, 8))
3404 Opc = AArch64::ADDVv16i8v;
3405 else if (VecTy == LLT::fixed_vector(8, 16))
3406 Opc = AArch64::ADDVv8i16v;
3407 else if (VecTy == LLT::fixed_vector(4, 32))
3408 Opc = AArch64::ADDVv4i32v;
3409 else if (VecTy == LLT::fixed_vector(2, 64))
3410 Opc = AArch64::ADDPv2i64p;
3411 else {
3412 LLVM_DEBUG(dbgs() << "Unhandled type for add reduction")do { } while (false);
3413 return false;
3414 }
3415 I.setDesc(TII.get(Opc));
3416 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3417 }
3418
3419 if (I.getOpcode() == TargetOpcode::G_VECREDUCE_FADD) {
3420 unsigned Opc = 0;
3421 if (VecTy == LLT::fixed_vector(2, 32))
3422 Opc = AArch64::FADDPv2i32p;
3423 else if (VecTy == LLT::fixed_vector(2, 64))
3424 Opc = AArch64::FADDPv2i64p;
3425 else {
3426 LLVM_DEBUG(dbgs() << "Unhandled type for fadd reduction")do { } while (false);
3427 return false;
3428 }
3429 I.setDesc(TII.get(Opc));
3430 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3431 }
3432 return false;
3433}
3434
3435bool AArch64InstructionSelector::selectBrJT(MachineInstr &I,
3436 MachineRegisterInfo &MRI) {
3437 assert(I.getOpcode() == TargetOpcode::G_BRJT && "Expected G_BRJT")(static_cast<void> (0));
3438 Register JTAddr = I.getOperand(0).getReg();
3439 unsigned JTI = I.getOperand(1).getIndex();
3440 Register Index = I.getOperand(2).getReg();
3441
3442 Register TargetReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
3443 Register ScratchReg = MRI.createVirtualRegister(&AArch64::GPR64spRegClass);
3444
3445 MF->getInfo<AArch64FunctionInfo>()->setJumpTableEntryInfo(JTI, 4, nullptr);
3446 auto JumpTableInst = MIB.buildInstr(AArch64::JumpTableDest32,
3447 {TargetReg, ScratchReg}, {JTAddr, Index})
3448 .addJumpTableIndex(JTI);
3449 // Build the indirect branch.
3450 MIB.buildInstr(AArch64::BR, {}, {TargetReg});
3451 I.eraseFromParent();
3452 return constrainSelectedInstRegOperands(*JumpTableInst, TII, TRI, RBI);
3453}
3454
3455bool AArch64InstructionSelector::selectJumpTable(MachineInstr &I,
3456 MachineRegisterInfo &MRI) {
3457 assert(I.getOpcode() == TargetOpcode::G_JUMP_TABLE && "Expected jump table")(static_cast<void> (0));
3458 assert(I.getOperand(1).isJTI() && "Jump table op should have a JTI!")(static_cast<void> (0));
3459
3460 Register DstReg = I.getOperand(0).getReg();
3461 unsigned JTI = I.getOperand(1).getIndex();
3462 // We generate a MOVaddrJT which will get expanded to an ADRP + ADD later.
3463 auto MovMI =
3464 MIB.buildInstr(AArch64::MOVaddrJT, {DstReg}, {})
3465 .addJumpTableIndex(JTI, AArch64II::MO_PAGE)
3466 .addJumpTableIndex(JTI, AArch64II::MO_NC | AArch64II::MO_PAGEOFF);
3467 I.eraseFromParent();
3468 return constrainSelectedInstRegOperands(*MovMI, TII, TRI, RBI);
3469}
3470
3471bool AArch64InstructionSelector::selectTLSGlobalValue(
3472 MachineInstr &I, MachineRegisterInfo &MRI) {
3473 if (!STI.isTargetMachO())
3474 return false;
3475 MachineFunction &MF = *I.getParent()->getParent();
3476 MF.getFrameInfo().setAdjustsStack(true);
3477
3478 const auto &GlobalOp = I.getOperand(1);
3479 assert(GlobalOp.getOffset() == 0 &&(static_cast<void> (0))
3480 "Shouldn't have an offset on TLS globals!")(static_cast<void> (0));
3481 const GlobalValue &GV = *GlobalOp.getGlobal();
3482
3483 auto LoadGOT =
3484 MIB.buildInstr(AArch64::LOADgot, {&AArch64::GPR64commonRegClass}, {})
3485 .addGlobalAddress(&GV, 0, AArch64II::MO_TLS);
3486
3487 auto Load = MIB.buildInstr(AArch64::LDRXui, {&AArch64::GPR64commonRegClass},
3488 {LoadGOT.getReg(0)})
3489 .addImm(0);
3490
3491 MIB.buildCopy(Register(AArch64::X0), LoadGOT.getReg(0));
3492 // TLS calls preserve all registers except those that absolutely must be
3493 // trashed: X0 (it takes an argument), LR (it's a call) and NZCV (let's not be
3494 // silly).
3495 MIB.buildInstr(getBLRCallOpcode(MF), {}, {Load})
3496 .addUse(AArch64::X0, RegState::Implicit)
3497 .addDef(AArch64::X0, RegState::Implicit)
3498 .addRegMask(TRI.getTLSCallPreservedMask());
3499
3500 MIB.buildCopy(I.getOperand(0).getReg(), Register(AArch64::X0));
3501 RBI.constrainGenericRegister(I.getOperand(0).getReg(), AArch64::GPR64RegClass,
3502 MRI);
3503 I.eraseFromParent();
3504 return true;
3505}
3506
3507bool AArch64InstructionSelector::selectIntrinsicTrunc(
3508 MachineInstr &I, MachineRegisterInfo &MRI) const {
3509 const LLT SrcTy = MRI.getType(I.getOperand(0).getReg());
3510
3511 // Select the correct opcode.
3512 unsigned Opc = 0;
3513 if (!SrcTy.isVector()) {
3514 switch (SrcTy.getSizeInBits()) {
3515 default:
3516 case 16:
3517 Opc = AArch64::FRINTZHr;
3518 break;
3519 case 32:
3520 Opc = AArch64::FRINTZSr;
3521 break;
3522 case 64:
3523 Opc = AArch64::FRINTZDr;
3524 break;
3525 }
3526 } else {
3527 unsigned NumElts = SrcTy.getNumElements();
3528 switch (SrcTy.getElementType().getSizeInBits()) {
3529 default:
3530 break;
3531 case 16:
3532 if (NumElts == 4)
3533 Opc = AArch64::FRINTZv4f16;
3534 else if (NumElts == 8)
3535 Opc = AArch64::FRINTZv8f16;
3536 break;
3537 case 32:
3538 if (NumElts == 2)
3539 Opc = AArch64::FRINTZv2f32;
3540 else if (NumElts == 4)
3541 Opc = AArch64::FRINTZv4f32;
3542 break;
3543 case 64:
3544 if (NumElts == 2)
3545 Opc = AArch64::FRINTZv2f64;
3546 break;
3547 }
3548 }
3549
3550 if (!Opc) {
3551 // Didn't get an opcode above, bail.
3552 LLVM_DEBUG(dbgs() << "Unsupported type for G_INTRINSIC_TRUNC!\n")do { } while (false);
3553 return false;
3554 }
3555
3556 // Legalization would have set us up perfectly for this; we just need to
3557 // set the opcode and move on.
3558 I.setDesc(TII.get(Opc));
3559 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3560}
3561
3562bool AArch64InstructionSelector::selectIntrinsicRound(
3563 MachineInstr &I, MachineRegisterInfo &MRI) const {
3564 const LLT SrcTy = MRI.getType(I.getOperand(0).getReg());
3565
3566 // Select the correct opcode.
3567 unsigned Opc = 0;
3568 if (!SrcTy.isVector()) {
3569 switch (SrcTy.getSizeInBits()) {
3570 default:
3571 case 16:
3572 Opc = AArch64::FRINTAHr;
3573 break;
3574 case 32:
3575 Opc = AArch64::FRINTASr;
3576 break;
3577 case 64:
3578 Opc = AArch64::FRINTADr;
3579 break;
3580 }
3581 } else {
3582 unsigned NumElts = SrcTy.getNumElements();
3583 switch (SrcTy.getElementType().getSizeInBits()) {
3584 default:
3585 break;
3586 case 16:
3587 if (NumElts == 4)
3588 Opc = AArch64::FRINTAv4f16;
3589 else if (NumElts == 8)
3590 Opc = AArch64::FRINTAv8f16;
3591 break;
3592 case 32:
3593 if (NumElts == 2)
3594 Opc = AArch64::FRINTAv2f32;
3595 else if (NumElts == 4)
3596 Opc = AArch64::FRINTAv4f32;
3597 break;
3598 case 64:
3599 if (NumElts == 2)
3600 Opc = AArch64::FRINTAv2f64;
3601 break;
3602 }
3603 }
3604
3605 if (!Opc) {
3606 // Didn't get an opcode above, bail.
3607 LLVM_DEBUG(dbgs() << "Unsupported type for G_INTRINSIC_ROUND!\n")do { } while (false);
3608 return false;
3609 }
3610
3611 // Legalization would have set us up perfectly for this; we just need to
3612 // set the opcode and move on.
3613 I.setDesc(TII.get(Opc));
3614 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3615}
3616
3617bool AArch64InstructionSelector::selectVectorICmp(
3618 MachineInstr &I, MachineRegisterInfo &MRI) {
3619 Register DstReg = I.getOperand(0).getReg();
3620 LLT DstTy = MRI.getType(DstReg);
3621 Register SrcReg = I.getOperand(2).getReg();
3622 Register Src2Reg = I.getOperand(3).getReg();
3623 LLT SrcTy = MRI.getType(SrcReg);
3624
3625 unsigned SrcEltSize = SrcTy.getElementType().getSizeInBits();
3626 unsigned NumElts = DstTy.getNumElements();
3627
3628 // First index is element size, 0 == 8b, 1 == 16b, 2 == 32b, 3 == 64b
3629 // Second index is num elts, 0 == v2, 1 == v4, 2 == v8, 3 == v16
3630 // Third index is cc opcode:
3631 // 0 == eq
3632 // 1 == ugt
3633 // 2 == uge
3634 // 3 == ult
3635 // 4 == ule
3636 // 5 == sgt
3637 // 6 == sge
3638 // 7 == slt
3639 // 8 == sle
3640 // ne is done by negating 'eq' result.
3641
3642 // This table below assumes that for some comparisons the operands will be
3643 // commuted.
3644 // ult op == commute + ugt op
3645 // ule op == commute + uge op
3646 // slt op == commute + sgt op
3647 // sle op == commute + sge op
3648 unsigned PredIdx = 0;
3649 bool SwapOperands = false;
3650 CmpInst::Predicate Pred = (CmpInst::Predicate)I.getOperand(1).getPredicate();
3651 switch (Pred) {
3652 case CmpInst::ICMP_NE:
3653 case CmpInst::ICMP_EQ:
3654 PredIdx = 0;
3655 break;
3656 case CmpInst::ICMP_UGT:
3657 PredIdx = 1;
3658 break;
3659 case CmpInst::ICMP_UGE:
3660 PredIdx = 2;
3661 break;
3662 case CmpInst::ICMP_ULT:
3663 PredIdx = 3;
3664 SwapOperands = true;
3665 break;
3666 case CmpInst::ICMP_ULE:
3667 PredIdx = 4;
3668 SwapOperands = true;
3669 break;
3670 case CmpInst::ICMP_SGT:
3671 PredIdx = 5;
3672 break;
3673 case CmpInst::ICMP_SGE:
3674 PredIdx = 6;
3675 break;
3676 case CmpInst::ICMP_SLT:
3677 PredIdx = 7;
3678 SwapOperands = true;
3679 break;
3680 case CmpInst::ICMP_SLE:
3681 PredIdx = 8;
3682 SwapOperands = true;
3683 break;
3684 default:
3685 llvm_unreachable("Unhandled icmp predicate")__builtin_unreachable();
3686 return false;
3687 }
3688
3689 // This table obviously should be tablegen'd when we have our GISel native
3690 // tablegen selector.
3691
3692 static const unsigned OpcTable[4][4][9] = {
3693 {
3694 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3695 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3696 0 /* invalid */},
3697 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3698 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3699 0 /* invalid */},
3700 {AArch64::CMEQv8i8, AArch64::CMHIv8i8, AArch64::CMHSv8i8,
3701 AArch64::CMHIv8i8, AArch64::CMHSv8i8, AArch64::CMGTv8i8,
3702 AArch64::CMGEv8i8, AArch64::CMGTv8i8, AArch64::CMGEv8i8},
3703 {AArch64::CMEQv16i8, AArch64::CMHIv16i8, AArch64::CMHSv16i8,
3704 AArch64::CMHIv16i8, AArch64::CMHSv16i8, AArch64::CMGTv16i8,
3705 AArch64::CMGEv16i8, AArch64::CMGTv16i8, AArch64::CMGEv16i8}
3706 },
3707 {
3708 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3709 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3710 0 /* invalid */},
3711 {AArch64::CMEQv4i16, AArch64::CMHIv4i16, AArch64::CMHSv4i16,
3712 AArch64::CMHIv4i16, AArch64::CMHSv4i16, AArch64::CMGTv4i16,
3713 AArch64::CMGEv4i16, AArch64::CMGTv4i16, AArch64::CMGEv4i16},
3714 {AArch64::CMEQv8i16, AArch64::CMHIv8i16, AArch64::CMHSv8i16,
3715 AArch64::CMHIv8i16, AArch64::CMHSv8i16, AArch64::CMGTv8i16,
3716 AArch64::CMGEv8i16, AArch64::CMGTv8i16, AArch64::CMGEv8i16},
3717 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3718 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3719 0 /* invalid */}
3720 },
3721 {
3722 {AArch64::CMEQv2i32, AArch64::CMHIv2i32, AArch64::CMHSv2i32,
3723 AArch64::CMHIv2i32, AArch64::CMHSv2i32, AArch64::CMGTv2i32,
3724 AArch64::CMGEv2i32, AArch64::CMGTv2i32, AArch64::CMGEv2i32},
3725 {AArch64::CMEQv4i32, AArch64::CMHIv4i32, AArch64::CMHSv4i32,
3726 AArch64::CMHIv4i32, AArch64::CMHSv4i32, AArch64::CMGTv4i32,
3727 AArch64::CMGEv4i32, AArch64::CMGTv4i32, AArch64::CMGEv4i32},
3728 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3729 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3730 0 /* invalid */},
3731 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3732 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3733 0 /* invalid */}
3734 },
3735 {
3736 {AArch64::CMEQv2i64, AArch64::CMHIv2i64, AArch64::CMHSv2i64,
3737 AArch64::CMHIv2i64, AArch64::CMHSv2i64, AArch64::CMGTv2i64,
3738 AArch64::CMGEv2i64, AArch64::CMGTv2i64, AArch64::CMGEv2i64},
3739 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3740 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3741 0 /* invalid */},
3742 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3743 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3744 0 /* invalid */},
3745 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3746 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3747 0 /* invalid */}
3748 },
3749 };
3750 unsigned EltIdx = Log2_32(SrcEltSize / 8);
3751 unsigned NumEltsIdx = Log2_32(NumElts / 2);
3752 unsigned Opc = OpcTable[EltIdx][NumEltsIdx][PredIdx];
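  // As a worked example of the lookup: an unsigned '<' compare of <4 x s32>
  // operands has EltIdx = Log2_32(32 / 8) = 2, NumEltsIdx = Log2_32(4 / 2) = 1
  // and PredIdx = 3, which selects AArch64::CMHIv4i32 with SwapOperands set
  // (ult is emitted as commute + ugt, per the comment above the table).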
3753 if (!Opc) {
3754 LLVM_DEBUG(dbgs() << "Could not map G_ICMP to cmp opcode")do { } while (false);
3755 return false;
3756 }
3757
3758 const RegisterBank &VecRB = *RBI.getRegBank(SrcReg, MRI, TRI);
3759 const TargetRegisterClass *SrcRC =
3760 getRegClassForTypeOnBank(SrcTy, VecRB, RBI, true);
3761 if (!SrcRC) {
3762 LLVM_DEBUG(dbgs() << "Could not determine source register class.\n")do { } while (false);
3763 return false;
3764 }
3765
3766 unsigned NotOpc = Pred == ICmpInst::ICMP_NE ? AArch64::NOTv8i8 : 0;
3767 if (SrcTy.getSizeInBits() == 128)
3768 NotOpc = NotOpc ? AArch64::NOTv16i8 : 0;
3769
3770 if (SwapOperands)
3771 std::swap(SrcReg, Src2Reg);
3772
3773 auto Cmp = MIB.buildInstr(Opc, {SrcRC}, {SrcReg, Src2Reg});
3774 constrainSelectedInstRegOperands(*Cmp, TII, TRI, RBI);
3775
3776 // Invert if we had a 'ne' cc.
3777 if (NotOpc) {
3778 Cmp = MIB.buildInstr(NotOpc, {DstReg}, {Cmp});
3779 constrainSelectedInstRegOperands(*Cmp, TII, TRI, RBI);
3780 } else {
3781 MIB.buildCopy(DstReg, Cmp.getReg(0));
3782 }
3783 RBI.constrainGenericRegister(DstReg, *SrcRC, MRI);
3784 I.eraseFromParent();
3785 return true;
3786}
3787
3788MachineInstr *AArch64InstructionSelector::emitScalarToVector(
3789 unsigned EltSize, const TargetRegisterClass *DstRC, Register Scalar,
3790 MachineIRBuilder &MIRBuilder) const {
3791 auto Undef = MIRBuilder.buildInstr(TargetOpcode::IMPLICIT_DEF, {DstRC}, {});
3792
3793 auto BuildFn = [&](unsigned SubregIndex) {
3794 auto Ins =
3795 MIRBuilder
3796 .buildInstr(TargetOpcode::INSERT_SUBREG, {DstRC}, {Undef, Scalar})
3797 .addImm(SubregIndex);
3798 constrainSelectedInstRegOperands(*Undef, TII, TRI, RBI);
3799 constrainSelectedInstRegOperands(*Ins, TII, TRI, RBI);
3800 return &*Ins;
3801 };
3802
3803 switch (EltSize) {
3804 case 16:
3805 return BuildFn(AArch64::hsub);
3806 case 32:
3807 return BuildFn(AArch64::ssub);
3808 case 64:
3809 return BuildFn(AArch64::dsub);
3810 default:
3811 return nullptr;
3812 }
3813}
3814
3815bool AArch64InstructionSelector::selectMergeValues(
3816 MachineInstr &I, MachineRegisterInfo &MRI) {
3817 assert(I.getOpcode() == TargetOpcode::G_MERGE_VALUES && "unexpected opcode")(static_cast<void> (0));
3818 const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
3819 const LLT SrcTy = MRI.getType(I.getOperand(1).getReg());
3820 assert(!DstTy.isVector() && !SrcTy.isVector() && "invalid merge operation")(static_cast<void> (0));
3821 const RegisterBank &RB = *RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI);
3822
3823 if (I.getNumOperands() != 3)
3824 return false;
3825
3826 // Merging 2 s64s into an s128.
3827 if (DstTy == LLT::scalar(128)) {
3828 if (SrcTy.getSizeInBits() != 64)
3829 return false;
3830 Register DstReg = I.getOperand(0).getReg();
3831 Register Src1Reg = I.getOperand(1).getReg();
3832 Register Src2Reg = I.getOperand(2).getReg();
3833 auto Tmp = MIB.buildInstr(TargetOpcode::IMPLICIT_DEF, {DstTy}, {});
3834 MachineInstr *InsMI =
3835 emitLaneInsert(None, Tmp.getReg(0), Src1Reg, /* LaneIdx */ 0, RB, MIB);
3836 if (!InsMI)
3837 return false;
3838 MachineInstr *Ins2MI = emitLaneInsert(DstReg, InsMI->getOperand(0).getReg(),
3839 Src2Reg, /* LaneIdx */ 1, RB, MIB);
3840 if (!Ins2MI)
3841 return false;
3842 constrainSelectedInstRegOperands(*InsMI, TII, TRI, RBI);
3843 constrainSelectedInstRegOperands(*Ins2MI, TII, TRI, RBI);
3844 I.eraseFromParent();
3845 return true;
3846 }
3847
3848 if (RB.getID() != AArch64::GPRRegBankID)
3849 return false;
3850
3851 if (DstTy.getSizeInBits() != 64 || SrcTy.getSizeInBits() != 32)
3852 return false;
3853
3854 auto *DstRC = &AArch64::GPR64RegClass;
3855 Register SubToRegDef = MRI.createVirtualRegister(DstRC);
3856 MachineInstr &SubRegMI = *BuildMI(*I.getParent(), I, I.getDebugLoc(),
3857 TII.get(TargetOpcode::SUBREG_TO_REG))
3858 .addDef(SubToRegDef)
3859 .addImm(0)
3860 .addUse(I.getOperand(1).getReg())
3861 .addImm(AArch64::sub_32);
3862 Register SubToRegDef2 = MRI.createVirtualRegister(DstRC);
3863 // Need to anyext the second scalar before we can use bfm
3864 MachineInstr &SubRegMI2 = *BuildMI(*I.getParent(), I, I.getDebugLoc(),
3865 TII.get(TargetOpcode::SUBREG_TO_REG))
3866 .addDef(SubToRegDef2)
3867 .addImm(0)
3868 .addUse(I.getOperand(2).getReg())
3869 .addImm(AArch64::sub_32);
3870 MachineInstr &BFM =
3871 *BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::BFMXri))
3872 .addDef(I.getOperand(0).getReg())
3873 .addUse(SubToRegDef)
3874 .addUse(SubToRegDef2)
3875 .addImm(32)
3876 .addImm(31);
3877 constrainSelectedInstRegOperands(SubRegMI, TII, TRI, RBI);
3878 constrainSelectedInstRegOperands(SubRegMI2, TII, TRI, RBI);
3879 constrainSelectedInstRegOperands(BFM, TII, TRI, RBI);
3880 I.eraseFromParent();
3881 return true;
3882}
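// Sketch of the GPR path above for %d:s64 = G_MERGE_VALUES %a:s32, %b:s32:
//   %lo:gpr64 = SUBREG_TO_REG 0, %a, sub_32
//   %hi:gpr64 = SUBREG_TO_REG 0, %b, sub_32
//   %d        = BFMXri %lo, %hi, 32, 31
// The BFM inserts the low 32 bits of %hi into bits [63:32] of the result,
// leaving %a in the low half, matching G_MERGE_VALUES operand order.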
3883
3884static bool getLaneCopyOpcode(unsigned &CopyOpc, unsigned &ExtractSubReg,
3885 const unsigned EltSize) {
3886 // Choose a lane copy opcode and subregister based off of the size of the
3887 // vector's elements.
3888 switch (EltSize) {
3889 case 8:
3890 CopyOpc = AArch64::CPYi8;
3891 ExtractSubReg = AArch64::bsub;
3892 break;
3893 case 16:
3894 CopyOpc = AArch64::CPYi16;
3895 ExtractSubReg = AArch64::hsub;
3896 break;
3897 case 32:
3898 CopyOpc = AArch64::CPYi32;
3899 ExtractSubReg = AArch64::ssub;
3900 break;
3901 case 64:
3902 CopyOpc = AArch64::CPYi64;
3903 ExtractSubReg = AArch64::dsub;
3904 break;
3905 default:
3906 // Unknown size, bail out.
3907 LLVM_DEBUG(dbgs() << "Elt size '" << EltSize << "' unsupported.\n")do { } while (false);
3908 return false;
3909 }
3910 return true;
3911}
3912
3913MachineInstr *AArch64InstructionSelector::emitExtractVectorElt(
3914 Optional<Register> DstReg, const RegisterBank &DstRB, LLT ScalarTy,
3915 Register VecReg, unsigned LaneIdx, MachineIRBuilder &MIRBuilder) const {
3916 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
3917 unsigned CopyOpc = 0;
3918 unsigned ExtractSubReg = 0;
3919 if (!getLaneCopyOpcode(CopyOpc, ExtractSubReg, ScalarTy.getSizeInBits())) {
3920 LLVM_DEBUG(do { } while (false)
3921 dbgs() << "Couldn't determine lane copy opcode for instruction.\n")do { } while (false);
3922 return nullptr;
3923 }
3924
3925 const TargetRegisterClass *DstRC =
3926 getRegClassForTypeOnBank(ScalarTy, DstRB, RBI, true);
3927 if (!DstRC) {
3928 LLVM_DEBUG(dbgs() << "Could not determine destination register class.\n")do { } while (false);
3929 return nullptr;
3930 }
3931
3932 const RegisterBank &VecRB = *RBI.getRegBank(VecReg, MRI, TRI);
3933 const LLT &VecTy = MRI.getType(VecReg);
3934 const TargetRegisterClass *VecRC =
3935 getRegClassForTypeOnBank(VecTy, VecRB, RBI, true);
3936 if (!VecRC) {
3937 LLVM_DEBUG(dbgs() << "Could not determine source register class.\n")do { } while (false);
3938 return nullptr;
3939 }
3940
3941 // The register that we're going to copy into.
3942 Register InsertReg = VecReg;
3943 if (!DstReg)
3944 DstReg = MRI.createVirtualRegister(DstRC);
3945 // If the lane index is 0, we just use a subregister COPY.
3946 if (LaneIdx == 0) {
3947 auto Copy = MIRBuilder.buildInstr(TargetOpcode::COPY, {*DstReg}, {})
3948 .addReg(VecReg, 0, ExtractSubReg);
3949 RBI.constrainGenericRegister(*DstReg, *DstRC, MRI);
3950 return &*Copy;
3951 }
3952
3953 // Lane copies require 128-bit wide registers. If we're dealing with an
3954 // unpacked vector, then we need to move up to that width. Insert an implicit
3955 // def and a subregister insert to get us there.
3956 if (VecTy.getSizeInBits() != 128) {
3957 MachineInstr *ScalarToVector = emitScalarToVector(
3958 VecTy.getSizeInBits(), &AArch64::FPR128RegClass, VecReg, MIRBuilder);
3959 if (!ScalarToVector)
3960 return nullptr;
3961 InsertReg = ScalarToVector->getOperand(0).getReg();
3962 }
3963
3964 MachineInstr *LaneCopyMI =
3965 MIRBuilder.buildInstr(CopyOpc, {*DstReg}, {InsertReg}).addImm(LaneIdx);
3966 constrainSelectedInstRegOperands(*LaneCopyMI, TII, TRI, RBI);
3967
3968 // Make sure that we actually constrain the initial copy.
3969 RBI.constrainGenericRegister(*DstReg, *DstRC, MRI);
3970 return LaneCopyMI;
3971}
3972
3973bool AArch64InstructionSelector::selectExtractElt(
3974 MachineInstr &I, MachineRegisterInfo &MRI) {
3975 assert(I.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT &&(static_cast<void> (0))
3976 "unexpected opcode!")(static_cast<void> (0));
3977 Register DstReg = I.getOperand(0).getReg();
3978 const LLT NarrowTy = MRI.getType(DstReg);
3979 const Register SrcReg = I.getOperand(1).getReg();
3980 const LLT WideTy = MRI.getType(SrcReg);
3981 (void)WideTy;
3982 assert(WideTy.getSizeInBits() >= NarrowTy.getSizeInBits() &&(static_cast<void> (0))
3983 "source register size too small!")(static_cast<void> (0));
3984 assert(!NarrowTy.isVector() && "cannot extract vector into vector!")(static_cast<void> (0));
3985
3986 // Need the lane index to determine the correct copy opcode.
3987 MachineOperand &LaneIdxOp = I.getOperand(2);
3988 assert(LaneIdxOp.isReg() && "Lane index operand was not a register?")(static_cast<void> (0));
3989
3990 if (RBI.getRegBank(DstReg, MRI, TRI)->getID() != AArch64::FPRRegBankID) {
3991 LLVM_DEBUG(dbgs() << "Cannot extract into GPR.\n")do { } while (false);
3992 return false;
3993 }
3994
3995 // Find the index to extract from.
3996 auto VRegAndVal = getConstantVRegValWithLookThrough(LaneIdxOp.getReg(), MRI);
3997 if (!VRegAndVal)
3998 return false;
3999 unsigned LaneIdx = VRegAndVal->Value.getSExtValue();
4000
4001
4002 const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
4003 MachineInstr *Extract = emitExtractVectorElt(DstReg, DstRB, NarrowTy, SrcReg,
4004 LaneIdx, MIB);
4005 if (!Extract)
4006 return false;
4007
4008 I.eraseFromParent();
4009 return true;
4010}
4011
4012bool AArch64InstructionSelector::selectSplitVectorUnmerge(
4013 MachineInstr &I, MachineRegisterInfo &MRI) {
4014 unsigned NumElts = I.getNumOperands() - 1;
4015 Register SrcReg = I.getOperand(NumElts).getReg();
4016 const LLT NarrowTy = MRI.getType(I.getOperand(0).getReg());
4017 const LLT SrcTy = MRI.getType(SrcReg);
4018
4019 assert(NarrowTy.isVector() && "Expected an unmerge into vectors")(static_cast<void> (0));
4020 if (SrcTy.getSizeInBits() > 128) {
4021 LLVM_DEBUG(dbgs() << "Unexpected vector type for vec split unmerge")do { } while (false);
4022 return false;
4023 }
4024
4025 // We implement a split vector operation by treating the sub-vectors as
4026 // scalars and extracting them.
4027 const RegisterBank &DstRB =
4028 *RBI.getRegBank(I.getOperand(0).getReg(), MRI, TRI);
4029 for (unsigned OpIdx = 0; OpIdx < NumElts; ++OpIdx) {
4030 Register Dst = I.getOperand(OpIdx).getReg();
4031 MachineInstr *Extract =
4032 emitExtractVectorElt(Dst, DstRB, NarrowTy, SrcReg, OpIdx, MIB);
4033 if (!Extract)
4034 return false;
4035 }
4036 I.eraseFromParent();
4037 return true;
4038}
4039
4040bool AArch64InstructionSelector::selectUnmergeValues(MachineInstr &I,
4041 MachineRegisterInfo &MRI) {
4042 assert(I.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&(static_cast<void> (0))
4043 "unexpected opcode")(static_cast<void> (0));
4044
4045 // TODO: Handle unmerging into GPRs and from scalars to scalars.
4046 if (RBI.getRegBank(I.getOperand(0).getReg(), MRI, TRI)->getID() !=
4047 AArch64::FPRRegBankID ||
4048 RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI)->getID() !=
4049 AArch64::FPRRegBankID) {
4050 LLVM_DEBUG(dbgs() << "Unmerging vector-to-gpr and scalar-to-scalar "do { } while (false)
4051 "currently unsupported.\n")do { } while (false);
4052 return false;
4053 }
4054
4055 // The last operand is the vector source register, and every other operand is
4056 // a register to unpack into.
4057 unsigned NumElts = I.getNumOperands() - 1;
4058 Register SrcReg = I.getOperand(NumElts).getReg();
4059 const LLT NarrowTy = MRI.getType(I.getOperand(0).getReg());
4060 const LLT WideTy = MRI.getType(SrcReg);
4061 (void)WideTy;
4062 assert((WideTy.isVector() || WideTy.getSizeInBits() == 128) &&(static_cast<void> (0))
4063 "can only unmerge from vector or s128 types!")(static_cast<void> (0));
4064 assert(WideTy.getSizeInBits() > NarrowTy.getSizeInBits() &&(static_cast<void> (0))
4065 "source register size too small!")(static_cast<void> (0));
4066
4067 if (!NarrowTy.isScalar())
4068 return selectSplitVectorUnmerge(I, MRI);
4069
4070 // Choose a lane copy opcode and subregister based off of the size of the
4071 // vector's elements.
4072 unsigned CopyOpc = 0;
4073 unsigned ExtractSubReg = 0;
4074 if (!getLaneCopyOpcode(CopyOpc, ExtractSubReg, NarrowTy.getSizeInBits()))
4075 return false;
4076
4077 // Set up for the lane copies.
4078 MachineBasicBlock &MBB = *I.getParent();
4079
4080 // Stores the registers we'll be copying from.
4081 SmallVector<Register, 4> InsertRegs;
4082
4083 // We'll use the first register twice, so we only need NumElts-1 registers.
4084 unsigned NumInsertRegs = NumElts - 1;
4085
4086 // If our elements fit into exactly 128 bits, then we can copy from the source
4087 // directly. Otherwise, we need to do a bit of setup with some subregister
4088 // inserts.
4089 if (NarrowTy.getSizeInBits() * NumElts == 128) {
4090 InsertRegs = SmallVector<Register, 4>(NumInsertRegs, SrcReg);
4091 } else {
4092 // No. We have to perform subregister inserts. For each insert, create an
4093 // implicit def and a subregister insert, and save the register we create.
4094 const TargetRegisterClass *RC =
4095 getMinClassForRegBank(*RBI.getRegBank(SrcReg, MRI, TRI),
4096 WideTy.getScalarSizeInBits() * NumElts);
4097 unsigned SubReg = 0;
4098 bool Found = getSubRegForClass(RC, TRI, SubReg);
4099 (void)Found;
4100    assert(Found && "expected to find last operand's subreg idx")(static_cast<void> (0));
4101 for (unsigned Idx = 0; Idx < NumInsertRegs; ++Idx) {
4102 Register ImpDefReg = MRI.createVirtualRegister(&AArch64::FPR128RegClass);
4103 MachineInstr &ImpDefMI =
4104 *BuildMI(MBB, I, I.getDebugLoc(), TII.get(TargetOpcode::IMPLICIT_DEF),
4105 ImpDefReg);
4106
4107 // Now, create the subregister insert from SrcReg.
4108 Register InsertReg = MRI.createVirtualRegister(&AArch64::FPR128RegClass);
4109 MachineInstr &InsMI =
4110 *BuildMI(MBB, I, I.getDebugLoc(),
4111 TII.get(TargetOpcode::INSERT_SUBREG), InsertReg)
4112 .addUse(ImpDefReg)
4113 .addUse(SrcReg)
4114 .addImm(SubReg);
4115
4116 constrainSelectedInstRegOperands(ImpDefMI, TII, TRI, RBI);
4117 constrainSelectedInstRegOperands(InsMI, TII, TRI, RBI);
4118
4119 // Save the register so that we can copy from it after.
4120 InsertRegs.push_back(InsertReg);
4121 }
4122 }
4123
4124 // Now that we've created any necessary subregister inserts, we can
4125 // create the copies.
4126 //
4127 // Perform the first copy separately as a subregister copy.
4128 Register CopyTo = I.getOperand(0).getReg();
4129 auto FirstCopy = MIB.buildInstr(TargetOpcode::COPY, {CopyTo}, {})
4130 .addReg(InsertRegs[0], 0, ExtractSubReg);
4131 constrainSelectedInstRegOperands(*FirstCopy, TII, TRI, RBI);
4132
4133 // Now, perform the remaining copies as vector lane copies.
4134 unsigned LaneIdx = 1;
4135 for (Register InsReg : InsertRegs) {
4136 Register CopyTo = I.getOperand(LaneIdx).getReg();
4137 MachineInstr &CopyInst =
4138 *BuildMI(MBB, I, I.getDebugLoc(), TII.get(CopyOpc), CopyTo)
4139 .addUse(InsReg)
4140 .addImm(LaneIdx);
4141 constrainSelectedInstRegOperands(CopyInst, TII, TRI, RBI);
4142 ++LaneIdx;
4143 }
4144
4145 // Separately constrain the first copy's destination. Because of the
4146 // limitation in constrainOperandRegClass, we can't guarantee that this will
4147 // actually be constrained. So, do it ourselves using the second operand.
4148 const TargetRegisterClass *RC =
4149 MRI.getRegClassOrNull(I.getOperand(1).getReg());
4150 if (!RC) {
4151 LLVM_DEBUG(dbgs() << "Couldn't constrain copy destination.\n")do { } while (false);
4152 return false;
4153 }
4154
4155 RBI.constrainGenericRegister(CopyTo, *RC, MRI);
4156 I.eraseFromParent();
4157 return true;
4158}
4159
4160bool AArch64InstructionSelector::selectConcatVectors(
4161 MachineInstr &I, MachineRegisterInfo &MRI) {
4162 assert(I.getOpcode() == TargetOpcode::G_CONCAT_VECTORS &&(static_cast<void> (0))
4163 "Unexpected opcode")(static_cast<void> (0));
4164 Register Dst = I.getOperand(0).getReg();
4165 Register Op1 = I.getOperand(1).getReg();
4166 Register Op2 = I.getOperand(2).getReg();
4167 MachineInstr *ConcatMI = emitVectorConcat(Dst, Op1, Op2, MIB);
4168 if (!ConcatMI)
4169 return false;
4170 I.eraseFromParent();
4171 return true;
4172}
4173
4174unsigned
4175AArch64InstructionSelector::emitConstantPoolEntry(const Constant *CPVal,
4176 MachineFunction &MF) const {
4177 Type *CPTy = CPVal->getType();
4178 Align Alignment = MF.getDataLayout().getPrefTypeAlign(CPTy);
4179
4180 MachineConstantPool *MCP = MF.getConstantPool();
4181 return MCP->getConstantPoolIndex(CPVal, Alignment);
4182}
4183
4184MachineInstr *AArch64InstructionSelector::emitLoadFromConstantPool(
4185 const Constant *CPVal, MachineIRBuilder &MIRBuilder) const {
4186 auto &MF = MIRBuilder.getMF();
4187 unsigned CPIdx = emitConstantPoolEntry(CPVal, MF);
4188
4189 auto Adrp =
4190 MIRBuilder.buildInstr(AArch64::ADRP, {&AArch64::GPR64RegClass}, {})
4191 .addConstantPoolIndex(CPIdx, 0, AArch64II::MO_PAGE);
4192
4193 MachineInstr *LoadMI = nullptr;
4194 MachinePointerInfo PtrInfo = MachinePointerInfo::getConstantPool(MF);
4195 unsigned Size = MIRBuilder.getDataLayout().getTypeStoreSize(CPVal->getType());
4196 switch (Size) {
4197 case 16:
4198 LoadMI =
4199 &*MIRBuilder
4200 .buildInstr(AArch64::LDRQui, {&AArch64::FPR128RegClass}, {Adrp})
4201 .addConstantPoolIndex(CPIdx, 0,
4202 AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
4203 break;
4204 case 8:
4205 LoadMI =
4206 &*MIRBuilder
4207 .buildInstr(AArch64::LDRDui, {&AArch64::FPR64RegClass}, {Adrp})
4208 .addConstantPoolIndex(CPIdx, 0,
4209 AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
4210 break;
4211 case 4:
4212 LoadMI =
4213 &*MIRBuilder
4214 .buildInstr(AArch64::LDRSui, {&AArch64::FPR32RegClass}, {Adrp})
4215 .addConstantPoolIndex(CPIdx, 0,
4216 AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
4217 break;
4218 default:
4219 LLVM_DEBUG(dbgs() << "Could not load from constant pool of type "do { } while (false)
4220 << *CPVal->getType())do { } while (false);
4221 return nullptr;
4222 }
4223 LoadMI->addMemOperand(MF, MF.getMachineMemOperand(PtrInfo,
4224 MachineMemOperand::MOLoad,
4225 Size, Align(Size)));
4226 constrainSelectedInstRegOperands(*Adrp, TII, TRI, RBI);
4227 constrainSelectedInstRegOperands(*LoadMI, TII, TRI, RBI);
4228 return LoadMI;
4229}
4230
4231/// Return an <Opcode, SubregIndex> pair to do a vector elt insert of a given
4232/// size and RB.
4233static std::pair<unsigned, unsigned>
4234getInsertVecEltOpInfo(const RegisterBank &RB, unsigned EltSize) {
4235 unsigned Opc, SubregIdx;
4236 if (RB.getID() == AArch64::GPRRegBankID) {
4237 if (EltSize == 16) {
4238 Opc = AArch64::INSvi16gpr;
4239 SubregIdx = AArch64::ssub;
4240 } else if (EltSize == 32) {
4241 Opc = AArch64::INSvi32gpr;
4242 SubregIdx = AArch64::ssub;
4243 } else if (EltSize == 64) {
4244 Opc = AArch64::INSvi64gpr;
4245 SubregIdx = AArch64::dsub;
4246 } else {
4247 llvm_unreachable("invalid elt size!")__builtin_unreachable();
4248 }
4249 } else {
4250 if (EltSize == 8) {
4251 Opc = AArch64::INSvi8lane;
4252 SubregIdx = AArch64::bsub;
4253 } else if (EltSize == 16) {
4254 Opc = AArch64::INSvi16lane;
4255 SubregIdx = AArch64::hsub;
4256 } else if (EltSize == 32) {
4257 Opc = AArch64::INSvi32lane;
4258 SubregIdx = AArch64::ssub;
4259 } else if (EltSize == 64) {
4260 Opc = AArch64::INSvi64lane;
4261 SubregIdx = AArch64::dsub;
4262 } else {
4263 llvm_unreachable("invalid elt size!")__builtin_unreachable();
4264 }
4265 }
4266 return std::make_pair(Opc, SubregIdx);
4267}
4268
4269MachineInstr *AArch64InstructionSelector::emitInstr(
4270 unsigned Opcode, std::initializer_list<llvm::DstOp> DstOps,
4271 std::initializer_list<llvm::SrcOp> SrcOps, MachineIRBuilder &MIRBuilder,
4272 const ComplexRendererFns &RenderFns) const {
4273 assert(Opcode && "Expected an opcode?")(static_cast<void> (0));
4274 assert(!isPreISelGenericOpcode(Opcode) &&(static_cast<void> (0))
4275 "Function should only be used to produce selected instructions!")(static_cast<void> (0));
4276 auto MI = MIRBuilder.buildInstr(Opcode, DstOps, SrcOps);
4277 if (RenderFns)
4278 for (auto &Fn : *RenderFns)
4279 Fn(MI);
4280 constrainSelectedInstRegOperands(*MI, TII, TRI, RBI);
4281 return &*MI;
4282}
4283
4284MachineInstr *AArch64InstructionSelector::emitAddSub(
4285 const std::array<std::array<unsigned, 2>, 5> &AddrModeAndSizeToOpcode,
4286 Register Dst, MachineOperand &LHS, MachineOperand &RHS,
4287 MachineIRBuilder &MIRBuilder) const {
4288 MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
4289 assert(LHS.isReg() && RHS.isReg() && "Expected register operands?")(static_cast<void> (0));
4290 auto Ty = MRI.getType(LHS.getReg());
4291 assert(!Ty.isVector() && "Expected a scalar or pointer?")(static_cast<void> (0));
4292 unsigned Size = Ty.getSizeInBits();
4293 assert((Size == 32 || Size == 64) && "Expected a 32-bit or 64-bit type only")(static_cast<void> (0));
4294 bool Is32Bit = Size == 32;
4295
4296 // INSTRri form with positive arithmetic immediate.
4297 if (auto Fns = selectArithImmed(RHS))
4298 return emitInstr(AddrModeAndSizeToOpcode[0][Is32Bit], {Dst}, {LHS},
4299 MIRBuilder, Fns);
4300
4301 // INSTRri form with negative arithmetic immediate.
4302 if (auto Fns = selectNegArithImmed(RHS))
4303 return emitInstr(AddrModeAndSizeToOpcode[3][Is32Bit], {Dst}, {LHS},
4304 MIRBuilder, Fns);
4305
4306 // INSTRrx form.
4307 if (auto Fns = selectArithExtendedRegister(RHS))
4308 return emitInstr(AddrModeAndSizeToOpcode[4][Is32Bit], {Dst}, {LHS},
4309 MIRBuilder, Fns);
4310
4311 // INSTRrs form.
4312 if (auto Fns = selectShiftedRegister(RHS))
4313 return emitInstr(AddrModeAndSizeToOpcode[1][Is32Bit], {Dst}, {LHS},
4314 MIRBuilder, Fns);
4315 return emitInstr(AddrModeAndSizeToOpcode[2][Is32Bit], {Dst}, {LHS, RHS},
4316 MIRBuilder);
4317}
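// For instance, with the emitADD opcode table below, a 64-bit G_PTR_ADD with a
// small constant offset would typically come out as
//   ADDXri %dst, %lhs, 16, 0    (offset +16, folded by selectArithImmed)
//   SUBXri %dst, %lhs, 16, 0    (offset -16, folded by selectNegArithImmed)
// falling back to the extended-register, shifted-register or plain rr forms
// only when no immediate can be folded.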
4318
4319MachineInstr *
4320AArch64InstructionSelector::emitADD(Register DefReg, MachineOperand &LHS,
4321 MachineOperand &RHS,
4322 MachineIRBuilder &MIRBuilder) const {
4323 const std::array<std::array<unsigned, 2>, 5> OpcTable{
4324 {{AArch64::ADDXri, AArch64::ADDWri},
4325 {AArch64::ADDXrs, AArch64::ADDWrs},
4326 {AArch64::ADDXrr, AArch64::ADDWrr},
4327 {AArch64::SUBXri, AArch64::SUBWri},
4328 {AArch64::ADDXrx, AArch64::ADDWrx}}};
4329 return emitAddSub(OpcTable, DefReg, LHS, RHS, MIRBuilder);
4330}
4331
4332MachineInstr *
4333AArch64InstructionSelector::emitADDS(Register Dst, MachineOperand &LHS,
4334 MachineOperand &RHS,
4335 MachineIRBuilder &MIRBuilder) const {
4336 const std::array<std::array<unsigned, 2>, 5> OpcTable{
4337 {{AArch64::ADDSXri, AArch64::ADDSWri},
4338 {AArch64::ADDSXrs, AArch64::ADDSWrs},
4339 {AArch64::ADDSXrr, AArch64::ADDSWrr},
4340 {AArch64::SUBSXri, AArch64::SUBSWri},
4341 {AArch64::ADDSXrx, AArch64::ADDSWrx}}};
4342 return emitAddSub(OpcTable, Dst, LHS, RHS, MIRBuilder);
4343}
4344
4345MachineInstr *
4346AArch64InstructionSelector::emitSUBS(Register Dst, MachineOperand &LHS,
4347 MachineOperand &RHS,
4348 MachineIRBuilder &MIRBuilder) const {
4349 const std::array<std::array<unsigned, 2>, 5> OpcTable{
4350 {{AArch64::SUBSXri, AArch64::SUBSWri},
4351 {AArch64::SUBSXrs, AArch64::SUBSWrs},
4352 {AArch64::SUBSXrr, AArch64::SUBSWrr},
4353 {AArch64::ADDSXri, AArch64::ADDSWri},
4354 {AArch64::SUBSXrx, AArch64::SUBSWrx}}};
4355 return emitAddSub(OpcTable, Dst, LHS, RHS, MIRBuilder);
4356}
4357
4358MachineInstr *
4359AArch64InstructionSelector::emitCMN(MachineOperand &LHS, MachineOperand &RHS,
4360 MachineIRBuilder &MIRBuilder) const {
4361 MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
4362 bool Is32Bit = (MRI.getType(LHS.getReg()).getSizeInBits() == 32);
4363 auto RC = Is32Bit ? &AArch64::GPR32RegClass : &AArch64::GPR64RegClass;
4364 return emitADDS(MRI.createVirtualRegister(RC), LHS, RHS, MIRBuilder);
4365}
4366
4367MachineInstr *
4368AArch64InstructionSelector::emitTST(MachineOperand &LHS, MachineOperand &RHS,
4369 MachineIRBuilder &MIRBuilder) const {
4370 assert(LHS.isReg() && RHS.isReg() && "Expected register operands?")(static_cast<void> (0));
4371 MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
4372 LLT Ty = MRI.getType(LHS.getReg());
4373 unsigned RegSize = Ty.getSizeInBits();
4374 bool Is32Bit = (RegSize == 32);
4375 const unsigned OpcTable[3][2] = {{AArch64::ANDSXri, AArch64::ANDSWri},
4376 {AArch64::ANDSXrs, AArch64::ANDSWrs},
4377 {AArch64::ANDSXrr, AArch64::ANDSWrr}};
4378 // ANDS needs a logical immediate for its immediate form. Check if we can
4379 // fold one in.
4380 if (auto ValAndVReg = getConstantVRegValWithLookThrough(RHS.getReg(), MRI)) {
4381 int64_t Imm = ValAndVReg->Value.getSExtValue();
4382
4383 if (AArch64_AM::isLogicalImmediate(Imm, RegSize)) {
4384 auto TstMI = MIRBuilder.buildInstr(OpcTable[0][Is32Bit], {Ty}, {LHS});
4385 TstMI.addImm(AArch64_AM::encodeLogicalImmediate(Imm, RegSize));
4386 constrainSelectedInstRegOperands(*TstMI, TII, TRI, RBI);
4387 return &*TstMI;
4388 }
4389 }
4390
4391 if (auto Fns = selectLogicalShiftedRegister(RHS))
4392 return emitInstr(OpcTable[1][Is32Bit], {Ty}, {LHS}, MIRBuilder, Fns);
4393 return emitInstr(OpcTable[2][Is32Bit], {Ty}, {LHS, RHS}, MIRBuilder);
4394}
4395
4396MachineInstr *AArch64InstructionSelector::emitIntegerCompare(
4397 MachineOperand &LHS, MachineOperand &RHS, MachineOperand &Predicate,
4398 MachineIRBuilder &MIRBuilder) const {
4399 assert(LHS.isReg() && RHS.isReg() && "Expected LHS and RHS to be registers!");
4400 assert(Predicate.isPredicate() && "Expected predicate?");
4401 MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
4402 LLT CmpTy = MRI.getType(LHS.getReg());
4403 assert(!CmpTy.isVector() && "Expected scalar or pointer");
4404 unsigned Size = CmpTy.getSizeInBits();
4405 (void)Size;
4406 assert((Size == 32 || Size == 64) && "Expected a 32-bit or 64-bit LHS/RHS?");
4407 // Fold the compare into a cmn or tst if possible.
4408 if (auto FoldCmp = tryFoldIntegerCompare(LHS, RHS, Predicate, MIRBuilder))
4409 return FoldCmp;
4410 auto Dst = MRI.cloneVirtualRegister(LHS.getReg());
4411 return emitSUBS(Dst, LHS, RHS, MIRBuilder);
4412}
4413
4414MachineInstr *AArch64InstructionSelector::emitCSetForFCmp(
4415 Register Dst, CmpInst::Predicate Pred, MachineIRBuilder &MIRBuilder) const {
4416 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
4417#ifndef NDEBUG
4418 LLT Ty = MRI.getType(Dst);
4419 assert(!Ty.isVector() && Ty.getSizeInBits() == 32 &&
4420 "Expected a 32-bit scalar register?");
4421#endif
4422 const Register ZeroReg = AArch64::WZR;
4423 auto EmitCSet = [&](Register CsetDst, AArch64CC::CondCode CC) {
4424 auto CSet =
4425 MIRBuilder.buildInstr(AArch64::CSINCWr, {CsetDst}, {ZeroReg, ZeroReg})
4426 .addImm(getInvertedCondCode(CC));
4427 constrainSelectedInstRegOperands(*CSet, TII, TRI, RBI);
4428 return &*CSet;
4429 };
4430
4431 AArch64CC::CondCode CC1, CC2;
4432 changeFCMPPredToAArch64CC(Pred, CC1, CC2);
4433 if (CC2 == AArch64CC::AL)
4434 return EmitCSet(Dst, CC1);
4435
4436 const TargetRegisterClass *RC = &AArch64::GPR32RegClass;
4437 Register Def1Reg = MRI.createVirtualRegister(RC);
4438 Register Def2Reg = MRI.createVirtualRegister(RC);
4439 EmitCSet(Def1Reg, CC1);
4440 EmitCSet(Def2Reg, CC2);
4441 auto OrMI = MIRBuilder.buildInstr(AArch64::ORRWrr, {Dst}, {Def1Reg, Def2Reg});
4442 constrainSelectedInstRegOperands(*OrMI, TII, TRI, RBI);
4443 return &*OrMI;
4444}
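// Worked example for the two-condition-code path above. This is a sketch: it
// assumes FCMP_UEQ lowers to the EQ/VS pair via changeFCMPPredToAArch64CC,
// which is the usual AArch64 mapping for "unordered or equal", and the
// register numbers are illustrative only. In that case the emitted sequence
// is roughly:
//
//   csinc w8, wzr, wzr, ne   // cset w8, eq
//   csinc w9, wzr, wzr, vc   // cset w9, vs (unordered)
//   orr   w0, w8, w9         // result = EQ || unordered
//
// Predicates that need only one condition code (CC2 == AL) take the early
// EmitCSet(Dst, CC1) path and emit a single CSINC instead.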
4445
4446MachineInstr *
4447AArch64InstructionSelector::emitFPCompare(Register LHS, Register RHS,
4448 MachineIRBuilder &MIRBuilder,
4449 Optional<CmpInst::Predicate> Pred) const {
4450 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
4451 LLT Ty = MRI.getType(LHS);
4452 if (Ty.isVector())
4453 return nullptr;
4454 unsigned OpSize = Ty.getSizeInBits();
4455 if (OpSize != 32 && OpSize != 64)
4456 return nullptr;
4457
4458 // If this is a compare against +0.0, then we don't have
4459 // to explicitly materialize a constant.
4460 const ConstantFP *FPImm = getConstantFPVRegVal(RHS, MRI);
4461 bool ShouldUseImm = FPImm && (FPImm->isZero() && !FPImm->isNegative());
4462
4463 auto IsEqualityPred = [](CmpInst::Predicate P) {
4464 return P == CmpInst::FCMP_OEQ || P == CmpInst::FCMP_ONE ||
4465 P == CmpInst::FCMP_UEQ || P == CmpInst::FCMP_UNE;
4466 };
4467 if (!ShouldUseImm && Pred && IsEqualityPred(*Pred)) {
4468 // Try commutating the operands.
4469 const ConstantFP *LHSImm = getConstantFPVRegVal(LHS, MRI);
4470 if (LHSImm && (LHSImm->isZero() && !LHSImm->isNegative())) {
4471 ShouldUseImm = true;
4472 std::swap(LHS, RHS);
4473 }
4474 }
4475 unsigned CmpOpcTbl[2][2] = {{AArch64::FCMPSrr, AArch64::FCMPDrr},
4476 {AArch64::FCMPSri, AArch64::FCMPDri}};
4477 unsigned CmpOpc = CmpOpcTbl[ShouldUseImm][OpSize == 64];
4478
4479 // Partially build the compare. Decide if we need to add a use for the
4480 // third operand based off whether or not we're comparing against 0.0.
4481 auto CmpMI = MIRBuilder.buildInstr(CmpOpc).addUse(LHS);
4482 if (!ShouldUseImm)
4483 CmpMI.addUse(RHS);
4484 constrainSelectedInstRegOperands(*CmpMI, TII, TRI, RBI);
4485 return &*CmpMI;
4486}
4487
4488MachineInstr *AArch64InstructionSelector::emitVectorConcat(
4489 Optional<Register> Dst, Register Op1, Register Op2,
4490 MachineIRBuilder &MIRBuilder) const {
4491 // We implement a vector concat by:
4492 // 1. Use scalar_to_vector to insert the lower vector into the larger dest
4493 // 2. Insert the upper vector into the destination's upper element
4494 // TODO: some of this code is common with G_BUILD_VECTOR handling.
4495 MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
4496
4497 const LLT Op1Ty = MRI.getType(Op1);
4498 const LLT Op2Ty = MRI.getType(Op2);
4499
4500 if (Op1Ty != Op2Ty) {
4501 LLVM_DEBUG(dbgs() << "Could not do vector concat of differing vector tys");
4502 return nullptr;
4503 }
4504 assert(Op1Ty.isVector() && "Expected a vector for vector concat");
4505
4506 if (Op1Ty.getSizeInBits() >= 128) {
4507 LLVM_DEBUG(dbgs() << "Vector concat not supported for full size vectors");
4508 return nullptr;
4509 }
4510
4511 // At the moment we just support 64 bit vector concats.
4512 if (Op1Ty.getSizeInBits() != 64) {
4513 LLVM_DEBUG(dbgs() << "Vector concat only supported for 64b vectors");
4514 return nullptr;
4515 }
4516
4517 const LLT ScalarTy = LLT::scalar(Op1Ty.getSizeInBits());
4518 const RegisterBank &FPRBank = *RBI.getRegBank(Op1, MRI, TRI);
4519 const TargetRegisterClass *DstRC =
4520 getMinClassForRegBank(FPRBank, Op1Ty.getSizeInBits() * 2);
4521
4522 MachineInstr *WidenedOp1 =
4523 emitScalarToVector(ScalarTy.getSizeInBits(), DstRC, Op1, MIRBuilder);
4524 MachineInstr *WidenedOp2 =
4525 emitScalarToVector(ScalarTy.getSizeInBits(), DstRC, Op2, MIRBuilder);
4526 if (!WidenedOp1 || !WidenedOp2) {
4527 LLVM_DEBUG(dbgs() << "Could not emit a vector from scalar value");
4528 return nullptr;
4529 }
4530
4531 // Now do the insert of the upper element.
4532 unsigned InsertOpc, InsSubRegIdx;
4533 std::tie(InsertOpc, InsSubRegIdx) =
4534 getInsertVecEltOpInfo(FPRBank, ScalarTy.getSizeInBits());
4535
4536 if (!Dst)
4537 Dst = MRI.createVirtualRegister(DstRC);
4538 auto InsElt =
4539 MIRBuilder
4540 .buildInstr(InsertOpc, {*Dst}, {WidenedOp1->getOperand(0).getReg()})
4541 .addImm(1) /* Lane index */
4542 .addUse(WidenedOp2->getOperand(0).getReg())
4543 .addImm(0);
4544 constrainSelectedInstRegOperands(*InsElt, TII, TRI, RBI);
4545 return &*InsElt;
4546}
4547
4548MachineInstr *
4549AArch64InstructionSelector::emitCSetForICMP(Register DefReg, unsigned Pred,
4550 MachineIRBuilder &MIRBuilder,
4551 Register SrcReg) const {
4552 // CSINC increments the result when the predicate is false. Invert it.
4553 const AArch64CC::CondCode InvCC = changeICMPPredToAArch64CC(
4554 CmpInst::getInversePredicate((CmpInst::Predicate)Pred));
4555 auto I = MIRBuilder.buildInstr(AArch64::CSINCWr, {DefReg}, {SrcReg, SrcReg})
4556 .addImm(InvCC);
4557 constrainSelectedInstRegOperands(*I, TII, TRI, RBI);
4558 return &*I;
4559}
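// Usage note (sketch, with illustrative register names): when SrcReg is WZR
// this is the canonical cset idiom. For an equality compare the inverse
// predicate is NE, so the emitted instruction is
//
//   csinc w0, wzr, wzr, ne   // same as: cset w0, eq
//
// i.e. w0 becomes 1 when EQ holds and 0 otherwise.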
4560
4561std::pair<MachineInstr *, AArch64CC::CondCode>
4562AArch64InstructionSelector::emitOverflowOp(unsigned Opcode, Register Dst,
4563 MachineOperand &LHS,
4564 MachineOperand &RHS,
4565 MachineIRBuilder &MIRBuilder) const {
4566 switch (Opcode) {
4567 default:
4568 llvm_unreachable("Unexpected opcode!");
4569 case TargetOpcode::G_SADDO:
4570 return std::make_pair(emitADDS(Dst, LHS, RHS, MIRBuilder), AArch64CC::VS);
4571 case TargetOpcode::G_UADDO:
4572 return std::make_pair(emitADDS(Dst, LHS, RHS, MIRBuilder), AArch64CC::HS);
4573 case TargetOpcode::G_SSUBO:
4574 return std::make_pair(emitSUBS(Dst, LHS, RHS, MIRBuilder), AArch64CC::VS);
4575 case TargetOpcode::G_USUBO:
4576 return std::make_pair(emitSUBS(Dst, LHS, RHS, MIRBuilder), AArch64CC::LO);
4577 }
4578}
4579
4580bool AArch64InstructionSelector::tryOptSelect(MachineInstr &I) {
4581 MachineRegisterInfo &MRI = *MIB.getMRI();
4582 // We want to recognize this pattern:
4583 //
4584 // $z = G_FCMP pred, $x, $y
4585 // ...
4586 // $w = G_SELECT $z, $a, $b
4587 //
4588 // Where the value of $z is *only* ever used by the G_SELECT (possibly with
4589 // some copies/truncs in between.)
4590 //
4591 // If we see this, then we can emit something like this:
4592 //
4593 // fcmp $x, $y
4594 // fcsel $w, $a, $b, pred
4595 //
4596 // Rather than emitting both of the rather long sequences in the standard
4597 // G_FCMP/G_SELECT select methods.
4598
4599 // First, check if the condition is defined by a compare.
4600 MachineInstr *CondDef = MRI.getVRegDef(I.getOperand(1).getReg());
4601 while (CondDef) {
4602 // We can only fold if all of the defs have one use.
4603 Register CondDefReg = CondDef->getOperand(0).getReg();
4604 if (!MRI.hasOneNonDBGUse(CondDefReg)) {
4605 // Unless it's another select.
4606 for (const MachineInstr &UI : MRI.use_nodbg_instructions(CondDefReg)) {
4607 if (CondDef == &UI)
4608 continue;
4609 if (UI.getOpcode() != TargetOpcode::G_SELECT)
4610 return false;
4611 }
4612 }
4613
4614 // We can skip over G_TRUNC since the condition is 1-bit.
4615 // Truncating/extending can have no impact on the value.
4616 unsigned Opc = CondDef->getOpcode();
4617 if (Opc != TargetOpcode::COPY && Opc != TargetOpcode::G_TRUNC)
4618 break;
4619
4620 // Can't see past copies from physregs.
4621 if (Opc == TargetOpcode::COPY &&
4622 Register::isPhysicalRegister(CondDef->getOperand(1).getReg()))
4623 return false;
4624
4625 CondDef = MRI.getVRegDef(CondDef->getOperand(1).getReg());
4626 }
4627
4628 // Is the condition defined by a compare?
4629 if (!CondDef)
4630 return false;
4631
4632 unsigned CondOpc = CondDef->getOpcode();
4633 if (CondOpc != TargetOpcode::G_ICMP && CondOpc != TargetOpcode::G_FCMP)
4634 return false;
4635
4636 AArch64CC::CondCode CondCode;
4637 if (CondOpc == TargetOpcode::G_ICMP) {
4638 auto Pred =
4639 static_cast<CmpInst::Predicate>(CondDef->getOperand(1).getPredicate());
4640 CondCode = changeICMPPredToAArch64CC(Pred);
4641 emitIntegerCompare(CondDef->getOperand(2), CondDef->getOperand(3),
4642 CondDef->getOperand(1), MIB);
4643 } else {
4644 // Get the condition code for the select.
4645 auto Pred =
4646 static_cast<CmpInst::Predicate>(CondDef->getOperand(1).getPredicate());
4647 AArch64CC::CondCode CondCode2;
4648 changeFCMPPredToAArch64CC(Pred, CondCode, CondCode2);
4649
4650 // changeFCMPPredToAArch64CC sets CondCode2 to AL when we require two
4651 // instructions to emit the comparison.
4652 // TODO: Handle FCMP_UEQ and FCMP_ONE. After that, this check will be
4653 // unnecessary.
4654 if (CondCode2 != AArch64CC::AL)
4655 return false;
4656
4657 if (!emitFPCompare(CondDef->getOperand(2).getReg(),
4658 CondDef->getOperand(3).getReg(), MIB)) {
4659 LLVM_DEBUG(dbgs() << "Couldn't emit compare for select!\n");
4660 return false;
4661 }
4662 }
4663
4664 // Emit the select.
4665 emitSelect(I.getOperand(0).getReg(), I.getOperand(2).getReg(),
4666 I.getOperand(3).getReg(), CondCode, MIB);
4667 I.eraseFromParent();
4668 return true;
4669}
4670
4671MachineInstr *AArch64InstructionSelector::tryFoldIntegerCompare(
4672 MachineOperand &LHS, MachineOperand &RHS, MachineOperand &Predicate,
4673 MachineIRBuilder &MIRBuilder) const {
4674 assert(LHS.isReg() && RHS.isReg() && Predicate.isPredicate() &&
4675 "Unexpected MachineOperand");
4676 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
4677 // We want to find this sort of thing:
4678 // x = G_SUB 0, y
4679 // G_ICMP z, x
4680 //
4681 // In this case, we can fold the G_SUB into the G_ICMP using a CMN instead.
4682 // e.g:
4683 //
4684 // cmn z, y
4685
4686 // Check if the RHS or LHS of the G_ICMP is defined by a SUB
4687 MachineInstr *LHSDef = getDefIgnoringCopies(LHS.getReg(), MRI);
4688 MachineInstr *RHSDef = getDefIgnoringCopies(RHS.getReg(), MRI);
4689 auto P = static_cast<CmpInst::Predicate>(Predicate.getPredicate());
4690 // Given this:
4691 //
4692 // x = G_SUB 0, y
4693 // G_ICMP x, z
4694 //
4695 // Produce this:
4696 //
4697 // cmn y, z
4698 if (isCMN(LHSDef, P, MRI))
4699 return emitCMN(LHSDef->getOperand(2), RHS, MIRBuilder);
4700
4701 // Same idea here, but with the RHS of the compare instead:
4702 //
4703 // Given this:
4704 //
4705 // x = G_SUB 0, y
4706 // G_ICMP z, x
4707 //
4708 // Produce this:
4709 //
4710 // cmn z, y
4711 if (isCMN(RHSDef, P, MRI))
4712 return emitCMN(LHS, RHSDef->getOperand(2), MIRBuilder);
4713
4714 // Given this:
4715 //
4716 // z = G_AND x, y
4717 // G_ICMP z, 0
4718 //
4719 // Produce this if the compare is signed:
4720 //
4721 // tst x, y
4722 if (!CmpInst::isUnsigned(P) && LHSDef &&
4723 LHSDef->getOpcode() == TargetOpcode::G_AND) {
4724 // Make sure that the RHS is 0.
4725 auto ValAndVReg = getConstantVRegValWithLookThrough(RHS.getReg(), MRI);
4726 if (!ValAndVReg || ValAndVReg->Value != 0)
4727 return nullptr;
4728
4729 return emitTST(LHSDef->getOperand(1),
4730 LHSDef->getOperand(2), MIRBuilder);
4731 }
4732
4733 return nullptr;
4734}
4735
4736bool AArch64InstructionSelector::selectShuffleVector(
4737 MachineInstr &I, MachineRegisterInfo &MRI) {
4738 const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
4739 Register Src1Reg = I.getOperand(1).getReg();
4740 const LLT Src1Ty = MRI.getType(Src1Reg);
4741 Register Src2Reg = I.getOperand(2).getReg();
4742 const LLT Src2Ty = MRI.getType(Src2Reg);
4743 ArrayRef<int> Mask = I.getOperand(3).getShuffleMask();
4744
4745 MachineBasicBlock &MBB = *I.getParent();
4746 MachineFunction &MF = *MBB.getParent();
4747 LLVMContext &Ctx = MF.getFunction().getContext();
4748
4749 // G_SHUFFLE_VECTOR is weird in that the source operands can be scalars if
4750 // it originated from a <1 x T> type. Those should have been lowered into
4751 // G_BUILD_VECTOR earlier.
4752 if (!Src1Ty.isVector() || !Src2Ty.isVector()) {
4753 LLVM_DEBUG(dbgs() << "Could not select a \"scalar\" G_SHUFFLE_VECTOR\n");
4754 return false;
4755 }
4756
4757 unsigned BytesPerElt = DstTy.getElementType().getSizeInBits() / 8;
4758
4759 SmallVector<Constant *, 64> CstIdxs;
4760 for (int Val : Mask) {
4761 // For now, any undef indexes we'll just assume to be 0. This should be
4762 // optimized in future, e.g. to select DUP etc.
4763 Val = Val < 0 ? 0 : Val;
4764 for (unsigned Byte = 0; Byte < BytesPerElt; ++Byte) {
4765 unsigned Offset = Byte + Val * BytesPerElt;
4766 CstIdxs.emplace_back(ConstantInt::get(Type::getInt8Ty(Ctx), Offset));
4767 }
4768 }
4769
4770 // Use a constant pool to load the index vector for TBL.
4771 Constant *CPVal = ConstantVector::get(CstIdxs);
4772 MachineInstr *IndexLoad = emitLoadFromConstantPool(CPVal, MIB);
4773 if (!IndexLoad) {
4774 LLVM_DEBUG(dbgs() << "Could not load from a constant pool");
4775 return false;
4776 }
4777
4778 if (DstTy.getSizeInBits() != 128) {
4779 assert(DstTy.getSizeInBits() == 64 && "Unexpected shuffle result ty");
4780 // This case can be done with TBL1.
4781 MachineInstr *Concat = emitVectorConcat(None, Src1Reg, Src2Reg, MIB);
4782 if (!Concat) {
4783 LLVM_DEBUG(dbgs() << "Could not do vector concat for tbl1");
4784 return false;
4785 }
4786
4787 // The constant pool load will be 64 bits, so need to convert to FPR128 reg.
4788 IndexLoad = emitScalarToVector(64, &AArch64::FPR128RegClass,
4789 IndexLoad->getOperand(0).getReg(), MIB);
4790
4791 auto TBL1 = MIB.buildInstr(
4792 AArch64::TBLv16i8One, {&AArch64::FPR128RegClass},
4793 {Concat->getOperand(0).getReg(), IndexLoad->getOperand(0).getReg()});
4794 constrainSelectedInstRegOperands(*TBL1, TII, TRI, RBI);
4795
4796 auto Copy =
4797 MIB.buildInstr(TargetOpcode::COPY, {I.getOperand(0).getReg()}, {})
4798 .addReg(TBL1.getReg(0), 0, AArch64::dsub);
4799 RBI.constrainGenericRegister(Copy.getReg(0), AArch64::FPR64RegClass, MRI);
4800 I.eraseFromParent();
4801 return true;
4802 }
4803
4804 // For TBL2 we need to emit a REG_SEQUENCE to tie together two consecutive
4805 // Q registers for regalloc.
4806 SmallVector<Register, 2> Regs = {Src1Reg, Src2Reg};
4807 auto RegSeq = createQTuple(Regs, MIB);
4808 auto TBL2 = MIB.buildInstr(AArch64::TBLv16i8Two, {I.getOperand(0)},
4809 {RegSeq, IndexLoad->getOperand(0)});
4810 constrainSelectedInstRegOperands(*TBL2, TII, TRI, RBI);
4811 I.eraseFromParent();
4812 return true;
4813}
4814
4815MachineInstr *AArch64InstructionSelector::emitLaneInsert(
4816 Optional<Register> DstReg, Register SrcReg, Register EltReg,
4817 unsigned LaneIdx, const RegisterBank &RB,
4818 MachineIRBuilder &MIRBuilder) const {
4819 MachineInstr *InsElt = nullptr;
4820 const TargetRegisterClass *DstRC = &AArch64::FPR128RegClass;
4821 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
4822
4823 // Create a register to define with the insert if one wasn't passed in.
4824 if (!DstReg)
4825 DstReg = MRI.createVirtualRegister(DstRC);
4826
4827 unsigned EltSize = MRI.getType(EltReg).getSizeInBits();
4828 unsigned Opc = getInsertVecEltOpInfo(RB, EltSize).first;
4829
4830 if (RB.getID() == AArch64::FPRRegBankID) {
4831 auto InsSub = emitScalarToVector(EltSize, DstRC, EltReg, MIRBuilder);
4832 InsElt = MIRBuilder.buildInstr(Opc, {*DstReg}, {SrcReg})
4833 .addImm(LaneIdx)
4834 .addUse(InsSub->getOperand(0).getReg())
4835 .addImm(0);
4836 } else {
4837 InsElt = MIRBuilder.buildInstr(Opc, {*DstReg}, {SrcReg})
4838 .addImm(LaneIdx)
4839 .addUse(EltReg);
4840 }
4841
4842 constrainSelectedInstRegOperands(*InsElt, TII, TRI, RBI);
4843 return InsElt;
4844}
4845
4846bool AArch64InstructionSelector::selectInsertElt(MachineInstr &I,
4847 MachineRegisterInfo &MRI) {
4848 assert(I.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT);
4849
4850 // Get information on the destination.
4851 Register DstReg = I.getOperand(0).getReg();
4852 const LLT DstTy = MRI.getType(DstReg);
4853 unsigned VecSize = DstTy.getSizeInBits();
4854
4855 // Get information on the element we want to insert into the destination.
4856 Register EltReg = I.getOperand(2).getReg();
4857 const LLT EltTy = MRI.getType(EltReg);
4858 unsigned EltSize = EltTy.getSizeInBits();
4859 if (EltSize < 16 || EltSize > 64)
4860 return false; // Don't support all element types yet.
4861
4862 // Find the definition of the index. Bail out if it's not defined by a
4863 // G_CONSTANT.
4864 Register IdxReg = I.getOperand(3).getReg();
4865 auto VRegAndVal = getConstantVRegValWithLookThrough(IdxReg, MRI);
4866 if (!VRegAndVal)
4867 return false;
4868 unsigned LaneIdx = VRegAndVal->Value.getSExtValue();
4869
4870 // Perform the lane insert.
4871 Register SrcReg = I.getOperand(1).getReg();
4872 const RegisterBank &EltRB = *RBI.getRegBank(EltReg, MRI, TRI);
4873
4874 if (VecSize < 128) {
4875 // If the vector we're inserting into is smaller than 128 bits, widen it
4876 // to 128 to do the insert.
4877 MachineInstr *ScalarToVec =
4878 emitScalarToVector(VecSize, &AArch64::FPR128RegClass, SrcReg, MIB);
4879 if (!ScalarToVec)
4880 return false;
4881 SrcReg = ScalarToVec->getOperand(0).getReg();
4882 }
4883
4884 // Create an insert into a new FPR128 register.
4885 // Note that if our vector is already 128 bits, we end up emitting an extra
4886 // register.
4887 MachineInstr *InsMI =
4888 emitLaneInsert(None, SrcReg, EltReg, LaneIdx, EltRB, MIB);
4889
4890 if (VecSize < 128) {
4891 // If we had to widen to perform the insert, then we have to demote back to
4892 // the original size to get the result we want.
4893 Register DemoteVec = InsMI->getOperand(0).getReg();
4894 const TargetRegisterClass *RC =
4895 getMinClassForRegBank(*RBI.getRegBank(DemoteVec, MRI, TRI), VecSize);
4896 if (RC != &AArch64::FPR32RegClass && RC != &AArch64::FPR64RegClass) {
4897 LLVM_DEBUG(dbgs() << "Unsupported register class!\n");
4898 return false;
4899 }
4900 unsigned SubReg = 0;
4901 if (!getSubRegForClass(RC, TRI, SubReg))
4902 return false;
4903 if (SubReg != AArch64::ssub && SubReg != AArch64::dsub) {
4904 LLVM_DEBUG(dbgs() << "Unsupported destination size! (" << VecSize
4905 << "\n");
4906 return false;
4907 }
4908 MIB.buildInstr(TargetOpcode::COPY, {DstReg}, {})
4909 .addReg(DemoteVec, 0, SubReg);
4910 RBI.constrainGenericRegister(DstReg, *RC, MRI);
4911 } else {
4912 // No widening needed.
4913 InsMI->getOperand(0).setReg(DstReg);
4914 constrainSelectedInstRegOperands(*InsMI, TII, TRI, RBI);
4915 }
4916
4917 I.eraseFromParent();
4918 return true;
4919}
4920
4921MachineInstr *
4922AArch64InstructionSelector::emitConstantVector(Register Dst, Constant *CV,
4923 MachineIRBuilder &MIRBuilder,
4924 MachineRegisterInfo &MRI) {
4925 LLT DstTy = MRI.getType(Dst);
4926 unsigned DstSize = DstTy.getSizeInBits();
4927 if (CV->isNullValue()) {
4928 if (DstSize == 128) {
4929 auto Mov =
4930 MIRBuilder.buildInstr(AArch64::MOVIv2d_ns, {Dst}, {}).addImm(0);
4931 constrainSelectedInstRegOperands(*Mov, TII, TRI, RBI);
4932 return &*Mov;
4933 }
4934
4935 if (DstSize == 64) {
4936 auto Mov =
4937 MIRBuilder
4938 .buildInstr(AArch64::MOVIv2d_ns, {&AArch64::FPR128RegClass}, {})
4939 .addImm(0);
4940 auto Copy = MIRBuilder.buildInstr(TargetOpcode::COPY, {Dst}, {})
4941 .addReg(Mov.getReg(0), 0, AArch64::dsub);
4942 RBI.constrainGenericRegister(Dst, AArch64::FPR64RegClass, MRI);
4943 return &*Copy;
4944 }
4945 }
4946
4947 auto *CPLoad = emitLoadFromConstantPool(CV, MIRBuilder);
4948 if (!CPLoad) {
4949 LLVM_DEBUG(dbgs() << "Could not generate cp load for constant vector!");
4950 return nullptr;
4951 }
4952
4953 auto Copy = MIRBuilder.buildCopy(Dst, CPLoad->getOperand(0));
4954 RBI.constrainGenericRegister(
4955 Dst, *MRI.getRegClass(CPLoad->getOperand(0).getReg()), MRI);
4956 return &*Copy;
4957}
4958
4959bool AArch64InstructionSelector::tryOptConstantBuildVec(
4960 MachineInstr &I, LLT DstTy, MachineRegisterInfo &MRI) {
4961 assert(I.getOpcode() == TargetOpcode::G_BUILD_VECTOR);
4962 unsigned DstSize = DstTy.getSizeInBits();
4963 assert(DstSize <= 128 && "Unexpected build_vec type!");
4964 if (DstSize < 32)
4965 return false;
4966 // Check if we're building a constant vector, in which case we want to
4967 // generate a constant pool load instead of a vector insert sequence.
4968 SmallVector<Constant *, 16> Csts;
4969 for (unsigned Idx = 1; Idx < I.getNumOperands(); ++Idx) {
4970 // Try to find G_CONSTANT or G_FCONSTANT
4971 auto *OpMI =
4972 getOpcodeDef(TargetOpcode::G_CONSTANT, I.getOperand(Idx).getReg(), MRI);
4973 if (OpMI)
4974 Csts.emplace_back(
4975 const_cast<ConstantInt *>(OpMI->getOperand(1).getCImm()));
4976 else if ((OpMI = getOpcodeDef(TargetOpcode::G_FCONSTANT,
4977 I.getOperand(Idx).getReg(), MRI)))
4978 Csts.emplace_back(
4979 const_cast<ConstantFP *>(OpMI->getOperand(1).getFPImm()));
4980 else
4981 return false;
4982 }
4983 Constant *CV = ConstantVector::get(Csts);
4984 if (!emitConstantVector(I.getOperand(0).getReg(), CV, MIB, MRI))
4985 return false;
4986 I.eraseFromParent();
4987 return true;
4988}
4989
4990bool AArch64InstructionSelector::tryOptBuildVecToSubregToReg(
4991 MachineInstr &I, MachineRegisterInfo &MRI) {
4992 // Given:
4993 // %vec = G_BUILD_VECTOR %elt, %undef, %undef, ... %undef
4994 //
4995 // Select the G_BUILD_VECTOR as a SUBREG_TO_REG from %elt.
4996 Register Dst = I.getOperand(0).getReg();
4997 Register EltReg = I.getOperand(1).getReg();
4998 LLT EltTy = MRI.getType(EltReg);
4999 // If the index isn't on the same bank as its elements, then this can't be a
5000 // SUBREG_TO_REG.
5001 const RegisterBank &EltRB = *RBI.getRegBank(EltReg, MRI, TRI);
5002 const RegisterBank &DstRB = *RBI.getRegBank(Dst, MRI, TRI);
5003 if (EltRB != DstRB)
5004 return false;
5005 if (any_of(make_range(I.operands_begin() + 2, I.operands_end()),
5006 [&MRI](const MachineOperand &Op) {
5007 return !getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, Op.getReg(),
5008 MRI);
5009 }))
5010 return false;
5011 unsigned SubReg;
5012 const TargetRegisterClass *EltRC =
5013 getMinClassForRegBank(EltRB, EltTy.getSizeInBits());
5014 if (!EltRC)
5015 return false;
5016 const TargetRegisterClass *DstRC =
5017 getMinClassForRegBank(DstRB, MRI.getType(Dst).getSizeInBits());
5018 if (!DstRC)
5019 return false;
5020 if (!getSubRegForClass(EltRC, TRI, SubReg))
5021 return false;
5022 auto SubregToReg = MIB.buildInstr(AArch64::SUBREG_TO_REG, {Dst}, {})
5023 .addImm(0)
5024 .addUse(EltReg)
5025 .addImm(SubReg);
5026 I.eraseFromParent();
5027 constrainSelectedInstRegOperands(*SubregToReg, TII, TRI, RBI);
5028 return RBI.constrainGenericRegister(Dst, *DstRC, MRI);
5029}
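// Example of the pattern handled above (a sketch with hypothetical virtual
// register names, assuming a 32-bit element on the FPR bank):
//
//   %undef:fpr(s32) = G_IMPLICIT_DEF
//   %vec:fpr(<2 x s32>) = G_BUILD_VECTOR %elt:fpr(s32), %undef
//
// is selected as
//
//   %vec:fpr64 = SUBREG_TO_REG 0, %elt:fpr32, %subreg.ssub
//
// which is valid because every lane other than the first is undef.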
5030
5031bool AArch64InstructionSelector::selectBuildVector(MachineInstr &I,
5032 MachineRegisterInfo &MRI) {
5033 assert(I.getOpcode() == TargetOpcode::G_BUILD_VECTOR);
5034 // Until we port more of the optimized selections, for now just use a vector
5035 // insert sequence.
5036 const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
5037 const LLT EltTy = MRI.getType(I.getOperand(1).getReg());
5038 unsigned EltSize = EltTy.getSizeInBits();
5039
5040 if (tryOptConstantBuildVec(I, DstTy, MRI))
5041 return true;
5042 if (tryOptBuildVecToSubregToReg(I, MRI))
5043 return true;
5044
5045 if (EltSize < 16 || EltSize > 64)
5046 return false; // Don't support all element types yet.
5047 const RegisterBank &RB = *RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI);
5048
5049 const TargetRegisterClass *DstRC = &AArch64::FPR128RegClass;
5050 MachineInstr *ScalarToVec =
5051 emitScalarToVector(DstTy.getElementType().getSizeInBits(), DstRC,
5052 I.getOperand(1).getReg(), MIB);
5053 if (!ScalarToVec)
5054 return false;
5055
5056 Register DstVec = ScalarToVec->getOperand(0).getReg();
5057 unsigned DstSize = DstTy.getSizeInBits();
5058
5059 // Keep track of the last MI we inserted. Later on, we might be able to save
5060 // a copy using it.
5061 MachineInstr *PrevMI = nullptr;
5062 for (unsigned i = 2, e = DstSize / EltSize + 1; i < e; ++i) {
5063 // Note that if we don't do a subregister copy, we can end up making an
5064 // extra register.
5065 PrevMI = &*emitLaneInsert(None, DstVec, I.getOperand(i).getReg(), i - 1, RB,
5066 MIB);
5067 DstVec = PrevMI->getOperand(0).getReg();
5068 }
5069
5070 // If DstTy's size in bits is less than 128, then emit a subregister copy
5071 // from DstVec to the last register we've defined.
5072 if (DstSize < 128) {
5073 // Force this to be FPR using the destination vector.
5074 const TargetRegisterClass *RC =
5075 getMinClassForRegBank(*RBI.getRegBank(DstVec, MRI, TRI), DstSize);
5076 if (!RC)
5077 return false;
5078 if (RC != &AArch64::FPR32RegClass && RC != &AArch64::FPR64RegClass) {
5079 LLVM_DEBUG(dbgs() << "Unsupported register class!\n");
5080 return false;
5081 }
5082
5083 unsigned SubReg = 0;
5084 if (!getSubRegForClass(RC, TRI, SubReg))
5085 return false;
5086 if (SubReg != AArch64::ssub && SubReg != AArch64::dsub) {
5087 LLVM_DEBUG(dbgs() << "Unsupported destination size! (" << DstSize
5088 << "\n");
5089 return false;
5090 }
5091
5092 Register Reg = MRI.createVirtualRegister(RC);
5093 Register DstReg = I.getOperand(0).getReg();
5094
5095 MIB.buildInstr(TargetOpcode::COPY, {DstReg}, {}).addReg(DstVec, 0, SubReg);
5096 MachineOperand &RegOp = I.getOperand(1);
5097 RegOp.setReg(Reg);
5098 RBI.constrainGenericRegister(DstReg, *RC, MRI);
5099 } else {
5100 // We don't need a subregister copy. Save a copy by re-using the
5101 // destination register on the final insert.
5102 assert(PrevMI && "PrevMI was null?");
5103 PrevMI->getOperand(0).setReg(I.getOperand(0).getReg());
5104 constrainSelectedInstRegOperands(*PrevMI, TII, TRI, RBI);
5105 }
5106
5107 I.eraseFromParent();
5108 return true;
5109}
5110
5111/// Helper function to find an intrinsic ID on a MachineInstr. Returns the
5112/// ID if it exists, and 0 otherwise.
5113static unsigned findIntrinsicID(MachineInstr &I) {
5114 auto IntrinOp = find_if(I.operands(), [&](const MachineOperand &Op) {
5115 return Op.isIntrinsicID();
5116 });
5117 if (IntrinOp == I.operands_end())
5118 return 0;
5119 return IntrinOp->getIntrinsicID();
5120}
5121
5122bool AArch64InstructionSelector::selectVectorLoadIntrinsic(unsigned Opc,
5123 unsigned NumVecs,
5124 MachineInstr &I) {
5125 assert(I.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS);
5126 assert(Opc && "Expected an opcode?");
5127 assert(NumVecs > 1 && NumVecs < 5 && "Only support 2, 3, or 4 vectors");
5128 auto &MRI = *MIB.getMRI();
5129 LLT Ty = MRI.getType(I.getOperand(0).getReg());
5130 unsigned Size = Ty.getSizeInBits();
5131 assert((Size == 64 || Size == 128) &&
5132 "Destination must be 64 bits or 128 bits?");
5133 unsigned SubReg = Size == 64 ? AArch64::dsub0 : AArch64::qsub0;
5134 auto Ptr = I.getOperand(I.getNumOperands() - 1).getReg();
5135 assert(MRI.getType(Ptr).isPointer() && "Expected a pointer type?");
5136 auto Load = MIB.buildInstr(Opc, {Ty}, {Ptr});
5137 Load.cloneMemRefs(I);
5138 constrainSelectedInstRegOperands(*Load, TII, TRI, RBI);
5139 Register SelectedLoadDst = Load->getOperand(0).getReg();
5140 for (unsigned Idx = 0; Idx < NumVecs; ++Idx) {
5141 auto Vec = MIB.buildInstr(TargetOpcode::COPY, {I.getOperand(Idx)}, {})
5142 .addReg(SelectedLoadDst, 0, SubReg + Idx);
5143 // Emit the subreg copies and immediately select them.
5144 // FIXME: We should refactor our copy code into an emitCopy helper and
5145 // clean up uses of this pattern elsewhere in the selector.
5146 selectCopy(*Vec, TII, MRI, TRI, RBI);
5147 }
5148 return true;
5149}
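// Example (sketch): for an aarch64_neon_ld2 of <4 x s32> the caller passes
// Opc = LD2Twov4s and NumVecs = 2, so the code above emits one tuple load
// followed by two subregister copies, roughly:
//
//   %tuple = LD2Twov4s %ptr
//   %dst0 = COPY %tuple.qsub0
//   %dst1 = COPY %tuple.qsub1
//
// The SubReg + Idx arithmetic relies on qsub0..qsub3 (and dsub0..dsub3)
// being consecutive subregister indices.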
5150
5151bool AArch64InstructionSelector::selectIntrinsicWithSideEffects(
5152 MachineInstr &I, MachineRegisterInfo &MRI) {
5153 // Find the intrinsic ID.
5154 unsigned IntrinID = findIntrinsicID(I);
5155 if (!IntrinID)
5156 return false;
5157
5158 const LLT S8 = LLT::scalar(8);
5159 const LLT S16 = LLT::scalar(16);
5160 const LLT S32 = LLT::scalar(32);
5161 const LLT S64 = LLT::scalar(64);
5162 const LLT P0 = LLT::pointer(0, 64);
5163 // Select the instruction.
5164 switch (IntrinID) {
5165 default:
5166 return false;
5167 case Intrinsic::aarch64_ldxp:
5168 case Intrinsic::aarch64_ldaxp: {
5169 auto NewI = MIB.buildInstr(
5170 IntrinID == Intrinsic::aarch64_ldxp ? AArch64::LDXPX : AArch64::LDAXPX,
5171 {I.getOperand(0).getReg(), I.getOperand(1).getReg()},
5172 {I.getOperand(3)});
5173 NewI.cloneMemRefs(I);
5174 constrainSelectedInstRegOperands(*NewI, TII, TRI, RBI);
5175 break;
5176 }
5177 case Intrinsic::trap:
5178 MIB.buildInstr(AArch64::BRK, {}, {}).addImm(1);
5179 break;
5180 case Intrinsic::debugtrap:
5181 MIB.buildInstr(AArch64::BRK, {}, {}).addImm(0xF000);
5182 break;
5183 case Intrinsic::ubsantrap:
5184 MIB.buildInstr(AArch64::BRK, {}, {})
5185 .addImm(I.getOperand(1).getImm() | ('U' << 8));
5186 break;
5187 case Intrinsic::aarch64_neon_ld2: {
5188 LLT Ty = MRI.getType(I.getOperand(0).getReg());
5189 unsigned Opc = 0;
5190 if (Ty == LLT::fixed_vector(8, S8))
5191 Opc = AArch64::LD2Twov8b;
5192 else if (Ty == LLT::fixed_vector(16, S8))
5193 Opc = AArch64::LD2Twov16b;
5194 else if (Ty == LLT::fixed_vector(4, S16))
5195 Opc = AArch64::LD2Twov4h;
5196 else if (Ty == LLT::fixed_vector(8, S16))
5197 Opc = AArch64::LD2Twov8h;
5198 else if (Ty == LLT::fixed_vector(2, S32))
5199 Opc = AArch64::LD2Twov2s;
5200 else if (Ty == LLT::fixed_vector(4, S32))
5201 Opc = AArch64::LD2Twov4s;
5202 else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
5203 Opc = AArch64::LD2Twov2d;
5204 else if (Ty == S64 || Ty == P0)
5205 Opc = AArch64::LD1Twov1d;
5206 else
5207 llvm_unreachable("Unexpected type for ld2!");
5208 selectVectorLoadIntrinsic(Opc, 2, I);
5209 break;
5210 }
5211 case Intrinsic::aarch64_neon_ld4: {
5212 LLT Ty = MRI.getType(I.getOperand(0).getReg());
5213 unsigned Opc = 0;
5214 if (Ty == LLT::fixed_vector(8, S8))
5215 Opc = AArch64::LD4Fourv8b;
5216 else if (Ty == LLT::fixed_vector(16, S8))
5217 Opc = AArch64::LD4Fourv16b;
5218 else if (Ty == LLT::fixed_vector(4, S16))
5219 Opc = AArch64::LD4Fourv4h;
5220 else if (Ty == LLT::fixed_vector(8, S16))
5221 Opc = AArch64::LD4Fourv8h;
5222 else if (Ty == LLT::fixed_vector(2, S32))
5223 Opc = AArch64::LD4Fourv2s;
5224 else if (Ty == LLT::fixed_vector(4, S32))
5225 Opc = AArch64::LD4Fourv4s;
5226 else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
5227 Opc = AArch64::LD4Fourv2d;
5228 else if (Ty == S64 || Ty == P0)
5229 Opc = AArch64::LD1Fourv1d;
5230 else
5231 llvm_unreachable("Unexpected type for ld4!");
5232 selectVectorLoadIntrinsic(Opc, 4, I);
5233 break;
5234 }
5235 case Intrinsic::aarch64_neon_st2: {
5236 Register Src1 = I.getOperand(1).getReg();
5237 Register Src2 = I.getOperand(2).getReg();
5238 Register Ptr = I.getOperand(3).getReg();
5239 LLT Ty = MRI.getType(Src1);
5240 unsigned Opc;
5241 if (Ty == LLT::fixed_vector(8, S8))
5242 Opc = AArch64::ST2Twov8b;
5243 else if (Ty == LLT::fixed_vector(16, S8))
5244 Opc = AArch64::ST2Twov16b;
5245 else if (Ty == LLT::fixed_vector(4, S16))
5246 Opc = AArch64::ST2Twov4h;
5247 else if (Ty == LLT::fixed_vector(8, S16))
5248 Opc = AArch64::ST2Twov8h;
5249 else if (Ty == LLT::fixed_vector(2, S32))
5250 Opc = AArch64::ST2Twov2s;
5251 else if (Ty == LLT::fixed_vector(4, S32))
5252 Opc = AArch64::ST2Twov4s;
5253 else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
5254 Opc = AArch64::ST2Twov2d;
5255 else if (Ty == S64 || Ty == P0)
5256 Opc = AArch64::ST1Twov1d;
5257 else
5258 llvm_unreachable("Unexpected type for st2!");
5259 SmallVector<Register, 2> Regs = {Src1, Src2};
5260 Register Tuple = Ty.getSizeInBits() == 128 ? createQTuple(Regs, MIB)
5261 : createDTuple(Regs, MIB);
5262 auto Store = MIB.buildInstr(Opc, {}, {Tuple, Ptr});
5263 Store.cloneMemRefs(I);
5264 constrainSelectedInstRegOperands(*Store, TII, TRI, RBI);
5265 break;
5266 }
5267 }
5268
5269 I.eraseFromParent();
5270 return true;
5271}
5272
5273bool AArch64InstructionSelector::selectIntrinsic(MachineInstr &I,
5274 MachineRegisterInfo &MRI) {
5275 unsigned IntrinID = findIntrinsicID(I);
5276 if (!IntrinID)
5277 return false;
5278
5279 switch (IntrinID) {
5280 default:
5281 break;
5282 case Intrinsic::aarch64_crypto_sha1h: {
5283 Register DstReg = I.getOperand(0).getReg();
5284 Register SrcReg = I.getOperand(2).getReg();
5285
5286 // FIXME: Should this be an assert?
5287 if (MRI.getType(DstReg).getSizeInBits() != 32 ||
5288 MRI.getType(SrcReg).getSizeInBits() != 32)
5289 return false;
5290
5291 // The operation has to happen on FPRs. Set up some new FPR registers for
5292 // the source and destination if they are on GPRs.
5293 if (RBI.getRegBank(SrcReg, MRI, TRI)->getID() != AArch64::FPRRegBankID) {
5294 SrcReg = MRI.createVirtualRegister(&AArch64::FPR32RegClass);
5295 MIB.buildCopy({SrcReg}, {I.getOperand(2)});
5296
5297 // Make sure the copy ends up getting constrained properly.
5298 RBI.constrainGenericRegister(I.getOperand(2).getReg(),
5299 AArch64::GPR32RegClass, MRI);
5300 }
5301
5302 if (RBI.getRegBank(DstReg, MRI, TRI)->getID() != AArch64::FPRRegBankID)
5303 DstReg = MRI.createVirtualRegister(&AArch64::FPR32RegClass);
5304
5305 // Actually insert the instruction.
5306 auto SHA1Inst = MIB.buildInstr(AArch64::SHA1Hrr, {DstReg}, {SrcReg});
5307 constrainSelectedInstRegOperands(*SHA1Inst, TII, TRI, RBI);
5308
5309 // Did we create a new register for the destination?
5310 if (DstReg != I.getOperand(0).getReg()) {
5311 // Yep. Copy the result of the instruction back into the original
5312 // destination.
5313 MIB.buildCopy({I.getOperand(0)}, {DstReg});
5314 RBI.constrainGenericRegister(I.getOperand(0).getReg(),
5315 AArch64::GPR32RegClass, MRI);
5316 }
5317
5318 I.eraseFromParent();
5319 return true;
5320 }
5321 case Intrinsic::frameaddress:
5322 case Intrinsic::returnaddress: {
5323 MachineFunction &MF = *I.getParent()->getParent();
5324 MachineFrameInfo &MFI = MF.getFrameInfo();
5325
5326 unsigned Depth = I.getOperand(2).getImm();
5327 Register DstReg = I.getOperand(0).getReg();
5328 RBI.constrainGenericRegister(DstReg, AArch64::GPR64RegClass, MRI);
5329
5330 if (Depth == 0 && IntrinID == Intrinsic::returnaddress) {
5331 if (!MFReturnAddr) {
5332 // Insert the copy from LR/X30 into the entry block, before it can be
5333 // clobbered by anything.
5334 MFI.setReturnAddressIsTaken(true);
5335 MFReturnAddr = getFunctionLiveInPhysReg(MF, TII, AArch64::LR,
5336 AArch64::GPR64RegClass);
5337 }
5338
5339 if (STI.hasPAuth()) {
5340 MIB.buildInstr(AArch64::XPACI, {DstReg}, {MFReturnAddr});
5341 } else {
5342 MIB.buildCopy({Register(AArch64::LR)}, {MFReturnAddr});
5343 MIB.buildInstr(AArch64::XPACLRI);
5344 MIB.buildCopy({DstReg}, {Register(AArch64::LR)});
5345 }
5346
5347 I.eraseFromParent();
5348 return true;
5349 }
5350
5351 MFI.setFrameAddressIsTaken(true);
5352 Register FrameAddr(AArch64::FP);
5353 while (Depth--) {
5354 Register NextFrame = MRI.createVirtualRegister(&AArch64::GPR64spRegClass);
5355 auto Ldr =
5356 MIB.buildInstr(AArch64::LDRXui, {NextFrame}, {FrameAddr}).addImm(0);
5357 constrainSelectedInstRegOperands(*Ldr, TII, TRI, RBI);
5358 FrameAddr = NextFrame;
5359 }
5360
5361 if (IntrinID == Intrinsic::frameaddress)
5362 MIB.buildCopy({DstReg}, {FrameAddr});
5363 else {
5364 MFI.setReturnAddressIsTaken(true);
5365
5366 if (STI.hasPAuth()) {
5367 Register TmpReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
5368 MIB.buildInstr(AArch64::LDRXui, {TmpReg}, {FrameAddr}).addImm(1);
5369 MIB.buildInstr(AArch64::XPACI, {DstReg}, {TmpReg});
5370 } else {
5371 MIB.buildInstr(AArch64::LDRXui, {Register(AArch64::LR)}, {FrameAddr})
5372 .addImm(1);
5373 MIB.buildInstr(AArch64::XPACLRI);
5374 MIB.buildCopy({DstReg}, {Register(AArch64::LR)});
5375 }
5376 }
5377
5378 I.eraseFromParent();
5379 return true;
5380 }
5381 case Intrinsic::swift_async_context_addr:
5382 auto Sub = MIB.buildInstr(AArch64::SUBXri, {I.getOperand(0).getReg()},
5383 {Register(AArch64::FP)})
5384 .addImm(8)
5385 .addImm(0);
5386 constrainSelectedInstRegOperands(*Sub, TII, TRI, RBI);
5387
5388 MF->getFrameInfo().setFrameAddressIsTaken(true);
5389 MF->getInfo<AArch64FunctionInfo>()->setHasSwiftAsyncContext(true);
5390 I.eraseFromParent();
5391 return true;
5392 }
5393 return false;
5394}
5395
5396InstructionSelector::ComplexRendererFns
5397AArch64InstructionSelector::selectShiftA_32(const MachineOperand &Root) const {
5398 auto MaybeImmed = getImmedFromMO(Root);
5399 if (MaybeImmed == None || *MaybeImmed > 31)
5400 return None;
5401 uint64_t Enc = (32 - *MaybeImmed) & 0x1f;
5402 return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Enc); }}};
5403}
5404
5405InstructionSelector::ComplexRendererFns
5406AArch64InstructionSelector::selectShiftB_32(const MachineOperand &Root) const {
5407 auto MaybeImmed = getImmedFromMO(Root);
5408 if (MaybeImmed == None || *MaybeImmed > 31)
5409 return None;
5410 uint64_t Enc = 31 - *MaybeImmed;
5411 return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Enc); }}};
5412}
5413
5414InstructionSelector::ComplexRendererFns
5415AArch64InstructionSelector::selectShiftA_64(const MachineOperand &Root) const {
5416 auto MaybeImmed = getImmedFromMO(Root);
5417 if (MaybeImmed == None || *MaybeImmed > 63)
5418 return None;
5419 uint64_t Enc = (64 - *MaybeImmed) & 0x3f;
5420 return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Enc); }}};
5421}
5422
5423InstructionSelector::ComplexRendererFns
5424AArch64InstructionSelector::selectShiftB_64(const MachineOperand &Root) const {
5425 auto MaybeImmed = getImmedFromMO(Root);
5426 if (MaybeImmed == None || *MaybeImmed > 63)
5427 return None;
5428 uint64_t Enc = 63 - *MaybeImmed;
5429 return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Enc); }}};
5430}
5431
5432/// Helper to select an immediate value that can be represented as a 12-bit
5433/// value shifted left by either 0 or 12. If it is possible to do so, return
5434/// the immediate and shift value. If not, return None.
5435///
5436/// Used by selectArithImmed and selectNegArithImmed.
5437InstructionSelector::ComplexRendererFns
5438AArch64InstructionSelector::select12BitValueWithLeftShift(
5439 uint64_t Immed) const {
5440 unsigned ShiftAmt;
5441 if (Immed >> 12 == 0) {
5442 ShiftAmt = 0;
5443 } else if ((Immed & 0xfff) == 0 && Immed >> 24 == 0) {
5444 ShiftAmt = 12;
5445 Immed = Immed >> 12;
5446 } else
5447 return None;
5448
5449 unsigned ShVal = AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftAmt);
5450 return {{
5451 [=](MachineInstrBuilder &MIB) { MIB.addImm(Immed); },
5452 [=](MachineInstrBuilder &MIB) { MIB.addImm(ShVal); },
5453 }};
5454}
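// Worked examples for the encoding above (plain arithmetic, no new helpers):
//   Immed = 0x123   -> fits in 12 bits, so {imm = 0x123, LSL #0}
//   Immed = 0x45000 -> low 12 bits clear and value fits in 24 bits,
//                      so {imm = 0x45, LSL #12}
//   Immed = 0x1234  -> neither form applies (low bits set, doesn't fit in
//                      12 bits), so None is returned.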
5455
5456/// SelectArithImmed - Select an immediate value that can be represented as
5457/// a 12-bit value shifted left by either 0 or 12. If so, return true with
5458/// Val set to the 12-bit value and Shift set to the shifter operand.
5459InstructionSelector::ComplexRendererFns
5460AArch64InstructionSelector::selectArithImmed(MachineOperand &Root) const {
5461 // This function is called from the addsub_shifted_imm ComplexPattern,
4462 // which lists [imm] as the list of opcodes it's interested in; however,
5463 // we still need to check whether the operand is actually an immediate
5464 // here because the ComplexPattern opcode list is only used in
5465 // root-level opcode matching.
5466 auto MaybeImmed = getImmedFromMO(Root);
5467 if (MaybeImmed == None)
5468 return None;
5469 return select12BitValueWithLeftShift(*MaybeImmed);
5470}
5471
5472/// SelectNegArithImmed - As above, but negates the value before trying to
5473/// select it.
5474InstructionSelector::ComplexRendererFns
5475AArch64InstructionSelector::selectNegArithImmed(MachineOperand &Root) const {
5476 // We need a register here, because we need to know if we have a 64 or 32
5477 // bit immediate.
5478 if (!Root.isReg())
5479 return None;
5480 auto MaybeImmed = getImmedFromMO(Root);
5481 if (MaybeImmed == None)
5482 return None;
5483 uint64_t Immed = *MaybeImmed;
5484
5485 // This negation is almost always valid, but "cmp wN, #0" and "cmn wN, #0"
5486 // have the opposite effect on the C flag, so this pattern mustn't match under
5487 // those circumstances.
5488 if (Immed == 0)
5489 return None;
5490
5491 // Check if we're dealing with a 32-bit type on the root or a 64-bit type on
5492 // the root.
5493 MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();
5494 if (MRI.getType(Root.getReg()).getSizeInBits() == 32)
5495 Immed = ~((uint32_t)Immed) + 1;
5496 else
5497 Immed = ~Immed + 1ULL;
5498
5499 if (Immed & 0xFFFFFFFFFF000000ULL)
5500 return None;
5501
5502 Immed &= 0xFFFFFFULL;
5503 return select12BitValueWithLeftShift(Immed);
5504}
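// Worked example (sketch): for a 32-bit compare against -5, the negation
// above turns the bit pattern of -5 into 5, which encodes as {imm = 5,
// LSL #0}. emitAddSub then switches to the swapped opcode row (SUBS <-> ADDS),
// so something like "cmp w0, #-5" ends up selected as "cmn w0, #5". The
// Immed == 0 bail-out keeps "cmp wN, #0" from being rewritten as
// "cmn wN, #0", which would change the meaning of the carry flag.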
5505
5506/// Return true if it is worth folding MI into an extended register. That is,
5507/// if it's safe to pull it into the addressing mode of a load or store as a
5508/// shift.
5509bool AArch64InstructionSelector::isWorthFoldingIntoExtendedReg(
5510 MachineInstr &MI, const MachineRegisterInfo &MRI) const {
5511 // Always fold if there is one use, or if we're optimizing for size.
5512 Register DefReg = MI.getOperand(0).getReg();
5513 if (MRI.hasOneNonDBGUse(DefReg) ||
5514 MI.getParent()->getParent()->getFunction().hasOptSize())
5515 return true;
5516
5517 // It's better to avoid folding and recomputing shifts when we don't have a
5518 // fastpath.
5519 if (!STI.hasLSLFast())
5520 return false;
5521
5522 // We have a fastpath, so folding a shift in and potentially computing it
5523 // many times may be beneficial. Check if this is only used in memory ops.
5524 // If it is, then we should fold.
5525 return all_of(MRI.use_nodbg_instructions(DefReg),
5526 [](MachineInstr &Use) { return Use.mayLoadOrStore(); });
5527}
5528
5529static bool isSignExtendShiftType(AArch64_AM::ShiftExtendType Type) {
5530 switch (Type) {
5531 case AArch64_AM::SXTB:
5532 case AArch64_AM::SXTH:
5533 case AArch64_AM::SXTW:
5534 return true;
5535 default:
5536 return false;
5537 }
5538}
5539
5540InstructionSelector::ComplexRendererFns
5541AArch64InstructionSelector::selectExtendedSHL(
5542 MachineOperand &Root, MachineOperand &Base, MachineOperand &Offset,
5543 unsigned SizeInBytes, bool WantsExt) const {
5544 assert(Base.isReg() && "Expected base to be a register operand");
5545 assert(Offset.isReg() && "Expected offset to be a register operand");
5546
5547 MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();
5548 MachineInstr *OffsetInst = MRI.getVRegDef(Offset.getReg());
5549 if (!OffsetInst)
5550 return None;
5551
5552 unsigned OffsetOpc = OffsetInst->getOpcode();
5553 bool LookedThroughZExt = false;
5554 if (OffsetOpc != TargetOpcode::G_SHL && OffsetOpc != TargetOpcode::G_MUL) {
5555 // Try to look through a ZEXT.
5556 if (OffsetOpc != TargetOpcode::G_ZEXT || !WantsExt)
5557 return None;
5558
5559 OffsetInst = MRI.getVRegDef(OffsetInst->getOperand(1).getReg());
5560 OffsetOpc = OffsetInst->getOpcode();
5561 LookedThroughZExt = true;
5562
5563 if (OffsetOpc != TargetOpcode::G_SHL && OffsetOpc != TargetOpcode::G_MUL)
5564 return None;
5565 }
5566 // Make sure that the memory op is a valid size.
5567 int64_t LegalShiftVal = Log2_32(SizeInBytes);
5568 if (LegalShiftVal == 0)
5569 return None;
5570 if (!isWorthFoldingIntoExtendedReg(*OffsetInst, MRI))
5571 return None;
5572
5573 // Now, try to find the specific G_CONSTANT. Start by assuming that the
5574 // register we will offset is the LHS, and the register containing the
5575 // constant is the RHS.
5576 Register OffsetReg = OffsetInst->getOperand(1).getReg();
5577 Register ConstantReg = OffsetInst->getOperand(2).getReg();
5578 auto ValAndVReg = getConstantVRegValWithLookThrough(ConstantReg, MRI);
5579 if (!ValAndVReg) {
5580 // We didn't get a constant on the RHS. If the opcode is a shift, then
5581 // we're done.
5582 if (OffsetOpc == TargetOpcode::G_SHL)
5583 return None;
5584
5585 // If we have a G_MUL, we can use either register. Try looking at the RHS.
5586 std::swap(OffsetReg, ConstantReg);
5587 ValAndVReg = getConstantVRegValWithLookThrough(ConstantReg, MRI);
5588 if (!ValAndVReg)
5589 return None;
5590 }
5591
5592 // The value must fit into 3 bits, and must be positive. Make sure that is
5593 // true.
5594 int64_t ImmVal = ValAndVReg->Value.getSExtValue();
5595
5596 // Since we're going to pull this into a shift, the constant value must be
5597 // a power of 2. If we got a multiply, then we need to check this.
5598 if (OffsetOpc == TargetOpcode::G_MUL) {
5599 if (!isPowerOf2_32(ImmVal))
5600 return None;
5601
5602 // Got a power of 2. So, the amount we'll shift is the log base-2 of that.
5603 ImmVal = Log2_32(ImmVal);
5604 }
5605
5606 if ((ImmVal & 0x7) != ImmVal)
5607 return None;
5608
5609 // We are only allowed to shift by LegalShiftVal. This shift value is built
5610 // into the instruction, so we can't just use whatever we want.
5611 if (ImmVal != LegalShiftVal)
5612 return None;
5613
5614 unsigned SignExtend = 0;
5615 if (WantsExt) {
5616 // Check if the offset is defined by an extend, unless we looked through a
5617 // G_ZEXT earlier.
5618 if (!LookedThroughZExt) {
5619 MachineInstr *ExtInst = getDefIgnoringCopies(OffsetReg, MRI);
5620 auto Ext = getExtendTypeForInst(*ExtInst, MRI, true);
5621 if (Ext == AArch64_AM::InvalidShiftExtend)
5622 return None;
5623
5624 SignExtend = isSignExtendShiftType(Ext) ? 1 : 0;
5625 // We only support SXTW for signed extension here.
5626 if (SignExtend && Ext != AArch64_AM::SXTW)
5627 return None;
5628 OffsetReg = ExtInst->getOperand(1).getReg();
5629 }
5630
5631 // Need a 32-bit wide register here.
5632 MachineIRBuilder MIB(*MRI.getVRegDef(Root.getReg()));
5633 OffsetReg = moveScalarRegClass(OffsetReg, AArch64::GPR32RegClass, MIB);
5634 }
5635
5636 // We can use the LHS of the GEP as the base, and the LHS of the shift as an
5637 // offset. Signify that we are shifting by setting the shift flag to 1.
5638 return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(Base.getReg()); },
5639 [=](MachineInstrBuilder &MIB) { MIB.addUse(OffsetReg); },
5640 [=](MachineInstrBuilder &MIB) {
5641 // Need to add both immediates here to make sure that they are both
5642 // added to the instruction.
5643 MIB.addImm(SignExtend);
5644 MIB.addImm(1);
5645 }}};
5646}
5647
5648/// This is used for computing addresses like this:
5649///
5650/// ldr x1, [x2, x3, lsl #3]
5651///
5652/// Where x2 is the base register, and x3 is an offset register. The shift-left
5653/// is a constant value specific to this load instruction. That is, we'll never
5654/// see anything other than a 3 here (which corresponds to the size of the
5655/// element being loaded.)
5656InstructionSelector::ComplexRendererFns
5657AArch64InstructionSelector::selectAddrModeShiftedExtendXReg(
5658 MachineOperand &Root, unsigned SizeInBytes) const {
5659 if (!Root.isReg())
5660 return None;
5661 MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();
5662
5663 // We want to find something like this:
5664 //
5665 // val = G_CONSTANT LegalShiftVal
5666 // shift = G_SHL off_reg val
5667 // ptr = G_PTR_ADD base_reg shift
5668 // x = G_LOAD ptr
5669 //
5670 // And fold it into this addressing mode:
5671 //
5672 // ldr x, [base_reg, off_reg, lsl #LegalShiftVal]
5673
5674 // Check if we can find the G_PTR_ADD.
5675 MachineInstr *PtrAdd =
5676 getOpcodeDef(TargetOpcode::G_PTR_ADD, Root.getReg(), MRI);
5677 if (!PtrAdd || !isWorthFoldingIntoExtendedReg(*PtrAdd, MRI))
5678 return None;
5679
5680 // Now, try to match an opcode which will match our specific offset.
5681 // We want a G_SHL or a G_MUL.
5682 MachineInstr *OffsetInst =
5683 getDefIgnoringCopies(PtrAdd->getOperand(2).getReg(), MRI);
5684 return selectExtendedSHL(Root, PtrAdd->getOperand(1),
5685 OffsetInst->getOperand(0), SizeInBytes,
5686 /*WantsExt=*/false);
5687}
5688
5689/// This is used for computing addresses like this:
5690///
5691/// ldr x1, [x2, x3]
5692///
5693/// Where x2 is the base register, and x3 is an offset register.
5694///
5695/// When it is possible (or profitable) to fold a G_PTR_ADD into the address
5696/// calculation, this will do so. Otherwise, it will return None.
5697InstructionSelector::ComplexRendererFns
5698AArch64InstructionSelector::selectAddrModeRegisterOffset(
5699 MachineOperand &Root) const {
5700 MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();
5701
5702 // We need a GEP.
5703 MachineInstr *Gep = MRI.getVRegDef(Root.getReg());
5704 if (!Gep || Gep->getOpcode() != TargetOpcode::G_PTR_ADD)
5705 return None;
5706
5707 // If this is used more than once, let's not bother folding.
5708 // TODO: Check if they are memory ops. If they are, then we can still fold
5709 // without having to recompute anything.
5710 if (!MRI.hasOneNonDBGUse(Gep->getOperand(0).getReg()))
5711 return None;
5712
5713 // Base is the GEP's LHS, offset is its RHS.
5714 return {{[=](MachineInstrBuilder &MIB) {
5715 MIB.addUse(Gep->getOperand(1).getReg());
5716 },
5717 [=](MachineInstrBuilder &MIB) {
5718 MIB.addUse(Gep->getOperand(2).getReg());
5719 },
5720 [=](MachineInstrBuilder &MIB) {
5721 // Need to add both immediates here to make sure that they are both
5722 // added to the instruction.
5723 MIB.addImm(0);
5724 MIB.addImm(0);
5725 }}};
5726}
5727
5728/// This is intended to be equivalent to selectAddrModeXRO in
5729/// AArch64ISelDAGtoDAG. It's used for selecting X register offset loads.
5730InstructionSelector::ComplexRendererFns
5731AArch64InstructionSelector::selectAddrModeXRO(MachineOperand &Root,
5732 unsigned SizeInBytes) const {
5733 MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();
5734 if (!Root.isReg())
5735 return None;
5736 MachineInstr *PtrAdd =
5737 getOpcodeDef(TargetOpcode::G_PTR_ADD, Root.getReg(), MRI);
5738 if (!PtrAdd)
5739 return None;
5740
5741 // Check for an immediate which cannot be encoded in the [base + imm]
5742 // addressing mode, and can't be encoded in an add/sub. If this happens, we'll
5743 // end up with code like:
5744 //
5745 // mov x0, wide
5746 // add x1 base, x0
5747 // ldr x2, [x1, x0]
5748 //
5749 // In this situation, we can use the [base, xreg] addressing mode to save an
5750 // add/sub:
5751 //
5752 // mov x0, wide
5753 // ldr x2, [base, x0]
5754 auto ValAndVReg =
5755 getConstantVRegValWithLookThrough(PtrAdd->getOperand(2).getReg(), MRI);
5756 if (ValAndVReg) {
5757 unsigned Scale = Log2_32(SizeInBytes);
5758 int64_t ImmOff = ValAndVReg->Value.getSExtValue();
5759
5760 // Skip immediates that can be selected in the load/store addressing
5761 // mode.
5762 if (ImmOff % SizeInBytes == 0 && ImmOff >= 0 &&
5763 ImmOff < (0x1000 << Scale))
5764 return None;
5765
5766 // Helper lambda to decide whether or not it is preferable to emit an add.
5767 auto isPreferredADD = [](int64_t ImmOff) {
5768 // Constants in [0x0, 0xfff] can be encoded in an add.
5769 if ((ImmOff & 0xfffffffffffff000LL) == 0x0LL)
5770 return true;
5771
5772 // Can it be encoded in an add lsl #12?
5773 if ((ImmOff & 0xffffffffff000fffLL) != 0x0LL)
5774 return false;
5775
5776 // It can be encoded in an add lsl #12, but we may not want to. If it is
5777 // possible to select this as a single movz, then prefer that. A single
5778 // movz is faster than an add with a shift.
5779 return (ImmOff & 0xffffffffff00ffffLL) != 0x0LL &&
5780 (ImmOff & 0xffffffffffff0fffLL) != 0x0LL;
5781 };
5782
5783 // If the immediate can be encoded in a single add/sub, then bail out.
5784 if (isPreferredADD(ImmOff) || isPreferredADD(-ImmOff))
5785 return None;
5786 }
5787
5788 // Try to fold shifts into the addressing mode.
5789 auto AddrModeFns = selectAddrModeShiftedExtendXReg(Root, SizeInBytes);
5790 if (AddrModeFns)
5791 return AddrModeFns;
5792
5793 // If that doesn't work, see if it's possible to fold in registers from
5794 // a GEP.
5795 return selectAddrModeRegisterOffset(Root);
5796}
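// Worked example (sketch, offset value chosen purely for illustration): for
// an 8-byte access with a constant offset of 0x222222, the offset cannot use
// the scaled [base + imm] form, and isPreferredADD is false for both
// +0x222222 and -0x222222 (neither is encodable by a single add/sub, with or
// without lsl #12), so the selector falls through to a shifted-register or
// plain [base, xreg] addressing mode instead.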
5797
5798/// This is used for computing addresses like this:
5799///
5800/// ldr x0, [xBase, wOffset, sxtw #LegalShiftVal]
5801///
5802/// Where we have a 64-bit base register, a 32-bit offset register, and an
5803/// extend (which may or may not be signed).
5804InstructionSelector::ComplexRendererFns
5805AArch64InstructionSelector::selectAddrModeWRO(MachineOperand &Root,
5806 unsigned SizeInBytes) const {
5807 MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();
5808
5809 MachineInstr *PtrAdd =
5810 getOpcodeDef(TargetOpcode::G_PTR_ADD, Root.getReg(), MRI);
5811 if (!PtrAdd || !isWorthFoldingIntoExtendedReg(*PtrAdd, MRI))
5812 return None;
5813
5814 MachineOperand &LHS = PtrAdd->getOperand(1);
5815 MachineOperand &RHS = PtrAdd->getOperand(2);
5816 MachineInstr *OffsetInst = getDefIgnoringCopies(RHS.getReg(), MRI);
5817
5818 // The first case is the same as selectAddrModeXRO, except we need an extend.
5819 // In this case, we try to find a shift and extend, and fold them into the
5820 // addressing mode.
5821 //
5822 // E.g.
5823 //
5824 // off_reg = G_Z/S/ANYEXT ext_reg
5825 // val = G_CONSTANT LegalShiftVal
5826 // shift = G_SHL off_reg val
5827 // ptr = G_PTR_ADD base_reg shift
5828 // x = G_LOAD ptr
5829 //
5830 // In this case we can get a load like this:
5831 //
5832 // ldr x0, [base_reg, ext_reg, sxtw #LegalShiftVal]
5833 auto ExtendedShl = selectExtendedSHL(Root, LHS, OffsetInst->getOperand(0),
5834 SizeInBytes, /*WantsExt=*/true);
5835 if (ExtendedShl)
5836 return ExtendedShl;
5837
5838 // There was no shift. We can try and fold a G_Z/S/ANYEXT in alone though.
5839 //
5840 // e.g.
5841 // ldr something, [base_reg, ext_reg, sxtw]
5842 if (!isWorthFoldingIntoExtendedReg(*OffsetInst, MRI))
5843 return None;
5844
5845 // Check if this is an extend. We'll get an extend type if it is.
5846 AArch64_AM::ShiftExtendType Ext =
5847 getExtendTypeForInst(*OffsetInst, MRI, /*IsLoadStore=*/true);
5848 if (Ext == AArch64_AM::InvalidShiftExtend)
5849 return None;
5850
5851 // Need a 32-bit wide register.
5852 MachineIRBuilder MIB(*PtrAdd);
5853 Register ExtReg = moveScalarRegClass(OffsetInst->getOperand(1).getReg(),
5854 AArch64::GPR32RegClass, MIB);
5855 unsigned SignExtend = Ext == AArch64_AM::SXTW;
5856
5857 // Base is LHS, offset is ExtReg.
5858 return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(LHS.getReg()); },
5859 [=](MachineInstrBuilder &MIB) { MIB.addUse(ExtReg); },
5860 [=](MachineInstrBuilder &MIB) {
5861 MIB.addImm(SignExtend);
5862 MIB.addImm(0);
5863 }}};
5864}
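
Every successful match in these selectAddrMode* routines returns the same shape: an optional list of renderer callbacks that the caller invokes in order to append the operands of the selected instruction, here the base register, the extended offset register, a sign-extend flag, and a zero shift amount. Below is a minimal self-contained sketch of that pattern, using simplified stand-in types (InstrBuilderStub, RendererFns) instead of the real MachineInstrBuilder and ComplexRendererFns:

#include <cstdint>
#include <functional>
#include <iostream>
#include <optional>
#include <vector>

// Simplified stand-ins for the real LLVM types, for illustration only.
struct InstrBuilderStub {
  std::vector<int64_t> Operands;
  void addUse(int64_t Reg) { Operands.push_back(Reg); }
  void addImm(int64_t Imm) { Operands.push_back(Imm); }
};

using RendererFn = std::function<void(InstrBuilderStub &)>;
using RendererFns = std::optional<std::vector<RendererFn>>;

// Mirrors the tail of selectAddrModeWRO: base, extended offset, then a
// sign-extend flag (1 for a signed extend such as sxtw) and a zero shift.
static RendererFns makeWROOperands(int64_t BaseReg, int64_t ExtReg,
                                   bool SignExtend) {
  std::vector<RendererFn> Fns = {
      [=](InstrBuilderStub &MIB) { MIB.addUse(BaseReg); },
      [=](InstrBuilderStub &MIB) { MIB.addUse(ExtReg); },
      [=](InstrBuilderStub &MIB) {
        MIB.addImm(SignExtend ? 1 : 0);
        MIB.addImm(0);
      }};
  return Fns;
}

int main() {
  InstrBuilderStub MIB;
  RendererFns Fns = makeWROOperands(/*BaseReg=*/100, /*ExtReg=*/101,
                                    /*SignExtend=*/true);
  if (Fns)
    for (RendererFn &Fn : *Fns)
      Fn(MIB); // the caller applies each renderer in order
  for (int64_t Op : MIB.Operands)
    std::cout << Op << ' '; // prints: 100 101 1 0
  std::cout << '\n';
  return 0;
}

Deferring operand emission behind callbacks lets the generated matcher decide whether to commit to this addressing mode before any operands are written to the final instruction.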
5865
5866/// Select a "register plus unscaled signed 9-bit immediate" address. This
5867/// should only match when there is an offset that is not valid for a scaled
5868/// immediate addressing mode. The "Size" argument is the size in bytes of the
5869/// memory reference, which is needed here to know what is valid for a scaled
5870/// immediate.
5871InstructionSelector::ComplexRendererFns
5872AArch64InstructionSelector::selectAddrModeUnscaled(MachineOperand &Root,
5873 unsigned Size) const {
5874 MachineRegisterInfo &MRI =
5875 Root.getParent()->getParent()->getParent()->getRegInfo();
5876
5877 if (!Root.isReg())
Step 15: Calling 'MachineOperand::isReg'
Step 17: Returning from 'MachineOperand::isReg'
Step 18: Taking false branch
5878 return None;
5879
5880 if (!isBaseWithConstantOffset(Root, MRI))
Step 19: Assuming the condition is false
Step 20: Taking false branch
5881 return None;
5882
5883 MachineInstr *RootDef = MRI.getVRegDef(Root.getReg());
5884 if (!RootDef)
Step 21: Assuming 'RootDef' is non-null
Step 22: Taking false branch
5885 return None;
5886
5887 MachineOperand &OffImm = RootDef->getOperand(2);
5888 if (!OffImm.isReg())
Step 23: Calling 'MachineOperand::isReg'
Step 26: Returning from 'MachineOperand::isReg'
Step 27: Taking false branch
5889 return None;
5890 MachineInstr *RHS = MRI.getVRegDef(OffImm.getReg());
5891 if (!RHS || RHS->getOpcode() != TargetOpcode::G_CONSTANT)
Step 28: Assuming 'RHS' is non-null
Step 29: Assuming the condition is false
Step 30: Taking false branch
5892 return None;
5893 int64_t RHSC;
5894 MachineOperand &RHSOp1 = RHS->getOperand(1);
5895 if (!RHSOp1.isCImm() || RHSOp1.getCImm()->getBitWidth() > 64)
Step 31: Calling 'MachineOperand::isCImm'
Step 34: Returning from 'MachineOperand::isCImm'
Step 35: Assuming the condition is false
Step 36: Taking false branch
5896 return None;
5897 RHSC = RHSOp1.getCImm()->getSExtValue();
5898
5899 // If the offset is valid as a scaled immediate, don't match here.
5900 if ((RHSC & (Size - 1)) == 0 && RHSC >= 0 && RHSC < (0x1000 << Log2_32(Size)))
Step 37: Assuming the condition is true
Step 38: Assuming 'RHSC' is >= 0
Step 39: Calling 'Log2_32'
Step 41: Returning from 'Log2_32'
Step 42: The result of the left shift is undefined due to shifting by '4294967295', which is greater or equal to the width of type 'int'
(A standalone sketch reproducing this shift amount follows selectAddrModeUnscaled, after line 5910 below.)
5901 return None;
5902 if (RHSC >= -256 && RHSC < 256) {
5903 MachineOperand &Base = RootDef->getOperand(1);
5904 return {{
5905 [=](MachineInstrBuilder &MIB) { MIB.add(Base); },
5906 [=](MachineInstrBuilder &MIB) { MIB.addImm(RHSC); },
5907 }};
5908 }
5909 return None;
5910}
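
The warning at line 5900 comes down to how Log2_32 behaves on zero: LLVM defines it as 31 - countLeadingZeros(Value), so Log2_32(0) wraps to 4294967295, and shifting a 32-bit int by that amount is undefined. The generated callers presumably always pass a non-zero, power-of-two Size, which would make this a false positive in practice, but the analyzer cannot prove that from this function alone. The standalone sketch below (log2_32_sketch, countLeadingZeros32, and fitsScaledImmediate are illustrative names, not LLVM APIs) reproduces the reported shift amount and shows one possible defensive guard:

#include <cassert>
#include <climits>
#include <cstdint>

// __builtin_clz (GCC/Clang) is itself undefined for 0, so handle 0
// explicitly, as LLVM's countLeadingZeros does.
static unsigned countLeadingZeros32(uint32_t Value) {
  return Value == 0 ? 32u : static_cast<unsigned>(__builtin_clz(Value));
}

// Same formula as LLVM's Log2_32 (31 - countLeadingZeros); for 0 the
// unsigned subtraction wraps to 4294967295, the value in the diagnostic.
static unsigned log2_32_sketch(uint32_t Value) {
  return 31u - countLeadingZeros32(Value);
}

// Guarded version of the bound check on line 5900: only form the shifted
// bound when Size is a non-zero power of two (illustrative, not a patch).
static bool fitsScaledImmediate(int64_t RHSC, unsigned Size) {
  if (Size == 0 || (Size & (Size - 1)) != 0)
    return false; // defensive: never shift by Log2_32(0)
  return (RHSC & (Size - 1)) == 0 && RHSC >= 0 &&
         RHSC < (int64_t(0x1000) << log2_32_sketch(Size));
}

int main() {
  assert(log2_32_sketch(8) == 3);
  assert(log2_32_sketch(0) == UINT_MAX);   // 4294967295, as reported
  assert(fitsScaledImmediate(0x40, 8));    // 8-byte aligned, below 0x1000 << 3
  assert(!fitsScaledImmediate(0x9000, 8)); // too large for the scaled form
  assert(!fitsScaledImmediate(0x40, 0));   // rejected instead of shifting by UINT_MAX
  return 0;
}

The same 0x1000 << Log2_32(SizeInBytes) pattern appears in the scaled-immediate check around lines 5757-5763 above and relies on the same guarantee about the size argument.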
5911
5912InstructionSelector::ComplexRendererFns
5913AArch64InstructionSelector::tryFoldAddLowIntoImm(MachineInstr &RootDef,
5914 unsigned Size,
5915 MachineRegisterInfo &MRI) const {
5916 if (RootDef.getOpcode() != AArch64::G_ADD_LOW)
5917 return None;
5918 MachineInstr &Adrp = *MRI.getVRegDef(RootDef.getOperand(1).getReg());
5919 if (Adrp.getOpcode() != AArch64::ADRP)
5920 return None;
5921
5922 // TODO: add heuristics like isWorthFoldingADDlow() from SelectionDAG.
5923 auto Offset = Adrp.getOperand(1).getOffset();
5924 if (Offset % Size != 0)
5925 return None;
5926
5927 auto GV = Adrp.getOperand(1).getGlobal();
5928 if (GV->isThreadLocal())
5929 return None;
5930
5931 auto &MF = *RootDef.getParent()->getParent();
5932 if (GV->getPointerAlignment(MF.getDataLayout()) < Size)
5933 return None;
5934
5935 unsigned OpFlags = STI.ClassifyGlobalReference(GV, MF.getTarget());
5936 MachineIRBuilder MIRBuilder(RootDef);
5937 Register AdrpReg = Adrp.getOperand(0).getReg();
5938 return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(AdrpReg); },