Bug Summary

File: build/llvm-toolchain-snapshot-16~++20220904122748+c444af1c20b3/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
Warning: line 6432, column 63
The result of the left shift is undefined due to shifting by '4294967295', which is greater or equal to the width of type 'int'
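The shift count '4294967295' is UINT32_MAX, i.e. the bit pattern of -1 reinterpreted as an unsigned 32-bit value. Below is a minimal C++ sketch of this class of bug for illustration only; it is not the code flagged at line 6432 (which lies outside the excerpt shown here), and the identifiers are hypothetical. Shifting a value whose promoted type is 'int' by an amount greater than or equal to its width (32 bits here) is undefined behaviour.

#include <cstdint>

// Hypothetical illustration of how a shift amount of 4294967295 arises:
// an unsigned 32-bit quantity wraps around to UINT32_MAX and is then used
// as the right-hand operand of a shift on a plain 'int'.
uint64_t shiftSketch(uint32_t MaybeZero) {
  uint32_t ShiftAmt = MaybeZero - 1; // wraps to 4294967295 when MaybeZero == 0
  return 1 << ShiftAmt;              // undefined when ShiftAmt >= 32
}

// A guarded variant that keeps the shift within the width of the operand:
uint64_t shiftSketchSafe(uint32_t MaybeZero) {
  uint32_t ShiftAmt = MaybeZero - 1;
  return ShiftAmt < 64 ? (uint64_t{1} << ShiftAmt) : 0;
}

In the flagged code, the fix is typically either to guard against the degenerate input before shifting or to perform the shift on a wide unsigned type with a bounded count.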

Annotated Source Code

clang -cc1 -cc1 -triple x86_64-pc-linux-gnu -analyze -disable-free -clear-ast-before-backend -disable-llvm-verifier -discard-value-names -main-file-name AArch64InstructionSelector.cpp -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=cplusplus -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -analyzer-config-compatibility-mode=true -mrelocation-model pic -pic-level 2 -mframe-pointer=none -fmath-errno -ffp-contract=on -fno-rounding-math -mconstructor-aliases -funwind-tables=2 -target-cpu x86-64 -tune-cpu generic -debugger-tuning=gdb -ffunction-sections -fdata-sections -fcoverage-compilation-dir=/build/llvm-toolchain-snapshot-16~++20220904122748+c444af1c20b3/build-llvm/tools/clang/stage2-bins -resource-dir /usr/lib/llvm-16/lib/clang/16.0.0 -D _DEBUG -D _GNU_SOURCE -D __STDC_CONSTANT_MACROS -D __STDC_FORMAT_MACROS -D __STDC_LIMIT_MACROS -I lib/Target/AArch64 -I /build/llvm-toolchain-snapshot-16~++20220904122748+c444af1c20b3/llvm/lib/Target/AArch64 -I include -I /build/llvm-toolchain-snapshot-16~++20220904122748+c444af1c20b3/llvm/include -D _FORTIFY_SOURCE=2 -D NDEBUG -U NDEBUG -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/c++/10 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/x86_64-linux-gnu/c++/10 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/c++/10/backward -internal-isystem /usr/lib/llvm-16/lib/clang/16.0.0/include -internal-isystem /usr/local/include -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../x86_64-linux-gnu/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -fmacro-prefix-map=/build/llvm-toolchain-snapshot-16~++20220904122748+c444af1c20b3/build-llvm/tools/clang/stage2-bins=build-llvm/tools/clang/stage2-bins -fmacro-prefix-map=/build/llvm-toolchain-snapshot-16~++20220904122748+c444af1c20b3/= -fcoverage-prefix-map=/build/llvm-toolchain-snapshot-16~++20220904122748+c444af1c20b3/build-llvm/tools/clang/stage2-bins=build-llvm/tools/clang/stage2-bins -fcoverage-prefix-map=/build/llvm-toolchain-snapshot-16~++20220904122748+c444af1c20b3/= -O2 -Wno-unused-command-line-argument -Wno-unused-parameter -Wwrite-strings -Wno-missing-field-initializers -Wno-long-long -Wno-maybe-uninitialized -Wno-class-memaccess -Wno-redundant-move -Wno-pessimizing-move -Wno-noexcept-type -Wno-comment -Wno-misleading-indentation -std=c++17 -fdeprecated-macro -fdebug-compilation-dir=/build/llvm-toolchain-snapshot-16~++20220904122748+c444af1c20b3/build-llvm/tools/clang/stage2-bins -fdebug-prefix-map=/build/llvm-toolchain-snapshot-16~++20220904122748+c444af1c20b3/build-llvm/tools/clang/stage2-bins=build-llvm/tools/clang/stage2-bins -fdebug-prefix-map=/build/llvm-toolchain-snapshot-16~++20220904122748+c444af1c20b3/= -ferror-limit 19 -fvisibility=hidden -fvisibility-inlines-hidden -stack-protector 2 -fgnuc-version=4.2.1 -fcolor-diagnostics -vectorize-loops -vectorize-slp -analyzer-output=html -analyzer-config stable-report-filename=true -faddrsig -D__GCC_HAVE_DWARF2_CFI_ASM=1 -o 
/tmp/scan-build-2022-09-04-125545-48738-1 -x c++ /build/llvm-toolchain-snapshot-16~++20220904122748+c444af1c20b3/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp

/build/llvm-toolchain-snapshot-16~++20220904122748+c444af1c20b3/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp

1//===- AArch64InstructionSelector.cpp ----------------------------*- C++ -*-==//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8/// \file
9/// This file implements the targeting of the InstructionSelector class for
10/// AArch64.
11/// \todo This should be generated by TableGen.
12//===----------------------------------------------------------------------===//
13
14#include "AArch64GlobalISelUtils.h"
15#include "AArch64InstrInfo.h"
16#include "AArch64MachineFunctionInfo.h"
17#include "AArch64RegisterBankInfo.h"
18#include "AArch64RegisterInfo.h"
19#include "AArch64Subtarget.h"
20#include "AArch64TargetMachine.h"
21#include "MCTargetDesc/AArch64AddressingModes.h"
22#include "MCTargetDesc/AArch64MCTargetDesc.h"
23#include "llvm/ADT/Optional.h"
24#include "llvm/BinaryFormat/Dwarf.h"
25#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
26#include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
27#include "llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h"
28#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
29#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
30#include "llvm/CodeGen/GlobalISel/Utils.h"
31#include "llvm/CodeGen/MachineBasicBlock.h"
32#include "llvm/CodeGen/MachineConstantPool.h"
33#include "llvm/CodeGen/MachineFrameInfo.h"
34#include "llvm/CodeGen/MachineFunction.h"
35#include "llvm/CodeGen/MachineInstr.h"
36#include "llvm/CodeGen/MachineInstrBuilder.h"
37#include "llvm/CodeGen/MachineMemOperand.h"
38#include "llvm/CodeGen/MachineOperand.h"
39#include "llvm/CodeGen/MachineRegisterInfo.h"
40#include "llvm/CodeGen/TargetOpcodes.h"
41#include "llvm/IR/Constants.h"
42#include "llvm/IR/DerivedTypes.h"
43#include "llvm/IR/Instructions.h"
44#include "llvm/IR/IntrinsicsAArch64.h"
45#include "llvm/IR/PatternMatch.h"
46#include "llvm/IR/Type.h"
47#include "llvm/Pass.h"
48#include "llvm/Support/Debug.h"
49#include "llvm/Support/raw_ostream.h"
50
51#define DEBUG_TYPE "aarch64-isel"
52
53using namespace llvm;
54using namespace MIPatternMatch;
55using namespace AArch64GISelUtils;
56
57namespace llvm {
58class BlockFrequencyInfo;
59class ProfileSummaryInfo;
60}
61
62namespace {
63
64#define GET_GLOBALISEL_PREDICATE_BITSET
65#include "AArch64GenGlobalISel.inc"
66#undef GET_GLOBALISEL_PREDICATE_BITSET
67
68
69class AArch64InstructionSelector : public InstructionSelector {
70public:
71 AArch64InstructionSelector(const AArch64TargetMachine &TM,
72 const AArch64Subtarget &STI,
73 const AArch64RegisterBankInfo &RBI);
74
75 bool select(MachineInstr &I) override;
76 static const char *getName() { return DEBUG_TYPE; }
77
78 void setupMF(MachineFunction &MF, GISelKnownBits *KB,
79 CodeGenCoverage &CoverageInfo, ProfileSummaryInfo *PSI,
80 BlockFrequencyInfo *BFI) override {
81 InstructionSelector::setupMF(MF, KB, CoverageInfo, PSI, BFI);
82 MIB.setMF(MF);
83
84 // hasFnAttribute() is expensive to call on every BRCOND selection, so
85 // cache it here for each run of the selector.
86 ProduceNonFlagSettingCondBr =
87 !MF.getFunction().hasFnAttribute(Attribute::SpeculativeLoadHardening);
88 MFReturnAddr = Register();
89
90 processPHIs(MF);
91 }
92
93private:
94 /// tblgen-erated 'select' implementation, used as the initial selector for
95 /// the patterns that don't require complex C++.
96 bool selectImpl(MachineInstr &I, CodeGenCoverage &CoverageInfo) const;
97
98 // A lowering phase that runs before any selection attempts.
99 // Returns true if the instruction was modified.
100 bool preISelLower(MachineInstr &I);
101
102 // An early selection function that runs before the selectImpl() call.
103 bool earlySelect(MachineInstr &I);
104
105 // Do some preprocessing of G_PHIs before we begin selection.
106 void processPHIs(MachineFunction &MF);
107
108 bool earlySelectSHL(MachineInstr &I, MachineRegisterInfo &MRI);
109
110 /// Eliminate same-sized cross-bank copies into stores before selectImpl().
111 bool contractCrossBankCopyIntoStore(MachineInstr &I,
112 MachineRegisterInfo &MRI);
113
114 bool convertPtrAddToAdd(MachineInstr &I, MachineRegisterInfo &MRI);
115
116 bool selectVaStartAAPCS(MachineInstr &I, MachineFunction &MF,
117 MachineRegisterInfo &MRI) const;
118 bool selectVaStartDarwin(MachineInstr &I, MachineFunction &MF,
119 MachineRegisterInfo &MRI) const;
120
121 ///@{
122 /// Helper functions for selectCompareBranch.
123 bool selectCompareBranchFedByFCmp(MachineInstr &I, MachineInstr &FCmp,
124 MachineIRBuilder &MIB) const;
125 bool selectCompareBranchFedByICmp(MachineInstr &I, MachineInstr &ICmp,
126 MachineIRBuilder &MIB) const;
127 bool tryOptCompareBranchFedByICmp(MachineInstr &I, MachineInstr &ICmp,
128 MachineIRBuilder &MIB) const;
129 bool tryOptAndIntoCompareBranch(MachineInstr &AndInst, bool Invert,
130 MachineBasicBlock *DstMBB,
131 MachineIRBuilder &MIB) const;
132 ///@}
133
134 bool selectCompareBranch(MachineInstr &I, MachineFunction &MF,
135 MachineRegisterInfo &MRI);
136
137 bool selectVectorAshrLshr(MachineInstr &I, MachineRegisterInfo &MRI);
138 bool selectVectorSHL(MachineInstr &I, MachineRegisterInfo &MRI);
139
140 // Helper to generate an equivalent of scalar_to_vector into a new register,
141 // returned via 'Dst'.
142 MachineInstr *emitScalarToVector(unsigned EltSize,
143 const TargetRegisterClass *DstRC,
144 Register Scalar,
145 MachineIRBuilder &MIRBuilder) const;
146
147 /// Emit a lane insert into \p DstReg, or a new vector register if None is
148 /// provided.
149 ///
150 /// The lane inserted into is defined by \p LaneIdx. The vector source
151 /// register is given by \p SrcReg. The register containing the element is
152 /// given by \p EltReg.
153 MachineInstr *emitLaneInsert(Optional<Register> DstReg, Register SrcReg,
154 Register EltReg, unsigned LaneIdx,
155 const RegisterBank &RB,
156 MachineIRBuilder &MIRBuilder) const;
157
158 /// Emit a sequence of instructions representing a constant \p CV for a
159 /// vector register \p Dst. (E.g. a MOV, or a load from a constant pool.)
160 ///
161 /// \returns the last instruction in the sequence on success, and nullptr
162 /// otherwise.
163 MachineInstr *emitConstantVector(Register Dst, Constant *CV,
164 MachineIRBuilder &MIRBuilder,
165 MachineRegisterInfo &MRI);
166
167 bool selectInsertElt(MachineInstr &I, MachineRegisterInfo &MRI);
168 bool tryOptConstantBuildVec(MachineInstr &MI, LLT DstTy,
169 MachineRegisterInfo &MRI);
170 /// \returns true if a G_BUILD_VECTOR instruction \p MI can be selected as a
171 /// SUBREG_TO_REG.
172 bool tryOptBuildVecToSubregToReg(MachineInstr &MI, MachineRegisterInfo &MRI);
173 bool selectBuildVector(MachineInstr &I, MachineRegisterInfo &MRI);
174 bool selectMergeValues(MachineInstr &I, MachineRegisterInfo &MRI);
175 bool selectUnmergeValues(MachineInstr &I, MachineRegisterInfo &MRI);
176
177 bool selectShuffleVector(MachineInstr &I, MachineRegisterInfo &MRI);
178 bool selectExtractElt(MachineInstr &I, MachineRegisterInfo &MRI);
179 bool selectConcatVectors(MachineInstr &I, MachineRegisterInfo &MRI);
180 bool selectSplitVectorUnmerge(MachineInstr &I, MachineRegisterInfo &MRI);
181
182 /// Helper function to select vector load intrinsics like
183 /// @llvm.aarch64.neon.ld2.*, @llvm.aarch64.neon.ld4.*, etc.
184 /// \p Opc is the opcode that the selected instruction should use.
185 /// \p NumVecs is the number of vector destinations for the instruction.
186 /// \p I is the original G_INTRINSIC_W_SIDE_EFFECTS instruction.
187 bool selectVectorLoadIntrinsic(unsigned Opc, unsigned NumVecs,
188 MachineInstr &I);
189 bool selectIntrinsicWithSideEffects(MachineInstr &I,
190 MachineRegisterInfo &MRI);
191 bool selectIntrinsic(MachineInstr &I, MachineRegisterInfo &MRI);
192 bool selectVectorICmp(MachineInstr &I, MachineRegisterInfo &MRI);
193 bool selectIntrinsicTrunc(MachineInstr &I, MachineRegisterInfo &MRI) const;
194 bool selectIntrinsicRound(MachineInstr &I, MachineRegisterInfo &MRI) const;
195 bool selectJumpTable(MachineInstr &I, MachineRegisterInfo &MRI);
196 bool selectBrJT(MachineInstr &I, MachineRegisterInfo &MRI);
197 bool selectTLSGlobalValue(MachineInstr &I, MachineRegisterInfo &MRI);
198 bool selectReduction(MachineInstr &I, MachineRegisterInfo &MRI);
199 bool selectMOPS(MachineInstr &I, MachineRegisterInfo &MRI);
200 bool selectUSMovFromExtend(MachineInstr &I, MachineRegisterInfo &MRI);
201
202 unsigned emitConstantPoolEntry(const Constant *CPVal,
203 MachineFunction &MF) const;
204 MachineInstr *emitLoadFromConstantPool(const Constant *CPVal,
205 MachineIRBuilder &MIRBuilder) const;
206
207 // Emit a vector concat operation.
208 MachineInstr *emitVectorConcat(Optional<Register> Dst, Register Op1,
209 Register Op2,
210 MachineIRBuilder &MIRBuilder) const;
211
212 // Emit an integer compare between LHS and RHS, which checks for Predicate.
213 MachineInstr *emitIntegerCompare(MachineOperand &LHS, MachineOperand &RHS,
214 MachineOperand &Predicate,
215 MachineIRBuilder &MIRBuilder) const;
216
217 /// Emit a floating point comparison between \p LHS and \p RHS.
218 /// \p Pred if given is the intended predicate to use.
219 MachineInstr *emitFPCompare(Register LHS, Register RHS,
220 MachineIRBuilder &MIRBuilder,
221 Optional<CmpInst::Predicate> = None) const;
222
223 MachineInstr *emitInstr(unsigned Opcode,
224 std::initializer_list<llvm::DstOp> DstOps,
225 std::initializer_list<llvm::SrcOp> SrcOps,
226 MachineIRBuilder &MIRBuilder,
227 const ComplexRendererFns &RenderFns = None) const;
228 /// Helper function to emit an add or sub instruction.
229 ///
230 /// \p AddrModeAndSizeToOpcode must contain each of the opcode variants above
231 /// in a specific order.
232 ///
233 /// Below is an example of the expected input to \p AddrModeAndSizeToOpcode.
234 ///
235 /// \code
236 /// const std::array<std::array<unsigned, 2>, 4> Table {
237 /// {{AArch64::ADDXri, AArch64::ADDWri},
238 /// {AArch64::ADDXrs, AArch64::ADDWrs},
239 /// {AArch64::ADDXrr, AArch64::ADDWrr},
240 /// {AArch64::SUBXri, AArch64::SUBWri},
241 /// {AArch64::ADDXrx, AArch64::ADDWrx}}};
242 /// \endcode
243 ///
244 /// Each row in the table corresponds to a different addressing mode. Each
245 /// column corresponds to a different register size.
246 ///
247 /// \attention Rows must be structured as follows:
248 /// - Row 0: The ri opcode variants
249 /// - Row 1: The rs opcode variants
250 /// - Row 2: The rr opcode variants
251 /// - Row 3: The ri opcode variants for negative immediates
252 /// - Row 4: The rx opcode variants
253 ///
254 /// \attention Columns must be structured as follows:
255 /// - Column 0: The 64-bit opcode variants
256 /// - Column 1: The 32-bit opcode variants
257 ///
258 /// \p Dst is the destination register of the binop to emit.
259 /// \p LHS is the left-hand operand of the binop to emit.
260 /// \p RHS is the right-hand operand of the binop to emit.
261 MachineInstr *emitAddSub(
262 const std::array<std::array<unsigned, 2>, 5> &AddrModeAndSizeToOpcode,
263 Register Dst, MachineOperand &LHS, MachineOperand &RHS,
264 MachineIRBuilder &MIRBuilder) const;
265 MachineInstr *emitADD(Register DefReg, MachineOperand &LHS,
266 MachineOperand &RHS,
267 MachineIRBuilder &MIRBuilder) const;
268 MachineInstr *emitADDS(Register Dst, MachineOperand &LHS, MachineOperand &RHS,
269 MachineIRBuilder &MIRBuilder) const;
270 MachineInstr *emitSUBS(Register Dst, MachineOperand &LHS, MachineOperand &RHS,
271 MachineIRBuilder &MIRBuilder) const;
272 MachineInstr *emitCMN(MachineOperand &LHS, MachineOperand &RHS,
273 MachineIRBuilder &MIRBuilder) const;
274 MachineInstr *emitTST(MachineOperand &LHS, MachineOperand &RHS,
275 MachineIRBuilder &MIRBuilder) const;
276 MachineInstr *emitSelect(Register Dst, Register LHS, Register RHS,
277 AArch64CC::CondCode CC,
278 MachineIRBuilder &MIRBuilder) const;
279 MachineInstr *emitExtractVectorElt(Optional<Register> DstReg,
280 const RegisterBank &DstRB, LLT ScalarTy,
281 Register VecReg, unsigned LaneIdx,
282 MachineIRBuilder &MIRBuilder) const;
283 MachineInstr *emitCSINC(Register Dst, Register Src1, Register Src2,
284 AArch64CC::CondCode Pred,
285 MachineIRBuilder &MIRBuilder) const;
286 /// Emit a CSet for a FP compare.
287 ///
288 /// \p Dst is expected to be a 32-bit scalar register.
289 MachineInstr *emitCSetForFCmp(Register Dst, CmpInst::Predicate Pred,
290 MachineIRBuilder &MIRBuilder) const;
291
292 /// Emit the overflow op for \p Opcode.
293 ///
294 /// \p Opcode is expected to be an overflow op's opcode, e.g. G_UADDO,
295 /// G_USUBO, etc.
296 std::pair<MachineInstr *, AArch64CC::CondCode>
297 emitOverflowOp(unsigned Opcode, Register Dst, MachineOperand &LHS,
298 MachineOperand &RHS, MachineIRBuilder &MIRBuilder) const;
299
300 /// Emit expression as a conjunction (a series of CCMP/CFCMP ops).
301 /// In some cases this is even possible with OR operations in the expression.
302 MachineInstr *emitConjunction(Register Val, AArch64CC::CondCode &OutCC,
303 MachineIRBuilder &MIB) const;
304 MachineInstr *emitConditionalComparison(Register LHS, Register RHS,
305 CmpInst::Predicate CC,
306 AArch64CC::CondCode Predicate,
307 AArch64CC::CondCode OutCC,
308 MachineIRBuilder &MIB) const;
309 MachineInstr *emitConjunctionRec(Register Val, AArch64CC::CondCode &OutCC,
310 bool Negate, Register CCOp,
311 AArch64CC::CondCode Predicate,
312 MachineIRBuilder &MIB) const;
313
314 /// Emit a TB(N)Z instruction which tests \p Bit in \p TestReg.
315 /// \p IsNegative is true if the test should be "not zero".
316 /// This will also optimize the test bit instruction when possible.
317 MachineInstr *emitTestBit(Register TestReg, uint64_t Bit, bool IsNegative,
318 MachineBasicBlock *DstMBB,
319 MachineIRBuilder &MIB) const;
320
321 /// Emit a CB(N)Z instruction which branches to \p DestMBB.
322 MachineInstr *emitCBZ(Register CompareReg, bool IsNegative,
323 MachineBasicBlock *DestMBB,
324 MachineIRBuilder &MIB) const;
325
326 // Equivalent to the i32shift_a and friends from AArch64InstrInfo.td.
327 // We use these manually instead of using the importer since it doesn't
328 // support SDNodeXForm.
329 ComplexRendererFns selectShiftA_32(const MachineOperand &Root) const;
330 ComplexRendererFns selectShiftB_32(const MachineOperand &Root) const;
331 ComplexRendererFns selectShiftA_64(const MachineOperand &Root) const;
332 ComplexRendererFns selectShiftB_64(const MachineOperand &Root) const;
333
334 ComplexRendererFns select12BitValueWithLeftShift(uint64_t Immed) const;
335 ComplexRendererFns selectArithImmed(MachineOperand &Root) const;
336 ComplexRendererFns selectNegArithImmed(MachineOperand &Root) const;
337
338 ComplexRendererFns selectAddrModeUnscaled(MachineOperand &Root,
339 unsigned Size) const;
340
341 ComplexRendererFns selectAddrModeUnscaled8(MachineOperand &Root) const {
342 return selectAddrModeUnscaled(Root, 1);
343 }
344 ComplexRendererFns selectAddrModeUnscaled16(MachineOperand &Root) const {
345 return selectAddrModeUnscaled(Root, 2);
346 }
347 ComplexRendererFns selectAddrModeUnscaled32(MachineOperand &Root) const {
348 return selectAddrModeUnscaled(Root, 4);
349 }
350 ComplexRendererFns selectAddrModeUnscaled64(MachineOperand &Root) const {
351 return selectAddrModeUnscaled(Root, 8);
352 }
353 ComplexRendererFns selectAddrModeUnscaled128(MachineOperand &Root) const {
354 return selectAddrModeUnscaled(Root, 16);
355 }
356
357 /// Helper to try to fold in a GISEL_ADD_LOW into an immediate, to be used
358 /// from complex pattern matchers like selectAddrModeIndexed().
359 ComplexRendererFns tryFoldAddLowIntoImm(MachineInstr &RootDef, unsigned Size,
360 MachineRegisterInfo &MRI) const;
361
362 ComplexRendererFns selectAddrModeIndexed(MachineOperand &Root,
363 unsigned Size) const;
364 template <int Width>
365 ComplexRendererFns selectAddrModeIndexed(MachineOperand &Root) const {
366 return selectAddrModeIndexed(Root, Width / 8);
367 }
368
369 bool isWorthFoldingIntoExtendedReg(MachineInstr &MI,
370 const MachineRegisterInfo &MRI) const;
371 ComplexRendererFns
372 selectAddrModeShiftedExtendXReg(MachineOperand &Root,
373 unsigned SizeInBytes) const;
374
375 /// Returns a \p ComplexRendererFns which contains a base, offset, and whether
376 /// or not a shift + extend should be folded into an addressing mode. Returns
377 /// None when this is not profitable or possible.
378 ComplexRendererFns
379 selectExtendedSHL(MachineOperand &Root, MachineOperand &Base,
380 MachineOperand &Offset, unsigned SizeInBytes,
381 bool WantsExt) const;
382 ComplexRendererFns selectAddrModeRegisterOffset(MachineOperand &Root) const;
383 ComplexRendererFns selectAddrModeXRO(MachineOperand &Root,
384 unsigned SizeInBytes) const;
385 template <int Width>
386 ComplexRendererFns selectAddrModeXRO(MachineOperand &Root) const {
387 return selectAddrModeXRO(Root, Width / 8);
388 }
389
390 ComplexRendererFns selectAddrModeWRO(MachineOperand &Root,
391 unsigned SizeInBytes) const;
392 template <int Width>
393 ComplexRendererFns selectAddrModeWRO(MachineOperand &Root) const {
394 return selectAddrModeWRO(Root, Width / 8);
395 }
396
397 ComplexRendererFns selectShiftedRegister(MachineOperand &Root,
398 bool AllowROR = false) const;
399
400 ComplexRendererFns selectArithShiftedRegister(MachineOperand &Root) const {
401 return selectShiftedRegister(Root);
402 }
403
404 ComplexRendererFns selectLogicalShiftedRegister(MachineOperand &Root) const {
405 return selectShiftedRegister(Root, true);
406 }
407
408 /// Given an extend instruction, determine the correct shift-extend type for
409 /// that instruction.
410 ///
411 /// If the instruction is going to be used in a load or store, pass
412 /// \p IsLoadStore = true.
413 AArch64_AM::ShiftExtendType
414 getExtendTypeForInst(MachineInstr &MI, MachineRegisterInfo &MRI,
415 bool IsLoadStore = false) const;
416
417 /// Move \p Reg to \p RC if \p Reg is not already on \p RC.
418 ///
419 /// \returns Either \p Reg if no change was necessary, or the new register
420 /// created by moving \p Reg.
421 ///
422 /// Note: This uses emitCopy right now.
423 Register moveScalarRegClass(Register Reg, const TargetRegisterClass &RC,
424 MachineIRBuilder &MIB) const;
425
426 ComplexRendererFns selectArithExtendedRegister(MachineOperand &Root) const;
427
428 void renderTruncImm(MachineInstrBuilder &MIB, const MachineInstr &MI,
429 int OpIdx = -1) const;
430 void renderLogicalImm32(MachineInstrBuilder &MIB, const MachineInstr &I,
431 int OpIdx = -1) const;
432 void renderLogicalImm64(MachineInstrBuilder &MIB, const MachineInstr &I,
433 int OpIdx = -1) const;
434 void renderFPImm16(MachineInstrBuilder &MIB, const MachineInstr &MI,
435 int OpIdx = -1) const;
436 void renderFPImm32(MachineInstrBuilder &MIB, const MachineInstr &MI,
437 int OpIdx = -1) const;
438 void renderFPImm64(MachineInstrBuilder &MIB, const MachineInstr &MI,
439 int OpIdx = -1) const;
440 void renderFPImm32SIMDModImmType4(MachineInstrBuilder &MIB,
441 const MachineInstr &MI,
442 int OpIdx = -1) const;
443
444 // Materialize a GlobalValue or BlockAddress using a movz+movk sequence.
445 void materializeLargeCMVal(MachineInstr &I, const Value *V, unsigned OpFlags);
446
447 // Optimization methods.
448 bool tryOptSelect(GSelect &Sel);
449 bool tryOptSelectConjunction(GSelect &Sel, MachineInstr &CondMI);
450 MachineInstr *tryFoldIntegerCompare(MachineOperand &LHS, MachineOperand &RHS,
451 MachineOperand &Predicate,
452 MachineIRBuilder &MIRBuilder) const;
453
454 /// Return true if \p MI is a load or store of \p NumBytes bytes.
455 bool isLoadStoreOfNumBytes(const MachineInstr &MI, unsigned NumBytes) const;
456
457 /// Returns true if \p MI is guaranteed to have the high-half of a 64-bit
458 /// register zeroed out. In other words, the result of MI has been explicitly
459 /// zero extended.
460 bool isDef32(const MachineInstr &MI) const;
461
462 const AArch64TargetMachine &TM;
463 const AArch64Subtarget &STI;
464 const AArch64InstrInfo &TII;
465 const AArch64RegisterInfo &TRI;
466 const AArch64RegisterBankInfo &RBI;
467
468 bool ProduceNonFlagSettingCondBr = false;
469
470 // Some cached values used during selection.
471 // We use LR as a live-in register, and we keep track of it here as it can be
472 // clobbered by calls.
473 Register MFReturnAddr;
474
475 MachineIRBuilder MIB;
476
477#define GET_GLOBALISEL_PREDICATES_DECL
478#include "AArch64GenGlobalISel.inc"
479#undef GET_GLOBALISEL_PREDICATES_DECL
480
481// We declare the temporaries used by selectImpl() in the class to minimize the
482// cost of constructing placeholder values.
483#define GET_GLOBALISEL_TEMPORARIES_DECL
484#include "AArch64GenGlobalISel.inc"
485#undef GET_GLOBALISEL_TEMPORARIES_DECL
486};
487
488} // end anonymous namespace
489
490#define GET_GLOBALISEL_IMPL
491#include "AArch64GenGlobalISel.inc"
492#undef GET_GLOBALISEL_IMPL
493
494AArch64InstructionSelector::AArch64InstructionSelector(
495 const AArch64TargetMachine &TM, const AArch64Subtarget &STI,
496 const AArch64RegisterBankInfo &RBI)
497 : TM(TM), STI(STI), TII(*STI.getInstrInfo()), TRI(*STI.getRegisterInfo()),
498 RBI(RBI),
499#define GET_GLOBALISEL_PREDICATES_INIT
500#include "AArch64GenGlobalISel.inc"
501#undef GET_GLOBALISEL_PREDICATES_INIT
502#define GET_GLOBALISEL_TEMPORARIES_INIT
503#include "AArch64GenGlobalISel.inc"
504#undef GET_GLOBALISEL_TEMPORARIES_INIT
505{
506}
507
508// FIXME: This should be target-independent, inferred from the types declared
509// for each class in the bank.
510//
511/// Given a register bank, and a type, return the smallest register class that
512/// can represent that combination.
513static const TargetRegisterClass *
514getRegClassForTypeOnBank(LLT Ty, const RegisterBank &RB,
515 bool GetAllRegSet = false) {
516 if (RB.getID() == AArch64::GPRRegBankID) {
517 if (Ty.getSizeInBits() <= 32)
518 return GetAllRegSet ? &AArch64::GPR32allRegClass
519 : &AArch64::GPR32RegClass;
520 if (Ty.getSizeInBits() == 64)
521 return GetAllRegSet ? &AArch64::GPR64allRegClass
522 : &AArch64::GPR64RegClass;
523 if (Ty.getSizeInBits() == 128)
524 return &AArch64::XSeqPairsClassRegClass;
525 return nullptr;
526 }
527
528 if (RB.getID() == AArch64::FPRRegBankID) {
529 switch (Ty.getSizeInBits()) {
530 case 8:
531 return &AArch64::FPR8RegClass;
532 case 16:
533 return &AArch64::FPR16RegClass;
534 case 32:
535 return &AArch64::FPR32RegClass;
536 case 64:
537 return &AArch64::FPR64RegClass;
538 case 128:
539 return &AArch64::FPR128RegClass;
540 }
541 return nullptr;
542 }
543
544 return nullptr;
545}
546
547/// Given a register bank, and size in bits, return the smallest register class
548/// that can represent that combination.
549static const TargetRegisterClass *
550getMinClassForRegBank(const RegisterBank &RB, unsigned SizeInBits,
551 bool GetAllRegSet = false) {
552 unsigned RegBankID = RB.getID();
553
554 if (RegBankID == AArch64::GPRRegBankID) {
555 if (SizeInBits <= 32)
556 return GetAllRegSet ? &AArch64::GPR32allRegClass
557 : &AArch64::GPR32RegClass;
558 if (SizeInBits == 64)
559 return GetAllRegSet ? &AArch64::GPR64allRegClass
560 : &AArch64::GPR64RegClass;
561 if (SizeInBits == 128)
562 return &AArch64::XSeqPairsClassRegClass;
563 }
564
565 if (RegBankID == AArch64::FPRRegBankID) {
566 switch (SizeInBits) {
567 default:
568 return nullptr;
569 case 8:
570 return &AArch64::FPR8RegClass;
571 case 16:
572 return &AArch64::FPR16RegClass;
573 case 32:
574 return &AArch64::FPR32RegClass;
575 case 64:
576 return &AArch64::FPR64RegClass;
577 case 128:
578 return &AArch64::FPR128RegClass;
579 }
580 }
581
582 return nullptr;
583}
584
585/// Returns the correct subregister to use for a given register class.
586static bool getSubRegForClass(const TargetRegisterClass *RC,
587 const TargetRegisterInfo &TRI, unsigned &SubReg) {
588 switch (TRI.getRegSizeInBits(*RC)) {
589 case 8:
590 SubReg = AArch64::bsub;
591 break;
592 case 16:
593 SubReg = AArch64::hsub;
594 break;
595 case 32:
596 if (RC != &AArch64::FPR32RegClass)
597 SubReg = AArch64::sub_32;
598 else
599 SubReg = AArch64::ssub;
600 break;
601 case 64:
602 SubReg = AArch64::dsub;
603 break;
604 default:
605 LLVM_DEBUG(
606 dbgs() << "Couldn't find appropriate subregister for register class.");
607 return false;
608 }
609
610 return true;
611}
612
613/// Returns the minimum size the given register bank can hold.
614static unsigned getMinSizeForRegBank(const RegisterBank &RB) {
615 switch (RB.getID()) {
616 case AArch64::GPRRegBankID:
617 return 32;
618 case AArch64::FPRRegBankID:
619 return 8;
620 default:
621 llvm_unreachable("Tried to get minimum size for unknown register bank.");
622 }
623}
624
625/// Create a REG_SEQUENCE instruction using the registers in \p Regs.
626/// Helper function for functions like createDTuple and createQTuple.
627///
628/// \p RegClassIDs - The list of register class IDs available for some tuple of
629/// a scalar class. E.g. QQRegClassID, QQQRegClassID, QQQQRegClassID. This is
630/// expected to contain between 2 and 4 tuple classes.
631///
632/// \p SubRegs - The list of subregister classes associated with each register
633/// class ID in \p RegClassIDs. E.g., QQRegClassID should use the qsub0
634/// subregister class. The index of each subregister class is expected to
635/// correspond with the index of each register class.
636///
637/// \returns Either the destination register of REG_SEQUENCE instruction that
638/// was created, or the 0th element of \p Regs if \p Regs contains a single
639/// element.
640static Register createTuple(ArrayRef<Register> Regs,
641 const unsigned RegClassIDs[],
642 const unsigned SubRegs[], MachineIRBuilder &MIB) {
643 unsigned NumRegs = Regs.size();
644 if (NumRegs == 1)
645 return Regs[0];
646 assert(NumRegs >= 2 && NumRegs <= 4 &&
647 "Only support between two and 4 registers in a tuple!");
648 const TargetRegisterInfo *TRI = MIB.getMF().getSubtarget().getRegisterInfo();
649 auto *DesiredClass = TRI->getRegClass(RegClassIDs[NumRegs - 2]);
650 auto RegSequence =
651 MIB.buildInstr(TargetOpcode::REG_SEQUENCE, {DesiredClass}, {});
652 for (unsigned I = 0, E = Regs.size(); I < E; ++I) {
653 RegSequence.addUse(Regs[I]);
654 RegSequence.addImm(SubRegs[I]);
655 }
656 return RegSequence.getReg(0);
657}
658
659/// Create a tuple of D-registers using the registers in \p Regs.
660static Register createDTuple(ArrayRef<Register> Regs, MachineIRBuilder &MIB) {
661 static const unsigned RegClassIDs[] = {
662 AArch64::DDRegClassID, AArch64::DDDRegClassID, AArch64::DDDDRegClassID};
663 static const unsigned SubRegs[] = {AArch64::dsub0, AArch64::dsub1,
664 AArch64::dsub2, AArch64::dsub3};
665 return createTuple(Regs, RegClassIDs, SubRegs, MIB);
666}
667
668/// Create a tuple of Q-registers using the registers in \p Regs.
669static Register createQTuple(ArrayRef<Register> Regs, MachineIRBuilder &MIB) {
670 static const unsigned RegClassIDs[] = {
671 AArch64::QQRegClassID, AArch64::QQQRegClassID, AArch64::QQQQRegClassID};
672 static const unsigned SubRegs[] = {AArch64::qsub0, AArch64::qsub1,
673 AArch64::qsub2, AArch64::qsub3};
674 return createTuple(Regs, RegClassIDs, SubRegs, MIB);
675}
676
677static Optional<uint64_t> getImmedFromMO(const MachineOperand &Root) {
678 auto &MI = *Root.getParent();
679 auto &MBB = *MI.getParent();
680 auto &MF = *MBB.getParent();
681 auto &MRI = MF.getRegInfo();
682 uint64_t Immed;
683 if (Root.isImm())
684 Immed = Root.getImm();
685 else if (Root.isCImm())
686 Immed = Root.getCImm()->getZExtValue();
687 else if (Root.isReg()) {
688 auto ValAndVReg =
689 getIConstantVRegValWithLookThrough(Root.getReg(), MRI, true);
690 if (!ValAndVReg)
691 return None;
692 Immed = ValAndVReg->Value.getSExtValue();
693 } else
694 return None;
695 return Immed;
696}
697
698/// Check whether \p I is a currently unsupported binary operation:
699/// - it has an unsized type
700/// - an operand is not a vreg
701/// - all operands are not in the same bank
702/// These are checks that should someday live in the verifier, but right now,
703/// these are mostly limitations of the aarch64 selector.
704static bool unsupportedBinOp(const MachineInstr &I,
705 const AArch64RegisterBankInfo &RBI,
706 const MachineRegisterInfo &MRI,
707 const AArch64RegisterInfo &TRI) {
708 LLT Ty = MRI.getType(I.getOperand(0).getReg());
709 if (!Ty.isValid()) {
710 LLVM_DEBUG(dbgs() << "Generic binop register should be typed\n");
711 return true;
712 }
713
714 const RegisterBank *PrevOpBank = nullptr;
715 for (auto &MO : I.operands()) {
716 // FIXME: Support non-register operands.
717 if (!MO.isReg()) {
718 LLVM_DEBUG(dbgs() << "Generic inst non-reg operands are unsupported\n");
719 return true;
720 }
721
722 // FIXME: Can generic operations have physical registers operands? If
723 // so, this will need to be taught about that, and we'll need to get the
724 // bank out of the minimal class for the register.
725 // Either way, this needs to be documented (and possibly verified).
726 if (!Register::isVirtualRegister(MO.getReg())) {
727 LLVM_DEBUG(dbgs() << "Generic inst has physical register operand\n");
728 return true;
729 }
730
731 const RegisterBank *OpBank = RBI.getRegBank(MO.getReg(), MRI, TRI);
732 if (!OpBank) {
733 LLVM_DEBUG(dbgs() << "Generic register has no bank or class\n");
734 return true;
735 }
736
737 if (PrevOpBank && OpBank != PrevOpBank) {
738 LLVM_DEBUG(dbgs() << "Generic inst operands have different banks\n");
739 return true;
740 }
741 PrevOpBank = OpBank;
742 }
743 return false;
744}
745
746/// Select the AArch64 opcode for the basic binary operation \p GenericOpc
747/// (such as G_OR or G_SDIV), appropriate for the register bank \p RegBankID
748/// and of size \p OpSize.
749/// \returns \p GenericOpc if the combination is unsupported.
750static unsigned selectBinaryOp(unsigned GenericOpc, unsigned RegBankID,
751 unsigned OpSize) {
752 switch (RegBankID) {
753 case AArch64::GPRRegBankID:
754 if (OpSize == 32) {
755 switch (GenericOpc) {
756 case TargetOpcode::G_SHL:
757 return AArch64::LSLVWr;
758 case TargetOpcode::G_LSHR:
759 return AArch64::LSRVWr;
760 case TargetOpcode::G_ASHR:
761 return AArch64::ASRVWr;
762 default:
763 return GenericOpc;
764 }
765 } else if (OpSize == 64) {
766 switch (GenericOpc) {
767 case TargetOpcode::G_PTR_ADD:
768 return AArch64::ADDXrr;
769 case TargetOpcode::G_SHL:
770 return AArch64::LSLVXr;
771 case TargetOpcode::G_LSHR:
772 return AArch64::LSRVXr;
773 case TargetOpcode::G_ASHR:
774 return AArch64::ASRVXr;
775 default:
776 return GenericOpc;
777 }
778 }
779 break;
780 case AArch64::FPRRegBankID:
781 switch (OpSize) {
782 case 32:
783 switch (GenericOpc) {
784 case TargetOpcode::G_FADD:
785 return AArch64::FADDSrr;
786 case TargetOpcode::G_FSUB:
787 return AArch64::FSUBSrr;
788 case TargetOpcode::G_FMUL:
789 return AArch64::FMULSrr;
790 case TargetOpcode::G_FDIV:
791 return AArch64::FDIVSrr;
792 default:
793 return GenericOpc;
794 }
795 case 64:
796 switch (GenericOpc) {
797 case TargetOpcode::G_FADD:
798 return AArch64::FADDDrr;
799 case TargetOpcode::G_FSUB:
800 return AArch64::FSUBDrr;
801 case TargetOpcode::G_FMUL:
802 return AArch64::FMULDrr;
803 case TargetOpcode::G_FDIV:
804 return AArch64::FDIVDrr;
805 case TargetOpcode::G_OR:
806 return AArch64::ORRv8i8;
807 default:
808 return GenericOpc;
809 }
810 }
811 break;
812 }
813 return GenericOpc;
814}
815
816/// Select the AArch64 opcode for the G_LOAD or G_STORE operation \p GenericOpc,
817/// appropriate for the (value) register bank \p RegBankID and of memory access
818/// size \p OpSize. This returns the variant with the base+unsigned-immediate
819/// addressing mode (e.g., LDRXui).
820/// \returns \p GenericOpc if the combination is unsupported.
821static unsigned selectLoadStoreUIOp(unsigned GenericOpc, unsigned RegBankID,
822 unsigned OpSize) {
823 const bool isStore = GenericOpc == TargetOpcode::G_STORE;
824 switch (RegBankID) {
825 case AArch64::GPRRegBankID:
826 switch (OpSize) {
827 case 8:
828 return isStore ? AArch64::STRBBui : AArch64::LDRBBui;
829 case 16:
830 return isStore ? AArch64::STRHHui : AArch64::LDRHHui;
831 case 32:
832 return isStore ? AArch64::STRWui : AArch64::LDRWui;
833 case 64:
834 return isStore ? AArch64::STRXui : AArch64::LDRXui;
835 }
836 break;
837 case AArch64::FPRRegBankID:
838 switch (OpSize) {
839 case 8:
840 return isStore ? AArch64::STRBui : AArch64::LDRBui;
841 case 16:
842 return isStore ? AArch64::STRHui : AArch64::LDRHui;
843 case 32:
844 return isStore ? AArch64::STRSui : AArch64::LDRSui;
845 case 64:
846 return isStore ? AArch64::STRDui : AArch64::LDRDui;
847 case 128:
848 return isStore ? AArch64::STRQui : AArch64::LDRQui;
849 }
850 break;
851 }
852 return GenericOpc;
853}
854
855/// Helper function for selectCopy. Inserts a subregister copy from \p SrcReg
856/// to \p *To.
857///
858/// E.g "To = COPY SrcReg:SubReg"
859static bool copySubReg(MachineInstr &I, MachineRegisterInfo &MRI,
860 const RegisterBankInfo &RBI, Register SrcReg,
861 const TargetRegisterClass *To, unsigned SubReg) {
862 assert(SrcReg.isValid() && "Expected a valid source register?");
863 assert(To && "Destination register class cannot be null");
864 assert(SubReg && "Expected a valid subregister");
865
866 MachineIRBuilder MIB(I);
867 auto SubRegCopy =
868 MIB.buildInstr(TargetOpcode::COPY, {To}, {}).addReg(SrcReg, 0, SubReg);
869 MachineOperand &RegOp = I.getOperand(1);
870 RegOp.setReg(SubRegCopy.getReg(0));
871
872 // It's possible that the destination register won't be constrained. Make
873 // sure that happens.
874 if (!Register::isPhysicalRegister(I.getOperand(0).getReg()))
875 RBI.constrainGenericRegister(I.getOperand(0).getReg(), *To, MRI);
876
877 return true;
878}
879
880/// Helper function to get the source and destination register classes for a
881/// copy. Returns a std::pair containing the source register class for the
882/// copy, and the destination register class for the copy. If a register class
883/// cannot be determined, then it will be nullptr.
884static std::pair<const TargetRegisterClass *, const TargetRegisterClass *>
885getRegClassesForCopy(MachineInstr &I, const TargetInstrInfo &TII,
886 MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI,
887 const RegisterBankInfo &RBI) {
888 Register DstReg = I.getOperand(0).getReg();
889 Register SrcReg = I.getOperand(1).getReg();
890 const RegisterBank &DstRegBank = *RBI.getRegBank(DstReg, MRI, TRI);
891 const RegisterBank &SrcRegBank = *RBI.getRegBank(SrcReg, MRI, TRI);
892 unsigned DstSize = RBI.getSizeInBits(DstReg, MRI, TRI);
893 unsigned SrcSize = RBI.getSizeInBits(SrcReg, MRI, TRI);
894
895 // Special casing for cross-bank copies of s1s. We can technically represent
896 // a 1-bit value with any size of register. The minimum size for a GPR is 32
897 // bits. So, we need to put the FPR on 32 bits as well.
898 //
899 // FIXME: I'm not sure if this case holds true outside of copies. If it does,
900 // then we can pull it into the helpers that get the appropriate class for a
901 // register bank. Or make a new helper that carries along some constraint
902 // information.
903 if (SrcRegBank != DstRegBank && (DstSize == 1 && SrcSize == 1))
904 SrcSize = DstSize = 32;
905
906 return {getMinClassForRegBank(SrcRegBank, SrcSize, true),
907 getMinClassForRegBank(DstRegBank, DstSize, true)};
908}
909
910// FIXME: We need some sort of API in RBI/TRI to allow generic code to
911// constrain operands of simple instructions given a TargetRegisterClass
912// and LLT
913static bool selectDebugInstr(MachineInstr &I, MachineRegisterInfo &MRI,
914 const RegisterBankInfo &RBI) {
915 for (MachineOperand &MO : I.operands()) {
916 if (!MO.isReg())
917 continue;
918 Register Reg = MO.getReg();
919 if (!Reg)
920 continue;
921 if (Reg.isPhysical())
922 continue;
923 LLT Ty = MRI.getType(Reg);
924 const RegClassOrRegBank &RegClassOrBank = MRI.getRegClassOrRegBank(Reg);
925 const TargetRegisterClass *RC =
926 RegClassOrBank.dyn_cast<const TargetRegisterClass *>();
927 if (!RC) {
928 const RegisterBank &RB = *RegClassOrBank.get<const RegisterBank *>();
929 RC = getRegClassForTypeOnBank(Ty, RB);
930 if (!RC) {
931 LLVM_DEBUG(
932 dbgs() << "Warning: DBG_VALUE operand has unexpected size/bank\n");
933 break;
934 }
935 }
936 RBI.constrainGenericRegister(Reg, *RC, MRI);
937 }
938
939 return true;
940}
941
942static bool selectCopy(MachineInstr &I, const TargetInstrInfo &TII,
943 MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI,
944 const RegisterBankInfo &RBI) {
945 Register DstReg = I.getOperand(0).getReg();
946 Register SrcReg = I.getOperand(1).getReg();
947 const RegisterBank &DstRegBank = *RBI.getRegBank(DstReg, MRI, TRI);
948 const RegisterBank &SrcRegBank = *RBI.getRegBank(SrcReg, MRI, TRI);
949
950 // Find the correct register classes for the source and destination registers.
951 const TargetRegisterClass *SrcRC;
952 const TargetRegisterClass *DstRC;
953 std::tie(SrcRC, DstRC) = getRegClassesForCopy(I, TII, MRI, TRI, RBI);
954
955 if (!DstRC) {
956 LLVM_DEBUG(dbgs() << "Unexpected dest size "
957 << RBI.getSizeInBits(DstReg, MRI, TRI) << '\n');
958 return false;
959 }
960
961 // Is this a copy? If so, then we may need to insert a subregister copy.
962 if (I.isCopy()) {
963 // Yes. Check if there's anything to fix up.
964 if (!SrcRC) {
965 LLVM_DEBUG(dbgs() << "Couldn't determine source register class\n");
966 return false;
967 }
968
969 unsigned SrcSize = TRI.getRegSizeInBits(*SrcRC);
970 unsigned DstSize = TRI.getRegSizeInBits(*DstRC);
971 unsigned SubReg;
972
973 // If the source bank doesn't support a subregister copy small enough,
974 // then we first need to copy to the destination bank.
975 if (getMinSizeForRegBank(SrcRegBank) > DstSize) {
976 const TargetRegisterClass *DstTempRC =
977 getMinClassForRegBank(DstRegBank, SrcSize, /* GetAllRegSet */ true);
978 getSubRegForClass(DstRC, TRI, SubReg);
979
980 MachineIRBuilder MIB(I);
981 auto Copy = MIB.buildCopy({DstTempRC}, {SrcReg});
982 copySubReg(I, MRI, RBI, Copy.getReg(0), DstRC, SubReg);
983 } else if (SrcSize > DstSize) {
984 // If the source register is bigger than the destination we need to
985 // perform a subregister copy.
986 const TargetRegisterClass *SubRegRC =
987 getMinClassForRegBank(SrcRegBank, DstSize, /* GetAllRegSet */ true);
988 getSubRegForClass(SubRegRC, TRI, SubReg);
989 copySubReg(I, MRI, RBI, SrcReg, DstRC, SubReg);
990 } else if (DstSize > SrcSize) {
991 // If the destination register is bigger than the source we need to do
992 // a promotion using SUBREG_TO_REG.
993 const TargetRegisterClass *PromotionRC =
994 getMinClassForRegBank(SrcRegBank, DstSize, /* GetAllRegSet */ true);
995 getSubRegForClass(SrcRC, TRI, SubReg);
996
997 Register PromoteReg = MRI.createVirtualRegister(PromotionRC);
998 BuildMI(*I.getParent(), I, I.getDebugLoc(),
999 TII.get(AArch64::SUBREG_TO_REG), PromoteReg)
1000 .addImm(0)
1001 .addUse(SrcReg)
1002 .addImm(SubReg);
1003 MachineOperand &RegOp = I.getOperand(1);
1004 RegOp.setReg(PromoteReg);
1005 }
1006
1007 // If the destination is a physical register, then there's nothing to
1008 // change, so we're done.
1009 if (Register::isPhysicalRegister(DstReg))
1010 return true;
1011 }
1012
1013 // No need to constrain SrcReg. It will get constrained when we hit another
1014 // of its use or its defs. Copies do not have constraints.
1015 if (!RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) {
1016 LLVM_DEBUG(dbgs() << "Failed to constrain " << TII.getName(I.getOpcode())
1017 << " operand\n");
1018 return false;
1019 }
1020
1021 // If this a GPR ZEXT that we want to just reduce down into a copy.
1022 // The sizes will be mismatched with the source < 32b but that's ok.
1023 if (I.getOpcode() == TargetOpcode::G_ZEXT) {
1024 I.setDesc(TII.get(AArch64::COPY));
1025 assert(SrcRegBank.getID() == AArch64::GPRRegBankID);
1026 return selectCopy(I, TII, MRI, TRI, RBI);
1027 }
1028
1029 I.setDesc(TII.get(AArch64::COPY));
1030 return true;
1031}
1032
1033static unsigned selectFPConvOpc(unsigned GenericOpc, LLT DstTy, LLT SrcTy) {
1034 if (!DstTy.isScalar() || !SrcTy.isScalar())
1035 return GenericOpc;
1036
1037 const unsigned DstSize = DstTy.getSizeInBits();
1038 const unsigned SrcSize = SrcTy.getSizeInBits();
1039
1040 switch (DstSize) {
1041 case 32:
1042 switch (SrcSize) {
1043 case 32:
1044 switch (GenericOpc) {
1045 case TargetOpcode::G_SITOFP:
1046 return AArch64::SCVTFUWSri;
1047 case TargetOpcode::G_UITOFP:
1048 return AArch64::UCVTFUWSri;
1049 case TargetOpcode::G_FPTOSI:
1050 return AArch64::FCVTZSUWSr;
1051 case TargetOpcode::G_FPTOUI:
1052 return AArch64::FCVTZUUWSr;
1053 default:
1054 return GenericOpc;
1055 }
1056 case 64:
1057 switch (GenericOpc) {
1058 case TargetOpcode::G_SITOFP:
1059 return AArch64::SCVTFUXSri;
1060 case TargetOpcode::G_UITOFP:
1061 return AArch64::UCVTFUXSri;
1062 case TargetOpcode::G_FPTOSI:
1063 return AArch64::FCVTZSUWDr;
1064 case TargetOpcode::G_FPTOUI:
1065 return AArch64::FCVTZUUWDr;
1066 default:
1067 return GenericOpc;
1068 }
1069 default:
1070 return GenericOpc;
1071 }
1072 case 64:
1073 switch (SrcSize) {
1074 case 32:
1075 switch (GenericOpc) {
1076 case TargetOpcode::G_SITOFP:
1077 return AArch64::SCVTFUWDri;
1078 case TargetOpcode::G_UITOFP:
1079 return AArch64::UCVTFUWDri;
1080 case TargetOpcode::G_FPTOSI:
1081 return AArch64::FCVTZSUXSr;
1082 case TargetOpcode::G_FPTOUI:
1083 return AArch64::FCVTZUUXSr;
1084 default:
1085 return GenericOpc;
1086 }
1087 case 64:
1088 switch (GenericOpc) {
1089 case TargetOpcode::G_SITOFP:
1090 return AArch64::SCVTFUXDri;
1091 case TargetOpcode::G_UITOFP:
1092 return AArch64::UCVTFUXDri;
1093 case TargetOpcode::G_FPTOSI:
1094 return AArch64::FCVTZSUXDr;
1095 case TargetOpcode::G_FPTOUI:
1096 return AArch64::FCVTZUUXDr;
1097 default:
1098 return GenericOpc;
1099 }
1100 default:
1101 return GenericOpc;
1102 }
1103 default:
1104 return GenericOpc;
1105 };
1106 return GenericOpc;
1107}
1108
1109MachineInstr *
1110AArch64InstructionSelector::emitSelect(Register Dst, Register True,
1111 Register False, AArch64CC::CondCode CC,
1112 MachineIRBuilder &MIB) const {
1113 MachineRegisterInfo &MRI = *MIB.getMRI();
1114 assert(RBI.getRegBank(False, MRI, TRI)->getID() ==
1115 RBI.getRegBank(True, MRI, TRI)->getID() &&
1116 "Expected both select operands to have the same regbank?");
1117 LLT Ty = MRI.getType(True);
1118 if (Ty.isVector())
1119 return nullptr;
1120 const unsigned Size = Ty.getSizeInBits();
1121 assert((Size == 32 || Size == 64) &&
1122 "Expected 32 bit or 64 bit select only?");
1123 const bool Is32Bit = Size == 32;
1124 if (RBI.getRegBank(True, MRI, TRI)->getID() != AArch64::GPRRegBankID) {
1125 unsigned Opc = Is32Bit ? AArch64::FCSELSrrr : AArch64::FCSELDrrr;
1126 auto FCSel = MIB.buildInstr(Opc, {Dst}, {True, False}).addImm(CC);
1127 constrainSelectedInstRegOperands(*FCSel, TII, TRI, RBI);
1128 return &*FCSel;
1129 }
1130
1131 // By default, we'll try and emit a CSEL.
1132 unsigned Opc = Is32Bit ? AArch64::CSELWr : AArch64::CSELXr;
1133 bool Optimized = false;
1134 auto TryFoldBinOpIntoSelect = [&Opc, Is32Bit, &CC, &MRI,
1135 &Optimized](Register &Reg, Register &OtherReg,
1136 bool Invert) {
1137 if (Optimized)
1138 return false;
1139
1140 // Attempt to fold:
1141 //
1142 // %sub = G_SUB 0, %x
1143 // %select = G_SELECT cc, %reg, %sub
1144 //
1145 // Into:
1146 // %select = CSNEG %reg, %x, cc
1147 Register MatchReg;
1148 if (mi_match(Reg, MRI, m_Neg(m_Reg(MatchReg)))) {
1149 Opc = Is32Bit ? AArch64::CSNEGWr : AArch64::CSNEGXr;
1150 Reg = MatchReg;
1151 if (Invert) {
1152 CC = AArch64CC::getInvertedCondCode(CC);
1153 std::swap(Reg, OtherReg);
1154 }
1155 return true;
1156 }
1157
1158 // Attempt to fold:
1159 //
1160 // %xor = G_XOR %x, -1
1161 // %select = G_SELECT cc, %reg, %xor
1162 //
1163 // Into:
1164 // %select = CSINV %reg, %x, cc
1165 if (mi_match(Reg, MRI, m_Not(m_Reg(MatchReg)))) {
1166 Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;
1167 Reg = MatchReg;
1168 if (Invert) {
1169 CC = AArch64CC::getInvertedCondCode(CC);
1170 std::swap(Reg, OtherReg);
1171 }
1172 return true;
1173 }
1174
1175 // Attempt to fold:
1176 //
1177 // %add = G_ADD %x, 1
1178 // %select = G_SELECT cc, %reg, %add
1179 //
1180 // Into:
1181 // %select = CSINC %reg, %x, cc
1182 if (mi_match(Reg, MRI,
1183 m_any_of(m_GAdd(m_Reg(MatchReg), m_SpecificICst(1)),
1184 m_GPtrAdd(m_Reg(MatchReg), m_SpecificICst(1))))) {
1185 Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;
1186 Reg = MatchReg;
1187 if (Invert) {
1188 CC = AArch64CC::getInvertedCondCode(CC);
1189 std::swap(Reg, OtherReg);
1190 }
1191 return true;
1192 }
1193
1194 return false;
1195 };
1196
1197 // Helper lambda which tries to use CSINC/CSINV for the instruction when its
1198 // true/false values are constants.
1199 // FIXME: All of these patterns already exist in tablegen. We should be
1200 // able to import these.
1201 auto TryOptSelectCst = [&Opc, &True, &False, &CC, Is32Bit, &MRI,
1202 &Optimized]() {
1203 if (Optimized)
1204 return false;
1205 auto TrueCst = getIConstantVRegValWithLookThrough(True, MRI);
1206 auto FalseCst = getIConstantVRegValWithLookThrough(False, MRI);
1207 if (!TrueCst && !FalseCst)
1208 return false;
1209
1210 Register ZReg = Is32Bit ? AArch64::WZR : AArch64::XZR;
1211 if (TrueCst && FalseCst) {
1212 int64_t T = TrueCst->Value.getSExtValue();
1213 int64_t F = FalseCst->Value.getSExtValue();
1214
1215 if (T == 0 && F == 1) {
1216 // G_SELECT cc, 0, 1 -> CSINC zreg, zreg, cc
1217 Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;
1218 True = ZReg;
1219 False = ZReg;
1220 return true;
1221 }
1222
1223 if (T == 0 && F == -1) {
1224 // G_SELECT cc 0, -1 -> CSINV zreg, zreg cc
1225 Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;
1226 True = ZReg;
1227 False = ZReg;
1228 return true;
1229 }
1230 }
1231
1232 if (TrueCst) {
1233 int64_t T = TrueCst->Value.getSExtValue();
1234 if (T == 1) {
1235 // G_SELECT cc, 1, f -> CSINC f, zreg, inv_cc
1236 Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;
1237 True = False;
1238 False = ZReg;
1239 CC = AArch64CC::getInvertedCondCode(CC);
1240 return true;
1241 }
1242
1243 if (T == -1) {
1244 // G_SELECT cc, -1, f -> CSINV f, zreg, inv_cc
1245 Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;
1246 True = False;
1247 False = ZReg;
1248 CC = AArch64CC::getInvertedCondCode(CC);
1249 return true;
1250 }
1251 }
1252
1253 if (FalseCst) {
1254 int64_t F = FalseCst->Value.getSExtValue();
1255 if (F == 1) {
1256 // G_SELECT cc, t, 1 -> CSINC t, zreg, cc
1257 Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;
1258 False = ZReg;
1259 return true;
1260 }
1261
1262 if (F == -1) {
1263 // G_SELECT cc, t, -1 -> CSINV t, zreg, cc
1264 Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;
1265 False = ZReg;
1266 return true;
1267 }
1268 }
1269 return false;
1270 };
1271
1272 Optimized |= TryFoldBinOpIntoSelect(False, True, /*Invert = */ false);
1273 Optimized |= TryFoldBinOpIntoSelect(True, False, /*Invert = */ true);
1274 Optimized |= TryOptSelectCst();
1275 auto SelectInst = MIB.buildInstr(Opc, {Dst}, {True, False}).addImm(CC);
1276 constrainSelectedInstRegOperands(*SelectInst, TII, TRI, RBI);
1277 return &*SelectInst;
1278}
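The folds above lean on the semantics of the AArch64 conditional-select family: when the condition fails, CSNEG yields the negated, CSINV the inverted, and CSINC the incremented second operand. A minimal standalone C++ sketch of those equivalences (the helper names are illustrative and not part of this file):

#include <cassert>
#include <cstdint>

// Scalar models of CSNEG/CSINV/CSINC: pick n when cc holds, otherwise a
// transformed m.
static int64_t csneg(bool cc, int64_t n, int64_t m) { return cc ? n : -m; }
static int64_t csinv(bool cc, int64_t n, int64_t m) { return cc ? n : ~m; }
static int64_t csinc(bool cc, int64_t n, int64_t m) { return cc ? n : m + 1; }

int main() {
  for (bool cc : {false, true}) {
    int64_t reg = 5, x = 42;
    // G_SELECT cc, %reg, (G_SUB 0, %x)   ==  CSNEG %reg, %x, cc
    assert((cc ? reg : -x) == csneg(cc, reg, x));
    // G_SELECT cc, %reg, (G_XOR %x, -1)  ==  CSINV %reg, %x, cc
    assert((cc ? reg : (x ^ -1)) == csinv(cc, reg, x));
    // G_SELECT cc, %reg, (G_ADD %x, 1)   ==  CSINC %reg, %x, cc
    assert((cc ? reg : x + 1) == csinc(cc, reg, x));
  }
}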
1279
1280static AArch64CC::CondCode changeICMPPredToAArch64CC(CmpInst::Predicate P) {
1281 switch (P) {
1282 default:
1283 llvm_unreachable("Unknown condition code!")::llvm::llvm_unreachable_internal("Unknown condition code!", "llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 1283)
;
1284 case CmpInst::ICMP_NE:
1285 return AArch64CC::NE;
1286 case CmpInst::ICMP_EQ:
1287 return AArch64CC::EQ;
1288 case CmpInst::ICMP_SGT:
1289 return AArch64CC::GT;
1290 case CmpInst::ICMP_SGE:
1291 return AArch64CC::GE;
1292 case CmpInst::ICMP_SLT:
1293 return AArch64CC::LT;
1294 case CmpInst::ICMP_SLE:
1295 return AArch64CC::LE;
1296 case CmpInst::ICMP_UGT:
1297 return AArch64CC::HI;
1298 case CmpInst::ICMP_UGE:
1299 return AArch64CC::HS;
1300 case CmpInst::ICMP_ULT:
1301 return AArch64CC::LO;
1302 case CmpInst::ICMP_ULE:
1303 return AArch64CC::LS;
1304 }
1305}
1306
1307/// changeFPCCToORAArch64CC - Convert an IR fp condition code to an AArch64 CC.
1308static void changeFPCCToORAArch64CC(CmpInst::Predicate CC,
1309 AArch64CC::CondCode &CondCode,
1310 AArch64CC::CondCode &CondCode2) {
1311 CondCode2 = AArch64CC::AL;
1312 switch (CC) {
1313 default:
1314 llvm_unreachable("Unknown FP condition!")::llvm::llvm_unreachable_internal("Unknown FP condition!", "llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 1314)
;
1315 case CmpInst::FCMP_OEQ:
1316 CondCode = AArch64CC::EQ;
1317 break;
1318 case CmpInst::FCMP_OGT:
1319 CondCode = AArch64CC::GT;
1320 break;
1321 case CmpInst::FCMP_OGE:
1322 CondCode = AArch64CC::GE;
1323 break;
1324 case CmpInst::FCMP_OLT:
1325 CondCode = AArch64CC::MI;
1326 break;
1327 case CmpInst::FCMP_OLE:
1328 CondCode = AArch64CC::LS;
1329 break;
1330 case CmpInst::FCMP_ONE:
1331 CondCode = AArch64CC::MI;
1332 CondCode2 = AArch64CC::GT;
1333 break;
1334 case CmpInst::FCMP_ORD:
1335 CondCode = AArch64CC::VC;
1336 break;
1337 case CmpInst::FCMP_UNO:
1338 CondCode = AArch64CC::VS;
1339 break;
1340 case CmpInst::FCMP_UEQ:
1341 CondCode = AArch64CC::EQ;
1342 CondCode2 = AArch64CC::VS;
1343 break;
1344 case CmpInst::FCMP_UGT:
1345 CondCode = AArch64CC::HI;
1346 break;
1347 case CmpInst::FCMP_UGE:
1348 CondCode = AArch64CC::PL;
1349 break;
1350 case CmpInst::FCMP_ULT:
1351 CondCode = AArch64CC::LT;
1352 break;
1353 case CmpInst::FCMP_ULE:
1354 CondCode = AArch64CC::LE;
1355 break;
1356 case CmpInst::FCMP_UNE:
1357 CondCode = AArch64CC::NE;
1358 break;
1359 }
1360}
1361
1362/// Convert an IR fp condition code to an AArch64 CC.
1363/// This differs from changeFPCCToAArch64CC in that it returns cond codes that
1364/// should be AND'ed instead of OR'ed.
1365static void changeFPCCToANDAArch64CC(CmpInst::Predicate CC,
1366 AArch64CC::CondCode &CondCode,
1367 AArch64CC::CondCode &CondCode2) {
1368 CondCode2 = AArch64CC::AL;
1369 switch (CC) {
1370 default:
1371 changeFPCCToORAArch64CC(CC, CondCode, CondCode2);
1372 assert(CondCode2 == AArch64CC::AL);
1373 break;
1374 case CmpInst::FCMP_ONE:
1375 // (a one b)
1376 // == ((a olt b) || (a ogt b))
1377 // == ((a ord b) && (a une b))
1378 CondCode = AArch64CC::VC;
1379 CondCode2 = AArch64CC::NE;
1380 break;
1381 case CmpInst::FCMP_UEQ:
1382 // (a ueq b)
1383 // == ((a uno b) || (a oeq b))
1384 // == ((a ule b) && (a uge b))
1385 CondCode = AArch64CC::PL;
1386 CondCode2 = AArch64CC::LE;
1387 break;
1388 }
1389}
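As a sanity check of the AND'ed decompositions above, here is a small standalone C++ sketch (not part of the selector; the predicate helpers are illustrative) that compares FCMP_ONE and FCMP_UEQ against their two-condition forms on doubles, including NaN inputs:

#include <cassert>
#include <cmath>
#include <limits>

static bool ord(double a, double b) { return !std::isnan(a) && !std::isnan(b); }
static bool one(double a, double b) { return ord(a, b) && a != b; }
static bool une(double a, double b) { return !ord(a, b) || a != b; }
static bool ueq(double a, double b) { return !ord(a, b) || a == b; }
static bool ule(double a, double b) { return !ord(a, b) || a <= b; }
static bool uge(double a, double b) { return !ord(a, b) || a >= b; }

int main() {
  const double NaN = std::numeric_limits<double>::quiet_NaN();
  for (double a : {1.0, 2.0, NaN})
    for (double b : {1.0, 2.0, NaN}) {
      assert(one(a, b) == (ord(a, b) && une(a, b))); // FCMP_ONE as an AND
      assert(ueq(a, b) == (ule(a, b) && uge(a, b))); // FCMP_UEQ as an AND
    }
}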
1390
1391/// Return a register which can be used as a bit to test in a TB(N)Z.
1392static Register getTestBitReg(Register Reg, uint64_t &Bit, bool &Invert,
1393 MachineRegisterInfo &MRI) {
1394 assert(Reg.isValid() && "Expected valid register!");
1395 bool HasZext = false;
1396 while (MachineInstr *MI = getDefIgnoringCopies(Reg, MRI)) {
1397 unsigned Opc = MI->getOpcode();
1398
1399 if (!MI->getOperand(0).isReg() ||
1400 !MRI.hasOneNonDBGUse(MI->getOperand(0).getReg()))
1401 break;
1402
1403 // (tbz (any_ext x), b) -> (tbz x, b) if we don't use the extended bits.
1404 //
1405 // (tbz (trunc x), b) -> (tbz x, b) is always safe, because the bit number
1406 // on the truncated x is the same as the bit number on x.
1407 if (Opc == TargetOpcode::G_ANYEXT || Opc == TargetOpcode::G_ZEXT ||
1408 Opc == TargetOpcode::G_TRUNC) {
1409 if (Opc == TargetOpcode::G_ZEXT)
1410 HasZext = true;
1411
1412 Register NextReg = MI->getOperand(1).getReg();
1413 // Did we find something worth folding?
1414 if (!NextReg.isValid() || !MRI.hasOneNonDBGUse(NextReg))
1415 break;
1416
1417 // NextReg is worth folding. Keep looking.
1418 Reg = NextReg;
1419 continue;
1420 }
1421
1422 // Attempt to find a suitable operation with a constant on one side.
1423 Optional<uint64_t> C;
1424 Register TestReg;
1425 switch (Opc) {
1426 default:
1427 break;
1428 case TargetOpcode::G_AND:
1429 case TargetOpcode::G_XOR: {
1430 TestReg = MI->getOperand(1).getReg();
1431 Register ConstantReg = MI->getOperand(2).getReg();
1432 auto VRegAndVal = getIConstantVRegValWithLookThrough(ConstantReg, MRI);
1433 if (!VRegAndVal) {
1434 // AND commutes, check the other side for a constant.
1435 // FIXME: Can we canonicalize the constant so that it's always on the
1436 // same side at some point earlier?
1437 std::swap(ConstantReg, TestReg);
1438 VRegAndVal = getIConstantVRegValWithLookThrough(ConstantReg, MRI);
1439 }
1440 if (VRegAndVal) {
1441 if (HasZext)
1442 C = VRegAndVal->Value.getZExtValue();
1443 else
1444 C = VRegAndVal->Value.getSExtValue();
1445 }
1446 break;
1447 }
1448 case TargetOpcode::G_ASHR:
1449 case TargetOpcode::G_LSHR:
1450 case TargetOpcode::G_SHL: {
1451 TestReg = MI->getOperand(1).getReg();
1452 auto VRegAndVal =
1453 getIConstantVRegValWithLookThrough(MI->getOperand(2).getReg(), MRI);
1454 if (VRegAndVal)
1455 C = VRegAndVal->Value.getSExtValue();
1456 break;
1457 }
1458 }
1459
1460 // Didn't find a constant or viable register. Bail out of the loop.
1461 if (!C || !TestReg.isValid())
1462 break;
1463
1464 // We found a suitable instruction with a constant. Check to see if we can
1465 // walk through the instruction.
1466 Register NextReg;
1467 unsigned TestRegSize = MRI.getType(TestReg).getSizeInBits();
1468 switch (Opc) {
1469 default:
1470 break;
1471 case TargetOpcode::G_AND:
1472 // (tbz (and x, m), b) -> (tbz x, b) when the b-th bit of m is set.
1473 if ((*C >> Bit) & 1)
1474 NextReg = TestReg;
1475 break;
1476 case TargetOpcode::G_SHL:
1477 // (tbz (shl x, c), b) -> (tbz x, b-c) when b-c is positive and fits in
1478 // the type of the register.
1479 if (*C <= Bit && (Bit - *C) < TestRegSize) {
1480 NextReg = TestReg;
1481 Bit = Bit - *C;
1482 }
1483 break;
1484 case TargetOpcode::G_ASHR:
1485 // (tbz (ashr x, c), b) -> (tbz x, b+c) or (tbz x, msb) if b+c is > # bits
1486 // in x
1487 NextReg = TestReg;
1488 Bit = Bit + *C;
1489 if (Bit >= TestRegSize)
1490 Bit = TestRegSize - 1;
1491 break;
1492 case TargetOpcode::G_LSHR:
1493 // (tbz (lshr x, c), b) -> (tbz x, b+c) when b + c is < # bits in x
1494 if ((Bit + *C) < TestRegSize) {
1495 NextReg = TestReg;
1496 Bit = Bit + *C;
1497 }
1498 break;
1499 case TargetOpcode::G_XOR:
1500 // We can walk through a G_XOR by inverting whether we use tbz/tbnz when
1501 // appropriate.
1502 //
1503 // e.g. If x' = xor x, c, and the b-th bit is set in c then
1504 //
1505 // tbz x', b -> tbnz x, b
1506 //
1507 // Because x' only has the b-th bit set if x does not.
1508 if ((*C >> Bit) & 1)
1509 Invert = !Invert;
1510 NextReg = TestReg;
1511 break;
1512 }
1513
1514 // Check if we found anything worth folding.
1515 if (!NextReg.isValid())
1516 return Reg;
1517 Reg = NextReg;
1518 }
1519
1520 return Reg;
1521}
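The shift and xor walks above rely on simple bit-index identities. A standalone C++ sketch (illustrative only) exercising the G_SHL, G_LSHR, and G_XOR cases:

#include <cassert>
#include <cstdint>

static bool bit(uint64_t v, unsigned b) { return (v >> b) & 1; }

int main() {
  uint64_t x = 0xDEADBEEFCAFEF00DULL;
  // (tbz (shl x, c), b) -> (tbz x, b - c) when c <= b.
  for (unsigned c = 0; c <= 8; ++c)
    for (unsigned b = c; b < 64; ++b)
      assert(bit(x << c, b) == bit(x, b - c));
  // (tbz (lshr x, c), b) -> (tbz x, b + c) when b + c < 64.
  for (unsigned c = 0; c <= 8; ++c)
    for (unsigned b = 0; b + c < 64; ++b)
      assert(bit(x >> c, b) == bit(x, b + c));
  // (tbz (xor x, m), b) -> (tbnz x, b) when bit b of m is set.
  uint64_t m = 1ULL << 13;
  assert(bit(x ^ m, 13) == !bit(x, 13));
}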
1522
1523MachineInstr *AArch64InstructionSelector::emitTestBit(
1524 Register TestReg, uint64_t Bit, bool IsNegative, MachineBasicBlock *DstMBB,
1525 MachineIRBuilder &MIB) const {
1526 assert(TestReg.isValid());
1527 assert(ProduceNonFlagSettingCondBr &&
1528 "Cannot emit TB(N)Z with speculation tracking!");
1529 MachineRegisterInfo &MRI = *MIB.getMRI();
1530
1531 // Attempt to optimize the test bit by walking over instructions.
1532 TestReg = getTestBitReg(TestReg, Bit, IsNegative, MRI);
1533 LLT Ty = MRI.getType(TestReg);
1534 unsigned Size = Ty.getSizeInBits();
1535 assert(!Ty.isVector() && "Expected a scalar!");
1536 assert(Bit < 64 && "Bit is too large!");
1537
1538 // When the test register is a 64-bit register, we have to narrow to make
1539 // TBNZW work.
1540 bool UseWReg = Bit < 32;
1541 unsigned NecessarySize = UseWReg ? 32 : 64;
1542 if (Size != NecessarySize)
1543 TestReg = moveScalarRegClass(
1544 TestReg, UseWReg ? AArch64::GPR32RegClass : AArch64::GPR64RegClass,
1545 MIB);
1546
1547 static const unsigned OpcTable[2][2] = {{AArch64::TBZX, AArch64::TBNZX},
1548 {AArch64::TBZW, AArch64::TBNZW}};
1549 unsigned Opc = OpcTable[UseWReg][IsNegative];
1550 auto TestBitMI =
1551 MIB.buildInstr(Opc).addReg(TestReg).addImm(Bit).addMBB(DstMBB);
1552 constrainSelectedInstRegOperands(*TestBitMI, TII, TRI, RBI);
1553 return &*TestBitMI;
1554}
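The narrowing to a W register is sound because, for bit numbers below 32, the low 32 bits of the value already contain the tested bit. A tiny standalone sketch of that fact (the constant is arbitrary):

#include <cassert>
#include <cstdint>

int main() {
  // For bit numbers below 32, TB(N)Z on the 32-bit subregister sees the same
  // bit as it would on the full 64-bit register.
  uint64_t x = 0x18000F00DULL;
  for (unsigned b = 0; b < 32; ++b)
    assert(((x >> b) & 1) == ((uint32_t(x) >> b) & 1));
}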
1555
1556bool AArch64InstructionSelector::tryOptAndIntoCompareBranch(
1557 MachineInstr &AndInst, bool Invert, MachineBasicBlock *DstMBB,
1558 MachineIRBuilder &MIB) const {
1559 assert(AndInst.getOpcode() == TargetOpcode::G_AND && "Expected G_AND only?");
1560 // Given something like this:
1561 //
1562 // %x = ...Something...
1563 // %one = G_CONSTANT i64 1
1564 // %zero = G_CONSTANT i64 0
1565 // %and = G_AND %x, %one
1566 // %cmp = G_ICMP intpred(ne), %and, %zero
1567 // %cmp_trunc = G_TRUNC %cmp
1568 // G_BRCOND %cmp_trunc, %bb.3
1569 //
1570 // We want to try and fold the AND into the G_BRCOND and produce either a
1571 // TBNZ (when we have intpred(ne)) or a TBZ (when we have intpred(eq)).
1572 //
1573 // In this case, we'd get
1574 //
1575 // TBNZ %x %bb.3
1576 //
1577
1578 // Check if the AND has a constant on its RHS which we can use as a mask.
1579 // If it's a power of 2, then it's the same as checking a specific bit.
1580 // (e.g, ANDing with 8 == ANDing with 000...100 == testing if bit 3 is set)
1581 auto MaybeBit = getIConstantVRegValWithLookThrough(
1582 AndInst.getOperand(2).getReg(), *MIB.getMRI());
1583 if (!MaybeBit)
1584 return false;
1585
1586 int32_t Bit = MaybeBit->Value.exactLogBase2();
1587 if (Bit < 0)
1588 return false;
1589
1590 Register TestReg = AndInst.getOperand(1).getReg();
1591
1592 // Emit a TB(N)Z.
1593 emitTestBit(TestReg, Bit, Invert, DstMBB, MIB);
1594 return true;
1595}
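The fold works because ANDing with a power of two and comparing the result against zero observes exactly one bit. A short standalone C++ sketch of that equivalence:

#include <cassert>
#include <cstdint>

int main() {
  for (unsigned b = 0; b < 64; ++b) {
    uint64_t mask = 1ULL << b; // power-of-two mask, exactLogBase2() == b
    for (uint64_t x : {0ULL, 1ULL, 0xF0F0F0F0F0F0F0F0ULL, ~0ULL}) {
      bool andNonZero = (x & mask) != 0; // icmp ne (and x, mask), 0
      bool bitSet = (x >> b) & 1;        // what TB(N)Z tests
      assert(andNonZero == bitSet);
    }
  }
}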
1596
1597MachineInstr *AArch64InstructionSelector::emitCBZ(Register CompareReg,
1598 bool IsNegative,
1599 MachineBasicBlock *DestMBB,
1600 MachineIRBuilder &MIB) const {
1601 assert(ProduceNonFlagSettingCondBr && "CBZ does not set flags!");
1602 MachineRegisterInfo &MRI = *MIB.getMRI();
1603 assert(RBI.getRegBank(CompareReg, MRI, TRI)->getID() ==
1604 AArch64::GPRRegBankID &&
1605 "Expected GPRs only?");
1606 auto Ty = MRI.getType(CompareReg);
1607 unsigned Width = Ty.getSizeInBits();
1608 assert(!Ty.isVector() && "Expected scalar only?");
1609 assert(Width <= 64 && "Expected width to be at most 64?");
1610 static const unsigned OpcTable[2][2] = {{AArch64::CBZW, AArch64::CBZX},
1611 {AArch64::CBNZW, AArch64::CBNZX}};
1612 unsigned Opc = OpcTable[IsNegative][Width == 64];
1613 auto BranchMI = MIB.buildInstr(Opc, {}, {CompareReg}).addMBB(DestMBB);
1614 constrainSelectedInstRegOperands(*BranchMI, TII, TRI, RBI);
1615 return &*BranchMI;
1616}
1617
1618bool AArch64InstructionSelector::selectCompareBranchFedByFCmp(
1619 MachineInstr &I, MachineInstr &FCmp, MachineIRBuilder &MIB) const {
1620 assert(FCmp.getOpcode() == TargetOpcode::G_FCMP);
1621 assert(I.getOpcode() == TargetOpcode::G_BRCOND);
1622 // Unfortunately, the mapping of LLVM FP CC's onto AArch64 CC's isn't
1623 // totally clean. Some of them require two branches to implement.
1624 auto Pred = (CmpInst::Predicate)FCmp.getOperand(1).getPredicate();
1625 emitFPCompare(FCmp.getOperand(2).getReg(), FCmp.getOperand(3).getReg(), MIB,
1626 Pred);
1627 AArch64CC::CondCode CC1, CC2;
1628 changeFCMPPredToAArch64CC(static_cast<CmpInst::Predicate>(Pred), CC1, CC2);
1629 MachineBasicBlock *DestMBB = I.getOperand(1).getMBB();
1630 MIB.buildInstr(AArch64::Bcc, {}, {}).addImm(CC1).addMBB(DestMBB);
1631 if (CC2 != AArch64CC::AL)
1632 MIB.buildInstr(AArch64::Bcc, {}, {}).addImm(CC2).addMBB(DestMBB);
1633 I.eraseFromParent();
1634 return true;
1635}
1636
1637bool AArch64InstructionSelector::tryOptCompareBranchFedByICmp(
1638 MachineInstr &I, MachineInstr &ICmp, MachineIRBuilder &MIB) const {
1639 assert(ICmp.getOpcode() == TargetOpcode::G_ICMP);
1640 assert(I.getOpcode() == TargetOpcode::G_BRCOND);
1641 // Attempt to optimize the G_BRCOND + G_ICMP into a TB(N)Z/CB(N)Z.
1642 //
1643 // Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z
1644 // instructions will not be produced, as they are conditional branch
1645 // instructions that do not set flags.
1646 if (!ProduceNonFlagSettingCondBr)
1647 return false;
1648
1649 MachineRegisterInfo &MRI = *MIB.getMRI();
1650 MachineBasicBlock *DestMBB = I.getOperand(1).getMBB();
1651 auto Pred =
1652 static_cast<CmpInst::Predicate>(ICmp.getOperand(1).getPredicate());
1653 Register LHS = ICmp.getOperand(2).getReg();
1654 Register RHS = ICmp.getOperand(3).getReg();
1655
1656 // We're allowed to emit a TB(N)Z/CB(N)Z. Try to do that.
1657 auto VRegAndVal = getIConstantVRegValWithLookThrough(RHS, MRI);
1658 MachineInstr *AndInst = getOpcodeDef(TargetOpcode::G_AND, LHS, MRI);
1659
1660 // When we can emit a TB(N)Z, prefer that.
1661 //
1662 // Handle non-commutative condition codes first.
1663 // Note that we don't want to do this when we have a G_AND because it can
1664 // become a tst. The tst will make the test bit in the TB(N)Z redundant.
1665 if (VRegAndVal && !AndInst) {
1666 int64_t C = VRegAndVal->Value.getSExtValue();
1667
1668 // When we have a greater-than comparison, we can just test if the msb is
1669 // zero.
1670 if (C == -1 && Pred == CmpInst::ICMP_SGT) {
1671 uint64_t Bit = MRI.getType(LHS).getSizeInBits() - 1;
1672 emitTestBit(LHS, Bit, /*IsNegative = */ false, DestMBB, MIB);
1673 I.eraseFromParent();
1674 return true;
1675 }
1676
1677 // When we have a less than comparison, we can just test if the msb is not
1678 // zero.
1679 if (C == 0 && Pred == CmpInst::ICMP_SLT) {
1680 uint64_t Bit = MRI.getType(LHS).getSizeInBits() - 1;
1681 emitTestBit(LHS, Bit, /*IsNegative = */ true, DestMBB, MIB);
1682 I.eraseFromParent();
1683 return true;
1684 }
1685 }
1686
1687 // Attempt to handle commutative condition codes. Right now, that's only
1688 // eq/ne.
1689 if (ICmpInst::isEquality(Pred)) {
1690 if (!VRegAndVal) {
1691 std::swap(RHS, LHS);
1692 VRegAndVal = getIConstantVRegValWithLookThrough(RHS, MRI);
1693 AndInst = getOpcodeDef(TargetOpcode::G_AND, LHS, MRI);
1694 }
1695
1696 if (VRegAndVal && VRegAndVal->Value == 0) {
1697 // If there's a G_AND feeding into this branch, try to fold it away by
1698 // emitting a TB(N)Z instead.
1699 //
1700 // Note: If we have LT, then it *is* possible to fold, but it wouldn't be
1701 // beneficial. When we have an AND and LT, we need a TST/ANDS, so folding
1702 // would be redundant.
1703 if (AndInst &&
1704 tryOptAndIntoCompareBranch(
1705 *AndInst, /*Invert = */ Pred == CmpInst::ICMP_NE, DestMBB, MIB)) {
1706 I.eraseFromParent();
1707 return true;
1708 }
1709
1710 // Otherwise, try to emit a CB(N)Z instead.
1711 auto LHSTy = MRI.getType(LHS);
1712 if (!LHSTy.isVector() && LHSTy.getSizeInBits() <= 64) {
1713 emitCBZ(LHS, /*IsNegative = */ Pred == CmpInst::ICMP_NE, DestMBB, MIB);
1714 I.eraseFromParent();
1715 return true;
1716 }
1717 }
1718 }
1719
1720 return false;
1721}
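The two special cases above (sgt -1 and slt 0) reduce to a sign-bit test, which is exactly what a TB(N)Z on the most significant bit checks. A standalone sketch:

#include <cassert>
#include <cstdint>

int main() {
  for (int64_t x : {INT64_MIN, -2LL, -1LL, 0LL, 1LL, INT64_MAX}) {
    bool msb = (uint64_t(x) >> 63) & 1;
    assert((x > -1) == !msb); // icmp sgt x, -1  ->  TBZ x, msb
    assert((x < 0) == msb);   // icmp slt x, 0   ->  TBNZ x, msb
  }
}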
1722
1723bool AArch64InstructionSelector::selectCompareBranchFedByICmp(
1724 MachineInstr &I, MachineInstr &ICmp, MachineIRBuilder &MIB) const {
1725 assert(ICmp.getOpcode() == TargetOpcode::G_ICMP);
1726 assert(I.getOpcode() == TargetOpcode::G_BRCOND);
1727 if (tryOptCompareBranchFedByICmp(I, ICmp, MIB))
1728 return true;
1729
1730 // Couldn't optimize. Emit a compare + a Bcc.
1731 MachineBasicBlock *DestMBB = I.getOperand(1).getMBB();
1732 auto PredOp = ICmp.getOperand(1);
1733 emitIntegerCompare(ICmp.getOperand(2), ICmp.getOperand(3), PredOp, MIB);
1734 const AArch64CC::CondCode CC = changeICMPPredToAArch64CC(
1735 static_cast<CmpInst::Predicate>(PredOp.getPredicate()));
1736 MIB.buildInstr(AArch64::Bcc, {}, {}).addImm(CC).addMBB(DestMBB);
1737 I.eraseFromParent();
1738 return true;
1739}
1740
1741bool AArch64InstructionSelector::selectCompareBranch(
1742 MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) {
1743 Register CondReg = I.getOperand(0).getReg();
1744 MachineInstr *CCMI = MRI.getVRegDef(CondReg);
1745 // Try to select the G_BRCOND using whatever is feeding the condition if
1746 // possible.
1747 unsigned CCMIOpc = CCMI->getOpcode();
1748 if (CCMIOpc == TargetOpcode::G_FCMP)
1749 return selectCompareBranchFedByFCmp(I, *CCMI, MIB);
1750 if (CCMIOpc == TargetOpcode::G_ICMP)
1751 return selectCompareBranchFedByICmp(I, *CCMI, MIB);
1752
1753 // Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z
1754 // instructions will not be produced, as they are conditional branch
1755 // instructions that do not set flags.
1756 if (ProduceNonFlagSettingCondBr) {
1757 emitTestBit(CondReg, /*Bit = */ 0, /*IsNegative = */ true,
1758 I.getOperand(1).getMBB(), MIB);
1759 I.eraseFromParent();
1760 return true;
1761 }
1762
1763 // Can't emit TB(N)Z/CB(N)Z. Emit a tst + bcc instead.
1764 auto TstMI =
1765 MIB.buildInstr(AArch64::ANDSWri, {LLT::scalar(32)}, {CondReg}).addImm(1);
1766 constrainSelectedInstRegOperands(*TstMI, TII, TRI, RBI);
1767 auto Bcc = MIB.buildInstr(AArch64::Bcc)
1768 .addImm(AArch64CC::EQ)
1769 .addMBB(I.getOperand(1).getMBB());
1770 I.eraseFromParent();
1771 return constrainSelectedInstRegOperands(*Bcc, TII, TRI, RBI);
1772}
1773
1774/// Returns the element immediate value of a vector shift operand if found.
1775/// This needs to detect a splat-like operation, e.g. a G_BUILD_VECTOR.
1776static Optional<int64_t> getVectorShiftImm(Register Reg,
1777 MachineRegisterInfo &MRI) {
1778 assert(MRI.getType(Reg).isVector() && "Expected a *vector* shift operand");
1779 MachineInstr *OpMI = MRI.getVRegDef(Reg);
1780 return getAArch64VectorSplatScalar(*OpMI, MRI);
1781}
1782
1783/// Matches and returns the shift immediate value for a SHL instruction given
1784/// a shift operand.
1785static Optional<int64_t> getVectorSHLImm(LLT SrcTy, Register Reg, MachineRegisterInfo &MRI) {
1786 Optional<int64_t> ShiftImm = getVectorShiftImm(Reg, MRI);
1787 if (!ShiftImm)
1788 return None;
1789 // Check the immediate is in range for a SHL.
1790 int64_t Imm = *ShiftImm;
1791 if (Imm < 0)
1792 return None;
1793 switch (SrcTy.getElementType().getSizeInBits()) {
1794 default:
1795 LLVM_DEBUG(dbgs() << "Unhandled element type for vector shift")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "Unhandled element type for vector shift"
; } } while (false)
;
1796 return None;
1797 case 8:
1798 if (Imm > 7)
1799 return None;
1800 break;
1801 case 16:
1802 if (Imm > 15)
1803 return None;
1804 break;
1805 case 32:
1806 if (Imm > 31)
1807 return None;
1808 break;
1809 case 64:
1810 if (Imm > 63)
1811 return None;
1812 break;
1813 }
1814 return Imm;
1815}
1816
1817bool AArch64InstructionSelector::selectVectorSHL(MachineInstr &I,
1818 MachineRegisterInfo &MRI) {
1819 assert(I.getOpcode() == TargetOpcode::G_SHL);
1820 Register DstReg = I.getOperand(0).getReg();
1821 const LLT Ty = MRI.getType(DstReg);
1822 Register Src1Reg = I.getOperand(1).getReg();
1823 Register Src2Reg = I.getOperand(2).getReg();
1824
1825 if (!Ty.isVector())
1826 return false;
1827
1828 // Check if we have a vector of constants on RHS that we can select as the
1829 // immediate form.
1830 Optional<int64_t> ImmVal = getVectorSHLImm(Ty, Src2Reg, MRI);
1831
1832 unsigned Opc = 0;
1833 if (Ty == LLT::fixed_vector(2, 64)) {
1834 Opc = ImmVal ? AArch64::SHLv2i64_shift : AArch64::USHLv2i64;
1835 } else if (Ty == LLT::fixed_vector(4, 32)) {
1836 Opc = ImmVal ? AArch64::SHLv4i32_shift : AArch64::USHLv4i32;
1837 } else if (Ty == LLT::fixed_vector(2, 32)) {
1838 Opc = ImmVal ? AArch64::SHLv2i32_shift : AArch64::USHLv2i32;
1839 } else if (Ty == LLT::fixed_vector(4, 16)) {
1840 Opc = ImmVal ? AArch64::SHLv4i16_shift : AArch64::USHLv4i16;
1841 } else if (Ty == LLT::fixed_vector(8, 16)) {
1842 Opc = ImmVal ? AArch64::SHLv8i16_shift : AArch64::USHLv8i16;
1843 } else if (Ty == LLT::fixed_vector(16, 8)) {
1844 Opc = ImmVal ? AArch64::SHLv16i8_shift : AArch64::USHLv16i8;
1845 } else if (Ty == LLT::fixed_vector(8, 8)) {
1846 Opc = ImmVal ? AArch64::SHLv8i8_shift : AArch64::USHLv8i8;
1847 } else {
1848 LLVM_DEBUG(dbgs() << "Unhandled G_SHL type")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "Unhandled G_SHL type"; }
} while (false)
;
1849 return false;
1850 }
1851
1852 auto Shl = MIB.buildInstr(Opc, {DstReg}, {Src1Reg});
1853 if (ImmVal)
1854 Shl.addImm(*ImmVal);
1855 else
1856 Shl.addUse(Src2Reg);
1857 constrainSelectedInstRegOperands(*Shl, TII, TRI, RBI);
1858 I.eraseFromParent();
1859 return true;
1860}
1861
1862bool AArch64InstructionSelector::selectVectorAshrLshr(
1863 MachineInstr &I, MachineRegisterInfo &MRI) {
1864 assert(I.getOpcode() == TargetOpcode::G_ASHR ||
1865 I.getOpcode() == TargetOpcode::G_LSHR);
1866 Register DstReg = I.getOperand(0).getReg();
1867 const LLT Ty = MRI.getType(DstReg);
1868 Register Src1Reg = I.getOperand(1).getReg();
1869 Register Src2Reg = I.getOperand(2).getReg();
1870
1871 if (!Ty.isVector())
1872 return false;
1873
1874 bool IsASHR = I.getOpcode() == TargetOpcode::G_ASHR;
1875
1876 // We expect the immediate case to be lowered in the PostLegalizerCombiner to
1877 // AArch64ISD::VASHR or AArch64ISD::VLSHR equivalents.
1878
1879 // There is no vector shift-right-by-register instruction, but the
1880 // shift-left-by-register instruction takes a signed value, where negative
1881 // amounts specify a right shift.
1882
1883 unsigned Opc = 0;
1884 unsigned NegOpc = 0;
1885 const TargetRegisterClass *RC =
1886 getRegClassForTypeOnBank(Ty, RBI.getRegBank(AArch64::FPRRegBankID));
1887 if (Ty == LLT::fixed_vector(2, 64)) {
1888 Opc = IsASHR ? AArch64::SSHLv2i64 : AArch64::USHLv2i64;
1889 NegOpc = AArch64::NEGv2i64;
1890 } else if (Ty == LLT::fixed_vector(4, 32)) {
1891 Opc = IsASHR ? AArch64::SSHLv4i32 : AArch64::USHLv4i32;
1892 NegOpc = AArch64::NEGv4i32;
1893 } else if (Ty == LLT::fixed_vector(2, 32)) {
1894 Opc = IsASHR ? AArch64::SSHLv2i32 : AArch64::USHLv2i32;
1895 NegOpc = AArch64::NEGv2i32;
1896 } else if (Ty == LLT::fixed_vector(4, 16)) {
1897 Opc = IsASHR ? AArch64::SSHLv4i16 : AArch64::USHLv4i16;
1898 NegOpc = AArch64::NEGv4i16;
1899 } else if (Ty == LLT::fixed_vector(8, 16)) {
1900 Opc = IsASHR ? AArch64::SSHLv8i16 : AArch64::USHLv8i16;
1901 NegOpc = AArch64::NEGv8i16;
1902 } else if (Ty == LLT::fixed_vector(16, 8)) {
1903 Opc = IsASHR ? AArch64::SSHLv16i8 : AArch64::USHLv16i8;
1904 NegOpc = AArch64::NEGv16i8;
1905 } else if (Ty == LLT::fixed_vector(8, 8)) {
1906 Opc = IsASHR ? AArch64::SSHLv8i8 : AArch64::USHLv8i8;
1907 NegOpc = AArch64::NEGv8i8;
1908 } else {
1909 LLVM_DEBUG(dbgs() << "Unhandled G_ASHR type")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "Unhandled G_ASHR type"; }
} while (false)
;
1910 return false;
1911 }
1912
1913 auto Neg = MIB.buildInstr(NegOpc, {RC}, {Src2Reg});
1914 constrainSelectedInstRegOperands(*Neg, TII, TRI, RBI);
1915 auto SShl = MIB.buildInstr(Opc, {DstReg}, {Src1Reg, Neg});
1916 constrainSelectedInstRegOperands(*SShl, TII, TRI, RBI);
1917 I.eraseFromParent();
1918 return true;
1919}
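The lowering above relies on USHL/SSHL treating a negative per-element shift amount as a right shift. A scalar model of a single lane (illustrative only; assumes in-range amounts and two's-complement arithmetic right shift for the signed case):

#include <cassert>
#include <cstdint>

// One lane of USHL/SSHL: a non-negative amount shifts left, a negative
// amount shifts right by its magnitude.
static uint64_t ushl_lane(uint64_t x, int s) {
  return s >= 0 ? x << s : x >> -s;
}
static int64_t sshl_lane(int64_t x, int s) {
  return s >= 0 ? int64_t(uint64_t(x) << s) : x >> -s;
}

int main() {
  // G_LSHR x, 4 is selected as USHL x, NEG(4).
  assert(ushl_lane(0xF000000000000000ULL, -4) == 0x0F00000000000000ULL);
  // G_ASHR x, 4 is selected as SSHL x, NEG(4): the sign bit is replicated.
  assert(sshl_lane(INT64_MIN, -4) == int64_t(0xF800000000000000ULL));
}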
1920
1921bool AArch64InstructionSelector::selectVaStartAAPCS(
1922 MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const {
1923 return false;
1924}
1925
1926bool AArch64InstructionSelector::selectVaStartDarwin(
1927 MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const {
1928 AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
1929 Register ListReg = I.getOperand(0).getReg();
1930
1931 Register ArgsAddrReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
1932
1933 auto MIB =
1934 BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::ADDXri))
1935 .addDef(ArgsAddrReg)
1936 .addFrameIndex(FuncInfo->getVarArgsStackIndex())
1937 .addImm(0)
1938 .addImm(0);
1939
1940 constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
1941
1942 MIB = BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::STRXui))
1943 .addUse(ArgsAddrReg)
1944 .addUse(ListReg)
1945 .addImm(0)
1946 .addMemOperand(*I.memoperands_begin());
1947
1948 constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
1949 I.eraseFromParent();
1950 return true;
1951}
1952
1953void AArch64InstructionSelector::materializeLargeCMVal(
1954 MachineInstr &I, const Value *V, unsigned OpFlags) {
1955 MachineBasicBlock &MBB = *I.getParent();
1956 MachineFunction &MF = *MBB.getParent();
1957 MachineRegisterInfo &MRI = MF.getRegInfo();
1958
1959 auto MovZ = MIB.buildInstr(AArch64::MOVZXi, {&AArch64::GPR64RegClass}, {});
1960 MovZ->addOperand(MF, I.getOperand(1));
1961 MovZ->getOperand(1).setTargetFlags(OpFlags | AArch64II::MO_G0 |
1962 AArch64II::MO_NC);
1963 MovZ->addOperand(MF, MachineOperand::CreateImm(0));
1964 constrainSelectedInstRegOperands(*MovZ, TII, TRI, RBI);
1965
1966 auto BuildMovK = [&](Register SrcReg, unsigned char Flags, unsigned Offset,
1967 Register ForceDstReg) {
1968 Register DstReg = ForceDstReg
1969 ? ForceDstReg
1970 : MRI.createVirtualRegister(&AArch64::GPR64RegClass);
1971 auto MovI = MIB.buildInstr(AArch64::MOVKXi).addDef(DstReg).addUse(SrcReg);
1972 if (auto *GV = dyn_cast<GlobalValue>(V)) {
1973 MovI->addOperand(MF, MachineOperand::CreateGA(
1974 GV, MovZ->getOperand(1).getOffset(), Flags));
1975 } else {
1976 MovI->addOperand(
1977 MF, MachineOperand::CreateBA(cast<BlockAddress>(V),
1978 MovZ->getOperand(1).getOffset(), Flags));
1979 }
1980 MovI->addOperand(MF, MachineOperand::CreateImm(Offset));
1981 constrainSelectedInstRegOperands(*MovI, TII, TRI, RBI);
1982 return DstReg;
1983 };
1984 Register DstReg = BuildMovK(MovZ.getReg(0),
1985 AArch64II::MO_G1 | AArch64II::MO_NC, 16, 0);
1986 DstReg = BuildMovK(DstReg, AArch64II::MO_G2 | AArch64II::MO_NC, 32, 0);
1987 BuildMovK(DstReg, AArch64II::MO_G3, 48, I.getOperand(0).getReg());
1988}
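materializeLargeCMVal builds the 64-bit value 16 bits at a time: MOVZ writes the low chunk and zeroes the rest, and each MOVK inserts one higher chunk while preserving the others (MO_G0..MO_G3 select the chunk). A standalone C++ model of that sequence (illustrative helpers, not LLVM API):

#include <cassert>
#include <cstdint>

// Scalar models: MOVZ places a 16-bit chunk at the given position and zeroes
// the other bits; MOVK inserts a 16-bit chunk and keeps the other bits.
static uint64_t movz(uint64_t imm16, unsigned shift) { return imm16 << shift; }
static uint64_t movk(uint64_t reg, uint64_t imm16, unsigned shift) {
  return (reg & ~(0xFFFFULL << shift)) | (imm16 << shift);
}

int main() {
  uint64_t addr = 0x1122334455667788ULL;
  uint64_t r = movz(addr & 0xFFFF, 0);    // MOVZ, MO_G0
  r = movk(r, (addr >> 16) & 0xFFFF, 16); // MOVK, MO_G1
  r = movk(r, (addr >> 32) & 0xFFFF, 32); // MOVK, MO_G2
  r = movk(r, (addr >> 48) & 0xFFFF, 48); // MOVK, MO_G3
  assert(r == addr);
}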
1989
1990bool AArch64InstructionSelector::preISelLower(MachineInstr &I) {
1991 MachineBasicBlock &MBB = *I.getParent();
1992 MachineFunction &MF = *MBB.getParent();
1993 MachineRegisterInfo &MRI = MF.getRegInfo();
1994
1995 switch (I.getOpcode()) {
1996 case TargetOpcode::G_STORE: {
1997 bool Changed = contractCrossBankCopyIntoStore(I, MRI);
1998 MachineOperand &SrcOp = I.getOperand(0);
1999 if (MRI.getType(SrcOp.getReg()).isPointer()) {
2000 // Allow matching with imported patterns for stores of pointers. Unlike
2001 // G_LOAD/G_PTR_ADD, we may not have selected all users. So, emit a copy
2002 // and constrain.
2003 auto Copy = MIB.buildCopy(LLT::scalar(64), SrcOp);
2004 Register NewSrc = Copy.getReg(0);
2005 SrcOp.setReg(NewSrc);
2006 RBI.constrainGenericRegister(NewSrc, AArch64::GPR64RegClass, MRI);
2007 Changed = true;
2008 }
2009 return Changed;
2010 }
2011 case TargetOpcode::G_PTR_ADD:
2012 return convertPtrAddToAdd(I, MRI);
2013 case TargetOpcode::G_LOAD: {
2014 // For scalar loads of pointers, we try to convert the dest type from p0
2015 // to s64 so that our imported patterns can match. Like with the G_PTR_ADD
2016 // conversion, this should be ok because all users should have been
2017 // selected already, so the type doesn't matter for them.
2018 Register DstReg = I.getOperand(0).getReg();
2019 const LLT DstTy = MRI.getType(DstReg);
2020 if (!DstTy.isPointer())
2021 return false;
2022 MRI.setType(DstReg, LLT::scalar(64));
2023 return true;
2024 }
2025 case AArch64::G_DUP: {
2026 // Convert the type from p0 to s64 to help selection.
2027 LLT DstTy = MRI.getType(I.getOperand(0).getReg());
2028 if (!DstTy.getElementType().isPointer())
2029 return false;
2030 auto NewSrc = MIB.buildCopy(LLT::scalar(64), I.getOperand(1).getReg());
2031 MRI.setType(I.getOperand(0).getReg(),
2032 DstTy.changeElementType(LLT::scalar(64)));
2033 MRI.setRegClass(NewSrc.getReg(0), &AArch64::GPR64RegClass);
2034 I.getOperand(1).setReg(NewSrc.getReg(0));
2035 return true;
2036 }
2037 case TargetOpcode::G_UITOFP:
2038 case TargetOpcode::G_SITOFP: {
2039 // If both source and destination regbanks are FPR, then convert the opcode
2040 // to G_SITOF/G_UITOF so that the importer can select it to an fpr variant.
2041 // Otherwise, it ends up matching an fpr/gpr variant and adding a cross-bank
2042 // copy.
2043 Register SrcReg = I.getOperand(1).getReg();
2044 LLT SrcTy = MRI.getType(SrcReg);
2045 LLT DstTy = MRI.getType(I.getOperand(0).getReg());
2046 if (SrcTy.isVector() || SrcTy.getSizeInBits() != DstTy.getSizeInBits())
2047 return false;
2048
2049 if (RBI.getRegBank(SrcReg, MRI, TRI)->getID() == AArch64::FPRRegBankID) {
2050 if (I.getOpcode() == TargetOpcode::G_SITOFP)
2051 I.setDesc(TII.get(AArch64::G_SITOF));
2052 else
2053 I.setDesc(TII.get(AArch64::G_UITOF));
2054 return true;
2055 }
2056 return false;
2057 }
2058 default:
2059 return false;
2060 }
2061}
2062
2063/// This lowering tries to look for G_PTR_ADD instructions and then converts
2064/// them to a standard G_ADD with a COPY on the source.
2065///
2066/// The motivation behind this is to expose the add semantics to the imported
2067/// tablegen patterns. We shouldn't need to check for uses being loads/stores,
2068/// because the selector works bottom up, uses before defs. By the time we
2069/// end up trying to select a G_PTR_ADD, we should have already attempted to
2070/// fold this into addressing modes and were therefore unsuccessful.
2071bool AArch64InstructionSelector::convertPtrAddToAdd(
2072 MachineInstr &I, MachineRegisterInfo &MRI) {
2073 assert(I.getOpcode() == TargetOpcode::G_PTR_ADD && "Expected G_PTR_ADD");
2074 Register DstReg = I.getOperand(0).getReg();
2075 Register AddOp1Reg = I.getOperand(1).getReg();
2076 const LLT PtrTy = MRI.getType(DstReg);
2077 if (PtrTy.getAddressSpace() != 0)
2078 return false;
2079
2080 const LLT CastPtrTy =
2081 PtrTy.isVector() ? LLT::fixed_vector(2, 64) : LLT::scalar(64);
2082 auto PtrToInt = MIB.buildPtrToInt(CastPtrTy, AddOp1Reg);
2083 // Set regbanks on the registers.
2084 if (PtrTy.isVector())
2085 MRI.setRegBank(PtrToInt.getReg(0), RBI.getRegBank(AArch64::FPRRegBankID));
2086 else
2087 MRI.setRegBank(PtrToInt.getReg(0), RBI.getRegBank(AArch64::GPRRegBankID));
2088
2089 // Now turn the %dst(p0) = G_PTR_ADD %base, off into:
2090 // %dst(intty) = G_ADD %intbase, off
2091 I.setDesc(TII.get(TargetOpcode::G_ADD));
2092 MRI.setType(DstReg, CastPtrTy);
2093 I.getOperand(1).setReg(PtrToInt.getReg(0));
2094 if (!select(*PtrToInt)) {
2095 LLVM_DEBUG(dbgs() << "Failed to select G_PTRTOINT in convertPtrAddToAdd")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "Failed to select G_PTRTOINT in convertPtrAddToAdd"
; } } while (false)
;
2096 return false;
2097 }
2098
2099 // Also take the opportunity here to try to do some optimization.
2100 // Try to convert this into a G_SUB if the offset is a 0-x negate idiom.
2101 Register NegatedReg;
2102 if (!mi_match(I.getOperand(2).getReg(), MRI, m_Neg(m_Reg(NegatedReg))))
2103 return true;
2104 I.getOperand(2).setReg(NegatedReg);
2105 I.setDesc(TII.get(TargetOpcode::G_SUB));
2106 return true;
2107}
2108
2109bool AArch64InstructionSelector::earlySelectSHL(MachineInstr &I,
2110 MachineRegisterInfo &MRI) {
2111 // We try to match the immediate variant of LSL, which is actually an alias
2112 // for a special case of UBFM. Otherwise, we fall back to the imported
2113 // selector which will match the register variant.
2114 assert(I.getOpcode() == TargetOpcode::G_SHL && "unexpected op");
2115 const auto &MO = I.getOperand(2);
2116 auto VRegAndVal = getIConstantVRegVal(MO.getReg(), MRI);
2117 if (!VRegAndVal)
2118 return false;
2119
2120 const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
2121 if (DstTy.isVector())
2122 return false;
2123 bool Is64Bit = DstTy.getSizeInBits() == 64;
2124 auto Imm1Fn = Is64Bit ? selectShiftA_64(MO) : selectShiftA_32(MO);
2125 auto Imm2Fn = Is64Bit ? selectShiftB_64(MO) : selectShiftB_32(MO);
2126
2127 if (!Imm1Fn || !Imm2Fn)
2128 return false;
2129
2130 auto NewI =
2131 MIB.buildInstr(Is64Bit ? AArch64::UBFMXri : AArch64::UBFMWri,
2132 {I.getOperand(0).getReg()}, {I.getOperand(1).getReg()});
2133
2134 for (auto &RenderFn : *Imm1Fn)
2135 RenderFn(NewI);
2136 for (auto &RenderFn : *Imm2Fn)
2137 RenderFn(NewI);
2138
2139 I.eraseFromParent();
2140 return constrainSelectedInstRegOperands(*NewI, TII, TRI, RBI);
2141}
2142
2143bool AArch64InstructionSelector::contractCrossBankCopyIntoStore(
2144 MachineInstr &I, MachineRegisterInfo &MRI) {
2145 assert(I.getOpcode() == TargetOpcode::G_STORE && "Expected G_STORE");
2146 // If we're storing a scalar, it doesn't matter what register bank that
2147 // scalar is on. All that matters is the size.
2148 //
2149 // So, if we see something like this (with a 32-bit scalar as an example):
2150 //
2151 // %x:gpr(s32) = ... something ...
2152 // %y:fpr(s32) = COPY %x:gpr(s32)
2153 // G_STORE %y:fpr(s32)
2154 //
2155 // We can fix this up into something like this:
2156 //
2157 // G_STORE %x:gpr(s32)
2158 //
2159 // And then continue the selection process normally.
2160 Register DefDstReg = getSrcRegIgnoringCopies(I.getOperand(0).getReg(), MRI);
2161 if (!DefDstReg.isValid())
2162 return false;
2163 LLT DefDstTy = MRI.getType(DefDstReg);
2164 Register StoreSrcReg = I.getOperand(0).getReg();
2165 LLT StoreSrcTy = MRI.getType(StoreSrcReg);
2166
2167 // If we get something strange like a physical register, then we shouldn't
2168 // go any further.
2169 if (!DefDstTy.isValid())
2170 return false;
2171
2172 // Are the source and dst types the same size?
2173 if (DefDstTy.getSizeInBits() != StoreSrcTy.getSizeInBits())
2174 return false;
2175
2176 if (RBI.getRegBank(StoreSrcReg, MRI, TRI) ==
2177 RBI.getRegBank(DefDstReg, MRI, TRI))
2178 return false;
2179
2180 // We have a cross-bank copy, which is entering a store. Let's fold it.
2181 I.getOperand(0).setReg(DefDstReg);
2182 return true;
2183}
2184
2185bool AArch64InstructionSelector::earlySelect(MachineInstr &I) {
2186 assert(I.getParent() && "Instruction should be in a basic block!");
2187 assert(I.getParent()->getParent() && "Instruction should be in a function!");
2188
2189 MachineBasicBlock &MBB = *I.getParent();
2190 MachineFunction &MF = *MBB.getParent();
2191 MachineRegisterInfo &MRI = MF.getRegInfo();
2192
2193 switch (I.getOpcode()) {
2194 case AArch64::G_DUP: {
2195 // Before selecting a DUP instruction, check if it is better selected as a
2196 // MOV or load from a constant pool.
2197 Register Src = I.getOperand(1).getReg();
2198 auto ValAndVReg = getIConstantVRegValWithLookThrough(Src, MRI);
2199 if (!ValAndVReg)
2200 return false;
2201 LLVMContext &Ctx = MF.getFunction().getContext();
2202 Register Dst = I.getOperand(0).getReg();
2203 auto *CV = ConstantDataVector::getSplat(
2204 MRI.getType(Dst).getNumElements(),
2205 ConstantInt::get(Type::getIntNTy(Ctx, MRI.getType(Src).getSizeInBits()),
2206 ValAndVReg->Value));
2207 if (!emitConstantVector(Dst, CV, MIB, MRI))
2208 return false;
2209 I.eraseFromParent();
2210 return true;
2211 }
2212 case TargetOpcode::G_SEXT:
2213 // Check for i64 sext(i32 vector_extract) prior to tablegen to select SMOV
2214 // over a normal extend.
2215 if (selectUSMovFromExtend(I, MRI))
2216 return true;
2217 return false;
2218 case TargetOpcode::G_BR:
2219 return false;
2220 case TargetOpcode::G_SHL:
2221 return earlySelectSHL(I, MRI);
2222 case TargetOpcode::G_CONSTANT: {
2223 bool IsZero = false;
2224 if (I.getOperand(1).isCImm())
2225 IsZero = I.getOperand(1).getCImm()->getZExtValue() == 0;
2226 else if (I.getOperand(1).isImm())
2227 IsZero = I.getOperand(1).getImm() == 0;
2228
2229 if (!IsZero)
2230 return false;
2231
2232 Register DefReg = I.getOperand(0).getReg();
2233 LLT Ty = MRI.getType(DefReg);
2234 if (Ty.getSizeInBits() == 64) {
2235 I.getOperand(1).ChangeToRegister(AArch64::XZR, false);
2236 RBI.constrainGenericRegister(DefReg, AArch64::GPR64RegClass, MRI);
2237 } else if (Ty.getSizeInBits() == 32) {
2238 I.getOperand(1).ChangeToRegister(AArch64::WZR, false);
2239 RBI.constrainGenericRegister(DefReg, AArch64::GPR32RegClass, MRI);
2240 } else
2241 return false;
2242
2243 I.setDesc(TII.get(TargetOpcode::COPY));
2244 return true;
2245 }
2246
2247 case TargetOpcode::G_ADD: {
2248 // Check if this is being fed by a G_ICMP on either side.
2249 //
2250 // (cmp pred, x, y) + z
2251 //
2252 // In the above case, when the cmp is true, we increment z by 1. So, we can
2253 // fold the add into the cset for the cmp by using cinc.
2254 //
2255 // FIXME: This would probably be a lot nicer in PostLegalizerLowering.
2256 Register AddDst = I.getOperand(0).getReg();
2257 Register AddLHS = I.getOperand(1).getReg();
2258 Register AddRHS = I.getOperand(2).getReg();
2259 // Only handle scalars.
2260 LLT Ty = MRI.getType(AddLHS);
2261 if (Ty.isVector())
2262 return false;
2263 // Since G_ICMP is modeled as ADDS/SUBS/ANDS, we can handle 32 bits or 64
2264 // bits.
2265 unsigned Size = Ty.getSizeInBits();
2266 if (Size != 32 && Size != 64)
2267 return false;
2268 auto MatchCmp = [&](Register Reg) -> MachineInstr * {
2269 if (!MRI.hasOneNonDBGUse(Reg))
2270 return nullptr;
2271 // If the LHS of the add is 32 bits, then we want to fold a 32-bit
2272 // compare.
2273 if (Size == 32)
2274 return getOpcodeDef(TargetOpcode::G_ICMP, Reg, MRI);
2275 // We model scalar compares using 32-bit destinations right now.
2276 // If it's a 64-bit compare, it'll have 64-bit sources.
2277 Register ZExt;
2278 if (!mi_match(Reg, MRI,
2279 m_OneNonDBGUse(m_GZExt(m_OneNonDBGUse(m_Reg(ZExt))))))
2280 return nullptr;
2281 auto *Cmp = getOpcodeDef(TargetOpcode::G_ICMP, ZExt, MRI);
2282 if (!Cmp ||
2283 MRI.getType(Cmp->getOperand(2).getReg()).getSizeInBits() != 64)
2284 return nullptr;
2285 return Cmp;
2286 };
2287 // Try to match
2288 // z + (cmp pred, x, y)
2289 MachineInstr *Cmp = MatchCmp(AddRHS);
2290 if (!Cmp) {
2291 // (cmp pred, x, y) + z
2292 std::swap(AddLHS, AddRHS);
2293 Cmp = MatchCmp(AddRHS);
2294 if (!Cmp)
2295 return false;
2296 }
2297 auto &PredOp = Cmp->getOperand(1);
2298 auto Pred = static_cast<CmpInst::Predicate>(PredOp.getPredicate());
2299 const AArch64CC::CondCode InvCC =
2300 changeICMPPredToAArch64CC(CmpInst::getInversePredicate(Pred));
2301 MIB.setInstrAndDebugLoc(I);
2302 emitIntegerCompare(/*LHS=*/Cmp->getOperand(2),
2303 /*RHS=*/Cmp->getOperand(3), PredOp, MIB);
2304 emitCSINC(/*Dst=*/AddDst, /*Src =*/AddLHS, /*Src2=*/AddLHS, InvCC, MIB);
2305 I.eraseFromParent();
2306 return true;
2307 }
2308 case TargetOpcode::G_OR: {
2309 // Look for operations that take the lower `Width=Size-ShiftImm` bits of
2310 // `ShiftSrc` and insert them into the upper `Width` bits of `MaskSrc` via
2311 // shifting and masking that we can replace with a BFI (encoded as a BFM).
2312 Register Dst = I.getOperand(0).getReg();
2313 LLT Ty = MRI.getType(Dst);
2314
2315 if (!Ty.isScalar())
2316 return false;
2317
2318 unsigned Size = Ty.getSizeInBits();
2319 if (Size != 32 && Size != 64)
2320 return false;
2321
2322 Register ShiftSrc;
2323 int64_t ShiftImm;
2324 Register MaskSrc;
2325 int64_t MaskImm;
2326 if (!mi_match(
2327 Dst, MRI,
2328 m_GOr(m_OneNonDBGUse(m_GShl(m_Reg(ShiftSrc), m_ICst(ShiftImm))),
2329 m_OneNonDBGUse(m_GAnd(m_Reg(MaskSrc), m_ICst(MaskImm))))))
2330 return false;
2331
2332 if (ShiftImm > Size || ((1ULL << ShiftImm) - 1ULL) != uint64_t(MaskImm))
2333 return false;
2334
2335 int64_t Immr = Size - ShiftImm;
2336 int64_t Imms = Size - ShiftImm - 1;
2337 unsigned Opc = Size == 32 ? AArch64::BFMWri : AArch64::BFMXri;
2338 emitInstr(Opc, {Dst}, {MaskSrc, ShiftSrc, Immr, Imms}, MIB);
2339 I.eraseFromParent();
2340 return true;
2341 }
2342 case TargetOpcode::G_FENCE: {
2343 if (I.getOperand(1).getImm() == 0)
2344 BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::CompilerBarrier))
2345 .addImm(I.getOperand(0).getImm());
2346 else
2347 BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::DMB))
2348 .addImm(I.getOperand(0).getImm() == 4 ? 0x9 : 0xb);
2349 I.eraseFromParent();
2350 return true;
2351 }
2352 default:
2353 return false;
2354 }
2355}
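Two of the early-selected patterns above are plain arithmetic identities: adding a compare result is a conditional increment (hence CSINC with the inverted condition), and OR'ing a shifted value with a masked value is a bitfield insert (hence BFM). A standalone sketch of both, reusing the operand names from the G_OR case:

#include <cassert>
#include <cstdint>

int main() {
  // z + (x cmp y) == cmp ? z + 1 : z, i.e. a conditional increment of z.
  for (int64_t x : {-3LL, 0LL, 7LL})
    for (int64_t z : {0LL, 41LL}) {
      bool cc = x < 0; // any predicate behaves the same way here
      assert(z + (cc ? 1 : 0) == (cc ? z + 1 : z));
    }

  // (ShiftSrc << ShiftImm) | (MaskSrc & ((1 << ShiftImm) - 1)) keeps the low
  // ShiftImm bits of MaskSrc and places ShiftSrc above them (a BFI).
  uint64_t ShiftSrc = 0xABCD, MaskSrc = ~0ULL;
  unsigned ShiftImm = 8;
  uint64_t MaskImm = (1ULL << ShiftImm) - 1;
  uint64_t Or = (ShiftSrc << ShiftImm) | (MaskSrc & MaskImm);
  assert((Or & MaskImm) == (MaskSrc & MaskImm)); // low bits come from MaskSrc
  assert((Or >> ShiftImm) == ShiftSrc);          // upper bits come from ShiftSrc
}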
2356
2357bool AArch64InstructionSelector::select(MachineInstr &I) {
2358 assert(I.getParent() && "Instruction should be in a basic block!");
2359 assert(I.getParent()->getParent() && "Instruction should be in a function!");
2360
2361 MachineBasicBlock &MBB = *I.getParent();
2362 MachineFunction &MF = *MBB.getParent();
2363 MachineRegisterInfo &MRI = MF.getRegInfo();
2364
2365 const AArch64Subtarget *Subtarget = &MF.getSubtarget<AArch64Subtarget>();
2366 if (Subtarget->requiresStrictAlign()) {
2367 // We don't support this feature yet.
2368 LLVM_DEBUG(dbgs() << "AArch64 GISel does not support strict-align yet\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "AArch64 GISel does not support strict-align yet\n"
; } } while (false)
;
2369 return false;
2370 }
2371
2372 MIB.setInstrAndDebugLoc(I);
2373
2374 unsigned Opcode = I.getOpcode();
2375 // G_PHI requires same handling as PHI
2376 if (!I.isPreISelOpcode() || Opcode == TargetOpcode::G_PHI) {
2377 // Certain non-generic instructions also need some special handling.
2378
2379 if (Opcode == TargetOpcode::LOAD_STACK_GUARD)
2380 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2381
2382 if (Opcode == TargetOpcode::PHI || Opcode == TargetOpcode::G_PHI) {
2383 const Register DefReg = I.getOperand(0).getReg();
2384 const LLT DefTy = MRI.getType(DefReg);
2385
2386 const RegClassOrRegBank &RegClassOrBank =
2387 MRI.getRegClassOrRegBank(DefReg);
2388
2389 const TargetRegisterClass *DefRC
2390 = RegClassOrBank.dyn_cast<const TargetRegisterClass *>();
2391 if (!DefRC) {
2392 if (!DefTy.isValid()) {
2393 LLVM_DEBUG(dbgs() << "PHI operand has no type, not a gvreg?\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "PHI operand has no type, not a gvreg?\n"
; } } while (false)
;
2394 return false;
2395 }
2396 const RegisterBank &RB = *RegClassOrBank.get<const RegisterBank *>();
2397 DefRC = getRegClassForTypeOnBank(DefTy, RB);
2398 if (!DefRC) {
2399 LLVM_DEBUG(dbgs() << "PHI operand has unexpected size/bank\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "PHI operand has unexpected size/bank\n"
; } } while (false)
;
2400 return false;
2401 }
2402 }
2403
2404 I.setDesc(TII.get(TargetOpcode::PHI));
2405
2406 return RBI.constrainGenericRegister(DefReg, *DefRC, MRI);
2407 }
2408
2409 if (I.isCopy())
2410 return selectCopy(I, TII, MRI, TRI, RBI);
2411
2412 if (I.isDebugInstr())
2413 return selectDebugInstr(I, MRI, RBI);
2414
2415 return true;
2416 }
2417
2418
2419 if (I.getNumOperands() != I.getNumExplicitOperands()) {
2420 LLVM_DEBUG(
2421 dbgs() << "Generic instruction has unexpected implicit operands\n");
2422 return false;
2423 }
2424
2425 // Try to do some lowering before we start instruction selecting. These
2426 // lowerings are purely transformations on the input G_MIR and so selection
2427 // must continue after any modification of the instruction.
2428 if (preISelLower(I)) {
2429 Opcode = I.getOpcode(); // The opcode may have been modified, refresh it.
2430 }
2431
2432 // There may be patterns where the importer can't deal with them optimally,
2433 // but does select it to a suboptimal sequence so our custom C++ selection
2434 // code later never has a chance to work on it. Therefore, we have an early
2435 // selection attempt here to give priority to certain selection routines
2436 // over the imported ones.
2437 if (earlySelect(I))
2438 return true;
2439
2440 if (selectImpl(I, *CoverageInfo))
2441 return true;
2442
2443 LLT Ty =
2444 I.getOperand(0).isReg() ? MRI.getType(I.getOperand(0).getReg()) : LLT{};
2445
2446 switch (Opcode) {
2447 case TargetOpcode::G_SBFX:
2448 case TargetOpcode::G_UBFX: {
2449 static const unsigned OpcTable[2][2] = {
2450 {AArch64::UBFMWri, AArch64::UBFMXri},
2451 {AArch64::SBFMWri, AArch64::SBFMXri}};
2452 bool IsSigned = Opcode == TargetOpcode::G_SBFX;
2453 unsigned Size = Ty.getSizeInBits();
2454 unsigned Opc = OpcTable[IsSigned][Size == 64];
2455 auto Cst1 =
2456 getIConstantVRegValWithLookThrough(I.getOperand(2).getReg(), MRI);
2457 assert(Cst1 && "Should have gotten a constant for src 1?");
2458 auto Cst2 =
2459 getIConstantVRegValWithLookThrough(I.getOperand(3).getReg(), MRI);
2460 assert(Cst2 && "Should have gotten a constant for src 2?");
2461 auto LSB = Cst1->Value.getZExtValue();
2462 auto Width = Cst2->Value.getZExtValue();
2463 auto BitfieldInst =
2464 MIB.buildInstr(Opc, {I.getOperand(0)}, {I.getOperand(1)})
2465 .addImm(LSB)
2466 .addImm(LSB + Width - 1);
2467 I.eraseFromParent();
2468 return constrainSelectedInstRegOperands(*BitfieldInst, TII, TRI, RBI);
2469 }
2470 case TargetOpcode::G_BRCOND:
2471 return selectCompareBranch(I, MF, MRI);
2472
2473 case TargetOpcode::G_BRINDIRECT: {
2474 I.setDesc(TII.get(AArch64::BR));
2475 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2476 }
2477
2478 case TargetOpcode::G_BRJT:
2479 return selectBrJT(I, MRI);
2480
2481 case AArch64::G_ADD_LOW: {
2482 // This op may have been separated from its ADRP companion by the localizer
2483 // or some other code motion pass. Given that many CPUs will try to
2484 // macro fuse these operations anyway, select this into a MOVaddr pseudo
2485 // which will later be expanded into an ADRP+ADD pair after scheduling.
2486 MachineInstr *BaseMI = MRI.getVRegDef(I.getOperand(1).getReg());
2487 if (BaseMI->getOpcode() != AArch64::ADRP) {
2488 I.setDesc(TII.get(AArch64::ADDXri));
2489 I.addOperand(MachineOperand::CreateImm(0));
2490 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2491 }
2492 assert(TM.getCodeModel() == CodeModel::Small &&
2493 "Expected small code model");
2494 auto Op1 = BaseMI->getOperand(1);
2495 auto Op2 = I.getOperand(2);
2496 auto MovAddr = MIB.buildInstr(AArch64::MOVaddr, {I.getOperand(0)}, {})
2497 .addGlobalAddress(Op1.getGlobal(), Op1.getOffset(),
2498 Op1.getTargetFlags())
2499 .addGlobalAddress(Op2.getGlobal(), Op2.getOffset(),
2500 Op2.getTargetFlags());
2501 I.eraseFromParent();
2502 return constrainSelectedInstRegOperands(*MovAddr, TII, TRI, RBI);
2503 }
2504
2505 case TargetOpcode::G_BSWAP: {
2506 // Handle vector types for G_BSWAP directly.
2507 Register DstReg = I.getOperand(0).getReg();
2508 LLT DstTy = MRI.getType(DstReg);
2509
2510 // We should only get vector types here; everything else is handled by the
2511 // importer right now.
2512 if (!DstTy.isVector() || DstTy.getSizeInBits() > 128) {
2513 LLVM_DEBUG(dbgs() << "Dst type for G_BSWAP currently unsupported.\n");
2514 return false;
2515 }
2516
2517 // Only handle 4 and 2 element vectors for now.
2518 // TODO: 16-bit elements.
2519 unsigned NumElts = DstTy.getNumElements();
2520 if (NumElts != 4 && NumElts != 2) {
2521 LLVM_DEBUG(dbgs() << "Unsupported number of elements for G_BSWAP.\n");
2522 return false;
2523 }
2524
2525 // Choose the correct opcode for the supported types. Right now, that's
2526 // v2s32, v4s32, and v2s64.
2527 unsigned Opc = 0;
2528 unsigned EltSize = DstTy.getElementType().getSizeInBits();
2529 if (EltSize == 32)
2530 Opc = (DstTy.getNumElements() == 2) ? AArch64::REV32v8i8
2531 : AArch64::REV32v16i8;
2532 else if (EltSize == 64)
2533 Opc = AArch64::REV64v16i8;
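     // REV32 reverses the bytes within each 32-bit word and REV64 within each
     // 64-bit doubleword, which is exactly a per-element byte swap for these
     // element sizes.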
2534
2535 // We should always get something by the time we get here...
2536 assert(Opc != 0 && "Didn't get an opcode for G_BSWAP?");
2537
2538 I.setDesc(TII.get(Opc));
2539 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2540 }
2541
2542 case TargetOpcode::G_FCONSTANT:
2543 case TargetOpcode::G_CONSTANT: {
2544 const bool isFP = Opcode == TargetOpcode::G_FCONSTANT;
2545
2546 const LLT s8 = LLT::scalar(8);
2547 const LLT s16 = LLT::scalar(16);
2548 const LLT s32 = LLT::scalar(32);
2549 const LLT s64 = LLT::scalar(64);
2550 const LLT s128 = LLT::scalar(128);
2551 const LLT p0 = LLT::pointer(0, 64);
2552
2553 const Register DefReg = I.getOperand(0).getReg();
2554 const LLT DefTy = MRI.getType(DefReg);
2555 const unsigned DefSize = DefTy.getSizeInBits();
2556 const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI);
2557
2558 // FIXME: Redundant check, but even less readable when factored out.
2559 if (isFP) {
2560 if (Ty != s16 && Ty != s32 && Ty != s64 && Ty != s128) {
2561 LLVM_DEBUG(dbgs() << "Unable to materialize FP " << Ty
2562 << " constant, expected: " << s16 << " or " << s32
2563 << " or " << s64 << " or " << s128 << '\n');
2564 return false;
2565 }
2566
2567 if (RB.getID() != AArch64::FPRRegBankID) {
2568 LLVM_DEBUG(dbgs() << "Unable to materialize FP " << Ty
2569 << " constant on bank: " << RB
2570 << ", expected: FPR\n");
2571 return false;
2572 }
2573
2574 // The case when we have 0.0 is covered by tablegen. Reject it here so we
2575 // can be sure tablegen works correctly and isn't rescued by this code.
2576 // However, 0.0 is not covered by tablegen for FP128, so we handle that
2577 // scenario here.
2578 if (DefSize != 128 && I.getOperand(1).getFPImm()->isExactlyValue(0.0))
2579 return false;
2580 } else {
2581 // s32 and s64 are covered by tablegen.
2582 if (Ty != p0 && Ty != s8 && Ty != s16) {
2583 LLVM_DEBUG(dbgs() << "Unable to materialize integer " << Ty
2584 << " constant, expected: " << s32 << ", " << s64
2585 << ", or " << p0 << '\n');
2586 return false;
2587 }
2588
2589 if (RB.getID() != AArch64::GPRRegBankID) {
2590 LLVM_DEBUG(dbgs() << "Unable to materialize integer " << Ty
2591 << " constant on bank: " << RB
2592 << ", expected: GPR\n");
2593 return false;
2594 }
2595 }
2596
2597 if (isFP) {
2598 const TargetRegisterClass &FPRRC = *getRegClassForTypeOnBank(DefTy, RB);
2599 // For 16, 64, and 128b values, emit a constant pool load.
2600 switch (DefSize) {
2601 default:
2602 llvm_unreachable("Unexpected destination size for G_FCONSTANT?");
2603 case 32:
2604 // For s32, use a cp load if we have optsize/minsize.
2605 if (!shouldOptForSize(&MF))
2606 break;
2607 [[fallthrough]];
2608 case 16:
2609 case 64:
2610 case 128: {
2611 auto *FPImm = I.getOperand(1).getFPImm();
2612 auto *LoadMI = emitLoadFromConstantPool(FPImm, MIB);
2613 if (!LoadMI) {
2614 LLVM_DEBUG(dbgs() << "Failed to load double constant pool entry\n");
2615 return false;
2616 }
2617 MIB.buildCopy({DefReg}, {LoadMI->getOperand(0).getReg()});
2618 I.eraseFromParent();
2619 return RBI.constrainGenericRegister(DefReg, FPRRC, MRI);
2620 }
2621 }
2622
2623 // Either emit a FMOV, or emit a copy to emit a normal mov.
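     // That is: materialize the 32-bit pattern with MOVi32imm into a GPR and
     // copy it into the FPR result; the GPR->FPR copy is expected to become an
     // FMOV during later copy lowering.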
2624 assert(DefSize == 32 &&
2625 "Expected constant pool loads for all sizes other than 32!");
2626 const Register DefGPRReg =
2627 MRI.createVirtualRegister(&AArch64::GPR32RegClass);
2628 MachineOperand &RegOp = I.getOperand(0);
2629 RegOp.setReg(DefGPRReg);
2630 MIB.setInsertPt(MIB.getMBB(), std::next(I.getIterator()));
2631 MIB.buildCopy({DefReg}, {DefGPRReg});
2632
2633 if (!RBI.constrainGenericRegister(DefReg, FPRRC, MRI)) {
2634 LLVM_DEBUG(dbgs() << "Failed to constrain G_FCONSTANT def operand\n");
2635 return false;
2636 }
2637
2638 MachineOperand &ImmOp = I.getOperand(1);
2639 // FIXME: Is going through int64_t always correct?
2640 ImmOp.ChangeToImmediate(
2641 ImmOp.getFPImm()->getValueAPF().bitcastToAPInt().getZExtValue());
2642 } else if (I.getOperand(1).isCImm()) {
2643 uint64_t Val = I.getOperand(1).getCImm()->getZExtValue();
2644 I.getOperand(1).ChangeToImmediate(Val);
2645 } else if (I.getOperand(1).isImm()) {
2646 uint64_t Val = I.getOperand(1).getImm();
2647 I.getOperand(1).ChangeToImmediate(Val);
2648 }
2649
2650 const unsigned MovOpc =
2651 DefSize == 64 ? AArch64::MOVi64imm : AArch64::MOVi32imm;
2652 I.setDesc(TII.get(MovOpc));
2653 constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2654 return true;
2655 }
2656 case TargetOpcode::G_EXTRACT: {
2657 Register DstReg = I.getOperand(0).getReg();
2658 Register SrcReg = I.getOperand(1).getReg();
2659 LLT SrcTy = MRI.getType(SrcReg);
2660 LLT DstTy = MRI.getType(DstReg);
2661 (void)DstTy;
2662 unsigned SrcSize = SrcTy.getSizeInBits();
2663
2664 if (SrcTy.getSizeInBits() > 64) {
2665 // This should be an extract of an s128, which is like a vector extract.
2666 if (SrcTy.getSizeInBits() != 128)
2667 return false;
2668 // Only support extracting 64 bits from an s128 at the moment.
2669 if (DstTy.getSizeInBits() != 64)
2670 return false;
2671
2672 unsigned Offset = I.getOperand(2).getImm();
2673 if (Offset % 64 != 0)
2674 return false;
2675
2676 // Check we have the right regbank always.
2677 const RegisterBank &SrcRB = *RBI.getRegBank(SrcReg, MRI, TRI);
2678 const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
2679 assert(SrcRB.getID() == DstRB.getID() && "Wrong extract regbank!");
2680
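     // An s128 on the GPR bank lives in a 64-bit register pair: offset 0 maps to
     // the sube64 half and offset 64 to the subo64 half, so the extract is just a
     // subregister COPY.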
2681 if (SrcRB.getID() == AArch64::GPRRegBankID) {
2682 auto NewI =
2683 MIB.buildInstr(TargetOpcode::COPY, {DstReg}, {})
2684 .addUse(SrcReg, 0,
2685 Offset == 0 ? AArch64::sube64 : AArch64::subo64);
2686 constrainOperandRegClass(MF, TRI, MRI, TII, RBI, *NewI,
2687 AArch64::GPR64RegClass, NewI->getOperand(0));
2688 I.eraseFromParent();
2689 return true;
2690 }
2691
2692 // Emit the same code as a vector extract.
2693 // Offset must be a multiple of 64.
2694 unsigned LaneIdx = Offset / 64;
2695 MachineInstr *Extract = emitExtractVectorElt(
2696 DstReg, DstRB, LLT::scalar(64), SrcReg, LaneIdx, MIB);
2697 if (!Extract)
2698 return false;
2699 I.eraseFromParent();
2700 return true;
2701 }
2702
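     // For scalar sources the extract is a UBFM: immr is the existing offset
     // operand (the lsb) and the appended imms is offset + dst-size - 1, the last
     // bit of the extracted field.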
2703 I.setDesc(TII.get(SrcSize == 64 ? AArch64::UBFMXri : AArch64::UBFMWri));
2704 MachineInstrBuilder(MF, I).addImm(I.getOperand(2).getImm() +
2705 Ty.getSizeInBits() - 1);
2706
2707 if (SrcSize < 64) {
2708 assert(SrcSize == 32 && DstTy.getSizeInBits() == 16 &&
2709 "unexpected G_EXTRACT types");
2710 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2711 }
2712
2713 DstReg = MRI.createGenericVirtualRegister(LLT::scalar(64));
2714 MIB.setInsertPt(MIB.getMBB(), std::next(I.getIterator()));
2715 MIB.buildInstr(TargetOpcode::COPY, {I.getOperand(0).getReg()}, {})
2716 .addReg(DstReg, 0, AArch64::sub_32);
2717 RBI.constrainGenericRegister(I.getOperand(0).getReg(),
2718 AArch64::GPR32RegClass, MRI);
2719 I.getOperand(0).setReg(DstReg);
2720
2721 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2722 }
2723
2724 case TargetOpcode::G_INSERT: {
2725 LLT SrcTy = MRI.getType(I.getOperand(2).getReg());
2726 LLT DstTy = MRI.getType(I.getOperand(0).getReg());
2727 unsigned DstSize = DstTy.getSizeInBits();
2728 // Larger inserts are vectors, same-size ones should be something else by
2729 // now (split up or turned into COPYs).
2730 if (Ty.getSizeInBits() > 64 || SrcTy.getSizeInBits() > 32)
2731 return false;
2732
2733 I.setDesc(TII.get(DstSize == 64 ? AArch64::BFMXri : AArch64::BFMWri));
2734 unsigned LSB = I.getOperand(3).getImm();
2735 unsigned Width = MRI.getType(I.getOperand(2).getReg()).getSizeInBits();
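     // BFI Rd, Rn, #lsb, #width is the alias of
     // BFM Rd, Rn, #((regsize - lsb) % regsize), #(width - 1), which is what the
     // two immediates below encode.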
2736 I.getOperand(3).setImm((DstSize - LSB) % DstSize);
2737 MachineInstrBuilder(MF, I).addImm(Width - 1);
2738
2739 if (DstSize < 64) {
2740 assert(DstSize == 32 && SrcTy.getSizeInBits() == 16 &&
2741 "unexpected G_INSERT types");
2742 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2743 }
2744
2745 Register SrcReg = MRI.createGenericVirtualRegister(LLT::scalar(64));
2746 BuildMI(MBB, I.getIterator(), I.getDebugLoc(),
2747 TII.get(AArch64::SUBREG_TO_REG))
2748 .addDef(SrcReg)
2749 .addImm(0)
2750 .addUse(I.getOperand(2).getReg())
2751 .addImm(AArch64::sub_32);
2752 RBI.constrainGenericRegister(I.getOperand(2).getReg(),
2753 AArch64::GPR32RegClass, MRI);
2754 I.getOperand(2).setReg(SrcReg);
2755
2756 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2757 }
2758 case TargetOpcode::G_FRAME_INDEX: {
2759 // allocas and G_FRAME_INDEX are only supported in addrspace(0).
2760 if (Ty != LLT::pointer(0, 64)) {
2761 LLVM_DEBUG(dbgs() << "G_FRAME_INDEX pointer has type: " << Ty
2762 << ", expected: " << LLT::pointer(0, 64) << '\n');
2763 return false;
2764 }
2765 I.setDesc(TII.get(AArch64::ADDXri));
2766
2767 // MOs for a #0 shifted immediate.
2768 I.addOperand(MachineOperand::CreateImm(0));
2769 I.addOperand(MachineOperand::CreateImm(0));
2770
2771 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2772 }
2773
2774 case TargetOpcode::G_GLOBAL_VALUE: {
2775 auto GV = I.getOperand(1).getGlobal();
2776 if (GV->isThreadLocal())
2777 return selectTLSGlobalValue(I, MRI);
2778
2779 unsigned OpFlags = STI.ClassifyGlobalReference(GV, TM);
2780 if (OpFlags & AArch64II::MO_GOT) {
2781 I.setDesc(TII.get(AArch64::LOADgot));
2782 I.getOperand(1).setTargetFlags(OpFlags);
2783 } else if (TM.getCodeModel() == CodeModel::Large) {
2784 // Materialize the global using movz/movk instructions.
2785 materializeLargeCMVal(I, GV, OpFlags);
2786 I.eraseFromParent();
2787 return true;
2788 } else if (TM.getCodeModel() == CodeModel::Tiny) {
2789 I.setDesc(TII.get(AArch64::ADR));
2790 I.getOperand(1).setTargetFlags(OpFlags);
2791 } else {
2792 I.setDesc(TII.get(AArch64::MOVaddr));
2793 I.getOperand(1).setTargetFlags(OpFlags | AArch64II::MO_PAGE);
2794 MachineInstrBuilder MIB(MF, I);
2795 MIB.addGlobalAddress(GV, I.getOperand(1).getOffset(),
2796 OpFlags | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
2797 }
2798 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2799 }
2800
2801 case TargetOpcode::G_ZEXTLOAD:
2802 case TargetOpcode::G_LOAD:
2803 case TargetOpcode::G_STORE: {
2804 GLoadStore &LdSt = cast<GLoadStore>(I);
2805 bool IsZExtLoad = I.getOpcode() == TargetOpcode::G_ZEXTLOAD;
2806 LLT PtrTy = MRI.getType(LdSt.getPointerReg());
2807
2808 if (PtrTy != LLT::pointer(0, 64)) {
2809 LLVM_DEBUG(dbgs() << "Load/Store pointer has type: " << PtrTy
2810 << ", expected: " << LLT::pointer(0, 64) << '\n');
2811 return false;
2812 }
2813
2814 uint64_t MemSizeInBytes = LdSt.getMemSize();
2815 unsigned MemSizeInBits = LdSt.getMemSizeInBits();
2816 AtomicOrdering Order = LdSt.getMMO().getSuccessOrdering();
2817
2818 // Need special instructions for atomics that affect ordering.
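     // Acquire/release/seq_cst accesses need the LDAR/LDAPR and STLR forms;
     // relaxed (monotonic/unordered) accesses fall through to the normal
     // addressing-mode selection below.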
2819 if (Order != AtomicOrdering::NotAtomic &&
2820 Order != AtomicOrdering::Unordered &&
2821 Order != AtomicOrdering::Monotonic) {
2822 assert(!isa<GZExtLoad>(LdSt));
2823 if (MemSizeInBytes > 64)
2824 return false;
2825
2826 if (isa<GLoad>(LdSt)) {
2827 static constexpr unsigned LDAPROpcodes[] = {
2828 AArch64::LDAPRB, AArch64::LDAPRH, AArch64::LDAPRW, AArch64::LDAPRX};
2829 static constexpr unsigned LDAROpcodes[] = {
2830 AArch64::LDARB, AArch64::LDARH, AArch64::LDARW, AArch64::LDARX};
2831 ArrayRef<unsigned> Opcodes =
2832 STI.hasLDAPR() && Order != AtomicOrdering::SequentiallyConsistent
2833 ? LDAPROpcodes
2834 : LDAROpcodes;
2835 I.setDesc(TII.get(Opcodes[Log2_32(MemSizeInBytes)]));
2836 } else {
2837 static constexpr unsigned Opcodes[] = {AArch64::STLRB, AArch64::STLRH,
2838 AArch64::STLRW, AArch64::STLRX};
2839 Register ValReg = LdSt.getReg(0);
2840 if (MRI.getType(ValReg).getSizeInBits() == 64 && MemSizeInBits != 64) {
2841 // Emit a subreg copy of 32 bits.
2842 Register NewVal = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
2843 MIB.buildInstr(TargetOpcode::COPY, {NewVal}, {})
2844 .addReg(I.getOperand(0).getReg(), 0, AArch64::sub_32);
2845 I.getOperand(0).setReg(NewVal);
2846 }
2847 I.setDesc(TII.get(Opcodes[Log2_32(MemSizeInBytes)]));
2848 }
2849 constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2850 return true;
2851 }
2852
2853#ifndef NDEBUG
2854 const Register PtrReg = LdSt.getPointerReg();
2855 const RegisterBank &PtrRB = *RBI.getRegBank(PtrReg, MRI, TRI);
2856 // Check that the pointer register is valid.
2857 assert(PtrRB.getID() == AArch64::GPRRegBankID &&
2858 "Load/Store pointer operand isn't a GPR");
2859 assert(MRI.getType(PtrReg).isPointer() &&
2860 "Load/Store pointer operand isn't a pointer");
2861#endif
2862
2863 const Register ValReg = LdSt.getReg(0);
2864 const LLT ValTy = MRI.getType(ValReg);
2865 const RegisterBank &RB = *RBI.getRegBank(ValReg, MRI, TRI);
2866
2867 // The code below doesn't support truncating stores, so we need to split it
2868 // again.
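     // e.g. a 32-bit store of an s64 value becomes a sub-register COPY into a
     // 32-bit register class followed by an ordinary 32-bit store.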
2869 if (isa<GStore>(LdSt) && ValTy.getSizeInBits() > MemSizeInBits) {
2870 unsigned SubReg;
2871 LLT MemTy = LdSt.getMMO().getMemoryType();
2872 auto *RC = getRegClassForTypeOnBank(MemTy, RB);
2873 if (!getSubRegForClass(RC, TRI, SubReg))
2874 return false;
2875
2876 // Generate a subreg copy.
2877 auto Copy = MIB.buildInstr(TargetOpcode::COPY, {MemTy}, {})
2878 .addReg(ValReg, 0, SubReg)
2879 .getReg(0);
2880 RBI.constrainGenericRegister(Copy, *RC, MRI);
2881 LdSt.getOperand(0).setReg(Copy);
2882 } else if (isa<GLoad>(LdSt) && ValTy.getSizeInBits() > MemSizeInBits) {
2883 // If this is an any-extending load from the FPR bank, split it into a regular
2884 // load + extend.
2885 if (RB.getID() == AArch64::FPRRegBankID) {
2886 unsigned SubReg;
2887 LLT MemTy = LdSt.getMMO().getMemoryType();
2888 auto *RC = getRegClassForTypeOnBank(MemTy, RB);
2889 if (!getSubRegForClass(RC, TRI, SubReg))
2890 return false;
2891 Register OldDst = LdSt.getReg(0);
2892 Register NewDst =
2893 MRI.createGenericVirtualRegister(LdSt.getMMO().getMemoryType());
2894 LdSt.getOperand(0).setReg(NewDst);
2895 MRI.setRegBank(NewDst, RB);
2896 // Generate a SUBREG_TO_REG to extend it.
2897 MIB.setInsertPt(MIB.getMBB(), std::next(LdSt.getIterator()));
2898 MIB.buildInstr(AArch64::SUBREG_TO_REG, {OldDst}, {})
2899 .addImm(0)
2900 .addUse(NewDst)
2901 .addImm(SubReg);
2902 auto SubRegRC = getRegClassForTypeOnBank(MRI.getType(OldDst), RB);
2903 RBI.constrainGenericRegister(OldDst, *SubRegRC, MRI);
2904 MIB.setInstr(LdSt);
2905 }
2906 }
2907
2908 // Helper lambda for partially selecting I. Either returns the original
2909 // instruction with an updated opcode, or a new instruction.
2910 auto SelectLoadStoreAddressingMode = [&]() -> MachineInstr * {
2911 bool IsStore = isa<GStore>(I);
1. Assuming 'I' is not a 'GStore'
2912 const unsigned NewOpc =
2913 selectLoadStoreUIOp(I.getOpcode(), RB.getID(), MemSizeInBits);
2914 if (NewOpc == I.getOpcode())
2. Taking false branch
2915 return nullptr;
2916 // Check if we can fold anything into the addressing mode.
2917 auto AddrModeFns =
2918 selectAddrModeIndexed(I.getOperand(1), MemSizeInBytes);
3. Calling 'AArch64InstructionSelector::selectAddrModeIndexed'
2919 if (!AddrModeFns) {
2920 // Can't fold anything. Use the original instruction.
2921 I.setDesc(TII.get(NewOpc));
2922 I.addOperand(MachineOperand::CreateImm(0));
2923 return &I;
2924 }
2925
2926 // Folded something. Create a new instruction and return it.
2927 auto NewInst = MIB.buildInstr(NewOpc, {}, {}, I.getFlags());
2928 Register CurValReg = I.getOperand(0).getReg();
2929 IsStore ? NewInst.addUse(CurValReg) : NewInst.addDef(CurValReg);
2930 NewInst.cloneMemRefs(I);
2931 for (auto &Fn : *AddrModeFns)
2932 Fn(NewInst);
2933 I.eraseFromParent();
2934 return &*NewInst;
2935 };
2936
2937 MachineInstr *LoadStore = SelectLoadStoreAddressingMode();
2938 if (!LoadStore)
2939 return false;
2940
2941 // If we're storing a 0, use WZR/XZR.
2942 if (Opcode == TargetOpcode::G_STORE) {
2943 auto CVal = getIConstantVRegValWithLookThrough(
2944 LoadStore->getOperand(0).getReg(), MRI);
2945 if (CVal && CVal->Value == 0) {
2946 switch (LoadStore->getOpcode()) {
2947 case AArch64::STRWui:
2948 case AArch64::STRHHui:
2949 case AArch64::STRBBui:
2950 LoadStore->getOperand(0).setReg(AArch64::WZR);
2951 break;
2952 case AArch64::STRXui:
2953 LoadStore->getOperand(0).setReg(AArch64::XZR);
2954 break;
2955 }
2956 }
2957 }
2958
2959 if (IsZExtLoad) {
2960 // The zextload from a smaller type to i32 should be handled by the
2961 // importer.
2962 if (MRI.getType(LoadStore->getOperand(0).getReg()).getSizeInBits() != 64)
2963 return false;
2964 // If we have a ZEXTLOAD then change the load's type to be a narrower reg
2965 // and zero_extend with SUBREG_TO_REG.
2966 Register LdReg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
2967 Register DstReg = LoadStore->getOperand(0).getReg();
2968 LoadStore->getOperand(0).setReg(LdReg);
2969
2970 MIB.setInsertPt(MIB.getMBB(), std::next(LoadStore->getIterator()));
2971 MIB.buildInstr(AArch64::SUBREG_TO_REG, {DstReg}, {})
2972 .addImm(0)
2973 .addUse(LdReg)
2974 .addImm(AArch64::sub_32);
2975 constrainSelectedInstRegOperands(*LoadStore, TII, TRI, RBI);
2976 return RBI.constrainGenericRegister(DstReg, AArch64::GPR64allRegClass,
2977 MRI);
2978 }
2979 return constrainSelectedInstRegOperands(*LoadStore, TII, TRI, RBI);
2980 }
2981
2982 case TargetOpcode::G_SMULH:
2983 case TargetOpcode::G_UMULH: {
2984 // Reject the various things we don't support yet.
2985 if (unsupportedBinOp(I, RBI, MRI, TRI))
2986 return false;
2987
2988 const Register DefReg = I.getOperand(0).getReg();
2989 const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI);
2990
2991 if (RB.getID() != AArch64::GPRRegBankID) {
2992 LLVM_DEBUG(dbgs() << "G_[SU]MULH on bank: " << RB << ", expected: GPR\n");
2993 return false;
2994 }
2995
2996 if (Ty != LLT::scalar(64)) {
2997 LLVM_DEBUG(dbgs() << "G_[SU]MULH has type: " << Ty
2998 << ", expected: " << LLT::scalar(64) << '\n');
2999 return false;
3000 }
3001
3002 unsigned NewOpc = I.getOpcode() == TargetOpcode::G_SMULH ? AArch64::SMULHrr
3003 : AArch64::UMULHrr;
3004 I.setDesc(TII.get(NewOpc));
3005
3006 // Now that we selected an opcode, we need to constrain the register
3007 // operands to use appropriate classes.
3008 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3009 }
3010 case TargetOpcode::G_LSHR:
3011 case TargetOpcode::G_ASHR:
3012 if (MRI.getType(I.getOperand(0).getReg()).isVector())
3013 return selectVectorAshrLshr(I, MRI);
3014 [[fallthrough]];
3015 case TargetOpcode::G_SHL:
3016 if (Opcode == TargetOpcode::G_SHL &&
3017 MRI.getType(I.getOperand(0).getReg()).isVector())
3018 return selectVectorSHL(I, MRI);
3019
3020 // These shifts were legalized to have 64 bit shift amounts because we
3021 // want to take advantage of the selection patterns that assume the
3022 // immediates are s64s, however, selectBinaryOp will assume both operands
3023 // will have the same bit size.
3024 {
3025 Register SrcReg = I.getOperand(1).getReg();
3026 Register ShiftReg = I.getOperand(2).getReg();
3027 const LLT ShiftTy = MRI.getType(ShiftReg);
3028 const LLT SrcTy = MRI.getType(SrcReg);
3029 if (!SrcTy.isVector() && SrcTy.getSizeInBits() == 32 &&
3030 ShiftTy.getSizeInBits() == 64) {
3031 assert(!ShiftTy.isVector() && "unexpected vector shift ty");
3032 // Insert a subregister copy to implement a 64->32 trunc
3033 auto Trunc = MIB.buildInstr(TargetOpcode::COPY, {SrcTy}, {})
3034 .addReg(ShiftReg, 0, AArch64::sub_32);
3035 MRI.setRegBank(Trunc.getReg(0), RBI.getRegBank(AArch64::GPRRegBankID));
3036 I.getOperand(2).setReg(Trunc.getReg(0));
3037 }
3038 }
3039 [[fallthrough]];
3040 case TargetOpcode::G_OR: {
3041 // Reject the various things we don't support yet.
3042 if (unsupportedBinOp(I, RBI, MRI, TRI))
3043 return false;
3044
3045 const unsigned OpSize = Ty.getSizeInBits();
3046
3047 const Register DefReg = I.getOperand(0).getReg();
3048 const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI);
3049
3050 const unsigned NewOpc = selectBinaryOp(I.getOpcode(), RB.getID(), OpSize);
3051 if (NewOpc == I.getOpcode())
3052 return false;
3053
3054 I.setDesc(TII.get(NewOpc));
3055 // FIXME: Should the type be always reset in setDesc?
3056
3057 // Now that we selected an opcode, we need to constrain the register
3058 // operands to use appropriate classes.
3059 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3060 }
3061
3062 case TargetOpcode::G_PTR_ADD: {
3063 emitADD(I.getOperand(0).getReg(), I.getOperand(1), I.getOperand(2), MIB);
3064 I.eraseFromParent();
3065 return true;
3066 }
3067 case TargetOpcode::G_SADDO:
3068 case TargetOpcode::G_UADDO:
3069 case TargetOpcode::G_SSUBO:
3070 case TargetOpcode::G_USUBO: {
3071 // Emit the operation and get the correct condition code.
3072 auto OpAndCC = emitOverflowOp(Opcode, I.getOperand(0).getReg(),
3073 I.getOperand(2), I.getOperand(3), MIB);
3074
3075 // Now, put the overflow result in the register given by the first operand
3076 // to the overflow op. CSINC increments the result when the predicate is
3077 // false, so to get the increment when it's true, we need to use the
3078 // inverse. In this case, we want to increment when carry is set.
3079 Register ZReg = AArch64::WZR;
3080 emitCSINC(/*Dst=*/I.getOperand(1).getReg(), /*Src1=*/ZReg, /*Src2=*/ZReg,
3081 getInvertedCondCode(OpAndCC.second), MIB);
3082 I.eraseFromParent();
3083 return true;
3084 }
3085
3086 case TargetOpcode::G_PTRMASK: {
3087 Register MaskReg = I.getOperand(2).getReg();
3088 Optional<int64_t> MaskVal = getIConstantVRegSExtVal(MaskReg, MRI);
3089 // TODO: Implement arbitrary cases
3090 if (!MaskVal || !isShiftedMask_64(*MaskVal))
3091 return false;
3092
3093 uint64_t Mask = *MaskVal;
3094 I.setDesc(TII.get(AArch64::ANDXri));
3095 I.getOperand(2).ChangeToImmediate(
3096 AArch64_AM::encodeLogicalImmediate(Mask, 64));
3097
3098 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3099 }
3100 case TargetOpcode::G_PTRTOINT:
3101 case TargetOpcode::G_TRUNC: {
3102 const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
3103 const LLT SrcTy = MRI.getType(I.getOperand(1).getReg());
3104
3105 const Register DstReg = I.getOperand(0).getReg();
3106 const Register SrcReg = I.getOperand(1).getReg();
3107
3108 const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
3109 const RegisterBank &SrcRB = *RBI.getRegBank(SrcReg, MRI, TRI);
3110
3111 if (DstRB.getID() != SrcRB.getID()) {
3112 LLVM_DEBUG(
3113 dbgs() << "G_TRUNC/G_PTRTOINT input/output on different banks\n");
3114 return false;
3115 }
3116
3117 if (DstRB.getID() == AArch64::GPRRegBankID) {
3118 const TargetRegisterClass *DstRC = getRegClassForTypeOnBank(DstTy, DstRB);
3119 if (!DstRC)
3120 return false;
3121
3122 const TargetRegisterClass *SrcRC = getRegClassForTypeOnBank(SrcTy, SrcRB);
3123 if (!SrcRC)
3124 return false;
3125
3126 if (!RBI.constrainGenericRegister(SrcReg, *SrcRC, MRI) ||
3127 !RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) {
3128 LLVM_DEBUG(dbgs() << "Failed to constrain G_TRUNC/G_PTRTOINT\n");
3129 return false;
3130 }
3131
3132 if (DstRC == SrcRC) {
3133 // Nothing to be done
3134 } else if (Opcode == TargetOpcode::G_TRUNC && DstTy == LLT::scalar(32) &&
3135 SrcTy == LLT::scalar(64)) {
3136 llvm_unreachable("TableGen can import this case");
3137 return false;
3138 } else if (DstRC == &AArch64::GPR32RegClass &&
3139 SrcRC == &AArch64::GPR64RegClass) {
3140 I.getOperand(1).setSubReg(AArch64::sub_32);
3141 } else {
3142 LLVM_DEBUG(
3143 dbgs() << "Unhandled mismatched classes in G_TRUNC/G_PTRTOINT\n");
3144 return false;
3145 }
3146
3147 I.setDesc(TII.get(TargetOpcode::COPY));
3148 return true;
3149 } else if (DstRB.getID() == AArch64::FPRRegBankID) {
3150 if (DstTy == LLT::fixed_vector(4, 16) &&
3151 SrcTy == LLT::fixed_vector(4, 32)) {
3152 I.setDesc(TII.get(AArch64::XTNv4i16));
3153 constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3154 return true;
3155 }
3156
3157 if (!SrcTy.isVector() && SrcTy.getSizeInBits() == 128) {
3158 MachineInstr *Extract = emitExtractVectorElt(
3159 DstReg, DstRB, LLT::scalar(DstTy.getSizeInBits()), SrcReg, 0, MIB);
3160 if (!Extract)
3161 return false;
3162 I.eraseFromParent();
3163 return true;
3164 }
3165
3166 // We might have a vector G_PTRTOINT, in which case just emit a COPY.
3167 if (Opcode == TargetOpcode::G_PTRTOINT) {
3168 assert(DstTy.isVector() && "Expected an FPR ptrtoint to be a vector");
3169 I.setDesc(TII.get(TargetOpcode::COPY));
3170 return selectCopy(I, TII, MRI, TRI, RBI);
3171 }
3172 }
3173
3174 return false;
3175 }
3176
3177 case TargetOpcode::G_ANYEXT: {
3178 if (selectUSMovFromExtend(I, MRI))
3179 return true;
3180
3181 const Register DstReg = I.getOperand(0).getReg();
3182 const Register SrcReg = I.getOperand(1).getReg();
3183
3184 const RegisterBank &RBDst = *RBI.getRegBank(DstReg, MRI, TRI);
3185 if (RBDst.getID() != AArch64::GPRRegBankID) {
3186 LLVM_DEBUG(dbgs() << "G_ANYEXT on bank: " << RBDst
3187 << ", expected: GPR\n");
3188 return false;
3189 }
3190
3191 const RegisterBank &RBSrc = *RBI.getRegBank(SrcReg, MRI, TRI);
3192 if (RBSrc.getID() != AArch64::GPRRegBankID) {
3193 LLVM_DEBUG(dbgs() << "G_ANYEXT on bank: " << RBSrc
3194 << ", expected: GPR\n");
3195 return false;
3196 }
3197
3198 const unsigned DstSize = MRI.getType(DstReg).getSizeInBits();
3199
3200 if (DstSize == 0) {
3201 LLVM_DEBUG(dbgs() << "G_ANYEXT operand has no size, not a gvreg?\n");
3202 return false;
3203 }
3204
3205 if (DstSize != 64 && DstSize > 32) {
3206 LLVM_DEBUG(dbgs() << "G_ANYEXT to size: " << DstSize
3207 << ", expected: 32 or 64\n");
3208 return false;
3209 }
3210 // At this point G_ANYEXT is just like a plain COPY, but we need
3211 // to explicitly form the 64-bit value if any.
3212 if (DstSize > 32) {
3213 Register ExtSrc = MRI.createVirtualRegister(&AArch64::GPR64allRegClass);
3214 BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::SUBREG_TO_REG))
3215 .addDef(ExtSrc)
3216 .addImm(0)
3217 .addUse(SrcReg)
3218 .addImm(AArch64::sub_32);
3219 I.getOperand(1).setReg(ExtSrc);
3220 }
3221 return selectCopy(I, TII, MRI, TRI, RBI);
3222 }
3223
3224 case TargetOpcode::G_ZEXT:
3225 case TargetOpcode::G_SEXT_INREG:
3226 case TargetOpcode::G_SEXT: {
3227 if (selectUSMovFromExtend(I, MRI))
3228 return true;
3229
3230 unsigned Opcode = I.getOpcode();
3231 const bool IsSigned = Opcode != TargetOpcode::G_ZEXT;
3232 const Register DefReg = I.getOperand(0).getReg();
3233 Register SrcReg = I.getOperand(1).getReg();
3234 const LLT DstTy = MRI.getType(DefReg);
3235 const LLT SrcTy = MRI.getType(SrcReg);
3236 unsigned DstSize = DstTy.getSizeInBits();
3237 unsigned SrcSize = SrcTy.getSizeInBits();
3238
3239 // SEXT_INREG has the same src reg size as dst, the size of the value to be
3240 // extended is encoded in the imm.
3241 if (Opcode == TargetOpcode::G_SEXT_INREG)
3242 SrcSize = I.getOperand(2).getImm();
3243
3244 if (DstTy.isVector())
3245 return false; // Should be handled by imported patterns.
3246
3247 assert((*RBI.getRegBank(DefReg, MRI, TRI)).getID() ==
3248 AArch64::GPRRegBankID &&
3249 "Unexpected ext regbank");
3250
3251 MachineInstr *ExtI;
3252
3253 // First check if we're extending the result of a load which has a dest type
3254 // smaller than 32 bits, then this zext is redundant. GPR32 is the smallest
3255 // GPR register on AArch64 and all loads which are smaller automatically
3256 // zero-extend the upper bits. E.g.
3257 // %v(s8) = G_LOAD %p, :: (load 1)
3258 // %v2(s32) = G_ZEXT %v(s8)
3259 if (!IsSigned) {
3260 auto *LoadMI = getOpcodeDef(TargetOpcode::G_LOAD, SrcReg, MRI);
3261 bool IsGPR =
3262 RBI.getRegBank(SrcReg, MRI, TRI)->getID() == AArch64::GPRRegBankID;
3263 if (LoadMI && IsGPR) {
3264 const MachineMemOperand *MemOp = *LoadMI->memoperands_begin();
3265 unsigned BytesLoaded = MemOp->getSize();
3266 if (BytesLoaded < 4 && SrcTy.getSizeInBytes() == BytesLoaded)
3267 return selectCopy(I, TII, MRI, TRI, RBI);
3268 }
3269
3270 // For the 32-bit -> 64-bit case, we can emit a mov (ORRWrs)
3271 // + SUBREG_TO_REG.
3272 //
3273 // If we are zero extending from 32 bits to 64 bits, it's possible that
3274 // the instruction implicitly does the zero extend for us. In that case,
3275 // we only need the SUBREG_TO_REG.
3276 if (IsGPR && SrcSize == 32 && DstSize == 64) {
3277 // Unlike with the G_LOAD case, we don't want to look through copies
3278 // here. (See isDef32.)
3279 MachineInstr *Def = MRI.getVRegDef(SrcReg);
3280 Register SubregToRegSrc = SrcReg;
3281
3282 // Does the instruction implicitly zero extend?
3283 if (!Def || !isDef32(*Def)) {
3284 // No. Zero out using an OR.
3285 Register OrDst = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
3286 const Register ZReg = AArch64::WZR;
3287 MIB.buildInstr(AArch64::ORRWrs, {OrDst}, {ZReg, SrcReg}).addImm(0);
3288 SubregToRegSrc = OrDst;
3289 }
3290
3291 MIB.buildInstr(AArch64::SUBREG_TO_REG, {DefReg}, {})
3292 .addImm(0)
3293 .addUse(SubregToRegSrc)
3294 .addImm(AArch64::sub_32);
3295
3296 if (!RBI.constrainGenericRegister(DefReg, AArch64::GPR64RegClass,
3297 MRI)) {
3298 LLVM_DEBUG(dbgs() << "Failed to constrain G_ZEXT destination\n");
3299 return false;
3300 }
3301
3302 if (!RBI.constrainGenericRegister(SrcReg, AArch64::GPR32RegClass,
3303 MRI)) {
3304 LLVM_DEBUG(dbgs() << "Failed to constrain G_ZEXT source\n");
3305 return false;
3306 }
3307
3308 I.eraseFromParent();
3309 return true;
3310 }
3311 }
3312
3313 if (DstSize == 64) {
3314 if (Opcode != TargetOpcode::G_SEXT_INREG) {
3315 // FIXME: Can we avoid manually doing this?
3316 if (!RBI.constrainGenericRegister(SrcReg, AArch64::GPR32RegClass,
3317 MRI)) {
3318 LLVM_DEBUG(dbgs() << "Failed to constrain " << TII.getName(Opcode)
3319 << " operand\n");
3320 return false;
3321 }
3322 SrcReg = MIB.buildInstr(AArch64::SUBREG_TO_REG,
3323 {&AArch64::GPR64RegClass}, {})
3324 .addImm(0)
3325 .addUse(SrcReg)
3326 .addImm(AArch64::sub_32)
3327 .getReg(0);
3328 }
3329
3330 ExtI = MIB.buildInstr(IsSigned ? AArch64::SBFMXri : AArch64::UBFMXri,
3331 {DefReg}, {SrcReg})
3332 .addImm(0)
3333 .addImm(SrcSize - 1);
3334 } else if (DstSize <= 32) {
3335 ExtI = MIB.buildInstr(IsSigned ? AArch64::SBFMWri : AArch64::UBFMWri,
3336 {DefReg}, {SrcReg})
3337 .addImm(0)
3338 .addImm(SrcSize - 1);
3339 } else {
3340 return false;
3341 }
3342
3343 constrainSelectedInstRegOperands(*ExtI, TII, TRI, RBI);
3344 I.eraseFromParent();
3345 return true;
3346 }
3347
3348 case TargetOpcode::G_SITOFP:
3349 case TargetOpcode::G_UITOFP:
3350 case TargetOpcode::G_FPTOSI:
3351 case TargetOpcode::G_FPTOUI: {
3352 const LLT DstTy = MRI.getType(I.getOperand(0).getReg()),
3353 SrcTy = MRI.getType(I.getOperand(1).getReg());
3354 const unsigned NewOpc = selectFPConvOpc(Opcode, DstTy, SrcTy);
3355 if (NewOpc == Opcode)
3356 return false;
3357
3358 I.setDesc(TII.get(NewOpc));
3359 constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3360 I.setFlags(MachineInstr::NoFPExcept);
3361
3362 return true;
3363 }
3364
3365 case TargetOpcode::G_FREEZE:
3366 return selectCopy(I, TII, MRI, TRI, RBI);
3367
3368 case TargetOpcode::G_INTTOPTR:
3369 // The importer is currently unable to import pointer types since they
3370 // didn't exist in SelectionDAG.
3371 return selectCopy(I, TII, MRI, TRI, RBI);
3372
3373 case TargetOpcode::G_BITCAST:
3374 // Imported SelectionDAG rules can handle every bitcast except those that
3375 // bitcast from a type to the same type. Ideally, these shouldn't occur
3376 // but we might not run an optimizer that deletes them. The other exception
3377 // is bitcasts involving pointer types, as SelectionDAG has no knowledge
3378 // of them.
3379 return selectCopy(I, TII, MRI, TRI, RBI);
3380
3381 case TargetOpcode::G_SELECT: {
3382 auto &Sel = cast<GSelect>(I);
3383 const Register CondReg = Sel.getCondReg();
3384 const Register TReg = Sel.getTrueReg();
3385 const Register FReg = Sel.getFalseReg();
3386
3387 if (tryOptSelect(Sel))
3388 return true;
3389
3390 // Make sure to use an unused vreg instead of wzr, so that the peephole
3391 // optimizations will be able to optimize these.
3392 Register DeadVReg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
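     // ANDS against immediate 1 tests bit 0 of the condition and sets NZCV, so
     // emitSelect below can key the select on the NE condition.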
3393 auto TstMI = MIB.buildInstr(AArch64::ANDSWri, {DeadVReg}, {CondReg})
3394 .addImm(AArch64_AM::encodeLogicalImmediate(1, 32));
3395 constrainSelectedInstRegOperands(*TstMI, TII, TRI, RBI);
3396 if (!emitSelect(Sel.getReg(0), TReg, FReg, AArch64CC::NE, MIB))
3397 return false;
3398 Sel.eraseFromParent();
3399 return true;
3400 }
3401 case TargetOpcode::G_ICMP: {
3402 if (Ty.isVector())
3403 return selectVectorICmp(I, MRI);
3404
3405 if (Ty != LLT::scalar(32)) {
3406 LLVM_DEBUG(dbgs() << "G_ICMP result has type: " << Ty
3407 << ", expected: " << LLT::scalar(32) << '\n');
3408 return false;
3409 }
3410
3411 auto Pred = static_cast<CmpInst::Predicate>(I.getOperand(1).getPredicate());
3412 const AArch64CC::CondCode InvCC =
3413 changeICMPPredToAArch64CC(CmpInst::getInversePredicate(Pred));
3414 emitIntegerCompare(I.getOperand(2), I.getOperand(3), I.getOperand(1), MIB);
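     // CSINC Wd, WZR, WZR, <inverted cc> is the CSET alias: it turns the NZCV
     // flags produced by the compare into a 0/1 result in the destination.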
3415 emitCSINC(/*Dst=*/I.getOperand(0).getReg(), /*Src1=*/AArch64::WZR,
3416 /*Src2=*/AArch64::WZR, InvCC, MIB);
3417 I.eraseFromParent();
3418 return true;
3419 }
3420
3421 case TargetOpcode::G_FCMP: {
3422 CmpInst::Predicate Pred =
3423 static_cast<CmpInst::Predicate>(I.getOperand(1).getPredicate());
3424 if (!emitFPCompare(I.getOperand(2).getReg(), I.getOperand(3).getReg(), MIB,
3425 Pred) ||
3426 !emitCSetForFCmp(I.getOperand(0).getReg(), Pred, MIB))
3427 return false;
3428 I.eraseFromParent();
3429 return true;
3430 }
3431 case TargetOpcode::G_VASTART:
3432 return STI.isTargetDarwin() ? selectVaStartDarwin(I, MF, MRI)
3433 : selectVaStartAAPCS(I, MF, MRI);
3434 case TargetOpcode::G_INTRINSIC:
3435 return selectIntrinsic(I, MRI);
3436 case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS:
3437 return selectIntrinsicWithSideEffects(I, MRI);
3438 case TargetOpcode::G_IMPLICIT_DEF: {
3439 I.setDesc(TII.get(TargetOpcode::IMPLICIT_DEF));
3440 const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
3441 const Register DstReg = I.getOperand(0).getReg();
3442 const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
3443 const TargetRegisterClass *DstRC = getRegClassForTypeOnBank(DstTy, DstRB);
3444 RBI.constrainGenericRegister(DstReg, *DstRC, MRI);
3445 return true;
3446 }
3447 case TargetOpcode::G_BLOCK_ADDR: {
3448 if (TM.getCodeModel() == CodeModel::Large) {
3449 materializeLargeCMVal(I, I.getOperand(1).getBlockAddress(), 0);
3450 I.eraseFromParent();
3451 return true;
3452 } else {
3453 I.setDesc(TII.get(AArch64::MOVaddrBA));
3454 auto MovMI = BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::MOVaddrBA),
3455 I.getOperand(0).getReg())
3456 .addBlockAddress(I.getOperand(1).getBlockAddress(),
3457 /* Offset */ 0, AArch64II::MO_PAGE)
3458 .addBlockAddress(
3459 I.getOperand(1).getBlockAddress(), /* Offset */ 0,
3460 AArch64II::MO_NC | AArch64II::MO_PAGEOFF);
3461 I.eraseFromParent();
3462 return constrainSelectedInstRegOperands(*MovMI, TII, TRI, RBI);
3463 }
3464 }
3465 case AArch64::G_DUP: {
3466 // When the scalar of G_DUP is an s8/s16 gpr, it can't be selected by the
3467 // imported patterns. Do it manually here. Avoiding generating an s16 gpr is
3468 // difficult because at RBS we may end up pessimizing the fpr case if we
3469 // decided to add an anyextend to fix this. Manual selection is the most
3470 // robust solution for now.
3471 if (RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI)->getID() !=
3472 AArch64::GPRRegBankID)
3473 return false; // We expect the fpr regbank case to be imported.
3474 LLT VecTy = MRI.getType(I.getOperand(0).getReg());
3475 if (VecTy == LLT::fixed_vector(8, 8))
3476 I.setDesc(TII.get(AArch64::DUPv8i8gpr));
3477 else if (VecTy == LLT::fixed_vector(16, 8))
3478 I.setDesc(TII.get(AArch64::DUPv16i8gpr));
3479 else if (VecTy == LLT::fixed_vector(4, 16))
3480 I.setDesc(TII.get(AArch64::DUPv4i16gpr));
3481 else if (VecTy == LLT::fixed_vector(8, 16))
3482 I.setDesc(TII.get(AArch64::DUPv8i16gpr));
3483 else
3484 return false;
3485 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3486 }
3487 case TargetOpcode::G_INTRINSIC_TRUNC:
3488 return selectIntrinsicTrunc(I, MRI);
3489 case TargetOpcode::G_INTRINSIC_ROUND:
3490 return selectIntrinsicRound(I, MRI);
3491 case TargetOpcode::G_BUILD_VECTOR:
3492 return selectBuildVector(I, MRI);
3493 case TargetOpcode::G_MERGE_VALUES:
3494 return selectMergeValues(I, MRI);
3495 case TargetOpcode::G_UNMERGE_VALUES:
3496 return selectUnmergeValues(I, MRI);
3497 case TargetOpcode::G_SHUFFLE_VECTOR:
3498 return selectShuffleVector(I, MRI);
3499 case TargetOpcode::G_EXTRACT_VECTOR_ELT:
3500 return selectExtractElt(I, MRI);
3501 case TargetOpcode::G_INSERT_VECTOR_ELT:
3502 return selectInsertElt(I, MRI);
3503 case TargetOpcode::G_CONCAT_VECTORS:
3504 return selectConcatVectors(I, MRI);
3505 case TargetOpcode::G_JUMP_TABLE:
3506 return selectJumpTable(I, MRI);
3507 case TargetOpcode::G_VECREDUCE_FADD:
3508 case TargetOpcode::G_VECREDUCE_ADD:
3509 return selectReduction(I, MRI);
3510 case TargetOpcode::G_MEMCPY:
3511 case TargetOpcode::G_MEMCPY_INLINE:
3512 case TargetOpcode::G_MEMMOVE:
3513 case TargetOpcode::G_MEMSET:
3514 assert(STI.hasMOPS() && "Shouldn't get here without +mops feature");
3515 return selectMOPS(I, MRI);
3516 }
3517
3518 return false;
3519}
3520
3521bool AArch64InstructionSelector::selectReduction(MachineInstr &I,
3522 MachineRegisterInfo &MRI) {
3523 Register VecReg = I.getOperand(1).getReg();
3524 LLT VecTy = MRI.getType(VecReg);
3525 if (I.getOpcode() == TargetOpcode::G_VECREDUCE_ADD) {
3526 // For <2 x i32> ADDPv2i32 generates an FPR64 value, so we need to emit
3527 // a subregister copy afterwards.
3528 if (VecTy == LLT::fixed_vector(2, 32)) {
3529 Register DstReg = I.getOperand(0).getReg();
3530 auto AddP = MIB.buildInstr(AArch64::ADDPv2i32, {&AArch64::FPR64RegClass},
3531 {VecReg, VecReg});
3532 auto Copy = MIB.buildInstr(TargetOpcode::COPY, {DstReg}, {})
3533 .addReg(AddP.getReg(0), 0, AArch64::ssub)
3534 .getReg(0);
3535 RBI.constrainGenericRegister(Copy, AArch64::FPR32RegClass, MRI);
3536 I.eraseFromParent();
3537 return constrainSelectedInstRegOperands(*AddP, TII, TRI, RBI);
3538 }
3539
3540 unsigned Opc = 0;
3541 if (VecTy == LLT::fixed_vector(16, 8))
3542 Opc = AArch64::ADDVv16i8v;
3543 else if (VecTy == LLT::fixed_vector(8, 16))
3544 Opc = AArch64::ADDVv8i16v;
3545 else if (VecTy == LLT::fixed_vector(4, 32))
3546 Opc = AArch64::ADDVv4i32v;
3547 else if (VecTy == LLT::fixed_vector(2, 64))
3548 Opc = AArch64::ADDPv2i64p;
3549 else {
3550 LLVM_DEBUG(dbgs() << "Unhandled type for add reduction");
3551 return false;
3552 }
3553 I.setDesc(TII.get(Opc));
3554 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3555 }
3556
3557 if (I.getOpcode() == TargetOpcode::G_VECREDUCE_FADD) {
3558 unsigned Opc = 0;
3559 if (VecTy == LLT::fixed_vector(2, 32))
3560 Opc = AArch64::FADDPv2i32p;
3561 else if (VecTy == LLT::fixed_vector(2, 64))
3562 Opc = AArch64::FADDPv2i64p;
3563 else {
3564 LLVM_DEBUG(dbgs() << "Unhandled type for fadd reduction");
3565 return false;
3566 }
3567 I.setDesc(TII.get(Opc));
3568 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3569 }
3570 return false;
3571}
3572
3573bool AArch64InstructionSelector::selectMOPS(MachineInstr &GI,
3574 MachineRegisterInfo &MRI) {
3575 unsigned Mopcode;
3576 switch (GI.getOpcode()) {
3577 case TargetOpcode::G_MEMCPY:
3578 case TargetOpcode::G_MEMCPY_INLINE:
3579 Mopcode = AArch64::MOPSMemoryCopyPseudo;
3580 break;
3581 case TargetOpcode::G_MEMMOVE:
3582 Mopcode = AArch64::MOPSMemoryMovePseudo;
3583 break;
3584 case TargetOpcode::G_MEMSET:
3585 // For tagged memset see llvm.aarch64.mops.memset.tag
3586 Mopcode = AArch64::MOPSMemorySetPseudo;
3587 break;
3588 }
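     // Each MOPS pseudo is later expanded into the architectural
     // prologue/main/epilogue triple (e.g. SETP/SETM/SETE for memset), which
     // updates its pointer, source/value and size operands in place; hence the
     // clobberable copies created below.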
3589
3590 auto &DstPtr = GI.getOperand(0);
3591 auto &SrcOrVal = GI.getOperand(1);
3592 auto &Size = GI.getOperand(2);
3593
3594 // Create copies of the registers that can be clobbered.
3595 const Register DstPtrCopy = MRI.cloneVirtualRegister(DstPtr.getReg());
3596 const Register SrcValCopy = MRI.cloneVirtualRegister(SrcOrVal.getReg());
3597 const Register SizeCopy = MRI.cloneVirtualRegister(Size.getReg());
3598
3599 const bool IsSet = Mopcode == AArch64::MOPSMemorySetPseudo;
3600 const auto &SrcValRegClass =
3601 IsSet ? AArch64::GPR64RegClass : AArch64::GPR64commonRegClass;
3602
3603 // Constrain to specific registers
3604 RBI.constrainGenericRegister(DstPtrCopy, AArch64::GPR64commonRegClass, MRI);
3605 RBI.constrainGenericRegister(SrcValCopy, SrcValRegClass, MRI);
3606 RBI.constrainGenericRegister(SizeCopy, AArch64::GPR64RegClass, MRI);
3607
3608 MIB.buildCopy(DstPtrCopy, DstPtr);
3609 MIB.buildCopy(SrcValCopy, SrcOrVal);
3610 MIB.buildCopy(SizeCopy, Size);
3611
3612 // New instruction uses the copied registers because it must update them.
3613 // The defs are not used since they don't exist in G_MEM*. They are still
3614 // tied.
3615 // Note: order of operands is different from G_MEMSET, G_MEMCPY, G_MEMMOVE
3616 Register DefDstPtr = MRI.createVirtualRegister(&AArch64::GPR64commonRegClass);
3617 Register DefSize = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
3618 if (IsSet) {
3619 MIB.buildInstr(Mopcode, {DefDstPtr, DefSize},
3620 {DstPtrCopy, SizeCopy, SrcValCopy});
3621 } else {
3622 Register DefSrcPtr = MRI.createVirtualRegister(&SrcValRegClass);
3623 MIB.buildInstr(Mopcode, {DefDstPtr, DefSrcPtr, DefSize},
3624 {DstPtrCopy, SrcValCopy, SizeCopy});
3625 }
3626
3627 GI.eraseFromParent();
3628 return true;
3629}
3630
3631bool AArch64InstructionSelector::selectBrJT(MachineInstr &I,
3632 MachineRegisterInfo &MRI) {
3633 assert(I.getOpcode() == TargetOpcode::G_BRJT && "Expected G_BRJT");
3634 Register JTAddr = I.getOperand(0).getReg();
3635 unsigned JTI = I.getOperand(1).getIndex();
3636 Register Index = I.getOperand(2).getReg();
3637
3638 Register TargetReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
3639 Register ScratchReg = MRI.createVirtualRegister(&AArch64::GPR64spRegClass);
3640
3641 MF->getInfo<AArch64FunctionInfo>()->setJumpTableEntryInfo(JTI, 4, nullptr);
3642 auto JumpTableInst = MIB.buildInstr(AArch64::JumpTableDest32,
3643 {TargetReg, ScratchReg}, {JTAddr, Index})
3644 .addJumpTableIndex(JTI);
3645 // Build the indirect branch.
3646 MIB.buildInstr(AArch64::BR, {}, {TargetReg});
3647 I.eraseFromParent();
3648 return constrainSelectedInstRegOperands(*JumpTableInst, TII, TRI, RBI);
3649}
3650
3651bool AArch64InstructionSelector::selectJumpTable(MachineInstr &I,
3652 MachineRegisterInfo &MRI) {
3653 assert(I.getOpcode() == TargetOpcode::G_JUMP_TABLE && "Expected jump table");
3654 assert(I.getOperand(1).isJTI() && "Jump table op should have a JTI!");
3655
3656 Register DstReg = I.getOperand(0).getReg();
3657 unsigned JTI = I.getOperand(1).getIndex();
3658 // We generate a MOVaddrJT which will get expanded to an ADRP + ADD later.
3659 auto MovMI =
3660 MIB.buildInstr(AArch64::MOVaddrJT, {DstReg}, {})
3661 .addJumpTableIndex(JTI, AArch64II::MO_PAGE)
3662 .addJumpTableIndex(JTI, AArch64II::MO_NC | AArch64II::MO_PAGEOFF);
3663 I.eraseFromParent();
3664 return constrainSelectedInstRegOperands(*MovMI, TII, TRI, RBI);
3665}
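An aside, not part of the listing: after pseudo-expansion the MOVaddrJT built above becomes the usual page/page-offset pair, roughly as sketched below (the register and the jump-table label are illustrative).

  // adrp x8, .LJTI0_0                ; MO_PAGE half
  // add  x8, x8, :lo12:.LJTI0_0      ; MO_NC | MO_PAGEOFF half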
3666
3667bool AArch64InstructionSelector::selectTLSGlobalValue(
3668 MachineInstr &I, MachineRegisterInfo &MRI) {
3669 if (!STI.isTargetMachO())
3670 return false;
3671 MachineFunction &MF = *I.getParent()->getParent();
3672 MF.getFrameInfo().setAdjustsStack(true);
3673
3674 const auto &GlobalOp = I.getOperand(1);
3675 assert(GlobalOp.getOffset() == 0 &&
3676 "Shouldn't have an offset on TLS globals!");
3677 const GlobalValue &GV = *GlobalOp.getGlobal();
3678
3679 auto LoadGOT =
3680 MIB.buildInstr(AArch64::LOADgot, {&AArch64::GPR64commonRegClass}, {})
3681 .addGlobalAddress(&GV, 0, AArch64II::MO_TLS);
3682
3683 auto Load = MIB.buildInstr(AArch64::LDRXui, {&AArch64::GPR64commonRegClass},
3684 {LoadGOT.getReg(0)})
3685 .addImm(0);
3686
3687 MIB.buildCopy(Register(AArch64::X0), LoadGOT.getReg(0));
3688 // TLS calls preserve all registers except those that absolutely must be
3689 // trashed: X0 (it takes an argument), LR (it's a call) and NZCV (let's not be
3690 // silly).
3691 MIB.buildInstr(getBLRCallOpcode(MF), {}, {Load})
3692 .addUse(AArch64::X0, RegState::Implicit)
3693 .addDef(AArch64::X0, RegState::Implicit)
3694 .addRegMask(TRI.getTLSCallPreservedMask());
3695
3696 MIB.buildCopy(I.getOperand(0).getReg(), Register(AArch64::X0));
3697 RBI.constrainGenericRegister(I.getOperand(0).getReg(), AArch64::GPR64RegClass,
3698 MRI);
3699 I.eraseFromParent();
3700 return true;
3701}
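A rough sketch of the Mach-O TLS access sequence assembled above; the register numbers and symbol name are made up, and the exact relocation spelling may differ. The idea: fetch the TLV descriptor through the GOT, load the accessor thunk from it, and call the thunk with the descriptor in x0 under the TLS-preserving register mask.

  // adrp x8, _var@TLVPPAGE
  // ldr  x8, [x8, _var@TLVPPAGEOFF]  ; LOADgot of the descriptor
  // ldr  x9, [x8]                    ; LDRXui: accessor function pointer
  // mov  x0, x8                      ; descriptor argument in x0
  // blr  x9                          ; result pointer returned in x0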
3702
3703bool AArch64InstructionSelector::selectIntrinsicTrunc(
3704 MachineInstr &I, MachineRegisterInfo &MRI) const {
3705 const LLT SrcTy = MRI.getType(I.getOperand(0).getReg());
3706
3707 // Select the correct opcode.
3708 unsigned Opc = 0;
3709 if (!SrcTy.isVector()) {
3710 switch (SrcTy.getSizeInBits()) {
3711 default:
3712 case 16:
3713 Opc = AArch64::FRINTZHr;
3714 break;
3715 case 32:
3716 Opc = AArch64::FRINTZSr;
3717 break;
3718 case 64:
3719 Opc = AArch64::FRINTZDr;
3720 break;
3721 }
3722 } else {
3723 unsigned NumElts = SrcTy.getNumElements();
3724 switch (SrcTy.getElementType().getSizeInBits()) {
3725 default:
3726 break;
3727 case 16:
3728 if (NumElts == 4)
3729 Opc = AArch64::FRINTZv4f16;
3730 else if (NumElts == 8)
3731 Opc = AArch64::FRINTZv8f16;
3732 break;
3733 case 32:
3734 if (NumElts == 2)
3735 Opc = AArch64::FRINTZv2f32;
3736 else if (NumElts == 4)
3737 Opc = AArch64::FRINTZv4f32;
3738 break;
3739 case 64:
3740 if (NumElts == 2)
3741 Opc = AArch64::FRINTZv2f64;
3742 break;
3743 }
3744 }
3745
3746 if (!Opc) {
3747 // Didn't get an opcode above, bail.
3748 LLVM_DEBUG(dbgs() << "Unsupported type for G_INTRINSIC_TRUNC!\n");
3749 return false;
3750 }
3751
3752 // Legalization would have set us up perfectly for this; we just need to
3753 // set the opcode and move on.
3754 I.setDesc(TII.get(Opc));
3755 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3756}
3757
3758bool AArch64InstructionSelector::selectIntrinsicRound(
3759 MachineInstr &I, MachineRegisterInfo &MRI) const {
3760 const LLT SrcTy = MRI.getType(I.getOperand(0).getReg());
3761
3762 // Select the correct opcode.
3763 unsigned Opc = 0;
3764 if (!SrcTy.isVector()) {
3765 switch (SrcTy.getSizeInBits()) {
3766 default:
3767 case 16:
3768 Opc = AArch64::FRINTAHr;
3769 break;
3770 case 32:
3771 Opc = AArch64::FRINTASr;
3772 break;
3773 case 64:
3774 Opc = AArch64::FRINTADr;
3775 break;
3776 }
3777 } else {
3778 unsigned NumElts = SrcTy.getNumElements();
3779 switch (SrcTy.getElementType().getSizeInBits()) {
3780 default:
3781 break;
3782 case 16:
3783 if (NumElts == 4)
3784 Opc = AArch64::FRINTAv4f16;
3785 else if (NumElts == 8)
3786 Opc = AArch64::FRINTAv8f16;
3787 break;
3788 case 32:
3789 if (NumElts == 2)
3790 Opc = AArch64::FRINTAv2f32;
3791 else if (NumElts == 4)
3792 Opc = AArch64::FRINTAv4f32;
3793 break;
3794 case 64:
3795 if (NumElts == 2)
3796 Opc = AArch64::FRINTAv2f64;
3797 break;
3798 }
3799 }
3800
3801 if (!Opc) {
3802 // Didn't get an opcode above, bail.
3803 LLVM_DEBUG(dbgs() << "Unsupported type for G_INTRINSIC_ROUND!\n");
3804 return false;
3805 }
3806
3807 // Legalization would have set us up perfectly for this; we just need to
3808 // set the opcode and move on.
3809 I.setDesc(TII.get(Opc));
3810 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3811}
3812
3813bool AArch64InstructionSelector::selectVectorICmp(
3814 MachineInstr &I, MachineRegisterInfo &MRI) {
3815 Register DstReg = I.getOperand(0).getReg();
3816 LLT DstTy = MRI.getType(DstReg);
3817 Register SrcReg = I.getOperand(2).getReg();
3818 Register Src2Reg = I.getOperand(3).getReg();
3819 LLT SrcTy = MRI.getType(SrcReg);
3820
3821 unsigned SrcEltSize = SrcTy.getElementType().getSizeInBits();
3822 unsigned NumElts = DstTy.getNumElements();
3823
3824 // First index is element size, 0 == 8b, 1 == 16b, 2 == 32b, 3 == 64b
3825 // Second index is num elts, 0 == v2, 1 == v4, 2 == v8, 3 == v16
3826 // Third index is cc opcode:
3827 // 0 == eq
3828 // 1 == ugt
3829 // 2 == uge
3830 // 3 == ult
3831 // 4 == ule
3832 // 5 == sgt
3833 // 6 == sge
3834 // 7 == slt
3835 // 8 == sle
3836 // ne is done by negating 'eq' result.
3837
3838 // The table below assumes that for some comparisons the operands will be
3839 // commuted.
3840 // ult op == commute + ugt op
3841 // ule op == commute + uge op
3842 // slt op == commute + sgt op
3843 // sle op == commute + sge op
3844 unsigned PredIdx = 0;
3845 bool SwapOperands = false;
3846 CmpInst::Predicate Pred = (CmpInst::Predicate)I.getOperand(1).getPredicate();
3847 switch (Pred) {
3848 case CmpInst::ICMP_NE:
3849 case CmpInst::ICMP_EQ:
3850 PredIdx = 0;
3851 break;
3852 case CmpInst::ICMP_UGT:
3853 PredIdx = 1;
3854 break;
3855 case CmpInst::ICMP_UGE:
3856 PredIdx = 2;
3857 break;
3858 case CmpInst::ICMP_ULT:
3859 PredIdx = 3;
3860 SwapOperands = true;
3861 break;
3862 case CmpInst::ICMP_ULE:
3863 PredIdx = 4;
3864 SwapOperands = true;
3865 break;
3866 case CmpInst::ICMP_SGT:
3867 PredIdx = 5;
3868 break;
3869 case CmpInst::ICMP_SGE:
3870 PredIdx = 6;
3871 break;
3872 case CmpInst::ICMP_SLT:
3873 PredIdx = 7;
3874 SwapOperands = true;
3875 break;
3876 case CmpInst::ICMP_SLE:
3877 PredIdx = 8;
3878 SwapOperands = true;
3879 break;
3880 default:
3881 llvm_unreachable("Unhandled icmp predicate");
3882 return false;
3883 }
3884
3885 // This table obviously should be tablegen'd when we have our GISel native
3886 // tablegen selector.
3887
3888 static const unsigned OpcTable[4][4][9] = {
3889 {
3890 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3891 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3892 0 /* invalid */},
3893 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3894 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3895 0 /* invalid */},
3896 {AArch64::CMEQv8i8, AArch64::CMHIv8i8, AArch64::CMHSv8i8,
3897 AArch64::CMHIv8i8, AArch64::CMHSv8i8, AArch64::CMGTv8i8,
3898 AArch64::CMGEv8i8, AArch64::CMGTv8i8, AArch64::CMGEv8i8},
3899 {AArch64::CMEQv16i8, AArch64::CMHIv16i8, AArch64::CMHSv16i8,
3900 AArch64::CMHIv16i8, AArch64::CMHSv16i8, AArch64::CMGTv16i8,
3901 AArch64::CMGEv16i8, AArch64::CMGTv16i8, AArch64::CMGEv16i8}
3902 },
3903 {
3904 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3905 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3906 0 /* invalid */},
3907 {AArch64::CMEQv4i16, AArch64::CMHIv4i16, AArch64::CMHSv4i16,
3908 AArch64::CMHIv4i16, AArch64::CMHSv4i16, AArch64::CMGTv4i16,
3909 AArch64::CMGEv4i16, AArch64::CMGTv4i16, AArch64::CMGEv4i16},
3910 {AArch64::CMEQv8i16, AArch64::CMHIv8i16, AArch64::CMHSv8i16,
3911 AArch64::CMHIv8i16, AArch64::CMHSv8i16, AArch64::CMGTv8i16,
3912 AArch64::CMGEv8i16, AArch64::CMGTv8i16, AArch64::CMGEv8i16},
3913 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3914 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3915 0 /* invalid */}
3916 },
3917 {
3918 {AArch64::CMEQv2i32, AArch64::CMHIv2i32, AArch64::CMHSv2i32,
3919 AArch64::CMHIv2i32, AArch64::CMHSv2i32, AArch64::CMGTv2i32,
3920 AArch64::CMGEv2i32, AArch64::CMGTv2i32, AArch64::CMGEv2i32},
3921 {AArch64::CMEQv4i32, AArch64::CMHIv4i32, AArch64::CMHSv4i32,
3922 AArch64::CMHIv4i32, AArch64::CMHSv4i32, AArch64::CMGTv4i32,
3923 AArch64::CMGEv4i32, AArch64::CMGTv4i32, AArch64::CMGEv4i32},
3924 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3925 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3926 0 /* invalid */},
3927 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3928 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3929 0 /* invalid */}
3930 },
3931 {
3932 {AArch64::CMEQv2i64, AArch64::CMHIv2i64, AArch64::CMHSv2i64,
3933 AArch64::CMHIv2i64, AArch64::CMHSv2i64, AArch64::CMGTv2i64,
3934 AArch64::CMGEv2i64, AArch64::CMGTv2i64, AArch64::CMGEv2i64},
3935 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3936 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3937 0 /* invalid */},
3938 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3939 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3940 0 /* invalid */},
3941 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3942 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3943 0 /* invalid */}
3944 },
3945 };
3946 unsigned EltIdx = Log2_32(SrcEltSize / 8);
3947 unsigned NumEltsIdx = Log2_32(NumElts / 2);
3948 unsigned Opc = OpcTable[EltIdx][NumEltsIdx][PredIdx];
3949 if (!Opc) {
3950 LLVM_DEBUG(dbgs() << "Could not map G_ICMP to cmp opcode");
3951 return false;
3952 }
3953
3954 const RegisterBank &VecRB = *RBI.getRegBank(SrcReg, MRI, TRI);
3955 const TargetRegisterClass *SrcRC =
3956 getRegClassForTypeOnBank(SrcTy, VecRB, true);
3957 if (!SrcRC) {
3958 LLVM_DEBUG(dbgs() << "Could not determine source register class.\n");
3959 return false;
3960 }
3961
3962 unsigned NotOpc = Pred == ICmpInst::ICMP_NE ? AArch64::NOTv8i8 : 0;
3963 if (SrcTy.getSizeInBits() == 128)
3964 NotOpc = NotOpc ? AArch64::NOTv16i8 : 0;
3965
3966 if (SwapOperands)
3967 std::swap(SrcReg, Src2Reg);
3968
3969 auto Cmp = MIB.buildInstr(Opc, {SrcRC}, {SrcReg, Src2Reg});
3970 constrainSelectedInstRegOperands(*Cmp, TII, TRI, RBI);
3971
3972 // Invert if we had a 'ne' cc.
3973 if (NotOpc) {
3974 Cmp = MIB.buildInstr(NotOpc, {DstReg}, {Cmp});
3975 constrainSelectedInstRegOperands(*Cmp, TII, TRI, RBI);
3976 } else {
3977 MIB.buildCopy(DstReg, Cmp.getReg(0));
3978 }
3979 RBI.constrainGenericRegister(DstReg, *SrcRC, MRI);
3980 I.eraseFromParent();
3981 return true;
3982}
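A small standalone illustration (it only mirrors the index arithmetic above; llvm::Log2_32 is re-derived locally) of how a <4 x s32> ult compare lands in the table: the operands are swapped and the ugt entry of the 32-bit/v4 slot is used.

  #include <cassert>
  #include <cstdint>

  // Local stand-in for llvm::Log2_32 on the small values used here.
  static unsigned log2u(uint32_t V) { return 31 - __builtin_clz(V); }

  int main() {
    unsigned SrcEltSize = 32, NumElts = 4;
    unsigned EltIdx = log2u(SrcEltSize / 8);  // 2 -> "32b" element row
    unsigned NumEltsIdx = log2u(NumElts / 2); // 1 -> "v4" column
    unsigned PredIdx = 1;                     // ult -> swap operands, use the ugt slot
    assert(EltIdx == 2 && NumEltsIdx == 1 && PredIdx == 1);
    // OpcTable[2][1][1] is AArch64::CMHIv4i32 in the table above.
    return 0;
  }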
3983
3984MachineInstr *AArch64InstructionSelector::emitScalarToVector(
3985 unsigned EltSize, const TargetRegisterClass *DstRC, Register Scalar,
3986 MachineIRBuilder &MIRBuilder) const {
3987 auto Undef = MIRBuilder.buildInstr(TargetOpcode::IMPLICIT_DEF, {DstRC}, {});
3988
3989 auto BuildFn = [&](unsigned SubregIndex) {
3990 auto Ins =
3991 MIRBuilder
3992 .buildInstr(TargetOpcode::INSERT_SUBREG, {DstRC}, {Undef, Scalar})
3993 .addImm(SubregIndex);
3994 constrainSelectedInstRegOperands(*Undef, TII, TRI, RBI);
3995 constrainSelectedInstRegOperands(*Ins, TII, TRI, RBI);
3996 return &*Ins;
3997 };
3998
3999 switch (EltSize) {
4000 case 16:
4001 return BuildFn(AArch64::hsub);
4002 case 32:
4003 return BuildFn(AArch64::ssub);
4004 case 64:
4005 return BuildFn(AArch64::dsub);
4006 default:
4007 return nullptr;
4008 }
4009}
4010
4011bool AArch64InstructionSelector::selectMergeValues(
4012 MachineInstr &I, MachineRegisterInfo &MRI) {
4013 assert(I.getOpcode() == TargetOpcode::G_MERGE_VALUES && "unexpected opcode");
4014 const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
4015 const LLT SrcTy = MRI.getType(I.getOperand(1).getReg());
4016 assert(!DstTy.isVector() && !SrcTy.isVector() && "invalid merge operation");
4017 const RegisterBank &RB = *RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI);
4018
4019 if (I.getNumOperands() != 3)
4020 return false;
4021
4022 // Merging 2 s64s into an s128.
4023 if (DstTy == LLT::scalar(128)) {
4024 if (SrcTy.getSizeInBits() != 64)
4025 return false;
4026 Register DstReg = I.getOperand(0).getReg();
4027 Register Src1Reg = I.getOperand(1).getReg();
4028 Register Src2Reg = I.getOperand(2).getReg();
4029 auto Tmp = MIB.buildInstr(TargetOpcode::IMPLICIT_DEF, {DstTy}, {});
4030 MachineInstr *InsMI =
4031 emitLaneInsert(None, Tmp.getReg(0), Src1Reg, /* LaneIdx */ 0, RB, MIB);
4032 if (!InsMI)
4033 return false;
4034 MachineInstr *Ins2MI = emitLaneInsert(DstReg, InsMI->getOperand(0).getReg(),
4035 Src2Reg, /* LaneIdx */ 1, RB, MIB);
4036 if (!Ins2MI)
4037 return false;
4038 constrainSelectedInstRegOperands(*InsMI, TII, TRI, RBI);
4039 constrainSelectedInstRegOperands(*Ins2MI, TII, TRI, RBI);
4040 I.eraseFromParent();
4041 return true;
4042 }
4043
4044 if (RB.getID() != AArch64::GPRRegBankID)
4045 return false;
4046
4047 if (DstTy.getSizeInBits() != 64 || SrcTy.getSizeInBits() != 32)
4048 return false;
4049
4050 auto *DstRC = &AArch64::GPR64RegClass;
4051 Register SubToRegDef = MRI.createVirtualRegister(DstRC);
4052 MachineInstr &SubRegMI = *BuildMI(*I.getParent(), I, I.getDebugLoc(),
4053 TII.get(TargetOpcode::SUBREG_TO_REG))
4054 .addDef(SubToRegDef)
4055 .addImm(0)
4056 .addUse(I.getOperand(1).getReg())
4057 .addImm(AArch64::sub_32);
4058 Register SubToRegDef2 = MRI.createVirtualRegister(DstRC);
4059 // Need to anyext the second scalar before we can use bfm
4060 MachineInstr &SubRegMI2 = *BuildMI(*I.getParent(), I, I.getDebugLoc(),
4061 TII.get(TargetOpcode::SUBREG_TO_REG))
4062 .addDef(SubToRegDef2)
4063 .addImm(0)
4064 .addUse(I.getOperand(2).getReg())
4065 .addImm(AArch64::sub_32);
4066 MachineInstr &BFM =
4067 *BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::BFMXri))
4068 .addDef(I.getOperand(0).getReg())
4069 .addUse(SubToRegDef)
4070 .addUse(SubToRegDef2)
4071 .addImm(32)
4072 .addImm(31);
4073 constrainSelectedInstRegOperands(SubRegMI, TII, TRI, RBI);
4074 constrainSelectedInstRegOperands(SubRegMI2, TII, TRI, RBI);
4075 constrainSelectedInstRegOperands(BFM, TII, TRI, RBI);
4076 I.eraseFromParent();
4077 return true;
4078}
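For reference, a standalone model (not LLVM code) of the SUBREG_TO_REG + BFMXri(#32, #31) sequence above: the first s32 operand ends up in the low half of the s64 result and the second in the high half.

  #include <cassert>
  #include <cstdint>

  // Model of G_MERGE_VALUES(s64) from two s32 values as selected above.
  static uint64_t merge32x2(uint32_t Lo, uint32_t Hi) {
    // BFM xd, xn, #32, #31 (imms < immr) acts as a bitfield insert of the low
    // 32 bits of xn at bit 32 of xd, where xd already holds the first operand.
    return uint64_t(Lo) | (uint64_t(Hi) << 32);
  }

  int main() {
    assert(merge32x2(0x11111111u, 0x22222222u) == 0x2222222211111111ull);
    return 0;
  }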
4079
4080static bool getLaneCopyOpcode(unsigned &CopyOpc, unsigned &ExtractSubReg,
4081 const unsigned EltSize) {
4082 // Choose a lane copy opcode and subregister based off of the size of the
4083 // vector's elements.
4084 switch (EltSize) {
4085 case 8:
4086 CopyOpc = AArch64::DUPi8;
4087 ExtractSubReg = AArch64::bsub;
4088 break;
4089 case 16:
4090 CopyOpc = AArch64::DUPi16;
4091 ExtractSubReg = AArch64::hsub;
4092 break;
4093 case 32:
4094 CopyOpc = AArch64::DUPi32;
4095 ExtractSubReg = AArch64::ssub;
4096 break;
4097 case 64:
4098 CopyOpc = AArch64::DUPi64;
4099 ExtractSubReg = AArch64::dsub;
4100 break;
4101 default:
4102 // Unknown size, bail out.
4103 LLVM_DEBUG(dbgs() << "Elt size '" << EltSize << "' unsupported.\n");
4104 return false;
4105 }
4106 return true;
4107}
4108
4109MachineInstr *AArch64InstructionSelector::emitExtractVectorElt(
4110 Optional<Register> DstReg, const RegisterBank &DstRB, LLT ScalarTy,
4111 Register VecReg, unsigned LaneIdx, MachineIRBuilder &MIRBuilder) const {
4112 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
4113 unsigned CopyOpc = 0;
4114 unsigned ExtractSubReg = 0;
4115 if (!getLaneCopyOpcode(CopyOpc, ExtractSubReg, ScalarTy.getSizeInBits())) {
4116 LLVM_DEBUG(
4117 dbgs() << "Couldn't determine lane copy opcode for instruction.\n");
4118 return nullptr;
4119 }
4120
4121 const TargetRegisterClass *DstRC =
4122 getRegClassForTypeOnBank(ScalarTy, DstRB, true);
4123 if (!DstRC) {
4124 LLVM_DEBUG(dbgs() << "Could not determine destination register class.\n");
4125 return nullptr;
4126 }
4127
4128 const RegisterBank &VecRB = *RBI.getRegBank(VecReg, MRI, TRI);
4129 const LLT &VecTy = MRI.getType(VecReg);
4130 const TargetRegisterClass *VecRC =
4131 getRegClassForTypeOnBank(VecTy, VecRB, true);
4132 if (!VecRC) {
4133 LLVM_DEBUG(dbgs() << "Could not determine source register class.\n");
4134 return nullptr;
4135 }
4136
4137 // The register that we're going to copy into.
4138 Register InsertReg = VecReg;
4139 if (!DstReg)
4140 DstReg = MRI.createVirtualRegister(DstRC);
4141 // If the lane index is 0, we just use a subregister COPY.
4142 if (LaneIdx == 0) {
4143 auto Copy = MIRBuilder.buildInstr(TargetOpcode::COPY, {*DstReg}, {})
4144 .addReg(VecReg, 0, ExtractSubReg);
4145 RBI.constrainGenericRegister(*DstReg, *DstRC, MRI);
4146 return &*Copy;
4147 }
4148
4149 // Lane copies require 128-bit wide registers. If we're dealing with an
4150 // unpacked vector, then we need to move up to that width. Insert an implicit
4151 // def and a subregister insert to get us there.
4152 if (VecTy.getSizeInBits() != 128) {
4153 MachineInstr *ScalarToVector = emitScalarToVector(
4154 VecTy.getSizeInBits(), &AArch64::FPR128RegClass, VecReg, MIRBuilder);
4155 if (!ScalarToVector)
4156 return nullptr;
4157 InsertReg = ScalarToVector->getOperand(0).getReg();
4158 }
4159
4160 MachineInstr *LaneCopyMI =
4161 MIRBuilder.buildInstr(CopyOpc, {*DstReg}, {InsertReg}).addImm(LaneIdx);
4162 constrainSelectedInstRegOperands(*LaneCopyMI, TII, TRI, RBI);
4163
4164 // Make sure that we actually constrain the initial copy.
4165 RBI.constrainGenericRegister(*DstReg, *DstRC, MRI);
4166 return LaneCopyMI;
4167}
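A brief aside on the widening step above (standalone model, not LLVM code): the DUPi lane-copy instructions read a 128-bit register, so a 64-bit source is first placed into the low half of an otherwise undefined 128-bit value; lane numbering is unchanged because only the low lanes are ever read.

  #include <array>
  #include <cstdint>

  // Model: widen a <2 x s32> (64-bit) vector into a 128-bit container and
  // extract lane 1, mirroring IMPLICIT_DEF + INSERT_SUBREG + DUPi32.
  static uint32_t extractLane1(std::array<uint32_t, 2> Vec64) {
    std::array<uint32_t, 4> Vec128 = {Vec64[0], Vec64[1], 0, 0}; // upper half is undef in the real code
    return Vec128[1];                                            // DUPi32-style lane read
  }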
4168
4169bool AArch64InstructionSelector::selectExtractElt(
4170 MachineInstr &I, MachineRegisterInfo &MRI) {
4171 assert(I.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT &&
4172 "unexpected opcode!");
4173 Register DstReg = I.getOperand(0).getReg();
4174 const LLT NarrowTy = MRI.getType(DstReg);
4175 const Register SrcReg = I.getOperand(1).getReg();
4176 const LLT WideTy = MRI.getType(SrcReg);
4177 (void)WideTy;
4178 assert(WideTy.getSizeInBits() >= NarrowTy.getSizeInBits() &&
4179 "source register size too small!");
4180 assert(!NarrowTy.isVector() && "cannot extract vector into vector!");
4181
4182 // Need the lane index to determine the correct copy opcode.
4183 MachineOperand &LaneIdxOp = I.getOperand(2);
4184 assert(LaneIdxOp.isReg() && "Lane index operand was not a register?");
4185
4186 if (RBI.getRegBank(DstReg, MRI, TRI)->getID() != AArch64::FPRRegBankID) {
4187 LLVM_DEBUG(dbgs() << "Cannot extract into GPR.\n");
4188 return false;
4189 }
4190
4191 // Find the index to extract from.
4192 auto VRegAndVal = getIConstantVRegValWithLookThrough(LaneIdxOp.getReg(), MRI);
4193 if (!VRegAndVal)
4194 return false;
4195 unsigned LaneIdx = VRegAndVal->Value.getSExtValue();
4196
4197
4198 const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
4199 MachineInstr *Extract = emitExtractVectorElt(DstReg, DstRB, NarrowTy, SrcReg,
4200 LaneIdx, MIB);
4201 if (!Extract)
4202 return false;
4203
4204 I.eraseFromParent();
4205 return true;
4206}
4207
4208bool AArch64InstructionSelector::selectSplitVectorUnmerge(
4209 MachineInstr &I, MachineRegisterInfo &MRI) {
4210 unsigned NumElts = I.getNumOperands() - 1;
4211 Register SrcReg = I.getOperand(NumElts).getReg();
4212 const LLT NarrowTy = MRI.getType(I.getOperand(0).getReg());
4213 const LLT SrcTy = MRI.getType(SrcReg);
4214
4215 assert(NarrowTy.isVector() && "Expected an unmerge into vectors");
4216 if (SrcTy.getSizeInBits() > 128) {
4217 LLVM_DEBUG(dbgs() << "Unexpected vector type for vec split unmerge");
4218 return false;
4219 }
4220
4221 // We implement a split vector operation by treating the sub-vectors as
4222 // scalars and extracting them.
4223 const RegisterBank &DstRB =
4224 *RBI.getRegBank(I.getOperand(0).getReg(), MRI, TRI);
4225 for (unsigned OpIdx = 0; OpIdx < NumElts; ++OpIdx) {
4226 Register Dst = I.getOperand(OpIdx).getReg();
4227 MachineInstr *Extract =
4228 emitExtractVectorElt(Dst, DstRB, NarrowTy, SrcReg, OpIdx, MIB);
4229 if (!Extract)
4230 return false;
4231 }
4232 I.eraseFromParent();
4233 return true;
4234}
4235
4236bool AArch64InstructionSelector::selectUnmergeValues(MachineInstr &I,
4237 MachineRegisterInfo &MRI) {
4238 assert(I.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
4239 "unexpected opcode");
4240
4241 // TODO: Handle unmerging into GPRs and from scalars to scalars.
4242 if (RBI.getRegBank(I.getOperand(0).getReg(), MRI, TRI)->getID() !=
4243 AArch64::FPRRegBankID ||
4244 RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI)->getID() !=
4245 AArch64::FPRRegBankID) {
4246 LLVM_DEBUG(dbgs() << "Unmerging vector-to-gpr and scalar-to-scalar "
4247 "currently unsupported.\n");
4248 return false;
4249 }
4250
4251 // The last operand is the vector source register, and every other operand is
4252 // a register to unpack into.
4253 unsigned NumElts = I.getNumOperands() - 1;
4254 Register SrcReg = I.getOperand(NumElts).getReg();
4255 const LLT NarrowTy = MRI.getType(I.getOperand(0).getReg());
4256 const LLT WideTy = MRI.getType(SrcReg);
4257 (void)WideTy;
4258 assert((WideTy.isVector() || WideTy.getSizeInBits() == 128) &&
4259 "can only unmerge from vector or s128 types!");
4260 assert(WideTy.getSizeInBits() > NarrowTy.getSizeInBits() &&
4261 "source register size too small!");
4262
4263 if (!NarrowTy.isScalar())
4264 return selectSplitVectorUnmerge(I, MRI);
4265
4266 // Choose a lane copy opcode and subregister based off of the size of the
4267 // vector's elements.
4268 unsigned CopyOpc = 0;
4269 unsigned ExtractSubReg = 0;
4270 if (!getLaneCopyOpcode(CopyOpc, ExtractSubReg, NarrowTy.getSizeInBits()))
4271 return false;
4272
4273 // Set up for the lane copies.
4274 MachineBasicBlock &MBB = *I.getParent();
4275
4276 // Stores the registers we'll be copying from.
4277 SmallVector<Register, 4> InsertRegs;
4278
4279 // We'll use the first register twice, so we only need NumElts-1 registers.
4280 unsigned NumInsertRegs = NumElts - 1;
4281
4282 // If our elements fit into exactly 128 bits, then we can copy from the source
4283 // directly. Otherwise, we need to do a bit of setup with some subregister
4284 // inserts.
4285 if (NarrowTy.getSizeInBits() * NumElts == 128) {
4286 InsertRegs = SmallVector<Register, 4>(NumInsertRegs, SrcReg);
4287 } else {
4288 // No. We have to perform subregister inserts. For each insert, create an
4289 // implicit def and a subregister insert, and save the register we create.
4290 const TargetRegisterClass *RC = getRegClassForTypeOnBank(
4291 LLT::fixed_vector(NumElts, WideTy.getScalarSizeInBits()),
4292 *RBI.getRegBank(SrcReg, MRI, TRI));
4293 unsigned SubReg = 0;
4294 bool Found = getSubRegForClass(RC, TRI, SubReg);
4295 (void)Found;
4296 assert(Found && "expected to find last operand's subreg idx");
4297 for (unsigned Idx = 0; Idx < NumInsertRegs; ++Idx) {
4298 Register ImpDefReg = MRI.createVirtualRegister(&AArch64::FPR128RegClass);
4299 MachineInstr &ImpDefMI =
4300 *BuildMI(MBB, I, I.getDebugLoc(), TII.get(TargetOpcode::IMPLICIT_DEF),
4301 ImpDefReg);
4302
4303 // Now, create the subregister insert from SrcReg.
4304 Register InsertReg = MRI.createVirtualRegister(&AArch64::FPR128RegClass);
4305 MachineInstr &InsMI =
4306 *BuildMI(MBB, I, I.getDebugLoc(),
4307 TII.get(TargetOpcode::INSERT_SUBREG), InsertReg)
4308 .addUse(ImpDefReg)
4309 .addUse(SrcReg)
4310 .addImm(SubReg);
4311
4312 constrainSelectedInstRegOperands(ImpDefMI, TII, TRI, RBI);
4313 constrainSelectedInstRegOperands(InsMI, TII, TRI, RBI);
4314
4315 // Save the register so that we can copy from it after.
4316 InsertRegs.push_back(InsertReg);
4317 }
4318 }
4319
4320 // Now that we've created any necessary subregister inserts, we can
4321 // create the copies.
4322 //
4323 // Perform the first copy separately as a subregister copy.
4324 Register CopyTo = I.getOperand(0).getReg();
4325 auto FirstCopy = MIB.buildInstr(TargetOpcode::COPY, {CopyTo}, {})
4326 .addReg(InsertRegs[0], 0, ExtractSubReg);
4327 constrainSelectedInstRegOperands(*FirstCopy, TII, TRI, RBI);
4328
4329 // Now, perform the remaining copies as vector lane copies.
4330 unsigned LaneIdx = 1;
4331 for (Register InsReg : InsertRegs) {
4332 Register CopyTo = I.getOperand(LaneIdx).getReg();
4333 MachineInstr &CopyInst =
4334 *BuildMI(MBB, I, I.getDebugLoc(), TII.get(CopyOpc), CopyTo)
4335 .addUse(InsReg)
4336 .addImm(LaneIdx);
4337 constrainSelectedInstRegOperands(CopyInst, TII, TRI, RBI);
4338 ++LaneIdx;
4339 }
4340
4341 // Separately constrain the first copy's destination. Because of the
4342 // limitation in constrainOperandRegClass, we can't guarantee that this will
4343 // actually be constrained. So, do it ourselves using the second operand.
4344 const TargetRegisterClass *RC =
4345 MRI.getRegClassOrNull(I.getOperand(1).getReg());
4346 if (!RC) {
4347 LLVM_DEBUG(dbgs() << "Couldn't constrain copy destination.\n");
4348 return false;
4349 }
4350
4351 RBI.constrainGenericRegister(CopyTo, *RC, MRI);
4352 I.eraseFromParent();
4353 return true;
4354}
4355
4356bool AArch64InstructionSelector::selectConcatVectors(
4357 MachineInstr &I, MachineRegisterInfo &MRI) {
4358 assert(I.getOpcode() == TargetOpcode::G_CONCAT_VECTORS &&
4359 "Unexpected opcode");
4360 Register Dst = I.getOperand(0).getReg();
4361 Register Op1 = I.getOperand(1).getReg();
4362 Register Op2 = I.getOperand(2).getReg();
4363 MachineInstr *ConcatMI = emitVectorConcat(Dst, Op1, Op2, MIB);
4364 if (!ConcatMI)
4365 return false;
4366 I.eraseFromParent();
4367 return true;
4368}
4369
4370unsigned
4371AArch64InstructionSelector::emitConstantPoolEntry(const Constant *CPVal,
4372 MachineFunction &MF) const {
4373 Type *CPTy = CPVal->getType();
4374 Align Alignment = MF.getDataLayout().getPrefTypeAlign(CPTy);
4375
4376 MachineConstantPool *MCP = MF.getConstantPool();
4377 return MCP->getConstantPoolIndex(CPVal, Alignment);
4378}
4379
4380MachineInstr *AArch64InstructionSelector::emitLoadFromConstantPool(
4381 const Constant *CPVal, MachineIRBuilder &MIRBuilder) const {
4382 auto &MF = MIRBuilder.getMF();
4383 unsigned CPIdx = emitConstantPoolEntry(CPVal, MF);
4384
4385 auto Adrp =
4386 MIRBuilder.buildInstr(AArch64::ADRP, {&AArch64::GPR64RegClass}, {})
4387 .addConstantPoolIndex(CPIdx, 0, AArch64II::MO_PAGE);
4388
4389 MachineInstr *LoadMI = nullptr;
4390 MachinePointerInfo PtrInfo = MachinePointerInfo::getConstantPool(MF);
4391 unsigned Size = MIRBuilder.getDataLayout().getTypeStoreSize(CPVal->getType());
4392 switch (Size) {
4393 case 16:
4394 LoadMI =
4395 &*MIRBuilder
4396 .buildInstr(AArch64::LDRQui, {&AArch64::FPR128RegClass}, {Adrp})
4397 .addConstantPoolIndex(CPIdx, 0,
4398 AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
4399 break;
4400 case 8:
4401 LoadMI =
4402 &*MIRBuilder
4403 .buildInstr(AArch64::LDRDui, {&AArch64::FPR64RegClass}, {Adrp})
4404 .addConstantPoolIndex(CPIdx, 0,
4405 AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
4406 break;
4407 case 4:
4408 LoadMI =
4409 &*MIRBuilder
4410 .buildInstr(AArch64::LDRSui, {&AArch64::FPR32RegClass}, {Adrp})
4411 .addConstantPoolIndex(CPIdx, 0,
4412 AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
4413 break;
4414 case 2:
4415 LoadMI =
4416 &*MIRBuilder
4417 .buildInstr(AArch64::LDRHui, {&AArch64::FPR16RegClass}, {Adrp})
4418 .addConstantPoolIndex(CPIdx, 0,
4419 AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
4420 break;
4421 default:
4422 LLVM_DEBUG(dbgs() << "Could not load from constant pool of type "
4423 << *CPVal->getType());
4424 return nullptr;
4425 }
4426 LoadMI->addMemOperand(MF, MF.getMachineMemOperand(PtrInfo,
4427 MachineMemOperand::MOLoad,
4428 Size, Align(Size)));
4429 constrainSelectedInstRegOperands(*Adrp, TII, TRI, RBI);
4430 constrainSelectedInstRegOperands(*LoadMI, TII, TRI, RBI);
4431 return LoadMI;
4432}
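As with the jump table earlier, the ADRP + LDR pair built here is the standard literal-pool addressing sequence; sketched below for the 16-byte (LDRQui) case with a made-up constant-pool label.

  // adrp x8, .LCPI0_0
  // ldr  q0, [x8, :lo12:.LCPI0_0]    ; MO_PAGEOFF | MO_NC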
4433
4434/// Return an <Opcode, SubregIndex> pair to do a vector elt insert of a given
4435/// size and RB.
4436static std::pair<unsigned, unsigned>
4437getInsertVecEltOpInfo(const RegisterBank &RB, unsigned EltSize) {
4438 unsigned Opc, SubregIdx;
4439 if (RB.getID() == AArch64::GPRRegBankID) {
4440 if (EltSize == 16) {
4441 Opc = AArch64::INSvi16gpr;
4442 SubregIdx = AArch64::ssub;
4443 } else if (EltSize == 32) {
4444 Opc = AArch64::INSvi32gpr;
4445 SubregIdx = AArch64::ssub;
4446 } else if (EltSize == 64) {
4447 Opc = AArch64::INSvi64gpr;
4448 SubregIdx = AArch64::dsub;
4449 } else {
4450 llvm_unreachable("invalid elt size!");
4451 }
4452 } else {
4453 if (EltSize == 8) {
4454 Opc = AArch64::INSvi8lane;
4455 SubregIdx = AArch64::bsub;
4456 } else if (EltSize == 16) {
4457 Opc = AArch64::INSvi16lane;
4458 SubregIdx = AArch64::hsub;
4459 } else if (EltSize == 32) {
4460 Opc = AArch64::INSvi32lane;
4461 SubregIdx = AArch64::ssub;
4462 } else if (EltSize == 64) {
4463 Opc = AArch64::INSvi64lane;
4464 SubregIdx = AArch64::dsub;
4465 } else {
4466 llvm_unreachable("invalid elt size!");
4467 }
4468 }
4469 return std::make_pair(Opc, SubregIdx);
4470}
4471
4472MachineInstr *AArch64InstructionSelector::emitInstr(
4473 unsigned Opcode, std::initializer_list<llvm::DstOp> DstOps,
4474 std::initializer_list<llvm::SrcOp> SrcOps, MachineIRBuilder &MIRBuilder,
4475 const ComplexRendererFns &RenderFns) const {
4476 assert(Opcode && "Expected an opcode?");
4477 assert(!isPreISelGenericOpcode(Opcode) &&
4478 "Function should only be used to produce selected instructions!");
4479 auto MI = MIRBuilder.buildInstr(Opcode, DstOps, SrcOps);
4480 if (RenderFns)
4481 for (auto &Fn : *RenderFns)
4482 Fn(MI);
4483 constrainSelectedInstRegOperands(*MI, TII, TRI, RBI);
4484 return &*MI;
4485}
4486
4487MachineInstr *AArch64InstructionSelector::emitAddSub(
4488 const std::array<std::array<unsigned, 2>, 5> &AddrModeAndSizeToOpcode,
4489 Register Dst, MachineOperand &LHS, MachineOperand &RHS,
4490 MachineIRBuilder &MIRBuilder) const {
4491 MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
4492 assert(LHS.isReg() && RHS.isReg() && "Expected register operands?");
4493 auto Ty = MRI.getType(LHS.getReg());
4494 assert(!Ty.isVector() && "Expected a scalar or pointer?");
4495 unsigned Size = Ty.getSizeInBits();
4496 assert((Size == 32 || Size == 64) && "Expected a 32-bit or 64-bit type only");
4497 bool Is32Bit = Size == 32;
4498
4499 // INSTRri form with positive arithmetic immediate.
4500 if (auto Fns = selectArithImmed(RHS))
4501 return emitInstr(AddrModeAndSizeToOpcode[0][Is32Bit], {Dst}, {LHS},
4502 MIRBuilder, Fns);
4503
4504 // INSTRri form with negative arithmetic immediate.
4505 if (auto Fns = selectNegArithImmed(RHS))
4506 return emitInstr(AddrModeAndSizeToOpcode[3][Is32Bit], {Dst}, {LHS},
4507 MIRBuilder, Fns);
4508
4509 // INSTRrx form.
4510 if (auto Fns = selectArithExtendedRegister(RHS))
4511 return emitInstr(AddrModeAndSizeToOpcode[4][Is32Bit], {Dst}, {LHS},
4512 MIRBuilder, Fns);
4513
4514 // INSTRrs form.
4515 if (auto Fns = selectShiftedRegister(RHS))
4516 return emitInstr(AddrModeAndSizeToOpcode[1][Is32Bit], {Dst}, {LHS},
4517 MIRBuilder, Fns);
4518 return emitInstr(AddrModeAndSizeToOpcode[2][Is32Bit], {Dst}, {LHS, RHS},
4519 MIRBuilder);
4520}
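A condensed, standalone sketch of the operand-form priority implemented above; the strings stand in for the opcode-table rows, and the real decision is made by the selectArithImmed/selectNegArithImmed/selectArithExtendedRegister/selectShiftedRegister matchers rather than this simplified 12-bit check.

  #include <cstdint>
  #include <string>

  static std::string pickAddSubForm(bool RHSHasImm, int64_t RHSImm,
                                    bool RHSIsExtended, bool RHSIsShifted) {
    if (RHSHasImm && RHSImm >= 0 && RHSImm < (1 << 12))
      return "INSTRri";         // positive arithmetic immediate
    if (RHSHasImm && RHSImm < 0 && -RHSImm < (1 << 12))
      return "negated INSTRri"; // e.g. an ADD of -imm becomes the SUBri row
    if (RHSIsExtended)
      return "INSTRrx";         // extended register form
    if (RHSIsShifted)
      return "INSTRrs";         // shifted register form
    return "INSTRrr";           // plain register-register fallback
  }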
4521
4522MachineInstr *
4523AArch64InstructionSelector::emitADD(Register DefReg, MachineOperand &LHS,
4524 MachineOperand &RHS,
4525 MachineIRBuilder &MIRBuilder) const {
4526 const std::array<std::array<unsigned, 2>, 5> OpcTable{
4527 {{AArch64::ADDXri, AArch64::ADDWri},
4528 {AArch64::ADDXrs, AArch64::ADDWrs},
4529 {AArch64::ADDXrr, AArch64::ADDWrr},
4530 {AArch64::SUBXri, AArch64::SUBWri},
4531 {AArch64::ADDXrx, AArch64::ADDWrx}}};
4532 return emitAddSub(OpcTable, DefReg, LHS, RHS, MIRBuilder);
4533}
4534
4535MachineInstr *
4536AArch64InstructionSelector::emitADDS(Register Dst, MachineOperand &LHS,
4537 MachineOperand &RHS,
4538 MachineIRBuilder &MIRBuilder) const {
4539 const std::array<std::array<unsigned, 2>, 5> OpcTable{
4540 {{AArch64::ADDSXri, AArch64::ADDSWri},
4541 {AArch64::ADDSXrs, AArch64::ADDSWrs},
4542 {AArch64::ADDSXrr, AArch64::ADDSWrr},
4543 {AArch64::SUBSXri, AArch64::SUBSWri},
4544 {AArch64::ADDSXrx, AArch64::ADDSWrx}}};
4545 return emitAddSub(OpcTable, Dst, LHS, RHS, MIRBuilder);
4546}
4547
4548MachineInstr *
4549AArch64InstructionSelector::emitSUBS(Register Dst, MachineOperand &LHS,
4550 MachineOperand &RHS,
4551 MachineIRBuilder &MIRBuilder) const {
4552 const std::array<std::array<unsigned, 2>, 5> OpcTable{
4553 {{AArch64::SUBSXri, AArch64::SUBSWri},
4554 {AArch64::SUBSXrs, AArch64::SUBSWrs},
4555 {AArch64::SUBSXrr, AArch64::SUBSWrr},
4556 {AArch64::ADDSXri, AArch64::ADDSWri},
4557 {AArch64::SUBSXrx, AArch64::SUBSWrx}}};
4558 return emitAddSub(OpcTable, Dst, LHS, RHS, MIRBuilder);
4559}
4560
4561MachineInstr *
4562AArch64InstructionSelector::emitCMN(MachineOperand &LHS, MachineOperand &RHS,
4563 MachineIRBuilder &MIRBuilder) const {
4564 MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
4565 bool Is32Bit = (MRI.getType(LHS.getReg()).getSizeInBits() == 32);
4566 auto RC = Is32Bit ? &AArch64::GPR32RegClass : &AArch64::GPR64RegClass;
4567 return emitADDS(MRI.createVirtualRegister(RC), LHS, RHS, MIRBuilder);
4568}
4569
4570MachineInstr *
4571AArch64InstructionSelector::emitTST(MachineOperand &LHS, MachineOperand &RHS,
4572 MachineIRBuilder &MIRBuilder) const {
4573 assert(LHS.isReg() && RHS.isReg() && "Expected register operands?");
4574 MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
4575 LLT Ty = MRI.getType(LHS.getReg());
4576 unsigned RegSize = Ty.getSizeInBits();
4577 bool Is32Bit = (RegSize == 32);
4578 const unsigned OpcTable[3][2] = {{AArch64::ANDSXri, AArch64::ANDSWri},
4579 {AArch64::ANDSXrs, AArch64::ANDSWrs},
4580 {AArch64::ANDSXrr, AArch64::ANDSWrr}};
4581 // ANDS needs a logical immediate for its immediate form. Check if we can
4582 // fold one in.
4583 if (auto ValAndVReg = getIConstantVRegValWithLookThrough(RHS.getReg(), MRI)) {
4584 int64_t Imm = ValAndVReg->Value.getSExtValue();
4585
4586 if (AArch64_AM::isLogicalImmediate(Imm, RegSize)) {
4587 auto TstMI = MIRBuilder.buildInstr(OpcTable[0][Is32Bit], {Ty}, {LHS});
4588 TstMI.addImm(AArch64_AM::encodeLogicalImmediate(Imm, RegSize));
4589 constrainSelectedInstRegOperands(*TstMI, TII, TRI, RBI);
4590 return &*TstMI;
4591 }
4592 }
4593
4594 if (auto Fns = selectLogicalShiftedRegister(RHS))
4595 return emitInstr(OpcTable[1][Is32Bit], {Ty}, {LHS}, MIRBuilder, Fns);
4596 return emitInstr(OpcTable[2][Is32Bit], {Ty}, {LHS, RHS}, MIRBuilder);
4597}
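The folded ANDS/TST above only feeds NZCV to its users, so a constant RHS helps only when it is a valid AArch64 logical immediate (0xff is one such value). A minimal model of the flag the fold computes:

  #include <cstdint>

  // Z flag of TST LHS, #Imm (i.e. ANDS with the result discarded).
  static bool tstZFlag(uint64_t LHS, uint64_t Imm) { return (LHS & Imm) == 0; }
  // Example: tstZFlag(0x100, 0xff) == true, so a branch on "eq" would be taken.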
4598
4599MachineInstr *AArch64InstructionSelector::emitIntegerCompare(
4600 MachineOperand &LHS, MachineOperand &RHS, MachineOperand &Predicate,
4601 MachineIRBuilder &MIRBuilder) const {
4602 assert(LHS.isReg() && RHS.isReg() && "Expected LHS and RHS to be registers!");
4603 assert(Predicate.isPredicate() && "Expected predicate?");
4604 MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
4605 LLT CmpTy = MRI.getType(LHS.getReg());
4606 assert(!CmpTy.isVector() && "Expected scalar or pointer");
4607 unsigned Size = CmpTy.getSizeInBits();
4608 (void)Size;
4609 assert((Size == 32 || Size == 64) && "Expected a 32-bit or 64-bit LHS/RHS?");
4610 // Fold the compare into a cmn or tst if possible.
4611 if (auto FoldCmp = tryFoldIntegerCompare(LHS, RHS, Predicate, MIRBuilder))
4612 return FoldCmp;
4613 auto Dst = MRI.cloneVirtualRegister(LHS.getReg());
4614 return emitSUBS(Dst, LHS, RHS, MIRBuilder);
4615}
4616
4617MachineInstr *AArch64InstructionSelector::emitCSetForFCmp(
4618 Register Dst, CmpInst::Predicate Pred, MachineIRBuilder &MIRBuilder) const {
4619 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
4620#ifndef NDEBUG
4621 LLT Ty = MRI.getType(Dst);
4622 assert(!Ty.isVector() && Ty.getSizeInBits() == 32 &&
4623 "Expected a 32-bit scalar register?");
4624#endif
4625 const Register ZReg = AArch64::WZR;
4626 AArch64CC::CondCode CC1, CC2;
4627 changeFCMPPredToAArch64CC(Pred, CC1, CC2);
4628 auto InvCC1 = AArch64CC::getInvertedCondCode(CC1);
4629 if (CC2 == AArch64CC::AL)
4630 return emitCSINC(/*Dst=*/Dst, /*Src1=*/ZReg, /*Src2=*/ZReg, InvCC1,
4631 MIRBuilder);
4632 const TargetRegisterClass *RC = &AArch64::GPR32RegClass;
4633 Register Def1Reg = MRI.createVirtualRegister(RC);
4634 Register Def2Reg = MRI.createVirtualRegister(RC);
4635 auto InvCC2 = AArch64CC::getInvertedCondCode(CC2);
4636 emitCSINC(/*Dst=*/Def1Reg, /*Src1=*/ZReg, /*Src2=*/ZReg, InvCC1, MIRBuilder);
4637 emitCSINC(/*Dst=*/Def2Reg, /*Src1=*/ZReg, /*Src2=*/ZReg, InvCC2, MIRBuilder);
4638 auto OrMI = MIRBuilder.buildInstr(AArch64::ORRWrr, {Dst}, {Def1Reg, Def2Reg});
4639 constrainSelectedInstRegOperands(*OrMI, TII, TRI, RBI);
4640 return &*OrMI;
4641}
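The inverted-condition CSINC used above is the CSET idiom; a standalone model (not LLVM code) of why it yields a clean 0/1:

  // CSINC Rd, Rn, Rm, cond  ==>  Rd = cond ? Rn : Rm + 1
  static unsigned csinc(unsigned Rn, unsigned Rm, bool CondHolds) {
    return CondHolds ? Rn : Rm + 1;
  }
  // With Rn = Rm = WZR (0) and cond = the *inverted* FCMP condition:
  //   predicate true  -> inverted cond fails -> 0 + 1 == 1
  //   predicate false -> inverted cond holds -> 0
  // The two-condition case above ORRs two such 0/1 values together.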
4642
4643MachineInstr *
4644AArch64InstructionSelector::emitFPCompare(Register LHS, Register RHS,
4645 MachineIRBuilder &MIRBuilder,
4646 Optional<CmpInst::Predicate> Pred) const {
4647 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
4648 LLT Ty = MRI.getType(LHS);
4649 if (Ty.isVector())
4650 return nullptr;
4651 unsigned OpSize = Ty.getSizeInBits();
4652 if (OpSize != 32 && OpSize != 64)
4653 return nullptr;
4654
4655 // If this is a compare against +0.0, then we don't have
4656 // to explicitly materialize a constant.
4657 const ConstantFP *FPImm = getConstantFPVRegVal(RHS, MRI);
4658 bool ShouldUseImm = FPImm && (FPImm->isZero() && !FPImm->isNegative());
4659
4660 auto IsEqualityPred = [](CmpInst::Predicate P) {
4661 return P == CmpInst::FCMP_OEQ || P == CmpInst::FCMP_ONE ||
4662 P == CmpInst::FCMP_UEQ || P == CmpInst::FCMP_UNE;
4663 };
4664 if (!ShouldUseImm && Pred && IsEqualityPred(*Pred)) {
4665 // Try commutating the operands.
4666 const ConstantFP *LHSImm = getConstantFPVRegVal(LHS, MRI);
4667 if (LHSImm && (LHSImm->isZero() && !LHSImm->isNegative())) {
4668 ShouldUseImm = true;
4669 std::swap(LHS, RHS);
4670 }
4671 }
4672 unsigned CmpOpcTbl[2][2] = {{AArch64::FCMPSrr, AArch64::FCMPDrr},
4673 {AArch64::FCMPSri, AArch64::FCMPDri}};
4674 unsigned CmpOpc = CmpOpcTbl[ShouldUseImm][OpSize == 64];
4675
4676 // Partially build the compare. Decide if we need to add a use for the
4677 // third operand based off whether or not we're comparing against 0.0.
4678 auto CmpMI = MIRBuilder.buildInstr(CmpOpc).addUse(LHS);
4679 CmpMI.setMIFlags(MachineInstr::NoFPExcept);
4680 if (!ShouldUseImm)
4681 CmpMI.addUse(RHS);
4682 constrainSelectedInstRegOperands(*CmpMI, TII, TRI, RBI);
4683 return &*CmpMI;
4684}
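A quick standalone check of why the commute above is restricted to equality predicates (the value is an arbitrary example): (un)ordered equality is symmetric in its operands, relational predicates are not, so only the former may swap operands to reach the compare-against-+0.0 form.

  #include <cassert>
  int main() {
    double X = 2.0;
    assert((0.0 == X) == (X == 0.0)); // symmetric: safe to swap and use FCMP against #0.0
    assert((0.0 < X) != (X < 0.0));   // not symmetric: no swap for relational predicates
    return 0;
  }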
4685
4686MachineInstr *AArch64InstructionSelector::emitVectorConcat(
4687 Optional<Register> Dst, Register Op1, Register Op2,
4688 MachineIRBuilder &MIRBuilder) const {
4689 // We implement a vector concat by:
4690 // 1. Use scalar_to_vector to insert the lower vector into the larger dest
4691 // 2. Insert the upper vector into the destination's upper element
4692 // TODO: some of this code is common with G_BUILD_VECTOR handling.
4693 MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
4694
4695 const LLT Op1Ty = MRI.getType(Op1);
4696 const LLT Op2Ty = MRI.getType(Op2);
4697
4698 if (Op1Ty != Op2Ty) {
4699 LLVM_DEBUG(dbgs() << "Could not do vector concat of differing vector tys");
4700 return nullptr;
4701 }
4702 assert(Op1Ty.isVector() && "Expected a vector for vector concat");
4703
4704 if (Op1Ty.getSizeInBits() >= 128) {
4705 LLVM_DEBUG(dbgs() << "Vector concat not supported for full size vectors");
4706 return nullptr;
4707 }
4708
4709 // At the moment we just support 64 bit vector concats.
4710 if (Op1Ty.getSizeInBits() != 64) {
4711 LLVM_DEBUG(dbgs() << "Vector concat only supported for 64b vectors");
4712 return nullptr;
4713 }
4714
4715 const LLT ScalarTy = LLT::scalar(Op1Ty.getSizeInBits());
4716 const RegisterBank &FPRBank = *RBI.getRegBank(Op1, MRI, TRI);
4717 const TargetRegisterClass *DstRC =
4718 getRegClassForTypeOnBank(Op1Ty.multiplyElements(2), FPRBank);
4719
4720 MachineInstr *WidenedOp1 =
4721 emitScalarToVector(ScalarTy.getSizeInBits(), DstRC, Op1, MIRBuilder);
4722 MachineInstr *WidenedOp2 =
4723 emitScalarToVector(ScalarTy.getSizeInBits(), DstRC, Op2, MIRBuilder);
4724 if (!WidenedOp1 || !WidenedOp2) {
4725 LLVM_DEBUG(dbgs() << "Could not emit a vector from scalar value");
4726 return nullptr;
4727 }
4728
4729 // Now do the insert of the upper element.
4730 unsigned InsertOpc, InsSubRegIdx;
4731 std::tie(InsertOpc, InsSubRegIdx) =
4732 getInsertVecEltOpInfo(FPRBank, ScalarTy.getSizeInBits());
4733
4734 if (!Dst)
4735 Dst = MRI.createVirtualRegister(DstRC);
4736 auto InsElt =
4737 MIRBuilder
4738 .buildInstr(InsertOpc, {*Dst}, {WidenedOp1->getOperand(0).getReg()})
4739 .addImm(1) /* Lane index */
4740 .addUse(WidenedOp2->getOperand(0).getReg())
4741 .addImm(0);
4742 constrainSelectedInstRegOperands(*InsElt, TII, TRI, RBI);
4743 return &*InsElt;
4744}
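Standalone model (not LLVM code) of the 64-bit concat strategy above: each source vector is treated as one 64-bit scalar lane of the 128-bit destination, lane 0 filled by the scalar-to-vector insert and lane 1 by the INSvi64lane-style insert.

  #include <array>
  #include <cstdint>

  static std::array<uint64_t, 2> concat64(uint64_t Op1, uint64_t Op2) {
    std::array<uint64_t, 2> Dst{}; // IMPLICIT_DEF + INSERT_SUBREG seeds lane 0
    Dst[0] = Op1;
    Dst[1] = Op2;                  // insert of the upper element
    return Dst;
  }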
4745
4746MachineInstr *
4747AArch64InstructionSelector::emitCSINC(Register Dst, Register Src1,
4748 Register Src2, AArch64CC::CondCode Pred,
4749 MachineIRBuilder &MIRBuilder) const {
4750 auto &MRI = *MIRBuilder.getMRI();
4751 const RegClassOrRegBank &RegClassOrBank = MRI.getRegClassOrRegBank(Dst);
4752 // If we used a register class, then this won't necessarily have an LLT.
4753 // Compute the size based off whether or not we have a class or bank.
4754 unsigned Size;
4755 if (const auto *RC = RegClassOrBank.dyn_cast<const TargetRegisterClass *>())
4756 Size = TRI.getRegSizeInBits(*RC);
4757 else
4758 Size = MRI.getType(Dst).getSizeInBits();
4759 // Some opcodes use s1.
4760 assert(Size <= 64 && "Expected 64 bits or less only!");
4761 static const unsigned OpcTable[2] = {AArch64::CSINCWr, AArch64::CSINCXr};
4762 unsigned Opc = OpcTable[Size == 64];
4763 auto CSINC = MIRBuilder.buildInstr(Opc, {Dst}, {Src1, Src2}).addImm(Pred);
4764 constrainSelectedInstRegOperands(*CSINC, TII, TRI, RBI);
4765 return &*CSINC;
4766}
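// Usage sketch (illustrative): with both sources tied to the zero register,
// CSINC materializes a boolean from NZCV. For example,
//   csinc w0, wzr, wzr, ne
// yields 0 when NE holds and 1 otherwise, which is how "cset w0, eq" is
// encoded (CSET uses the inverted condition).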
4767
4768std::pair<MachineInstr *, AArch64CC::CondCode>
4769AArch64InstructionSelector::emitOverflowOp(unsigned Opcode, Register Dst,
4770 MachineOperand &LHS,
4771 MachineOperand &RHS,
4772 MachineIRBuilder &MIRBuilder) const {
4773 switch (Opcode) {
4774 default:
4775 llvm_unreachable("Unexpected opcode!");
4776 case TargetOpcode::G_SADDO:
4777 return std::make_pair(emitADDS(Dst, LHS, RHS, MIRBuilder), AArch64CC::VS);
4778 case TargetOpcode::G_UADDO:
4779 return std::make_pair(emitADDS(Dst, LHS, RHS, MIRBuilder), AArch64CC::HS);
4780 case TargetOpcode::G_SSUBO:
4781 return std::make_pair(emitSUBS(Dst, LHS, RHS, MIRBuilder), AArch64CC::VS);
4782 case TargetOpcode::G_USUBO:
4783 return std::make_pair(emitSUBS(Dst, LHS, RHS, MIRBuilder), AArch64CC::LO);
4784 }
4785}
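// Illustrative mapping (a sketch of how the returned pair is consumed): G_UADDO
// becomes an ADDS whose overflow bit is read back as HS (carry set), e.g.
//   adds w0, w1, w2
//   cset w3, hs        // unsigned-add overflow
// while the signed variants G_SADDO/G_SSUBO test VS (signed overflow) instead.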
4786
4787/// Returns true if @p Val is a tree of AND/OR/CMP operations that can be
4788/// expressed as a conjunction.
4789/// \param CanNegate Set to true if we can negate the whole sub-tree just by
4790/// changing the conditions on the CMP tests.
4791/// (this means we can call emitConjunctionRec() with
4792/// Negate==true on this sub-tree)
4793/// \param MustBeFirst Set to true if this subtree needs to be negated and we
4794/// cannot do the negation naturally. We are required to
4795/// emit the subtree first in this case.
4796/// \param WillNegate Is true if we are called when the result of this
4797/// subexpression must be negated. This happens when the
4798/// outer expression is an OR. We can use this fact to know
4799/// that we have a double negation (or (or ...) ...) that
4800/// can be implemented for free.
4801static bool canEmitConjunction(Register Val, bool &CanNegate, bool &MustBeFirst,
4802 bool WillNegate, MachineRegisterInfo &MRI,
4803 unsigned Depth = 0) {
4804 if (!MRI.hasOneNonDBGUse(Val))
4805 return false;
4806 MachineInstr *ValDef = MRI.getVRegDef(Val);
4807 unsigned Opcode = ValDef->getOpcode();
4808 if (isa<GAnyCmp>(ValDef)) {
4809 CanNegate = true;
4810 MustBeFirst = false;
4811 return true;
4812 }
4813 // Protect against exponential runtime and stack overflow.
4814 if (Depth > 6)
4815 return false;
4816 if (Opcode == TargetOpcode::G_AND || Opcode == TargetOpcode::G_OR) {
4817 bool IsOR = Opcode == TargetOpcode::G_OR;
4818 Register O0 = ValDef->getOperand(1).getReg();
4819 Register O1 = ValDef->getOperand(2).getReg();
4820 bool CanNegateL;
4821 bool MustBeFirstL;
4822 if (!canEmitConjunction(O0, CanNegateL, MustBeFirstL, IsOR, MRI, Depth + 1))
4823 return false;
4824 bool CanNegateR;
4825 bool MustBeFirstR;
4826 if (!canEmitConjunction(O1, CanNegateR, MustBeFirstR, IsOR, MRI, Depth + 1))
4827 return false;
4828
4829 if (MustBeFirstL && MustBeFirstR)
4830 return false;
4831
4832 if (IsOR) {
4833 // For an OR expression we need to be able to naturally negate at least
4834 // one side or we cannot do the transformation at all.
4835 if (!CanNegateL && !CanNegateR)
4836 return false;
4837 // If the result of the OR will be negated and we can naturally negate
4838 // the leaves, then this sub-tree as a whole negates naturally.
4839 CanNegate = WillNegate && CanNegateL && CanNegateR;
4840 // If we cannot naturally negate the whole sub-tree, then this must be
4841 // emitted first.
4842 MustBeFirst = !CanNegate;
4843 } else {
4844 assert(Opcode == TargetOpcode::G_AND && "Must be G_AND");
4845 // We cannot naturally negate an AND operation.
4846 CanNegate = false;
4847 MustBeFirst = MustBeFirstL || MustBeFirstR;
4848 }
4849 return true;
4850 }
4851 return false;
4852}
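// Illustrative example (not from the source): a tree such as
//   %c = G_AND (G_ICMP slt, %a, %b), (G_ICMP sgt, %x, %y)
// has compares at both leaves, so it can be emitted as a CMP followed by a
// CCMP chain. An OR tree additionally needs at least one side whose condition
// can be negated, so that De Morgan's law can rewrite it into the same
// conjunction form.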
4853
4854MachineInstr *AArch64InstructionSelector::emitConditionalComparison(
4855 Register LHS, Register RHS, CmpInst::Predicate CC,
4856 AArch64CC::CondCode Predicate, AArch64CC::CondCode OutCC,
4857 MachineIRBuilder &MIB) const {
4858 // TODO: emit CMN as an optimization.
4859 auto &MRI = *MIB.getMRI();
4860 LLT OpTy = MRI.getType(LHS);
4861 assert(OpTy.getSizeInBits() == 32 || OpTy.getSizeInBits() == 64);
4862 unsigned CCmpOpc;
4863 if (CmpInst::isIntPredicate(CC)) {
4864 CCmpOpc = OpTy.getSizeInBits() == 32 ? AArch64::CCMPWr : AArch64::CCMPXr;
4865 } else {
4866 switch (OpTy.getSizeInBits()) {
4867 case 16:
4868 CCmpOpc = AArch64::FCCMPHrr;
4869 break;
4870 case 32:
4871 CCmpOpc = AArch64::FCCMPSrr;
4872 break;
4873 case 64:
4874 CCmpOpc = AArch64::FCCMPDrr;
4875 break;
4876 default:
4877 return nullptr;
4878 }
4879 }
4880 AArch64CC::CondCode InvOutCC = AArch64CC::getInvertedCondCode(OutCC);
4881 unsigned NZCV = AArch64CC::getNZCVToSatisfyCondCode(InvOutCC);
4882 auto CCmp =
4883 MIB.buildInstr(CCmpOpc, {}, {LHS, RHS}).addImm(NZCV).addImm(Predicate);
4884 constrainSelectedInstRegOperands(*CCmp, TII, TRI, RBI);
4885 return &*CCmp;
4886}
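// Sketch of the emitted form (illustrative): for an integer leaf this produces
// something like
//   ccmp x0, x1, #<nzcv>, <Predicate>
// where <nzcv> is chosen to satisfy the *inverse* of OutCC, i.e. the chain
// fails immediately when the incoming Predicate does not hold, and otherwise
// the new comparison decides OutCC.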
4887
4888MachineInstr *AArch64InstructionSelector::emitConjunctionRec(
4889 Register Val, AArch64CC::CondCode &OutCC, bool Negate, Register CCOp,
4890 AArch64CC::CondCode Predicate, MachineIRBuilder &MIB) const {
4891 // We're at a tree leaf, produce a conditional comparison operation.
4892 auto &MRI = *MIB.getMRI();
4893 MachineInstr *ValDef = MRI.getVRegDef(Val);
4894 unsigned Opcode = ValDef->getOpcode();
4895 if (auto *Cmp = dyn_cast<GAnyCmp>(ValDef)) {
4896 Register LHS = Cmp->getLHSReg();
4897 Register RHS = Cmp->getRHSReg();
4898 CmpInst::Predicate CC = Cmp->getCond();
4899 if (Negate)
4900 CC = CmpInst::getInversePredicate(CC);
4901 if (isa<GICmp>(Cmp)) {
4902 OutCC = changeICMPPredToAArch64CC(CC);
4903 } else {
4904 // Handle special FP cases.
4905 AArch64CC::CondCode ExtraCC;
4906 changeFPCCToANDAArch64CC(CC, OutCC, ExtraCC);
4907 // Some floating point conditions can't be tested with a single condition
4908 // code. Construct an additional comparison in this case.
4909 if (ExtraCC != AArch64CC::AL) {
4910 MachineInstr *ExtraCmp;
4911 if (!CCOp)
4912 ExtraCmp = emitFPCompare(LHS, RHS, MIB, CC);
4913 else
4914 ExtraCmp =
4915 emitConditionalComparison(LHS, RHS, CC, Predicate, ExtraCC, MIB);
4916 CCOp = ExtraCmp->getOperand(0).getReg();
4917 Predicate = ExtraCC;
4918 }
4919 }
4920
4921 // Produce a normal comparison if we are first in the chain
4922 if (!CCOp) {
4923 auto Dst = MRI.cloneVirtualRegister(LHS);
4924 if (isa<GICmp>(Cmp))
4925 return emitSUBS(Dst, Cmp->getOperand(2), Cmp->getOperand(3), MIB);
4926 return emitFPCompare(Cmp->getOperand(2).getReg(),
4927 Cmp->getOperand(3).getReg(), MIB);
4928 }
4929 // Otherwise produce a ccmp.
4930 return emitConditionalComparison(LHS, RHS, CC, Predicate, OutCC, MIB);
4931 }
4932 assert(MRI.hasOneNonDBGUse(Val) && "Valid conjunction/disjunction tree");
4933
4934 bool IsOR = Opcode == TargetOpcode::G_OR;
4935
4936 Register LHS = ValDef->getOperand(1).getReg();
4937 bool CanNegateL;
4938 bool MustBeFirstL;
4939 bool ValidL = canEmitConjunction(LHS, CanNegateL, MustBeFirstL, IsOR, MRI);
4940 assert(ValidL && "Valid conjunction/disjunction tree");
4941 (void)ValidL;
4942
4943 Register RHS = ValDef->getOperand(2).getReg();
4944 bool CanNegateR;
4945 bool MustBeFirstR;
4946 bool ValidR = canEmitConjunction(RHS, CanNegateR, MustBeFirstR, IsOR, MRI);
4947 assert(ValidR && "Valid conjunction/disjunction tree");
4948 (void)ValidR;
4949
4950 // Swap sub-tree that must come first to the right side.
4951 if (MustBeFirstL) {
4952 assert(!MustBeFirstR && "Valid conjunction/disjunction tree");
4953 std::swap(LHS, RHS);
4954 std::swap(CanNegateL, CanNegateR);
4955 std::swap(MustBeFirstL, MustBeFirstR);
4956 }
4957
4958 bool NegateR;
4959 bool NegateAfterR;
4960 bool NegateL;
4961 bool NegateAfterAll;
4962 if (Opcode == TargetOpcode::G_OR) {
4963 // Swap the sub-tree that we can negate naturally to the left.
4964 if (!CanNegateL) {
4965 assert(CanNegateR && "at least one side must be negatable");
4966 assert(!MustBeFirstR && "invalid conjunction/disjunction tree");
4967 assert(!Negate);
4968 std::swap(LHS, RHS);
4969 NegateR = false;
4970 NegateAfterR = true;
4971 } else {
4972 // Negate the left sub-tree if possible, otherwise negate the result.
4973 NegateR = CanNegateR;
4974 NegateAfterR = !CanNegateR;
4975 }
4976 NegateL = true;
4977 NegateAfterAll = !Negate;
4978 } else {
4979 assert(Opcode == TargetOpcode::G_AND &&
4980 "Valid conjunction/disjunction tree");
4981 assert(!Negate && "Valid conjunction/disjunction tree");
4982
4983 NegateL = false;
4984 NegateR = false;
4985 NegateAfterR = false;
4986 NegateAfterAll = false;
4987 }
4988
4989 // Emit sub-trees.
4990 AArch64CC::CondCode RHSCC;
4991 MachineInstr *CmpR =
4992 emitConjunctionRec(RHS, RHSCC, NegateR, CCOp, Predicate, MIB);
4993 if (NegateAfterR)
4994 RHSCC = AArch64CC::getInvertedCondCode(RHSCC);
4995 MachineInstr *CmpL = emitConjunctionRec(
4996 LHS, OutCC, NegateL, CmpR->getOperand(0).getReg(), RHSCC, MIB);
4997 if (NegateAfterAll)
4998 OutCC = AArch64CC::getInvertedCondCode(OutCC);
4999 return CmpL;
5000}
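// Putting the pieces together (an assumed end-to-end example): a tree like
//   %c = G_AND (G_ICMP eq, %a, %z), (G_ICMP eq, %b, %z)
// is emitted right subtree first as a plain SUBS, then the left leaf becomes a
// conditional compare predicated on the right leaf's condition, conceptually
//   cmp  b, z
//   ccmp a, z, #<nzcv failing eq>, eq
// with OutCC = EQ describing the whole conjunction.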
5001
5002MachineInstr *AArch64InstructionSelector::emitConjunction(
5003 Register Val, AArch64CC::CondCode &OutCC, MachineIRBuilder &MIB) const {
5004 bool DummyCanNegate;
5005 bool DummyMustBeFirst;
5006 if (!canEmitConjunction(Val, DummyCanNegate, DummyMustBeFirst, false,
5007 *MIB.getMRI()))
5008 return nullptr;
5009 return emitConjunctionRec(Val, OutCC, false, Register(), AArch64CC::AL, MIB);
5010}
5011
5012bool AArch64InstructionSelector::tryOptSelectConjunction(GSelect &SelI,
5013 MachineInstr &CondMI) {
5014 AArch64CC::CondCode AArch64CC;
5015 MachineInstr *ConjMI = emitConjunction(SelI.getCondReg(), AArch64CC, MIB);
5016 if (!ConjMI)
5017 return false;
5018
5019 emitSelect(SelI.getReg(0), SelI.getTrueReg(), SelI.getFalseReg(), AArch64CC, MIB);
5020 SelI.eraseFromParent();
5021 return true;
5022}
5023
5024bool AArch64InstructionSelector::tryOptSelect(GSelect &I) {
5025 MachineRegisterInfo &MRI = *MIB.getMRI();
5026 // We want to recognize this pattern:
5027 //
5028 // $z = G_FCMP pred, $x, $y
5029 // ...
5030 // $w = G_SELECT $z, $a, $b
5031 //
5032 // Where the value of $z is *only* ever used by the G_SELECT (possibly with
5033 // some copies/truncs in between).
5034 //
5035 // If we see this, then we can emit something like this:
5036 //
5037 // fcmp $x, $y
5038 // fcsel $w, $a, $b, pred
5039 //
5040 // Rather than emitting both of the rather long sequences in the standard
5041 // G_FCMP/G_SELECT select methods.
5042
5043 // First, check if the condition is defined by a compare.
5044 MachineInstr *CondDef = MRI.getVRegDef(I.getOperand(1).getReg());
5045
5046 // We can only fold if all of the defs have one use.
5047 Register CondDefReg = CondDef->getOperand(0).getReg();
5048 if (!MRI.hasOneNonDBGUse(CondDefReg)) {
5049 // Unless it's another select.
5050 for (const MachineInstr &UI : MRI.use_nodbg_instructions(CondDefReg)) {
5051 if (CondDef == &UI)
5052 continue;
5053 if (UI.getOpcode() != TargetOpcode::G_SELECT)
5054 return false;
5055 }
5056 }
5057
5058 // Is the condition defined by a compare?
5059 unsigned CondOpc = CondDef->getOpcode();
5060 if (CondOpc != TargetOpcode::G_ICMP && CondOpc != TargetOpcode::G_FCMP) {
5061 if (tryOptSelectConjunction(I, *CondDef))
5062 return true;
5063 return false;
5064 }
5065
5066 AArch64CC::CondCode CondCode;
5067 if (CondOpc == TargetOpcode::G_ICMP) {
5068 auto Pred =
5069 static_cast<CmpInst::Predicate>(CondDef->getOperand(1).getPredicate());
5070 CondCode = changeICMPPredToAArch64CC(Pred);
5071 emitIntegerCompare(CondDef->getOperand(2), CondDef->getOperand(3),
5072 CondDef->getOperand(1), MIB);
5073 } else {
5074 // Get the condition code for the select.
5075 auto Pred =
5076 static_cast<CmpInst::Predicate>(CondDef->getOperand(1).getPredicate());
5077 AArch64CC::CondCode CondCode2;
5078 changeFCMPPredToAArch64CC(Pred, CondCode, CondCode2);
5079
5080 // changeFCMPPredToAArch64CC sets CondCode2 to AL when we require two
5081 // instructions to emit the comparison.
5082 // TODO: Handle FCMP_UEQ and FCMP_ONE. After that, this check will be
5083 // unnecessary.
5084 if (CondCode2 != AArch64CC::AL)
5085 return false;
5086
5087 if (!emitFPCompare(CondDef->getOperand(2).getReg(),
5088 CondDef->getOperand(3).getReg(), MIB)) {
5089 LLVM_DEBUG(dbgs() << "Couldn't emit compare for select!\n");
5090 return false;
5091 }
5092 }
5093
5094 // Emit the select.
5095 emitSelect(I.getOperand(0).getReg(), I.getOperand(2).getReg(),
5096 I.getOperand(3).getReg(), CondCode, MIB);
5097 I.eraseFromParent();
5098 return true;
5099}
5100
5101MachineInstr *AArch64InstructionSelector::tryFoldIntegerCompare(
5102 MachineOperand &LHS, MachineOperand &RHS, MachineOperand &Predicate,
5103 MachineIRBuilder &MIRBuilder) const {
5104 assert(LHS.isReg() && RHS.isReg() && Predicate.isPredicate() &&
5105 "Unexpected MachineOperand");
5106 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
5107 // We want to find this sort of thing:
5108 // x = G_SUB 0, y
5109 // G_ICMP z, x
5110 //
5111 // In this case, we can fold the G_SUB into the G_ICMP using a CMN instead.
5112 // e.g:
5113 //
5114 // cmn z, y
5115
5116 // Check if the RHS or LHS of the G_ICMP is defined by a SUB
5117 MachineInstr *LHSDef = getDefIgnoringCopies(LHS.getReg(), MRI);
5118 MachineInstr *RHSDef = getDefIgnoringCopies(RHS.getReg(), MRI);
5119 auto P = static_cast<CmpInst::Predicate>(Predicate.getPredicate());
5120 // Given this:
5121 //
5122 // x = G_SUB 0, y
5123 // G_ICMP x, z
5124 //
5125 // Produce this:
5126 //
5127 // cmn y, z
5128 if (isCMN(LHSDef, P, MRI))
5129 return emitCMN(LHSDef->getOperand(2), RHS, MIRBuilder);
5130
5131 // Same idea here, but with the RHS of the compare instead:
5132 //
5133 // Given this:
5134 //
5135 // x = G_SUB 0, y
5136 // G_ICMP z, x
5137 //
5138 // Produce this:
5139 //
5140 // cmn z, y
5141 if (isCMN(RHSDef, P, MRI))
5142 return emitCMN(LHS, RHSDef->getOperand(2), MIRBuilder);
5143
5144 // Given this:
5145 //
5146 // z = G_AND x, y
5147 // G_ICMP z, 0
5148 //
5149 // Produce this if the compare is signed:
5150 //
5151 // tst x, y
5152 if (!CmpInst::isUnsigned(P) && LHSDef &&
5153 LHSDef->getOpcode() == TargetOpcode::G_AND) {
5154 // Make sure that the RHS is 0.
5155 auto ValAndVReg = getIConstantVRegValWithLookThrough(RHS.getReg(), MRI);
5156 if (!ValAndVReg || ValAndVReg->Value != 0)
5157 return nullptr;
5158
5159 return emitTST(LHSDef->getOperand(1),
5160 LHSDef->getOperand(2), MIRBuilder);
5161 }
5162
5163 return nullptr;
5164}
5165
5166bool AArch64InstructionSelector::selectShuffleVector(
5167 MachineInstr &I, MachineRegisterInfo &MRI) {
5168 const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
5169 Register Src1Reg = I.getOperand(1).getReg();
5170 const LLT Src1Ty = MRI.getType(Src1Reg);
5171 Register Src2Reg = I.getOperand(2).getReg();
5172 const LLT Src2Ty = MRI.getType(Src2Reg);
5173 ArrayRef<int> Mask = I.getOperand(3).getShuffleMask();
5174
5175 MachineBasicBlock &MBB = *I.getParent();
5176 MachineFunction &MF = *MBB.getParent();
5177 LLVMContext &Ctx = MF.getFunction().getContext();
5178
5179 // G_SHUFFLE_VECTOR is weird in that the source operands can be scalars, if
5180 // it's originated from a <1 x T> type. Those should have been lowered into
5181 // G_BUILD_VECTOR earlier.
5182 if (!Src1Ty.isVector() || !Src2Ty.isVector()) {
5183 LLVM_DEBUG(dbgs() << "Could not select a \"scalar\" G_SHUFFLE_VECTOR\n");
5184 return false;
5185 }
5186
5187 unsigned BytesPerElt = DstTy.getElementType().getSizeInBits() / 8;
5188
5189 SmallVector<Constant *, 64> CstIdxs;
5190 for (int Val : Mask) {
5191 // For now, we just assume any undef indexes are 0. This should be
5192 // optimized in the future, e.g. to select DUP etc.
5193 Val = Val < 0 ? 0 : Val;
5194 for (unsigned Byte = 0; Byte < BytesPerElt; ++Byte) {
5195 unsigned Offset = Byte + Val * BytesPerElt;
5196 CstIdxs.emplace_back(ConstantInt::get(Type::getInt8Ty(Ctx), Offset));
5197 }
5198 }
5199
5200 // Use a constant pool to load the index vector for TBL.
5201 Constant *CPVal = ConstantVector::get(CstIdxs);
5202 MachineInstr *IndexLoad = emitLoadFromConstantPool(CPVal, MIB);
5203 if (!IndexLoad) {
5204 LLVM_DEBUG(dbgs() << "Could not load from a constant pool");
5205 return false;
5206 }
5207
5208 if (DstTy.getSizeInBits() != 128) {
5209 assert(DstTy.getSizeInBits() == 64 && "Unexpected shuffle result ty");
5210 // This case can be done with TBL1.
5211 MachineInstr *Concat = emitVectorConcat(None, Src1Reg, Src2Reg, MIB);
5212 if (!Concat) {
5213 LLVM_DEBUG(dbgs() << "Could not do vector concat for tbl1");
5214 return false;
5215 }
5216
5217 // The constant pool load will be 64 bits, so we need to convert it to an FPR128 reg.
5218 IndexLoad = emitScalarToVector(64, &AArch64::FPR128RegClass,
5219 IndexLoad->getOperand(0).getReg(), MIB);
5220
5221 auto TBL1 = MIB.buildInstr(
5222 AArch64::TBLv16i8One, {&AArch64::FPR128RegClass},
5223 {Concat->getOperand(0).getReg(), IndexLoad->getOperand(0).getReg()});
5224 constrainSelectedInstRegOperands(*TBL1, TII, TRI, RBI);
5225
5226 auto Copy =
5227 MIB.buildInstr(TargetOpcode::COPY, {I.getOperand(0).getReg()}, {})
5228 .addReg(TBL1.getReg(0), 0, AArch64::dsub);
5229 RBI.constrainGenericRegister(Copy.getReg(0), AArch64::FPR64RegClass, MRI);
5230 I.eraseFromParent();
5231 return true;
5232 }
5233
5234 // For TBL2 we need to emit a REG_SEQUENCE to tie together two consecutive
5235 // Q registers for regalloc.
5236 SmallVector<Register, 2> Regs = {Src1Reg, Src2Reg};
5237 auto RegSeq = createQTuple(Regs, MIB);
5238 auto TBL2 = MIB.buildInstr(AArch64::TBLv16i8Two, {I.getOperand(0)},
5239 {RegSeq, IndexLoad->getOperand(0)});
5240 constrainSelectedInstRegOperands(*TBL2, TII, TRI, RBI);
5241 I.eraseFromParent();
5242 return true;
5243}
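// Worked example of the index build above (illustrative): for a <2 x s32>
// shuffle with mask [1, 3], BytesPerElt is 4, so the TBL index vector is the
// byte sequence 4 5 6 7 12 13 14 15, i.e. element 1 of the concatenated
// sources followed by element 3.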
5244
5245MachineInstr *AArch64InstructionSelector::emitLaneInsert(
5246 Optional<Register> DstReg, Register SrcReg, Register EltReg,
5247 unsigned LaneIdx, const RegisterBank &RB,
5248 MachineIRBuilder &MIRBuilder) const {
5249 MachineInstr *InsElt = nullptr;
5250 const TargetRegisterClass *DstRC = &AArch64::FPR128RegClass;
5251 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
5252
5253 // Create a register to define with the insert if one wasn't passed in.
5254 if (!DstReg)
5255 DstReg = MRI.createVirtualRegister(DstRC);
5256
5257 unsigned EltSize = MRI.getType(EltReg).getSizeInBits();
5258 unsigned Opc = getInsertVecEltOpInfo(RB, EltSize).first;
5259
5260 if (RB.getID() == AArch64::FPRRegBankID) {
5261 auto InsSub = emitScalarToVector(EltSize, DstRC, EltReg, MIRBuilder);
5262 InsElt = MIRBuilder.buildInstr(Opc, {*DstReg}, {SrcReg})
5263 .addImm(LaneIdx)
5264 .addUse(InsSub->getOperand(0).getReg())
5265 .addImm(0);
5266 } else {
5267 InsElt = MIRBuilder.buildInstr(Opc, {*DstReg}, {SrcReg})
5268 .addImm(LaneIdx)
5269 .addUse(EltReg);
5270 }
5271
5272 constrainSelectedInstRegOperands(*InsElt, TII, TRI, RBI);
5273 return InsElt;
5274}
5275
5276bool AArch64InstructionSelector::selectUSMovFromExtend(
5277 MachineInstr &MI, MachineRegisterInfo &MRI) {
5278 if (MI.getOpcode() != TargetOpcode::G_SEXT &&
5279 MI.getOpcode() != TargetOpcode::G_ZEXT &&
5280 MI.getOpcode() != TargetOpcode::G_ANYEXT)
5281 return false;
5282 bool IsSigned = MI.getOpcode() == TargetOpcode::G_SEXT;
5283 const Register DefReg = MI.getOperand(0).getReg();
5284 const LLT DstTy = MRI.getType(DefReg);
5285 unsigned DstSize = DstTy.getSizeInBits();
5286
5287 if (DstSize != 32 && DstSize != 64)
5288 return false;
5289
5290 MachineInstr *Extract = getOpcodeDef(TargetOpcode::G_EXTRACT_VECTOR_ELT,
5291 MI.getOperand(1).getReg(), MRI);
5292 int64_t Lane;
5293 if (!Extract || !mi_match(Extract->getOperand(2).getReg(), MRI, m_ICst(Lane)))
5294 return false;
5295 Register Src0 = Extract->getOperand(1).getReg();
5296
5297 const LLT &VecTy = MRI.getType(Src0);
5298
5299 if (VecTy.getSizeInBits() != 128) {
5300 const MachineInstr *ScalarToVector = emitScalarToVector(
5301 VecTy.getSizeInBits(), &AArch64::FPR128RegClass, Src0, MIB);
5302 assert(ScalarToVector && "Didn't expect emitScalarToVector to fail!");
5303 Src0 = ScalarToVector->getOperand(0).getReg();
5304 }
5305
5306 unsigned Opcode;
5307 if (DstSize == 64 && VecTy.getScalarSizeInBits() == 32)
5308 Opcode = IsSigned ? AArch64::SMOVvi32to64 : AArch64::UMOVvi32;
5309 else if (DstSize == 64 && VecTy.getScalarSizeInBits() == 16)
5310 Opcode = IsSigned ? AArch64::SMOVvi16to64 : AArch64::UMOVvi16;
5311 else if (DstSize == 64 && VecTy.getScalarSizeInBits() == 8)
5312 Opcode = IsSigned ? AArch64::SMOVvi8to64 : AArch64::UMOVvi8;
5313 else if (DstSize == 32 && VecTy.getScalarSizeInBits() == 16)
5314 Opcode = IsSigned ? AArch64::SMOVvi16to32 : AArch64::UMOVvi16;
5315 else if (DstSize == 32 && VecTy.getScalarSizeInBits() == 8)
5316 Opcode = IsSigned ? AArch64::SMOVvi8to32 : AArch64::UMOVvi8;
5317 else
5318 llvm_unreachable("Unexpected type combo for S/UMov!");
5319
5320 // We may need to generate one of these, depending on the type and sign of the
5321 // input:
5322 // DstReg = SMOV Src0, Lane;
5323 // NewReg = UMOV Src0, Lane; DstReg = SUBREG_TO_REG NewReg, sub_32;
5324 MachineInstr *ExtI = nullptr;
5325 if (DstSize == 64 && !IsSigned) {
5326 Register NewReg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
5327 MIB.buildInstr(Opcode, {NewReg}, {Src0}).addImm(Lane);
5328 ExtI = MIB.buildInstr(AArch64::SUBREG_TO_REG, {DefReg}, {})
5329 .addImm(0)
5330 .addUse(NewReg)
5331 .addImm(AArch64::sub_32);
5332 RBI.constrainGenericRegister(DefReg, AArch64::GPR64RegClass, MRI);
5333 } else
5334 ExtI = MIB.buildInstr(Opcode, {DefReg}, {Src0}).addImm(Lane);
5335
5336 constrainSelectedInstRegOperands(*ExtI, TII, TRI, RBI);
5337 MI.eraseFromParent();
5338 return true;
5339}
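// For example (a sketch): extending lane 3 of a v8s16 to s32 comes out as
//   smov w0, v0.h[3]     // G_SEXT
//   umov w0, v0.h[3]     // G_ZEXT / G_ANYEXT
// and the 64-bit unsigned case goes through UMOV plus SUBREG_TO_REG, since a
// UMOV into a W register already zeroes the upper 32 bits.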
5340
5341bool AArch64InstructionSelector::selectInsertElt(MachineInstr &I,
5342 MachineRegisterInfo &MRI) {
5343 assert(I.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT);
5344
5345 // Get information on the destination.
5346 Register DstReg = I.getOperand(0).getReg();
5347 const LLT DstTy = MRI.getType(DstReg);
5348 unsigned VecSize = DstTy.getSizeInBits();
5349
5350 // Get information on the element we want to insert into the destination.
5351 Register EltReg = I.getOperand(2).getReg();
5352 const LLT EltTy = MRI.getType(EltReg);
5353 unsigned EltSize = EltTy.getSizeInBits();
5354 if (EltSize < 16 || EltSize > 64)
5355 return false; // Don't support all element types yet.
5356
5357 // Find the definition of the index. Bail out if it's not defined by a
5358 // G_CONSTANT.
5359 Register IdxReg = I.getOperand(3).getReg();
5360 auto VRegAndVal = getIConstantVRegValWithLookThrough(IdxReg, MRI);
5361 if (!VRegAndVal)
5362 return false;
5363 unsigned LaneIdx = VRegAndVal->Value.getSExtValue();
5364
5365 // Perform the lane insert.
5366 Register SrcReg = I.getOperand(1).getReg();
5367 const RegisterBank &EltRB = *RBI.getRegBank(EltReg, MRI, TRI);
5368
5369 if (VecSize < 128) {
5370 // If the vector we're inserting into is smaller than 128 bits, widen it
5371 // to 128 to do the insert.
5372 MachineInstr *ScalarToVec =
5373 emitScalarToVector(VecSize, &AArch64::FPR128RegClass, SrcReg, MIB);
5374 if (!ScalarToVec)
5375 return false;
5376 SrcReg = ScalarToVec->getOperand(0).getReg();
5377 }
5378
5379 // Create an insert into a new FPR128 register.
5380 // Note that if our vector is already 128 bits, we end up emitting an extra
5381 // register.
5382 MachineInstr *InsMI =
5383 emitLaneInsert(None, SrcReg, EltReg, LaneIdx, EltRB, MIB);
5384
5385 if (VecSize < 128) {
5386 // If we had to widen to perform the insert, then we have to demote back to
5387 // the original size to get the result we want.
5388 Register DemoteVec = InsMI->getOperand(0).getReg();
5389 const TargetRegisterClass *RC =
5390 getRegClassForTypeOnBank(DstTy, *RBI.getRegBank(DemoteVec, MRI, TRI));
5391 if (RC != &AArch64::FPR32RegClass && RC != &AArch64::FPR64RegClass) {
5392 LLVM_DEBUG(dbgs() << "Unsupported register class!\n");
5393 return false;
5394 }
5395 unsigned SubReg = 0;
5396 if (!getSubRegForClass(RC, TRI, SubReg))
5397 return false;
5398 if (SubReg != AArch64::ssub && SubReg != AArch64::dsub) {
5399 LLVM_DEBUG(dbgs() << "Unsupported destination size! (" << VecSize
5400 << ")\n");
5401 return false;
5402 }
5403 MIB.buildInstr(TargetOpcode::COPY, {DstReg}, {})
5404 .addReg(DemoteVec, 0, SubReg);
5405 RBI.constrainGenericRegister(DstReg, *RC, MRI);
5406 } else {
5407 // No widening needed.
5408 InsMI->getOperand(0).setReg(DstReg);
5409 constrainSelectedInstRegOperands(*InsMI, TII, TRI, RBI);
5410 }
5411
5412 I.eraseFromParent();
5413 return true;
5414}
5415
5416MachineInstr *
5417AArch64InstructionSelector::emitConstantVector(Register Dst, Constant *CV,
5418 MachineIRBuilder &MIRBuilder,
5419 MachineRegisterInfo &MRI) {
5420 LLT DstTy = MRI.getType(Dst);
5421 unsigned DstSize = DstTy.getSizeInBits();
5422 if (CV->isNullValue()) {
5423 if (DstSize == 128) {
5424 auto Mov =
5425 MIRBuilder.buildInstr(AArch64::MOVIv2d_ns, {Dst}, {}).addImm(0);
5426 constrainSelectedInstRegOperands(*Mov, TII, TRI, RBI);
5427 return &*Mov;
5428 }
5429
5430 if (DstSize == 64) {
5431 auto Mov =
5432 MIRBuilder
5433 .buildInstr(AArch64::MOVIv2d_ns, {&AArch64::FPR128RegClass}, {})
5434 .addImm(0);
5435 auto Copy = MIRBuilder.buildInstr(TargetOpcode::COPY, {Dst}, {})
5436 .addReg(Mov.getReg(0), 0, AArch64::dsub);
5437 RBI.constrainGenericRegister(Dst, AArch64::FPR64RegClass, MRI);
5438 return &*Copy;
5439 }
5440 }
5441
5442 auto *CPLoad = emitLoadFromConstantPool(CV, MIRBuilder);
5443 if (!CPLoad) {
5444 LLVM_DEBUG(dbgs() << "Could not generate cp load for constant vector!");
5445 return nullptr;
5446 }
5447
5448 auto Copy = MIRBuilder.buildCopy(Dst, CPLoad->getOperand(0));
5449 RBI.constrainGenericRegister(
5450 Dst, *MRI.getRegClass(CPLoad->getOperand(0).getReg()), MRI);
5451 return &*Copy;
5452}
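// Illustrative results (assumed, not additional source): an all-zero 128-bit
// constant becomes
//   movi v0.2d, #0
// the 64-bit zero case is the same MOVI followed by a dsub copy, and any other
// constant falls through to a literal-pool load via emitLoadFromConstantPool.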
5453
5454bool AArch64InstructionSelector::tryOptConstantBuildVec(
5455 MachineInstr &I, LLT DstTy, MachineRegisterInfo &MRI) {
5456 assert(I.getOpcode() == TargetOpcode::G_BUILD_VECTOR);
5457 unsigned DstSize = DstTy.getSizeInBits();
5458 assert(DstSize <= 128 && "Unexpected build_vec type!");
5459 if (DstSize < 32)
5460 return false;
5461 // Check if we're building a constant vector, in which case we want to
5462 // generate a constant pool load instead of a vector insert sequence.
5463 SmallVector<Constant *, 16> Csts;
5464 for (unsigned Idx = 1; Idx < I.getNumOperands(); ++Idx) {
5465 // Try to find G_CONSTANT or G_FCONSTANT
5466 auto *OpMI =
5467 getOpcodeDef(TargetOpcode::G_CONSTANT, I.getOperand(Idx).getReg(), MRI);
5468 if (OpMI)
5469 Csts.emplace_back(
5470 const_cast<ConstantInt *>(OpMI->getOperand(1).getCImm()));
5471 else if ((OpMI = getOpcodeDef(TargetOpcode::G_FCONSTANT,
5472 I.getOperand(Idx).getReg(), MRI)))
5473 Csts.emplace_back(
5474 const_cast<ConstantFP *>(OpMI->getOperand(1).getFPImm()));
5475 else
5476 return false;
5477 }
5478 Constant *CV = ConstantVector::get(Csts);
5479 if (!emitConstantVector(I.getOperand(0).getReg(), CV, MIB, MRI))
5480 return false;
5481 I.eraseFromParent();
5482 return true;
5483}
5484
5485bool AArch64InstructionSelector::tryOptBuildVecToSubregToReg(
5486 MachineInstr &I, MachineRegisterInfo &MRI) {
5487 // Given:
5488 // %vec = G_BUILD_VECTOR %elt, %undef, %undef, ... %undef
5489 //
5490 // Select the G_BUILD_VECTOR as a SUBREG_TO_REG from %elt.
5491 Register Dst = I.getOperand(0).getReg();
5492 Register EltReg = I.getOperand(1).getReg();
5493 LLT EltTy = MRI.getType(EltReg);
5494 // If the destination vector isn't on the same bank as its elements, then
5495 // this can't be a SUBREG_TO_REG.
5496 const RegisterBank &EltRB = *RBI.getRegBank(EltReg, MRI, TRI);
5497 const RegisterBank &DstRB = *RBI.getRegBank(Dst, MRI, TRI);
5498 if (EltRB != DstRB)
5499 return false;
5500 if (any_of(make_range(I.operands_begin() + 2, I.operands_end()),
5501 [&MRI](const MachineOperand &Op) {
5502 return !getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, Op.getReg(),
5503 MRI);
5504 }))
5505 return false;
5506 unsigned SubReg;
5507 const TargetRegisterClass *EltRC = getRegClassForTypeOnBank(EltTy, EltRB);
5508 if (!EltRC)
5509 return false;
5510 const TargetRegisterClass *DstRC =
5511 getRegClassForTypeOnBank(MRI.getType(Dst), DstRB);
5512 if (!DstRC)
5513 return false;
5514 if (!getSubRegForClass(EltRC, TRI, SubReg))
5515 return false;
5516 auto SubregToReg = MIB.buildInstr(AArch64::SUBREG_TO_REG, {Dst}, {})
5517 .addImm(0)
5518 .addUse(EltReg)
5519 .addImm(SubReg);
5520 I.eraseFromParent();
5521 constrainSelectedInstRegOperands(*SubregToReg, TII, TRI, RBI);
5522 return RBI.constrainGenericRegister(Dst, *DstRC, MRI);
5523}
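// Illustrative MIR (assumed subregister names): with a 64-bit element on the
// FPR bank,
//   %vec:fpr(<2 x s64>) = G_BUILD_VECTOR %x(s64), %undef(s64)
// is selected as
//   %vec = SUBREG_TO_REG 0, %x, %subreg.dsub
// leaving the upper lane undefined, which is exactly what the G_IMPLICIT_DEF
// operands permit.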
5524
5525bool AArch64InstructionSelector::selectBuildVector(MachineInstr &I,
5526 MachineRegisterInfo &MRI) {
5527 assert(I.getOpcode() == TargetOpcode::G_BUILD_VECTOR);
5528 // Until we port more of the optimized selections, for now just use a vector
5529 // insert sequence.
5530 const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
5531 const LLT EltTy = MRI.getType(I.getOperand(1).getReg());
5532 unsigned EltSize = EltTy.getSizeInBits();
5533
5534 if (tryOptConstantBuildVec(I, DstTy, MRI))
5535 return true;
5536 if (tryOptBuildVecToSubregToReg(I, MRI))
5537 return true;
5538
5539 if (EltSize < 16 || EltSize > 64)
5540 return false; // Don't support all element types yet.
5541 const RegisterBank &RB = *RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI);
5542
5543 const TargetRegisterClass *DstRC = &AArch64::FPR128RegClass;
5544 MachineInstr *ScalarToVec =
5545 emitScalarToVector(DstTy.getElementType().getSizeInBits(), DstRC,
5546 I.getOperand(1).getReg(), MIB);
5547 if (!ScalarToVec)
5548 return false;
5549
5550 Register DstVec = ScalarToVec->getOperand(0).getReg();
5551 unsigned DstSize = DstTy.getSizeInBits();
5552
5553 // Keep track of the last MI we inserted. Later on, we might be able to save
5554 // a copy using it.
5555 MachineInstr *PrevMI = nullptr;
5556 for (unsigned i = 2, e = DstSize / EltSize + 1; i < e; ++i) {
5557 // Note that if we don't do a subregister copy, we can end up making an
5558 // extra register.
5559 PrevMI = &*emitLaneInsert(None, DstVec, I.getOperand(i).getReg(), i - 1, RB,
5560 MIB);
5561 DstVec = PrevMI->getOperand(0).getReg();
5562 }
5563
5564 // If DstTy's size in bits is less than 128, then emit a subregister copy
5565 // from DstVec to the last register we've defined.
5566 if (DstSize < 128) {
5567 // Force this to be FPR using the destination vector.
5568 const TargetRegisterClass *RC =
5569 getRegClassForTypeOnBank(DstTy, *RBI.getRegBank(DstVec, MRI, TRI));
5570 if (!RC)
5571 return false;
5572 if (RC != &AArch64::FPR32RegClass && RC != &AArch64::FPR64RegClass) {
5573 LLVM_DEBUG(dbgs() << "Unsupported register class!\n");
5574 return false;
5575 }
5576
5577 unsigned SubReg = 0;
5578 if (!getSubRegForClass(RC, TRI, SubReg))
5579 return false;
5580 if (SubReg != AArch64::ssub && SubReg != AArch64::dsub) {
5581 LLVM_DEBUG(dbgs() << "Unsupported destination size! (" << DstSize
5582 << ")\n");
5583 return false;
5584 }
5585
5586 Register Reg = MRI.createVirtualRegister(RC);
5587 Register DstReg = I.getOperand(0).getReg();
5588
5589 MIB.buildInstr(TargetOpcode::COPY, {DstReg}, {}).addReg(DstVec, 0, SubReg);
5590 MachineOperand &RegOp = I.getOperand(1);
5591 RegOp.setReg(Reg);
5592 RBI.constrainGenericRegister(DstReg, *RC, MRI);
5593 } else {
5594 // We don't need a subregister copy. Save a copy by re-using the
5595 // destination register on the final insert.
5596 assert(PrevMI && "PrevMI was null?");
5597 PrevMI->getOperand(0).setReg(I.getOperand(0).getReg());
5598 constrainSelectedInstRegOperands(*PrevMI, TII, TRI, RBI);
5599 }
5600
5601 I.eraseFromParent();
5602 return true;
5603}
5604
5605bool AArch64InstructionSelector::selectVectorLoadIntrinsic(unsigned Opc,
5606 unsigned NumVecs,
5607 MachineInstr &I) {
5608 assert(I.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS);
5609 assert(Opc && "Expected an opcode?");
5610 assert(NumVecs > 1 && NumVecs < 5 && "Only support 2, 3, or 4 vectors");
5611 auto &MRI = *MIB.getMRI();
5612 LLT Ty = MRI.getType(I.getOperand(0).getReg());
5613 unsigned Size = Ty.getSizeInBits();
5614 assert((Size == 64 || Size == 128) &&
5615 "Destination must be 64 bits or 128 bits?");
5616 unsigned SubReg = Size == 64 ? AArch64::dsub0 : AArch64::qsub0;
5617 auto Ptr = I.getOperand(I.getNumOperands() - 1).getReg();
5618 assert(MRI.getType(Ptr).isPointer() && "Expected a pointer type?");
5619 auto Load = MIB.buildInstr(Opc, {Ty}, {Ptr});
5620 Load.cloneMemRefs(I);
5621 constrainSelectedInstRegOperands(*Load, TII, TRI, RBI);
5622 Register SelectedLoadDst = Load->getOperand(0).getReg();
5623 for (unsigned Idx = 0; Idx < NumVecs; ++Idx) {
5624 auto Vec = MIB.buildInstr(TargetOpcode::COPY, {I.getOperand(Idx)}, {})
5625 .addReg(SelectedLoadDst, 0, SubReg + Idx);
5626 // Emit the subreg copies and immediately select them.
5627 // FIXME: We should refactor our copy code into an emitCopy helper and
5628 // clean up uses of this pattern elsewhere in the selector.
5629 selectCopy(*Vec, TII, MRI, TRI, RBI);
5630 }
5631 return true;
5632}
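// Sketch for NumVecs == 2 with 128-bit vectors (illustrative): this emits a
// single tuple-defining load such as
//   ld2 { v0.4s, v1.4s }, [x0]
// and then peels the two results out of the tuple with COPYs through the
// qsub0 and qsub1 sub-registers.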
5633
5634bool AArch64InstructionSelector::selectIntrinsicWithSideEffects(
5635 MachineInstr &I, MachineRegisterInfo &MRI) {
5636 // Find the intrinsic ID.
5637 unsigned IntrinID = I.getIntrinsicID();
5638
5639 const LLT S8 = LLT::scalar(8);
5640 const LLT S16 = LLT::scalar(16);
5641 const LLT S32 = LLT::scalar(32);
5642 const LLT S64 = LLT::scalar(64);
5643 const LLT P0 = LLT::pointer(0, 64);
5644 // Select the instruction.
5645 switch (IntrinID) {
5646 default:
5647 return false;
5648 case Intrinsic::aarch64_ldxp:
5649 case Intrinsic::aarch64_ldaxp: {
5650 auto NewI = MIB.buildInstr(
5651 IntrinID == Intrinsic::aarch64_ldxp ? AArch64::LDXPX : AArch64::LDAXPX,
5652 {I.getOperand(0).getReg(), I.getOperand(1).getReg()},
5653 {I.getOperand(3)});
5654 NewI.cloneMemRefs(I);
5655 constrainSelectedInstRegOperands(*NewI, TII, TRI, RBI);
5656 break;
5657 }
5658 case Intrinsic::trap:
5659 MIB.buildInstr(AArch64::BRK, {}, {}).addImm(1);
5660 break;
5661 case Intrinsic::debugtrap:
5662 MIB.buildInstr(AArch64::BRK, {}, {}).addImm(0xF000);
5663 break;
5664 case Intrinsic::ubsantrap:
5665 MIB.buildIns