Bug Summary

File: /build/llvm-toolchain-snapshot-16~++20221003111214+1fa2019828ca/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
Warning: line 6524, column 65
The result of the left shift is undefined due to shifting by '4294967295', which is greater or equal to the width of type 'int'
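
The flagged count 4294967295 is UINT_MAX (2^32 - 1), the value a shift amount takes when a signed -1 or an unsigned wrap-around feeds it; shifting a 32-bit 'int' by 32 or more is undefined behavior in C++. Below is a minimal sketch of this class of bug and a guarded alternative; the names are hypothetical and do not reproduce the code at line 6524:

  #include <cstdint>
  #include <optional>

  // Return '1 << Amount' only when Amount is a valid shift count for 'int'.
  // When Immed is 0, 'Immed - 1' wraps to 4294967295, so an unguarded shift
  // would be exactly the undefined behavior the analyzer reports.
  std::optional<int> encodeShiftedBit(uint64_t Immed) {
    uint32_t Amount = static_cast<uint32_t>(Immed) - 1; // wraps when Immed == 0
    if (Amount >= 32)
      return std::nullopt; // reject out-of-range counts instead of shifting
    return 1 << Amount;    // well-defined: 0 <= Amount <= 31
  }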

Annotated Source Code

clang -cc1 -cc1 -triple x86_64-pc-linux-gnu -analyze -disable-free -clear-ast-before-backend -disable-llvm-verifier -discard-value-names -main-file-name AArch64InstructionSelector.cpp -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=cplusplus -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -analyzer-config-compatibility-mode=true -mrelocation-model pic -pic-level 2 -mframe-pointer=none -fmath-errno -ffp-contract=on -fno-rounding-math -mconstructor-aliases -funwind-tables=2 -target-cpu x86-64 -tune-cpu generic -debugger-tuning=gdb -ffunction-sections -fdata-sections -fcoverage-compilation-dir=/build/llvm-toolchain-snapshot-16~++20221003111214+1fa2019828ca/build-llvm/tools/clang/stage2-bins -resource-dir /usr/lib/llvm-16/lib/clang/16.0.0 -D _DEBUG -D _GNU_SOURCE -D __STDC_CONSTANT_MACROS -D __STDC_FORMAT_MACROS -D __STDC_LIMIT_MACROS -I lib/Target/AArch64 -I /build/llvm-toolchain-snapshot-16~++20221003111214+1fa2019828ca/llvm/lib/Target/AArch64 -I include -I /build/llvm-toolchain-snapshot-16~++20221003111214+1fa2019828ca/llvm/include -D _FORTIFY_SOURCE=2 -D NDEBUG -U NDEBUG -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/c++/10 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/x86_64-linux-gnu/c++/10 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/c++/10/backward -internal-isystem /usr/lib/llvm-16/lib/clang/16.0.0/include -internal-isystem /usr/local/include -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../x86_64-linux-gnu/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -fmacro-prefix-map=/build/llvm-toolchain-snapshot-16~++20221003111214+1fa2019828ca/build-llvm/tools/clang/stage2-bins=build-llvm/tools/clang/stage2-bins -fmacro-prefix-map=/build/llvm-toolchain-snapshot-16~++20221003111214+1fa2019828ca/= -fcoverage-prefix-map=/build/llvm-toolchain-snapshot-16~++20221003111214+1fa2019828ca/build-llvm/tools/clang/stage2-bins=build-llvm/tools/clang/stage2-bins -fcoverage-prefix-map=/build/llvm-toolchain-snapshot-16~++20221003111214+1fa2019828ca/= -O2 -Wno-unused-command-line-argument -Wno-unused-parameter -Wwrite-strings -Wno-missing-field-initializers -Wno-long-long -Wno-maybe-uninitialized -Wno-class-memaccess -Wno-redundant-move -Wno-pessimizing-move -Wno-noexcept-type -Wno-comment -Wno-misleading-indentation -std=c++17 -fdeprecated-macro -fdebug-compilation-dir=/build/llvm-toolchain-snapshot-16~++20221003111214+1fa2019828ca/build-llvm/tools/clang/stage2-bins -fdebug-prefix-map=/build/llvm-toolchain-snapshot-16~++20221003111214+1fa2019828ca/build-llvm/tools/clang/stage2-bins=build-llvm/tools/clang/stage2-bins -fdebug-prefix-map=/build/llvm-toolchain-snapshot-16~++20221003111214+1fa2019828ca/= -ferror-limit 19 -fvisibility=hidden -fvisibility-inlines-hidden -stack-protector 2 -fgnuc-version=4.2.1 -fcolor-diagnostics -vectorize-loops -vectorize-slp -analyzer-output=html -analyzer-config stable-report-filename=true -faddrsig -D__GCC_HAVE_DWARF2_CFI_ASM=1 -o 
/tmp/scan-build-2022-10-03-140002-15933-1 -x c++ /build/llvm-toolchain-snapshot-16~++20221003111214+1fa2019828ca/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp

/build/llvm-toolchain-snapshot-16~++20221003111214+1fa2019828ca/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp

1//===- AArch64InstructionSelector.cpp ----------------------------*- C++ -*-==//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8/// \file
9/// This file implements the targeting of the InstructionSelector class for
10/// AArch64.
11/// \todo This should be generated by TableGen.
12//===----------------------------------------------------------------------===//
13
14#include "AArch64GlobalISelUtils.h"
15#include "AArch64InstrInfo.h"
16#include "AArch64MachineFunctionInfo.h"
17#include "AArch64RegisterBankInfo.h"
18#include "AArch64RegisterInfo.h"
19#include "AArch64Subtarget.h"
20#include "AArch64TargetMachine.h"
21#include "MCTargetDesc/AArch64AddressingModes.h"
22#include "MCTargetDesc/AArch64MCTargetDesc.h"
23#include "llvm/ADT/Optional.h"
24#include "llvm/BinaryFormat/Dwarf.h"
25#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
26#include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
27#include "llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h"
28#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
29#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
30#include "llvm/CodeGen/GlobalISel/Utils.h"
31#include "llvm/CodeGen/MachineBasicBlock.h"
32#include "llvm/CodeGen/MachineConstantPool.h"
33#include "llvm/CodeGen/MachineFrameInfo.h"
34#include "llvm/CodeGen/MachineFunction.h"
35#include "llvm/CodeGen/MachineInstr.h"
36#include "llvm/CodeGen/MachineInstrBuilder.h"
37#include "llvm/CodeGen/MachineMemOperand.h"
38#include "llvm/CodeGen/MachineOperand.h"
39#include "llvm/CodeGen/MachineRegisterInfo.h"
40#include "llvm/CodeGen/TargetOpcodes.h"
41#include "llvm/IR/Constants.h"
42#include "llvm/IR/DerivedTypes.h"
43#include "llvm/IR/Instructions.h"
44#include "llvm/IR/IntrinsicsAArch64.h"
45#include "llvm/IR/PatternMatch.h"
46#include "llvm/IR/Type.h"
47#include "llvm/Pass.h"
48#include "llvm/Support/Debug.h"
49#include "llvm/Support/raw_ostream.h"
50
51#define DEBUG_TYPE"aarch64-isel" "aarch64-isel"
52
53using namespace llvm;
54using namespace MIPatternMatch;
55using namespace AArch64GISelUtils;
56
57namespace llvm {
58class BlockFrequencyInfo;
59class ProfileSummaryInfo;
60}
61
62namespace {
63
64#define GET_GLOBALISEL_PREDICATE_BITSET
65#include "AArch64GenGlobalISel.inc"
66#undef GET_GLOBALISEL_PREDICATE_BITSET
67
68
69class AArch64InstructionSelector : public InstructionSelector {
70public:
71 AArch64InstructionSelector(const AArch64TargetMachine &TM,
72 const AArch64Subtarget &STI,
73 const AArch64RegisterBankInfo &RBI);
74
75 bool select(MachineInstr &I) override;
76 static const char *getName() { return DEBUG_TYPE; }
77
78 void setupMF(MachineFunction &MF, GISelKnownBits *KB,
79 CodeGenCoverage &CoverageInfo, ProfileSummaryInfo *PSI,
80 BlockFrequencyInfo *BFI) override {
81 InstructionSelector::setupMF(MF, KB, CoverageInfo, PSI, BFI);
82 MIB.setMF(MF);
83
84 // hasFnAttribute() is expensive to call on every BRCOND selection, so
85 // cache it here for each run of the selector.
86 ProduceNonFlagSettingCondBr =
87 !MF.getFunction().hasFnAttribute(Attribute::SpeculativeLoadHardening);
88 MFReturnAddr = Register();
89
90 processPHIs(MF);
91 }
92
93private:
94 /// tblgen-erated 'select' implementation, used as the initial selector for
95 /// the patterns that don't require complex C++.
96 bool selectImpl(MachineInstr &I, CodeGenCoverage &CoverageInfo) const;
97
98 // A lowering phase that runs before any selection attempts.
99 // Returns true if the instruction was modified.
100 bool preISelLower(MachineInstr &I);
101
102 // An early selection function that runs before the selectImpl() call.
103 bool earlySelect(MachineInstr &I);
104
105 // Do some preprocessing of G_PHIs before we begin selection.
106 void processPHIs(MachineFunction &MF);
107
108 bool earlySelectSHL(MachineInstr &I, MachineRegisterInfo &MRI);
109
110 /// Eliminate same-sized cross-bank copies into stores before selectImpl().
111 bool contractCrossBankCopyIntoStore(MachineInstr &I,
112 MachineRegisterInfo &MRI);
113
114 bool convertPtrAddToAdd(MachineInstr &I, MachineRegisterInfo &MRI);
115
116 bool selectVaStartAAPCS(MachineInstr &I, MachineFunction &MF,
117 MachineRegisterInfo &MRI) const;
118 bool selectVaStartDarwin(MachineInstr &I, MachineFunction &MF,
119 MachineRegisterInfo &MRI) const;
120
121 ///@{
122 /// Helper functions for selectCompareBranch.
123 bool selectCompareBranchFedByFCmp(MachineInstr &I, MachineInstr &FCmp,
124 MachineIRBuilder &MIB) const;
125 bool selectCompareBranchFedByICmp(MachineInstr &I, MachineInstr &ICmp,
126 MachineIRBuilder &MIB) const;
127 bool tryOptCompareBranchFedByICmp(MachineInstr &I, MachineInstr &ICmp,
128 MachineIRBuilder &MIB) const;
129 bool tryOptAndIntoCompareBranch(MachineInstr &AndInst, bool Invert,
130 MachineBasicBlock *DstMBB,
131 MachineIRBuilder &MIB) const;
132 ///@}
133
134 bool selectCompareBranch(MachineInstr &I, MachineFunction &MF,
135 MachineRegisterInfo &MRI);
136
137 bool selectVectorAshrLshr(MachineInstr &I, MachineRegisterInfo &MRI);
138 bool selectVectorSHL(MachineInstr &I, MachineRegisterInfo &MRI);
139
140 // Helper to generate an equivalent of scalar_to_vector into a new register,
141 // returned via 'Dst'.
142 MachineInstr *emitScalarToVector(unsigned EltSize,
143 const TargetRegisterClass *DstRC,
144 Register Scalar,
145 MachineIRBuilder &MIRBuilder) const;
146
147 /// Emit a lane insert into \p DstReg, or a new vector register if None is
148 /// provided.
149 ///
150 /// The lane inserted into is defined by \p LaneIdx. The vector source
151 /// register is given by \p SrcReg. The register containing the element is
152 /// given by \p EltReg.
153 MachineInstr *emitLaneInsert(Optional<Register> DstReg, Register SrcReg,
154 Register EltReg, unsigned LaneIdx,
155 const RegisterBank &RB,
156 MachineIRBuilder &MIRBuilder) const;
157
158 /// Emit a sequence of instructions representing a constant \p CV for a
159 /// vector register \p Dst. (E.g. a MOV, or a load from a constant pool.)
160 ///
161 /// \returns the last instruction in the sequence on success, and nullptr
162 /// otherwise.
163 MachineInstr *emitConstantVector(Register Dst, Constant *CV,
164 MachineIRBuilder &MIRBuilder,
165 MachineRegisterInfo &MRI);
166
167 bool selectInsertElt(MachineInstr &I, MachineRegisterInfo &MRI);
168 bool tryOptConstantBuildVec(MachineInstr &MI, LLT DstTy,
169 MachineRegisterInfo &MRI);
170 /// \returns true if a G_BUILD_VECTOR instruction \p MI can be selected as a
171 /// SUBREG_TO_REG.
172 bool tryOptBuildVecToSubregToReg(MachineInstr &MI, MachineRegisterInfo &MRI);
173 bool selectBuildVector(MachineInstr &I, MachineRegisterInfo &MRI);
174 bool selectMergeValues(MachineInstr &I, MachineRegisterInfo &MRI);
175 bool selectUnmergeValues(MachineInstr &I, MachineRegisterInfo &MRI);
176
177 bool selectShuffleVector(MachineInstr &I, MachineRegisterInfo &MRI);
178 bool selectExtractElt(MachineInstr &I, MachineRegisterInfo &MRI);
179 bool selectConcatVectors(MachineInstr &I, MachineRegisterInfo &MRI);
180 bool selectSplitVectorUnmerge(MachineInstr &I, MachineRegisterInfo &MRI);
181
182 /// Helper function to select vector load intrinsics like
183 /// @llvm.aarch64.neon.ld2.*, @llvm.aarch64.neon.ld4.*, etc.
184 /// \p Opc is the opcode that the selected instruction should use.
185 /// \p NumVecs is the number of vector destinations for the instruction.
186 /// \p I is the original G_INTRINSIC_W_SIDE_EFFECTS instruction.
187 bool selectVectorLoadIntrinsic(unsigned Opc, unsigned NumVecs,
188 MachineInstr &I);
189 bool selectIntrinsicWithSideEffects(MachineInstr &I,
190 MachineRegisterInfo &MRI);
191 bool selectIntrinsic(MachineInstr &I, MachineRegisterInfo &MRI);
192 bool selectVectorICmp(MachineInstr &I, MachineRegisterInfo &MRI);
193 bool selectIntrinsicTrunc(MachineInstr &I, MachineRegisterInfo &MRI) const;
194 bool selectIntrinsicRound(MachineInstr &I, MachineRegisterInfo &MRI) const;
195 bool selectJumpTable(MachineInstr &I, MachineRegisterInfo &MRI);
196 bool selectBrJT(MachineInstr &I, MachineRegisterInfo &MRI);
197 bool selectTLSGlobalValue(MachineInstr &I, MachineRegisterInfo &MRI);
198 bool selectReduction(MachineInstr &I, MachineRegisterInfo &MRI);
199 bool selectMOPS(MachineInstr &I, MachineRegisterInfo &MRI);
200 bool selectUSMovFromExtend(MachineInstr &I, MachineRegisterInfo &MRI);
201
202 unsigned emitConstantPoolEntry(const Constant *CPVal,
203 MachineFunction &MF) const;
204 MachineInstr *emitLoadFromConstantPool(const Constant *CPVal,
205 MachineIRBuilder &MIRBuilder) const;
206
207 // Emit a vector concat operation.
208 MachineInstr *emitVectorConcat(Optional<Register> Dst, Register Op1,
209 Register Op2,
210 MachineIRBuilder &MIRBuilder) const;
211
212 // Emit an integer compare between LHS and RHS, which checks for Predicate.
213 MachineInstr *emitIntegerCompare(MachineOperand &LHS, MachineOperand &RHS,
214 MachineOperand &Predicate,
215 MachineIRBuilder &MIRBuilder) const;
216
217 /// Emit a floating point comparison between \p LHS and \p RHS.
218 /// \p Pred if given is the intended predicate to use.
219 MachineInstr *emitFPCompare(Register LHS, Register RHS,
220 MachineIRBuilder &MIRBuilder,
221 Optional<CmpInst::Predicate> = None) const;
222
223 MachineInstr *emitInstr(unsigned Opcode,
224 std::initializer_list<llvm::DstOp> DstOps,
225 std::initializer_list<llvm::SrcOp> SrcOps,
226 MachineIRBuilder &MIRBuilder,
227 const ComplexRendererFns &RenderFns = None) const;
228 /// Helper function to emit an add or sub instruction.
229 ///
230 /// \p AddrModeAndSizeToOpcode must contain each of the opcode variants above
231 /// in a specific order.
232 ///
233 /// Below is an example of the expected input to \p AddrModeAndSizeToOpcode.
234 ///
235 /// \code
236 /// const std::array<std::array<unsigned, 2>, 4> Table {
237 /// {{AArch64::ADDXri, AArch64::ADDWri},
238 /// {AArch64::ADDXrs, AArch64::ADDWrs},
239 /// {AArch64::ADDXrr, AArch64::ADDWrr},
240 /// {AArch64::SUBXri, AArch64::SUBWri},
241 /// {AArch64::ADDXrx, AArch64::ADDWrx}}};
242 /// \endcode
243 ///
244 /// Each row in the table corresponds to a different addressing mode. Each
245 /// column corresponds to a different register size.
246 ///
247 /// \attention Rows must be structured as follows:
248 /// - Row 0: The ri opcode variants
249 /// - Row 1: The rs opcode variants
250 /// - Row 2: The rr opcode variants
251 /// - Row 3: The ri opcode variants for negative immediates
252 /// - Row 4: The rx opcode variants
253 ///
254 /// \attention Columns must be structured as follows:
255 /// - Column 0: The 64-bit opcode variants
256 /// - Column 1: The 32-bit opcode variants
257 ///
258 /// \p Dst is the destination register of the binop to emit.
259 /// \p LHS is the left-hand operand of the binop to emit.
260 /// \p RHS is the right-hand operand of the binop to emit.
261 MachineInstr *emitAddSub(
262 const std::array<std::array<unsigned, 2>, 5> &AddrModeAndSizeToOpcode,
263 Register Dst, MachineOperand &LHS, MachineOperand &RHS,
264 MachineIRBuilder &MIRBuilder) const;
265 MachineInstr *emitADD(Register DefReg, MachineOperand &LHS,
266 MachineOperand &RHS,
267 MachineIRBuilder &MIRBuilder) const;
268 MachineInstr *emitADDS(Register Dst, MachineOperand &LHS, MachineOperand &RHS,
269 MachineIRBuilder &MIRBuilder) const;
270 MachineInstr *emitSUBS(Register Dst, MachineOperand &LHS, MachineOperand &RHS,
271 MachineIRBuilder &MIRBuilder) const;
272 MachineInstr *emitCMN(MachineOperand &LHS, MachineOperand &RHS,
273 MachineIRBuilder &MIRBuilder) const;
274 MachineInstr *emitTST(MachineOperand &LHS, MachineOperand &RHS,
275 MachineIRBuilder &MIRBuilder) const;
276 MachineInstr *emitSelect(Register Dst, Register LHS, Register RHS,
277 AArch64CC::CondCode CC,
278 MachineIRBuilder &MIRBuilder) const;
279 MachineInstr *emitExtractVectorElt(Optional<Register> DstReg,
280 const RegisterBank &DstRB, LLT ScalarTy,
281 Register VecReg, unsigned LaneIdx,
282 MachineIRBuilder &MIRBuilder) const;
283 MachineInstr *emitCSINC(Register Dst, Register Src1, Register Src2,
284 AArch64CC::CondCode Pred,
285 MachineIRBuilder &MIRBuilder) const;
286 /// Emit a CSet for a FP compare.
287 ///
288 /// \p Dst is expected to be a 32-bit scalar register.
289 MachineInstr *emitCSetForFCmp(Register Dst, CmpInst::Predicate Pred,
290 MachineIRBuilder &MIRBuilder) const;
291
292 /// Emit the overflow op for \p Opcode.
293 ///
294 /// \p Opcode is expected to be an overflow op's opcode, e.g. G_UADDO,
295 /// G_USUBO, etc.
296 std::pair<MachineInstr *, AArch64CC::CondCode>
297 emitOverflowOp(unsigned Opcode, Register Dst, MachineOperand &LHS,
298 MachineOperand &RHS, MachineIRBuilder &MIRBuilder) const;
299
300 /// Emit expression as a conjunction (a series of CCMP/CFCMP ops).
301 /// In some cases this is even possible with OR operations in the expression.
302 MachineInstr *emitConjunction(Register Val, AArch64CC::CondCode &OutCC,
303 MachineIRBuilder &MIB) const;
304 MachineInstr *emitConditionalComparison(Register LHS, Register RHS,
305 CmpInst::Predicate CC,
306 AArch64CC::CondCode Predicate,
307 AArch64CC::CondCode OutCC,
308 MachineIRBuilder &MIB) const;
309 MachineInstr *emitConjunctionRec(Register Val, AArch64CC::CondCode &OutCC,
310 bool Negate, Register CCOp,
311 AArch64CC::CondCode Predicate,
312 MachineIRBuilder &MIB) const;
313
314 /// Emit a TB(N)Z instruction which tests \p Bit in \p TestReg.
315 /// \p IsNegative is true if the test should be "not zero".
316 /// This will also optimize the test bit instruction when possible.
317 MachineInstr *emitTestBit(Register TestReg, uint64_t Bit, bool IsNegative,
318 MachineBasicBlock *DstMBB,
319 MachineIRBuilder &MIB) const;
320
321 /// Emit a CB(N)Z instruction which branches to \p DestMBB.
322 MachineInstr *emitCBZ(Register CompareReg, bool IsNegative,
323 MachineBasicBlock *DestMBB,
324 MachineIRBuilder &MIB) const;
325
326 // Equivalent to the i32shift_a and friends from AArch64InstrInfo.td.
327 // We use these manually instead of using the importer since it doesn't
328 // support SDNodeXForm.
329 ComplexRendererFns selectShiftA_32(const MachineOperand &Root) const;
330 ComplexRendererFns selectShiftB_32(const MachineOperand &Root) const;
331 ComplexRendererFns selectShiftA_64(const MachineOperand &Root) const;
332 ComplexRendererFns selectShiftB_64(const MachineOperand &Root) const;
333
334 ComplexRendererFns select12BitValueWithLeftShift(uint64_t Immed) const;
335 ComplexRendererFns selectArithImmed(MachineOperand &Root) const;
336 ComplexRendererFns selectNegArithImmed(MachineOperand &Root) const;
337
338 ComplexRendererFns selectAddrModeUnscaled(MachineOperand &Root,
339 unsigned Size) const;
340
341 ComplexRendererFns selectAddrModeUnscaled8(MachineOperand &Root) const {
342 return selectAddrModeUnscaled(Root, 1);
343 }
344 ComplexRendererFns selectAddrModeUnscaled16(MachineOperand &Root) const {
345 return selectAddrModeUnscaled(Root, 2);
346 }
347 ComplexRendererFns selectAddrModeUnscaled32(MachineOperand &Root) const {
348 return selectAddrModeUnscaled(Root, 4);
349 }
350 ComplexRendererFns selectAddrModeUnscaled64(MachineOperand &Root) const {
351 return selectAddrModeUnscaled(Root, 8);
352 }
353 ComplexRendererFns selectAddrModeUnscaled128(MachineOperand &Root) const {
354 return selectAddrModeUnscaled(Root, 16);
355 }
356
357 /// Helper to try to fold in a GISEL_ADD_LOW into an immediate, to be used
358 /// from complex pattern matchers like selectAddrModeIndexed().
359 ComplexRendererFns tryFoldAddLowIntoImm(MachineInstr &RootDef, unsigned Size,
360 MachineRegisterInfo &MRI) const;
361
362 ComplexRendererFns selectAddrModeIndexed(MachineOperand &Root,
363 unsigned Size) const;
364 template <int Width>
365 ComplexRendererFns selectAddrModeIndexed(MachineOperand &Root) const {
366 return selectAddrModeIndexed(Root, Width / 8);
367 }
368
369 bool isWorthFoldingIntoExtendedReg(MachineInstr &MI,
370 const MachineRegisterInfo &MRI) const;
371 ComplexRendererFns
372 selectAddrModeShiftedExtendXReg(MachineOperand &Root,
373 unsigned SizeInBytes) const;
374
375 /// Returns a \p ComplexRendererFns which contains a base, offset, and whether
376 /// or not a shift + extend should be folded into an addressing mode. Returns
377 /// None when this is not profitable or possible.
378 ComplexRendererFns
379 selectExtendedSHL(MachineOperand &Root, MachineOperand &Base,
380 MachineOperand &Offset, unsigned SizeInBytes,
381 bool WantsExt) const;
382 ComplexRendererFns selectAddrModeRegisterOffset(MachineOperand &Root) const;
383 ComplexRendererFns selectAddrModeXRO(MachineOperand &Root,
384 unsigned SizeInBytes) const;
385 template <int Width>
386 ComplexRendererFns selectAddrModeXRO(MachineOperand &Root) const {
387 return selectAddrModeXRO(Root, Width / 8);
388 }
389
390 ComplexRendererFns selectAddrModeWRO(MachineOperand &Root,
391 unsigned SizeInBytes) const;
392 template <int Width>
393 ComplexRendererFns selectAddrModeWRO(MachineOperand &Root) const {
394 return selectAddrModeWRO(Root, Width / 8);
395 }
396
397 ComplexRendererFns selectShiftedRegister(MachineOperand &Root,
398 bool AllowROR = false) const;
399
400 ComplexRendererFns selectArithShiftedRegister(MachineOperand &Root) const {
401 return selectShiftedRegister(Root);
402 }
403
404 ComplexRendererFns selectLogicalShiftedRegister(MachineOperand &Root) const {
405 return selectShiftedRegister(Root, true);
406 }
407
408 /// Given an extend instruction, determine the correct shift-extend type for
409 /// that instruction.
410 ///
411 /// If the instruction is going to be used in a load or store, pass
412 /// \p IsLoadStore = true.
413 AArch64_AM::ShiftExtendType
414 getExtendTypeForInst(MachineInstr &MI, MachineRegisterInfo &MRI,
415 bool IsLoadStore = false) const;
416
417 /// Move \p Reg to \p RC if \p Reg is not already on \p RC.
418 ///
419 /// \returns Either \p Reg if no change was necessary, or the new register
420 /// created by moving \p Reg.
421 ///
422 /// Note: This uses emitCopy right now.
423 Register moveScalarRegClass(Register Reg, const TargetRegisterClass &RC,
424 MachineIRBuilder &MIB) const;
425
426 ComplexRendererFns selectArithExtendedRegister(MachineOperand &Root) const;
427
428 void renderTruncImm(MachineInstrBuilder &MIB, const MachineInstr &MI,
429 int OpIdx = -1) const;
430 void renderLogicalImm32(MachineInstrBuilder &MIB, const MachineInstr &I,
431 int OpIdx = -1) const;
432 void renderLogicalImm64(MachineInstrBuilder &MIB, const MachineInstr &I,
433 int OpIdx = -1) const;
434 void renderFPImm16(MachineInstrBuilder &MIB, const MachineInstr &MI,
435 int OpIdx = -1) const;
436 void renderFPImm32(MachineInstrBuilder &MIB, const MachineInstr &MI,
437 int OpIdx = -1) const;
438 void renderFPImm64(MachineInstrBuilder &MIB, const MachineInstr &MI,
439 int OpIdx = -1) const;
440 void renderFPImm32SIMDModImmType4(MachineInstrBuilder &MIB,
441 const MachineInstr &MI,
442 int OpIdx = -1) const;
443
444 // Materialize a GlobalValue or BlockAddress using a movz+movk sequence.
445 void materializeLargeCMVal(MachineInstr &I, const Value *V, unsigned OpFlags);
446
447 // Optimization methods.
448 bool tryOptSelect(GSelect &Sel);
449 bool tryOptSelectConjunction(GSelect &Sel, MachineInstr &CondMI);
450 MachineInstr *tryFoldIntegerCompare(MachineOperand &LHS, MachineOperand &RHS,
451 MachineOperand &Predicate,
452 MachineIRBuilder &MIRBuilder) const;
453
454 /// Return true if \p MI is a load or store of \p NumBytes bytes.
455 bool isLoadStoreOfNumBytes(const MachineInstr &MI, unsigned NumBytes) const;
456
457 /// Returns true if \p MI is guaranteed to have the high-half of a 64-bit
458 /// register zeroed out. In other words, the result of MI has been explicitly
459 /// zero extended.
460 bool isDef32(const MachineInstr &MI) const;
461
462 const AArch64TargetMachine &TM;
463 const AArch64Subtarget &STI;
464 const AArch64InstrInfo &TII;
465 const AArch64RegisterInfo &TRI;
466 const AArch64RegisterBankInfo &RBI;
467
468 bool ProduceNonFlagSettingCondBr = false;
469
470 // Some cached values used during selection.
471 // We use LR as a live-in register, and we keep track of it here as it can be
472 // clobbered by calls.
473 Register MFReturnAddr;
474
475 MachineIRBuilder MIB;
476
477#define GET_GLOBALISEL_PREDICATES_DECL
478#include "AArch64GenGlobalISel.inc"
479#undef GET_GLOBALISEL_PREDICATES_DECL
480
481// We declare the temporaries used by selectImpl() in the class to minimize the
482// cost of constructing placeholder values.
483#define GET_GLOBALISEL_TEMPORARIES_DECL
484#include "AArch64GenGlobalISel.inc"
485#undef GET_GLOBALISEL_TEMPORARIES_DECL
486};
487
488} // end anonymous namespace
489
490#define GET_GLOBALISEL_IMPL
491#include "AArch64GenGlobalISel.inc"
492#undef GET_GLOBALISEL_IMPL
493
494AArch64InstructionSelector::AArch64InstructionSelector(
495 const AArch64TargetMachine &TM, const AArch64Subtarget &STI,
496 const AArch64RegisterBankInfo &RBI)
497 : TM(TM), STI(STI), TII(*STI.getInstrInfo()), TRI(*STI.getRegisterInfo()),
498 RBI(RBI),
499#define GET_GLOBALISEL_PREDICATES_INIT
500#include "AArch64GenGlobalISel.inc"
501#undef GET_GLOBALISEL_PREDICATES_INIT
502#define GET_GLOBALISEL_TEMPORARIES_INIT
503#include "AArch64GenGlobalISel.inc"
504#undef GET_GLOBALISEL_TEMPORARIES_INIT
505{
506}
507
508// FIXME: This should be target-independent, inferred from the types declared
509// for each class in the bank.
510//
511/// Given a register bank, and a type, return the smallest register class that
512/// can represent that combination.
513static const TargetRegisterClass *
514getRegClassForTypeOnBank(LLT Ty, const RegisterBank &RB,
515 bool GetAllRegSet = false) {
516 if (RB.getID() == AArch64::GPRRegBankID) {
517 if (Ty.getSizeInBits() <= 32)
518 return GetAllRegSet ? &AArch64::GPR32allRegClass
519 : &AArch64::GPR32RegClass;
520 if (Ty.getSizeInBits() == 64)
521 return GetAllRegSet ? &AArch64::GPR64allRegClass
522 : &AArch64::GPR64RegClass;
523 if (Ty.getSizeInBits() == 128)
524 return &AArch64::XSeqPairsClassRegClass;
525 return nullptr;
526 }
527
528 if (RB.getID() == AArch64::FPRRegBankID) {
529 switch (Ty.getSizeInBits()) {
530 case 8:
531 return &AArch64::FPR8RegClass;
532 case 16:
533 return &AArch64::FPR16RegClass;
534 case 32:
535 return &AArch64::FPR32RegClass;
536 case 64:
537 return &AArch64::FPR64RegClass;
538 case 128:
539 return &AArch64::FPR128RegClass;
540 }
541 return nullptr;
542 }
543
544 return nullptr;
545}
546
547/// Given a register bank, and size in bits, return the smallest register class
548/// that can represent that combination.
549static const TargetRegisterClass *
550getMinClassForRegBank(const RegisterBank &RB, unsigned SizeInBits,
551 bool GetAllRegSet = false) {
552 unsigned RegBankID = RB.getID();
553
554 if (RegBankID == AArch64::GPRRegBankID) {
555 if (SizeInBits <= 32)
556 return GetAllRegSet ? &AArch64::GPR32allRegClass
557 : &AArch64::GPR32RegClass;
558 if (SizeInBits == 64)
559 return GetAllRegSet ? &AArch64::GPR64allRegClass
560 : &AArch64::GPR64RegClass;
561 if (SizeInBits == 128)
562 return &AArch64::XSeqPairsClassRegClass;
563 }
564
565 if (RegBankID == AArch64::FPRRegBankID) {
566 switch (SizeInBits) {
567 default:
568 return nullptr;
569 case 8:
570 return &AArch64::FPR8RegClass;
571 case 16:
572 return &AArch64::FPR16RegClass;
573 case 32:
574 return &AArch64::FPR32RegClass;
575 case 64:
576 return &AArch64::FPR64RegClass;
577 case 128:
578 return &AArch64::FPR128RegClass;
579 }
580 }
581
582 return nullptr;
583}
584
585/// Returns the correct subregister to use for a given register class.
586static bool getSubRegForClass(const TargetRegisterClass *RC,
587 const TargetRegisterInfo &TRI, unsigned &SubReg) {
588 switch (TRI.getRegSizeInBits(*RC)) {
589 case 8:
590 SubReg = AArch64::bsub;
591 break;
592 case 16:
593 SubReg = AArch64::hsub;
594 break;
595 case 32:
596 if (RC != &AArch64::FPR32RegClass)
597 SubReg = AArch64::sub_32;
598 else
599 SubReg = AArch64::ssub;
600 break;
601 case 64:
602 SubReg = AArch64::dsub;
603 break;
604 default:
605 LLVM_DEBUG(
606 dbgs() << "Couldn't find appropriate subregister for register class.");
607 return false;
608 }
609
610 return true;
611}
612
613/// Returns the minimum size the given register bank can hold.
614static unsigned getMinSizeForRegBank(const RegisterBank &RB) {
615 switch (RB.getID()) {
616 case AArch64::GPRRegBankID:
617 return 32;
618 case AArch64::FPRRegBankID:
619 return 8;
620 default:
621 llvm_unreachable("Tried to get minimum size for unknown register bank.")::llvm::llvm_unreachable_internal("Tried to get minimum size for unknown register bank."
, "llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 621)
;
622 }
623}
624
625/// Create a REG_SEQUENCE instruction using the registers in \p Regs.
626/// Helper function for functions like createDTuple and createQTuple.
627///
628/// \p RegClassIDs - The list of register class IDs available for some tuple of
629/// a scalar class. E.g. QQRegClassID, QQQRegClassID, QQQQRegClassID. This is
630/// expected to contain between 2 and 4 tuple classes.
631///
632/// \p SubRegs - The list of subregister classes associated with each register
633/// class ID in \p RegClassIDs. E.g., QQRegClassID should use the qsub0
634/// subregister class. The index of each subregister class is expected to
635/// correspond with the index of each register class.
636///
637/// \returns Either the destination register of REG_SEQUENCE instruction that
638/// was created, or the 0th element of \p Regs if \p Regs contains a single
639/// element.
640static Register createTuple(ArrayRef<Register> Regs,
641 const unsigned RegClassIDs[],
642 const unsigned SubRegs[], MachineIRBuilder &MIB) {
643 unsigned NumRegs = Regs.size();
644 if (NumRegs == 1)
645 return Regs[0];
646 assert(NumRegs >= 2 && NumRegs <= 4 &&
647 "Only support between two and 4 registers in a tuple!");
648 const TargetRegisterInfo *TRI = MIB.getMF().getSubtarget().getRegisterInfo();
649 auto *DesiredClass = TRI->getRegClass(RegClassIDs[NumRegs - 2]);
650 auto RegSequence =
651 MIB.buildInstr(TargetOpcode::REG_SEQUENCE, {DesiredClass}, {});
652 for (unsigned I = 0, E = Regs.size(); I < E; ++I) {
653 RegSequence.addUse(Regs[I]);
654 RegSequence.addImm(SubRegs[I]);
655 }
656 return RegSequence.getReg(0);
657}
658
659/// Create a tuple of D-registers using the registers in \p Regs.
660static Register createDTuple(ArrayRef<Register> Regs, MachineIRBuilder &MIB) {
661 static const unsigned RegClassIDs[] = {
662 AArch64::DDRegClassID, AArch64::DDDRegClassID, AArch64::DDDDRegClassID};
663 static const unsigned SubRegs[] = {AArch64::dsub0, AArch64::dsub1,
664 AArch64::dsub2, AArch64::dsub3};
665 return createTuple(Regs, RegClassIDs, SubRegs, MIB);
666}
667
668/// Create a tuple of Q-registers using the registers in \p Regs.
669static Register createQTuple(ArrayRef<Register> Regs, MachineIRBuilder &MIB) {
670 static const unsigned RegClassIDs[] = {
671 AArch64::QQRegClassID, AArch64::QQQRegClassID, AArch64::QQQQRegClassID};
672 static const unsigned SubRegs[] = {AArch64::qsub0, AArch64::qsub1,
673 AArch64::qsub2, AArch64::qsub3};
674 return createTuple(Regs, RegClassIDs, SubRegs, MIB);
675}
676
677static Optional<uint64_t> getImmedFromMO(const MachineOperand &Root) {
678 auto &MI = *Root.getParent();
679 auto &MBB = *MI.getParent();
680 auto &MF = *MBB.getParent();
681 auto &MRI = MF.getRegInfo();
682 uint64_t Immed;
683 if (Root.isImm())
684 Immed = Root.getImm();
685 else if (Root.isCImm())
686 Immed = Root.getCImm()->getZExtValue();
687 else if (Root.isReg()) {
688 auto ValAndVReg =
689 getIConstantVRegValWithLookThrough(Root.getReg(), MRI, true);
690 if (!ValAndVReg)
691 return None;
692 Immed = ValAndVReg->Value.getSExtValue();
693 } else
694 return None;
695 return Immed;
696}
697
698/// Check whether \p I is a currently unsupported binary operation:
699/// - it has an unsized type
700/// - an operand is not a vreg
701/// - all operands are not in the same bank
702/// These are checks that should someday live in the verifier, but right now,
703/// these are mostly limitations of the aarch64 selector.
704static bool unsupportedBinOp(const MachineInstr &I,
705 const AArch64RegisterBankInfo &RBI,
706 const MachineRegisterInfo &MRI,
707 const AArch64RegisterInfo &TRI) {
708 LLT Ty = MRI.getType(I.getOperand(0).getReg());
709 if (!Ty.isValid()) {
710 LLVM_DEBUG(dbgs() << "Generic binop register should be typed\n");
711 return true;
712 }
713
714 const RegisterBank *PrevOpBank = nullptr;
715 for (auto &MO : I.operands()) {
716 // FIXME: Support non-register operands.
717 if (!MO.isReg()) {
718 LLVM_DEBUG(dbgs() << "Generic inst non-reg operands are unsupported\n");
719 return true;
720 }
721
722 // FIXME: Can generic operations have physical registers operands? If
723 // so, this will need to be taught about that, and we'll need to get the
724 // bank out of the minimal class for the register.
725 // Either way, this needs to be documented (and possibly verified).
726 if (!Register::isVirtualRegister(MO.getReg())) {
727 LLVM_DEBUG(dbgs() << "Generic inst has physical register operand\n");
728 return true;
729 }
730
731 const RegisterBank *OpBank = RBI.getRegBank(MO.getReg(), MRI, TRI);
732 if (!OpBank) {
733 LLVM_DEBUG(dbgs() << "Generic register has no bank or class\n");
734 return true;
735 }
736
737 if (PrevOpBank && OpBank != PrevOpBank) {
738 LLVM_DEBUG(dbgs() << "Generic inst operands have different banks\n");
739 return true;
740 }
741 PrevOpBank = OpBank;
742 }
743 return false;
744}
745
746/// Select the AArch64 opcode for the basic binary operation \p GenericOpc
747/// (such as G_OR or G_SDIV), appropriate for the register bank \p RegBankID
748/// and of size \p OpSize.
749/// \returns \p GenericOpc if the combination is unsupported.
750static unsigned selectBinaryOp(unsigned GenericOpc, unsigned RegBankID,
751 unsigned OpSize) {
752 switch (RegBankID) {
753 case AArch64::GPRRegBankID:
754 if (OpSize == 32) {
755 switch (GenericOpc) {
756 case TargetOpcode::G_SHL:
757 return AArch64::LSLVWr;
758 case TargetOpcode::G_LSHR:
759 return AArch64::LSRVWr;
760 case TargetOpcode::G_ASHR:
761 return AArch64::ASRVWr;
762 default:
763 return GenericOpc;
764 }
765 } else if (OpSize == 64) {
766 switch (GenericOpc) {
767 case TargetOpcode::G_PTR_ADD:
768 return AArch64::ADDXrr;
769 case TargetOpcode::G_SHL:
770 return AArch64::LSLVXr;
771 case TargetOpcode::G_LSHR:
772 return AArch64::LSRVXr;
773 case TargetOpcode::G_ASHR:
774 return AArch64::ASRVXr;
775 default:
776 return GenericOpc;
777 }
778 }
779 break;
780 case AArch64::FPRRegBankID:
781 switch (OpSize) {
782 case 32:
783 switch (GenericOpc) {
784 case TargetOpcode::G_FADD:
785 return AArch64::FADDSrr;
786 case TargetOpcode::G_FSUB:
787 return AArch64::FSUBSrr;
788 case TargetOpcode::G_FMUL:
789 return AArch64::FMULSrr;
790 case TargetOpcode::G_FDIV:
791 return AArch64::FDIVSrr;
792 default:
793 return GenericOpc;
794 }
795 case 64:
796 switch (GenericOpc) {
797 case TargetOpcode::G_FADD:
798 return AArch64::FADDDrr;
799 case TargetOpcode::G_FSUB:
800 return AArch64::FSUBDrr;
801 case TargetOpcode::G_FMUL:
802 return AArch64::FMULDrr;
803 case TargetOpcode::G_FDIV:
804 return AArch64::FDIVDrr;
805 case TargetOpcode::G_OR:
806 return AArch64::ORRv8i8;
807 default:
808 return GenericOpc;
809 }
810 }
811 break;
812 }
813 return GenericOpc;
814}
815
816/// Select the AArch64 opcode for the G_LOAD or G_STORE operation \p GenericOpc,
817/// appropriate for the (value) register bank \p RegBankID and of memory access
818/// size \p OpSize. This returns the variant with the base+unsigned-immediate
819/// addressing mode (e.g., LDRXui).
820/// \returns \p GenericOpc if the combination is unsupported.
821static unsigned selectLoadStoreUIOp(unsigned GenericOpc, unsigned RegBankID,
822 unsigned OpSize) {
823 const bool isStore = GenericOpc == TargetOpcode::G_STORE;
824 switch (RegBankID) {
825 case AArch64::GPRRegBankID:
826 switch (OpSize) {
827 case 8:
828 return isStore ? AArch64::STRBBui : AArch64::LDRBBui;
829 case 16:
830 return isStore ? AArch64::STRHHui : AArch64::LDRHHui;
831 case 32:
832 return isStore ? AArch64::STRWui : AArch64::LDRWui;
833 case 64:
834 return isStore ? AArch64::STRXui : AArch64::LDRXui;
835 }
836 break;
837 case AArch64::FPRRegBankID:
838 switch (OpSize) {
839 case 8:
840 return isStore ? AArch64::STRBui : AArch64::LDRBui;
841 case 16:
842 return isStore ? AArch64::STRHui : AArch64::LDRHui;
843 case 32:
844 return isStore ? AArch64::STRSui : AArch64::LDRSui;
845 case 64:
846 return isStore ? AArch64::STRDui : AArch64::LDRDui;
847 case 128:
848 return isStore ? AArch64::STRQui : AArch64::LDRQui;
849 }
850 break;
851 }
852 return GenericOpc;
853}
854
855/// Helper function for selectCopy. Inserts a subregister copy from \p SrcReg
856/// to \p *To.
857///
858/// E.g "To = COPY SrcReg:SubReg"
859static bool copySubReg(MachineInstr &I, MachineRegisterInfo &MRI,
860 const RegisterBankInfo &RBI, Register SrcReg,
861 const TargetRegisterClass *To, unsigned SubReg) {
862 assert(SrcReg.isValid() && "Expected a valid source register?");
863 assert(To && "Destination register class cannot be null");
864 assert(SubReg && "Expected a valid subregister");
865
866 MachineIRBuilder MIB(I);
867 auto SubRegCopy =
868 MIB.buildInstr(TargetOpcode::COPY, {To}, {}).addReg(SrcReg, 0, SubReg);
869 MachineOperand &RegOp = I.getOperand(1);
870 RegOp.setReg(SubRegCopy.getReg(0));
871
872 // It's possible that the destination register won't be constrained. Make
873 // sure that happens.
874 if (!Register::isPhysicalRegister(I.getOperand(0).getReg()))
875 RBI.constrainGenericRegister(I.getOperand(0).getReg(), *To, MRI);
876
877 return true;
878}
879
880/// Helper function to get the source and destination register classes for a
881/// copy. Returns a std::pair containing the source register class for the
882/// copy, and the destination register class for the copy. If a register class
883/// cannot be determined, then it will be nullptr.
884static std::pair<const TargetRegisterClass *, const TargetRegisterClass *>
885getRegClassesForCopy(MachineInstr &I, const TargetInstrInfo &TII,
886 MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI,
887 const RegisterBankInfo &RBI) {
888 Register DstReg = I.getOperand(0).getReg();
889 Register SrcReg = I.getOperand(1).getReg();
890 const RegisterBank &DstRegBank = *RBI.getRegBank(DstReg, MRI, TRI);
891 const RegisterBank &SrcRegBank = *RBI.getRegBank(SrcReg, MRI, TRI);
892 unsigned DstSize = RBI.getSizeInBits(DstReg, MRI, TRI);
893 unsigned SrcSize = RBI.getSizeInBits(SrcReg, MRI, TRI);
894
895 // Special casing for cross-bank copies of s1s. We can technically represent
896 // a 1-bit value with any size of register. The minimum size for a GPR is 32
897 // bits. So, we need to put the FPR on 32 bits as well.
898 //
899 // FIXME: I'm not sure if this case holds true outside of copies. If it does,
900 // then we can pull it into the helpers that get the appropriate class for a
901 // register bank. Or make a new helper that carries along some constraint
902 // information.
903 if (SrcRegBank != DstRegBank && (DstSize == 1 && SrcSize == 1))
904 SrcSize = DstSize = 32;
905
906 return {getMinClassForRegBank(SrcRegBank, SrcSize, true),
907 getMinClassForRegBank(DstRegBank, DstSize, true)};
908}
909
910// FIXME: We need some sort of API in RBI/TRI to allow generic code to
911// constrain operands of simple instructions given a TargetRegisterClass
912// and LLT
913static bool selectDebugInstr(MachineInstr &I, MachineRegisterInfo &MRI,
914 const RegisterBankInfo &RBI) {
915 for (MachineOperand &MO : I.operands()) {
916 if (!MO.isReg())
917 continue;
918 Register Reg = MO.getReg();
919 if (!Reg)
920 continue;
921 if (Reg.isPhysical())
922 continue;
923 LLT Ty = MRI.getType(Reg);
924 const RegClassOrRegBank &RegClassOrBank = MRI.getRegClassOrRegBank(Reg);
925 const TargetRegisterClass *RC =
926 RegClassOrBank.dyn_cast<const TargetRegisterClass *>();
927 if (!RC) {
928 const RegisterBank &RB = *RegClassOrBank.get<const RegisterBank *>();
929 RC = getRegClassForTypeOnBank(Ty, RB);
930 if (!RC) {
931 LLVM_DEBUG(
932 dbgs() << "Warning: DBG_VALUE operand has unexpected size/bank\n");
933 break;
934 }
935 }
936 RBI.constrainGenericRegister(Reg, *RC, MRI);
937 }
938
939 return true;
940}
941
942static bool selectCopy(MachineInstr &I, const TargetInstrInfo &TII,
943 MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI,
944 const RegisterBankInfo &RBI) {
945 Register DstReg = I.getOperand(0).getReg();
946 Register SrcReg = I.getOperand(1).getReg();
947 const RegisterBank &DstRegBank = *RBI.getRegBank(DstReg, MRI, TRI);
948 const RegisterBank &SrcRegBank = *RBI.getRegBank(SrcReg, MRI, TRI);
949
950 // Find the correct register classes for the source and destination registers.
951 const TargetRegisterClass *SrcRC;
952 const TargetRegisterClass *DstRC;
953 std::tie(SrcRC, DstRC) = getRegClassesForCopy(I, TII, MRI, TRI, RBI);
954
955 if (!DstRC) {
956 LLVM_DEBUG(dbgs() << "Unexpected dest size "
957 << RBI.getSizeInBits(DstReg, MRI, TRI) << '\n');
958 return false;
959 }
960
961 // Is this a copy? If so, then we may need to insert a subregister copy.
962 if (I.isCopy()) {
963 // Yes. Check if there's anything to fix up.
964 if (!SrcRC) {
965 LLVM_DEBUG(dbgs() << "Couldn't determine source register class\n");
966 return false;
967 }
968
969 unsigned SrcSize = TRI.getRegSizeInBits(*SrcRC);
970 unsigned DstSize = TRI.getRegSizeInBits(*DstRC);
971 unsigned SubReg;
972
973 // If the source bank doesn't support a subregister copy small enough,
974 // then we first need to copy to the destination bank.
975 if (getMinSizeForRegBank(SrcRegBank) > DstSize) {
976 const TargetRegisterClass *DstTempRC =
977 getMinClassForRegBank(DstRegBank, SrcSize, /* GetAllRegSet */ true);
978 getSubRegForClass(DstRC, TRI, SubReg);
979
980 MachineIRBuilder MIB(I);
981 auto Copy = MIB.buildCopy({DstTempRC}, {SrcReg});
982 copySubReg(I, MRI, RBI, Copy.getReg(0), DstRC, SubReg);
983 } else if (SrcSize > DstSize) {
984 // If the source register is bigger than the destination we need to
985 // perform a subregister copy.
986 const TargetRegisterClass *SubRegRC =
987 getMinClassForRegBank(SrcRegBank, DstSize, /* GetAllRegSet */ true);
988 getSubRegForClass(SubRegRC, TRI, SubReg);
989 copySubReg(I, MRI, RBI, SrcReg, DstRC, SubReg);
990 } else if (DstSize > SrcSize) {
991 // If the destination register is bigger than the source we need to do
992 // a promotion using SUBREG_TO_REG.
993 const TargetRegisterClass *PromotionRC =
994 getMinClassForRegBank(SrcRegBank, DstSize, /* GetAllRegSet */ true);
995 getSubRegForClass(SrcRC, TRI, SubReg);
996
997 Register PromoteReg = MRI.createVirtualRegister(PromotionRC);
998 BuildMI(*I.getParent(), I, I.getDebugLoc(),
999 TII.get(AArch64::SUBREG_TO_REG), PromoteReg)
1000 .addImm(0)
1001 .addUse(SrcReg)
1002 .addImm(SubReg);
1003 MachineOperand &RegOp = I.getOperand(1);
1004 RegOp.setReg(PromoteReg);
1005 }
1006
1007 // If the destination is a physical register, then there's nothing to
1008 // change, so we're done.
1009 if (Register::isPhysicalRegister(DstReg))
1010 return true;
1011 }
1012
1013 // No need to constrain SrcReg. It will get constrained when we hit another
1014 // of its use or its defs. Copies do not have constraints.
1015 if (!RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) {
1016 LLVM_DEBUG(dbgs() << "Failed to constrain " << TII.getName(I.getOpcode())
1017 << " operand\n");
1018 return false;
1019 }
1020
1021 // If this a GPR ZEXT that we want to just reduce down into a copy.
1022 // The sizes will be mismatched with the source < 32b but that's ok.
1023 if (I.getOpcode() == TargetOpcode::G_ZEXT) {
1024 I.setDesc(TII.get(AArch64::COPY));
1025 assert(SrcRegBank.getID() == AArch64::GPRRegBankID);
1026 return selectCopy(I, TII, MRI, TRI, RBI);
1027 }
1028
1029 I.setDesc(TII.get(AArch64::COPY));
1030 return true;
1031}
1032
1033static unsigned selectFPConvOpc(unsigned GenericOpc, LLT DstTy, LLT SrcTy) {
1034 if (!DstTy.isScalar() || !SrcTy.isScalar())
1035 return GenericOpc;
1036
1037 const unsigned DstSize = DstTy.getSizeInBits();
1038 const unsigned SrcSize = SrcTy.getSizeInBits();
1039
1040 switch (DstSize) {
1041 case 32:
1042 switch (SrcSize) {
1043 case 32:
1044 switch (GenericOpc) {
1045 case TargetOpcode::G_SITOFP:
1046 return AArch64::SCVTFUWSri;
1047 case TargetOpcode::G_UITOFP:
1048 return AArch64::UCVTFUWSri;
1049 case TargetOpcode::G_FPTOSI:
1050 return AArch64::FCVTZSUWSr;
1051 case TargetOpcode::G_FPTOUI:
1052 return AArch64::FCVTZUUWSr;
1053 default:
1054 return GenericOpc;
1055 }
1056 case 64:
1057 switch (GenericOpc) {
1058 case TargetOpcode::G_SITOFP:
1059 return AArch64::SCVTFUXSri;
1060 case TargetOpcode::G_UITOFP:
1061 return AArch64::UCVTFUXSri;
1062 case TargetOpcode::G_FPTOSI:
1063 return AArch64::FCVTZSUWDr;
1064 case TargetOpcode::G_FPTOUI:
1065 return AArch64::FCVTZUUWDr;
1066 default:
1067 return GenericOpc;
1068 }
1069 default:
1070 return GenericOpc;
1071 }
1072 case 64:
1073 switch (SrcSize) {
1074 case 32:
1075 switch (GenericOpc) {
1076 case TargetOpcode::G_SITOFP:
1077 return AArch64::SCVTFUWDri;
1078 case TargetOpcode::G_UITOFP:
1079 return AArch64::UCVTFUWDri;
1080 case TargetOpcode::G_FPTOSI:
1081 return AArch64::FCVTZSUXSr;
1082 case TargetOpcode::G_FPTOUI:
1083 return AArch64::FCVTZUUXSr;
1084 default:
1085 return GenericOpc;
1086 }
1087 case 64:
1088 switch (GenericOpc) {
1089 case TargetOpcode::G_SITOFP:
1090 return AArch64::SCVTFUXDri;
1091 case TargetOpcode::G_UITOFP:
1092 return AArch64::UCVTFUXDri;
1093 case TargetOpcode::G_FPTOSI:
1094 return AArch64::FCVTZSUXDr;
1095 case TargetOpcode::G_FPTOUI:
1096 return AArch64::FCVTZUUXDr;
1097 default:
1098 return GenericOpc;
1099 }
1100 default:
1101 return GenericOpc;
1102 }
1103 default:
1104 return GenericOpc;
1105 };
1106 return GenericOpc;
1107}
1108
1109MachineInstr *
1110AArch64InstructionSelector::emitSelect(Register Dst, Register True,
1111 Register False, AArch64CC::CondCode CC,
1112 MachineIRBuilder &MIB) const {
1113 MachineRegisterInfo &MRI = *MIB.getMRI();
1114 assert(RBI.getRegBank(False, MRI, TRI)->getID() ==
1115 RBI.getRegBank(True, MRI, TRI)->getID() &&
1116 "Expected both select operands to have the same regbank?");
1117 LLT Ty = MRI.getType(True);
1118 if (Ty.isVector())
1119 return nullptr;
1120 const unsigned Size = Ty.getSizeInBits();
1121 assert((Size == 32 || Size == 64) &&
1122 "Expected 32 bit or 64 bit select only?");
1123 const bool Is32Bit = Size == 32;
1124 if (RBI.getRegBank(True, MRI, TRI)->getID() != AArch64::GPRRegBankID) {
1125 unsigned Opc = Is32Bit ? AArch64::FCSELSrrr : AArch64::FCSELDrrr;
1126 auto FCSel = MIB.buildInstr(Opc, {Dst}, {True, False}).addImm(CC);
1127 constrainSelectedInstRegOperands(*FCSel, TII, TRI, RBI);
1128 return &*FCSel;
1129 }
1130
1131 // By default, we'll try and emit a CSEL.
1132 unsigned Opc = Is32Bit ? AArch64::CSELWr : AArch64::CSELXr;
1133 bool Optimized = false;
1134 auto TryFoldBinOpIntoSelect = [&Opc, Is32Bit, &CC, &MRI,
1135 &Optimized](Register &Reg, Register &OtherReg,
1136 bool Invert) {
1137 if (Optimized)
1138 return false;
1139
1140 // Attempt to fold:
1141 //
1142 // %sub = G_SUB 0, %x
1143 // %select = G_SELECT cc, %reg, %sub
1144 //
1145 // Into:
1146 // %select = CSNEG %reg, %x, cc
1147 Register MatchReg;
1148 if (mi_match(Reg, MRI, m_Neg(m_Reg(MatchReg)))) {
1149 Opc = Is32Bit ? AArch64::CSNEGWr : AArch64::CSNEGXr;
1150 Reg = MatchReg;
1151 if (Invert) {
1152 CC = AArch64CC::getInvertedCondCode(CC);
1153 std::swap(Reg, OtherReg);
1154 }
1155 return true;
1156 }
1157
1158 // Attempt to fold:
1159 //
1160 // %xor = G_XOR %x, -1
1161 // %select = G_SELECT cc, %reg, %xor
1162 //
1163 // Into:
1164 // %select = CSINV %reg, %x, cc
1165 if (mi_match(Reg, MRI, m_Not(m_Reg(MatchReg)))) {
1166 Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;
1167 Reg = MatchReg;
1168 if (Invert) {
1169 CC = AArch64CC::getInvertedCondCode(CC);
1170 std::swap(Reg, OtherReg);
1171 }
1172 return true;
1173 }
1174
1175 // Attempt to fold:
1176 //
1177 // %add = G_ADD %x, 1
1178 // %select = G_SELECT cc, %reg, %add
1179 //
1180 // Into:
1181 // %select = CSINC %reg, %x, cc
1182 if (mi_match(Reg, MRI,
1183 m_any_of(m_GAdd(m_Reg(MatchReg), m_SpecificICst(1)),
1184 m_GPtrAdd(m_Reg(MatchReg), m_SpecificICst(1))))) {
1185 Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;
1186 Reg = MatchReg;
1187 if (Invert) {
1188 CC = AArch64CC::getInvertedCondCode(CC);
1189 std::swap(Reg, OtherReg);
1190 }
1191 return true;
1192 }
1193
1194 return false;
1195 };
1196
1197 // Helper lambda which tries to use CSINC/CSINV for the instruction when its
1198 // true/false values are constants.
1199 // FIXME: All of these patterns already exist in tablegen. We should be
1200 // able to import these.
1201 auto TryOptSelectCst = [&Opc, &True, &False, &CC, Is32Bit, &MRI,
1202 &Optimized]() {
1203 if (Optimized)
1204 return false;
1205 auto TrueCst = getIConstantVRegValWithLookThrough(True, MRI);
1206 auto FalseCst = getIConstantVRegValWithLookThrough(False, MRI);
1207 if (!TrueCst && !FalseCst)
1208 return false;
1209
1210 Register ZReg = Is32Bit ? AArch64::WZR : AArch64::XZR;
1211 if (TrueCst && FalseCst) {
1212 int64_t T = TrueCst->Value.getSExtValue();
1213 int64_t F = FalseCst->Value.getSExtValue();
1214
1215 if (T == 0 && F == 1) {
1216 // G_SELECT cc, 0, 1 -> CSINC zreg, zreg, cc
1217 Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;
1218 True = ZReg;
1219 False = ZReg;
1220 return true;
1221 }
1222
1223 if (T == 0 && F == -1) {
1224 // G_SELECT cc 0, -1 -> CSINV zreg, zreg cc
1225 Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;
1226 True = ZReg;
1227 False = ZReg;
1228 return true;
1229 }
1230 }
1231
1232 if (TrueCst) {
1233 int64_t T = TrueCst->Value.getSExtValue();
1234 if (T == 1) {
1235 // G_SELECT cc, 1, f -> CSINC f, zreg, inv_cc
1236 Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;
1237 True = False;
1238 False = ZReg;
1239 CC = AArch64CC::getInvertedCondCode(CC);
1240 return true;
1241 }
1242
1243 if (T == -1) {
1244 // G_SELECT cc, -1, f -> CSINV f, zreg, inv_cc
1245 Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;
1246 True = False;
1247 False = ZReg;
1248 CC = AArch64CC::getInvertedCondCode(CC);
1249 return true;
1250 }
1251 }
1252
1253 if (FalseCst) {
1254 int64_t F = FalseCst->Value.getSExtValue();
1255 if (F == 1) {
1256 // G_SELECT cc, t, 1 -> CSINC t, zreg, cc
1257 Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;
1258 False = ZReg;
1259 return true;
1260 }
1261
1262 if (F == -1) {
1263 // G_SELECT cc, t, -1 -> CSINV t, zreg, cc
1264 Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;
1265 False = ZReg;
1266 return true;
1267 }
1268 }
1269 return false;
1270 };
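// For example (illustrative vregs): "%d = G_SELECT %cc, 1, %f" becomes
// CSINC %d, %f, zreg, inv(cc). When the original condition holds, the
// inverted condition fails and the result is zreg + 1 == 1; otherwise the
// result is %f.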
1271
1272 Optimized |= TryFoldBinOpIntoSelect(False, True, /*Invert = */ false);
1273 Optimized |= TryFoldBinOpIntoSelect(True, False, /*Invert = */ true);
1274 Optimized |= TryOptSelectCst();
1275 auto SelectInst = MIB.buildInstr(Opc, {Dst}, {True, False}).addImm(CC);
1276 constrainSelectedInstRegOperands(*SelectInst, TII, TRI, RBI);
1277 return &*SelectInst;
1278}
1279
1280static AArch64CC::CondCode changeICMPPredToAArch64CC(CmpInst::Predicate P) {
1281 switch (P) {
1282 default:
1283 llvm_unreachable("Unknown condition code!")::llvm::llvm_unreachable_internal("Unknown condition code!", "llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 1283)
;
1284 case CmpInst::ICMP_NE:
1285 return AArch64CC::NE;
1286 case CmpInst::ICMP_EQ:
1287 return AArch64CC::EQ;
1288 case CmpInst::ICMP_SGT:
1289 return AArch64CC::GT;
1290 case CmpInst::ICMP_SGE:
1291 return AArch64CC::GE;
1292 case CmpInst::ICMP_SLT:
1293 return AArch64CC::LT;
1294 case CmpInst::ICMP_SLE:
1295 return AArch64CC::LE;
1296 case CmpInst::ICMP_UGT:
1297 return AArch64CC::HI;
1298 case CmpInst::ICMP_UGE:
1299 return AArch64CC::HS;
1300 case CmpInst::ICMP_ULT:
1301 return AArch64CC::LO;
1302 case CmpInst::ICMP_ULE:
1303 return AArch64CC::LS;
1304 }
1305}
1306
1307/// changeFPCCToORAArch64CC - Convert an IR fp condition code to an AArch64 CC.
1308static void changeFPCCToORAArch64CC(CmpInst::Predicate CC,
1309 AArch64CC::CondCode &CondCode,
1310 AArch64CC::CondCode &CondCode2) {
1311 CondCode2 = AArch64CC::AL;
1312 switch (CC) {
1313 default:
1314 llvm_unreachable("Unknown FP condition!")::llvm::llvm_unreachable_internal("Unknown FP condition!", "llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 1314)
;
1315 case CmpInst::FCMP_OEQ:
1316 CondCode = AArch64CC::EQ;
1317 break;
1318 case CmpInst::FCMP_OGT:
1319 CondCode = AArch64CC::GT;
1320 break;
1321 case CmpInst::FCMP_OGE:
1322 CondCode = AArch64CC::GE;
1323 break;
1324 case CmpInst::FCMP_OLT:
1325 CondCode = AArch64CC::MI;
1326 break;
1327 case CmpInst::FCMP_OLE:
1328 CondCode = AArch64CC::LS;
1329 break;
1330 case CmpInst::FCMP_ONE:
1331 CondCode = AArch64CC::MI;
1332 CondCode2 = AArch64CC::GT;
1333 break;
1334 case CmpInst::FCMP_ORD:
1335 CondCode = AArch64CC::VC;
1336 break;
1337 case CmpInst::FCMP_UNO:
1338 CondCode = AArch64CC::VS;
1339 break;
1340 case CmpInst::FCMP_UEQ:
1341 CondCode = AArch64CC::EQ;
1342 CondCode2 = AArch64CC::VS;
1343 break;
1344 case CmpInst::FCMP_UGT:
1345 CondCode = AArch64CC::HI;
1346 break;
1347 case CmpInst::FCMP_UGE:
1348 CondCode = AArch64CC::PL;
1349 break;
1350 case CmpInst::FCMP_ULT:
1351 CondCode = AArch64CC::LT;
1352 break;
1353 case CmpInst::FCMP_ULE:
1354 CondCode = AArch64CC::LE;
1355 break;
1356 case CmpInst::FCMP_UNE:
1357 CondCode = AArch64CC::NE;
1358 break;
1359 }
1360}
1361
1362/// Convert an IR fp condition code to an AArch64 CC.
1363/// This differs from changeFPCCToAArch64CC in that it returns cond codes that
1364/// should be AND'ed instead of OR'ed.
1365static void changeFPCCToANDAArch64CC(CmpInst::Predicate CC,
1366 AArch64CC::CondCode &CondCode,
1367 AArch64CC::CondCode &CondCode2) {
1368 CondCode2 = AArch64CC::AL;
1369 switch (CC) {
1370 default:
1371 changeFPCCToORAArch64CC(CC, CondCode, CondCode2);
1372 assert(CondCode2 == AArch64CC::AL);
1373 break;
1374 case CmpInst::FCMP_ONE:
1375 // (a one b)
1376 // == ((a olt b) || (a ogt b))
1377 // == ((a ord b) && (a une b))
1378 CondCode = AArch64CC::VC;
1379 CondCode2 = AArch64CC::NE;
1380 break;
1381 case CmpInst::FCMP_UEQ:
1382 // (a ueq b)
1383 // == ((a uno b) || (a oeq b))
1384 // == ((a ule b) && (a uge b))
1385 CondCode = AArch64CC::PL;
1386 CondCode2 = AArch64CC::LE;
1387 break;
1388 }
1389}
1390
1391/// Return a register which can be used as a bit to test in a TB(N)Z.
1392static Register getTestBitReg(Register Reg, uint64_t &Bit, bool &Invert,
1393 MachineRegisterInfo &MRI) {
1394 assert(Reg.isValid() && "Expected valid register!");
1395 bool HasZext = false;
1396 while (MachineInstr *MI = getDefIgnoringCopies(Reg, MRI)) {
1397 unsigned Opc = MI->getOpcode();
1398
1399 if (!MI->getOperand(0).isReg() ||
1400 !MRI.hasOneNonDBGUse(MI->getOperand(0).getReg()))
1401 break;
1402
1403 // (tbz (any_ext x), b) -> (tbz x, b) if we don't use the extended bits.
1404 //
1405 // (tbz (trunc x), b) -> (tbz x, b) is always safe, because the bit number
1406 // on the truncated x is the same as the bit number on x.
1407 if (Opc == TargetOpcode::G_ANYEXT || Opc == TargetOpcode::G_ZEXT ||
1408 Opc == TargetOpcode::G_TRUNC) {
1409 if (Opc == TargetOpcode::G_ZEXT)
1410 HasZext = true;
1411
1412 Register NextReg = MI->getOperand(1).getReg();
1413 // Did we find something worth folding?
1414 if (!NextReg.isValid() || !MRI.hasOneNonDBGUse(NextReg))
1415 break;
1416
1417 // NextReg is worth folding. Keep looking.
1418 Reg = NextReg;
1419 continue;
1420 }
1421
1422 // Attempt to find a suitable operation with a constant on one side.
1423 Optional<uint64_t> C;
1424 Register TestReg;
1425 switch (Opc) {
1426 default:
1427 break;
1428 case TargetOpcode::G_AND:
1429 case TargetOpcode::G_XOR: {
1430 TestReg = MI->getOperand(1).getReg();
1431 Register ConstantReg = MI->getOperand(2).getReg();
1432 auto VRegAndVal = getIConstantVRegValWithLookThrough(ConstantReg, MRI);
1433 if (!VRegAndVal) {
1434 // AND commutes, check the other side for a constant.
1435 // FIXME: Can we canonicalize the constant so that it's always on the
1436 // same side at some point earlier?
1437 std::swap(ConstantReg, TestReg);
1438 VRegAndVal = getIConstantVRegValWithLookThrough(ConstantReg, MRI);
1439 }
1440 if (VRegAndVal) {
1441 if (HasZext)
1442 C = VRegAndVal->Value.getZExtValue();
1443 else
1444 C = VRegAndVal->Value.getSExtValue();
1445 }
1446 break;
1447 }
1448 case TargetOpcode::G_ASHR:
1449 case TargetOpcode::G_LSHR:
1450 case TargetOpcode::G_SHL: {
1451 TestReg = MI->getOperand(1).getReg();
1452 auto VRegAndVal =
1453 getIConstantVRegValWithLookThrough(MI->getOperand(2).getReg(), MRI);
1454 if (VRegAndVal)
1455 C = VRegAndVal->Value.getSExtValue();
1456 break;
1457 }
1458 }
1459
1460 // Didn't find a constant or viable register. Bail out of the loop.
1461 if (!C || !TestReg.isValid())
1462 break;
1463
1464 // We found a suitable instruction with a constant. Check to see if we can
1465 // walk through the instruction.
1466 Register NextReg;
1467 unsigned TestRegSize = MRI.getType(TestReg).getSizeInBits();
1468 switch (Opc) {
1469 default:
1470 break;
1471 case TargetOpcode::G_AND:
1472 // (tbz (and x, m), b) -> (tbz x, b) when the b-th bit of m is set.
1473 if ((*C >> Bit) & 1)
1474 NextReg = TestReg;
1475 break;
1476 case TargetOpcode::G_SHL:
1477 // (tbz (shl x, c), b) -> (tbz x, b-c) when b-c is positive and fits in
1478 // the type of the register.
1479 if (*C <= Bit && (Bit - *C) < TestRegSize) {
1480 NextReg = TestReg;
1481 Bit = Bit - *C;
1482 }
1483 break;
1484 case TargetOpcode::G_ASHR:
1485 // (tbz (ashr x, c), b) -> (tbz x, b+c), or (tbz x, msb) if b+c is >= the
1486 // number of bits in x.
1487 NextReg = TestReg;
1488 Bit = Bit + *C;
1489 if (Bit >= TestRegSize)
1490 Bit = TestRegSize - 1;
1491 break;
1492 case TargetOpcode::G_LSHR:
1493 // (tbz (lshr x, c), b) -> (tbz x, b+c) when b + c is < # bits in x
1494 if ((Bit + *C) < TestRegSize) {
1495 NextReg = TestReg;
1496 Bit = Bit + *C;
1497 }
1498 break;
1499 case TargetOpcode::G_XOR:
1500 // We can walk through a G_XOR by inverting whether we use tbz/tbnz when
1501 // appropriate.
1502 //
1503 // e.g. If x' = xor x, c, and the b-th bit is set in c then
1504 //
1505 // tbz x', b -> tbnz x, b
1506 //
1507 // Because x' only has the b-th bit set if x does not.
1508 if ((*C >> Bit) & 1)
1509 Invert = !Invert;
1510 NextReg = TestReg;
1511 break;
1512 }
1513
1514 // Check if we found anything worth folding.
1515 if (!NextReg.isValid())
1516 return Reg;
1517 Reg = NextReg;
1518 }
1519
1520 return Reg;
1521}
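// Illustrative walk (hypothetical vregs): asking for bit 3 of
//   %t = G_SHL %x, 2
// becomes asking for bit 1 of %x, and walking through a G_XOR whose constant
// has bit 3 set additionally flips TBZ <-> TBNZ.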
1522
1523MachineInstr *AArch64InstructionSelector::emitTestBit(
1524 Register TestReg, uint64_t Bit, bool IsNegative, MachineBasicBlock *DstMBB,
1525 MachineIRBuilder &MIB) const {
1526 assert(TestReg.isValid());
1527 assert(ProduceNonFlagSettingCondBr &&
1528 "Cannot emit TB(N)Z with speculation tracking!");
1529 MachineRegisterInfo &MRI = *MIB.getMRI();
1530
1531 // Attempt to optimize the test bit by walking over instructions.
1532 TestReg = getTestBitReg(TestReg, Bit, IsNegative, MRI);
1533 LLT Ty = MRI.getType(TestReg);
1534 unsigned Size = Ty.getSizeInBits();
1535 assert(!Ty.isVector() && "Expected a scalar!");
1536 assert(Bit < 64 && "Bit is too large!");
1537
1538 // When the test register is a 64-bit register, we have to narrow to make
1539 // TBNZW work.
1540 bool UseWReg = Bit < 32;
1541 unsigned NecessarySize = UseWReg ? 32 : 64;
1542 if (Size != NecessarySize)
1543 TestReg = moveScalarRegClass(
1544 TestReg, UseWReg ? AArch64::GPR32RegClass : AArch64::GPR64RegClass,
1545 MIB);
1546
1547 static const unsigned OpcTable[2][2] = {{AArch64::TBZX, AArch64::TBNZX},
1548 {AArch64::TBZW, AArch64::TBNZW}};
1549 unsigned Opc = OpcTable[UseWReg][IsNegative];
1550 auto TestBitMI =
1551 MIB.buildInstr(Opc).addReg(TestReg).addImm(Bit).addMBB(DstMBB);
1552 constrainSelectedInstRegOperands(*TestBitMI, TII, TRI, RBI);
1553 return &*TestBitMI;
1554}
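// For example, testing bit 40 of an s64 value needs the X-register form
// (TBZX/TBNZX), while testing bit 3 of an s64 value is first narrowed with a
// copy into GPR32 so the W-register form (TBZW/TBNZW) can be used.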
1555
1556bool AArch64InstructionSelector::tryOptAndIntoCompareBranch(
1557 MachineInstr &AndInst, bool Invert, MachineBasicBlock *DstMBB,
1558 MachineIRBuilder &MIB) const {
1559 assert(AndInst.getOpcode() == TargetOpcode::G_AND && "Expected G_AND only?");
1560 // Given something like this:
1561 //
1562 // %x = ...Something...
1563 // %one = G_CONSTANT i64 1
1564 // %zero = G_CONSTANT i64 0
1565 // %and = G_AND %x, %one
1566 // %cmp = G_ICMP intpred(ne), %and, %zero
1567 // %cmp_trunc = G_TRUNC %cmp
1568 // G_BRCOND %cmp_trunc, %bb.3
1569 //
1570 // We want to try and fold the AND into the G_BRCOND and produce either a
1571 // TBNZ (when we have intpred(ne)) or a TBZ (when we have intpred(eq)).
1572 //
1573 // In this case, we'd get
1574 //
1575 // TBNZ %x %bb.3
1576 //
1577
1578 // Check if the AND has a constant on its RHS which we can use as a mask.
1579 // If it's a power of 2, then it's the same as checking a specific bit.
1580 // (e.g, ANDing with 8 == ANDing with 000...100 == testing if bit 3 is set)
1581 auto MaybeBit = getIConstantVRegValWithLookThrough(
1582 AndInst.getOperand(2).getReg(), *MIB.getMRI());
1583 if (!MaybeBit)
1584 return false;
1585
1586 int32_t Bit = MaybeBit->Value.exactLogBase2();
1587 if (Bit < 0)
1588 return false;
1589
1590 Register TestReg = AndInst.getOperand(1).getReg();
1591
1592 // Emit a TB(N)Z.
1593 emitTestBit(TestReg, Bit, Invert, DstMBB, MIB);
1594 return true;
1595}
1596
1597MachineInstr *AArch64InstructionSelector::emitCBZ(Register CompareReg,
1598 bool IsNegative,
1599 MachineBasicBlock *DestMBB,
1600 MachineIRBuilder &MIB) const {
1601 assert(ProduceNonFlagSettingCondBr && "CBZ does not set flags!");
1602 MachineRegisterInfo &MRI = *MIB.getMRI();
1603 assert(RBI.getRegBank(CompareReg, MRI, TRI)->getID() ==
1604 AArch64::GPRRegBankID &&
1605 "Expected GPRs only?");
1606 auto Ty = MRI.getType(CompareReg);
1607 unsigned Width = Ty.getSizeInBits();
1608 assert(!Ty.isVector() && "Expected scalar only?");
1609 assert(Width <= 64 && "Expected width to be at most 64?");
1610 static const unsigned OpcTable[2][2] = {{AArch64::CBZW, AArch64::CBZX},
1611 {AArch64::CBNZW, AArch64::CBNZX}};
1612 unsigned Opc = OpcTable[IsNegative][Width == 64];
1613 auto BranchMI = MIB.buildInstr(Opc, {}, {CompareReg}).addMBB(DestMBB);
1614 constrainSelectedInstRegOperands(*BranchMI, TII, TRI, RBI);
1615 return &*BranchMI;
1616}
1617
1618bool AArch64InstructionSelector::selectCompareBranchFedByFCmp(
1619 MachineInstr &I, MachineInstr &FCmp, MachineIRBuilder &MIB) const {
1620 assert(FCmp.getOpcode() == TargetOpcode::G_FCMP);
1621 assert(I.getOpcode() == TargetOpcode::G_BRCOND);
1622 // Unfortunately, the mapping of LLVM FP CC's onto AArch64 CC's isn't
1623 // totally clean. Some of them require two branches to implement.
1624 auto Pred = (CmpInst::Predicate)FCmp.getOperand(1).getPredicate();
1625 emitFPCompare(FCmp.getOperand(2).getReg(), FCmp.getOperand(3).getReg(), MIB,
1626 Pred);
1627 AArch64CC::CondCode CC1, CC2;
1628 changeFCMPPredToAArch64CC(static_cast<CmpInst::Predicate>(Pred), CC1, CC2);
1629 MachineBasicBlock *DestMBB = I.getOperand(1).getMBB();
1630 MIB.buildInstr(AArch64::Bcc, {}, {}).addImm(CC1).addMBB(DestMBB);
1631 if (CC2 != AArch64CC::AL)
1632 MIB.buildInstr(AArch64::Bcc, {}, {}).addImm(CC2).addMBB(DestMBB);
1633 I.eraseFromParent();
1634 return true;
1635}
1636
1637bool AArch64InstructionSelector::tryOptCompareBranchFedByICmp(
1638 MachineInstr &I, MachineInstr &ICmp, MachineIRBuilder &MIB) const {
1639 assert(ICmp.getOpcode() == TargetOpcode::G_ICMP);
1640 assert(I.getOpcode() == TargetOpcode::G_BRCOND);
1641 // Attempt to optimize the G_BRCOND + G_ICMP into a TB(N)Z/CB(N)Z.
1642 //
1643 // Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z
1644 // instructions will not be produced, as they are conditional branch
1645 // instructions that do not set flags.
1646 if (!ProduceNonFlagSettingCondBr)
1647 return false;
1648
1649 MachineRegisterInfo &MRI = *MIB.getMRI();
1650 MachineBasicBlock *DestMBB = I.getOperand(1).getMBB();
1651 auto Pred =
1652 static_cast<CmpInst::Predicate>(ICmp.getOperand(1).getPredicate());
1653 Register LHS = ICmp.getOperand(2).getReg();
1654 Register RHS = ICmp.getOperand(3).getReg();
1655
1656 // We're allowed to emit a TB(N)Z/CB(N)Z. Try to do that.
1657 auto VRegAndVal = getIConstantVRegValWithLookThrough(RHS, MRI);
1658 MachineInstr *AndInst = getOpcodeDef(TargetOpcode::G_AND, LHS, MRI);
1659
1660 // When we can emit a TB(N)Z, prefer that.
1661 //
1662 // Handle non-commutative condition codes first.
1663 // Note that we don't want to do this when we have a G_AND because it can
1664 // become a tst. The tst will make the test bit in the TB(N)Z redundant.
1665 if (VRegAndVal && !AndInst) {
1666 int64_t C = VRegAndVal->Value.getSExtValue();
1667
1668 // When we have a greater-than comparison, we can just test if the msb is
1669 // zero.
1670 if (C == -1 && Pred == CmpInst::ICMP_SGT) {
1671 uint64_t Bit = MRI.getType(LHS).getSizeInBits() - 1;
1672 emitTestBit(LHS, Bit, /*IsNegative = */ false, DestMBB, MIB);
1673 I.eraseFromParent();
1674 return true;
1675 }
1676
1677 // When we have a less than comparison, we can just test if the msb is not
1678 // zero.
1679 if (C == 0 && Pred == CmpInst::ICMP_SLT) {
1680 uint64_t Bit = MRI.getType(LHS).getSizeInBits() - 1;
1681 emitTestBit(LHS, Bit, /*IsNegative = */ true, DestMBB, MIB);
1682 I.eraseFromParent();
1683 return true;
1684 }
1685 }
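// Both special cases test the sign bit: for signed operands, "x > -1" is
// exactly "msb(x) == 0" (hence TBZ on the top bit), and "x < 0" is exactly
// "msb(x) == 1" (hence TBNZ on the top bit).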
1686
1687 // Attempt to handle commutative condition codes. Right now, that's only
1688 // eq/ne.
1689 if (ICmpInst::isEquality(Pred)) {
1690 if (!VRegAndVal) {
1691 std::swap(RHS, LHS);
1692 VRegAndVal = getIConstantVRegValWithLookThrough(RHS, MRI);
1693 AndInst = getOpcodeDef(TargetOpcode::G_AND, LHS, MRI);
1694 }
1695
1696 if (VRegAndVal && VRegAndVal->Value == 0) {
1697 // If there's a G_AND feeding into this branch, try to fold it away by
1698 // emitting a TB(N)Z instead.
1699 //
1700 // Note: If we have LT, then it *is* possible to fold, but it wouldn't be
1701 // beneficial. When we have an AND and LT, we need a TST/ANDS, so folding
1702 // would be redundant.
1703 if (AndInst &&
1704 tryOptAndIntoCompareBranch(
1705 *AndInst, /*Invert = */ Pred == CmpInst::ICMP_NE, DestMBB, MIB)) {
1706 I.eraseFromParent();
1707 return true;
1708 }
1709
1710 // Otherwise, try to emit a CB(N)Z instead.
1711 auto LHSTy = MRI.getType(LHS);
1712 if (!LHSTy.isVector() && LHSTy.getSizeInBits() <= 64) {
1713 emitCBZ(LHS, /*IsNegative = */ Pred == CmpInst::ICMP_NE, DestMBB, MIB);
1714 I.eraseFromParent();
1715 return true;
1716 }
1717 }
1718 }
1719
1720 return false;
1721}
1722
1723bool AArch64InstructionSelector::selectCompareBranchFedByICmp(
1724 MachineInstr &I, MachineInstr &ICmp, MachineIRBuilder &MIB) const {
1725 assert(ICmp.getOpcode() == TargetOpcode::G_ICMP);
1726 assert(I.getOpcode() == TargetOpcode::G_BRCOND);
1727 if (tryOptCompareBranchFedByICmp(I, ICmp, MIB))
1728 return true;
1729
1730 // Couldn't optimize. Emit a compare + a Bcc.
1731 MachineBasicBlock *DestMBB = I.getOperand(1).getMBB();
1732 auto PredOp = ICmp.getOperand(1);
1733 emitIntegerCompare(ICmp.getOperand(2), ICmp.getOperand(3), PredOp, MIB);
1734 const AArch64CC::CondCode CC = changeICMPPredToAArch64CC(
1735 static_cast<CmpInst::Predicate>(PredOp.getPredicate()));
1736 MIB.buildInstr(AArch64::Bcc, {}, {}).addImm(CC).addMBB(DestMBB);
1737 I.eraseFromParent();
1738 return true;
1739}
1740
1741bool AArch64InstructionSelector::selectCompareBranch(
1742 MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) {
1743 Register CondReg = I.getOperand(0).getReg();
1744 MachineInstr *CCMI = MRI.getVRegDef(CondReg);
1745 // Try to select the G_BRCOND using whatever is feeding the condition if
1746 // possible.
1747 unsigned CCMIOpc = CCMI->getOpcode();
1748 if (CCMIOpc == TargetOpcode::G_FCMP)
1749 return selectCompareBranchFedByFCmp(I, *CCMI, MIB);
1750 if (CCMIOpc == TargetOpcode::G_ICMP)
1751 return selectCompareBranchFedByICmp(I, *CCMI, MIB);
1752
1753 // Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z
1754 // instructions will not be produced, as they are conditional branch
1755 // instructions that do not set flags.
1756 if (ProduceNonFlagSettingCondBr) {
1757 emitTestBit(CondReg, /*Bit = */ 0, /*IsNegative = */ true,
1758 I.getOperand(1).getMBB(), MIB);
1759 I.eraseFromParent();
1760 return true;
1761 }
1762
1763 // Can't emit TB(N)Z/CB(N)Z. Emit a tst + bcc instead.
1764 auto TstMI =
1765 MIB.buildInstr(AArch64::ANDSWri, {LLT::scalar(32)}, {CondReg}).addImm(1);
1766 constrainSelectedInstRegOperands(*TstMI, TII, TRI, RBI);
1767 auto Bcc = MIB.buildInstr(AArch64::Bcc)
1768 .addImm(AArch64CC::EQ)
1769 .addMBB(I.getOperand(1).getMBB());
1770 I.eraseFromParent();
1771 return constrainSelectedInstRegOperands(*Bcc, TII, TRI, RBI);
1772}
1773
1774/// Returns the element immediate value of a vector shift operand if found.
1775/// This needs to detect a splat-like operation, e.g. a G_BUILD_VECTOR.
1776static Optional<int64_t> getVectorShiftImm(Register Reg,
1777 MachineRegisterInfo &MRI) {
1778 assert(MRI.getType(Reg).isVector() && "Expected a *vector* shift operand");
1779 MachineInstr *OpMI = MRI.getVRegDef(Reg);
1780 return getAArch64VectorSplatScalar(*OpMI, MRI);
1781}
1782
1783/// Matches and returns the shift immediate value for a SHL instruction given
1784/// a shift operand.
1785static Optional<int64_t> getVectorSHLImm(LLT SrcTy, Register Reg, MachineRegisterInfo &MRI) {
1786 Optional<int64_t> ShiftImm = getVectorShiftImm(Reg, MRI);
1787 if (!ShiftImm)
1788 return None;
1789 // Check the immediate is in range for a SHL.
1790 int64_t Imm = *ShiftImm;
1791 if (Imm < 0)
1792 return None;
1793 switch (SrcTy.getElementType().getSizeInBits()) {
1794 default:
1795 LLVM_DEBUG(dbgs() << "Unhandled element type for vector shift");
1796 return None;
1797 case 8:
1798 if (Imm > 7)
1799 return None;
1800 break;
1801 case 16:
1802 if (Imm > 15)
1803 return None;
1804 break;
1805 case 32:
1806 if (Imm > 31)
1807 return None;
1808 break;
1809 case 64:
1810 if (Imm > 63)
1811 return None;
1812 break;
1813 }
1814 return Imm;
1815}
1816
1817bool AArch64InstructionSelector::selectVectorSHL(MachineInstr &I,
1818 MachineRegisterInfo &MRI) {
1819 assert(I.getOpcode() == TargetOpcode::G_SHL);
1820 Register DstReg = I.getOperand(0).getReg();
1821 const LLT Ty = MRI.getType(DstReg);
1822 Register Src1Reg = I.getOperand(1).getReg();
1823 Register Src2Reg = I.getOperand(2).getReg();
1824
1825 if (!Ty.isVector())
1826 return false;
1827
1828 // Check if we have a vector of constants on RHS that we can select as the
1829 // immediate form.
1830 Optional<int64_t> ImmVal = getVectorSHLImm(Ty, Src2Reg, MRI);
1831
1832 unsigned Opc = 0;
1833 if (Ty == LLT::fixed_vector(2, 64)) {
1834 Opc = ImmVal ? AArch64::SHLv2i64_shift : AArch64::USHLv2i64;
1835 } else if (Ty == LLT::fixed_vector(4, 32)) {
1836 Opc = ImmVal ? AArch64::SHLv4i32_shift : AArch64::USHLv4i32;
1837 } else if (Ty == LLT::fixed_vector(2, 32)) {
1838 Opc = ImmVal ? AArch64::SHLv2i32_shift : AArch64::USHLv2i32;
1839 } else if (Ty == LLT::fixed_vector(4, 16)) {
1840 Opc = ImmVal ? AArch64::SHLv4i16_shift : AArch64::USHLv4i16;
1841 } else if (Ty == LLT::fixed_vector(8, 16)) {
1842 Opc = ImmVal ? AArch64::SHLv8i16_shift : AArch64::USHLv8i16;
1843 } else if (Ty == LLT::fixed_vector(16, 8)) {
1844 Opc = ImmVal ? AArch64::SHLv16i8_shift : AArch64::USHLv16i8;
1845 } else if (Ty == LLT::fixed_vector(8, 8)) {
1846 Opc = ImmVal ? AArch64::SHLv8i8_shift : AArch64::USHLv8i8;
1847 } else {
1848 LLVM_DEBUG(dbgs() << "Unhandled G_SHL type");
1849 return false;
1850 }
1851
1852 auto Shl = MIB.buildInstr(Opc, {DstReg}, {Src1Reg});
1853 if (ImmVal)
1854 Shl.addImm(*ImmVal);
1855 else
1856 Shl.addUse(Src2Reg);
1857 constrainSelectedInstRegOperands(*Shl, TII, TRI, RBI);
1858 I.eraseFromParent();
1859 return true;
1860}
1861
1862bool AArch64InstructionSelector::selectVectorAshrLshr(
1863 MachineInstr &I, MachineRegisterInfo &MRI) {
1864 assert(I.getOpcode() == TargetOpcode::G_ASHR ||
1865 I.getOpcode() == TargetOpcode::G_LSHR);
1866 Register DstReg = I.getOperand(0).getReg();
1867 const LLT Ty = MRI.getType(DstReg);
1868 Register Src1Reg = I.getOperand(1).getReg();
1869 Register Src2Reg = I.getOperand(2).getReg();
1870
1871 if (!Ty.isVector())
1872 return false;
1873
1874 bool IsASHR = I.getOpcode() == TargetOpcode::G_ASHR;
1875
1876 // We expect the immediate case to be lowered in the PostLegalCombiner to
1877 // AArch64ISD::VASHR or AArch64ISD::VLSHR equivalents.
1878
1879 // There is no shift-right-by-register instruction, but the shift-left-by-
1880 // register instruction takes a signed amount, where a negative amount
1881 // specifies a right shift.
1882
1883 unsigned Opc = 0;
1884 unsigned NegOpc = 0;
1885 const TargetRegisterClass *RC =
1886 getRegClassForTypeOnBank(Ty, RBI.getRegBank(AArch64::FPRRegBankID));
1887 if (Ty == LLT::fixed_vector(2, 64)) {
1888 Opc = IsASHR ? AArch64::SSHLv2i64 : AArch64::USHLv2i64;
1889 NegOpc = AArch64::NEGv2i64;
1890 } else if (Ty == LLT::fixed_vector(4, 32)) {
1891 Opc = IsASHR ? AArch64::SSHLv4i32 : AArch64::USHLv4i32;
1892 NegOpc = AArch64::NEGv4i32;
1893 } else if (Ty == LLT::fixed_vector(2, 32)) {
1894 Opc = IsASHR ? AArch64::SSHLv2i32 : AArch64::USHLv2i32;
1895 NegOpc = AArch64::NEGv2i32;
1896 } else if (Ty == LLT::fixed_vector(4, 16)) {
1897 Opc = IsASHR ? AArch64::SSHLv4i16 : AArch64::USHLv4i16;
1898 NegOpc = AArch64::NEGv4i16;
1899 } else if (Ty == LLT::fixed_vector(8, 16)) {
1900 Opc = IsASHR ? AArch64::SSHLv8i16 : AArch64::USHLv8i16;
1901 NegOpc = AArch64::NEGv8i16;
1902 } else if (Ty == LLT::fixed_vector(16, 8)) {
1903 Opc = IsASHR ? AArch64::SSHLv16i8 : AArch64::USHLv16i8;
1904 NegOpc = AArch64::NEGv16i8;
1905 } else if (Ty == LLT::fixed_vector(8, 8)) {
1906 Opc = IsASHR ? AArch64::SSHLv8i8 : AArch64::USHLv8i8;
1907 NegOpc = AArch64::NEGv8i8;
1908 } else {
1909 LLVM_DEBUG(dbgs() << "Unhandled G_ASHR type");
1910 return false;
1911 }
1912
1913 auto Neg = MIB.buildInstr(NegOpc, {RC}, {Src2Reg});
1914 constrainSelectedInstRegOperands(*Neg, TII, TRI, RBI);
1915 auto SShl = MIB.buildInstr(Opc, {DstReg}, {Src1Reg, Neg});
1916 constrainSelectedInstRegOperands(*SShl, TII, TRI, RBI);
1917 I.eraseFromParent();
1918 return true;
1919}
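// Illustrative lowering (hypothetical registers) for a v4s32 G_LSHR by the
// amounts in %amt:
//   %neg = NEGv4i32 %amt
//   %dst = USHLv4i32 %src, %neg
// i.e. a shift left by the negated amounts, as described above; the G_ASHR
// case uses SSHL instead of USHL.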
1920
1921bool AArch64InstructionSelector::selectVaStartAAPCS(
1922 MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const {
1923 return false;
1924}
1925
1926bool AArch64InstructionSelector::selectVaStartDarwin(
1927 MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const {
1928 AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
1929 Register ListReg = I.getOperand(0).getReg();
1930
1931 Register ArgsAddrReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
1932
1933 auto MIB =
1934 BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::ADDXri))
1935 .addDef(ArgsAddrReg)
1936 .addFrameIndex(FuncInfo->getVarArgsStackIndex())
1937 .addImm(0)
1938 .addImm(0);
1939
1940 constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
1941
1942 MIB = BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::STRXui))
1943 .addUse(ArgsAddrReg)
1944 .addUse(ListReg)
1945 .addImm(0)
1946 .addMemOperand(*I.memoperands_begin());
1947
1948 constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
1949 I.eraseFromParent();
1950 return true;
1951}
1952
1953void AArch64InstructionSelector::materializeLargeCMVal(
1954 MachineInstr &I, const Value *V, unsigned OpFlags) {
1955 MachineBasicBlock &MBB = *I.getParent();
1956 MachineFunction &MF = *MBB.getParent();
1957 MachineRegisterInfo &MRI = MF.getRegInfo();
1958
1959 auto MovZ = MIB.buildInstr(AArch64::MOVZXi, {&AArch64::GPR64RegClass}, {});
1960 MovZ->addOperand(MF, I.getOperand(1));
1961 MovZ->getOperand(1).setTargetFlags(OpFlags | AArch64II::MO_G0 |
1962 AArch64II::MO_NC);
1963 MovZ->addOperand(MF, MachineOperand::CreateImm(0));
1964 constrainSelectedInstRegOperands(*MovZ, TII, TRI, RBI);
1965
1966 auto BuildMovK = [&](Register SrcReg, unsigned char Flags, unsigned Offset,
1967 Register ForceDstReg) {
1968 Register DstReg = ForceDstReg
1969 ? ForceDstReg
1970 : MRI.createVirtualRegister(&AArch64::GPR64RegClass);
1971 auto MovI = MIB.buildInstr(AArch64::MOVKXi).addDef(DstReg).addUse(SrcReg);
1972 if (auto *GV = dyn_cast<GlobalValue>(V)) {
1973 MovI->addOperand(MF, MachineOperand::CreateGA(
1974 GV, MovZ->getOperand(1).getOffset(), Flags));
1975 } else {
1976 MovI->addOperand(
1977 MF, MachineOperand::CreateBA(cast<BlockAddress>(V),
1978 MovZ->getOperand(1).getOffset(), Flags));
1979 }
1980 MovI->addOperand(MF, MachineOperand::CreateImm(Offset));
1981 constrainSelectedInstRegOperands(*MovI, TII, TRI, RBI);
1982 return DstReg;
1983 };
1984 Register DstReg = BuildMovK(MovZ.getReg(0),
1985 AArch64II::MO_G1 | AArch64II::MO_NC, 16, 0);
1986 DstReg = BuildMovK(DstReg, AArch64II::MO_G2 | AArch64II::MO_NC, 32, 0);
1987 BuildMovK(DstReg, AArch64II::MO_G3, 48, I.getOperand(0).getReg());
1988}
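// The net effect is the usual four-instruction wide-literal sequence: a MOVZ
// of the low 16 bits followed by three MOVKs for bits 16-31, 32-47 and 48-63,
// built here as one MOVZXi plus three chained MOVKXi instructions carrying
// the MO_G0..MO_G3 target flags and shift amounts of 0/16/32/48.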
1989
1990bool AArch64InstructionSelector::preISelLower(MachineInstr &I) {
1991 MachineBasicBlock &MBB = *I.getParent();
1992 MachineFunction &MF = *MBB.getParent();
1993 MachineRegisterInfo &MRI = MF.getRegInfo();
1994
1995 switch (I.getOpcode()) {
1996 case TargetOpcode::G_STORE: {
1997 bool Changed = contractCrossBankCopyIntoStore(I, MRI);
1998 MachineOperand &SrcOp = I.getOperand(0);
1999 if (MRI.getType(SrcOp.getReg()).isPointer()) {
2000 // Allow matching with imported patterns for stores of pointers. Unlike
2001 // G_LOAD/G_PTR_ADD, we may not have selected all users. So, emit a copy
2002 // and constrain.
2003 auto Copy = MIB.buildCopy(LLT::scalar(64), SrcOp);
2004 Register NewSrc = Copy.getReg(0);
2005 SrcOp.setReg(NewSrc);
2006 RBI.constrainGenericRegister(NewSrc, AArch64::GPR64RegClass, MRI);
2007 Changed = true;
2008 }
2009 return Changed;
2010 }
2011 case TargetOpcode::G_PTR_ADD:
2012 return convertPtrAddToAdd(I, MRI);
2013 case TargetOpcode::G_LOAD: {
2014 // For scalar loads of pointers, we try to convert the dest type from p0
2015 // to s64 so that our imported patterns can match. Like with the G_PTR_ADD
2016 // conversion, this should be ok because all users should have been
2017 // selected already, so the type doesn't matter for them.
2018 Register DstReg = I.getOperand(0).getReg();
2019 const LLT DstTy = MRI.getType(DstReg);
2020 if (!DstTy.isPointer())
2021 return false;
2022 MRI.setType(DstReg, LLT::scalar(64));
2023 return true;
2024 }
2025 case AArch64::G_DUP: {
2026 // Convert the type from p0 to s64 to help selection.
2027 LLT DstTy = MRI.getType(I.getOperand(0).getReg());
2028 if (!DstTy.getElementType().isPointer())
2029 return false;
2030 auto NewSrc = MIB.buildCopy(LLT::scalar(64), I.getOperand(1).getReg());
2031 MRI.setType(I.getOperand(0).getReg(),
2032 DstTy.changeElementType(LLT::scalar(64)));
2033 MRI.setRegClass(NewSrc.getReg(0), &AArch64::GPR64RegClass);
2034 I.getOperand(1).setReg(NewSrc.getReg(0));
2035 return true;
2036 }
2037 case TargetOpcode::G_UITOFP:
2038 case TargetOpcode::G_SITOFP: {
2039 // If both source and destination regbanks are FPR, then convert the opcode
2040 // to G_SITOF so that the importer can select it to an fpr variant.
2041 // Otherwise, it ends up matching an fpr/gpr variant and adding a cross-bank
2042 // copy.
2043 Register SrcReg = I.getOperand(1).getReg();
2044 LLT SrcTy = MRI.getType(SrcReg);
2045 LLT DstTy = MRI.getType(I.getOperand(0).getReg());
2046 if (SrcTy.isVector() || SrcTy.getSizeInBits() != DstTy.getSizeInBits())
2047 return false;
2048
2049 if (RBI.getRegBank(SrcReg, MRI, TRI)->getID() == AArch64::FPRRegBankID) {
2050 if (I.getOpcode() == TargetOpcode::G_SITOFP)
2051 I.setDesc(TII.get(AArch64::G_SITOF));
2052 else
2053 I.setDesc(TII.get(AArch64::G_UITOF));
2054 return true;
2055 }
2056 return false;
2057 }
2058 default:
2059 return false;
2060 }
2061}
2062
2063/// This lowering tries to look for G_PTR_ADD instructions and then converts
2064/// them to a standard G_ADD with a COPY on the source.
2065///
2066/// The motivation behind this is to expose the add semantics to the imported
2067/// tablegen patterns. We shouldn't need to check for uses being loads/stores,
2068/// because the selector works bottom up, uses before defs. By the time we
2069/// end up trying to select a G_PTR_ADD, we should have already attempted to
2070/// fold this into addressing modes and were therefore unsuccessful.
2071bool AArch64InstructionSelector::convertPtrAddToAdd(
2072 MachineInstr &I, MachineRegisterInfo &MRI) {
2073 assert(I.getOpcode() == TargetOpcode::G_PTR_ADD && "Expected G_PTR_ADD");
2074 Register DstReg = I.getOperand(0).getReg();
2075 Register AddOp1Reg = I.getOperand(1).getReg();
2076 const LLT PtrTy = MRI.getType(DstReg);
2077 if (PtrTy.getAddressSpace() != 0)
2078 return false;
2079
2080 const LLT CastPtrTy =
2081 PtrTy.isVector() ? LLT::fixed_vector(2, 64) : LLT::scalar(64);
2082 auto PtrToInt = MIB.buildPtrToInt(CastPtrTy, AddOp1Reg);
2083 // Set regbanks on the registers.
2084 if (PtrTy.isVector())
2085 MRI.setRegBank(PtrToInt.getReg(0), RBI.getRegBank(AArch64::FPRRegBankID));
2086 else
2087 MRI.setRegBank(PtrToInt.getReg(0), RBI.getRegBank(AArch64::GPRRegBankID));
2088
2089 // Now turn the %dst(p0) = G_PTR_ADD %base, off into:
2090 // %dst(intty) = G_ADD %intbase, off
2091 I.setDesc(TII.get(TargetOpcode::G_ADD));
2092 MRI.setType(DstReg, CastPtrTy);
2093 I.getOperand(1).setReg(PtrToInt.getReg(0));
2094 if (!select(*PtrToInt)) {
2095 LLVM_DEBUG(dbgs() << "Failed to select G_PTRTOINT in convertPtrAddToAdd");
2096 return false;
2097 }
2098
2099 // Also take the opportunity here to try to do some optimization.
2100 // Try to convert this into a G_SUB if the offset is a 0-x negate idiom.
2101 Register NegatedReg;
2102 if (!mi_match(I.getOperand(2).getReg(), MRI, m_Neg(m_Reg(NegatedReg))))
2103 return true;
2104 I.getOperand(2).setReg(NegatedReg);
2105 I.setDesc(TII.get(TargetOpcode::G_SUB));
2106 return true;
2107}
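// For example (illustrative vregs):
//   %dst:_(p0) = G_PTR_ADD %base:_(p0), %off:_(s64)
// becomes
//   %int:_(s64) = G_PTRTOINT %base:_(p0)
//   %dst:_(s64) = G_ADD %int, %off
// and, if %off was defined as "G_SUB 0, %n", the G_ADD is further rewritten
// into "%dst:_(s64) = G_SUB %int, %n".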
2108
2109bool AArch64InstructionSelector::earlySelectSHL(MachineInstr &I,
2110 MachineRegisterInfo &MRI) {
2111 // We try to match the immediate variant of LSL, which is actually an alias
2112 // for a special case of UBFM. Otherwise, we fall back to the imported
2113 // selector which will match the register variant.
2114 assert(I.getOpcode() == TargetOpcode::G_SHL && "unexpected op");
2115 const auto &MO = I.getOperand(2);
2116 auto VRegAndVal = getIConstantVRegVal(MO.getReg(), MRI);
2117 if (!VRegAndVal)
2118 return false;
2119
2120 const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
2121 if (DstTy.isVector())
2122 return false;
2123 bool Is64Bit = DstTy.getSizeInBits() == 64;
2124 auto Imm1Fn = Is64Bit ? selectShiftA_64(MO) : selectShiftA_32(MO);
2125 auto Imm2Fn = Is64Bit ? selectShiftB_64(MO) : selectShiftB_32(MO);
2126
2127 if (!Imm1Fn || !Imm2Fn)
2128 return false;
2129
2130 auto NewI =
2131 MIB.buildInstr(Is64Bit ? AArch64::UBFMXri : AArch64::UBFMWri,
2132 {I.getOperand(0).getReg()}, {I.getOperand(1).getReg()});
2133
2134 for (auto &RenderFn : *Imm1Fn)
2135 RenderFn(NewI);
2136 for (auto &RenderFn : *Imm2Fn)
2137 RenderFn(NewI);
2138
2139 I.eraseFromParent();
2140 return constrainSelectedInstRegOperands(*NewI, TII, TRI, RBI);
2141}
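// For reference, the alias being targeted is
//   lsl xD, xN, #sh  ==  ubfm xD, xN, #((64 - sh) % 64), #(63 - sh)
// (with 32/31 in the 32-bit form); the two render functions above are
// presumably what turn the shift amount into those immr/imms immediates.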
2142
2143bool AArch64InstructionSelector::contractCrossBankCopyIntoStore(
2144 MachineInstr &I, MachineRegisterInfo &MRI) {
2145 assert(I.getOpcode() == TargetOpcode::G_STORE && "Expected G_STORE");
2146 // If we're storing a scalar, it doesn't matter what register bank that
2147 // scalar is on. All that matters is the size.
2148 //
2149 // So, if we see something like this (with a 32-bit scalar as an example):
2150 //
2151 // %x:gpr(s32) = ... something ...
2152 // %y:fpr(s32) = COPY %x:gpr(s32)
2153 // G_STORE %y:fpr(s32)
2154 //
2155 // We can fix this up into something like this:
2156 //
2157 // G_STORE %x:gpr(s32)
2158 //
2159 // And then continue the selection process normally.
2160 Register DefDstReg = getSrcRegIgnoringCopies(I.getOperand(0).getReg(), MRI);
2161 if (!DefDstReg.isValid())
2162 return false;
2163 LLT DefDstTy = MRI.getType(DefDstReg);
2164 Register StoreSrcReg = I.getOperand(0).getReg();
2165 LLT StoreSrcTy = MRI.getType(StoreSrcReg);
2166
2167 // If we get something strange like a physical register, then we shouldn't
2168 // go any further.
2169 if (!DefDstTy.isValid())
2170 return false;
2171
2172 // Are the source and dst types the same size?
2173 if (DefDstTy.getSizeInBits() != StoreSrcTy.getSizeInBits())
2174 return false;
2175
2176 if (RBI.getRegBank(StoreSrcReg, MRI, TRI) ==
2177 RBI.getRegBank(DefDstReg, MRI, TRI))
2178 return false;
2179
2180 // We have a cross-bank copy, which is entering a store. Let's fold it.
2181 I.getOperand(0).setReg(DefDstReg);
2182 return true;
2183}
2184
2185bool AArch64InstructionSelector::earlySelect(MachineInstr &I) {
2186 assert(I.getParent() && "Instruction should be in a basic block!");
2187 assert(I.getParent()->getParent() && "Instruction should be in a function!");
2188
2189 MachineBasicBlock &MBB = *I.getParent();
2190 MachineFunction &MF = *MBB.getParent();
2191 MachineRegisterInfo &MRI = MF.getRegInfo();
2192
2193 switch (I.getOpcode()) {
2194 case AArch64::G_DUP: {
2195 // Before selecting a DUP instruction, check if it is better selected as a
2196 // MOV or load from a constant pool.
2197 Register Src = I.getOperand(1).getReg();
2198 auto ValAndVReg = getIConstantVRegValWithLookThrough(Src, MRI);
2199 if (!ValAndVReg)
2200 return false;
2201 LLVMContext &Ctx = MF.getFunction().getContext();
2202 Register Dst = I.getOperand(0).getReg();
2203 auto *CV = ConstantDataVector::getSplat(
2204 MRI.getType(Dst).getNumElements(),
2205 ConstantInt::get(Type::getIntNTy(Ctx, MRI.getType(Src).getSizeInBits()),
2206 ValAndVReg->Value));
2207 if (!emitConstantVector(Dst, CV, MIB, MRI))
2208 return false;
2209 I.eraseFromParent();
2210 return true;
2211 }
2212 case TargetOpcode::G_SEXT:
2213 // Check for i64 sext(i32 vector_extract) prior to tablegen to select SMOV
2214 // over a normal extend.
2215 if (selectUSMovFromExtend(I, MRI))
2216 return true;
2217 return false;
2218 case TargetOpcode::G_BR:
2219 return false;
2220 case TargetOpcode::G_SHL:
2221 return earlySelectSHL(I, MRI);
2222 case TargetOpcode::G_CONSTANT: {
2223 bool IsZero = false;
2224 if (I.getOperand(1).isCImm())
2225 IsZero = I.getOperand(1).getCImm()->getZExtValue() == 0;
2226 else if (I.getOperand(1).isImm())
2227 IsZero = I.getOperand(1).getImm() == 0;
2228
2229 if (!IsZero)
2230 return false;
2231
2232 Register DefReg = I.getOperand(0).getReg();
2233 LLT Ty = MRI.getType(DefReg);
2234 if (Ty.getSizeInBits() == 64) {
2235 I.getOperand(1).ChangeToRegister(AArch64::XZR, false);
2236 RBI.constrainGenericRegister(DefReg, AArch64::GPR64RegClass, MRI);
2237 } else if (Ty.getSizeInBits() == 32) {
2238 I.getOperand(1).ChangeToRegister(AArch64::WZR, false);
2239 RBI.constrainGenericRegister(DefReg, AArch64::GPR32RegClass, MRI);
2240 } else
2241 return false;
2242
2243 I.setDesc(TII.get(TargetOpcode::COPY));
2244 return true;
2245 }
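// A zero G_CONSTANT is thus rewritten into a plain COPY from the zero
// register (WZR/XZR) of the matching width, presumably so users can fold the
// zero register directly instead of materializing a separate immediate.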
2246
2247 case TargetOpcode::G_ADD: {
2248 // Check if this is being fed by a G_ICMP on either side.
2249 //
2250 // (cmp pred, x, y) + z
2251 //
2252 // In the above case, when the cmp is true, we increment z by 1. So, we can
2253 // fold the add into the cset for the cmp by using cinc.
2254 //
2255 // FIXME: This would probably be a lot nicer in PostLegalizerLowering.
2256 Register AddDst = I.getOperand(0).getReg();
2257 Register AddLHS = I.getOperand(1).getReg();
2258 Register AddRHS = I.getOperand(2).getReg();
2259 // Only handle scalars.
2260 LLT Ty = MRI.getType(AddLHS);
2261 if (Ty.isVector())
2262 return false;
2263 // Since G_ICMP is modeled as ADDS/SUBS/ANDS, we can handle 32 bits or 64
2264 // bits.
2265 unsigned Size = Ty.getSizeInBits();
2266 if (Size != 32 && Size != 64)
2267 return false;
2268 auto MatchCmp = [&](Register Reg) -> MachineInstr * {
2269 if (!MRI.hasOneNonDBGUse(Reg))
2270 return nullptr;
2271 // If the LHS of the add is 32 bits, then we want to fold a 32-bit
2272 // compare.
2273 if (Size == 32)
2274 return getOpcodeDef(TargetOpcode::G_ICMP, Reg, MRI);
2275 // We model scalar compares using 32-bit destinations right now.
2276 // If it's a 64-bit compare, it'll have 64-bit sources.
2277 Register ZExt;
2278 if (!mi_match(Reg, MRI,
2279 m_OneNonDBGUse(m_GZExt(m_OneNonDBGUse(m_Reg(ZExt))))))
2280 return nullptr;
2281 auto *Cmp = getOpcodeDef(TargetOpcode::G_ICMP, ZExt, MRI);
2282 if (!Cmp ||
2283 MRI.getType(Cmp->getOperand(2).getReg()).getSizeInBits() != 64)
2284 return nullptr;
2285 return Cmp;
2286 };
2287 // Try to match
2288 // z + (cmp pred, x, y)
2289 MachineInstr *Cmp = MatchCmp(AddRHS);
2290 if (!Cmp) {
2291 // (cmp pred, x, y) + z
2292 std::swap(AddLHS, AddRHS);
2293 Cmp = MatchCmp(AddRHS);
2294 if (!Cmp)
2295 return false;
2296 }
2297 auto &PredOp = Cmp->getOperand(1);
2298 auto Pred = static_cast<CmpInst::Predicate>(PredOp.getPredicate());
2299 const AArch64CC::CondCode InvCC =
2300 changeICMPPredToAArch64CC(CmpInst::getInversePredicate(Pred));
2301 MIB.setInstrAndDebugLoc(I);
2302 emitIntegerCompare(/*LHS=*/Cmp->getOperand(2),
2303 /*RHS=*/Cmp->getOperand(3), PredOp, MIB);
2304 emitCSINC(/*Dst=*/AddDst, /*Src =*/AddLHS, /*Src2=*/AddLHS, InvCC, MIB);
2305 I.eraseFromParent();
2306 return true;
2307 }
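// For example (illustrative vregs), with a 32-bit compare:
//   %c:_(s32) = G_ICMP intpred(eq), %x, %y
//   %a:_(s32) = G_ADD %z, %c
// is emitted as a flag-setting compare of %x and %y followed by
//   CSINCWr %a, %z, %z, ne
// i.e. %a = (%x == %y) ? %z + 1 : %z, where "ne" is the inverse of the
// original predicate, as CSINC requires.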
2308 case TargetOpcode::G_OR: {
2309 // Look for operations that take the lower `Width=Size-ShiftImm` bits of
2310 // `ShiftSrc` and insert them into the upper `Width` bits of `MaskSrc` via
2311 // shifting and masking that we can replace with a BFI (encoded as a BFM).
2312 Register Dst = I.getOperand(0).getReg();
2313 LLT Ty = MRI.getType(Dst);
2314
2315 if (!Ty.isScalar())
2316 return false;
2317
2318 unsigned Size = Ty.getSizeInBits();
2319 if (Size != 32 && Size != 64)
2320 return false;
2321
2322 Register ShiftSrc;
2323 int64_t ShiftImm;
2324 Register MaskSrc;
2325 int64_t MaskImm;
2326 if (!mi_match(
2327 Dst, MRI,
2328 m_GOr(m_OneNonDBGUse(m_GShl(m_Reg(ShiftSrc), m_ICst(ShiftImm))),
2329 m_OneNonDBGUse(m_GAnd(m_Reg(MaskSrc), m_ICst(MaskImm))))))
2330 return false;
2331
2332 if (ShiftImm > Size || ((1ULL << ShiftImm) - 1ULL) != uint64_t(MaskImm))
2333 return false;
2334
2335 int64_t Immr = Size - ShiftImm;
2336 int64_t Imms = Size - ShiftImm - 1;
2337 unsigned Opc = Size == 32 ? AArch64::BFMWri : AArch64::BFMXri;
2338 emitInstr(Opc, {Dst}, {MaskSrc, ShiftSrc, Immr, Imms}, MIB);
2339 I.eraseFromParent();
2340 return true;
2341 }
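// Worked example (32 bits, illustrative values): with ShiftImm = 8 and
// MaskImm = 0xff, (1 << 8) - 1 == 0xff holds, so
//   %dst = G_OR (G_SHL %hi, 8), (G_AND %lo, 0xff)
// is emitted as BFMWri %dst, %lo, %hi, 24, 23, the "bfi wDst, wHi, #8, #24"
// alias: the low 8 bits of %lo are kept and the low 24 bits of %hi are
// inserted above them.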
2342 case TargetOpcode::G_FENCE: {
2343 if (I.getOperand(1).getImm() == 0)
2344 BuildMI(MBB, I, MIMetadata(I), TII.get(AArch64::CompilerBarrier))
2345 .addImm(I.getOperand(0).getImm());
2346 else
2347 BuildMI(MBB, I, MIMetadata(I), TII.get(AArch64::DMB))
2348 .addImm(I.getOperand(0).getImm() == 4 ? 0x9 : 0xb);
2349 I.eraseFromParent();
2350 return true;
2351 }
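// The DMB immediate selects the barrier option: 0x9 is ISHLD, a load-only
// barrier that suffices for an acquire-only fence (assuming the usual
// AtomicOrdering numbering where 4 is acquire), and 0xb is ISH, a full
// inner-shareable barrier, for all stronger orderings. Operand 1 is
// presumably the syncscope, so a singlethread fence only needs a compiler
// barrier.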
2352 default:
2353 return false;
2354 }
2355}
2356
2357bool AArch64InstructionSelector::select(MachineInstr &I) {
2358 assert(I.getParent() && "Instruction should be in a basic block!");
2359 assert(I.getParent()->getParent() && "Instruction should be in a function!");
2360
2361 MachineBasicBlock &MBB = *I.getParent();
2362 MachineFunction &MF = *MBB.getParent();
2363 MachineRegisterInfo &MRI = MF.getRegInfo();
2364
2365 const AArch64Subtarget *Subtarget = &MF.getSubtarget<AArch64Subtarget>();
2366 if (Subtarget->requiresStrictAlign()) {
2367 // We don't support this feature yet.
2368 LLVM_DEBUG(dbgs() << "AArch64 GISel does not support strict-align yet\n");
2369 return false;
2370 }
2371
2372 MIB.setInstrAndDebugLoc(I);
2373
2374 unsigned Opcode = I.getOpcode();
2375 // G_PHI requires same handling as PHI
2376 if (!I.isPreISelOpcode() || Opcode == TargetOpcode::G_PHI) {
2377 // Certain non-generic instructions also need some special handling.
2378
2379 if (Opcode == TargetOpcode::LOAD_STACK_GUARD)
2380 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2381
2382 if (Opcode == TargetOpcode::PHI || Opcode == TargetOpcode::G_PHI) {
2383 const Register DefReg = I.getOperand(0).getReg();
2384 const LLT DefTy = MRI.getType(DefReg);
2385
2386 const RegClassOrRegBank &RegClassOrBank =
2387 MRI.getRegClassOrRegBank(DefReg);
2388
2389 const TargetRegisterClass *DefRC
2390 = RegClassOrBank.dyn_cast<const TargetRegisterClass *>();
2391 if (!DefRC) {
2392 if (!DefTy.isValid()) {
2393 LLVM_DEBUG(dbgs() << "PHI operand has no type, not a gvreg?\n");
2394 return false;
2395 }
2396 const RegisterBank &RB = *RegClassOrBank.get<const RegisterBank *>();
2397 DefRC = getRegClassForTypeOnBank(DefTy, RB);
2398 if (!DefRC) {
2399 LLVM_DEBUG(dbgs() << "PHI operand has unexpected size/bank\n");
2400 return false;
2401 }
2402 }
2403
2404 I.setDesc(TII.get(TargetOpcode::PHI));
2405
2406 return RBI.constrainGenericRegister(DefReg, *DefRC, MRI);
2407 }
2408
2409 if (I.isCopy())
2410 return selectCopy(I, TII, MRI, TRI, RBI);
2411
2412 if (I.isDebugInstr())
2413 return selectDebugInstr(I, MRI, RBI);
2414
2415 return true;
2416 }
2417
2418
2419 if (I.getNumOperands() != I.getNumExplicitOperands()) {
2420 LLVM_DEBUG(
2421 dbgs() << "Generic instruction has unexpected implicit operands\n");
2422 return false;
2423 }
2424
2425 // Try to do some lowering before we start instruction selecting. These
2426 // lowerings are purely transformations on the input G_MIR and so selection
2427 // must continue after any modification of the instruction.
2428 if (preISelLower(I)) {
2429 Opcode = I.getOpcode(); // The opcode may have been modified, refresh it.
2430 }
2431
2432 // There may be patterns where the importer can't deal with them optimally,
2433 // but does select it to a suboptimal sequence so our custom C++ selection
2434 // code later never has a chance to work on it. Therefore, we have an early
2435 // selection attempt here to give priority to certain selection routines
2436 // over the imported ones.
2437 if (earlySelect(I))
2438 return true;
2439
2440 if (selectImpl(I, *CoverageInfo))
2441 return true;
2442
2443 LLT Ty =
2444 I.getOperand(0).isReg() ? MRI.getType(I.getOperand(0).getReg()) : LLT{};
2445
2446 switch (Opcode) {
2447 case TargetOpcode::G_SBFX:
2448 case TargetOpcode::G_UBFX: {
2449 static const unsigned OpcTable[2][2] = {
2450 {AArch64::UBFMWri, AArch64::UBFMXri},
2451 {AArch64::SBFMWri, AArch64::SBFMXri}};
2452 bool IsSigned = Opcode == TargetOpcode::G_SBFX;
2453 unsigned Size = Ty.getSizeInBits();
2454 unsigned Opc = OpcTable[IsSigned][Size == 64];
2455 auto Cst1 =
2456 getIConstantVRegValWithLookThrough(I.getOperand(2).getReg(), MRI);
2457 assert(Cst1 && "Should have gotten a constant for src 1?");
2458 auto Cst2 =
2459 getIConstantVRegValWithLookThrough(I.getOperand(3).getReg(), MRI);
2460 assert(Cst2 && "Should have gotten a constant for src 2?");
2461 auto LSB = Cst1->Value.getZExtValue();
2462 auto Width = Cst2->Value.getZExtValue();
2463 auto BitfieldInst =
2464 MIB.buildInstr(Opc, {I.getOperand(0)}, {I.getOperand(1)})
2465 .addImm(LSB)
2466 .addImm(LSB + Width - 1);
2467 I.eraseFromParent();
2468 return constrainSelectedInstRegOperands(*BitfieldInst, TII, TRI, RBI);
2469 }
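// Worked example (illustrative): "G_UBFX %dst, %src, 8, 4" on s32 becomes
//   UBFMWri %dst, %src, 8, 11
// i.e. the "ubfx wDst, wSrc, #8, #4" alias, since the bitfield instructions
// take LSB and MSB (LSB + Width - 1) rather than LSB and Width.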
2470 case TargetOpcode::G_BRCOND:
2471 return selectCompareBranch(I, MF, MRI);
2472
2473 case TargetOpcode::G_BRINDIRECT: {
2474 I.setDesc(TII.get(AArch64::BR));
2475 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2476 }
2477
2478 case TargetOpcode::G_BRJT:
2479 return selectBrJT(I, MRI);
2480
2481 case AArch64::G_ADD_LOW: {
2482 // This op may have been separated from its ADRP companion by the localizer
2483 // or some other code motion pass. Given that many CPUs will try to
2484 // macro fuse these operations anyway, select this into a MOVaddr pseudo
2485 // which will later be expanded into an ADRP+ADD pair after scheduling.
2486 MachineInstr *BaseMI = MRI.getVRegDef(I.getOperand(1).getReg());
2487 if (BaseMI->getOpcode() != AArch64::ADRP) {
2488 I.setDesc(TII.get(AArch64::ADDXri));
2489 I.addOperand(MachineOperand::CreateImm(0));
2490 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2491 }
2492 assert(TM.getCodeModel() == CodeModel::Small &&
2493 "Expected small code model");
2494 auto Op1 = BaseMI->getOperand(1);
2495 auto Op2 = I.getOperand(2);
2496 auto MovAddr = MIB.buildInstr(AArch64::MOVaddr, {I.getOperand(0)}, {})
2497 .addGlobalAddress(Op1.getGlobal(), Op1.getOffset(),
2498 Op1.getTargetFlags())
2499 .addGlobalAddress(Op2.getGlobal(), Op2.getOffset(),
2500 Op2.getTargetFlags());
2501 I.eraseFromParent();
2502 return constrainSelectedInstRegOperands(*MovAddr, TII, TRI, RBI);
2503 }
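As a rough, hedged illustration of what the eventual ADRP + ADD pair computes once the MOVaddr pseudo is expanded under the small code model (the name below is made up for the example):

#include <cstdint>

static uint64_t pagePlusLo12(uint64_t SymbolAddr) {
  uint64_t Page = SymbolAddr & ~uint64_t(0xFFF); // ADRP materializes the 4 KiB page (PC-relatively)
  uint64_t Lo12 = SymbolAddr & uint64_t(0xFFF);  // ADD ..., :lo12:sym adds the page offset
  return Page + Lo12;                            // == SymbolAddr
}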
2504
2505 case TargetOpcode::G_BSWAP: {
2506 // Handle vector types for G_BSWAP directly.
2507 Register DstReg = I.getOperand(0).getReg();
2508 LLT DstTy = MRI.getType(DstReg);
2509
2510 // We should only get vector types here; everything else is handled by the
2511 // importer right now.
2512 if (!DstTy.isVector() || DstTy.getSizeInBits() > 128) {
2513 LLVM_DEBUG(dbgs() << "Dst type for G_BSWAP currently unsupported.\n");
2514 return false;
2515 }
2516
2517 // Only handle 4 and 2 element vectors for now.
2518 // TODO: 16-bit elements.
2519 unsigned NumElts = DstTy.getNumElements();
2520 if (NumElts != 4 && NumElts != 2) {
2521 LLVM_DEBUG(dbgs() << "Unsupported number of elements for G_BSWAP.\n");
2522 return false;
2523 }
2524
2525 // Choose the correct opcode for the supported types. Right now, that's
2526 // v2s32, v4s32, and v2s64.
2527 unsigned Opc = 0;
2528 unsigned EltSize = DstTy.getElementType().getSizeInBits();
2529 if (EltSize == 32)
2530 Opc = (DstTy.getNumElements() == 2) ? AArch64::REV32v8i8
2531 : AArch64::REV32v16i8;
2532 else if (EltSize == 64)
2533 Opc = AArch64::REV64v16i8;
2534
2535 // We should always get something by the time we get here...
2536 assert(Opc != 0 && "Didn't get an opcode for G_BSWAP?");
2537
2538 I.setDesc(TII.get(Opc));
2539 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2540 }
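Per element, the REV32/REV64 byte reversals chosen above implement an ordinary byte swap. A self-contained sketch of the 32-bit case (helper name hypothetical):

#include <cstdint>

static uint32_t bswap32(uint32_t X) {
  return (X >> 24) | ((X >> 8) & 0x0000FF00u) |
         ((X << 8) & 0x00FF0000u) | (X << 24);
}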
2541
2542 case TargetOpcode::G_FCONSTANT:
2543 case TargetOpcode::G_CONSTANT: {
2544 const bool isFP = Opcode == TargetOpcode::G_FCONSTANT;
2545
2546 const LLT s8 = LLT::scalar(8);
2547 const LLT s16 = LLT::scalar(16);
2548 const LLT s32 = LLT::scalar(32);
2549 const LLT s64 = LLT::scalar(64);
2550 const LLT s128 = LLT::scalar(128);
2551 const LLT p0 = LLT::pointer(0, 64);
2552
2553 const Register DefReg = I.getOperand(0).getReg();
2554 const LLT DefTy = MRI.getType(DefReg);
2555 const unsigned DefSize = DefTy.getSizeInBits();
2556 const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI);
2557
2558 // FIXME: Redundant check, but even less readable when factored out.
2559 if (isFP) {
2560 if (Ty != s16 && Ty != s32 && Ty != s64 && Ty != s128) {
2561 LLVM_DEBUG(dbgs() << "Unable to materialize FP " << Ty
2562 << " constant, expected: " << s16 << " or " << s32
2563 << " or " << s64 << " or " << s128 << '\n');
2564 return false;
2565 }
2566
2567 if (RB.getID() != AArch64::FPRRegBankID) {
2568 LLVM_DEBUG(dbgs() << "Unable to materialize FP " << Ty
2569 << " constant on bank: " << RB
2570 << ", expected: FPR\n");
2571 return false;
2572 }
2573
2574 // The case when we have 0.0 is covered by tablegen. Reject it here so we
2575 // can be sure tablegen works correctly and isn't rescued by this code.
2576 // 0.0 is not covered by tablegen for FP128. So we will handle this
2577 // scenario in the code here.
2578 if (DefSize != 128 && I.getOperand(1).getFPImm()->isExactlyValue(0.0))
2579 return false;
2580 } else {
2581 // s32 and s64 are covered by tablegen.
2582 if (Ty != p0 && Ty != s8 && Ty != s16) {
2583 LLVM_DEBUG(dbgs() << "Unable to materialize integer " << Ty
2584 << " constant, expected: " << s32 << ", " << s64
2585 << ", or " << p0 << '\n');
2586 return false;
2587 }
2588
2589 if (RB.getID() != AArch64::GPRRegBankID) {
2590 LLVM_DEBUG(dbgs() << "Unable to materialize integer " << Ty
2591 << " constant on bank: " << RB
2592 << ", expected: GPR\n");
2593 return false;
2594 }
2595 }
2596
2597 if (isFP) {
2598 const TargetRegisterClass &FPRRC = *getRegClassForTypeOnBank(DefTy, RB);
2599 // For 16, 64, and 128b values, emit a constant pool load.
2600 switch (DefSize) {
2601 default:
2602 llvm_unreachable("Unexpected destination size for G_FCONSTANT?");
2603 case 32:
2604 // For s32, use a cp load if we have optsize/minsize.
2605 if (!shouldOptForSize(&MF))
2606 break;
2607 [[fallthrough]];
2608 case 16:
2609 case 64:
2610 case 128: {
2611 auto *FPImm = I.getOperand(1).getFPImm();
2612 auto *LoadMI = emitLoadFromConstantPool(FPImm, MIB);
2613 if (!LoadMI) {
2614 LLVM_DEBUG(dbgs() << "Failed to load double constant pool entry\n");
2615 return false;
2616 }
2617 MIB.buildCopy({DefReg}, {LoadMI->getOperand(0).getReg()});
2618 I.eraseFromParent();
2619 return RBI.constrainGenericRegister(DefReg, FPRRC, MRI);
2620 }
2621 }
2622
2623 // Either emit a FMOV, or emit a copy to emit a normal mov.
2624 assert(DefSize == 32 &&
2625 "Expected constant pool loads for all sizes other than 32!");
2626 const Register DefGPRReg =
2627 MRI.createVirtualRegister(&AArch64::GPR32RegClass);
2628 MachineOperand &RegOp = I.getOperand(0);
2629 RegOp.setReg(DefGPRReg);
2630 MIB.setInsertPt(MIB.getMBB(), std::next(I.getIterator()));
2631 MIB.buildCopy({DefReg}, {DefGPRReg});
2632
2633 if (!RBI.constrainGenericRegister(DefReg, FPRRC, MRI)) {
2634 LLVM_DEBUG(dbgs() << "Failed to constrain G_FCONSTANT def operand\n");
2635 return false;
2636 }
2637
2638 MachineOperand &ImmOp = I.getOperand(1);
2639 // FIXME: Is going through int64_t always correct?
2640 ImmOp.ChangeToImmediate(
2641 ImmOp.getFPImm()->getValueAPF().bitcastToAPInt().getZExtValue());
2642 } else if (I.getOperand(1).isCImm()) {
2643 uint64_t Val = I.getOperand(1).getCImm()->getZExtValue();
2644 I.getOperand(1).ChangeToImmediate(Val);
2645 } else if (I.getOperand(1).isImm()) {
2646 uint64_t Val = I.getOperand(1).getImm();
2647 I.getOperand(1).ChangeToImmediate(Val);
2648 }
2649
2650 const unsigned MovOpc =
2651 DefSize == 64 ? AArch64::MOVi64imm : AArch64::MOVi32imm;
2652 I.setDesc(TII.get(MovOpc));
2653 constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2654 return true;
2655 }
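The FMOV-via-GPR path above rewrites the FP immediate operand to its raw bit pattern (bitcastToAPInt) so a plain MOVi32imm/MOVi64imm can materialize it in a GPR before copying it to the FPR. A minimal standalone illustration of that bit reinterpretation for a 32-bit float (function name hypothetical):

#include <cstdint>
#include <cstring>

static uint32_t floatBits(float F) {
  uint32_t Bits;
  static_assert(sizeof(F) == sizeof(Bits), "expected 32-bit float");
  std::memcpy(&Bits, &F, sizeof(Bits)); // same reinterpretation as bitcastToAPInt().getZExtValue()
  return Bits;
}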
2656 case TargetOpcode::G_EXTRACT: {
2657 Register DstReg = I.getOperand(0).getReg();
2658 Register SrcReg = I.getOperand(1).getReg();
2659 LLT SrcTy = MRI.getType(SrcReg);
2660 LLT DstTy = MRI.getType(DstReg);
2661 (void)DstTy;
2662 unsigned SrcSize = SrcTy.getSizeInBits();
2663
2664 if (SrcTy.getSizeInBits() > 64) {
2665 // This should be an extract of an s128, which is like a vector extract.
2666 if (SrcTy.getSizeInBits() != 128)
2667 return false;
2668 // Only support extracting 64 bits from an s128 at the moment.
2669 if (DstTy.getSizeInBits() != 64)
2670 return false;
2671
2672 unsigned Offset = I.getOperand(2).getImm();
2673 if (Offset % 64 != 0)
2674 return false;
2675
2676 // Check we have the right regbank always.
2677 const RegisterBank &SrcRB = *RBI.getRegBank(SrcReg, MRI, TRI);
2678 const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
2679 assert(SrcRB.getID() == DstRB.getID() && "Wrong extract regbank!");
2680
2681 if (SrcRB.getID() == AArch64::GPRRegBankID) {
2682 auto NewI =
2683 MIB.buildInstr(TargetOpcode::COPY, {DstReg}, {})
2684 .addUse(SrcReg, 0,
2685 Offset == 0 ? AArch64::sube64 : AArch64::subo64);
2686 constrainOperandRegClass(MF, TRI, MRI, TII, RBI, *NewI,
2687 AArch64::GPR64RegClass, NewI->getOperand(0));
2688 I.eraseFromParent();
2689 return true;
2690 }
2691
2692 // Emit the same code as a vector extract.
2693 // Offset must be a multiple of 64.
2694 unsigned LaneIdx = Offset / 64;
2695 MachineInstr *Extract = emitExtractVectorElt(
2696 DstReg, DstRB, LLT::scalar(64), SrcReg, LaneIdx, MIB);
2697 if (!Extract)
2698 return false;
2699 I.eraseFromParent();
2700 return true;
2701 }
2702
2703 I.setDesc(TII.get(SrcSize == 64 ? AArch64::UBFMXri : AArch64::UBFMWri));
2704 MachineInstrBuilder(MF, I).addImm(I.getOperand(2).getImm() +
2705 Ty.getSizeInBits() - 1);
2706
2707 if (SrcSize < 64) {
2708 assert(SrcSize == 32 && DstTy.getSizeInBits() == 16 &&
2709 "unexpected G_EXTRACT types");
2710 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2711 }
2712
2713 DstReg = MRI.createGenericVirtualRegister(LLT::scalar(64));
2714 MIB.setInsertPt(MIB.getMBB(), std::next(I.getIterator()));
2715 MIB.buildInstr(TargetOpcode::COPY, {I.getOperand(0).getReg()}, {})
2716 .addReg(DstReg, 0, AArch64::sub_32);
2717 RBI.constrainGenericRegister(I.getOperand(0).getReg(),
2718 AArch64::GPR32RegClass, MRI);
2719 I.getOperand(0).setReg(DstReg);
2720
2721 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2722 }
2723
2724 case TargetOpcode::G_INSERT: {
2725 LLT SrcTy = MRI.getType(I.getOperand(2).getReg());
2726 LLT DstTy = MRI.getType(I.getOperand(0).getReg());
2727 unsigned DstSize = DstTy.getSizeInBits();
2728 // Larger inserts are vectors, same-size ones should be something else by
2729 // now (split up or turned into COPYs).
2730 if (Ty.getSizeInBits() > 64 || SrcTy.getSizeInBits() > 32)
2731 return false;
2732
2733 I.setDesc(TII.get(DstSize == 64 ? AArch64::BFMXri : AArch64::BFMWri));
2734 unsigned LSB = I.getOperand(3).getImm();
2735 unsigned Width = MRI.getType(I.getOperand(2).getReg()).getSizeInBits();
2736 I.getOperand(3).setImm((DstSize - LSB) % DstSize);
2737 MachineInstrBuilder(MF, I).addImm(Width - 1);
2738
2739 if (DstSize < 64) {
2740 assert(DstSize == 32 && SrcTy.getSizeInBits() == 16 &&
2741 "unexpected G_INSERT types");
2742 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2743 }
2744
2745 Register SrcReg = MRI.createGenericVirtualRegister(LLT::scalar(64));
2746 BuildMI(MBB, I.getIterator(), I.getDebugLoc(),
2747 TII.get(AArch64::SUBREG_TO_REG))
2748 .addDef(SrcReg)
2749 .addImm(0)
2750 .addUse(I.getOperand(2).getReg())
2751 .addImm(AArch64::sub_32);
2752 RBI.constrainGenericRegister(I.getOperand(2).getReg(),
2753 AArch64::GPR32RegClass, MRI);
2754 I.getOperand(2).setReg(SrcReg);
2755
2756 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2757 }
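The BFM immediates set above (immr = (DstSize - LSB) % DstSize, imms = Width - 1) encode a bitfield insert. A standalone sketch of the equivalent computation, assuming Width >= 1 and LSB + Width <= 64 (function name hypothetical):

#include <cstdint>

// Insert the low Width bits of Src into Dst at bit position LSB,
// leaving the remaining bits of Dst unchanged (BFI/BFM semantics).
static uint64_t bfi64(uint64_t Dst, uint64_t Src, unsigned LSB, unsigned Width) {
  uint64_t Mask = (Width == 64) ? ~0ULL : ((1ULL << Width) - 1);
  return (Dst & ~(Mask << LSB)) | ((Src & Mask) << LSB);
}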
2758 case TargetOpcode::G_FRAME_INDEX: {
2759 // allocas and G_FRAME_INDEX are only supported in addrspace(0).
2760 if (Ty != LLT::pointer(0, 64)) {
2761 LLVM_DEBUG(dbgs() << "G_FRAME_INDEX pointer has type: " << Ty
2762 << ", expected: " << LLT::pointer(0, 64) << '\n');
2763 return false;
2764 }
2765 I.setDesc(TII.get(AArch64::ADDXri));
2766
2767 // MOs for a #0 shifted immediate.
2768 I.addOperand(MachineOperand::CreateImm(0));
2769 I.addOperand(MachineOperand::CreateImm(0));
2770
2771 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2772 }
2773
2774 case TargetOpcode::G_GLOBAL_VALUE: {
2775 auto GV = I.getOperand(1).getGlobal();
2776 if (GV->isThreadLocal())
2777 return selectTLSGlobalValue(I, MRI);
2778
2779 unsigned OpFlags = STI.ClassifyGlobalReference(GV, TM);
2780 if (OpFlags & AArch64II::MO_GOT) {
2781 I.setDesc(TII.get(AArch64::LOADgot));
2782 I.getOperand(1).setTargetFlags(OpFlags);
2783 } else if (TM.getCodeModel() == CodeModel::Large) {
2784 // Materialize the global using movz/movk instructions.
2785 materializeLargeCMVal(I, GV, OpFlags);
2786 I.eraseFromParent();
2787 return true;
2788 } else if (TM.getCodeModel() == CodeModel::Tiny) {
2789 I.setDesc(TII.get(AArch64::ADR));
2790 I.getOperand(1).setTargetFlags(OpFlags);
2791 } else {
2792 I.setDesc(TII.get(AArch64::MOVaddr));
2793 I.getOperand(1).setTargetFlags(OpFlags | AArch64II::MO_PAGE);
2794 MachineInstrBuilder MIB(MF, I);
2795 MIB.addGlobalAddress(GV, I.getOperand(1).getOffset(),
2796 OpFlags | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
2797 }
2798 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2799 }
2800
2801 case TargetOpcode::G_ZEXTLOAD:
2802 case TargetOpcode::G_LOAD:
2803 case TargetOpcode::G_STORE: {
2804 GLoadStore &LdSt = cast<GLoadStore>(I);
2805 bool IsZExtLoad = I.getOpcode() == TargetOpcode::G_ZEXTLOAD;
2806 LLT PtrTy = MRI.getType(LdSt.getPointerReg());
2807
2808 if (PtrTy != LLT::pointer(0, 64)) {
2809 LLVM_DEBUG(dbgs() << "Load/Store pointer has type: " << PtrTy
2810 << ", expected: " << LLT::pointer(0, 64) << '\n');
2811 return false;
2812 }
2813
2814 uint64_t MemSizeInBytes = LdSt.getMemSize();
2815 unsigned MemSizeInBits = LdSt.getMemSizeInBits();
2816 AtomicOrdering Order = LdSt.getMMO().getSuccessOrdering();
2817
2818 // Need special instructions for atomics that affect ordering.
2819 if (Order != AtomicOrdering::NotAtomic &&
2820 Order != AtomicOrdering::Unordered &&
2821 Order != AtomicOrdering::Monotonic) {
2822 assert(!isa<GZExtLoad>(LdSt));
2823 if (MemSizeInBytes > 64)
2824 return false;
2825
2826 if (isa<GLoad>(LdSt)) {
2827 static constexpr unsigned LDAPROpcodes[] = {
2828 AArch64::LDAPRB, AArch64::LDAPRH, AArch64::LDAPRW, AArch64::LDAPRX};
2829 static constexpr unsigned LDAROpcodes[] = {
2830 AArch64::LDARB, AArch64::LDARH, AArch64::LDARW, AArch64::LDARX};
2831 ArrayRef<unsigned> Opcodes =
2832 STI.hasLDAPR() && Order != AtomicOrdering::SequentiallyConsistent
2833 ? LDAPROpcodes
2834 : LDAROpcodes;
2835 I.setDesc(TII.get(Opcodes[Log2_32(MemSizeInBytes)]));
2836 } else {
2837 static constexpr unsigned Opcodes[] = {AArch64::STLRB, AArch64::STLRH,
2838 AArch64::STLRW, AArch64::STLRX};
2839 Register ValReg = LdSt.getReg(0);
2840 if (MRI.getType(ValReg).getSizeInBits() == 64 && MemSizeInBits != 64) {
2841 // Emit a subreg copy of 32 bits.
2842 Register NewVal = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
2843 MIB.buildInstr(TargetOpcode::COPY, {NewVal}, {})
2844 .addReg(I.getOperand(0).getReg(), 0, AArch64::sub_32);
2845 I.getOperand(0).setReg(NewVal);
2846 }
2847 I.setDesc(TII.get(Opcodes[Log2_32(MemSizeInBytes)]));
2848 }
2849 constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2850 return true;
2851 }
2852
2853#ifndef NDEBUG
2854 const Register PtrReg = LdSt.getPointerReg();
2855 const RegisterBank &PtrRB = *RBI.getRegBank(PtrReg, MRI, TRI);
2856 // Check that the pointer register is valid.
2857 assert(PtrRB.getID() == AArch64::GPRRegBankID &&
2858 "Load/Store pointer operand isn't a GPR");
2859 assert(MRI.getType(PtrReg).isPointer() &&
2860 "Load/Store pointer operand isn't a pointer");
2861#endif
2862
2863 const Register ValReg = LdSt.getReg(0);
2864 const LLT ValTy = MRI.getType(ValReg);
2865 const RegisterBank &RB = *RBI.getRegBank(ValReg, MRI, TRI);
2866
2867 // The code below doesn't support truncating stores, so we need to split it
2868 // again.
2869 if (isa<GStore>(LdSt) && ValTy.getSizeInBits() > MemSizeInBits) {
2870 unsigned SubReg;
2871 LLT MemTy = LdSt.getMMO().getMemoryType();
2872 auto *RC = getRegClassForTypeOnBank(MemTy, RB);
2873 if (!getSubRegForClass(RC, TRI, SubReg))
2874 return false;
2875
2876 // Generate a subreg copy.
2877 auto Copy = MIB.buildInstr(TargetOpcode::COPY, {MemTy}, {})
2878 .addReg(ValReg, 0, SubReg)
2879 .getReg(0);
2880 RBI.constrainGenericRegister(Copy, *RC, MRI);
2881 LdSt.getOperand(0).setReg(Copy);
2882 } else if (isa<GLoad>(LdSt) && ValTy.getSizeInBits() > MemSizeInBits) {
2883 // If this is an any-extending load from the FPR bank, split it into a regular
2884 // load + extend.
2885 if (RB.getID() == AArch64::FPRRegBankID) {
2886 unsigned SubReg;
2887 LLT MemTy = LdSt.getMMO().getMemoryType();
2888 auto *RC = getRegClassForTypeOnBank(MemTy, RB);
2889 if (!getSubRegForClass(RC, TRI, SubReg))
2890 return false;
2891 Register OldDst = LdSt.getReg(0);
2892 Register NewDst =
2893 MRI.createGenericVirtualRegister(LdSt.getMMO().getMemoryType());
2894 LdSt.getOperand(0).setReg(NewDst);
2895 MRI.setRegBank(NewDst, RB);
2896 // Generate a SUBREG_TO_REG to extend it.
2897 MIB.setInsertPt(MIB.getMBB(), std::next(LdSt.getIterator()));
2898 MIB.buildInstr(AArch64::SUBREG_TO_REG, {OldDst}, {})
2899 .addImm(0)
2900 .addUse(NewDst)
2901 .addImm(SubReg);
2902 auto SubRegRC = getRegClassForTypeOnBank(MRI.getType(OldDst), RB);
2903 RBI.constrainGenericRegister(OldDst, *SubRegRC, MRI);
2904 MIB.setInstr(LdSt);
2905 }
2906 }
2907
2908 // Helper lambda for partially selecting I. Either returns the original
2909 // instruction with an updated opcode, or a new instruction.
2910 auto SelectLoadStoreAddressingMode = [&]() -> MachineInstr * {
2911 bool IsStore = isa<GStore>(I);
1. Assuming 'I' is not a 'GStore'
2912 const unsigned NewOpc =
2913 selectLoadStoreUIOp(I.getOpcode(), RB.getID(), MemSizeInBits);
2914 if (NewOpc == I.getOpcode())
2. Taking false branch
2915 return nullptr;
2916 // Check if we can fold anything into the addressing mode.
2917 auto AddrModeFns =
2918 selectAddrModeIndexed(I.getOperand(1), MemSizeInBytes);
3. Calling 'AArch64InstructionSelector::selectAddrModeIndexed'
2919 if (!AddrModeFns) {
2920 // Can't fold anything. Use the original instruction.
2921 I.setDesc(TII.get(NewOpc));
2922 I.addOperand(MachineOperand::CreateImm(0));
2923 return &I;
2924 }
2925
2926 // Folded something. Create a new instruction and return it.
2927 auto NewInst = MIB.buildInstr(NewOpc, {}, {}, I.getFlags());
2928 Register CurValReg = I.getOperand(0).getReg();
2929 IsStore ? NewInst.addUse(CurValReg) : NewInst.addDef(CurValReg);
2930 NewInst.cloneMemRefs(I);
2931 for (auto &Fn : *AddrModeFns)
2932 Fn(NewInst);
2933 I.eraseFromParent();
2934 return &*NewInst;
2935 };
2936
2937 MachineInstr *LoadStore = SelectLoadStoreAddressingMode();
2938 if (!LoadStore)
2939 return false;
2940
2941 // If we're storing a 0, use WZR/XZR.
2942 if (Opcode == TargetOpcode::G_STORE) {
2943 auto CVal = getIConstantVRegValWithLookThrough(
2944 LoadStore->getOperand(0).getReg(), MRI);
2945 if (CVal && CVal->Value == 0) {
2946 switch (LoadStore->getOpcode()) {
2947 case AArch64::STRWui:
2948 case AArch64::STRHHui:
2949 case AArch64::STRBBui:
2950 LoadStore->getOperand(0).setReg(AArch64::WZR);
2951 break;
2952 case AArch64::STRXui:
2953 LoadStore->getOperand(0).setReg(AArch64::XZR);
2954 break;
2955 }
2956 }
2957 }
2958
2959 if (IsZExtLoad) {
2960 // The zextload from a smaller type to i32 should be handled by the
2961 // importer.
2962 if (MRI.getType(LoadStore->getOperand(0).getReg()).getSizeInBits() != 64)
2963 return false;
2964 // If we have a ZEXTLOAD then change the load's type to be a narrower reg
2965 // and zero_extend with SUBREG_TO_REG.
2966 Register LdReg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
2967 Register DstReg = LoadStore->getOperand(0).getReg();
2968 LoadStore->getOperand(0).setReg(LdReg);
2969
2970 MIB.setInsertPt(MIB.getMBB(), std::next(LoadStore->getIterator()));
2971 MIB.buildInstr(AArch64::SUBREG_TO_REG, {DstReg}, {})
2972 .addImm(0)
2973 .addUse(LdReg)
2974 .addImm(AArch64::sub_32);
2975 constrainSelectedInstRegOperands(*LoadStore, TII, TRI, RBI);
2976 return RBI.constrainGenericRegister(DstReg, AArch64::GPR64allRegClass,
2977 MRI);
2978 }
2979 return constrainSelectedInstRegOperands(*LoadStore, TII, TRI, RBI);
2980 }
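selectAddrModeIndexed (the call the analyzer follows at step 3) tries to fold the address into the scaled unsigned-immediate form of LDR/STR. As a hedged sketch of the kind of legality check involved, under the usual AArch64 rule that the byte offset must be non-negative, a multiple of the access size, and fit in 12 bits after scaling (the helper name and exact placement are assumptions):

#include <cstdint>

static bool fitsScaledUImm12(int64_t ByteOffset, uint64_t MemSizeInBytes) {
  if (MemSizeInBytes == 0 || ByteOffset < 0 ||
      uint64_t(ByteOffset) % MemSizeInBytes != 0)
    return false;
  uint64_t Scaled = uint64_t(ByteOffset) / MemSizeInBytes;
  return Scaled < (1u << 12); // 12-bit unsigned, scaled immediate
}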
2981
2982 case TargetOpcode::G_SMULH:
2983 case TargetOpcode::G_UMULH: {
2984 // Reject the various things we don't support yet.
2985 if (unsupportedBinOp(I, RBI, MRI, TRI))
2986 return false;
2987
2988 const Register DefReg = I.getOperand(0).getReg();
2989 const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI);
2990
2991 if (RB.getID() != AArch64::GPRRegBankID) {
2992 LLVM_DEBUG(dbgs() << "G_[SU]MULH on bank: " << RB << ", expected: GPR\n");
2993 return false;
2994 }
2995
2996 if (Ty != LLT::scalar(64)) {
2997 LLVM_DEBUG(dbgs() << "G_[SU]MULH has type: " << Ty
2998 << ", expected: " << LLT::scalar(64) << '\n');
2999 return false;
3000 }
3001
3002 unsigned NewOpc = I.getOpcode() == TargetOpcode::G_SMULH ? AArch64::SMULHrr
3003 : AArch64::UMULHrr;
3004 I.setDesc(TII.get(NewOpc));
3005
3006 // Now that we selected an opcode, we need to constrain the register
3007 // operands to use appropriate classes.
3008 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3009 }
3010 case TargetOpcode::G_LSHR:
3011 case TargetOpcode::G_ASHR:
3012 if (MRI.getType(I.getOperand(0).getReg()).isVector())
3013 return selectVectorAshrLshr(I, MRI);
3014 [[fallthrough]];
3015 case TargetOpcode::G_SHL:
3016 if (Opcode == TargetOpcode::G_SHL &&
3017 MRI.getType(I.getOperand(0).getReg()).isVector())
3018 return selectVectorSHL(I, MRI);
3019
3020 // These shifts were legalized to have 64 bit shift amounts because we
3021 // want to take advantage of the selection patterns that assume the
3022 // immediates are s64s, however, selectBinaryOp will assume both operands
3023 // will have the same bit size.
3024 {
3025 Register SrcReg = I.getOperand(1).getReg();
3026 Register ShiftReg = I.getOperand(2).getReg();
3027 const LLT ShiftTy = MRI.getType(ShiftReg);
3028 const LLT SrcTy = MRI.getType(SrcReg);
3029 if (!SrcTy.isVector() && SrcTy.getSizeInBits() == 32 &&
3030 ShiftTy.getSizeInBits() == 64) {
3031 assert(!ShiftTy.isVector() && "unexpected vector shift ty");
3032 // Insert a subregister copy to implement a 64->32 trunc
3033 auto Trunc = MIB.buildInstr(TargetOpcode::COPY, {SrcTy}, {})
3034 .addReg(ShiftReg, 0, AArch64::sub_32);
3035 MRI.setRegBank(Trunc.getReg(0), RBI.getRegBank(AArch64::GPRRegBankID));
3036 I.getOperand(2).setReg(Trunc.getReg(0));
3037 }
3038 }
3039 [[fallthrough]];
3040 case TargetOpcode::G_OR: {
3041 // Reject the various things we don't support yet.
3042 if (unsupportedBinOp(I, RBI, MRI, TRI))
3043 return false;
3044
3045 const unsigned OpSize = Ty.getSizeInBits();
3046
3047 const Register DefReg = I.getOperand(0).getReg();
3048 const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI);
3049
3050 const unsigned NewOpc = selectBinaryOp(I.getOpcode(), RB.getID(), OpSize);
3051 if (NewOpc == I.getOpcode())
3052 return false;
3053
3054 I.setDesc(TII.get(NewOpc));
3055 // FIXME: Should the type be always reset in setDesc?
3056
3057 // Now that we selected an opcode, we need to constrain the register
3058 // operands to use appropriate classes.
3059 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3060 }
3061
3062 case TargetOpcode::G_PTR_ADD: {
3063 emitADD(I.getOperand(0).getReg(), I.getOperand(1), I.getOperand(2), MIB);
3064 I.eraseFromParent();
3065 return true;
3066 }
3067 case TargetOpcode::G_SADDO:
3068 case TargetOpcode::G_UADDO:
3069 case TargetOpcode::G_SSUBO:
3070 case TargetOpcode::G_USUBO: {
3071 // Emit the operation and get the correct condition code.
3072 auto OpAndCC = emitOverflowOp(Opcode, I.getOperand(0).getReg(),
3073 I.getOperand(2), I.getOperand(3), MIB);
3074
3075 // Now, put the overflow result in the register given by the first operand
3076 // to the overflow op. CSINC increments the result when the predicate is
3077 // false, so to get the increment when it's true, we need to use the
3078 // inverse. In this case, we want to increment when carry is set.
3079 Register ZReg = AArch64::WZR;
3080 emitCSINC(/*Dst=*/I.getOperand(1).getReg(), /*Src1=*/ZReg, /*Src2=*/ZReg,
3081 getInvertedCondCode(OpAndCC.second), MIB);
3082 I.eraseFromParent();
3083 return true;
3084 }
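To make the inversion described in the comment concrete: CSINC Wd, Wn, Wm, cond yields Wn when cond holds and Wm + 1 otherwise, so with both sources tied to WZR and the inverted condition, the destination becomes 1 exactly when the overflow condition itself is true. A tiny standalone model (names hypothetical):

#include <cstdint>

// CSINC Wd, Wn, Wm, cond  ==>  Wd = cond ? Wn : Wm + 1
static uint32_t csinc(bool Cond, uint32_t N, uint32_t M) {
  return Cond ? N : M + 1;
}

// Overflow flag materialized as 0/1: csinc(!overflow, 0, 0) == (overflow ? 1 : 0).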
3085
3086 case TargetOpcode::G_PTRMASK: {
3087 Register MaskReg = I.getOperand(2).getReg();
3088 Optional<int64_t> MaskVal = getIConstantVRegSExtVal(MaskReg, MRI);
3089 // TODO: Implement arbitrary cases
3090 if (!MaskVal || !isShiftedMask_64(*MaskVal))
3091 return false;
3092
3093 uint64_t Mask = *MaskVal;
3094 I.setDesc(TII.get(AArch64::ANDXri));
3095 I.getOperand(2).ChangeToImmediate(
3096 AArch64_AM::encodeLogicalImmediate(Mask, 64));
3097
3098 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3099 }
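The fold above is limited to masks that form a single contiguous run of ones (the TODO covers the remaining cases). A self-contained sketch of that test, assumed to behave like isShiftedMask_64 for nonzero values (name hypothetical):

#include <cstdint>

static bool isShiftedMask(uint64_t V) {
  if (V == 0)
    return false;
  uint64_t Filled = V | (V - 1);        // fill the zeros below the lowest set bit
  return (Filled & (Filled + 1)) == 0;  // true iff the result is a low mask, i.e. no gaps
}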
3100 case TargetOpcode::G_PTRTOINT:
3101 case TargetOpcode::G_TRUNC: {
3102 const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
3103 const LLT SrcTy = MRI.getType(I.getOperand(1).getReg());
3104
3105 const Register DstReg = I.getOperand(0).getReg();
3106 const Register SrcReg = I.getOperand(1).getReg();
3107
3108 const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
3109 const RegisterBank &SrcRB = *RBI.getRegBank(SrcReg, MRI, TRI);
3110
3111 if (DstRB.getID() != SrcRB.getID()) {
3112 LLVM_DEBUG(
3113 dbgs() << "G_TRUNC/G_PTRTOINT input/output on different banks\n");
3114 return false;
3115 }
3116
3117 if (DstRB.getID() == AArch64::GPRRegBankID) {
3118 const TargetRegisterClass *DstRC = getRegClassForTypeOnBank(DstTy, DstRB);
3119 if (!DstRC)
3120 return false;
3121
3122 const TargetRegisterClass *SrcRC = getRegClassForTypeOnBank(SrcTy, SrcRB);
3123 if (!SrcRC)
3124 return false;
3125
3126 if (!RBI.constrainGenericRegister(SrcReg, *SrcRC, MRI) ||
3127 !RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) {
3128 LLVM_DEBUG(dbgs() << "Failed to constrain G_TRUNC/G_PTRTOINT\n");
3129 return false;
3130 }
3131
3132 if (DstRC == SrcRC) {
3133 // Nothing to be done
3134 } else if (Opcode == TargetOpcode::G_TRUNC && DstTy == LLT::scalar(32) &&
3135 SrcTy == LLT::scalar(64)) {
3136 llvm_unreachable("TableGen can import this case");
3137 return false;
3138 } else if (DstRC == &AArch64::GPR32RegClass &&
3139 SrcRC == &AArch64::GPR64RegClass) {
3140 I.getOperand(1).setSubReg(AArch64::sub_32);
3141 } else {
3142 LLVM_DEBUG(
3143 dbgs() << "Unhandled mismatched classes in G_TRUNC/G_PTRTOINT\n");
3144 return false;
3145 }
3146
3147 I.setDesc(TII.get(TargetOpcode::COPY));
3148 return true;
3149 } else if (DstRB.getID() == AArch64::FPRRegBankID) {
3150 if (DstTy == LLT::fixed_vector(4, 16) &&
3151 SrcTy == LLT::fixed_vector(4, 32)) {
3152 I.setDesc(TII.get(AArch64::XTNv4i16));
3153 constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3154 return true;
3155 }
3156
3157 if (!SrcTy.isVector() && SrcTy.getSizeInBits() == 128) {
3158 MachineInstr *Extract = emitExtractVectorElt(
3159 DstReg, DstRB, LLT::scalar(DstTy.getSizeInBits()), SrcReg, 0, MIB);
3160 if (!Extract)
3161 return false;
3162 I.eraseFromParent();
3163 return true;
3164 }
3165
3166 // We might have a vector G_PTRTOINT, in which case just emit a COPY.
3167 if (Opcode == TargetOpcode::G_PTRTOINT) {
3168 assert(DstTy.isVector() && "Expected an FPR ptrtoint to be a vector");
3169 I.setDesc(TII.get(TargetOpcode::COPY));
3170 return selectCopy(I, TII, MRI, TRI, RBI);
3171 }
3172 }
3173
3174 return false;
3175 }
3176
3177 case TargetOpcode::G_ANYEXT: {
3178 if (selectUSMovFromExtend(I, MRI))
3179 return true;
3180
3181 const Register DstReg = I.getOperand(0).getReg();
3182 const Register SrcReg = I.getOperand(1).getReg();
3183
3184 const RegisterBank &RBDst = *RBI.getRegBank(DstReg, MRI, TRI);
3185 if (RBDst.getID() != AArch64::GPRRegBankID) {
3186 LLVM_DEBUG(dbgs() << "G_ANYEXT on bank: " << RBDst
3187 << ", expected: GPR\n");
3188 return false;
3189 }
3190
3191 const RegisterBank &RBSrc = *RBI.getRegBank(SrcReg, MRI, TRI);
3192 if (RBSrc.getID() != AArch64::GPRRegBankID) {
3193 LLVM_DEBUG(dbgs() << "G_ANYEXT on bank: " << RBSrc
3194 << ", expected: GPR\n");
3195 return false;
3196 }
3197
3198 const unsigned DstSize = MRI.getType(DstReg).getSizeInBits();
3199
3200 if (DstSize == 0) {
3201 LLVM_DEBUG(dbgs() << "G_ANYEXT operand has no size, not a gvreg?\n");
3202 return false;
3203 }
3204
3205 if (DstSize != 64 && DstSize > 32) {
3206 LLVM_DEBUG(dbgs() << "G_ANYEXT to size: " << DstSize
3207 << ", expected: 32 or 64\n");
3208 return false;
3209 }
3210 // At this point G_ANYEXT is just like a plain COPY, but we need
3211 // to explicitly form the 64-bit value if any.
3212 if (DstSize > 32) {
3213 Register ExtSrc = MRI.createVirtualRegister(&AArch64::GPR64allRegClass);
3214 BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::SUBREG_TO_REG))
3215 .addDef(ExtSrc)
3216 .addImm(0)
3217 .addUse(SrcReg)
3218 .addImm(AArch64::sub_32);
3219 I.getOperand(1).setReg(ExtSrc);
3220 }
3221 return selectCopy(I, TII, MRI, TRI, RBI);
3222 }
3223
3224 case TargetOpcode::G_ZEXT:
3225 case TargetOpcode::G_SEXT_INREG:
3226 case TargetOpcode::G_SEXT: {
3227 if (selectUSMovFromExtend(I, MRI))
3228 return true;
3229
3230 unsigned Opcode = I.getOpcode();
3231 const bool IsSigned = Opcode != TargetOpcode::G_ZEXT;
3232 const Register DefReg = I.getOperand(0).getReg();
3233 Register SrcReg = I.getOperand(1).getReg();
3234 const LLT DstTy = MRI.getType(DefReg);
3235 const LLT SrcTy = MRI.getType(SrcReg);
3236 unsigned DstSize = DstTy.getSizeInBits();
3237 unsigned SrcSize = SrcTy.getSizeInBits();
3238
3239 // SEXT_INREG has the same src reg size as dst, the size of the value to be
3240 // extended is encoded in the imm.
3241 if (Opcode == TargetOpcode::G_SEXT_INREG)
3242 SrcSize = I.getOperand(2).getImm();
3243
3244 if (DstTy.isVector())
3245 return false; // Should be handled by imported patterns.
3246
3247 assert((*RBI.getRegBank(DefReg, MRI, TRI)).getID() ==
3248 AArch64::GPRRegBankID &&
3249 "Unexpected ext regbank");
3250
3251 MachineInstr *ExtI;
3252
3253 // First, check whether we're extending the result of a load whose destination
3254 // type is smaller than 32 bits; if so, this zext is redundant. GPR32 is the
3255 // smallest GPR register on AArch64, and all smaller loads automatically
3256 // zero-extend the upper bits. E.g.
3257 // %v(s8) = G_LOAD %p, :: (load 1)
3258 // %v2(s32) = G_ZEXT %v(s8)
3259 if (!IsSigned) {
3260 auto *LoadMI = getOpcodeDef(TargetOpcode::G_LOAD, SrcReg, MRI);
3261 bool IsGPR =
3262 RBI.getRegBank(SrcReg, MRI, TRI)->getID() == AArch64::GPRRegBankID;
3263 if (LoadMI && IsGPR) {
3264 const MachineMemOperand *MemOp = *LoadMI->memoperands_begin();
3265 unsigned BytesLoaded = MemOp->getSize();
3266 if (BytesLoaded < 4 && SrcTy.getSizeInBytes() == BytesLoaded)
3267 return selectCopy(I, TII, MRI, TRI, RBI);
3268 }
3269
3270 // For the 32-bit -> 64-bit case, we can emit a mov (ORRWrs)
3271 // + SUBREG_TO_REG.
3272 //
3273 // If we are zero extending from 32 bits to 64 bits, it's possible that
3274 // the instruction implicitly does the zero extend for us. In that case,
3275 // we only need the SUBREG_TO_REG.
3276 if (IsGPR && SrcSize == 32 && DstSize == 64) {
3277 // Unlike with the G_LOAD case, we don't want to look through copies
3278 // here. (See isDef32.)
3279 MachineInstr *Def = MRI.getVRegDef(SrcReg);
3280 Register SubregToRegSrc = SrcReg;
3281
3282 // Does the instruction implicitly zero extend?
3283 if (!Def || !isDef32(*Def)) {
3284 // No. Zero out using an OR.
3285 Register OrDst = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
3286 const Register ZReg = AArch64::WZR;
3287 MIB.buildInstr(AArch64::ORRWrs, {OrDst}, {ZReg, SrcReg}).addImm(0);
3288 SubregToRegSrc = OrDst;
3289 }
3290
3291 MIB.buildInstr(AArch64::SUBREG_TO_REG, {DefReg}, {})
3292 .addImm(0)
3293 .addUse(SubregToRegSrc)
3294 .addImm(AArch64::sub_32);
3295
3296 if (!RBI.constrainGenericRegister(DefReg, AArch64::GPR64RegClass,
3297 MRI)) {
3298 LLVM_DEBUG(dbgs() << "Failed to constrain G_ZEXT destination\n");
3299 return false;
3300 }
3301
3302 if (!RBI.constrainGenericRegister(SrcReg, AArch64::GPR32RegClass,
3303 MRI)) {
3304 LLVM_DEBUG(dbgs() << "Failed to constrain G_ZEXT source\n");
3305 return false;
3306 }
3307
3308 I.eraseFromParent();
3309 return true;
3310 }
3311 }
3312
3313 if (DstSize == 64) {
3314 if (Opcode != TargetOpcode::G_SEXT_INREG) {
3315 // FIXME: Can we avoid manually doing this?
3316 if (!RBI.constrainGenericRegister(SrcReg, AArch64::GPR32RegClass,
3317 MRI)) {
3318 LLVM_DEBUG(dbgs() << "Failed to constrain " << TII.getName(Opcode)
3319 << " operand\n");
3320 return false;
3321 }
3322 SrcReg = MIB.buildInstr(AArch64::SUBREG_TO_REG,
3323 {&AArch64::GPR64RegClass}, {})
3324 .addImm(0)
3325 .addUse(SrcReg)
3326 .addImm(AArch64::sub_32)
3327 .getReg(0);
3328 }
3329
3330 ExtI = MIB.buildInstr(IsSigned ? AArch64::SBFMXri : AArch64::UBFMXri,
3331 {DefReg}, {SrcReg})
3332 .addImm(0)
3333 .addImm(SrcSize - 1);
3334 } else if (DstSize <= 32) {
3335 ExtI = MIB.buildInstr(IsSigned ? AArch64::SBFMWri : AArch64::UBFMWri,
3336 {DefReg}, {SrcReg})
3337 .addImm(0)
3338 .addImm(SrcSize - 1);
3339 } else {
3340 return false;
3341 }
3342
3343 constrainSelectedInstRegOperands(*ExtI, TII, TRI, RBI);
3344 I.eraseFromParent();
3345 return true;
3346 }
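The SBFM/UBFM emitted above with immr = 0 and imms = SrcSize - 1 performs the actual sign/zero extension of the low SrcSize bits. A standalone sketch of the signed case, assuming 1 <= SrcSize <= 64 (function name hypothetical):

#include <cstdint>

// Sign-extend the low SrcSize bits of X to 64 bits (what SBFMXri #0,
// #(SrcSize - 1) computes); branch-free and free of signed-shift UB.
static uint64_t sextLowBits(uint64_t X, unsigned SrcSize) {
  uint64_t Mask = (SrcSize == 64) ? ~0ULL : ((1ULL << SrcSize) - 1);
  uint64_t Low = X & Mask;
  uint64_t SignBit = 1ULL << (SrcSize - 1);
  return (Low ^ SignBit) - SignBit;
}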
3347
3348 case TargetOpcode::G_SITOFP:
3349 case TargetOpcode::G_UITOFP:
3350 case TargetOpcode::G_FPTOSI:
3351 case TargetOpcode::G_FPTOUI: {
3352 const LLT DstTy = MRI.getType(I.getOperand(0).getReg()),
3353 SrcTy = MRI.getType(I.getOperand(1).getReg());
3354 const unsigned NewOpc = selectFPConvOpc(Opcode, DstTy, SrcTy);
3355 if (NewOpc == Opcode)
3356 return false;
3357
3358 I.setDesc(TII.get(NewOpc));
3359 constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3360 I.setFlags(MachineInstr::NoFPExcept);
3361
3362 return true;
3363 }
3364
3365 case TargetOpcode::G_FREEZE:
3366 return selectCopy(I, TII, MRI, TRI, RBI);
3367
3368 case TargetOpcode::G_INTTOPTR:
3369 // The importer is currently unable to import pointer types since they
3370 // didn't exist in SelectionDAG.
3371 return selectCopy(I, TII, MRI, TRI, RBI);
3372
3373 case TargetOpcode::G_BITCAST:
3374 // Imported SelectionDAG rules can handle every bitcast except those that
3375 // bitcast from a type to the same type. Ideally, these shouldn't occur
3376 // but we might not run an optimizer that deletes them. The other exception
3377 // is bitcasts involving pointer types, as SelectionDAG has no knowledge
3378 // of them.
3379 return selectCopy(I, TII, MRI, TRI, RBI);
3380
3381 case TargetOpcode::G_SELECT: {
3382 auto &Sel = cast<GSelect>(I);
3383 const Register CondReg = Sel.getCondReg();
3384 const Register TReg = Sel.getTrueReg();
3385 const Register FReg = Sel.getFalseReg();
3386
3387 if (tryOptSelect(Sel))
3388 return true;
3389
3390 // Make sure to use an unused vreg instead of wzr, so that the peephole
3391 // optimizations will be able to optimize these.
3392 Register DeadVReg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
3393 auto TstMI = MIB.buildInstr(AArch64::ANDSWri, {DeadVReg}, {CondReg})
3394 .addImm(AArch64_AM::encodeLogicalImmediate(1, 32));
3395 constrainSelectedInstRegOperands(*TstMI, TII, TRI, RBI);
3396 if (!emitSelect(Sel.getReg(0), TReg, FReg, AArch64CC::NE, MIB))
3397 return false;
3398 Sel.eraseFromParent();
3399 return true;
3400 }
3401 case TargetOpcode::G_ICMP: {
3402 if (Ty.isVector())
3403 return selectVectorICmp(I, MRI);
3404
3405 if (Ty != LLT::scalar(32)) {
3406 LLVM_DEBUG(dbgs() << "G_ICMP result has type: " << Ty
3407 << ", expected: " << LLT::scalar(32) << '\n');
3408 return false;
3409 }
3410
3411 auto Pred = static_cast<CmpInst::Predicate>(I.getOperand(1).getPredicate());
3412 const AArch64CC::CondCode InvCC =
3413 changeICMPPredToAArch64CC(CmpInst::getInversePredicate(Pred));
3414 emitIntegerCompare(I.getOperand(2), I.getOperand(3), I.getOperand(1), MIB);
3415 emitCSINC(/*Dst=*/I.getOperand(0).getReg(), /*Src1=*/AArch64::WZR,
3416 /*Src2=*/AArch64::WZR, InvCC, MIB);
3417 I.eraseFromParent();
3418 return true;
3419 }
3420
3421 case TargetOpcode::G_FCMP: {
3422 CmpInst::Predicate Pred =
3423 static_cast<CmpInst::Predicate>(I.getOperand(1).getPredicate());
3424 if (!emitFPCompare(I.getOperand(2).getReg(), I.getOperand(3).getReg(), MIB,
3425 Pred) ||
3426 !emitCSetForFCmp(I.getOperand(0).getReg(), Pred, MIB))
3427 return false;
3428 I.eraseFromParent();
3429 return true;
3430 }
3431 case TargetOpcode::G_VASTART:
3432 return STI.isTargetDarwin() ? selectVaStartDarwin(I, MF, MRI)
3433 : selectVaStartAAPCS(I, MF, MRI);
3434 case TargetOpcode::G_INTRINSIC:
3435 return selectIntrinsic(I, MRI);
3436 case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS:
3437 return selectIntrinsicWithSideEffects(I, MRI);
3438 case TargetOpcode::G_IMPLICIT_DEF: {
3439 I.setDesc(TII.get(TargetOpcode::IMPLICIT_DEF));
3440 const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
3441 const Register DstReg = I.getOperand(0).getReg();
3442 const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
3443 const TargetRegisterClass *DstRC = getRegClassForTypeOnBank(DstTy, DstRB);
3444 RBI.constrainGenericRegister(DstReg, *DstRC, MRI);
3445 return true;
3446 }
3447 case TargetOpcode::G_BLOCK_ADDR: {
3448 if (TM.getCodeModel() == CodeModel::Large) {
3449 materializeLargeCMVal(I, I.getOperand(1).getBlockAddress(), 0);
3450 I.eraseFromParent();
3451 return true;
3452 } else {
3453 I.setDesc(TII.get(AArch64::MOVaddrBA));
3454 auto MovMI = BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::MOVaddrBA),
3455 I.getOperand(0).getReg())
3456 .addBlockAddress(I.getOperand(1).getBlockAddress(),
3457 /* Offset */ 0, AArch64II::MO_PAGE)
3458 .addBlockAddress(
3459 I.getOperand(1).getBlockAddress(), /* Offset */ 0,
3460 AArch64II::MO_NC | AArch64II::MO_PAGEOFF);
3461 I.eraseFromParent();
3462 return constrainSelectedInstRegOperands(*MovMI, TII, TRI, RBI);
3463 }
3464 }
3465 case AArch64::G_DUP: {
3466 // When the scalar of G_DUP is an s8/s16 gpr, it can't be selected by
3467 // imported patterns. Do it manually here. Avoiding generating s16 gpr is
3468 // difficult because at RBS we may end up pessimizing the fpr case if we
3469 // decided to add an anyextend to fix this. Manual selection is the most
3470 // robust solution for now.
3471 if (RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI)->getID() !=
3472 AArch64::GPRRegBankID)
3473 return false; // We expect the fpr regbank case to be imported.
3474 LLT VecTy = MRI.getType(I.getOperand(0).getReg());
3475 if (VecTy == LLT::fixed_vector(8, 8))
3476 I.setDesc(TII.get(AArch64::DUPv8i8gpr));
3477 else if (VecTy == LLT::fixed_vector(16, 8))
3478 I.setDesc(TII.get(AArch64::DUPv16i8gpr));
3479 else if (VecTy == LLT::fixed_vector(4, 16))
3480 I.setDesc(TII.get(AArch64::DUPv4i16gpr));
3481 else if (VecTy == LLT::fixed_vector(8, 16))
3482 I.setDesc(TII.get(AArch64::DUPv8i16gpr));
3483 else
3484 return false;
3485 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3486 }
3487 case TargetOpcode::G_INTRINSIC_TRUNC:
3488 return selectIntrinsicTrunc(I, MRI);
3489 case TargetOpcode::G_INTRINSIC_ROUND:
3490 return selectIntrinsicRound(I, MRI);
3491 case TargetOpcode::G_BUILD_VECTOR:
3492 return selectBuildVector(I, MRI);
3493 case TargetOpcode::G_MERGE_VALUES:
3494 return selectMergeValues(I, MRI);
3495 case TargetOpcode::G_UNMERGE_VALUES:
3496 return selectUnmergeValues(I, MRI);
3497 case TargetOpcode::G_SHUFFLE_VECTOR:
3498 return selectShuffleVector(I, MRI);
3499 case TargetOpcode::G_EXTRACT_VECTOR_ELT:
3500 return selectExtractElt(I, MRI);
3501 case TargetOpcode::G_INSERT_VECTOR_ELT:
3502 return selectInsertElt(I, MRI);
3503 case TargetOpcode::G_CONCAT_VECTORS:
3504 return selectConcatVectors(I, MRI);
3505 case TargetOpcode::G_JUMP_TABLE:
3506 return selectJumpTable(I, MRI);
3507 case TargetOpcode::G_VECREDUCE_FADD:
3508 case TargetOpcode::G_VECREDUCE_ADD:
3509 return selectReduction(I, MRI);
3510 case TargetOpcode::G_MEMCPY:
3511 case TargetOpcode::G_MEMCPY_INLINE:
3512 case TargetOpcode::G_MEMMOVE:
3513 case TargetOpcode::G_MEMSET:
3514 assert(STI.hasMOPS() && "Shouldn't get here without +mops feature");
3515 return selectMOPS(I, MRI);
3516 }
3517
3518 return false;
3519}
3520
3521bool AArch64InstructionSelector::selectReduction(MachineInstr &I,
3522 MachineRegisterInfo &MRI) {
3523 Register VecReg = I.getOperand(1).getReg();
3524 LLT VecTy = MRI.getType(VecReg);
3525 if (I.getOpcode() == TargetOpcode::G_VECREDUCE_ADD) {
3526 // For <2 x i32> ADDPv2i32 generates an FPR64 value, so we need to emit
3527 // a subregister copy afterwards.
3528 if (VecTy == LLT::fixed_vector(2, 32)) {
3529 Register DstReg = I.getOperand(0).getReg();
3530 auto AddP = MIB.buildInstr(AArch64::ADDPv2i32, {&AArch64::FPR64RegClass},
3531 {VecReg, VecReg});
3532 auto Copy = MIB.buildInstr(TargetOpcode::COPY, {DstReg}, {})
3533 .addReg(AddP.getReg(0), 0, AArch64::ssub)
3534 .getReg(0);
3535 RBI.constrainGenericRegister(Copy, AArch64::FPR32RegClass, MRI);
3536 I.eraseFromParent();
3537 return constrainSelectedInstRegOperands(*AddP, TII, TRI, RBI);
3538 }
3539
3540 unsigned Opc = 0;
3541 if (VecTy == LLT::fixed_vector(16, 8))
3542 Opc = AArch64::ADDVv16i8v;
3543 else if (VecTy == LLT::fixed_vector(8, 16))
3544 Opc = AArch64::ADDVv8i16v;
3545 else if (VecTy == LLT::fixed_vector(4, 32))
3546 Opc = AArch64::ADDVv4i32v;
3547 else if (VecTy == LLT::fixed_vector(2, 64))
3548 Opc = AArch64::ADDPv2i64p;
3549 else {
3550 LLVM_DEBUG(dbgs() << "Unhandled type for add reduction");
3551 return false;
3552 }
3553 I.setDesc(TII.get(Opc));
3554 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3555 }
3556
3557 if (I.getOpcode() == TargetOpcode::G_VECREDUCE_FADD) {
3558 unsigned Opc = 0;
3559 if (VecTy == LLT::fixed_vector(2, 32))
3560 Opc = AArch64::FADDPv2i32p;
3561 else if (VecTy == LLT::fixed_vector(2, 64))
3562 Opc = AArch64::FADDPv2i64p;
3563 else {
3564 LLVM_DEBUG(dbgs() << "Unhandled type for fadd reduction");
3565 return false;
3566 }
3567 I.setDesc(TII.get(Opc));
3568 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3569 }
3570 return false;
3571}
3572
3573bool AArch64InstructionSelector::selectMOPS(MachineInstr &GI,
3574 MachineRegisterInfo &MRI) {
3575 unsigned Mopcode;
3576 switch (GI.getOpcode()) {
3577 case TargetOpcode::G_MEMCPY:
3578 case TargetOpcode::G_MEMCPY_INLINE:
3579 Mopcode = AArch64::MOPSMemoryCopyPseudo;
3580 break;
3581 case TargetOpcode::G_MEMMOVE:
3582 Mopcode = AArch64::MOPSMemoryMovePseudo;
3583 break;
3584 case TargetOpcode::G_MEMSET:
3585 // For tagged memset see llvm.aarch64.mops.memset.tag
3586 Mopcode = AArch64::MOPSMemorySetPseudo;
3587 break;
3588 }
3589
3590 auto &DstPtr = GI.getOperand(0);
3591 auto &SrcOrVal = GI.getOperand(1);
3592 auto &Size = GI.getOperand(2);
3593
3594 // Create copies of the registers that can be clobbered.
3595 const Register DstPtrCopy = MRI.cloneVirtualRegister(DstPtr.getReg());
3596 const Register SrcValCopy = MRI.cloneVirtualRegister(SrcOrVal.getReg());
3597 const Register SizeCopy = MRI.cloneVirtualRegister(Size.getReg());
3598
3599 const bool IsSet = Mopcode == AArch64::MOPSMemorySetPseudo;
3600 const auto &SrcValRegClass =
3601 IsSet ? AArch64::GPR64RegClass : AArch64::GPR64commonRegClass;
3602
3603 // Constrain to specific registers
3604 RBI.constrainGenericRegister(DstPtrCopy, AArch64::GPR64commonRegClass, MRI);
3605 RBI.constrainGenericRegister(SrcValCopy, SrcValRegClass, MRI);
3606 RBI.constrainGenericRegister(SizeCopy, AArch64::GPR64RegClass, MRI);
3607
3608 MIB.buildCopy(DstPtrCopy, DstPtr);
3609 MIB.buildCopy(SrcValCopy, SrcOrVal);
3610 MIB.buildCopy(SizeCopy, Size);
3611
3612 // New instruction uses the copied registers because it must update them.
3613 // The defs are not used since they don't exist in G_MEM*. They are still
3614 // tied.
3615 // Note: order of operands is different from G_MEMSET, G_MEMCPY, G_MEMMOVE
3616 Register DefDstPtr = MRI.createVirtualRegister(&AArch64::GPR64commonRegClass);
3617 Register DefSize = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
3618 if (IsSet) {
3619 MIB.buildInstr(Mopcode, {DefDstPtr, DefSize},
3620 {DstPtrCopy, SizeCopy, SrcValCopy});
3621 } else {
3622 Register DefSrcPtr = MRI.createVirtualRegister(&SrcValRegClass);
3623 MIB.buildInstr(Mopcode, {DefDstPtr, DefSrcPtr, DefSize},
3624 {DstPtrCopy, SrcValCopy, SizeCopy});
3625 }
3626
3627 GI.eraseFromParent();
3628 return true;
3629}
3630
3631bool AArch64InstructionSelector::selectBrJT(MachineInstr &I,
3632 MachineRegisterInfo &MRI) {
3633 assert(I.getOpcode() == TargetOpcode::G_BRJT && "Expected G_BRJT");
3634 Register JTAddr = I.getOperand(0).getReg();
3635 unsigned JTI = I.getOperand(1).getIndex();
3636 Register Index = I.getOperand(2).getReg();
3637
3638 Register TargetReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
3639 Register ScratchReg = MRI.createVirtualRegister(&AArch64::GPR64spRegClass);
3640
3641 MF->getInfo<AArch64FunctionInfo>()->setJumpTableEntryInfo(JTI, 4, nullptr);
3642 auto JumpTableInst = MIB.buildInstr(AArch64::JumpTableDest32,
3643 {TargetReg, ScratchReg}, {JTAddr, Index})
3644 .addJumpTableIndex(JTI);
3645 // Build the indirect branch.
3646 MIB.buildInstr(AArch64::BR, {}, {TargetReg});
3647 I.eraseFromParent();
3648 return constrainSelectedInstRegOperands(*JumpTableInst, TII, TRI, RBI);
3649}
3650
3651bool AArch64InstructionSelector::selectJumpTable(MachineInstr &I,
3652 MachineRegisterInfo &MRI) {
3653 assert(I.getOpcode() == TargetOpcode::G_JUMP_TABLE && "Expected jump table");
3654 assert(I.getOperand(1).isJTI() && "Jump table op should have a JTI!");
3655
3656 Register DstReg = I.getOperand(0).getReg();
3657 unsigned JTI = I.getOperand(1).getIndex();
3658 // We generate a MOVaddrJT which will get expanded to an ADRP + ADD later.
3659 auto MovMI =
3660 MIB.buildInstr(AArch64::MOVaddrJT, {DstReg}, {})
3661 .addJumpTableIndex(JTI, AArch64II::MO_PAGE)
3662 .addJumpTableIndex(JTI, AArch64II::MO_NC | AArch64II::MO_PAGEOFF);
3663 I.eraseFromParent();
3664 return constrainSelectedInstRegOperands(*MovMI, TII, TRI, RBI);
3665}
3666
3667bool AArch64InstructionSelector::selectTLSGlobalValue(
3668 MachineInstr &I, MachineRegisterInfo &MRI) {
3669 if (!STI.isTargetMachO())
3670 return false;
3671 MachineFunction &MF = *I.getParent()->getParent();
3672 MF.getFrameInfo().setAdjustsStack(true);
3673
3674 const auto &GlobalOp = I.getOperand(1);
3675 assert(GlobalOp.getOffset() == 0 &&
3676 "Shouldn't have an offset on TLS globals!");
3677 const GlobalValue &GV = *GlobalOp.getGlobal();
3678
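// Editor's note (added annotation, not in the source): this is the Mach-O TLV
// access sequence -- load the thread-local descriptor address via the GOT,
// load the accessor function pointer stored at the descriptor's offset 0,
// pass the descriptor in X0 and call the accessor; the result comes back in X0.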
3679 auto LoadGOT =
3680 MIB.buildInstr(AArch64::LOADgot, {&AArch64::GPR64commonRegClass}, {})
3681 .addGlobalAddress(&GV, 0, AArch64II::MO_TLS);
3682
3683 auto Load = MIB.buildInstr(AArch64::LDRXui, {&AArch64::GPR64commonRegClass},
3684 {LoadGOT.getReg(0)})
3685 .addImm(0);
3686
3687 MIB.buildCopy(Register(AArch64::X0), LoadGOT.getReg(0));
3688 // TLS calls preserve all registers except those that absolutely must be
3689 // trashed: X0 (it takes an argument), LR (it's a call) and NZCV (let's not be
3690 // silly).
3691 MIB.buildInstr(getBLRCallOpcode(MF), {}, {Load})
3692 .addUse(AArch64::X0, RegState::Implicit)
3693 .addDef(AArch64::X0, RegState::Implicit)
3694 .addRegMask(TRI.getTLSCallPreservedMask());
3695
3696 MIB.buildCopy(I.getOperand(0).getReg(), Register(AArch64::X0));
3697 RBI.constrainGenericRegister(I.getOperand(0).getReg(), AArch64::GPR64RegClass,
3698 MRI);
3699 I.eraseFromParent();
3700 return true;
3701}
3702
3703bool AArch64InstructionSelector::selectIntrinsicTrunc(
3704 MachineInstr &I, MachineRegisterInfo &MRI) const {
3705 const LLT SrcTy = MRI.getType(I.getOperand(0).getReg());
3706
3707 // Select the correct opcode.
3708 unsigned Opc = 0;
3709 if (!SrcTy.isVector()) {
3710 switch (SrcTy.getSizeInBits()) {
3711 default:
3712 case 16:
3713 Opc = AArch64::FRINTZHr;
3714 break;
3715 case 32:
3716 Opc = AArch64::FRINTZSr;
3717 break;
3718 case 64:
3719 Opc = AArch64::FRINTZDr;
3720 break;
3721 }
3722 } else {
3723 unsigned NumElts = SrcTy.getNumElements();
3724 switch (SrcTy.getElementType().getSizeInBits()) {
3725 default:
3726 break;
3727 case 16:
3728 if (NumElts == 4)
3729 Opc = AArch64::FRINTZv4f16;
3730 else if (NumElts == 8)
3731 Opc = AArch64::FRINTZv8f16;
3732 break;
3733 case 32:
3734 if (NumElts == 2)
3735 Opc = AArch64::FRINTZv2f32;
3736 else if (NumElts == 4)
3737 Opc = AArch64::FRINTZv4f32;
3738 break;
3739 case 64:
3740 if (NumElts == 2)
3741 Opc = AArch64::FRINTZv2f64;
3742 break;
3743 }
3744 }
3745
3746 if (!Opc) {
3747 // Didn't get an opcode above, bail.
3748 LLVM_DEBUG(dbgs() << "Unsupported type for G_INTRINSIC_TRUNC!\n");
3749 return false;
3750 }
3751
3752 // Legalization would have set us up perfectly for this; we just need to
3753 // set the opcode and move on.
3754 I.setDesc(TII.get(Opc));
3755 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3756}
3757
3758bool AArch64InstructionSelector::selectIntrinsicRound(
3759 MachineInstr &I, MachineRegisterInfo &MRI) const {
3760 const LLT SrcTy = MRI.getType(I.getOperand(0).getReg());
3761
3762 // Select the correct opcode.
3763 unsigned Opc = 0;
3764 if (!SrcTy.isVector()) {
3765 switch (SrcTy.getSizeInBits()) {
3766 default:
3767 case 16:
3768 Opc = AArch64::FRINTAHr;
3769 break;
3770 case 32:
3771 Opc = AArch64::FRINTASr;
3772 break;
3773 case 64:
3774 Opc = AArch64::FRINTADr;
3775 break;
3776 }
3777 } else {
3778 unsigned NumElts = SrcTy.getNumElements();
3779 switch (SrcTy.getElementType().getSizeInBits()) {
3780 default:
3781 break;
3782 case 16:
3783 if (NumElts == 4)
3784 Opc = AArch64::FRINTAv4f16;
3785 else if (NumElts == 8)
3786 Opc = AArch64::FRINTAv8f16;
3787 break;
3788 case 32:
3789 if (NumElts == 2)
3790 Opc = AArch64::FRINTAv2f32;
3791 else if (NumElts == 4)
3792 Opc = AArch64::FRINTAv4f32;
3793 break;
3794 case 64:
3795 if (NumElts == 2)
3796 Opc = AArch64::FRINTAv2f64;
3797 break;
3798 }
3799 }
3800
3801 if (!Opc) {
3802 // Didn't get an opcode above, bail.
3803 LLVM_DEBUG(dbgs() << "Unsupported type for G_INTRINSIC_ROUND!\n");
3804 return false;
3805 }
3806
3807 // Legalization would have set us up perfectly for this; we just need to
3808 // set the opcode and move on.
3809 I.setDesc(TII.get(Opc));
3810 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3811}
3812
3813bool AArch64InstructionSelector::selectVectorICmp(
3814 MachineInstr &I, MachineRegisterInfo &MRI) {
3815 Register DstReg = I.getOperand(0).getReg();
3816 LLT DstTy = MRI.getType(DstReg);
3817 Register SrcReg = I.getOperand(2).getReg();
3818 Register Src2Reg = I.getOperand(3).getReg();
3819 LLT SrcTy = MRI.getType(SrcReg);
3820
3821 unsigned SrcEltSize = SrcTy.getElementType().getSizeInBits();
3822 unsigned NumElts = DstTy.getNumElements();
3823
3824 // First index is element size, 0 == 8b, 1 == 16b, 2 == 32b, 3 == 64b
3825 // Second index is num elts, 0 == v2, 1 == v4, 2 == v8, 3 == v16
3826 // Third index is cc opcode:
3827 // 0 == eq
3828 // 1 == ugt
3829 // 2 == uge
3830 // 3 == ult
3831 // 4 == ule
3832 // 5 == sgt
3833 // 6 == sge
3834 // 7 == slt
3835 // 8 == sle
3836 // ne is done by negating 'eq' result.
3837
3838 // This table below assumes that for some comparisons the operands will be
3839 // commuted.
3840 // ult op == commute + ugt op
3841 // ule op == commute + uge op
3842 // slt op == commute + sgt op
3843 // sle op == commute + sge op
3844 unsigned PredIdx = 0;
3845 bool SwapOperands = false;
3846 CmpInst::Predicate Pred = (CmpInst::Predicate)I.getOperand(1).getPredicate();
3847 switch (Pred) {
3848 case CmpInst::ICMP_NE:
3849 case CmpInst::ICMP_EQ:
3850 PredIdx = 0;
3851 break;
3852 case CmpInst::ICMP_UGT:
3853 PredIdx = 1;
3854 break;
3855 case CmpInst::ICMP_UGE:
3856 PredIdx = 2;
3857 break;
3858 case CmpInst::ICMP_ULT:
3859 PredIdx = 3;
3860 SwapOperands = true;
3861 break;
3862 case CmpInst::ICMP_ULE:
3863 PredIdx = 4;
3864 SwapOperands = true;
3865 break;
3866 case CmpInst::ICMP_SGT:
3867 PredIdx = 5;
3868 break;
3869 case CmpInst::ICMP_SGE:
3870 PredIdx = 6;
3871 break;
3872 case CmpInst::ICMP_SLT:
3873 PredIdx = 7;
3874 SwapOperands = true;
3875 break;
3876 case CmpInst::ICMP_SLE:
3877 PredIdx = 8;
3878 SwapOperands = true;
3879 break;
3880 default:
3881 llvm_unreachable("Unhandled icmp predicate");
3882 return false;
3883 }
3884
3885 // This table obviously should be tablegen'd when we have our GISel native
3886 // tablegen selector.
3887
3888 static const unsigned OpcTable[4][4][9] = {
3889 {
3890 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3891 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3892 0 /* invalid */},
3893 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3894 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3895 0 /* invalid */},
3896 {AArch64::CMEQv8i8, AArch64::CMHIv8i8, AArch64::CMHSv8i8,
3897 AArch64::CMHIv8i8, AArch64::CMHSv8i8, AArch64::CMGTv8i8,
3898 AArch64::CMGEv8i8, AArch64::CMGTv8i8, AArch64::CMGEv8i8},
3899 {AArch64::CMEQv16i8, AArch64::CMHIv16i8, AArch64::CMHSv16i8,
3900 AArch64::CMHIv16i8, AArch64::CMHSv16i8, AArch64::CMGTv16i8,
3901 AArch64::CMGEv16i8, AArch64::CMGTv16i8, AArch64::CMGEv16i8}
3902 },
3903 {
3904 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3905 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3906 0 /* invalid */},
3907 {AArch64::CMEQv4i16, AArch64::CMHIv4i16, AArch64::CMHSv4i16,
3908 AArch64::CMHIv4i16, AArch64::CMHSv4i16, AArch64::CMGTv4i16,
3909 AArch64::CMGEv4i16, AArch64::CMGTv4i16, AArch64::CMGEv4i16},
3910 {AArch64::CMEQv8i16, AArch64::CMHIv8i16, AArch64::CMHSv8i16,
3911 AArch64::CMHIv8i16, AArch64::CMHSv8i16, AArch64::CMGTv8i16,
3912 AArch64::CMGEv8i16, AArch64::CMGTv8i16, AArch64::CMGEv8i16},
3913 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3914 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3915 0 /* invalid */}
3916 },
3917 {
3918 {AArch64::CMEQv2i32, AArch64::CMHIv2i32, AArch64::CMHSv2i32,
3919 AArch64::CMHIv2i32, AArch64::CMHSv2i32, AArch64::CMGTv2i32,
3920 AArch64::CMGEv2i32, AArch64::CMGTv2i32, AArch64::CMGEv2i32},
3921 {AArch64::CMEQv4i32, AArch64::CMHIv4i32, AArch64::CMHSv4i32,
3922 AArch64::CMHIv4i32, AArch64::CMHSv4i32, AArch64::CMGTv4i32,
3923 AArch64::CMGEv4i32, AArch64::CMGTv4i32, AArch64::CMGEv4i32},
3924 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3925 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3926 0 /* invalid */},
3927 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3928 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3929 0 /* invalid */}
3930 },
3931 {
3932 {AArch64::CMEQv2i64, AArch64::CMHIv2i64, AArch64::CMHSv2i64,
3933 AArch64::CMHIv2i64, AArch64::CMHSv2i64, AArch64::CMGTv2i64,
3934 AArch64::CMGEv2i64, AArch64::CMGTv2i64, AArch64::CMGEv2i64},
3935 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3936 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3937 0 /* invalid */},
3938 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3939 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3940 0 /* invalid */},
3941 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3942 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3943 0 /* invalid */}
3944 },
3945 };
3946 unsigned EltIdx = Log2_32(SrcEltSize / 8);
3947 unsigned NumEltsIdx = Log2_32(NumElts / 2);
3948 unsigned Opc = OpcTable[EltIdx][NumEltsIdx][PredIdx];
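// Editor's note (added annotation, not in the source): worked example derived
// from the table above -- for an ICMP_ULT on <4 x s32>, EltIdx = Log2_32(32/8)
// = 2, NumEltsIdx = Log2_32(4/2) = 1 and PredIdx = 3, so Opc resolves to
// AArch64::CMHIv4i32 with SwapOperands set, i.e. "a u< b" becomes CMHI(b, a).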
3949 if (!Opc) {
3950 LLVM_DEBUG(dbgs() << "Could not map G_ICMP to cmp opcode");
3951 return false;
3952 }
3953
3954 const RegisterBank &VecRB = *RBI.getRegBank(SrcReg, MRI, TRI);
3955 const TargetRegisterClass *SrcRC =
3956 getRegClassForTypeOnBank(SrcTy, VecRB, true);
3957 if (!SrcRC) {
3958 LLVM_DEBUG(dbgs() << "Could not determine source register class.\n");
3959 return false;
3960 }
3961
3962 unsigned NotOpc = Pred == ICmpInst::ICMP_NE ? AArch64::NOTv8i8 : 0;
3963 if (SrcTy.getSizeInBits() == 128)
3964 NotOpc = NotOpc ? AArch64::NOTv16i8 : 0;
3965
3966 if (SwapOperands)
3967 std::swap(SrcReg, Src2Reg);
3968
3969 auto Cmp = MIB.buildInstr(Opc, {SrcRC}, {SrcReg, Src2Reg});
3970 constrainSelectedInstRegOperands(*Cmp, TII, TRI, RBI);
3971
3972 // Invert if we had a 'ne' cc.
3973 if (NotOpc) {
3974 Cmp = MIB.buildInstr(NotOpc, {DstReg}, {Cmp});
3975 constrainSelectedInstRegOperands(*Cmp, TII, TRI, RBI);
3976 } else {
3977 MIB.buildCopy(DstReg, Cmp.getReg(0));
3978 }
3979 RBI.constrainGenericRegister(DstReg, *SrcRC, MRI);
3980 I.eraseFromParent();
3981 return true;
3982}
3983
3984MachineInstr *AArch64InstructionSelector::emitScalarToVector(
3985 unsigned EltSize, const TargetRegisterClass *DstRC, Register Scalar,
3986 MachineIRBuilder &MIRBuilder) const {
3987 auto Undef = MIRBuilder.buildInstr(TargetOpcode::IMPLICIT_DEF, {DstRC}, {});
3988
3989 auto BuildFn = [&](unsigned SubregIndex) {
3990 auto Ins =
3991 MIRBuilder
3992 .buildInstr(TargetOpcode::INSERT_SUBREG, {DstRC}, {Undef, Scalar})
3993 .addImm(SubregIndex);
3994 constrainSelectedInstRegOperands(*Undef, TII, TRI, RBI);
3995 constrainSelectedInstRegOperands(*Ins, TII, TRI, RBI);
3996 return &*Ins;
3997 };
3998
3999 switch (EltSize) {
4000 case 16:
4001 return BuildFn(AArch64::hsub);
4002 case 32:
4003 return BuildFn(AArch64::ssub);
4004 case 64:
4005 return BuildFn(AArch64::dsub);
4006 default:
4007 return nullptr;
4008 }
4009}
4010
4011bool AArch64InstructionSelector::selectMergeValues(
4012 MachineInstr &I, MachineRegisterInfo &MRI) {
4013 assert(I.getOpcode() == TargetOpcode::G_MERGE_VALUES && "unexpected opcode");
4014 const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
4015 const LLT SrcTy = MRI.getType(I.getOperand(1).getReg());
4016 assert(!DstTy.isVector() && !SrcTy.isVector() && "invalid merge operation");
4017 const RegisterBank &RB = *RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI);
4018
4019 if (I.getNumOperands() != 3)
4020 return false;
4021
4022 // Merging 2 s64s into an s128.
4023 if (DstTy == LLT::scalar(128)) {
4024 if (SrcTy.getSizeInBits() != 64)
4025 return false;
4026 Register DstReg = I.getOperand(0).getReg();
4027 Register Src1Reg = I.getOperand(1).getReg();
4028 Register Src2Reg = I.getOperand(2).getReg();
4029 auto Tmp = MIB.buildInstr(TargetOpcode::IMPLICIT_DEF, {DstTy}, {});
4030 MachineInstr *InsMI =
4031 emitLaneInsert(None, Tmp.getReg(0), Src1Reg, /* LaneIdx */ 0, RB, MIB);
4032 if (!InsMI)
4033 return false;
4034 MachineInstr *Ins2MI = emitLaneInsert(DstReg, InsMI->getOperand(0).getReg(),
4035 Src2Reg, /* LaneIdx */ 1, RB, MIB);
4036 if (!Ins2MI)
4037 return false;
4038 constrainSelectedInstRegOperands(*InsMI, TII, TRI, RBI);
4039 constrainSelectedInstRegOperands(*Ins2MI, TII, TRI, RBI);
4040 I.eraseFromParent();
4041 return true;
4042 }
4043
4044 if (RB.getID() != AArch64::GPRRegBankID)
4045 return false;
4046
4047 if (DstTy.getSizeInBits() != 64 || SrcTy.getSizeInBits() != 32)
4048 return false;
4049
4050 auto *DstRC = &AArch64::GPR64RegClass;
4051 Register SubToRegDef = MRI.createVirtualRegister(DstRC);
4052 MachineInstr &SubRegMI = *BuildMI(*I.getParent(), I, I.getDebugLoc(),
4053 TII.get(TargetOpcode::SUBREG_TO_REG))
4054 .addDef(SubToRegDef)
4055 .addImm(0)
4056 .addUse(I.getOperand(1).getReg())
4057 .addImm(AArch64::sub_32);
4058 Register SubToRegDef2 = MRI.createVirtualRegister(DstRC);
4059 // Need to anyext the second scalar before we can use bfm
4060 MachineInstr &SubRegMI2 = *BuildMI(*I.getParent(), I, I.getDebugLoc(),
4061 TII.get(TargetOpcode::SUBREG_TO_REG))
4062 .addDef(SubToRegDef2)
4063 .addImm(0)
4064 .addUse(I.getOperand(2).getReg())
4065 .addImm(AArch64::sub_32);
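// Editor's note (added annotation, not in the source): BFMXri with immr = 32
// and imms = 31 acts as a bitfield insert here -- it copies the low 32 bits of
// the second (any-extended) operand into bits [63:32] of the destination while
// keeping bits [31:0] from the first operand.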
4066 MachineInstr &BFM =
4067 *BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::BFMXri))
4068 .addDef(I.getOperand(0).getReg())
4069 .addUse(SubToRegDef)
4070 .addUse(SubToRegDef2)
4071 .addImm(32)
4072 .addImm(31);
4073 constrainSelectedInstRegOperands(SubRegMI, TII, TRI, RBI);
4074 constrainSelectedInstRegOperands(SubRegMI2, TII, TRI, RBI);
4075 constrainSelectedInstRegOperands(BFM, TII, TRI, RBI);
4076 I.eraseFromParent();
4077 return true;
4078}
4079
4080static bool getLaneCopyOpcode(unsigned &CopyOpc, unsigned &ExtractSubReg,
4081 const unsigned EltSize) {
4082 // Choose a lane copy opcode and subregister based off of the size of the
4083 // vector's elements.
4084 switch (EltSize) {
4085 case 8:
4086 CopyOpc = AArch64::DUPi8;
4087 ExtractSubReg = AArch64::bsub;
4088 break;
4089 case 16:
4090 CopyOpc = AArch64::DUPi16;
4091 ExtractSubReg = AArch64::hsub;
4092 break;
4093 case 32:
4094 CopyOpc = AArch64::DUPi32;
4095 ExtractSubReg = AArch64::ssub;
4096 break;
4097 case 64:
4098 CopyOpc = AArch64::DUPi64;
4099 ExtractSubReg = AArch64::dsub;
4100 break;
4101 default:
4102 // Unknown size, bail out.
4103 LLVM_DEBUG(dbgs() << "Elt size '" << EltSize << "' unsupported.\n");
4104 return false;
4105 }
4106 return true;
4107}
4108
4109MachineInstr *AArch64InstructionSelector::emitExtractVectorElt(
4110 Optional<Register> DstReg, const RegisterBank &DstRB, LLT ScalarTy,
4111 Register VecReg, unsigned LaneIdx, MachineIRBuilder &MIRBuilder) const {
4112 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
4113 unsigned CopyOpc = 0;
4114 unsigned ExtractSubReg = 0;
4115 if (!getLaneCopyOpcode(CopyOpc, ExtractSubReg, ScalarTy.getSizeInBits())) {
4116 LLVM_DEBUG(
4117 dbgs() << "Couldn't determine lane copy opcode for instruction.\n");
4118 return nullptr;
4119 }
4120
4121 const TargetRegisterClass *DstRC =
4122 getRegClassForTypeOnBank(ScalarTy, DstRB, true);
4123 if (!DstRC) {
4124 LLVM_DEBUG(dbgs() << "Could not determine destination register class.\n");
4125 return nullptr;
4126 }
4127
4128 const RegisterBank &VecRB = *RBI.getRegBank(VecReg, MRI, TRI);
4129 const LLT &VecTy = MRI.getType(VecReg);
4130 const TargetRegisterClass *VecRC =
4131 getRegClassForTypeOnBank(VecTy, VecRB, true);
4132 if (!VecRC) {
4133 LLVM_DEBUG(dbgs() << "Could not determine source register class.\n");
4134 return nullptr;
4135 }
4136
4137 // The register that we're going to copy into.
4138 Register InsertReg = VecReg;
4139 if (!DstReg)
4140 DstReg = MRI.createVirtualRegister(DstRC);
4141 // If the lane index is 0, we just use a subregister COPY.
4142 if (LaneIdx == 0) {
4143 auto Copy = MIRBuilder.buildInstr(TargetOpcode::COPY, {*DstReg}, {})
4144 .addReg(VecReg, 0, ExtractSubReg);
4145 RBI.constrainGenericRegister(*DstReg, *DstRC, MRI);
4146 return &*Copy;
4147 }
4148
4149 // Lane copies require 128-bit wide registers. If we're dealing with an
4150 // unpacked vector, then we need to move up to that width. Insert an implicit
4151 // def and a subregister insert to get us there.
4152 if (VecTy.getSizeInBits() != 128) {
4153 MachineInstr *ScalarToVector = emitScalarToVector(
4154 VecTy.getSizeInBits(), &AArch64::FPR128RegClass, VecReg, MIRBuilder);
4155 if (!ScalarToVector)
4156 return nullptr;
4157 InsertReg = ScalarToVector->getOperand(0).getReg();
4158 }
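// Editor's note (added annotation, not in the source): for example, extracting
// lane 1 from a <2 x s32> value takes this path -- the 64-bit source is first
// widened to an FPR128 register via the implicit def + INSERT_SUBREG above,
// and the DUPi32 lane copy built below then reads lane 1 of the widened value.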
4159
4160 MachineInstr *LaneCopyMI =
4161 MIRBuilder.buildInstr(CopyOpc, {*DstReg}, {InsertReg}).addImm(LaneIdx);
4162 constrainSelectedInstRegOperands(*LaneCopyMI, TII, TRI, RBI);
4163
4164 // Make sure that we actually constrain the initial copy.
4165 RBI.constrainGenericRegister(*DstReg, *DstRC, MRI);
4166 return LaneCopyMI;
4167}
4168
4169bool AArch64InstructionSelector::selectExtractElt(
4170 MachineInstr &I, MachineRegisterInfo &MRI) {
4171 assert(I.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT &&
4172 "unexpected opcode!");
4173 Register DstReg = I.getOperand(0).getReg();
4174 const LLT NarrowTy = MRI.getType(DstReg);
4175 const Register SrcReg = I.getOperand(1).getReg();
4176 const LLT WideTy = MRI.getType(SrcReg);
4177 (void)WideTy;
4178 assert(WideTy.getSizeInBits() >= NarrowTy.getSizeInBits() &&
4179 "source register size too small!");
4180 assert(!NarrowTy.isVector() && "cannot extract vector into vector!");
4181
4182 // Need the lane index to determine the correct copy opcode.
4183 MachineOperand &LaneIdxOp = I.getOperand(2);
4184 assert(LaneIdxOp.isReg() && "Lane index operand was not a register?");
4185
4186 if (RBI.getRegBank(DstReg, MRI, TRI)->getID() != AArch64::FPRRegBankID) {
4187 LLVM_DEBUG(dbgs() << "Cannot extract into GPR.\n");
4188 return false;
4189 }
4190
4191 // Find the index to extract from.
4192 auto VRegAndVal = getIConstantVRegValWithLookThrough(LaneIdxOp.getReg(), MRI);
4193 if (!VRegAndVal)
4194 return false;
4195 unsigned LaneIdx = VRegAndVal->Value.getSExtValue();
4196
4197
4198 const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
4199 MachineInstr *Extract = emitExtractVectorElt(DstReg, DstRB, NarrowTy, SrcReg,
4200 LaneIdx, MIB);
4201 if (!Extract)
4202 return false;
4203
4204 I.eraseFromParent();
4205 return true;
4206}
4207
4208bool AArch64InstructionSelector::selectSplitVectorUnmerge(
4209 MachineInstr &I, MachineRegisterInfo &MRI) {
4210 unsigned NumElts = I.getNumOperands() - 1;
4211 Register SrcReg = I.getOperand(NumElts).getReg();
4212 const LLT NarrowTy = MRI.getType(I.getOperand(0).getReg());
4213 const LLT SrcTy = MRI.getType(SrcReg);
4214
4215 assert(NarrowTy.isVector() && "Expected an unmerge into vectors");
4216 if (SrcTy.getSizeInBits() > 128) {
4217 LLVM_DEBUG(dbgs() << "Unexpected vector type for vec split unmerge");
4218 return false;
4219 }
4220
4221 // We implement a split vector operation by treating the sub-vectors as
4222 // scalars and extracting them.
4223 const RegisterBank &DstRB =
4224 *RBI.getRegBank(I.getOperand(0).getReg(), MRI, TRI);
4225 for (unsigned OpIdx = 0; OpIdx < NumElts; ++OpIdx) {
4226 Register Dst = I.getOperand(OpIdx).getReg();
4227 MachineInstr *Extract =
4228 emitExtractVectorElt(Dst, DstRB, NarrowTy, SrcReg, OpIdx, MIB);
4229 if (!Extract)
4230 return false;
4231 }
4232 I.eraseFromParent();
4233 return true;
4234}
4235
4236bool AArch64InstructionSelector::selectUnmergeValues(MachineInstr &I,
4237 MachineRegisterInfo &MRI) {
4238 assert(I.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
4239 "unexpected opcode");
4240
4241 // TODO: Handle unmerging into GPRs and from scalars to scalars.
4242 if (RBI.getRegBank(I.getOperand(0).getReg(), MRI, TRI)->getID() !=
4243 AArch64::FPRRegBankID ||
4244 RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI)->getID() !=
4245 AArch64::FPRRegBankID) {
4246 LLVM_DEBUG(dbgs() << "Unmerging vector-to-gpr and scalar-to-scalar "
4247 "currently unsupported.\n");
4248 return false;
4249 }
4250
4251 // The last operand is the vector source register, and every other operand is
4252 // a register to unpack into.
4253 unsigned NumElts = I.getNumOperands() - 1;
4254 Register SrcReg = I.getOperand(NumElts).getReg();
4255 const LLT NarrowTy = MRI.getType(I.getOperand(0).getReg());
4256 const LLT WideTy = MRI.getType(SrcReg);
4257 (void)WideTy;
4258 assert((WideTy.isVector() || WideTy.getSizeInBits() == 128) &&
4259 "can only unmerge from vector or s128 types!");
4260 assert(WideTy.getSizeInBits() > NarrowTy.getSizeInBits() &&
4261 "source register size too small!");
4262
4263 if (!NarrowTy.isScalar())
4264 return selectSplitVectorUnmerge(I, MRI);
4265
4266 // Choose a lane copy opcode and subregister based off of the size of the
4267 // vector's elements.
4268 unsigned CopyOpc = 0;
4269 unsigned ExtractSubReg = 0;
4270 if (!getLaneCopyOpcode(CopyOpc, ExtractSubReg, NarrowTy.getSizeInBits()))
4271 return false;
4272
4273 // Set up for the lane copies.
4274 MachineBasicBlock &MBB = *I.getParent();
4275
4276 // Stores the registers we'll be copying from.
4277 SmallVector<Register, 4> InsertRegs;
4278
4279 // We'll use the first register twice, so we only need NumElts-1 registers.
4280 unsigned NumInsertRegs = NumElts - 1;
4281
4282 // If our elements fit into exactly 128 bits, then we can copy from the source
4283 // directly. Otherwise, we need to do a bit of setup with some subregister
4284 // inserts.
4285 if (NarrowTy.getSizeInBits() * NumElts == 128) {
4286 InsertRegs = SmallVector<Register, 4>(NumInsertRegs, SrcReg);
4287 } else {
4288 // No. We have to perform subregister inserts. For each insert, create an
4289 // implicit def and a subregister insert, and save the register we create.
4290 const TargetRegisterClass *RC = getRegClassForTypeOnBank(
4291 LLT::fixed_vector(NumElts, WideTy.getScalarSizeInBits()),
4292 *RBI.getRegBank(SrcReg, MRI, TRI));
4293 unsigned SubReg = 0;
4294 bool Found = getSubRegForClass(RC, TRI, SubReg);
4295 (void)Found;
4296 assert(Found && "expected to find last operand's subeg idx");
4297 for (unsigned Idx = 0; Idx < NumInsertRegs; ++Idx) {
4298 Register ImpDefReg = MRI.createVirtualRegister(&AArch64::FPR128RegClass);
4299 MachineInstr &ImpDefMI =
4300 *BuildMI(MBB, I, I.getDebugLoc(), TII.get(TargetOpcode::IMPLICIT_DEF),
4301 ImpDefReg);
4302
4303 // Now, create the subregister insert from SrcReg.
4304 Register InsertReg = MRI.createVirtualRegister(&AArch64::FPR128RegClass);
4305 MachineInstr &InsMI =
4306 *BuildMI(MBB, I, I.getDebugLoc(),
4307 TII.get(TargetOpcode::INSERT_SUBREG), InsertReg)
4308 .addUse(ImpDefReg)
4309 .addUse(SrcReg)
4310 .addImm(SubReg);
4311
4312 constrainSelectedInstRegOperands(ImpDefMI, TII, TRI, RBI);
4313 constrainSelectedInstRegOperands(InsMI, TII, TRI, RBI);
4314
4315 // Save the register so that we can copy from it after.
4316 InsertRegs.push_back(InsertReg);
4317 }
4318 }
4319
4320 // Now that we've created any necessary subregister inserts, we can
4321 // create the copies.
4322 //
4323 // Perform the first copy separately as a subregister copy.
4324 Register CopyTo = I.getOperand(0).getReg();
4325 auto FirstCopy = MIB.buildInstr(TargetOpcode::COPY, {CopyTo}, {})
4326 .addReg(InsertRegs[0], 0, ExtractSubReg);
4327 constrainSelectedInstRegOperands(*FirstCopy, TII, TRI, RBI);
4328
4329 // Now, perform the remaining copies as vector lane copies.
4330 unsigned LaneIdx = 1;
4331 for (Register InsReg : InsertRegs) {
4332 Register CopyTo = I.getOperand(LaneIdx).getReg();
4333 MachineInstr &CopyInst =
4334 *BuildMI(MBB, I, I.getDebugLoc(), TII.get(CopyOpc), CopyTo)
4335 .addUse(InsReg)
4336 .addImm(LaneIdx);
4337 constrainSelectedInstRegOperands(CopyInst, TII, TRI, RBI);
4338 ++LaneIdx;
4339 }
4340
4341 // Separately constrain the first copy's destination. Because of the
4342 // limitation in constrainOperandRegClass, we can't guarantee that this will
4343 // actually be constrained. So, do it ourselves using the second operand.
4344 const TargetRegisterClass *RC =
4345 MRI.getRegClassOrNull(I.getOperand(1).getReg());
4346 if (!RC) {
4347 LLVM_DEBUG(dbgs() << "Couldn't constrain copy destination.\n");
4348 return false;
4349 }
4350
4351 RBI.constrainGenericRegister(CopyTo, *RC, MRI);
4352 I.eraseFromParent();
4353 return true;
4354}
4355
4356bool AArch64InstructionSelector::selectConcatVectors(
4357 MachineInstr &I, MachineRegisterInfo &MRI) {
4358 assert(I.getOpcode() == TargetOpcode::G_CONCAT_VECTORS &&
4359 "Unexpected opcode");
4360 Register Dst = I.getOperand(0).getReg();
4361 Register Op1 = I.getOperand(1).getReg();
4362 Register Op2 = I.getOperand(2).getReg();
4363 MachineInstr *ConcatMI = emitVectorConcat(Dst, Op1, Op2, MIB);
4364 if (!ConcatMI)
4365 return false;
4366 I.eraseFromParent();
4367 return true;
4368}
4369
4370unsigned
4371AArch64InstructionSelector::emitConstantPoolEntry(const Constant *CPVal,
4372 MachineFunction &MF) const {
4373 Type *CPTy = CPVal->getType();
4374 Align Alignment = MF.getDataLayout().getPrefTypeAlign(CPTy);
4375
4376 MachineConstantPool *MCP = MF.getConstantPool();
4377 return MCP->getConstantPoolIndex(CPVal, Alignment);
4378}
4379
4380MachineInstr *AArch64InstructionSelector::emitLoadFromConstantPool(
4381 const Constant *CPVal, MachineIRBuilder &MIRBuilder) const {
4382 auto &MF = MIRBuilder.getMF();
4383 unsigned CPIdx = emitConstantPoolEntry(CPVal, MF);
4384
4385 auto Adrp =
4386 MIRBuilder.buildInstr(AArch64::ADRP, {&AArch64::GPR64RegClass}, {})
4387 .addConstantPoolIndex(CPIdx, 0, AArch64II::MO_PAGE);
4388
4389 MachineInstr *LoadMI = nullptr;
4390 MachinePointerInfo PtrInfo = MachinePointerInfo::getConstantPool(MF);
4391 unsigned Size = MIRBuilder.getDataLayout().getTypeStoreSize(CPVal->getType());
4392 switch (Size) {
4393 case 16:
4394 LoadMI =
4395 &*MIRBuilder
4396 .buildInstr(AArch64::LDRQui, {&AArch64::FPR128RegClass}, {Adrp})
4397 .addConstantPoolIndex(CPIdx, 0,
4398 AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
4399 break;
4400 case 8:
4401 LoadMI =
4402 &*MIRBuilder
4403 .buildInstr(AArch64::LDRDui, {&AArch64::FPR64RegClass}, {Adrp})
4404 .addConstantPoolIndex(CPIdx, 0,
4405 AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
4406 break;
4407 case 4:
4408 LoadMI =
4409 &*MIRBuilder
4410 .buildInstr(AArch64::LDRSui, {&AArch64::FPR32RegClass}, {Adrp})
4411 .addConstantPoolIndex(CPIdx, 0,
4412 AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
4413 break;
4414 case 2:
4415 LoadMI =
4416 &*MIRBuilder
4417 .buildInstr(AArch64::LDRHui, {&AArch64::FPR16RegClass}, {Adrp})
4418 .addConstantPoolIndex(CPIdx, 0,
4419 AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
4420 break;
4421 default:
4422 LLVM_DEBUG(dbgs() << "Could not load from constant pool of type "
4423 << *CPVal->getType());
4424 return nullptr;
4425 }
4426 LoadMI->addMemOperand(MF, MF.getMachineMemOperand(PtrInfo,
4427 MachineMemOperand::MOLoad,
4428 Size, Align(Size)));
4429 constrainSelectedInstRegOperands(*Adrp, TII, TRI, RBI);
4430 constrainSelectedInstRegOperands(*LoadMI, TII, TRI, RBI);
4431 return LoadMI;
4432}
4433
4434/// Return an <Opcode, SubregIndex> pair to do a vector elt insert of a given
4435/// size and RB.
4436static std::pair<unsigned, unsigned>
4437getInsertVecEltOpInfo(const RegisterBank &RB, unsigned EltSize) {
4438 unsigned Opc, SubregIdx;
4439 if (RB.getID() == AArch64::GPRRegBankID) {
4440 if (EltSize == 16) {
4441 Opc = AArch64::INSvi16gpr;
4442 SubregIdx = AArch64::ssub;
4443 } else if (EltSize == 32) {
4444 Opc = AArch64::INSvi32gpr;
4445 SubregIdx = AArch64::ssub;
4446 } else if (EltSize == 64) {
4447 Opc = AArch64::INSvi64gpr;
4448 SubregIdx = AArch64::dsub;
4449 } else {
4450 llvm_unreachable("invalid elt size!");
4451 }
4452 } else {
4453 if (EltSize == 8) {
4454 Opc = AArch64::INSvi8lane;
4455 SubregIdx = AArch64::bsub;
4456 } else if (EltSize == 16) {
4457 Opc = AArch64::INSvi16lane;
4458 SubregIdx = AArch64::hsub;
4459 } else if (EltSize == 32) {
4460 Opc = AArch64::INSvi32lane;
4461 SubregIdx = AArch64::ssub;
4462 } else if (EltSize == 64) {
4463 Opc = AArch64::INSvi64lane;
4464 SubregIdx = AArch64::dsub;
4465 } else {
4466 llvm_unreachable("invalid elt size!");
4467 }
4468 }
4469 return std::make_pair(Opc, SubregIdx);
4470}
4471
4472MachineInstr *AArch64InstructionSelector::emitInstr(
4473 unsigned Opcode, std::initializer_list<llvm::DstOp> DstOps,
4474 std::initializer_list<llvm::SrcOp> SrcOps, MachineIRBuilder &MIRBuilder,
4475 const ComplexRendererFns &RenderFns) const {
4476 assert(Opcode && "Expected an opcode?");
4477 assert(!isPreISelGenericOpcode(Opcode) &&
4478 "Function should only be used to produce selected instructions!");
4479 auto MI = MIRBuilder.buildInstr(Opcode, DstOps, SrcOps);
4480 if (RenderFns)
4481 for (auto &Fn : *RenderFns)
4482 Fn(MI);
4483 constrainSelectedInstRegOperands(*MI, TII, TRI, RBI);
4484 return &*MI;
4485}
4486
4487MachineInstr *AArch64InstructionSelector::emitAddSub(
4488 const std::array<std::array<unsigned, 2>, 5> &AddrModeAndSizeToOpcode,
4489 Register Dst, MachineOperand &LHS, MachineOperand &RHS,
4490 MachineIRBuilder &MIRBuilder) const {
4491 MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
4492 assert(LHS.isReg() && RHS.isReg() && "Expected register operands?");
4493 auto Ty = MRI.getType(LHS.getReg());
4494 assert(!Ty.isVector() && "Expected a scalar or pointer?");
4495 unsigned Size = Ty.getSizeInBits();
4496 assert((Size == 32 || Size == 64) && "Expected a 32-bit or 64-bit type only");
4497 bool Is32Bit = Size == 32;
4498
4499 // INSTRri form with positive arithmetic immediate.
4500 if (auto Fns = selectArithImmed(RHS))
4501 return emitInstr(AddrModeAndSizeToOpcode[0][Is32Bit], {Dst}, {LHS},
4502 MIRBuilder, Fns);
4503
4504 // INSTRri form with negative arithmetic immediate.
4505 if (auto Fns = selectNegArithImmed(RHS))
4506 return emitInstr(AddrModeAndSizeToOpcode[3][Is32Bit], {Dst}, {LHS},
4507 MIRBuilder, Fns);
4508
4509 // INSTRrx form.
4510 if (auto Fns = selectArithExtendedRegister(RHS))
4511 return emitInstr(AddrModeAndSizeToOpcode[4][Is32Bit], {Dst}, {LHS},
4512 MIRBuilder, Fns);
4513
4514 // INSTRrs form.
4515 if (auto Fns = selectShiftedRegister(RHS))
4516 return emitInstr(AddrModeAndSizeToOpcode[1][Is32Bit], {Dst}, {LHS},
4517 MIRBuilder, Fns);
4518 return emitInstr(AddrModeAndSizeToOpcode[2][Is32Bit], {Dst}, {LHS, RHS},
4519 MIRBuilder);
4520}
4521
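// Editor's note (added annotation, not in the source) on the OpcTable layout
// shared by emitADD/emitADDS/emitSUBS and consumed by emitAddSub above:
// row 0 is the immediate (ri) form, row 1 the shifted-register (rs) form,
// row 2 the plain register (rr) form, row 3 the opposite operation used for
// negated immediates, and row 4 the extended-register (rx) form; within each
// row, index 0 is the 64-bit (X) opcode and index 1 the 32-bit (W) opcode.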
4522MachineInstr *
4523AArch64InstructionSelector::emitADD(Register DefReg, MachineOperand &LHS,
4524 MachineOperand &RHS,
4525 MachineIRBuilder &MIRBuilder) const {
4526 const std::array<std::array<unsigned, 2>, 5> OpcTable{
4527 {{AArch64::ADDXri, AArch64::ADDWri},
4528 {AArch64::ADDXrs, AArch64::ADDWrs},
4529 {AArch64::ADDXrr, AArch64::ADDWrr},
4530 {AArch64::SUBXri, AArch64::SUBWri},
4531 {AArch64::ADDXrx, AArch64::ADDWrx}}};
4532 return emitAddSub(OpcTable, DefReg, LHS, RHS, MIRBuilder);
4533}
4534
4535MachineInstr *
4536AArch64InstructionSelector::emitADDS(Register Dst, MachineOperand &LHS,
4537 MachineOperand &RHS,
4538 MachineIRBuilder &MIRBuilder) const {
4539 const std::array<std::array<unsigned, 2>, 5> OpcTable{
4540 {{AArch64::ADDSXri, AArch64::ADDSWri},
4541 {AArch64::ADDSXrs, AArch64::ADDSWrs},
4542 {AArch64::ADDSXrr, AArch64::ADDSWrr},
4543 {AArch64::SUBSXri, AArch64::SUBSWri},
4544 {AArch64::ADDSXrx, AArch64::ADDSWrx}}};
4545 return emitAddSub(OpcTable, Dst, LHS, RHS, MIRBuilder);
4546}
4547
4548MachineInstr *
4549AArch64InstructionSelector::emitSUBS(Register Dst, MachineOperand &LHS,
4550 MachineOperand &RHS,
4551 MachineIRBuilder &MIRBuilder) const {
4552 const std::array<std::array<unsigned, 2>, 5> OpcTable{
4553 {{AArch64::SUBSXri, AArch64::SUBSWri},
4554 {AArch64::SUBSXrs, AArch64::SUBSWrs},
4555 {AArch64::SUBSXrr, AArch64::SUBSWrr},
4556 {AArch64::ADDSXri, AArch64::ADDSWri},
4557 {AArch64::SUBSXrx, AArch64::SUBSWrx}}};
4558 return emitAddSub(OpcTable, Dst, LHS, RHS, MIRBuilder);
4559}
4560
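// Editor's note (added annotation, not in the source): emitCMN below lowers a
// "compare negative" by emitting ADDS into a scratch register and keeping only
// the flags, i.e. CMN(a, b) sets NZCV as if comparing a against -b.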
4561MachineInstr *
4562AArch64InstructionSelector::emitCMN(MachineOperand &LHS, MachineOperand &RHS,
4563 MachineIRBuilder &MIRBuilder) const {
4564 MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
4565 bool Is32Bit = (MRI.getType(LHS.getReg()).getSizeInBits() == 32);
4566 auto RC = Is32Bit ? &AArch64::GPR32RegClass : &AArch64::GPR64RegClass;
4567 return emitADDS(MRI.createVirtualRegister(RC), LHS, RHS, MIRBuilder);
4568}
4569
4570MachineInstr *
4571AArch64InstructionSelector::emitTST(MachineOperand &LHS, MachineOperand &RHS,
4572 MachineIRBuilder &MIRBuilder) const {
4573 assert(LHS.isReg() && RHS.isReg() && "Expected register operands?");
4574 MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
4575 LLT Ty = MRI.getType(LHS.getReg());
4576 unsigned RegSize = Ty.getSizeInBits();
4577 bool Is32Bit = (RegSize == 32);
4578 const unsigned OpcTable[3][2] = {{AArch64::ANDSXri, AArch64::ANDSWri},
4579 {AArch64::ANDSXrs, AArch64::ANDSWrs},
4580 {AArch64::ANDSXrr, AArch64::ANDSWrr}};
4581 // ANDS needs a logical immediate for its immediate form. Check if we can
4582 // fold one in.
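// Editor's note (added annotation, not in the source): only constants that
// AArch64_AM::isLogicalImmediate accepts as bitmask immediates (e.g. 0xff) can
// use the ANDSri form; anything else falls through to the shifted-register or
// register-register variants below.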
4583 if (auto ValAndVReg = getIConstantVRegValWithLookThrough(RHS.getReg(), MRI)) {
4584 int64_t Imm = ValAndVReg->Value.getSExtValue();
4585
4586 if (AArch64_AM::isLogicalImmediate(Imm, RegSize)) {
4587 auto TstMI = MIRBuilder.buildInstr(OpcTable[0][Is32Bit], {Ty}, {LHS});
4588 TstMI.addImm(AArch64_AM::encodeLogicalImmediate(Imm, RegSize));
4589 constrainSelectedInstRegOperands(*TstMI, TII, TRI, RBI);
4590 return &*TstMI;
4591 }
4592 }
4593
4594 if (auto Fns = selectLogicalShiftedRegister(RHS))
4595 return emitInstr(OpcTable[1][Is32Bit], {Ty}, {LHS}, MIRBuilder, Fns);
4596 return emitInstr(OpcTable[2][Is32Bit], {Ty}, {LHS, RHS}, MIRBuilder);
4597}
4598
4599MachineInstr *AArch64InstructionSelector::emitIntegerCompare(
4600 MachineOperand &LHS, MachineOperand &RHS, MachineOperand &Predicate,
4601 MachineIRBuilder &MIRBuilder) const {
4602 assert(LHS.isReg() && RHS.isReg() && "Expected LHS and RHS to be registers!");
4603 assert(Predicate.isPredicate() && "Expected predicate?");
4604 MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
4605 LLT CmpTy = MRI.getType(LHS.getReg());
4606 assert(!CmpTy.isVector() && "Expected scalar or pointer");
4607 unsigned Size = CmpTy.getSizeInBits();
4608 (void)Size;
4609 assert((Size == 32 || Size == 64) && "Expected a 32-bit or 64-bit LHS/RHS?");
4610 // Fold the compare into a cmn or tst if possible.
4611 if (auto FoldCmp = tryFoldIntegerCompare(LHS, RHS, Predicate, MIRBuilder))
4612 return FoldCmp;
4613 auto Dst = MRI.cloneVirtualRegister(LHS.getReg());
4614 return emitSUBS(Dst, LHS, RHS, MIRBuilder);
4615}
4616
4617MachineInstr *AArch64InstructionSelector::emitCSetForFCmp(
4618 Register Dst, CmpInst::Predicate Pred, MachineIRBuilder &MIRBuilder) const {
4619 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
4620#ifndef NDEBUG
4621 LLT Ty = MRI.getType(Dst);
4622 assert(!Ty.isVector() && Ty.getSizeInBits() == 32 &&
4623 "Expected a 32-bit scalar register?");
4624#endif
4625 const Register ZReg = AArch64::WZR;
4626 AArch64CC::CondCode CC1, CC2;
4627 changeFCMPPredToAArch64CC(Pred, CC1, CC2);
4628 auto InvCC1 = AArch64CC::getInvertedCondCode(CC1);
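// Editor's note (added annotation, not in the source): emitCSINC with both
// sources WZR and the inverted condition is the canonical CSET sequence -- the
// result is 1 when the original condition holds and 0 otherwise. When the FCMP
// predicate needs two condition codes (CC2 != AL), two such CSETs are ORed
// together below.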
4629 if (CC2 == AArch64CC::AL)
4630 return emitCSINC(/*Dst=*/Dst, /*Src1=*/ZReg, /*Src2=*/ZReg, InvCC1,
4631 MIRBuilder);
4632 const TargetRegisterClass *RC = &AArch64::GPR32RegClass;
4633 Register Def1Reg = MRI.createVirtualRegister(RC);
4634 Register Def2Reg = MRI.createVirtualRegister(RC);
4635 auto InvCC2 = AArch64CC::getInvertedCondCode(CC2);
4636 emitCSINC(/*Dst=*/Def1Reg, /*Src1=*/ZReg, /*Src2=*/ZReg, InvCC1, MIRBuilder);
4637 emitCSINC(/*Dst=*/Def2Reg, /*Src1=*/ZReg, /*Src2=*/ZReg, InvCC2, MIRBuilder);
4638 auto OrMI = MIRBuilder.buildInstr(AArch64::ORRWrr, {Dst}, {Def1Reg, Def2Reg});
4639 constrainSelectedInstRegOperands(*OrMI, TII, TRI, RBI);
4640 return &*OrMI;
4641}
4642
4643MachineInstr *
4644AArch64InstructionSelector::emitFPCompare(Register LHS, Register RHS,
4645 MachineIRBuilder &MIRBuilder,
4646 Optional<CmpInst::Predicate> Pred) const {
4647 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
4648 LLT Ty = MRI.getType(LHS);
4649 if (Ty.isVector())
4650 return nullptr;
4651 unsigned OpSize = Ty.getSizeInBits();
4652 if (OpSize != 32 && OpSize != 64)
4653 return nullptr;
4654
4655 // If this is a compare against +0.0, then we don't have
4656 // to explicitly materialize a constant.
4657 const ConstantFP *FPImm = getConstantFPVRegVal(RHS, MRI);
4658 bool ShouldUseImm = FPImm && (FPImm->isZero() && !FPImm->isNegative());
4659
4660 auto IsEqualityPred = [](CmpInst::Predicate P) {
4661 return P == CmpInst::FCMP_OEQ || P == CmpInst::FCMP_ONE ||
4662 P == CmpInst::FCMP_UEQ || P == CmpInst::FCMP_UNE;
4663 };
4664 if (!ShouldUseImm && Pred && IsEqualityPred(*Pred)) {
4665 // Try commutating the operands.
4666 const ConstantFP *LHSImm = getConstantFPVRegVal(LHS, MRI);
4667 if (LHSImm && (LHSImm->isZero() && !LHSImm->isNegative())) {
4668 ShouldUseImm = true;
4669 std::swap(LHS, RHS);
4670 }
4671 }
4672 unsigned CmpOpcTbl[2][2] = {{AArch64::FCMPSrr, AArch64::FCMPDrr},
4673 {AArch64::FCMPSri, AArch64::FCMPDri}};
4674 unsigned CmpOpc = CmpOpcTbl[ShouldUseImm][OpSize == 64];
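// Editor's note (added annotation, not in the source): for example, comparing
// an s64 value against +0.0 gives CmpOpcTbl[1][1] = FCMPDri, and the builder
// below then omits the second register use because ShouldUseImm is set.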
4675
4676 // Partially build the compare. Decide if we need to add a use for the
4677 // third operand based off whether or not we're comparing against 0.0.
4678 auto CmpMI = MIRBuilder.buildInstr(CmpOpc).addUse(LHS);
4679 CmpMI.setMIFlags(MachineInstr::NoFPExcept);
4680 if (!ShouldUseImm)
4681 CmpMI.addUse(RHS);
4682 constrainSelectedInstRegOperands(*CmpMI, TII, TRI, RBI);
4683 return &*CmpMI;
4684}
4685
4686MachineInstr *AArch64InstructionSelector::emitVectorConcat(
4687 Optional<Register> Dst, Register Op1, Register Op2,
4688 MachineIRBuilder &MIRBuilder) const {
4689 // We implement a vector concat by:
4690 // 1. Use scalar_to_vector to insert the lower vector into the larger dest
4691 // 2. Insert the upper vector into the destination's upper element
4692 // TODO: some of this code is common with G_BUILD_VECTOR handling.
4693 MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
4694
4695 const LLT Op1Ty = MRI.getType(Op1);
4696 const LLT Op2Ty = MRI.getType(Op2);
4697
4698 if (Op1Ty != Op2Ty) {
4699 LLVM_DEBUG(dbgs() << "Could not do vector concat of differing vector tys");
4700 return nullptr;
4701 }
4702 assert(Op1Ty.isVector() && "Expected a vector for vector concat");
4703
4704 if (Op1Ty.getSizeInBits() >= 128) {
4705 LLVM_DEBUG(dbgs() << "Vector concat not supported for full size vectors");
4706 return nullptr;
4707 }
4708
4709 // At the moment we just support 64 bit vector concats.
4710 if (Op1Ty.getSizeInBits() != 64) {
4711 LLVM_DEBUG(dbgs() << "Vector concat supported for 64b vectors");
4712 return nullptr;
4713 }
4714
4715 const LLT ScalarTy = LLT::scalar(Op1Ty.getSizeInBits());
4716 const RegisterBank &FPRBank = *RBI.getRegBank(Op1, MRI, TRI);
4717 const TargetRegisterClass *DstRC =
4718 getRegClassForTypeOnBank(Op1Ty.multiplyElements(2), FPRBank);
4719
4720 MachineInstr *WidenedOp1 =
4721 emitScalarToVector(ScalarTy.getSizeInBits(), DstRC, Op1, MIRBuilder);
4722 MachineInstr *WidenedOp2 =
4723 emitScalarToVector(ScalarTy.getSizeInBits(), DstRC, Op2, MIRBuilder);
4724 if (!WidenedOp1 || !WidenedOp2) {
4725 LLVM_DEBUG(dbgs() << "Could not emit a vector from scalar value");
4726 return nullptr;
4727 }
4728
4729 // Now do the insert of the upper element.
4730 unsigned InsertOpc, InsSubRegIdx;
4731 std::tie(InsertOpc, InsSubRegIdx) =
4732 getInsertVecEltOpInfo(FPRBank, ScalarTy.getSizeInBits());
4733
4734 if (!Dst)
4735 Dst = MRI.createVirtualRegister(DstRC);
4736 auto InsElt =
4737 MIRBuilder
4738 .buildInstr(InsertOpc, {*Dst}, {WidenedOp1->getOperand(0).getReg()})
4739 .addImm(1) /* Lane index */
4740 .addUse(WidenedOp2->getOperand(0).getReg())
4741 .addImm(0);
4742 constrainSelectedInstRegOperands(*InsElt, TII, TRI, RBI);
4743 return &*InsElt;
4744}
4745
4746MachineInstr *
4747AArch64InstructionSelector::emitCSINC(Register Dst, Register Src1,
4748 Register Src2, AArch64CC::CondCode Pred,
4749 MachineIRBuilder &MIRBuilder) const {
4750 auto &MRI = *MIRBuilder.getMRI();
4751 const RegClassOrRegBank &RegClassOrBank = MRI.getRegClassOrRegBank(Dst);
4752 // If we used a register class, then this won't necessarily have an LLT.
4753 // Compute the size based off whether or not we have a class or bank.
4754 unsigned Size;
4755 if (const auto *RC = RegClassOrBank.dyn_cast<const TargetRegisterClass *>())
4756 Size = TRI.getRegSizeInBits(*RC);
4757 else
4758 Size = MRI.getType(Dst).getSizeInBits();
4759 // Some opcodes use s1.
4760 assert(Size <= 64 && "Expected 64 bits or less only!");
4761 static const unsigned OpcTable[2] = {AArch64::CSINCWr, AArch64::CSINCXr};
4762 unsigned Opc = OpcTable[Size == 64];
4763 auto CSINC = MIRBuilder.buildInstr(Opc, {Dst}, {Src1, Src2}).addImm(Pred);
4764 constrainSelectedInstRegOperands(*CSINC, TII, TRI, RBI);
4765 return &*CSINC;
4766}
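// Minimal usage sketch (assumed, not from this file): the common "cset" idiom
// materializes a condition into a register by feeding the zero register to
// both sources and passing the inverted condition, e.g.
//   emitCSINC(Dst, AArch64::WZR, AArch64::WZR,
//             AArch64CC::getInvertedCondCode(CC), MIRBuilder);
// which emits "csinc wDst, wzr, wzr, !CC", so Dst = (CC holds) ? 1 : 0.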
4767
4768std::pair<MachineInstr *, AArch64CC::CondCode>
4769AArch64InstructionSelector::emitOverflowOp(unsigned Opcode, Register Dst,
4770 MachineOperand &LHS,
4771 MachineOperand &RHS,
4772 MachineIRBuilder &MIRBuilder) const {
4773 switch (Opcode) {
4774 default:
4775 llvm_unreachable("Unexpected opcode!");
4776 case TargetOpcode::G_SADDO:
4777 return std::make_pair(emitADDS(Dst, LHS, RHS, MIRBuilder), AArch64CC::VS);
4778 case TargetOpcode::G_UADDO:
4779 return std::make_pair(emitADDS(Dst, LHS, RHS, MIRBuilder), AArch64CC::HS);
4780 case TargetOpcode::G_SSUBO:
4781 return std::make_pair(emitSUBS(Dst, LHS, RHS, MIRBuilder), AArch64CC::VS);
4782 case TargetOpcode::G_USUBO:
4783 return std::make_pair(emitSUBS(Dst, LHS, RHS, MIRBuilder), AArch64CC::LO);
4784 }
4785}
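// Example (sketch): selecting %res, %ovf = G_UADDO %a, %b calls emitADDS for
// %res and returns AArch64CC::HS, so the caller can materialize %ovf from the
// flags, roughly:
//   adds wRes, wA, wB
//   cset wOvf, hs        ; unsigned add overflow == carry set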
4786
4787/// Returns true if @p Val is a tree of AND/OR/CMP operations that can be
4788/// expressed as a conjunction.
4789/// \param CanNegate Set to true if we can negate the whole sub-tree just by
4790/// changing the conditions on the CMP tests.
4791/// (this means we can call emitConjunctionRec() with
4792/// Negate==true on this sub-tree)
4793/// \param MustBeFirst Set to true if this subtree needs to be negated and we
4794/// cannot do the negation naturally. We are required to
4795/// emit the subtree first in this case.
4796 /// \param WillNegate Is true if we are called when the result of this
4797/// subexpression must be negated. This happens when the
4798/// outer expression is an OR. We can use this fact to know
4799/// that we have a double negation (or (or ...) ...) that
4800/// can be implemented for free.
4801static bool canEmitConjunction(Register Val, bool &CanNegate, bool &MustBeFirst,
4802 bool WillNegate, MachineRegisterInfo &MRI,
4803 unsigned Depth = 0) {
4804 if (!MRI.hasOneNonDBGUse(Val))
4805 return false;
4806 MachineInstr *ValDef = MRI.getVRegDef(Val);
4807 unsigned Opcode = ValDef->getOpcode();
4808 if (isa<GAnyCmp>(ValDef)) {
4809 CanNegate = true;
4810 MustBeFirst = false;
4811 return true;
4812 }
4813 // Protect against exponential runtime and stack overflow.
4814 if (Depth > 6)
4815 return false;
4816 if (Opcode == TargetOpcode::G_AND || Opcode == TargetOpcode::G_OR) {
4817 bool IsOR = Opcode == TargetOpcode::G_OR;
4818 Register O0 = ValDef->getOperand(1).getReg();
4819 Register O1 = ValDef->getOperand(2).getReg();
4820 bool CanNegateL;
4821 bool MustBeFirstL;
4822 if (!canEmitConjunction(O0, CanNegateL, MustBeFirstL, IsOR, MRI, Depth + 1))
4823 return false;
4824 bool CanNegateR;
4825 bool MustBeFirstR;
4826 if (!canEmitConjunction(O1, CanNegateR, MustBeFirstR, IsOR, MRI, Depth + 1))
4827 return false;
4828
4829 if (MustBeFirstL && MustBeFirstR)
4830 return false;
4831
4832 if (IsOR) {
4833 // For an OR expression we need to be able to naturally negate at least
4834 // one side or we cannot do the transformation at all.
4835 if (!CanNegateL && !CanNegateR)
4836 return false;
4837 // If the result of the OR will be negated and we can naturally negate
4838 // the leaves, then this sub-tree as a whole negates naturally.
4839 CanNegate = WillNegate && CanNegateL && CanNegateR;
4840 // If we cannot naturally negate the whole sub-tree, then this must be
4841 // emitted first.
4842 MustBeFirst = !CanNegate;
4843 } else {
4844 assert(Opcode == TargetOpcode::G_AND && "Must be G_AND");
4845 // We cannot naturally negate an AND operation.
4846 CanNegate = false;
4847 MustBeFirst = MustBeFirstL || MustBeFirstR;
4848 }
4849 return true;
4850 }
4851 return false;
4852}
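// Worked example (illustration only): for
//   %v = G_OR (G_ICMP eq, %a, %b), (G_ICMP slt, %c, %d)
// both leaves are compares, so CanNegateL/CanNegateR are true. With
// WillNegate == false the OR reports CanNegate = false and MustBeFirst = true,
// whereas a G_AND of the same leaves reports CanNegate = false and
// MustBeFirst = false.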
4853
4854MachineInstr *AArch64InstructionSelector::emitConditionalComparison(
4855 Register LHS, Register RHS, CmpInst::Predicate CC,
4856 AArch64CC::CondCode Predicate, AArch64CC::CondCode OutCC,
4857 MachineIRBuilder &MIB) const {
4858 // TODO: emit CMN as an optimization.
4859 auto &MRI = *MIB.getMRI();
4860 LLT OpTy = MRI.getType(LHS);
4861 assert(OpTy.getSizeInBits() == 32 || OpTy.getSizeInBits() == 64);
4862 unsigned CCmpOpc;
4863 Optional<ValueAndVReg> C;
4864 if (CmpInst::isIntPredicate(CC)) {
4865 C = getIConstantVRegValWithLookThrough(RHS, MRI);
4866 if (C && C->Value.ult(32))
4867 CCmpOpc = OpTy.getSizeInBits() == 32 ? AArch64::CCMPWi : AArch64::CCMPXi;
4868 else
4869 CCmpOpc = OpTy.getSizeInBits() == 32 ? AArch64::CCMPWr : AArch64::CCMPXr;
4870 } else {
4871 switch (OpTy.getSizeInBits()) {
4872 case 16:
4873 CCmpOpc = AArch64::FCCMPHrr;
4874 break;
4875 case 32:
4876 CCmpOpc = AArch64::FCCMPSrr;
4877 break;
4878 case 64:
4879 CCmpOpc = AArch64::FCCMPDrr;
4880 break;
4881 default:
4882 return nullptr;
4883 }
4884 }
4885 AArch64CC::CondCode InvOutCC = AArch64CC::getInvertedCondCode(OutCC);
4886 unsigned NZCV = AArch64CC::getNZCVToSatisfyCondCode(InvOutCC);
4887 auto CCmp =
4888 MIB.buildInstr(CCmpOpc, {}, {LHS});
4889 if (CCmpOpc == AArch64::CCMPWi || CCmpOpc == AArch64::CCMPXi)
4890 CCmp.addImm(C->Value.getZExtValue());
4891 else
4892 CCmp.addReg(RHS);
4893 CCmp.addImm(NZCV).addImm(Predicate);
4894 constrainSelectedInstRegOperands(*CCmp, TII, TRI, RBI);
4895 return &*CCmp;
4896}
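// Semantics sketch (not from the source): the emitted conditional compare
//   ccmp xLHS, xRHS/#imm, #nzcv, Predicate
// performs the compare only when Predicate holds and otherwise writes #nzcv
// into the flags directly; NZCV is chosen above from the inverted OutCC so
// that a failed Predicate forces OutCC to be false for the rest of the chain.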
4897
4898MachineInstr *AArch64InstructionSelector::emitConjunctionRec(
4899 Register Val, AArch64CC::CondCode &OutCC, bool Negate, Register CCOp,
4900 AArch64CC::CondCode Predicate, MachineIRBuilder &MIB) const {
4901 // We're at a tree leaf, produce a conditional comparison operation.
4902 auto &MRI = *MIB.getMRI();
4903 MachineInstr *ValDef = MRI.getVRegDef(Val);
4904 unsigned Opcode = ValDef->getOpcode();
4905 if (auto *Cmp = dyn_cast<GAnyCmp>(ValDef)) {
4906 Register LHS = Cmp->getLHSReg();
4907 Register RHS = Cmp->getRHSReg();
4908 CmpInst::Predicate CC = Cmp->getCond();
4909 if (Negate)
4910 CC = CmpInst::getInversePredicate(CC);
4911 if (isa<GICmp>(Cmp)) {
4912 OutCC = changeICMPPredToAArch64CC(CC);
4913 } else {
4914 // Handle special FP cases.
4915 AArch64CC::CondCode ExtraCC;
4916 changeFPCCToANDAArch64CC(CC, OutCC, ExtraCC);
4917 // Some floating point conditions can't be tested with a single condition
4918 // code. Construct an additional comparison in this case.
4919 if (ExtraCC != AArch64CC::AL) {
4920 MachineInstr *ExtraCmp;
4921 if (!CCOp)
4922 ExtraCmp = emitFPCompare(LHS, RHS, MIB, CC);
4923 else
4924 ExtraCmp =
4925 emitConditionalComparison(LHS, RHS, CC, Predicate, ExtraCC, MIB);
4926 CCOp = ExtraCmp->getOperand(0).getReg();
4927 Predicate = ExtraCC;
4928 }
4929 }
4930
4931 // Produce a normal comparison if we are first in the chain
4932 if (!CCOp) {
4933 auto Dst = MRI.cloneVirtualRegister(LHS);
4934 if (isa<GICmp>(Cmp))
4935 return emitSUBS(Dst, Cmp->getOperand(2), Cmp->getOperand(3), MIB);
4936 return emitFPCompare(Cmp->getOperand(2).getReg(),
4937 Cmp->getOperand(3).getReg(), MIB);
4938 }
4939 // Otherwise produce a ccmp.
4940 return emitConditionalComparison(LHS, RHS, CC, Predicate, OutCC, MIB);
4941 }
4942 assert(MRI.hasOneNonDBGUse(Val) && "Valid conjunction/disjunction tree");
4943
4944 bool IsOR = Opcode == TargetOpcode::G_OR;
4945
4946 Register LHS = ValDef->getOperand(1).getReg();
4947 bool CanNegateL;
4948 bool MustBeFirstL;
4949 bool ValidL = canEmitConjunction(LHS, CanNegateL, MustBeFirstL, IsOR, MRI);
4950 assert(ValidL && "Valid conjunction/disjunction tree");
4951 (void)ValidL;
4952
4953 Register RHS = ValDef->getOperand(2).getReg();
4954 bool CanNegateR;
4955 bool MustBeFirstR;
4956 bool ValidR = canEmitConjunction(RHS, CanNegateR, MustBeFirstR, IsOR, MRI);
4957 assert(ValidR && "Valid conjunction/disjunction tree");
4958 (void)ValidR;
4959
4960 // Swap sub-tree that must come first to the right side.
4961 if (MustBeFirstL) {
4962 assert(!MustBeFirstR && "Valid conjunction/disjunction tree");
4963 std::swap(LHS, RHS);
4964 std::swap(CanNegateL, CanNegateR);
4965 std::swap(MustBeFirstL, MustBeFirstR);
4966 }
4967
4968 bool NegateR;
4969 bool NegateAfterR;
4970 bool NegateL;
4971 bool NegateAfterAll;
4972 if (Opcode == TargetOpcode::G_OR) {
4973 // Swap the sub-tree that we can negate naturally to the left.
4974 if (!CanNegateL) {
4975 assert(CanNegateR && "at least one side must be negatable");
4976 assert(!MustBeFirstR && "invalid conjunction/disjunction tree");
4977 assert(!Negate);
4978 std::swap(LHS, RHS);
4979 NegateR = false;
4980 NegateAfterR = true;
4981 } else {
4982 // Negate the left sub-tree if possible, otherwise negate the result.
4983 NegateR = CanNegateR;
4984 NegateAfterR = !CanNegateR;
4985 }
4986 NegateL = true;
4987 NegateAfterAll = !Negate;
4988 } else {
4989 assert(Opcode == TargetOpcode::G_AND &&
4990 "Valid conjunction/disjunction tree");
4991 assert(!Negate && "Valid conjunction/disjunction tree");
4992
4993 NegateL = false;
4994 NegateR = false;
4995 NegateAfterR = false;
4996 NegateAfterAll = false;
4997 }
4998
4999 // Emit sub-trees.
5000 AArch64CC::CondCode RHSCC;
5001 MachineInstr *CmpR =
5002 emitConjunctionRec(RHS, RHSCC, NegateR, CCOp, Predicate, MIB);
5003 if (NegateAfterR)
5004 RHSCC = AArch64CC::getInvertedCondCode(RHSCC);
5005 MachineInstr *CmpL = emitConjunctionRec(
5006 LHS, OutCC, NegateL, CmpR->getOperand(0).getReg(), RHSCC, MIB);
5007 if (NegateAfterAll)
5008 OutCC = AArch64CC::getInvertedCondCode(OutCC);
5009 return CmpL;
5010}
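// End-to-end sketch (assumed): for
//   %c = G_AND (G_ICMP slt, %a, %b), (G_ICMP eq, %x, %y)
// the recursion emits roughly
//   cmp  wX, wY            ; right leaf: ordinary compare, RHSCC = eq
//   ccmp wA, wB, #0, eq    ; left leaf: only tested if "eq" held
// and returns OutCC = lt for a single consumer such as csel or cset.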
5011
5012MachineInstr *AArch64InstructionSelector::emitConjunction(
5013 Register Val, AArch64CC::CondCode &OutCC, MachineIRBuilder &MIB) const {
5014 bool DummyCanNegate;
5015 bool DummyMustBeFirst;
5016 if (!canEmitConjunction(Val, DummyCanNegate, DummyMustBeFirst, false,
5017 *MIB.getMRI()))
5018 return nullptr;
5019 return emitConjunctionRec(Val, OutCC, false, Register(), AArch64CC::AL, MIB);
5020}
5021
5022bool AArch64InstructionSelector::tryOptSelectConjunction(GSelect &SelI,
5023 MachineInstr &CondMI) {
5024 AArch64CC::CondCode AArch64CC;
5025 MachineInstr *ConjMI = emitConjunction(SelI.getCondReg(), AArch64CC, MIB);
5026 if (!ConjMI)
5027 return false;
5028
5029 emitSelect(SelI.getReg(0), SelI.getTrueReg(), SelI.getFalseReg(), AArch64CC, MIB);
5030 SelI.eraseFromParent();
5031 return true;
5032}
5033
5034bool AArch64InstructionSelector::tryOptSelect(GSelect &I) {
5035 MachineRegisterInfo &MRI = *MIB.getMRI();
5036 // We want to recognize this pattern:
5037 //
5038 // $z = G_FCMP pred, $x, $y
5039 // ...
5040 // $w = G_SELECT $z, $a, $b
5041 //
5042 // Where the value of $z is *only* ever used by the G_SELECT (possibly with
5043 // some copies/truncs in between.)
5044 //
5045 // If we see this, then we can emit something like this:
5046 //
5047 // fcmp $x, $y
5048 // fcsel $w, $a, $b, pred
5049 //
5050 // Rather than emitting both of the rather long sequences in the standard
5051 // G_FCMP/G_SELECT select methods.
5052
5053 // First, check if the condition is defined by a compare.
5054 MachineInstr *CondDef = MRI.getVRegDef(I.getOperand(1).getReg());
5055
5056 // We can only fold if all of the defs have one use.
5057 Register CondDefReg = CondDef->getOperand(0).getReg();
5058 if (!MRI.hasOneNonDBGUse(CondDefReg)) {
5059 // Unless it's another select.
5060 for (const MachineInstr &UI : MRI.use_nodbg_instructions(CondDefReg)) {
5061 if (CondDef == &UI)
5062 continue;
5063 if (UI.getOpcode() != TargetOpcode::G_SELECT)
5064 return false;
5065 }
5066 }
5067
5068 // Is the condition defined by a compare?
5069 unsigned CondOpc = CondDef->getOpcode();
5070 if (CondOpc != TargetOpcode::G_ICMP && CondOpc != TargetOpcode::G_FCMP) {
5071 if (tryOptSelectConjunction(I, *CondDef))
5072 return true;
5073 return false;
5074 }
5075
5076 AArch64CC::CondCode CondCode;
5077 if (CondOpc == TargetOpcode::G_ICMP) {
5078 auto Pred =
5079 static_cast<CmpInst::Predicate>(CondDef->getOperand(1).getPredicate());
5080 CondCode = changeICMPPredToAArch64CC(Pred);
5081 emitIntegerCompare(CondDef->getOperand(2), CondDef->getOperand(3),
5082 CondDef->getOperand(1), MIB);
5083 } else {
5084 // Get the condition code for the select.
5085 auto Pred =
5086 static_cast<CmpInst::Predicate>(CondDef->getOperand(1).getPredicate());
5087 AArch64CC::CondCode CondCode2;
5088 changeFCMPPredToAArch64CC(Pred, CondCode, CondCode2);
5089
5090 // changeFCMPPredToAArch64CC sets CondCode2 to AL when we require two
5091 // instructions to emit the comparison.
5092 // TODO: Handle FCMP_UEQ and FCMP_ONE. After that, this check will be
5093 // unnecessary.
5094 if (CondCode2 != AArch64CC::AL)
5095 return false;
5096
5097 if (!emitFPCompare(CondDef->getOperand(2).getReg(),
5098 CondDef->getOperand(3).getReg(), MIB)) {
5099 LLVM_DEBUG(dbgs() << "Couldn't emit compare for select!\n");
5100 return false;
5101 }
5102 }
5103
5104 // Emit the select.
5105 emitSelect(I.getOperand(0).getReg(), I.getOperand(2).getReg(),
5106 I.getOperand(3).getReg(), CondCode, MIB);
5107 I.eraseFromParent();
5108 return true;
5109}
5110
5111MachineInstr *AArch64InstructionSelector::tryFoldIntegerCompare(
5112 MachineOperand &LHS, MachineOperand &RHS, MachineOperand &Predicate,
5113 MachineIRBuilder &MIRBuilder) const {
5114 assert(LHS.isReg() && RHS.isReg() && Predicate.isPredicate() &&
5115 "Unexpected MachineOperand");
5116 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
5117 // We want to find this sort of thing:
5118 // x = G_SUB 0, y
5119 // G_ICMP z, x
5120 //
5121 // In this case, we can fold the G_SUB into the G_ICMP using a CMN instead.
5122 // e.g:
5123 //
5124 // cmn z, y
5125
5126 // Check if the RHS or LHS of the G_ICMP is defined by a SUB
5127 MachineInstr *LHSDef = getDefIgnoringCopies(LHS.getReg(), MRI);
5128 MachineInstr *RHSDef = getDefIgnoringCopies(RHS.getReg(), MRI);
5129 auto P = static_cast<CmpInst::Predicate>(Predicate.getPredicate());
5130 // Given this:
5131 //
5132 // x = G_SUB 0, y
5133 // G_ICMP x, z
5134 //
5135 // Produce this:
5136 //
5137 // cmn y, z
5138 if (isCMN(LHSDef, P, MRI))
5139 return emitCMN(LHSDef->getOperand(2), RHS, MIRBuilder);
5140
5141 // Same idea here, but with the RHS of the compare instead:
5142 //
5143 // Given this:
5144 //
5145 // x = G_SUB 0, y
5146 // G_ICMP z, x
5147 //
5148 // Produce this:
5149 //
5150 // cmn z, y
5151 if (isCMN(RHSDef, P, MRI))
5152 return emitCMN(LHS, RHSDef->getOperand(2), MIRBuilder);
5153
5154 // Given this:
5155 //
5156 // z = G_AND x, y
5157 // G_ICMP z, 0
5158 //
5159 // Produce this if the compare is signed:
5160 //
5161 // tst x, y
5162 if (!CmpInst::isUnsigned(P) && LHSDef &&
5163 LHSDef->getOpcode() == TargetOpcode::G_AND) {
5164 // Make sure that the RHS is 0.
5165 auto ValAndVReg = getIConstantVRegValWithLookThrough(RHS.getReg(), MRI);
5166 if (!ValAndVReg || ValAndVReg->Value != 0)
5167 return nullptr;
5168
5169 return emitTST(LHSDef->getOperand(1),
5170 LHSDef->getOperand(2), MIRBuilder);
5171 }
5172
5173 return nullptr;
5174}
5175
5176bool AArch64InstructionSelector::selectShuffleVector(
5177 MachineInstr &I, MachineRegisterInfo &MRI) {
5178 const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
5179 Register Src1Reg = I.getOperand(1).getReg();
5180 const LLT Src1Ty = MRI.getType(Src1Reg);
5181 Register Src2Reg = I.getOperand(2).getReg();
5182 const LLT Src2Ty = MRI.getType(Src2Reg);
5183 ArrayRef<int> Mask = I.getOperand(3).getShuffleMask();
5184
5185 MachineBasicBlock &MBB = *I.getParent();
5186 MachineFunction &MF = *MBB.getParent();
5187 LLVMContext &Ctx = MF.getFunction().getContext();
5188
5189 // G_SHUFFLE_VECTOR is weird in that the source operands can be scalars, if
5190 // it's originated from a <1 x T> type. Those should have been lowered into
5191 // G_BUILD_VECTOR earlier.
5192 if (!Src1Ty.isVector() || !Src2Ty.isVector()) {
5193 LLVM_DEBUG(dbgs() << "Could not select a \"scalar\" G_SHUFFLE_VECTOR\n");
5194 return false;
5195 }
5196
5197 unsigned BytesPerElt = DstTy.getElementType().getSizeInBits() / 8;
5198
5199 SmallVector<Constant *, 64> CstIdxs;
5200 for (int Val : Mask) {
5201 // For now, we just treat any undef index as 0. This should be
5202 // optimized in the future, e.g. to select DUP etc.
5203 Val = Val < 0 ? 0 : Val;
5204 for (unsigned Byte = 0; Byte < BytesPerElt; ++Byte) {
5205 unsigned Offset = Byte + Val * BytesPerElt;
5206 CstIdxs.emplace_back(ConstantInt::get(Type::getInt8Ty(Ctx), Offset));
5207 }
5208 }
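// Worked example (illustration): for a <2 x s32> destination with mask [1, 0],
// BytesPerElt is 4, so element 1 contributes byte indices 4, 5, 6, 7 and
// element 0 contributes 0, 1, 2, 3, giving the TBL index vector
// <4, 5, 6, 7, 0, 1, 2, 3>.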
5209
5210 // Use a constant pool to load the index vector for TBL.
5211 Constant *CPVal = ConstantVector::get(CstIdxs);
5212 MachineInstr *IndexLoad = emitLoadFromConstantPool(CPVal, MIB);
5213 if (!IndexLoad) {
5214 LLVM_DEBUG(dbgs() << "Could not load from a constant pool");
5215 return false;
5216 }
5217
5218 if (DstTy.getSizeInBits() != 128) {
5219 assert(DstTy.getSizeInBits() == 64 && "Unexpected shuffle result ty");
5220 // This case can be done with TBL1.
5221 MachineInstr *Concat = emitVectorConcat(None, Src1Reg, Src2Reg, MIB);
5222 if (!Concat) {
5223 LLVM_DEBUG(dbgs() << "Could not do vector concat for tbl1");
5224 return false;
5225 }
5226
5227 // The constant pool load will be 64 bits, so we need to convert it to an FPR128 reg.
5228 IndexLoad = emitScalarToVector(64, &AArch64::FPR128RegClass,
5229 IndexLoad->getOperand(0).getReg(), MIB);
5230
5231 auto TBL1 = MIB.buildInstr(
5232 AArch64::TBLv16i8One, {&AArch64::FPR128RegClass},
5233 {Concat->getOperand(0).getReg(), IndexLoad->getOperand(0).getReg()});
5234 constrainSelectedInstRegOperands(*TBL1, TII, TRI, RBI);
5235
5236 auto Copy =
5237 MIB.buildInstr(TargetOpcode::COPY, {I.getOperand(0).getReg()}, {})
5238 .addReg(TBL1.getReg(0), 0, AArch64::dsub);
5239 RBI.constrainGenericRegister(Copy.getReg(0), AArch64::FPR64RegClass, MRI);
5240 I.eraseFromParent();
5241 return true;
5242 }
5243
5244 // For TBL2 we need to emit a REG_SEQUENCE to tie together two consecutive
5245 // Q registers for regalloc.
5246 SmallVector<Register, 2> Regs = {Src1Reg, Src2Reg};
5247 auto RegSeq = createQTuple(Regs, MIB);
5248 auto TBL2 = MIB.buildInstr(AArch64::TBLv16i8Two, {I.getOperand(0)},
5249 {RegSeq, IndexLoad->getOperand(0)});
5250 constrainSelectedInstRegOperands(*TBL2, TII, TRI, RBI);
5251 I.eraseFromParent();
5252 return true;
5253}
5254
5255MachineInstr *AArch64InstructionSelector::emitLaneInsert(
5256 Optional<Register> DstReg, Register SrcReg, Register EltReg,
5257 unsigned LaneIdx, const RegisterBank &RB,
5258 MachineIRBuilder &MIRBuilder) const {
5259 MachineInstr *InsElt = nullptr;
5260 const TargetRegisterClass *DstRC = &AArch64::FPR128RegClass;
5261 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
5262
5263 // Create a register to define with the insert if one wasn't passed in.
5264 if (!DstReg)
5265 DstReg = MRI.createVirtualRegister(DstRC);
5266
5267 unsigned EltSize = MRI.getType(EltReg).getSizeInBits();
5268 unsigned Opc = getInsertVecEltOpInfo(RB, EltSize).first;
5269
5270 if (RB.getID() == AArch64::FPRRegBankID) {
5271 auto InsSub = emitScalarToVector(EltSize, DstRC, EltReg, MIRBuilder);
5272 InsElt = MIRBuilder.buildInstr(Opc, {*DstReg}, {SrcReg})
5273 .addImm(LaneIdx)
5274 .addUse(InsSub->getOperand(0).getReg())
5275 .addImm(0);
5276 } else {
5277 InsElt = MIRBuilder.buildInstr(Opc, {*DstReg}, {SrcReg})
5278 .addImm(LaneIdx)
5279 .addUse(EltReg);
5280 }
5281
5282 constrainSelectedInstRegOperands(*InsElt, TII, TRI, RBI);
5283 return InsElt;
5284}
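// Usage sketch (assumed): inserting a 32-bit GPR element into lane 2 of an
// FPR128 vector resolves getInsertVecEltOpInfo to INSvi32gpr, roughly
//   %dst:fpr128 = INSvi32gpr %src, 2, %elt    ; "ins v.s[2], wElt"
// while an FPR element is first widened via emitScalarToVector and uses the
// lane-to-lane form (e.g. INSvi32lane) with source lane 0.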
5285
5286bool AArch64InstructionSelector::selectUSMovFromExtend(
5287 MachineInstr &MI, MachineRegisterInfo &MRI) {
5288 if (MI.getOpcode() != TargetOpcode::G_SEXT &&
5289 MI.getOpcode() != TargetOpcode::G_ZEXT &&
5290 MI.getOpcode() != TargetOpcode::G_ANYEXT)
5291 return false;
5292 bool IsSigned = MI.getOpcode() == TargetOpcode::G_SEXT;
5293 const Register DefReg = MI.getOperand(0).getReg();
5294 const LLT DstTy = MRI.getType(DefReg);
5295 unsigned DstSize = DstTy.getSizeInBits();
5296
5297 if (DstSize != 32 && DstSize != 64)
5298 return false;
5299
5300 MachineInstr *Extract = getOpcodeDef(TargetOpcode::G_EXTRACT_VECTOR_ELT,
5301 MI.getOperand(1).getReg(), MRI);
5302 int64_t Lane;
5303 if (!Extract || !mi_match(Extract->getOperand(2).getReg(), MRI, m_ICst(Lane)))
5304 return false;
5305 Register Src0 = Extract->getOperand(1).getReg();
5306
5307 const LLT &VecTy = MRI.getType(Src0);
5308
5309 if (VecTy.getSizeInBits() != 128) {
5310 const MachineInstr *ScalarToVector = emitScalarToVector(
5311 VecTy.getSizeInBits(), &AArch64::FPR128RegClass, Src0, MIB);
5312 assert(ScalarToVector && "Didn't expect emitScalarToVector to fail!");
5313 Src0 = ScalarToVector->getOperand(0).getReg();
5314 }
5315
5316 unsigned Opcode;
5317 if (DstSize == 64 && VecTy.getScalarSizeInBits() == 32)
5318 Opcode = IsSigned ? AArch64::SMOVvi32to64 : AArch64::UMOVvi32;
5319 else if (DstSize == 64 && VecTy.getScalarSizeInBits() == 16)
5320 Opcode = IsSigned ? AArch64::SMOVvi16to64 : AArch64::UMOVvi16;
5321 else if (DstSize == 64 && VecTy.getScalarSizeInBits() == 8)
5322 Opcode = IsSigned ? AArch64::SMOVvi8to64 : AArch64::UMOVvi8;
5323 else if (DstSize == 32 && VecTy.getScalarSizeInBits() == 16)
5324 Opcode = IsSigned ? AArch64::SMOVvi16to32 : AArch64::UMOVvi16;
5325 else if (DstSize == 32 && VecTy.getScalarSizeInBits() == 8)
5326 Opcode = IsSigned ? AArch64::SMOVvi8to32 : AArch64::UMOVvi8;
5327 else
5328 llvm_unreachable("Unexpected type combo for S/UMov!");
5329
5330 // We may need to generate one of these, depending on the type and sign of the
5331 // input:
5332 // DstReg = SMOV Src0, Lane;
5333 // NewReg = UMOV Src0, Lane; DstReg = SUBREG_TO_REG NewReg, sub_32;
5334 MachineInstr *ExtI = nullptr;
5335 if (DstSize == 64 && !IsSigned) {
5336 Register NewReg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
5337 MIB.buildInstr(Opcode, {NewReg}, {Src0}).addImm(Lane);
5338 ExtI = MIB.buildInstr(AArch64::SUBREG_TO_REG, {DefReg}, {})
5339 .addImm(0)
5340 .addUse(NewReg)
5341 .addImm(AArch64::sub_32);
5342 RBI.constrainGenericRegister(DefReg, AArch64::GPR64RegClass, MRI);
5343 } else
5344 ExtI = MIB.buildInstr(Opcode, {DefReg}, {Src0}).addImm(Lane);
5345
5346 constrainSelectedInstRegOperands(*ExtI, TII, TRI, RBI);
5347 MI.eraseFromParent();
5348 return true;
5349}
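// Example of the pattern handled above (sketch): for
//   %e:_(s16) = G_EXTRACT_VECTOR_ELT %v:_(<8 x s16>), 3
//   %d:_(s32) = G_SEXT %e
// a single "smov w0, v0.h[3]" is selected; the zext/anyext variant uses "umov"
// instead, plus a SUBREG_TO_REG when the destination is 64-bit.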
5350
5351bool AArch64InstructionSelector::selectInsertElt(MachineInstr &I,
5352 MachineRegisterInfo &MRI) {
5353 assert(I.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT);
5354
5355 // Get information on the destination.
5356 Register DstReg = I.getOperand(0).getReg();
5357 const LLT DstTy = MRI.getType(DstReg);
5358 unsigned VecSize = DstTy.getSizeInBits();
5359
5360 // Get information on the element we want to insert into the destination.
5361 Register EltReg = I.getOperand(2).getReg();
5362 const LLT EltTy = MRI.getType(EltReg);
5363 unsigned EltSize = EltTy.getSizeInBits();
5364 if (EltSize < 16 || EltSize > 64)
5365 return false; // Don't support all element types yet.
5366
5367 // Find the definition of the index. Bail out if it's not defined by a
5368 // G_CONSTANT.
5369 Register IdxReg = I.getOperand(3).getReg();
5370 auto VRegAndVal = getIConstantVRegValWithLookThrough(IdxReg, MRI);
5371 if (!VRegAndVal)
5372 return false;
5373 unsigned LaneIdx = VRegAndVal->Value.getSExtValue();
5374
5375 // Perform the lane insert.
5376 Register SrcReg = I.getOperand(1).getReg();
5377 const RegisterBank &EltRB = *RBI.getRegBank(EltReg, MRI, TRI);
5378
5379 if (VecSize < 128) {
5380 // If the vector we're inserting into is smaller than 128 bits, widen it
5381 // to 128 to do the insert.
5382 MachineInstr *ScalarToVec =
5383 emitScalarToVector(VecSize, &AArch64::FPR128RegClass, SrcReg, MIB);
5384 if (!ScalarToVec)
5385 return false;
5386 SrcReg = ScalarToVec->getOperand(0).getReg();
5387 }
5388
5389 // Create an insert into a new FPR128 register.
5390 // Note that if our vector is already 128 bits, we end up emitting an extra
5391 // register.
5392 MachineInstr *InsMI =
5393 emitLaneInsert(None, SrcReg, EltReg, LaneIdx, EltRB, MIB);
5394
5395 if (VecSize < 128) {
5396 // If we had to widen to perform the insert, then we have to demote back to
5397 // the original size to get the result we want.
5398 Register DemoteVec = InsMI->getOperand(0).getReg();
5399 const TargetRegisterClass *RC =
5400 getRegClassForTypeOnBank(DstTy, *RBI.getRegBank(DemoteVec, MRI, TRI));
5401 if (RC != &AArch64::FPR32RegClass && RC != &AArch64::FPR64RegClass) {
5402 LLVM_DEBUG(dbgs() << "Unsupported register class!\n");
5403 return false;
5404 }
5405 unsigned SubReg = 0;
5406 if (!getSubRegForClass(RC, TRI, SubReg))
5407 return false;
5408 if (SubReg != AArch64::ssub && SubReg != AArch64::dsub) {
5409 LLVM_DEBUG(dbgs() << "Unsupported destination size! (" << VecSize
5410 << "\n");
5411 return false;
5412 }
5413 MIB.buildInstr(TargetOpcode::COPY, {DstReg}, {})
5414 .addReg(DemoteVec, 0, SubReg);
5415 RBI.constrainGenericRegister(DstReg, *RC, MRI);
5416 } else {
5417 // No widening needed.
5418 InsMI->getOperand(0).setReg(DstReg);
5419 constrainSelectedInstRegOperands(*InsMI, TII, TRI, RBI);
5420 }
5421
5422 I.eraseFromParent();
5423 return true;
5424}
5425
5426MachineInstr *
5427AArch64InstructionSelector::emitConstantVector(Register Dst, Constant *CV,
5428 MachineIRBuilder &MIRBuilder,
5429 MachineRegisterInfo &MRI) {
5430 LLT DstTy = MRI.getType(Dst);
5431 unsigned DstSize = DstTy.getSizeInBits();
5432 if (CV->isNullValue()) {
5433 if (DstSize == 128) {
5434 auto Mov =
5435 MIRBuilder.buildInstr(AArch64::MOVIv2d_ns, {Dst}, {}).addImm(0);
5436 constrainSelectedInstRegOperands(*Mov, TII, TRI, RBI);
5437 return &*Mov;
5438 }
5439
5440 if (DstSize == 64) {
5441 auto Mov =
5442 MIRBuilder
5443 .buildInstr(AArch64::MOVIv2d_ns, {&AArch64::FPR128RegClass}, {})
5444 .addImm(0);
5445 auto Copy = MIRBuilder.buildInstr(TargetOpcode::COPY, {Dst}, {})
5446 .addReg(Mov.getReg(0), 0, AArch64::dsub);
5447 RBI.constrainGenericRegister(Dst, AArch64::FPR64RegClass, MRI);
5448 return &*Copy;
5449 }
5450 }
5451
5452 auto *CPLoad = emitLoadFromConstantPool(CV, MIRBuilder);
5453 if (!CPLoad) {
5454 LLVM_DEBUG(dbgs() << "Could not generate cp load for constant vector!");
5455 return nullptr;
5456 }
5457
5458 auto Copy = MIRBuilder.buildCopy(Dst, CPLoad->getOperand(0));
5459 RBI.constrainGenericRegister(
5460 Dst, *MRI.getRegClass(CPLoad->getOperand(0).getReg()), MRI);
5461 return &*Copy;
5462}
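// Sketch of the fast path above (illustration): an all-zero 128-bit vector is
// emitted as a single "movi v0.2d, #0"; a 64-bit zero vector reuses the same
// MOVIv2d_ns on an FPR128 temporary and copies out the dsub half; every other
// constant falls back to a constant-pool load.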
5463
5464bool AArch64InstructionSelector::tryOptConstantBuildVec(
5465 MachineInstr &I, LLT DstTy, MachineRegisterInfo &MRI) {
5466 assert(I.getOpcode() == TargetOpcode::G_BUILD_VECTOR);
5467 unsigned DstSize = DstTy.getSizeInBits();
5468 assert(DstSize <= 128 && "Unexpected build_vec type!");
5469 if (DstSize < 32)
5470 return false;
5471 // Check if we're building a constant vector, in which case we want to
5472 // generate a constant pool load instead of a vector insert sequence.
5473 SmallVector<Constant *, 16> Csts;
5474 for (unsigned Idx = 1; Idx < I.getNumOperands(); ++Idx) {
5475 // Try to find G_CONSTANT or G_FCONSTANT
5476 auto *OpMI =
5477 getOpcodeDef(TargetOpcode::G_CONSTANT, I.getOperand(Idx).getReg(), MRI);
5478 if (OpMI)
5479 Csts.emplace_back(
5480 const_cast<ConstantInt *>(OpMI->getOperand(1).getCImm()));
5481 else if ((OpMI = getOpcodeDef(TargetOpcode::G_FCONSTANT,
5482 I.getOperand(Idx).getReg(), MRI)))
5483 Csts.emplace_back(
5484 const_cast<ConstantFP *>(OpMI->getOperand(1).getFPImm()));
5485 else
5486 return false;
5487 }
5488 Constant *CV = ConstantVector::get(Csts);
5489 if (!emitConstantVector(I.getOperand(0).getReg(), CV, MIB, MRI))
5490 return false;
5491 I.eraseFromParent();
5492 return true;
5493}
5494
5495bool AArch64InstructionSelector::tryOptBuildVecToSubregToReg(
5496 MachineInstr &I, MachineRegisterInfo &MRI) {
5497 // Given:
5498 // %vec = G_BUILD_VECTOR %elt, %undef, %undef, ... %undef
5499 //
5500 // Select the G_BUILD_VECTOR as a SUBREG_TO_REG from %elt.
5501 Register Dst = I.getOperand(0).getReg();
5502 Register EltReg = I.getOperand(1).getReg();
5503 LLT EltTy = MRI.getType(EltReg);
5504 // If the index isn't on the same bank as its elements, then this can't be a
5505 // SUBREG_TO_REG.
5506 const RegisterBank &EltRB = *RBI.getRegBank(EltReg, MRI, TRI);
5507 const RegisterBank &DstRB = *RBI.getRegBank(Dst, MRI, TRI);
5508 if (EltRB != DstRB)
5509 return false;
5510 if (any_of(make_range(I.operands_begin() + 2, I.operands_end()),
5511 [&MRI](const MachineOperand &Op) {
5512 return !getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, Op.getReg(),
5513 MRI);
5514 }))
5515 return false;
5516 unsigned SubReg;
5517 const TargetRegisterClass *EltRC = getRegClassForTypeOnBank(EltTy, EltRB);
5518 if (!EltRC)
5519 return false;
5520 const TargetRegisterClass *DstRC =
5521 getRegClassForTypeOnBank(MRI.getType(Dst), DstRB);
5522 if (!DstRC)
5523 return false;
5524 if (!getSubRegForClass(EltRC, TRI, SubReg))
5525 return false;
5526 auto SubregToReg = MIB.buildInstr(AArch64::SUBREG_TO_REG, {Dst}, {})
5527 .addImm(0)
5528 .addUse(EltReg)
5529 .addImm(SubReg);
5530 I.eraseFromParent();
5531 constrainSelectedInstRegOperands(*SubregToReg, TII, TRI, RBI);
5532 return RBI.constrainGenericRegister(Dst, *DstRC, MRI);
5533}
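// Example of the fold above (assumed illustration):
//   %v:_(<2 x s64>) = G_BUILD_VECTOR %x:_(s64), %undef
// with %x on the FPR bank becomes
//   %v:fpr128 = SUBREG_TO_REG 0, %x, %subreg.dsub
// since the only defined element just needs to land in the low subregister.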
5534
5535bool AArch64InstructionSelector::selectBuildVector(MachineInstr &I,
5536 MachineRegisterInfo &MRI) {
5537 assert(I.getOpcode() == TargetOpcode::G_BUILD_VECTOR);
5538 // Until we port more of the optimized selections, for now just use a vector
5539 // insert sequence.
5540 const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
5541 const LLT EltTy = MRI.getType(I.getOperand(1).getReg());
5542 unsigned EltSize = EltTy.getSizeInBits();
5543
5544 if (tryOptConstantBuildVec(I, DstTy, MRI))
5545 return true;
5546 if (tryOptBuildVecToSubregToReg(I, MRI))
5547 return true;
5548
5549 if (EltSize < 16 || EltSize > 64)
5550 return false; // Don't support all element types yet.
5551 const RegisterBank &RB = *RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI);
5552
5553 const TargetRegisterClass *DstRC = &AArch64::FPR128RegClass;
5554 MachineInstr *ScalarToVec =
5555 emitScalarToVector(DstTy.getElementType().getSizeInBits(), DstRC,
5556 I.getOperand(1).getReg(), MIB);
5557 if (!ScalarToVec)
5558 return false;
5559
5560 Register DstVec = ScalarToVec->getOperand(0).getReg();
5561 unsigned DstSize = DstTy.getSizeInBits();
5562
5563 // Keep track of the last MI we inserted. Later on, we might be able to save
5564 // a copy using it.
5565 MachineInstr *PrevMI = nullptr;
5566 for (unsigned i = 2, e = DstSize / EltSize + 1; i < e; ++i) {
5567 // Note that if we don't do a subregister copy, we can end up making an
5568 // extra register.
5569 PrevMI = &*emitLaneInsert(None, DstVec, I.getOperand(i).getReg(), i - 1, RB,
5570 MIB);
5571 DstVec = PrevMI->getOperand(0).getReg();
5572 }
5573
5574 // If DstTy's size in bits is less than 128, then emit a subregister copy
5575 // from DstVec to the last register we've defined.
5576 if (DstSize < 128) {
5577 // Force this to be FPR using the destination vector.
5578 const TargetRegisterClass *RC =
5579 getRegClassForTypeOnBank(DstTy, *RBI.getRegBank(DstVec, MRI, TRI));
5580 if (!RC)
5581 return false;
5582 if (RC != &AArch64::FPR32RegClass && RC != &AArch64::FPR64RegClass) {
5583 LLVM_DEBUG(dbgs() << "Unsupported register class!\n");
5584 return false;
5585 }
5586
5587 unsigned SubReg = 0;
5588 if (!getSubRegForClass(RC, TRI, SubReg))
5589 return false;
5590 if (SubReg != AArch64::ssub && SubReg != AArch64::dsub) {
5591 LLVM_DEBUG(dbgs() << "Unsupported destination size! (" << DstSize
5592 << "\n");
5593 return false;
5594 }
5595
5596 Register Reg = MRI.createVirtualRegister(RC);
5597 Register DstReg = I.getOperand(0).getReg();
5598
5599 MIB.buildInstr(TargetOpcode::COPY, {DstReg}, {}).addReg(DstVec, 0, SubReg);
5600 MachineOperand &RegOp = I.getOperand(1);
5601 RegOp.setReg(Reg);
5602 RBI.constrainGenericRegister(DstReg, *RC, MRI);
5603 } else {
5604 // We don't need a subregister copy. Save a copy by re-using the
5605 // destination register on the final insert.
5606 assert(PrevMI && "PrevMI was null?");
5607 PrevMI->getOperand(0).setReg(I.getOperand(0).getReg());
5608 constrainSelectedInstRegOperands(*PrevMI, TII, TRI, RBI);
5609 }
5610
5611 I.eraseFromParent();
5612 return true;
5613}
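// Rough shape of the generic path above (sketch): a <4 x s32> build vector
// becomes one scalar-to-vector insert for element 0 followed by lane inserts
// for lanes 1..3; for a 64-bit destination the 128-bit temporary is then
// copied back out through the ssub/dsub subregister.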
5614
5615bool AArch64InstructionSelector::selectVectorLoadIntrinsic(unsigned Opc,
5616 unsigned NumVecs,
5617 MachineInstr &I) {
5618 assert(I.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS);
5619 assert(Opc && "Expected an opcode?");
5620 assert(NumVecs > 1 && NumVecs < 5 && "Only support 2, 3, or 4 vectors");
5621 auto &MRI = *MIB.getMRI();
5622 LLT Ty = MRI.getType(I.getOperand(0).getReg());
5623 unsigned Size = Ty.getSizeInBits();
5624 assert((Size == 64 || Size == 128) &&
5625 "Destination must be 64 bits or 128 bits?");
5626 unsigned SubReg = Size == 64 ? AArch64::dsub0 : AArch64::qsub0;
5627 auto Ptr = I.getOperand(I.getNumOperands() - 1).getReg();
5628 assert(MRI.getType(Ptr).isPointer() && "Expected a pointer type?");
5629 auto Load = MIB.buildInstr(Opc, {Ty}, {Ptr});
5630 Load.cloneMemRefs(I);
5631 constrainSelectedInstRegOperands(*Load, TII, TRI, RBI);
5632 Register SelectedLoadDst = Load->getOperand(0).getReg();
5633 for (unsigned Idx = 0; Idx < NumVecs; ++Idx) {
5634 auto Vec = MIB.buildInstr(TargetOpcode::COPY, {I.getOperand(Idx)}, {})
5635 .addReg(SelectedLoadDst, 0, SubReg + Idx);
5636 // Emit the subreg copies and immediately select them.
5637 // FIXME: We should refactor our copy code into an emitCopy helper and
5638 // clean up uses of this pattern elsewhere in the selector.
5639 selectCopy(*Vec, TII, MRI, TRI, RBI);
5640 }
5641 return true;
5642}
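// Usage sketch (assumed opcode name): an aarch64.neon.ld2 of two <4 x s32>
// results would pass Opc = AArch64::LD2Twov4s and NumVecs = 2; the pseudo
// defines a register tuple and the loop above copies qsub0 and qsub0 + 1 into
// the two intrinsic results.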
5643
5644bool AArch64InstructionSelector::selectIntrinsicWithSideEffects(
5645 MachineInstr &I, MachineRegisterInfo &MRI) {
5646 // Find the intrinsic ID.
5647 unsigned IntrinID = I.getIntrinsicID();
5648
5649 const LLT S8 = LLT::scalar(8);
5650 const LLT S16 = LLT::scalar(16);
5651 const LLT S32 = LLT::scalar(32);
5652 const LLT S64 = LLT::scalar(64);
5653 const LLT P0 = LLT::pointer(0, 64);
5654 // Select the instruction.
5655 switch (IntrinID) {
5656 default:
5657 return false;
5658 case Intrinsic::aarch64_ldxp:
5659 case Intrinsic::aarch64_ldaxp: {
5660 auto NewI = MIB.buildInstr(
5661 IntrinID == Intrinsic::aarch64_ldxp ? AArch64::LDXPX : AArch64::LDAXPX,
5662 {I.getOperand(0).getReg(), I.getOperand(1).getReg()},
5663 {I.getOperand(3)});
5664 NewI.cloneMemRefs(I);
5665 constrainSelectedInstRegOperands(*NewI, TII, TRI, RBI);
5666 break;
5667 }
5668 case Intrinsic::trap:
5669 MIB.buildInstr(AArch64::BRK, {}, {}).addImm(1);
5670 break;