Bug Summary

File: build/llvm-toolchain-snapshot-15~++20220420111733+e13d2efed663/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
Warning: line 3905, column 3
Assigned value is garbage or undefined
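For context, this class of diagnostic means a value read from an uninitialized location was copied into another variable. The flagged code at line 3905 is not part of the excerpt below; the following minimal sketch (all names hypothetical, not taken from this file) only illustrates the kind of flow the checker reports:

// Hypothetical illustration of "Assigned value is garbage or undefined":
// a helper that may return without writing its out-parameter, and a caller
// that ignores the boolean result.
static bool getSubRegIndex(unsigned SizeInBits, unsigned &SubReg) {
  if (SizeInBits == 32) {
    SubReg = 1;
    return true;
  }
  return false; // SubReg is left untouched on this path.
}

static unsigned pickSubReg(unsigned SizeInBits) {
  unsigned SubReg;                    // Uninitialized.
  getSubRegIndex(SizeInBits, SubReg); // Return value ignored.
  unsigned Copy = SubReg;             // <-- Analyzer: assigned value is garbage or undefined.
  return Copy;
}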

Annotated Source Code


clang -cc1 -cc1 -triple x86_64-pc-linux-gnu -analyze -disable-free -clear-ast-before-backend -disable-llvm-verifier -discard-value-names -main-file-name AArch64InstructionSelector.cpp -analyzer-store=region -analyzer-opt-analyze-nested-blocks -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=cplusplus -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -analyzer-config-compatibility-mode=true -mrelocation-model pic -pic-level 2 -mframe-pointer=none -fmath-errno -ffp-contract=on -fno-rounding-math -mconstructor-aliases -funwind-tables=2 -target-cpu x86-64 -tune-cpu generic -debugger-tuning=gdb -ffunction-sections -fdata-sections -fcoverage-compilation-dir=/build/llvm-toolchain-snapshot-15~++20220420111733+e13d2efed663/build-llvm/tools/clang/stage2-bins -resource-dir /usr/lib/llvm-15/lib/clang/15.0.0 -D _DEBUG -D _GNU_SOURCE -D __STDC_CONSTANT_MACROS -D __STDC_FORMAT_MACROS -D __STDC_LIMIT_MACROS -I lib/Target/AArch64 -I /build/llvm-toolchain-snapshot-15~++20220420111733+e13d2efed663/llvm/lib/Target/AArch64 -I include -I /build/llvm-toolchain-snapshot-15~++20220420111733+e13d2efed663/llvm/include -D _FORTIFY_SOURCE=2 -D NDEBUG -U NDEBUG -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/c++/10 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/x86_64-linux-gnu/c++/10 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/c++/10/backward -internal-isystem /usr/lib/llvm-15/lib/clang/15.0.0/include -internal-isystem /usr/local/include -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../x86_64-linux-gnu/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -fmacro-prefix-map=/build/llvm-toolchain-snapshot-15~++20220420111733+e13d2efed663/build-llvm/tools/clang/stage2-bins=build-llvm/tools/clang/stage2-bins -fmacro-prefix-map=/build/llvm-toolchain-snapshot-15~++20220420111733+e13d2efed663/= -fcoverage-prefix-map=/build/llvm-toolchain-snapshot-15~++20220420111733+e13d2efed663/build-llvm/tools/clang/stage2-bins=build-llvm/tools/clang/stage2-bins -fcoverage-prefix-map=/build/llvm-toolchain-snapshot-15~++20220420111733+e13d2efed663/= -O3 -Wno-unused-command-line-argument -Wno-unused-parameter -Wwrite-strings -Wno-missing-field-initializers -Wno-long-long -Wno-maybe-uninitialized -Wno-class-memaccess -Wno-redundant-move -Wno-pessimizing-move -Wno-noexcept-type -Wno-comment -std=c++14 -fdeprecated-macro -fdebug-compilation-dir=/build/llvm-toolchain-snapshot-15~++20220420111733+e13d2efed663/build-llvm/tools/clang/stage2-bins -fdebug-prefix-map=/build/llvm-toolchain-snapshot-15~++20220420111733+e13d2efed663/build-llvm/tools/clang/stage2-bins=build-llvm/tools/clang/stage2-bins -fdebug-prefix-map=/build/llvm-toolchain-snapshot-15~++20220420111733+e13d2efed663/= -ferror-limit 19 -fvisibility hidden -fvisibility-inlines-hidden -stack-protector 2 -fgnuc-version=4.2.1 -fcolor-diagnostics -vectorize-loops -vectorize-slp -analyzer-output=html -analyzer-config stable-report-filename=true -faddrsig 
-D__GCC_HAVE_DWARF2_CFI_ASM=1 -o /tmp/scan-build-2022-04-20-140412-16051-1 -x c++ /build/llvm-toolchain-snapshot-15~++20220420111733+e13d2efed663/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp

/build/llvm-toolchain-snapshot-15~++20220420111733+e13d2efed663/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp

1//===- AArch64InstructionSelector.cpp ----------------------------*- C++ -*-==//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8/// \file
9/// This file implements the targeting of the InstructionSelector class for
10/// AArch64.
11/// \todo This should be generated by TableGen.
12//===----------------------------------------------------------------------===//
13
14#include "AArch64GlobalISelUtils.h"
15#include "AArch64InstrInfo.h"
16#include "AArch64MachineFunctionInfo.h"
17#include "AArch64RegisterBankInfo.h"
18#include "AArch64RegisterInfo.h"
19#include "AArch64Subtarget.h"
20#include "AArch64TargetMachine.h"
21#include "MCTargetDesc/AArch64AddressingModes.h"
22#include "MCTargetDesc/AArch64MCTargetDesc.h"
23#include "llvm/ADT/Optional.h"
24#include "llvm/BinaryFormat/Dwarf.h"
25#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
26#include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
27#include "llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h"
28#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
29#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
30#include "llvm/CodeGen/GlobalISel/Utils.h"
31#include "llvm/CodeGen/MachineBasicBlock.h"
32#include "llvm/CodeGen/MachineConstantPool.h"
33#include "llvm/CodeGen/MachineFrameInfo.h"
34#include "llvm/CodeGen/MachineFunction.h"
35#include "llvm/CodeGen/MachineInstr.h"
36#include "llvm/CodeGen/MachineInstrBuilder.h"
37#include "llvm/CodeGen/MachineMemOperand.h"
38#include "llvm/CodeGen/MachineOperand.h"
39#include "llvm/CodeGen/MachineRegisterInfo.h"
40#include "llvm/CodeGen/TargetOpcodes.h"
41#include "llvm/IR/Constants.h"
42#include "llvm/IR/DerivedTypes.h"
43#include "llvm/IR/Instructions.h"
44#include "llvm/IR/IntrinsicsAArch64.h"
45#include "llvm/IR/PatternMatch.h"
46#include "llvm/IR/Type.h"
47#include "llvm/Pass.h"
48#include "llvm/Support/Debug.h"
49#include "llvm/Support/raw_ostream.h"
50
51#define DEBUG_TYPE "aarch64-isel"
52
53using namespace llvm;
54using namespace MIPatternMatch;
55using namespace AArch64GISelUtils;
56
57namespace llvm {
58class BlockFrequencyInfo;
59class ProfileSummaryInfo;
60}
61
62namespace {
63
64#define GET_GLOBALISEL_PREDICATE_BITSET
65#include "AArch64GenGlobalISel.inc"
66#undef GET_GLOBALISEL_PREDICATE_BITSET
67
68
69class AArch64InstructionSelector : public InstructionSelector {
70public:
71 AArch64InstructionSelector(const AArch64TargetMachine &TM,
72 const AArch64Subtarget &STI,
73 const AArch64RegisterBankInfo &RBI);
74
75 bool select(MachineInstr &I) override;
76 static const char *getName() { return DEBUG_TYPE; }
77
78 void setupMF(MachineFunction &MF, GISelKnownBits *KB,
79 CodeGenCoverage &CoverageInfo, ProfileSummaryInfo *PSI,
80 BlockFrequencyInfo *BFI) override {
81 InstructionSelector::setupMF(MF, KB, CoverageInfo, PSI, BFI);
82 MIB.setMF(MF);
83
84 // hasFnAttribute() is expensive to call on every BRCOND selection, so
85 // cache it here for each run of the selector.
86 ProduceNonFlagSettingCondBr =
87 !MF.getFunction().hasFnAttribute(Attribute::SpeculativeLoadHardening);
88 MFReturnAddr = Register();
89
90 processPHIs(MF);
91 }
92
93private:
94 /// tblgen-erated 'select' implementation, used as the initial selector for
95 /// the patterns that don't require complex C++.
96 bool selectImpl(MachineInstr &I, CodeGenCoverage &CoverageInfo) const;
97
98 // A lowering phase that runs before any selection attempts.
99 // Returns true if the instruction was modified.
100 bool preISelLower(MachineInstr &I);
101
102 // An early selection function that runs before the selectImpl() call.
103 bool earlySelect(MachineInstr &I);
104
105 // Do some preprocessing of G_PHIs before we begin selection.
106 void processPHIs(MachineFunction &MF);
107
108 bool earlySelectSHL(MachineInstr &I, MachineRegisterInfo &MRI);
109
110 /// Eliminate same-sized cross-bank copies into stores before selectImpl().
111 bool contractCrossBankCopyIntoStore(MachineInstr &I,
112 MachineRegisterInfo &MRI);
113
114 bool convertPtrAddToAdd(MachineInstr &I, MachineRegisterInfo &MRI);
115
116 bool selectVaStartAAPCS(MachineInstr &I, MachineFunction &MF,
117 MachineRegisterInfo &MRI) const;
118 bool selectVaStartDarwin(MachineInstr &I, MachineFunction &MF,
119 MachineRegisterInfo &MRI) const;
120
121 ///@{
122 /// Helper functions for selectCompareBranch.
123 bool selectCompareBranchFedByFCmp(MachineInstr &I, MachineInstr &FCmp,
124 MachineIRBuilder &MIB) const;
125 bool selectCompareBranchFedByICmp(MachineInstr &I, MachineInstr &ICmp,
126 MachineIRBuilder &MIB) const;
127 bool tryOptCompareBranchFedByICmp(MachineInstr &I, MachineInstr &ICmp,
128 MachineIRBuilder &MIB) const;
129 bool tryOptAndIntoCompareBranch(MachineInstr &AndInst, bool Invert,
130 MachineBasicBlock *DstMBB,
131 MachineIRBuilder &MIB) const;
132 ///@}
133
134 bool selectCompareBranch(MachineInstr &I, MachineFunction &MF,
135 MachineRegisterInfo &MRI);
136
137 bool selectVectorAshrLshr(MachineInstr &I, MachineRegisterInfo &MRI);
138 bool selectVectorSHL(MachineInstr &I, MachineRegisterInfo &MRI);
139
140 // Helper to generate an equivalent of scalar_to_vector into a new register,
141 // returned via 'Dst'.
142 MachineInstr *emitScalarToVector(unsigned EltSize,
143 const TargetRegisterClass *DstRC,
144 Register Scalar,
145 MachineIRBuilder &MIRBuilder) const;
146
147 /// Emit a lane insert into \p DstReg, or a new vector register if None is
148 /// provided.
149 ///
150 /// The lane inserted into is defined by \p LaneIdx. The vector source
151 /// register is given by \p SrcReg. The register containing the element is
152 /// given by \p EltReg.
153 MachineInstr *emitLaneInsert(Optional<Register> DstReg, Register SrcReg,
154 Register EltReg, unsigned LaneIdx,
155 const RegisterBank &RB,
156 MachineIRBuilder &MIRBuilder) const;
157
158 /// Emit a sequence of instructions representing a constant \p CV for a
159 /// vector register \p Dst. (E.g. a MOV, or a load from a constant pool.)
160 ///
161 /// \returns the last instruction in the sequence on success, and nullptr
162 /// otherwise.
163 MachineInstr *emitConstantVector(Register Dst, Constant *CV,
164 MachineIRBuilder &MIRBuilder,
165 MachineRegisterInfo &MRI);
166
167 bool selectInsertElt(MachineInstr &I, MachineRegisterInfo &MRI);
168 bool tryOptConstantBuildVec(MachineInstr &MI, LLT DstTy,
169 MachineRegisterInfo &MRI);
170 /// \returns true if a G_BUILD_VECTOR instruction \p MI can be selected as a
171 /// SUBREG_TO_REG.
172 bool tryOptBuildVecToSubregToReg(MachineInstr &MI, MachineRegisterInfo &MRI);
173 bool selectBuildVector(MachineInstr &I, MachineRegisterInfo &MRI);
174 bool selectMergeValues(MachineInstr &I, MachineRegisterInfo &MRI);
175 bool selectUnmergeValues(MachineInstr &I, MachineRegisterInfo &MRI);
176
177 bool selectShuffleVector(MachineInstr &I, MachineRegisterInfo &MRI);
178 bool selectExtractElt(MachineInstr &I, MachineRegisterInfo &MRI);
179 bool selectConcatVectors(MachineInstr &I, MachineRegisterInfo &MRI);
180 bool selectSplitVectorUnmerge(MachineInstr &I, MachineRegisterInfo &MRI);
181
182 /// Helper function to select vector load intrinsics like
183 /// @llvm.aarch64.neon.ld2.*, @llvm.aarch64.neon.ld4.*, etc.
184 /// \p Opc is the opcode that the selected instruction should use.
185 /// \p NumVecs is the number of vector destinations for the instruction.
186 /// \p I is the original G_INTRINSIC_W_SIDE_EFFECTS instruction.
187 bool selectVectorLoadIntrinsic(unsigned Opc, unsigned NumVecs,
188 MachineInstr &I);
189 bool selectIntrinsicWithSideEffects(MachineInstr &I,
190 MachineRegisterInfo &MRI);
191 bool selectIntrinsic(MachineInstr &I, MachineRegisterInfo &MRI);
192 bool selectVectorICmp(MachineInstr &I, MachineRegisterInfo &MRI);
193 bool selectIntrinsicTrunc(MachineInstr &I, MachineRegisterInfo &MRI) const;
194 bool selectIntrinsicRound(MachineInstr &I, MachineRegisterInfo &MRI) const;
195 bool selectJumpTable(MachineInstr &I, MachineRegisterInfo &MRI);
196 bool selectBrJT(MachineInstr &I, MachineRegisterInfo &MRI);
197 bool selectTLSGlobalValue(MachineInstr &I, MachineRegisterInfo &MRI);
198 bool selectReduction(MachineInstr &I, MachineRegisterInfo &MRI);
199 bool selectMOPS(MachineInstr &I, MachineRegisterInfo &MRI);
200 bool selectUSMovFromExtend(MachineInstr &I, MachineRegisterInfo &MRI);
201
202 unsigned emitConstantPoolEntry(const Constant *CPVal,
203 MachineFunction &MF) const;
204 MachineInstr *emitLoadFromConstantPool(const Constant *CPVal,
205 MachineIRBuilder &MIRBuilder) const;
206
207 // Emit a vector concat operation.
208 MachineInstr *emitVectorConcat(Optional<Register> Dst, Register Op1,
209 Register Op2,
210 MachineIRBuilder &MIRBuilder) const;
211
212 // Emit an integer compare between LHS and RHS, which checks for Predicate.
213 MachineInstr *emitIntegerCompare(MachineOperand &LHS, MachineOperand &RHS,
214 MachineOperand &Predicate,
215 MachineIRBuilder &MIRBuilder) const;
216
217 /// Emit a floating point comparison between \p LHS and \p RHS.
218 /// \p Pred if given is the intended predicate to use.
219 MachineInstr *emitFPCompare(Register LHS, Register RHS,
220 MachineIRBuilder &MIRBuilder,
221 Optional<CmpInst::Predicate> = None) const;
222
223 MachineInstr *emitInstr(unsigned Opcode,
224 std::initializer_list<llvm::DstOp> DstOps,
225 std::initializer_list<llvm::SrcOp> SrcOps,
226 MachineIRBuilder &MIRBuilder,
227 const ComplexRendererFns &RenderFns = None) const;
228 /// Helper function to emit an add or sub instruction.
229 ///
230 /// \p AddrModeAndSizeToOpcode must contain each of the opcode variants above
231 /// in a specific order.
232 ///
233 /// Below is an example of the expected input to \p AddrModeAndSizeToOpcode.
234 ///
235 /// \code
236/// const std::array<std::array<unsigned, 2>, 5> Table {
237 /// {{AArch64::ADDXri, AArch64::ADDWri},
238 /// {AArch64::ADDXrs, AArch64::ADDWrs},
239 /// {AArch64::ADDXrr, AArch64::ADDWrr},
240 /// {AArch64::SUBXri, AArch64::SUBWri},
241 /// {AArch64::ADDXrx, AArch64::ADDWrx}}};
242 /// \endcode
243 ///
244 /// Each row in the table corresponds to a different addressing mode. Each
245 /// column corresponds to a different register size.
246 ///
247 /// \attention Rows must be structured as follows:
248 /// - Row 0: The ri opcode variants
249 /// - Row 1: The rs opcode variants
250 /// - Row 2: The rr opcode variants
251 /// - Row 3: The ri opcode variants for negative immediates
252 /// - Row 4: The rx opcode variants
253 ///
254 /// \attention Columns must be structured as follows:
255 /// - Column 0: The 64-bit opcode variants
256 /// - Column 1: The 32-bit opcode variants
257 ///
258 /// \p Dst is the destination register of the binop to emit.
259 /// \p LHS is the left-hand operand of the binop to emit.
260 /// \p RHS is the right-hand operand of the binop to emit.
261 MachineInstr *emitAddSub(
262 const std::array<std::array<unsigned, 2>, 5> &AddrModeAndSizeToOpcode,
263 Register Dst, MachineOperand &LHS, MachineOperand &RHS,
264 MachineIRBuilder &MIRBuilder) const;
265 MachineInstr *emitADD(Register DefReg, MachineOperand &LHS,
266 MachineOperand &RHS,
267 MachineIRBuilder &MIRBuilder) const;
268 MachineInstr *emitADDS(Register Dst, MachineOperand &LHS, MachineOperand &RHS,
269 MachineIRBuilder &MIRBuilder) const;
270 MachineInstr *emitSUBS(Register Dst, MachineOperand &LHS, MachineOperand &RHS,
271 MachineIRBuilder &MIRBuilder) const;
272 MachineInstr *emitCMN(MachineOperand &LHS, MachineOperand &RHS,
273 MachineIRBuilder &MIRBuilder) const;
274 MachineInstr *emitTST(MachineOperand &LHS, MachineOperand &RHS,
275 MachineIRBuilder &MIRBuilder) const;
276 MachineInstr *emitSelect(Register Dst, Register LHS, Register RHS,
277 AArch64CC::CondCode CC,
278 MachineIRBuilder &MIRBuilder) const;
279 MachineInstr *emitExtractVectorElt(Optional<Register> DstReg,
280 const RegisterBank &DstRB, LLT ScalarTy,
281 Register VecReg, unsigned LaneIdx,
282 MachineIRBuilder &MIRBuilder) const;
283 MachineInstr *emitCSINC(Register Dst, Register Src1, Register Src2,
284 AArch64CC::CondCode Pred,
285 MachineIRBuilder &MIRBuilder) const;
286 /// Emit a CSet for a FP compare.
287 ///
288 /// \p Dst is expected to be a 32-bit scalar register.
289 MachineInstr *emitCSetForFCmp(Register Dst, CmpInst::Predicate Pred,
290 MachineIRBuilder &MIRBuilder) const;
291
292 /// Emit the overflow op for \p Opcode.
293 ///
294 /// \p Opcode is expected to be an overflow op's opcode, e.g. G_UADDO,
295 /// G_USUBO, etc.
296 std::pair<MachineInstr *, AArch64CC::CondCode>
297 emitOverflowOp(unsigned Opcode, Register Dst, MachineOperand &LHS,
298 MachineOperand &RHS, MachineIRBuilder &MIRBuilder) const;
299
300 /// Emit expression as a conjunction (a series of CCMP/CFCMP ops).
301 /// In some cases this is even possible with OR operations in the expression.
302 MachineInstr *emitConjunction(Register Val, AArch64CC::CondCode &OutCC,
303 MachineIRBuilder &MIB) const;
304 MachineInstr *emitConditionalComparison(Register LHS, Register RHS,
305 CmpInst::Predicate CC,
306 AArch64CC::CondCode Predicate,
307 AArch64CC::CondCode OutCC,
308 MachineIRBuilder &MIB) const;
309 MachineInstr *emitConjunctionRec(Register Val, AArch64CC::CondCode &OutCC,
310 bool Negate, Register CCOp,
311 AArch64CC::CondCode Predicate,
312 MachineIRBuilder &MIB) const;
313
314 /// Emit a TB(N)Z instruction which tests \p Bit in \p TestReg.
315 /// \p IsNegative is true if the test should be "not zero".
316 /// This will also optimize the test bit instruction when possible.
317 MachineInstr *emitTestBit(Register TestReg, uint64_t Bit, bool IsNegative,
318 MachineBasicBlock *DstMBB,
319 MachineIRBuilder &MIB) const;
320
321 /// Emit a CB(N)Z instruction which branches to \p DestMBB.
322 MachineInstr *emitCBZ(Register CompareReg, bool IsNegative,
323 MachineBasicBlock *DestMBB,
324 MachineIRBuilder &MIB) const;
325
326 // Equivalent to the i32shift_a and friends from AArch64InstrInfo.td.
327 // We use these manually instead of using the importer since it doesn't
328 // support SDNodeXForm.
329 ComplexRendererFns selectShiftA_32(const MachineOperand &Root) const;
330 ComplexRendererFns selectShiftB_32(const MachineOperand &Root) const;
331 ComplexRendererFns selectShiftA_64(const MachineOperand &Root) const;
332 ComplexRendererFns selectShiftB_64(const MachineOperand &Root) const;
333
334 ComplexRendererFns select12BitValueWithLeftShift(uint64_t Immed) const;
335 ComplexRendererFns selectArithImmed(MachineOperand &Root) const;
336 ComplexRendererFns selectNegArithImmed(MachineOperand &Root) const;
337
338 ComplexRendererFns selectAddrModeUnscaled(MachineOperand &Root,
339 unsigned Size) const;
340
341 ComplexRendererFns selectAddrModeUnscaled8(MachineOperand &Root) const {
342 return selectAddrModeUnscaled(Root, 1);
343 }
344 ComplexRendererFns selectAddrModeUnscaled16(MachineOperand &Root) const {
345 return selectAddrModeUnscaled(Root, 2);
346 }
347 ComplexRendererFns selectAddrModeUnscaled32(MachineOperand &Root) const {
348 return selectAddrModeUnscaled(Root, 4);
349 }
350 ComplexRendererFns selectAddrModeUnscaled64(MachineOperand &Root) const {
351 return selectAddrModeUnscaled(Root, 8);
352 }
353 ComplexRendererFns selectAddrModeUnscaled128(MachineOperand &Root) const {
354 return selectAddrModeUnscaled(Root, 16);
355 }
356
357 /// Helper to try to fold in a GISEL_ADD_LOW into an immediate, to be used
358 /// from complex pattern matchers like selectAddrModeIndexed().
359 ComplexRendererFns tryFoldAddLowIntoImm(MachineInstr &RootDef, unsigned Size,
360 MachineRegisterInfo &MRI) const;
361
362 ComplexRendererFns selectAddrModeIndexed(MachineOperand &Root,
363 unsigned Size) const;
364 template <int Width>
365 ComplexRendererFns selectAddrModeIndexed(MachineOperand &Root) const {
366 return selectAddrModeIndexed(Root, Width / 8);
367 }
368
369 bool isWorthFoldingIntoExtendedReg(MachineInstr &MI,
370 const MachineRegisterInfo &MRI) const;
371 ComplexRendererFns
372 selectAddrModeShiftedExtendXReg(MachineOperand &Root,
373 unsigned SizeInBytes) const;
374
375 /// Returns a \p ComplexRendererFns which contains a base, offset, and whether
376 /// or not a shift + extend should be folded into an addressing mode. Returns
377 /// None when this is not profitable or possible.
378 ComplexRendererFns
379 selectExtendedSHL(MachineOperand &Root, MachineOperand &Base,
380 MachineOperand &Offset, unsigned SizeInBytes,
381 bool WantsExt) const;
382 ComplexRendererFns selectAddrModeRegisterOffset(MachineOperand &Root) const;
383 ComplexRendererFns selectAddrModeXRO(MachineOperand &Root,
384 unsigned SizeInBytes) const;
385 template <int Width>
386 ComplexRendererFns selectAddrModeXRO(MachineOperand &Root) const {
387 return selectAddrModeXRO(Root, Width / 8);
388 }
389
390 ComplexRendererFns selectAddrModeWRO(MachineOperand &Root,
391 unsigned SizeInBytes) const;
392 template <int Width>
393 ComplexRendererFns selectAddrModeWRO(MachineOperand &Root) const {
394 return selectAddrModeWRO(Root, Width / 8);
395 }
396
397 ComplexRendererFns selectShiftedRegister(MachineOperand &Root,
398 bool AllowROR = false) const;
399
400 ComplexRendererFns selectArithShiftedRegister(MachineOperand &Root) const {
401 return selectShiftedRegister(Root);
402 }
403
404 ComplexRendererFns selectLogicalShiftedRegister(MachineOperand &Root) const {
405 return selectShiftedRegister(Root, true);
406 }
407
408 /// Given an extend instruction, determine the correct shift-extend type for
409 /// that instruction.
410 ///
411 /// If the instruction is going to be used in a load or store, pass
412 /// \p IsLoadStore = true.
413 AArch64_AM::ShiftExtendType
414 getExtendTypeForInst(MachineInstr &MI, MachineRegisterInfo &MRI,
415 bool IsLoadStore = false) const;
416
417 /// Move \p Reg to \p RC if \p Reg is not already on \p RC.
418 ///
419 /// \returns Either \p Reg if no change was necessary, or the new register
420 /// created by moving \p Reg.
421 ///
422 /// Note: This uses emitCopy right now.
423 Register moveScalarRegClass(Register Reg, const TargetRegisterClass &RC,
424 MachineIRBuilder &MIB) const;
425
426 ComplexRendererFns selectArithExtendedRegister(MachineOperand &Root) const;
427
428 void renderTruncImm(MachineInstrBuilder &MIB, const MachineInstr &MI,
429 int OpIdx = -1) const;
430 void renderLogicalImm32(MachineInstrBuilder &MIB, const MachineInstr &I,
431 int OpIdx = -1) const;
432 void renderLogicalImm64(MachineInstrBuilder &MIB, const MachineInstr &I,
433 int OpIdx = -1) const;
434 void renderFPImm16(MachineInstrBuilder &MIB, const MachineInstr &MI,
435 int OpIdx = -1) const;
436 void renderFPImm32(MachineInstrBuilder &MIB, const MachineInstr &MI,
437 int OpIdx = -1) const;
438 void renderFPImm64(MachineInstrBuilder &MIB, const MachineInstr &MI,
439 int OpIdx = -1) const;
440 void renderFPImm32SIMDModImmType4(MachineInstrBuilder &MIB,
441 const MachineInstr &MI,
442 int OpIdx = -1) const;
443
444 // Materialize a GlobalValue or BlockAddress using a movz+movk sequence.
445 void materializeLargeCMVal(MachineInstr &I, const Value *V, unsigned OpFlags);
446
447 // Optimization methods.
448 bool tryOptSelect(GSelect &Sel);
449 bool tryOptSelectConjunction(GSelect &Sel, MachineInstr &CondMI);
450 MachineInstr *tryFoldIntegerCompare(MachineOperand &LHS, MachineOperand &RHS,
451 MachineOperand &Predicate,
452 MachineIRBuilder &MIRBuilder) const;
453
454 /// Return true if \p MI is a load or store of \p NumBytes bytes.
455 bool isLoadStoreOfNumBytes(const MachineInstr &MI, unsigned NumBytes) const;
456
457 /// Returns true if \p MI is guaranteed to have the high-half of a 64-bit
458 /// register zeroed out. In other words, the result of MI has been explicitly
459 /// zero extended.
460 bool isDef32(const MachineInstr &MI) const;
461
462 const AArch64TargetMachine &TM;
463 const AArch64Subtarget &STI;
464 const AArch64InstrInfo &TII;
465 const AArch64RegisterInfo &TRI;
466 const AArch64RegisterBankInfo &RBI;
467
468 bool ProduceNonFlagSettingCondBr = false;
469
470 // Some cached values used during selection.
471 // We use LR as a live-in register, and we keep track of it here as it can be
472 // clobbered by calls.
473 Register MFReturnAddr;
474
475 MachineIRBuilder MIB;
476
477#define GET_GLOBALISEL_PREDICATES_DECL
478#include "AArch64GenGlobalISel.inc"
479#undef GET_GLOBALISEL_PREDICATES_DECL
480
481// We declare the temporaries used by selectImpl() in the class to minimize the
482// cost of constructing placeholder values.
483#define GET_GLOBALISEL_TEMPORARIES_DECL
484#include "AArch64GenGlobalISel.inc"
485#undef GET_GLOBALISEL_TEMPORARIES_DECL
486};
487
488} // end anonymous namespace
489
490#define GET_GLOBALISEL_IMPL
491#include "AArch64GenGlobalISel.inc"
492#undef GET_GLOBALISEL_IMPL
493
494AArch64InstructionSelector::AArch64InstructionSelector(
495 const AArch64TargetMachine &TM, const AArch64Subtarget &STI,
496 const AArch64RegisterBankInfo &RBI)
497 : TM(TM), STI(STI), TII(*STI.getInstrInfo()), TRI(*STI.getRegisterInfo()),
498 RBI(RBI),
499#define GET_GLOBALISEL_PREDICATES_INIT
500#include "AArch64GenGlobalISel.inc"
501#undef GET_GLOBALISEL_PREDICATES_INIT
502#define GET_GLOBALISEL_TEMPORARIES_INIT
503#include "AArch64GenGlobalISel.inc"
504#undef GET_GLOBALISEL_TEMPORARIES_INIT
505{
506}
507
508// FIXME: This should be target-independent, inferred from the types declared
509// for each class in the bank.
510//
511/// Given a register bank, and a type, return the smallest register class that
512/// can represent that combination.
513static const TargetRegisterClass *
514getRegClassForTypeOnBank(LLT Ty, const RegisterBank &RB,
515 bool GetAllRegSet = false) {
516 if (RB.getID() == AArch64::GPRRegBankID) {
517 if (Ty.getSizeInBits() <= 32)
518 return GetAllRegSet ? &AArch64::GPR32allRegClass
519 : &AArch64::GPR32RegClass;
520 if (Ty.getSizeInBits() == 64)
521 return GetAllRegSet ? &AArch64::GPR64allRegClass
522 : &AArch64::GPR64RegClass;
523 if (Ty.getSizeInBits() == 128)
524 return &AArch64::XSeqPairsClassRegClass;
525 return nullptr;
526 }
527
528 if (RB.getID() == AArch64::FPRRegBankID) {
529 switch (Ty.getSizeInBits()) {
530 case 8:
531 return &AArch64::FPR8RegClass;
532 case 16:
533 return &AArch64::FPR16RegClass;
534 case 32:
535 return &AArch64::FPR32RegClass;
536 case 64:
537 return &AArch64::FPR64RegClass;
538 case 128:
539 return &AArch64::FPR128RegClass;
540 }
541 return nullptr;
542 }
543
544 return nullptr;
545}
546
547/// Given a register bank, and size in bits, return the smallest register class
548/// that can represent that combination.
549static const TargetRegisterClass *
550getMinClassForRegBank(const RegisterBank &RB, unsigned SizeInBits,
551 bool GetAllRegSet = false) {
552 unsigned RegBankID = RB.getID();
553
554 if (RegBankID == AArch64::GPRRegBankID) {
555 if (SizeInBits <= 32)
556 return GetAllRegSet ? &AArch64::GPR32allRegClass
557 : &AArch64::GPR32RegClass;
558 if (SizeInBits == 64)
559 return GetAllRegSet ? &AArch64::GPR64allRegClass
560 : &AArch64::GPR64RegClass;
561 if (SizeInBits == 128)
562 return &AArch64::XSeqPairsClassRegClass;
563 }
564
565 if (RegBankID == AArch64::FPRRegBankID) {
566 switch (SizeInBits) {
567 default:
568 return nullptr;
569 case 8:
570 return &AArch64::FPR8RegClass;
571 case 16:
572 return &AArch64::FPR16RegClass;
573 case 32:
574 return &AArch64::FPR32RegClass;
575 case 64:
576 return &AArch64::FPR64RegClass;
577 case 128:
578 return &AArch64::FPR128RegClass;
579 }
580 }
581
582 return nullptr;
583}
584
585/// Finds the correct subregister to use for a given register class and writes it to \p SubReg; returns false if none could be determined.
586static bool getSubRegForClass(const TargetRegisterClass *RC,
587 const TargetRegisterInfo &TRI, unsigned &SubReg) {
588 switch (TRI.getRegSizeInBits(*RC)) {
589 case 8:
590 SubReg = AArch64::bsub;
591 break;
592 case 16:
593 SubReg = AArch64::hsub;
594 break;
595 case 32:
596 if (RC != &AArch64::FPR32RegClass)
597 SubReg = AArch64::sub_32;
598 else
599 SubReg = AArch64::ssub;
600 break;
601 case 64:
602 SubReg = AArch64::dsub;
603 break;
604 default:
605 LLVM_DEBUG(
606 dbgs() << "Couldn't find appropriate subregister for register class.");
607 return false;
608 }
609
610 return true;
611}
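Note that getSubRegForClass() only writes \p SubReg for the sizes it recognizes and returns false otherwise, so a caller that ignores the boolean result can go on to read an uninitialized SubReg. A defensive call-site sketch (hypothetical, not part of this file; it assumes an RC and TRI are available from the surrounding code):

// Hypothetical call site: check the result before using SubReg, because the
// default case above returns false without assigning it.
static bool exampleCaller(const TargetRegisterClass *RC,
                          const TargetRegisterInfo &TRI) {
  unsigned SubReg;
  if (!getSubRegForClass(RC, TRI, SubReg))
    return false; // SubReg is still uninitialized here; do not read it.
  // ... SubReg now holds a valid subregister index ...
  return true;
}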
612
613/// Returns the minimum size the given register bank can hold.
614static unsigned getMinSizeForRegBank(const RegisterBank &RB) {
615 switch (RB.getID()) {
616 case AArch64::GPRRegBankID:
617 return 32;
618 case AArch64::FPRRegBankID:
619 return 8;
620 default:
621 llvm_unreachable("Tried to get minimum size for unknown register bank.");
622 }
623}
624
625/// Create a REG_SEQUENCE instruction using the registers in \p Regs.
626/// Helper function for functions like createDTuple and createQTuple.
627///
628/// \p RegClassIDs - The list of register class IDs available for some tuple of
629/// a scalar class. E.g. QQRegClassID, QQQRegClassID, QQQQRegClassID. This is
630/// expected to contain between 2 and 4 tuple classes.
631///
632/// \p SubRegs - The list of subregister classes associated with each register
633/// class ID in \p RegClassIDs. E.g., QQRegClassID should use the qsub0
634/// subregister class. The index of each subregister class is expected to
635/// correspond with the index of each register class.
636///
637/// \returns Either the destination register of REG_SEQUENCE instruction that
638/// was created, or the 0th element of \p Regs if \p Regs contains a single
639/// element.
640static Register createTuple(ArrayRef<Register> Regs,
641 const unsigned RegClassIDs[],
642 const unsigned SubRegs[], MachineIRBuilder &MIB) {
643 unsigned NumRegs = Regs.size();
644 if (NumRegs == 1)
645 return Regs[0];
646 assert(NumRegs >= 2 && NumRegs <= 4 &&
647 "Only support between two and 4 registers in a tuple!");
648 const TargetRegisterInfo *TRI = MIB.getMF().getSubtarget().getRegisterInfo();
649 auto *DesiredClass = TRI->getRegClass(RegClassIDs[NumRegs - 2]);
650 auto RegSequence =
651 MIB.buildInstr(TargetOpcode::REG_SEQUENCE, {DesiredClass}, {});
652 for (unsigned I = 0, E = Regs.size(); I < E; ++I) {
653 RegSequence.addUse(Regs[I]);
654 RegSequence.addImm(SubRegs[I]);
655 }
656 return RegSequence.getReg(0);
657}
658
659/// Create a tuple of D-registers using the registers in \p Regs.
660static Register createDTuple(ArrayRef<Register> Regs, MachineIRBuilder &MIB) {
661 static const unsigned RegClassIDs[] = {
662 AArch64::DDRegClassID, AArch64::DDDRegClassID, AArch64::DDDDRegClassID};
663 static const unsigned SubRegs[] = {AArch64::dsub0, AArch64::dsub1,
664 AArch64::dsub2, AArch64::dsub3};
665 return createTuple(Regs, RegClassIDs, SubRegs, MIB);
666}
667
668/// Create a tuple of Q-registers using the registers in \p Regs.
669static Register createQTuple(ArrayRef<Register> Regs, MachineIRBuilder &MIB) {
670 static const unsigned RegClassIDs[] = {
671 AArch64::QQRegClassID, AArch64::QQQRegClassID, AArch64::QQQQRegClassID};
672 static const unsigned SubRegs[] = {AArch64::qsub0, AArch64::qsub1,
673 AArch64::qsub2, AArch64::qsub3};
674 return createTuple(Regs, RegClassIDs, SubRegs, MIB);
675}
676
677static Optional<uint64_t> getImmedFromMO(const MachineOperand &Root) {
678 auto &MI = *Root.getParent();
679 auto &MBB = *MI.getParent();
680 auto &MF = *MBB.getParent();
681 auto &MRI = MF.getRegInfo();
682 uint64_t Immed;
683 if (Root.isImm())
684 Immed = Root.getImm();
685 else if (Root.isCImm())
686 Immed = Root.getCImm()->getZExtValue();
687 else if (Root.isReg()) {
688 auto ValAndVReg =
689 getIConstantVRegValWithLookThrough(Root.getReg(), MRI, true);
690 if (!ValAndVReg)
691 return None;
692 Immed = ValAndVReg->Value.getSExtValue();
693 } else
694 return None;
695 return Immed;
696}
697
698/// Check whether \p I is a currently unsupported binary operation:
699/// - it has an unsized type
700/// - an operand is not a vreg
701/// - not all operands are in the same bank
702/// These are checks that should someday live in the verifier, but right now,
703/// these are mostly limitations of the aarch64 selector.
704static bool unsupportedBinOp(const MachineInstr &I,
705 const AArch64RegisterBankInfo &RBI,
706 const MachineRegisterInfo &MRI,
707 const AArch64RegisterInfo &TRI) {
708 LLT Ty = MRI.getType(I.getOperand(0).getReg());
709 if (!Ty.isValid()) {
710 LLVM_DEBUG(dbgs() << "Generic binop register should be typed\n");
711 return true;
712 }
713
714 const RegisterBank *PrevOpBank = nullptr;
715 for (auto &MO : I.operands()) {
716 // FIXME: Support non-register operands.
717 if (!MO.isReg()) {
718 LLVM_DEBUG(dbgs() << "Generic inst non-reg operands are unsupported\n");
719 return true;
720 }
721
722 // FIXME: Can generic operations have physical registers operands? If
723 // so, this will need to be taught about that, and we'll need to get the
724 // bank out of the minimal class for the register.
725 // Either way, this needs to be documented (and possibly verified).
726 if (!Register::isVirtualRegister(MO.getReg())) {
727 LLVM_DEBUG(dbgs() << "Generic inst has physical register operand\n");
728 return true;
729 }
730
731 const RegisterBank *OpBank = RBI.getRegBank(MO.getReg(), MRI, TRI);
732 if (!OpBank) {
733 LLVM_DEBUG(dbgs() << "Generic register has no bank or class\n");
734 return true;
735 }
736
737 if (PrevOpBank && OpBank != PrevOpBank) {
738 LLVM_DEBUG(dbgs() << "Generic inst operands have different banks\n");
739 return true;
740 }
741 PrevOpBank = OpBank;
742 }
743 return false;
744}
745
746/// Select the AArch64 opcode for the basic binary operation \p GenericOpc
747/// (such as G_OR or G_SDIV), appropriate for the register bank \p RegBankID
748/// and of size \p OpSize.
749/// \returns \p GenericOpc if the combination is unsupported.
750static unsigned selectBinaryOp(unsigned GenericOpc, unsigned RegBankID,
751 unsigned OpSize) {
752 switch (RegBankID) {
753 case AArch64::GPRRegBankID:
754 if (OpSize == 32) {
755 switch (GenericOpc) {
756 case TargetOpcode::G_SHL:
757 return AArch64::LSLVWr;
758 case TargetOpcode::G_LSHR:
759 return AArch64::LSRVWr;
760 case TargetOpcode::G_ASHR:
761 return AArch64::ASRVWr;
762 default:
763 return GenericOpc;
764 }
765 } else if (OpSize == 64) {
766 switch (GenericOpc) {
767 case TargetOpcode::G_PTR_ADD:
768 return AArch64::ADDXrr;
769 case TargetOpcode::G_SHL:
770 return AArch64::LSLVXr;
771 case TargetOpcode::G_LSHR:
772 return AArch64::LSRVXr;
773 case TargetOpcode::G_ASHR:
774 return AArch64::ASRVXr;
775 default:
776 return GenericOpc;
777 }
778 }
779 break;
780 case AArch64::FPRRegBankID:
781 switch (OpSize) {
782 case 32:
783 switch (GenericOpc) {
784 case TargetOpcode::G_FADD:
785 return AArch64::FADDSrr;
786 case TargetOpcode::G_FSUB:
787 return AArch64::FSUBSrr;
788 case TargetOpcode::G_FMUL:
789 return AArch64::FMULSrr;
790 case TargetOpcode::G_FDIV:
791 return AArch64::FDIVSrr;
792 default:
793 return GenericOpc;
794 }
795 case 64:
796 switch (GenericOpc) {
797 case TargetOpcode::G_FADD:
798 return AArch64::FADDDrr;
799 case TargetOpcode::G_FSUB:
800 return AArch64::FSUBDrr;
801 case TargetOpcode::G_FMUL:
802 return AArch64::FMULDrr;
803 case TargetOpcode::G_FDIV:
804 return AArch64::FDIVDrr;
805 case TargetOpcode::G_OR:
806 return AArch64::ORRv8i8;
807 default:
808 return GenericOpc;
809 }
810 }
811 break;
812 }
813 return GenericOpc;
814}
815
816/// Select the AArch64 opcode for the G_LOAD or G_STORE operation \p GenericOpc,
817/// appropriate for the (value) register bank \p RegBankID and of memory access
818/// size \p OpSize. This returns the variant with the base+unsigned-immediate
819/// addressing mode (e.g., LDRXui).
820/// \returns \p GenericOpc if the combination is unsupported.
821static unsigned selectLoadStoreUIOp(unsigned GenericOpc, unsigned RegBankID,
822 unsigned OpSize) {
823 const bool isStore = GenericOpc == TargetOpcode::G_STORE;
824 switch (RegBankID) {
825 case AArch64::GPRRegBankID:
826 switch (OpSize) {
827 case 8:
828 return isStore ? AArch64::STRBBui : AArch64::LDRBBui;
829 case 16:
830 return isStore ? AArch64::STRHHui : AArch64::LDRHHui;
831 case 32:
832 return isStore ? AArch64::STRWui : AArch64::LDRWui;
833 case 64:
834 return isStore ? AArch64::STRXui : AArch64::LDRXui;
835 }
836 break;
837 case AArch64::FPRRegBankID:
838 switch (OpSize) {
839 case 8:
840 return isStore ? AArch64::STRBui : AArch64::LDRBui;
841 case 16:
842 return isStore ? AArch64::STRHui : AArch64::LDRHui;
843 case 32:
844 return isStore ? AArch64::STRSui : AArch64::LDRSui;
845 case 64:
846 return isStore ? AArch64::STRDui : AArch64::LDRDui;
847 case 128:
848 return isStore ? AArch64::STRQui : AArch64::LDRQui;
849 }
850 break;
851 }
852 return GenericOpc;
853}
854
855/// Helper function for selectCopy. Inserts a subregister copy from \p SrcReg
856/// to \p *To.
857///
858/// E.g "To = COPY SrcReg:SubReg"
859static bool copySubReg(MachineInstr &I, MachineRegisterInfo &MRI,
860 const RegisterBankInfo &RBI, Register SrcReg,
861 const TargetRegisterClass *To, unsigned SubReg) {
862 assert(SrcReg.isValid() && "Expected a valid source register?");
863 assert(To && "Destination register class cannot be null");
864 assert(SubReg && "Expected a valid subregister");
865
866 MachineIRBuilder MIB(I);
867 auto SubRegCopy =
868 MIB.buildInstr(TargetOpcode::COPY, {To}, {}).addReg(SrcReg, 0, SubReg);
869 MachineOperand &RegOp = I.getOperand(1);
870 RegOp.setReg(SubRegCopy.getReg(0));
871
872 // It's possible that the destination register won't be constrained. Make
873 // sure that happens.
874 if (!Register::isPhysicalRegister(I.getOperand(0).getReg()))
875 RBI.constrainGenericRegister(I.getOperand(0).getReg(), *To, MRI);
876
877 return true;
878}
879
880/// Helper function to get the source and destination register classes for a
881/// copy. Returns a std::pair containing the source register class for the
882/// copy, and the destination register class for the copy. If a register class
883/// cannot be determined, then it will be nullptr.
884static std::pair<const TargetRegisterClass *, const TargetRegisterClass *>
885getRegClassesForCopy(MachineInstr &I, const TargetInstrInfo &TII,
886 MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI,
887 const RegisterBankInfo &RBI) {
888 Register DstReg = I.getOperand(0).getReg();
889 Register SrcReg = I.getOperand(1).getReg();
890 const RegisterBank &DstRegBank = *RBI.getRegBank(DstReg, MRI, TRI);
891 const RegisterBank &SrcRegBank = *RBI.getRegBank(SrcReg, MRI, TRI);
892 unsigned DstSize = RBI.getSizeInBits(DstReg, MRI, TRI);
893 unsigned SrcSize = RBI.getSizeInBits(SrcReg, MRI, TRI);
894
895 // Special casing for cross-bank copies of s1s. We can technically represent
896 // a 1-bit value with any size of register. The minimum size for a GPR is 32
897 // bits. So, we need to put the FPR on 32 bits as well.
898 //
899 // FIXME: I'm not sure if this case holds true outside of copies. If it does,
900 // then we can pull it into the helpers that get the appropriate class for a
901 // register bank. Or make a new helper that carries along some constraint
902 // information.
903 if (SrcRegBank != DstRegBank && (DstSize == 1 && SrcSize == 1))
904 SrcSize = DstSize = 32;
905
906 return {getMinClassForRegBank(SrcRegBank, SrcSize, true),
907 getMinClassForRegBank(DstRegBank, DstSize, true)};
908}
909
910static bool selectCopy(MachineInstr &I, const TargetInstrInfo &TII,
911 MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI,
912 const RegisterBankInfo &RBI) {
913 Register DstReg = I.getOperand(0).getReg();
914 Register SrcReg = I.getOperand(1).getReg();
915 const RegisterBank &DstRegBank = *RBI.getRegBank(DstReg, MRI, TRI);
916 const RegisterBank &SrcRegBank = *RBI.getRegBank(SrcReg, MRI, TRI);
917
918 // Find the correct register classes for the source and destination registers.
919 const TargetRegisterClass *SrcRC;
920 const TargetRegisterClass *DstRC;
921 std::tie(SrcRC, DstRC) = getRegClassesForCopy(I, TII, MRI, TRI, RBI);
922
923 if (!DstRC) {
924 LLVM_DEBUG(dbgs() << "Unexpected dest size "
925 << RBI.getSizeInBits(DstReg, MRI, TRI) << '\n');
926 return false;
927 }
928
929 // Is this a copy? If so, then we may need to insert a subregister copy.
930 if (I.isCopy()) {
931 // Yes. Check if there's anything to fix up.
932 if (!SrcRC) {
933 LLVM_DEBUG(dbgs() << "Couldn't determine source register class\n");
934 return false;
935 }
936
937 unsigned SrcSize = TRI.getRegSizeInBits(*SrcRC);
938 unsigned DstSize = TRI.getRegSizeInBits(*DstRC);
939 unsigned SubReg;
940
941 // If the source bank doesn't support a subregister copy small enough,
942 // then we first need to copy to the destination bank.
943 if (getMinSizeForRegBank(SrcRegBank) > DstSize) {
944 const TargetRegisterClass *DstTempRC =
945 getMinClassForRegBank(DstRegBank, SrcSize, /* GetAllRegSet */ true);
946 getSubRegForClass(DstRC, TRI, SubReg);
947
948 MachineIRBuilder MIB(I);
949 auto Copy = MIB.buildCopy({DstTempRC}, {SrcReg});
950 copySubReg(I, MRI, RBI, Copy.getReg(0), DstRC, SubReg);
951 } else if (SrcSize > DstSize) {
952 // If the source register is bigger than the destination we need to
953 // perform a subregister copy.
954 const TargetRegisterClass *SubRegRC =
955 getMinClassForRegBank(SrcRegBank, DstSize, /* GetAllRegSet */ true);
956 getSubRegForClass(SubRegRC, TRI, SubReg);
957 copySubReg(I, MRI, RBI, SrcReg, DstRC, SubReg);
958 } else if (DstSize > SrcSize) {
959 // If the destination register is bigger than the source we need to do
960 // a promotion using SUBREG_TO_REG.
961 const TargetRegisterClass *PromotionRC =
962 getMinClassForRegBank(SrcRegBank, DstSize, /* GetAllRegSet */ true);
963 getSubRegForClass(SrcRC, TRI, SubReg);
964
965 Register PromoteReg = MRI.createVirtualRegister(PromotionRC);
966 BuildMI(*I.getParent(), I, I.getDebugLoc(),
967 TII.get(AArch64::SUBREG_TO_REG), PromoteReg)
968 .addImm(0)
969 .addUse(SrcReg)
970 .addImm(SubReg);
971 MachineOperand &RegOp = I.getOperand(1);
972 RegOp.setReg(PromoteReg);
973 }
974
975 // If the destination is a physical register, then there's nothing to
976 // change, so we're done.
977 if (Register::isPhysicalRegister(DstReg))
978 return true;
979 }
980
981 // No need to constrain SrcReg. It will get constrained when we hit another
982 // of its uses or defs. Copies do not have constraints.
983 if (!RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) {
984 LLVM_DEBUG(dbgs() << "Failed to constrain " << TII.getName(I.getOpcode())
985 << " operand\n");
986 return false;
987 }
988
989 // If this is a GPR ZEXT that we want to just reduce down into a copy.
990 // The sizes will be mismatched with the source < 32b but that's ok.
991 if (I.getOpcode() == TargetOpcode::G_ZEXT) {
992 I.setDesc(TII.get(AArch64::COPY));
993 assert(SrcRegBank.getID() == AArch64::GPRRegBankID);
994 return selectCopy(I, TII, MRI, TRI, RBI);
995 }
996
997 I.setDesc(TII.get(AArch64::COPY));
998 return true;
999}
1000
1001static unsigned selectFPConvOpc(unsigned GenericOpc, LLT DstTy, LLT SrcTy) {
1002 if (!DstTy.isScalar() || !SrcTy.isScalar())
1003 return GenericOpc;
1004
1005 const unsigned DstSize = DstTy.getSizeInBits();
1006 const unsigned SrcSize = SrcTy.getSizeInBits();
1007
1008 switch (DstSize) {
1009 case 32:
1010 switch (SrcSize) {
1011 case 32:
1012 switch (GenericOpc) {
1013 case TargetOpcode::G_SITOFP:
1014 return AArch64::SCVTFUWSri;
1015 case TargetOpcode::G_UITOFP:
1016 return AArch64::UCVTFUWSri;
1017 case TargetOpcode::G_FPTOSI:
1018 return AArch64::FCVTZSUWSr;
1019 case TargetOpcode::G_FPTOUI:
1020 return AArch64::FCVTZUUWSr;
1021 default:
1022 return GenericOpc;
1023 }
1024 case 64:
1025 switch (GenericOpc) {
1026 case TargetOpcode::G_SITOFP:
1027 return AArch64::SCVTFUXSri;
1028 case TargetOpcode::G_UITOFP:
1029 return AArch64::UCVTFUXSri;
1030 case TargetOpcode::G_FPTOSI:
1031 return AArch64::FCVTZSUWDr;
1032 case TargetOpcode::G_FPTOUI:
1033 return AArch64::FCVTZUUWDr;
1034 default:
1035 return GenericOpc;
1036 }
1037 default:
1038 return GenericOpc;
1039 }
1040 case 64:
1041 switch (SrcSize) {
1042 case 32:
1043 switch (GenericOpc) {
1044 case TargetOpcode::G_SITOFP:
1045 return AArch64::SCVTFUWDri;
1046 case TargetOpcode::G_UITOFP:
1047 return AArch64::UCVTFUWDri;
1048 case TargetOpcode::G_FPTOSI:
1049 return AArch64::FCVTZSUXSr;
1050 case TargetOpcode::G_FPTOUI:
1051 return AArch64::FCVTZUUXSr;
1052 default:
1053 return GenericOpc;
1054 }
1055 case 64:
1056 switch (GenericOpc) {
1057 case TargetOpcode::G_SITOFP:
1058 return AArch64::SCVTFUXDri;
1059 case TargetOpcode::G_UITOFP:
1060 return AArch64::UCVTFUXDri;
1061 case TargetOpcode::G_FPTOSI:
1062 return AArch64::FCVTZSUXDr;
1063 case TargetOpcode::G_FPTOUI:
1064 return AArch64::FCVTZUUXDr;
1065 default:
1066 return GenericOpc;
1067 }
1068 default:
1069 return GenericOpc;
1070 }
1071 default:
1072 return GenericOpc;
1073 };
1074 return GenericOpc;
1075}
1076
1077MachineInstr *
1078AArch64InstructionSelector::emitSelect(Register Dst, Register True,
1079 Register False, AArch64CC::CondCode CC,
1080 MachineIRBuilder &MIB) const {
1081 MachineRegisterInfo &MRI = *MIB.getMRI();
1082 assert(RBI.getRegBank(False, MRI, TRI)->getID() ==
1083 RBI.getRegBank(True, MRI, TRI)->getID() &&
1084 "Expected both select operands to have the same regbank?");
1085 LLT Ty = MRI.getType(True);
1086 if (Ty.isVector())
1087 return nullptr;
1088 const unsigned Size = Ty.getSizeInBits();
1089 assert((Size == 32 || Size == 64) &&
1090 "Expected 32 bit or 64 bit select only?");
1091 const bool Is32Bit = Size == 32;
1092 if (RBI.getRegBank(True, MRI, TRI)->getID() != AArch64::GPRRegBankID) {
1093 unsigned Opc = Is32Bit ? AArch64::FCSELSrrr : AArch64::FCSELDrrr;
1094 auto FCSel = MIB.buildInstr(Opc, {Dst}, {True, False}).addImm(CC);
1095 constrainSelectedInstRegOperands(*FCSel, TII, TRI, RBI);
1096 return &*FCSel;
1097 }
1098
1099 // By default, we'll try and emit a CSEL.
1100 unsigned Opc = Is32Bit ? AArch64::CSELWr : AArch64::CSELXr;
1101 bool Optimized = false;
1102 auto TryFoldBinOpIntoSelect = [&Opc, Is32Bit, &CC, &MRI,
1103 &Optimized](Register &Reg, Register &OtherReg,
1104 bool Invert) {
1105 if (Optimized)
1106 return false;
1107
1108 // Attempt to fold:
1109 //
1110 // %sub = G_SUB 0, %x
1111 // %select = G_SELECT cc, %reg, %sub
1112 //
1113 // Into:
1114 // %select = CSNEG %reg, %x, cc
1115 Register MatchReg;
1116 if (mi_match(Reg, MRI, m_Neg(m_Reg(MatchReg)))) {
1117 Opc = Is32Bit ? AArch64::CSNEGWr : AArch64::CSNEGXr;
1118 Reg = MatchReg;
1119 if (Invert) {
1120 CC = AArch64CC::getInvertedCondCode(CC);
1121 std::swap(Reg, OtherReg);
1122 }
1123 return true;
1124 }
1125
1126 // Attempt to fold:
1127 //
1128 // %xor = G_XOR %x, -1
1129 // %select = G_SELECT cc, %reg, %xor
1130 //
1131 // Into:
1132 // %select = CSINV %reg, %x, cc
1133 if (mi_match(Reg, MRI, m_Not(m_Reg(MatchReg)))) {
1134 Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;
1135 Reg = MatchReg;
1136 if (Invert) {
1137 CC = AArch64CC::getInvertedCondCode(CC);
1138 std::swap(Reg, OtherReg);
1139 }
1140 return true;
1141 }
1142
1143 // Attempt to fold:
1144 //
1145 // %add = G_ADD %x, 1
1146 // %select = G_SELECT cc, %reg, %add
1147 //
1148 // Into:
1149 // %select = CSINC %reg, %x, cc
1150 if (mi_match(Reg, MRI,
1151 m_any_of(m_GAdd(m_Reg(MatchReg), m_SpecificICst(1)),
1152 m_GPtrAdd(m_Reg(MatchReg), m_SpecificICst(1))))) {
1153 Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;
1154 Reg = MatchReg;
1155 if (Invert) {
1156 CC = AArch64CC::getInvertedCondCode(CC);
1157 std::swap(Reg, OtherReg);
1158 }
1159 return true;
1160 }
1161
1162 return false;
1163 };
1164
1165 // Helper lambda which tries to use CSINC/CSINV for the instruction when its
1166 // true/false values are constants.
1167 // FIXME: All of these patterns already exist in tablegen. We should be
1168 // able to import these.
1169 auto TryOptSelectCst = [&Opc, &True, &False, &CC, Is32Bit, &MRI,
1170 &Optimized]() {
1171 if (Optimized)
1172 return false;
1173 auto TrueCst = getIConstantVRegValWithLookThrough(True, MRI);
1174 auto FalseCst = getIConstantVRegValWithLookThrough(False, MRI);
1175 if (!TrueCst && !FalseCst)
1176 return false;
1177
1178 Register ZReg = Is32Bit ? AArch64::WZR : AArch64::XZR;
1179 if (TrueCst && FalseCst) {
1180 int64_t T = TrueCst->Value.getSExtValue();
1181 int64_t F = FalseCst->Value.getSExtValue();
1182
1183 if (T == 0 && F == 1) {
1184 // G_SELECT cc, 0, 1 -> CSINC zreg, zreg, cc
1185 Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;
1186 True = ZReg;
1187 False = ZReg;
1188 return true;
1189 }
1190
1191 if (T == 0 && F == -1) {
1192 // G_SELECT cc 0, -1 -> CSINV zreg, zreg cc
1193 Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;
1194 True = ZReg;
1195 False = ZReg;
1196 return true;
1197 }
1198 }
1199
1200 if (TrueCst) {
1201 int64_t T = TrueCst->Value.getSExtValue();
1202 if (T == 1) {
1203 // G_SELECT cc, 1, f -> CSINC f, zreg, inv_cc
1204 Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;
1205 True = False;
1206 False = ZReg;
1207 CC = AArch64CC::getInvertedCondCode(CC);
1208 return true;
1209 }
1210
1211 if (T == -1) {
1212 // G_SELECT cc, -1, f -> CSINV f, zreg, inv_cc
1213 Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;
1214 True = False;
1215 False = ZReg;
1216 CC = AArch64CC::getInvertedCondCode(CC);
1217 return true;
1218 }
1219 }
1220
1221 if (FalseCst) {
1222 int64_t F = FalseCst->Value.getSExtValue();
1223 if (F == 1) {
1224 // G_SELECT cc, t, 1 -> CSINC t, zreg, cc
1225 Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;
1226 False = ZReg;
1227 return true;
1228 }
1229
1230 if (F == -1) {
1231 // G_SELECT cc, t, -1 -> CSINV t, zreg, cc
1232 Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;
1233 False = ZReg;
1234 return true;
1235 }
1236 }
1237 return false;
1238 };
1239
1240 Optimized |= TryFoldBinOpIntoSelect(False, True, /*Invert = */ false);
1241 Optimized |= TryFoldBinOpIntoSelect(True, False, /*Invert = */ true);
1242 Optimized |= TryOptSelectCst();
1243 auto SelectInst = MIB.buildInstr(Opc, {Dst}, {True, False}).addImm(CC);
1244 constrainSelectedInstRegOperands(*SelectInst, TII, TRI, RBI);
1245 return &*SelectInst;
1246}
1247
1248static AArch64CC::CondCode changeICMPPredToAArch64CC(CmpInst::Predicate P) {
1249 switch (P) {
1250 default:
1251 llvm_unreachable("Unknown condition code!");
1252 case CmpInst::ICMP_NE:
1253 return AArch64CC::NE;
1254 case CmpInst::ICMP_EQ:
1255 return AArch64CC::EQ;
1256 case CmpInst::ICMP_SGT:
1257 return AArch64CC::GT;
1258 case CmpInst::ICMP_SGE:
1259 return AArch64CC::GE;
1260 case CmpInst::ICMP_SLT:
1261 return AArch64CC::LT;
1262 case CmpInst::ICMP_SLE:
1263 return AArch64CC::LE;
1264 case CmpInst::ICMP_UGT:
1265 return AArch64CC::HI;
1266 case CmpInst::ICMP_UGE:
1267 return AArch64CC::HS;
1268 case CmpInst::ICMP_ULT:
1269 return AArch64CC::LO;
1270 case CmpInst::ICMP_ULE:
1271 return AArch64CC::LS;
1272 }
1273}
1274
1275/// changeFPCCToORAArch64CC - Convert an IR fp condition code to an AArch64 CC.
1276static void changeFPCCToORAArch64CC(CmpInst::Predicate CC,
1277 AArch64CC::CondCode &CondCode,
1278 AArch64CC::CondCode &CondCode2) {
1279 CondCode2 = AArch64CC::AL;
1280 switch (CC) {
1281 default:
1282 llvm_unreachable("Unknown FP condition!")::llvm::llvm_unreachable_internal("Unknown FP condition!", "llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 1282)
;
1283 case CmpInst::FCMP_OEQ:
1284 CondCode = AArch64CC::EQ;
1285 break;
1286 case CmpInst::FCMP_OGT:
1287 CondCode = AArch64CC::GT;
1288 break;
1289 case CmpInst::FCMP_OGE:
1290 CondCode = AArch64CC::GE;
1291 break;
1292 case CmpInst::FCMP_OLT:
1293 CondCode = AArch64CC::MI;
1294 break;
1295 case CmpInst::FCMP_OLE:
1296 CondCode = AArch64CC::LS;
1297 break;
1298 case CmpInst::FCMP_ONE:
1299 CondCode = AArch64CC::MI;
1300 CondCode2 = AArch64CC::GT;
1301 break;
1302 case CmpInst::FCMP_ORD:
1303 CondCode = AArch64CC::VC;
1304 break;
1305 case CmpInst::FCMP_UNO:
1306 CondCode = AArch64CC::VS;
1307 break;
1308 case CmpInst::FCMP_UEQ:
1309 CondCode = AArch64CC::EQ;
1310 CondCode2 = AArch64CC::VS;
1311 break;
1312 case CmpInst::FCMP_UGT:
1313 CondCode = AArch64CC::HI;
1314 break;
1315 case CmpInst::FCMP_UGE:
1316 CondCode = AArch64CC::PL;
1317 break;
1318 case CmpInst::FCMP_ULT:
1319 CondCode = AArch64CC::LT;
1320 break;
1321 case CmpInst::FCMP_ULE:
1322 CondCode = AArch64CC::LE;
1323 break;
1324 case CmpInst::FCMP_UNE:
1325 CondCode = AArch64CC::NE;
1326 break;
1327 }
1328}
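// e.g. FCMP_ONE (ordered and not equal) has no single AArch64 condition
// code: after an FCMP it holds exactly when MI (ordered less-than) or GT
// (ordered greater-than) holds, so callers that see CondCode2 != AL must
// emit a second conditional branch or select for the OR'ed code.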
1329
1330/// Convert an IR fp condition code to an AArch64 CC.
1331/// This differs from changeFPCCToAArch64CC in that it returns cond codes that
1332/// should be AND'ed instead of OR'ed.
1333static void changeFPCCToANDAArch64CC(CmpInst::Predicate CC,
1334 AArch64CC::CondCode &CondCode,
1335 AArch64CC::CondCode &CondCode2) {
1336 CondCode2 = AArch64CC::AL;
1337 switch (CC) {
1338 default:
1339 changeFPCCToORAArch64CC(CC, CondCode, CondCode2);
1340 assert(CondCode2 == AArch64CC::AL);
1341 break;
1342 case CmpInst::FCMP_ONE:
1343 // (a one b)
1344 // == ((a olt b) || (a ogt b))
1345 // == ((a ord b) && (a une b))
1346 CondCode = AArch64CC::VC;
1347 CondCode2 = AArch64CC::NE;
1348 break;
1349 case CmpInst::FCMP_UEQ:
1350 // (a ueq b)
1351 // == ((a uno b) || (a oeq b))
1352 // == ((a ule b) && (a uge b))
1353 CondCode = AArch64CC::PL;
1354 CondCode2 = AArch64CC::LE;
1355 break;
1356 }
1357}
1358
1359/// Return a register which can be used as a bit to test in a TB(N)Z.
1360static Register getTestBitReg(Register Reg, uint64_t &Bit, bool &Invert,
1361 MachineRegisterInfo &MRI) {
1362 assert(Reg.isValid() && "Expected valid register!");
1363 bool HasZext = false;
1364 while (MachineInstr *MI = getDefIgnoringCopies(Reg, MRI)) {
1365 unsigned Opc = MI->getOpcode();
1366
1367 if (!MI->getOperand(0).isReg() ||
1368 !MRI.hasOneNonDBGUse(MI->getOperand(0).getReg()))
1369 break;
1370
1371 // (tbz (any_ext x), b) -> (tbz x, b) if we don't use the extended bits.
1372 //
1373 // (tbz (trunc x), b) -> (tbz x, b) is always safe, because the bit number
1374 // on the truncated x is the same as the bit number on x.
1375 if (Opc == TargetOpcode::G_ANYEXT || Opc == TargetOpcode::G_ZEXT ||
1376 Opc == TargetOpcode::G_TRUNC) {
1377 if (Opc == TargetOpcode::G_ZEXT)
1378 HasZext = true;
1379
1380 Register NextReg = MI->getOperand(1).getReg();
1381 // Did we find something worth folding?
1382 if (!NextReg.isValid() || !MRI.hasOneNonDBGUse(NextReg))
1383 break;
1384
1385 // NextReg is worth folding. Keep looking.
1386 Reg = NextReg;
1387 continue;
1388 }
1389
1390 // Attempt to find a suitable operation with a constant on one side.
1391 Optional<uint64_t> C;
1392 Register TestReg;
1393 switch (Opc) {
1394 default:
1395 break;
1396 case TargetOpcode::G_AND:
1397 case TargetOpcode::G_XOR: {
1398 TestReg = MI->getOperand(1).getReg();
1399 Register ConstantReg = MI->getOperand(2).getReg();
1400 auto VRegAndVal = getIConstantVRegValWithLookThrough(ConstantReg, MRI);
1401 if (!VRegAndVal) {
1402 // AND commutes, check the other side for a constant.
1403 // FIXME: Can we canonicalize the constant so that it's always on the
1404 // same side at some point earlier?
1405 std::swap(ConstantReg, TestReg);
1406 VRegAndVal = getIConstantVRegValWithLookThrough(ConstantReg, MRI);
1407 }
1408 if (VRegAndVal) {
1409 if (HasZext)
1410 C = VRegAndVal->Value.getZExtValue();
1411 else
1412 C = VRegAndVal->Value.getSExtValue();
1413 }
1414 break;
1415 }
1416 case TargetOpcode::G_ASHR:
1417 case TargetOpcode::G_LSHR:
1418 case TargetOpcode::G_SHL: {
1419 TestReg = MI->getOperand(1).getReg();
1420 auto VRegAndVal =
1421 getIConstantVRegValWithLookThrough(MI->getOperand(2).getReg(), MRI);
1422 if (VRegAndVal)
1423 C = VRegAndVal->Value.getSExtValue();
1424 break;
1425 }
1426 }
1427
1428 // Didn't find a constant or viable register. Bail out of the loop.
1429 if (!C || !TestReg.isValid())
1430 break;
1431
1432 // We found a suitable instruction with a constant. Check to see if we can
1433 // walk through the instruction.
1434 Register NextReg;
1435 unsigned TestRegSize = MRI.getType(TestReg).getSizeInBits();
1436 switch (Opc) {
1437 default:
1438 break;
1439 case TargetOpcode::G_AND:
1440 // (tbz (and x, m), b) -> (tbz x, b) when the b-th bit of m is set.
1441 if ((*C >> Bit) & 1)
1442 NextReg = TestReg;
1443 break;
1444 case TargetOpcode::G_SHL:
1445 // (tbz (shl x, c), b) -> (tbz x, b-c) when b-c is positive and fits in
1446 // the type of the register.
1447 if (*C <= Bit && (Bit - *C) < TestRegSize) {
1448 NextReg = TestReg;
1449 Bit = Bit - *C;
1450 }
1451 break;
1452 case TargetOpcode::G_ASHR:
1453 // (tbz (ashr x, c), b) -> (tbz x, b+c) or (tbz x, msb) if b+c is > # bits
1454 // in x
1455 NextReg = TestReg;
1456 Bit = Bit + *C;
1457 if (Bit >= TestRegSize)
1458 Bit = TestRegSize - 1;
1459 break;
1460 case TargetOpcode::G_LSHR:
1461 // (tbz (lshr x, c), b) -> (tbz x, b+c) when b + c is < # bits in x
1462 if ((Bit + *C) < TestRegSize) {
1463 NextReg = TestReg;
1464 Bit = Bit + *C;
1465 }
1466 break;
1467 case TargetOpcode::G_XOR:
1468 // We can walk through a G_XOR by inverting whether we use tbz/tbnz when
1469 // appropriate.
1470 //
1471 // e.g. If x' = xor x, c, and the b-th bit is set in c then
1472 //
1473 // tbz x', b -> tbnz x, b
1474 //
1475 // Because x' only has the b-th bit set if x does not.
1476 if ((*C >> Bit) & 1)
1477 Invert = !Invert;
1478 NextReg = TestReg;
1479 break;
1480 }
1481
1482 // Check if we found anything worth folding.
1483 if (!NextReg.isValid())
1484 return Reg;
1485 Reg = NextReg;
1486 }
1487
1488 return Reg;
1489}
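// A rough illustration of the walk above (virtual registers hypothetical):
//   %m  = G_CONSTANT i64 4        ; bit 2 set
//   %a  = G_AND %x, %m
//   %c1 = G_CONSTANT i64 1
//   %s  = G_SHL %a, %c1
// Testing bit 3 of %s reduces first to bit 2 of %a (G_SHL, Bit -= 1) and
// then to bit 2 of %x (G_AND with bit 2 of the mask set), so the caller can
// emit the TB(N)Z directly on %x.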
1490
1491MachineInstr *AArch64InstructionSelector::emitTestBit(
1492 Register TestReg, uint64_t Bit, bool IsNegative, MachineBasicBlock *DstMBB,
1493 MachineIRBuilder &MIB) const {
1494 assert(TestReg.isValid());
1495 assert(ProduceNonFlagSettingCondBr &&
1496 "Cannot emit TB(N)Z with speculation tracking!");
1497 MachineRegisterInfo &MRI = *MIB.getMRI();
1498
1499 // Attempt to optimize the test bit by walking over instructions.
1500 TestReg = getTestBitReg(TestReg, Bit, IsNegative, MRI);
1501 LLT Ty = MRI.getType(TestReg);
1502 unsigned Size = Ty.getSizeInBits();
1503 assert(!Ty.isVector() && "Expected a scalar!");
1504 assert(Bit < 64 && "Bit is too large!");
1505
1506 // When the test register is a 64-bit register, we have to narrow to make
1507 // TBNZW work.
1508 bool UseWReg = Bit < 32;
1509 unsigned NecessarySize = UseWReg ? 32 : 64;
1510 if (Size != NecessarySize)
1511 TestReg = moveScalarRegClass(
1512 TestReg, UseWReg ? AArch64::GPR32RegClass : AArch64::GPR64RegClass,
1513 MIB);
1514
1515 static const unsigned OpcTable[2][2] = {{AArch64::TBZX, AArch64::TBNZX},
1516 {AArch64::TBZW, AArch64::TBNZW}};
1517 unsigned Opc = OpcTable[UseWReg][IsNegative];
1518 auto TestBitMI =
1519 MIB.buildInstr(Opc).addReg(TestReg).addImm(Bit).addMBB(DstMBB);
1520 constrainSelectedInstRegOperands(*TestBitMI, TII, TRI, RBI);
1521 return &*TestBitMI;
1522}
1523
1524bool AArch64InstructionSelector::tryOptAndIntoCompareBranch(
1525 MachineInstr &AndInst, bool Invert, MachineBasicBlock *DstMBB,
1526 MachineIRBuilder &MIB) const {
1527 assert(AndInst.getOpcode() == TargetOpcode::G_AND && "Expected G_AND only?");
1528 // Given something like this:
1529 //
1530 // %x = ...Something...
1531 // %one = G_CONSTANT i64 1
1532 // %zero = G_CONSTANT i64 0
1533 // %and = G_AND %x, %one
1534 // %cmp = G_ICMP intpred(ne), %and, %zero
1535 // %cmp_trunc = G_TRUNC %cmp
1536 // G_BRCOND %cmp_trunc, %bb.3
1537 //
1538 // We want to try and fold the AND into the G_BRCOND and produce either a
1539 // TBNZ (when we have intpred(ne)) or a TBZ (when we have intpred(eq)).
1540 //
1541 // In this case, we'd get
1542 //
1543 // TBNZ %x %bb.3
1544 //
1545
1546 // Check if the AND has a constant on its RHS which we can use as a mask.
1547 // If it's a power of 2, then it's the same as checking a specific bit.
1548 // (e.g, ANDing with 8 == ANDing with 000...100 == testing if bit 3 is set)
1549 auto MaybeBit = getIConstantVRegValWithLookThrough(
1550 AndInst.getOperand(2).getReg(), *MIB.getMRI());
1551 if (!MaybeBit)
1552 return false;
1553
1554 int32_t Bit = MaybeBit->Value.exactLogBase2();
1555 if (Bit < 0)
1556 return false;
1557
1558 Register TestReg = AndInst.getOperand(1).getReg();
1559
1560 // Emit a TB(N)Z.
1561 emitTestBit(TestReg, Bit, Invert, DstMBB, MIB);
1562 return true;
1563}
1564
1565MachineInstr *AArch64InstructionSelector::emitCBZ(Register CompareReg,
1566 bool IsNegative,
1567 MachineBasicBlock *DestMBB,
1568 MachineIRBuilder &MIB) const {
1569 assert(ProduceNonFlagSettingCondBr && "CBZ does not set flags!");
1570 MachineRegisterInfo &MRI = *MIB.getMRI();
1571 assert(RBI.getRegBank(CompareReg, MRI, TRI)->getID() ==
1572 AArch64::GPRRegBankID &&
1573 "Expected GPRs only?");
1574 auto Ty = MRI.getType(CompareReg);
1575 unsigned Width = Ty.getSizeInBits();
1576 assert(!Ty.isVector() && "Expected scalar only?");
1577 assert(Width <= 64 && "Expected width to be at most 64?");
1578 static const unsigned OpcTable[2][2] = {{AArch64::CBZW, AArch64::CBZX},
1579 {AArch64::CBNZW, AArch64::CBNZX}};
1580 unsigned Opc = OpcTable[IsNegative][Width == 64];
1581 auto BranchMI = MIB.buildInstr(Opc, {}, {CompareReg}).addMBB(DestMBB);
1582 constrainSelectedInstRegOperands(*BranchMI, TII, TRI, RBI);
1583 return &*BranchMI;
1584}
1585
1586bool AArch64InstructionSelector::selectCompareBranchFedByFCmp(
1587 MachineInstr &I, MachineInstr &FCmp, MachineIRBuilder &MIB) const {
1588 assert(FCmp.getOpcode() == TargetOpcode::G_FCMP);
1589 assert(I.getOpcode() == TargetOpcode::G_BRCOND);
1590 // Unfortunately, the mapping of LLVM FP CC's onto AArch64 CC's isn't
1591 // totally clean. Some of them require two branches to implement.
1592 auto Pred = (CmpInst::Predicate)FCmp.getOperand(1).getPredicate();
1593 emitFPCompare(FCmp.getOperand(2).getReg(), FCmp.getOperand(3).getReg(), MIB,
1594 Pred);
1595 AArch64CC::CondCode CC1, CC2;
1596 changeFCMPPredToAArch64CC(static_cast<CmpInst::Predicate>(Pred), CC1, CC2);
1597 MachineBasicBlock *DestMBB = I.getOperand(1).getMBB();
1598 MIB.buildInstr(AArch64::Bcc, {}, {}).addImm(CC1).addMBB(DestMBB);
1599 if (CC2 != AArch64CC::AL)
1600 MIB.buildInstr(AArch64::Bcc, {}, {}).addImm(CC2).addMBB(DestMBB);
1601 I.eraseFromParent();
1602 return true;
1603}
1604
1605bool AArch64InstructionSelector::tryOptCompareBranchFedByICmp(
1606 MachineInstr &I, MachineInstr &ICmp, MachineIRBuilder &MIB) const {
1607 assert(ICmp.getOpcode() == TargetOpcode::G_ICMP);
1608 assert(I.getOpcode() == TargetOpcode::G_BRCOND);
1609 // Attempt to optimize the G_BRCOND + G_ICMP into a TB(N)Z/CB(N)Z.
1610 //
1611 // Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z
1612 // instructions will not be produced, as they are conditional branch
1613 // instructions that do not set flags.
1614 if (!ProduceNonFlagSettingCondBr)
1615 return false;
1616
1617 MachineRegisterInfo &MRI = *MIB.getMRI();
1618 MachineBasicBlock *DestMBB = I.getOperand(1).getMBB();
1619 auto Pred =
1620 static_cast<CmpInst::Predicate>(ICmp.getOperand(1).getPredicate());
1621 Register LHS = ICmp.getOperand(2).getReg();
1622 Register RHS = ICmp.getOperand(3).getReg();
1623
1624 // We're allowed to emit a TB(N)Z/CB(N)Z. Try to do that.
1625 auto VRegAndVal = getIConstantVRegValWithLookThrough(RHS, MRI);
1626 MachineInstr *AndInst = getOpcodeDef(TargetOpcode::G_AND, LHS, MRI);
1627
1628 // When we can emit a TB(N)Z, prefer that.
1629 //
1630 // Handle non-commutative condition codes first.
1631 // Note that we don't want to do this when we have a G_AND because it can
1632 // become a tst. The tst will make the test bit in the TB(N)Z redundant.
1633 if (VRegAndVal && !AndInst) {
1634 int64_t C = VRegAndVal->Value.getSExtValue();
1635
1636 // When we have a greater-than comparison, we can just test if the msb is
1637 // zero.
1638 if (C == -1 && Pred == CmpInst::ICMP_SGT) {
1639 uint64_t Bit = MRI.getType(LHS).getSizeInBits() - 1;
1640 emitTestBit(LHS, Bit, /*IsNegative = */ false, DestMBB, MIB);
1641 I.eraseFromParent();
1642 return true;
1643 }
1644
1645 // When we have a less than comparison, we can just test if the msb is not
1646 // zero.
1647 if (C == 0 && Pred == CmpInst::ICMP_SLT) {
1648 uint64_t Bit = MRI.getType(LHS).getSizeInBits() - 1;
1649 emitTestBit(LHS, Bit, /*IsNegative = */ true, DestMBB, MIB);
1650 I.eraseFromParent();
1651 return true;
1652 }
1653 }
1654
1655 // Attempt to handle commutative condition codes. Right now, that's only
1656 // eq/ne.
1657 if (ICmpInst::isEquality(Pred)) {
1658 if (!VRegAndVal) {
1659 std::swap(RHS, LHS);
1660 VRegAndVal = getIConstantVRegValWithLookThrough(RHS, MRI);
1661 AndInst = getOpcodeDef(TargetOpcode::G_AND, LHS, MRI);
1662 }
1663
1664 if (VRegAndVal && VRegAndVal->Value == 0) {
1665 // If there's a G_AND feeding into this branch, try to fold it away by
1666 // emitting a TB(N)Z instead.
1667 //
1668 // Note: If we have LT, then it *is* possible to fold, but it wouldn't be
1669 // beneficial. When we have an AND and LT, we need a TST/ANDS, so folding
1670 // would be redundant.
1671 if (AndInst &&
1672 tryOptAndIntoCompareBranch(
1673 *AndInst, /*Invert = */ Pred == CmpInst::ICMP_NE, DestMBB, MIB)) {
1674 I.eraseFromParent();
1675 return true;
1676 }
1677
1678 // Otherwise, try to emit a CB(N)Z instead.
1679 auto LHSTy = MRI.getType(LHS);
1680 if (!LHSTy.isVector() && LHSTy.getSizeInBits() <= 64) {
1681 emitCBZ(LHS, /*IsNegative = */ Pred == CmpInst::ICMP_NE, DestMBB, MIB);
1682 I.eraseFromParent();
1683 return true;
1684 }
1685 }
1686 }
1687
1688 return false;
1689}
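// Sketch of the shapes handled above (operands hypothetical):
//   G_BRCOND (G_ICMP sgt %x, -1), %bb  ->  TBZ  %x, #(msb), %bb
//   G_BRCOND (G_ICMP slt %x, 0),  %bb  ->  TBNZ %x, #(msb), %bb
//   G_BRCOND (G_ICMP eq  %x, 0),  %bb  ->  CBZ  %x, %bb
//   G_BRCOND (G_ICMP ne  %x, 0),  %bb  ->  CBNZ %x, %bb
// with the equality cases fed by a G_AND preferring a TB(N)Z via
// tryOptAndIntoCompareBranch instead.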
1690
1691bool AArch64InstructionSelector::selectCompareBranchFedByICmp(
1692 MachineInstr &I, MachineInstr &ICmp, MachineIRBuilder &MIB) const {
1693 assert(ICmp.getOpcode() == TargetOpcode::G_ICMP);
1694 assert(I.getOpcode() == TargetOpcode::G_BRCOND);
1695 if (tryOptCompareBranchFedByICmp(I, ICmp, MIB))
1696 return true;
1697
1698 // Couldn't optimize. Emit a compare + a Bcc.
1699 MachineBasicBlock *DestMBB = I.getOperand(1).getMBB();
1700 auto PredOp = ICmp.getOperand(1);
1701 emitIntegerCompare(ICmp.getOperand(2), ICmp.getOperand(3), PredOp, MIB);
1702 const AArch64CC::CondCode CC = changeICMPPredToAArch64CC(
1703 static_cast<CmpInst::Predicate>(PredOp.getPredicate()));
1704 MIB.buildInstr(AArch64::Bcc, {}, {}).addImm(CC).addMBB(DestMBB);
1705 I.eraseFromParent();
1706 return true;
1707}
1708
1709bool AArch64InstructionSelector::selectCompareBranch(
1710 MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) {
1711 Register CondReg = I.getOperand(0).getReg();
1712 MachineInstr *CCMI = MRI.getVRegDef(CondReg);
1713 if (CCMI->getOpcode() == TargetOpcode::G_TRUNC) {
1714 CondReg = CCMI->getOperand(1).getReg();
1715 CCMI = MRI.getVRegDef(CondReg);
1716 }
1717
1718 // Try to select the G_BRCOND using whatever is feeding the condition if
1719 // possible.
1720 unsigned CCMIOpc = CCMI->getOpcode();
1721 if (CCMIOpc == TargetOpcode::G_FCMP)
1722 return selectCompareBranchFedByFCmp(I, *CCMI, MIB);
1723 if (CCMIOpc == TargetOpcode::G_ICMP)
1724 return selectCompareBranchFedByICmp(I, *CCMI, MIB);
1725
1726 // Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z
1727 // instructions will not be produced, as they are conditional branch
1728 // instructions that do not set flags.
1729 if (ProduceNonFlagSettingCondBr) {
1730 emitTestBit(CondReg, /*Bit = */ 0, /*IsNegative = */ true,
1731 I.getOperand(1).getMBB(), MIB);
1732 I.eraseFromParent();
1733 return true;
1734 }
1735
1736 // Can't emit TB(N)Z/CB(N)Z. Emit a tst + bcc instead.
1737 auto TstMI =
1738 MIB.buildInstr(AArch64::ANDSWri, {LLT::scalar(32)}, {CondReg}).addImm(1);
1739 constrainSelectedInstRegOperands(*TstMI, TII, TRI, RBI);
1740 auto Bcc = MIB.buildInstr(AArch64::Bcc)
1741 .addImm(AArch64CC::EQ)
1742 .addMBB(I.getOperand(1).getMBB());
1743 I.eraseFromParent();
1744 return constrainSelectedInstRegOperands(*Bcc, TII, TRI, RBI);
1745}
1746
1747/// Returns the element immediate value of a vector shift operand if found.
1748/// This needs to detect a splat-like operation, e.g. a G_BUILD_VECTOR.
1749static Optional<int64_t> getVectorShiftImm(Register Reg,
1750 MachineRegisterInfo &MRI) {
1751 assert(MRI.getType(Reg).isVector() && "Expected a *vector* shift operand");
1752 MachineInstr *OpMI = MRI.getVRegDef(Reg);
1753 return getAArch64VectorSplatScalar(*OpMI, MRI);
1754}
1755
1756/// Matches and returns the shift immediate value for a SHL instruction given
1757/// a shift operand.
1758static Optional<int64_t> getVectorSHLImm(LLT SrcTy, Register Reg, MachineRegisterInfo &MRI) {
1759 Optional<int64_t> ShiftImm = getVectorShiftImm(Reg, MRI);
1760 if (!ShiftImm)
1761 return None;
1762 // Check the immediate is in range for a SHL.
1763 int64_t Imm = *ShiftImm;
1764 if (Imm < 0)
1765 return None;
1766 switch (SrcTy.getElementType().getSizeInBits()) {
1767 default:
1768 LLVM_DEBUG(dbgs() << "Unhandled element type for vector shift");
1769 return None;
1770 case 8:
1771 if (Imm > 7)
1772 return None;
1773 break;
1774 case 16:
1775 if (Imm > 15)
1776 return None;
1777 break;
1778 case 32:
1779 if (Imm > 31)
1780 return None;
1781 break;
1782 case 64:
1783 if (Imm > 63)
1784 return None;
1785 break;
1786 }
1787 return Imm;
1788}
1789
1790bool AArch64InstructionSelector::selectVectorSHL(MachineInstr &I,
1791 MachineRegisterInfo &MRI) {
1792 assert(I.getOpcode() == TargetOpcode::G_SHL);
1793 Register DstReg = I.getOperand(0).getReg();
1794 const LLT Ty = MRI.getType(DstReg);
1795 Register Src1Reg = I.getOperand(1).getReg();
1796 Register Src2Reg = I.getOperand(2).getReg();
1797
1798 if (!Ty.isVector())
1799 return false;
1800
1801 // Check if we have a vector of constants on RHS that we can select as the
1802 // immediate form.
1803 Optional<int64_t> ImmVal = getVectorSHLImm(Ty, Src2Reg, MRI);
1804
1805 unsigned Opc = 0;
1806 if (Ty == LLT::fixed_vector(2, 64)) {
1807 Opc = ImmVal ? AArch64::SHLv2i64_shift : AArch64::USHLv2i64;
1808 } else if (Ty == LLT::fixed_vector(4, 32)) {
1809 Opc = ImmVal ? AArch64::SHLv4i32_shift : AArch64::USHLv4i32;
1810 } else if (Ty == LLT::fixed_vector(2, 32)) {
1811 Opc = ImmVal ? AArch64::SHLv2i32_shift : AArch64::USHLv2i32;
1812 } else if (Ty == LLT::fixed_vector(4, 16)) {
1813 Opc = ImmVal ? AArch64::SHLv4i16_shift : AArch64::USHLv4i16;
1814 } else if (Ty == LLT::fixed_vector(8, 16)) {
1815 Opc = ImmVal ? AArch64::SHLv8i16_shift : AArch64::USHLv8i16;
1816 } else if (Ty == LLT::fixed_vector(16, 8)) {
1817 Opc = ImmVal ? AArch64::SHLv16i8_shift : AArch64::USHLv16i8;
1818 } else if (Ty == LLT::fixed_vector(8, 8)) {
1819 Opc = ImmVal ? AArch64::SHLv8i8_shift : AArch64::USHLv8i8;
1820 } else {
1821 LLVM_DEBUG(dbgs() << "Unhandled G_SHL type");
1822 return false;
1823 }
1824
1825 auto Shl = MIB.buildInstr(Opc, {DstReg}, {Src1Reg});
1826 if (ImmVal)
1827 Shl.addImm(*ImmVal);
1828 else
1829 Shl.addUse(Src2Reg);
1830 constrainSelectedInstRegOperands(*Shl, TII, TRI, RBI);
1831 I.eraseFromParent();
1832 return true;
1833}
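// For instance, a hypothetical v4s32 G_SHL whose shift operand is a
// build-vector splat of 3 selects the immediate form SHLv4i32_shift with
// #3, while a non-constant shift amount falls through to the register form
// USHLv4i32.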
1834
1835bool AArch64InstructionSelector::selectVectorAshrLshr(
1836 MachineInstr &I, MachineRegisterInfo &MRI) {
1837 assert(I.getOpcode() == TargetOpcode::G_ASHR ||
1838 I.getOpcode() == TargetOpcode::G_LSHR);
1839 Register DstReg = I.getOperand(0).getReg();
1840 const LLT Ty = MRI.getType(DstReg);
1841 Register Src1Reg = I.getOperand(1).getReg();
1842 Register Src2Reg = I.getOperand(2).getReg();
1843
1844 if (!Ty.isVector())
1845 return false;
1846
1847 bool IsASHR = I.getOpcode() == TargetOpcode::G_ASHR;
1848
1849 // We expect the immediate case to be lowered in the PostLegalCombiner to
1850 // AArch64ISD::VASHR or AArch64ISD::VLSHR equivalents.
1851
1852 // There is no shift-right-by-register instruction, but the shift-left-by-
1853 // register instruction takes a signed shift amount, where negative values
1854 // specify a right shift.
1855
1856 unsigned Opc = 0;
1857 unsigned NegOpc = 0;
1858 const TargetRegisterClass *RC =
1859 getRegClassForTypeOnBank(Ty, RBI.getRegBank(AArch64::FPRRegBankID));
1860 if (Ty == LLT::fixed_vector(2, 64)) {
1861 Opc = IsASHR ? AArch64::SSHLv2i64 : AArch64::USHLv2i64;
1862 NegOpc = AArch64::NEGv2i64;
1863 } else if (Ty == LLT::fixed_vector(4, 32)) {
1864 Opc = IsASHR ? AArch64::SSHLv4i32 : AArch64::USHLv4i32;
1865 NegOpc = AArch64::NEGv4i32;
1866 } else if (Ty == LLT::fixed_vector(2, 32)) {
1867 Opc = IsASHR ? AArch64::SSHLv2i32 : AArch64::USHLv2i32;
1868 NegOpc = AArch64::NEGv2i32;
1869 } else if (Ty == LLT::fixed_vector(4, 16)) {
1870 Opc = IsASHR ? AArch64::SSHLv4i16 : AArch64::USHLv4i16;
1871 NegOpc = AArch64::NEGv4i16;
1872 } else if (Ty == LLT::fixed_vector(8, 16)) {
1873 Opc = IsASHR ? AArch64::SSHLv8i16 : AArch64::USHLv8i16;
1874 NegOpc = AArch64::NEGv8i16;
1875 } else if (Ty == LLT::fixed_vector(16, 8)) {
1876 Opc = IsASHR ? AArch64::SSHLv16i8 : AArch64::USHLv16i8;
1877 NegOpc = AArch64::NEGv16i8;
1878 } else if (Ty == LLT::fixed_vector(8, 8)) {
1879 Opc = IsASHR ? AArch64::SSHLv8i8 : AArch64::USHLv8i8;
1880 NegOpc = AArch64::NEGv8i8;
1881 } else {
1882 LLVM_DEBUG(dbgs() << "Unhandled G_ASHR type");
1883 return false;
1884 }
1885
1886 auto Neg = MIB.buildInstr(NegOpc, {RC}, {Src2Reg});
1887 constrainSelectedInstRegOperands(*Neg, TII, TRI, RBI);
1888 auto SShl = MIB.buildInstr(Opc, {DstReg}, {Src1Reg, Neg});
1889 constrainSelectedInstRegOperands(*SShl, TII, TRI, RBI);
1890 I.eraseFromParent();
1891 return true;
1892}
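// A minimal sketch of the negate-then-shift-left trick used above, for a
// hypothetical v4s32 right shift:
//   %neg = NEGv4i32 %amt
//   %res = SSHLv4i32 %src, %neg     ; USHLv4i32 for G_LSHR
// i.e. SSHL/USHL by a negative per-lane amount performs the right shift.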
1893
1894bool AArch64InstructionSelector::selectVaStartAAPCS(
1895 MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const {
1896 return false;
1897}
1898
1899bool AArch64InstructionSelector::selectVaStartDarwin(
1900 MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const {
1901 AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
1902 Register ListReg = I.getOperand(0).getReg();
1903
1904 Register ArgsAddrReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
1905
1906 auto MIB =
1907 BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::ADDXri))
1908 .addDef(ArgsAddrReg)
1909 .addFrameIndex(FuncInfo->getVarArgsStackIndex())
1910 .addImm(0)
1911 .addImm(0);
1912
1913 constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
1914
1915 MIB = BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::STRXui))
1916 .addUse(ArgsAddrReg)
1917 .addUse(ListReg)
1918 .addImm(0)
1919 .addMemOperand(*I.memoperands_begin());
1920
1921 constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
1922 I.eraseFromParent();
1923 return true;
1924}
1925
1926void AArch64InstructionSelector::materializeLargeCMVal(
1927 MachineInstr &I, const Value *V, unsigned OpFlags) {
1928 MachineBasicBlock &MBB = *I.getParent();
1929 MachineFunction &MF = *MBB.getParent();
1930 MachineRegisterInfo &MRI = MF.getRegInfo();
1931
1932 auto MovZ = MIB.buildInstr(AArch64::MOVZXi, {&AArch64::GPR64RegClass}, {});
1933 MovZ->addOperand(MF, I.getOperand(1));
1934 MovZ->getOperand(1).setTargetFlags(OpFlags | AArch64II::MO_G0 |
1935 AArch64II::MO_NC);
1936 MovZ->addOperand(MF, MachineOperand::CreateImm(0));
1937 constrainSelectedInstRegOperands(*MovZ, TII, TRI, RBI);
1938
1939 auto BuildMovK = [&](Register SrcReg, unsigned char Flags, unsigned Offset,
1940 Register ForceDstReg) {
1941 Register DstReg = ForceDstReg
1942 ? ForceDstReg
1943 : MRI.createVirtualRegister(&AArch64::GPR64RegClass);
1944 auto MovI = MIB.buildInstr(AArch64::MOVKXi).addDef(DstReg).addUse(SrcReg);
1945 if (auto *GV = dyn_cast<GlobalValue>(V)) {
1946 MovI->addOperand(MF, MachineOperand::CreateGA(
1947 GV, MovZ->getOperand(1).getOffset(), Flags));
1948 } else {
1949 MovI->addOperand(
1950 MF, MachineOperand::CreateBA(cast<BlockAddress>(V),
1951 MovZ->getOperand(1).getOffset(), Flags));
1952 }
1953 MovI->addOperand(MF, MachineOperand::CreateImm(Offset));
1954 constrainSelectedInstRegOperands(*MovI, TII, TRI, RBI);
1955 return DstReg;
1956 };
1957 Register DstReg = BuildMovK(MovZ.getReg(0),
1958 AArch64II::MO_G1 | AArch64II::MO_NC, 16, 0);
1959 DstReg = BuildMovK(DstReg, AArch64II::MO_G2 | AArch64II::MO_NC, 32, 0);
1960 BuildMovK(DstReg, AArch64II::MO_G3, 48, I.getOperand(0).getReg());
1961}
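// The emitted sequence builds the 64-bit value 16 bits at a time, roughly:
//   MOVZXi dst0, sym(G0, NC), shift 0
//   MOVKXi dst1, dst0, sym(G1, NC), shift 16
//   MOVKXi dst2, dst1, sym(G2, NC), shift 32
//   MOVKXi dst,  dst2, sym(G3),     shift 48
// where dst is the original destination register of the instruction being
// expanded.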
1962
1963bool AArch64InstructionSelector::preISelLower(MachineInstr &I) {
1964 MachineBasicBlock &MBB = *I.getParent();
1965 MachineFunction &MF = *MBB.getParent();
1966 MachineRegisterInfo &MRI = MF.getRegInfo();
1967
1968 switch (I.getOpcode()) {
1969 case TargetOpcode::G_STORE: {
1970 bool Changed = contractCrossBankCopyIntoStore(I, MRI);
1971 MachineOperand &SrcOp = I.getOperand(0);
1972 if (MRI.getType(SrcOp.getReg()).isPointer()) {
1973 // Allow matching with imported patterns for stores of pointers. Unlike
1974 // G_LOAD/G_PTR_ADD, we may not have selected all users. So, emit a copy
1975 // and constrain.
1976 auto Copy = MIB.buildCopy(LLT::scalar(64), SrcOp);
1977 Register NewSrc = Copy.getReg(0);
1978 SrcOp.setReg(NewSrc);
1979 RBI.constrainGenericRegister(NewSrc, AArch64::GPR64RegClass, MRI);
1980 Changed = true;
1981 }
1982 return Changed;
1983 }
1984 case TargetOpcode::G_PTR_ADD:
1985 return convertPtrAddToAdd(I, MRI);
1986 case TargetOpcode::G_LOAD: {
1987 // For scalar loads of pointers, we try to convert the dest type from p0
1988 // to s64 so that our imported patterns can match. Like with the G_PTR_ADD
1989 // conversion, this should be ok because all users should have been
1990 // selected already, so the type doesn't matter for them.
1991 Register DstReg = I.getOperand(0).getReg();
1992 const LLT DstTy = MRI.getType(DstReg);
1993 if (!DstTy.isPointer())
1994 return false;
1995 MRI.setType(DstReg, LLT::scalar(64));
1996 return true;
1997 }
1998 case AArch64::G_DUP: {
1999 // Convert the type from p0 to s64 to help selection.
2000 LLT DstTy = MRI.getType(I.getOperand(0).getReg());
2001 if (!DstTy.getElementType().isPointer())
2002 return false;
2003 auto NewSrc = MIB.buildCopy(LLT::scalar(64), I.getOperand(1).getReg());
2004 MRI.setType(I.getOperand(0).getReg(),
2005 DstTy.changeElementType(LLT::scalar(64)));
2006 MRI.setRegClass(NewSrc.getReg(0), &AArch64::GPR64RegClass);
2007 I.getOperand(1).setReg(NewSrc.getReg(0));
2008 return true;
2009 }
2010 case TargetOpcode::G_UITOFP:
2011 case TargetOpcode::G_SITOFP: {
2012 // If both source and destination regbanks are FPR, then convert the opcode
2013 // to G_SITOF so that the importer can select it to an fpr variant.
2014 // Otherwise, it ends up matching an fpr/gpr variant and adding a cross-bank
2015 // copy.
2016 Register SrcReg = I.getOperand(1).getReg();
2017 LLT SrcTy = MRI.getType(SrcReg);
2018 LLT DstTy = MRI.getType(I.getOperand(0).getReg());
2019 if (SrcTy.isVector() || SrcTy.getSizeInBits() != DstTy.getSizeInBits())
2020 return false;
2021
2022 if (RBI.getRegBank(SrcReg, MRI, TRI)->getID() == AArch64::FPRRegBankID) {
2023 if (I.getOpcode() == TargetOpcode::G_SITOFP)
2024 I.setDesc(TII.get(AArch64::G_SITOF));
2025 else
2026 I.setDesc(TII.get(AArch64::G_UITOF));
2027 return true;
2028 }
2029 return false;
2030 }
2031 default:
2032 return false;
2033 }
2034}
2035
2036/// This lowering tries to look for G_PTR_ADD instructions and then converts
2037/// them to a standard G_ADD with a COPY on the source.
2038///
2039/// The motivation behind this is to expose the add semantics to the imported
2040/// tablegen patterns. We shouldn't need to check for uses being loads/stores,
2041/// because the selector works bottom up, uses before defs. By the time we
2042/// end up trying to select a G_PTR_ADD, we should have already attempted to
2043/// fold this into addressing modes and were therefore unsuccessful.
2044bool AArch64InstructionSelector::convertPtrAddToAdd(
2045 MachineInstr &I, MachineRegisterInfo &MRI) {
2046 assert(I.getOpcode() == TargetOpcode::G_PTR_ADD && "Expected G_PTR_ADD");
2047 Register DstReg = I.getOperand(0).getReg();
2048 Register AddOp1Reg = I.getOperand(1).getReg();
2049 const LLT PtrTy = MRI.getType(DstReg);
2050 if (PtrTy.getAddressSpace() != 0)
2051 return false;
2052
2053 const LLT CastPtrTy =
2054 PtrTy.isVector() ? LLT::fixed_vector(2, 64) : LLT::scalar(64);
2055 auto PtrToInt = MIB.buildPtrToInt(CastPtrTy, AddOp1Reg);
2056 // Set regbanks on the registers.
2057 if (PtrTy.isVector())
2058 MRI.setRegBank(PtrToInt.getReg(0), RBI.getRegBank(AArch64::FPRRegBankID));
2059 else
2060 MRI.setRegBank(PtrToInt.getReg(0), RBI.getRegBank(AArch64::GPRRegBankID));
2061
2062 // Now turn the %dst(p0) = G_PTR_ADD %base, off into:
2063 // %dst(intty) = G_ADD %intbase, off
2064 I.setDesc(TII.get(TargetOpcode::G_ADD));
2065 MRI.setType(DstReg, CastPtrTy);
2066 I.getOperand(1).setReg(PtrToInt.getReg(0));
2067 if (!select(*PtrToInt)) {
2068 LLVM_DEBUG(dbgs() << "Failed to select G_PTRTOINT in convertPtrAddToAdd");
2069 return false;
2070 }
2071
2072 // Also take the opportunity here to try to do some optimization.
2073 // Try to convert this into a G_SUB if the offset is a 0-x negate idiom.
2074 Register NegatedReg;
2075 if (!mi_match(I.getOperand(2).getReg(), MRI, m_Neg(m_Reg(NegatedReg))))
2076 return true;
2077 I.getOperand(2).setReg(NegatedReg);
2078 I.setDesc(TII.get(TargetOpcode::G_SUB));
2079 return true;
2080}
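// Roughly, with hypothetical virtual registers:
//   %dst:gpr(p0) = G_PTR_ADD %base(p0), %off(s64)
// becomes
//   %cast:gpr(s64) = G_PTRTOINT %base(p0)
//   %dst:gpr(s64)  = G_ADD %cast, %off
// and, if %off was matched as (0 - %x), the G_ADD is further rewritten to
//   %dst:gpr(s64)  = G_SUB %cast, %x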
2081
2082bool AArch64InstructionSelector::earlySelectSHL(MachineInstr &I,
2083 MachineRegisterInfo &MRI) {
2084 // We try to match the immediate variant of LSL, which is actually an alias
2085 // for a special case of UBFM. Otherwise, we fall back to the imported
2086 // selector which will match the register variant.
2087 assert(I.getOpcode() == TargetOpcode::G_SHL && "unexpected op");
2088 const auto &MO = I.getOperand(2);
2089 auto VRegAndVal = getIConstantVRegVal(MO.getReg(), MRI);
2090 if (!VRegAndVal)
2091 return false;
2092
2093 const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
2094 if (DstTy.isVector())
2095 return false;
2096 bool Is64Bit = DstTy.getSizeInBits() == 64;
2097 auto Imm1Fn = Is64Bit ? selectShiftA_64(MO) : selectShiftA_32(MO);
2098 auto Imm2Fn = Is64Bit ? selectShiftB_64(MO) : selectShiftB_32(MO);
2099
2100 if (!Imm1Fn || !Imm2Fn)
2101 return false;
2102
2103 auto NewI =
2104 MIB.buildInstr(Is64Bit ? AArch64::UBFMXri : AArch64::UBFMWri,
2105 {I.getOperand(0).getReg()}, {I.getOperand(1).getReg()});
2106
2107 for (auto &RenderFn : *Imm1Fn)
2108 RenderFn(NewI);
2109 for (auto &RenderFn : *Imm2Fn)
2110 RenderFn(NewI);
2111
2112 I.eraseFromParent();
2113 return constrainSelectedInstRegOperands(*NewI, TII, TRI, RBI);
2114}
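// This relies on the standard LSL-immediate alias: for 64-bit operands,
//   lsl xd, xn, #sh  ==  ubfm xd, xn, #((64 - sh) % 64), #(63 - sh)
// so a hypothetical shift by 4 renders as UBFMXri xd, xn, 60, 59; the two
// immediates rendered above are expected to follow this encoding.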
2115
2116bool AArch64InstructionSelector::contractCrossBankCopyIntoStore(
2117 MachineInstr &I, MachineRegisterInfo &MRI) {
2118 assert(I.getOpcode() == TargetOpcode::G_STORE && "Expected G_STORE");
2119 // If we're storing a scalar, it doesn't matter what register bank that
2120 // scalar is on. All that matters is the size.
2121 //
2122 // So, if we see something like this (with a 32-bit scalar as an example):
2123 //
2124 // %x:gpr(s32) = ... something ...
2125 // %y:fpr(s32) = COPY %x:gpr(s32)
2126 // G_STORE %y:fpr(s32)
2127 //
2128 // We can fix this up into something like this:
2129 //
2130 // G_STORE %x:gpr(s32)
2131 //
2132 // And then continue the selection process normally.
2133 Register DefDstReg = getSrcRegIgnoringCopies(I.getOperand(0).getReg(), MRI);
2134 if (!DefDstReg.isValid())
2135 return false;
2136 LLT DefDstTy = MRI.getType(DefDstReg);
2137 Register StoreSrcReg = I.getOperand(0).getReg();
2138 LLT StoreSrcTy = MRI.getType(StoreSrcReg);
2139
2140 // If we get something strange like a physical register, then we shouldn't
2141 // go any further.
2142 if (!DefDstTy.isValid())
2143 return false;
2144
2145 // Are the source and dst types the same size?
2146 if (DefDstTy.getSizeInBits() != StoreSrcTy.getSizeInBits())
2147 return false;
2148
2149 if (RBI.getRegBank(StoreSrcReg, MRI, TRI) ==
2150 RBI.getRegBank(DefDstReg, MRI, TRI))
2151 return false;
2152
2153 // We have a cross-bank copy, which is entering a store. Let's fold it.
2154 I.getOperand(0).setReg(DefDstReg);
2155 return true;
2156}
2157
2158bool AArch64InstructionSelector::earlySelect(MachineInstr &I) {
2159 assert(I.getParent() && "Instruction should be in a basic block!");
2160 assert(I.getParent()->getParent() && "Instruction should be in a function!");
2161
2162 MachineBasicBlock &MBB = *I.getParent();
2163 MachineFunction &MF = *MBB.getParent();
2164 MachineRegisterInfo &MRI = MF.getRegInfo();
2165
2166 switch (I.getOpcode()) {
2167 case AArch64::G_DUP: {
2168 // Before selecting a DUP instruction, check if it is better selected as a
2169 // MOV or load from a constant pool.
2170 Register Src = I.getOperand(1).getReg();
2171 auto ValAndVReg = getIConstantVRegValWithLookThrough(Src, MRI);
2172 if (!ValAndVReg)
2173 return false;
2174 LLVMContext &Ctx = MF.getFunction().getContext();
2175 Register Dst = I.getOperand(0).getReg();
2176 auto *CV = ConstantDataVector::getSplat(
2177 MRI.getType(Dst).getNumElements(),
2178 ConstantInt::get(Type::getIntNTy(Ctx, MRI.getType(Src).getSizeInBits()),
2179 ValAndVReg->Value));
2180 if (!emitConstantVector(Dst, CV, MIB, MRI))
2181 return false;
2182 I.eraseFromParent();
2183 return true;
2184 }
2185 case TargetOpcode::G_SEXT:
2186 // Check for i64 sext(i32 vector_extract) prior to tablegen to select SMOV
2187 // over a normal extend.
2188 if (selectUSMovFromExtend(I, MRI))
2189 return true;
2190 return false;
2191 case TargetOpcode::G_BR:
2192 return false;
2193 case TargetOpcode::G_SHL:
2194 return earlySelectSHL(I, MRI);
2195 case TargetOpcode::G_CONSTANT: {
2196 bool IsZero = false;
2197 if (I.getOperand(1).isCImm())
2198 IsZero = I.getOperand(1).getCImm()->getZExtValue() == 0;
2199 else if (I.getOperand(1).isImm())
2200 IsZero = I.getOperand(1).getImm() == 0;
2201
2202 if (!IsZero)
2203 return false;
2204
2205 Register DefReg = I.getOperand(0).getReg();
2206 LLT Ty = MRI.getType(DefReg);
2207 if (Ty.getSizeInBits() == 64) {
2208 I.getOperand(1).ChangeToRegister(AArch64::XZR, false);
2209 RBI.constrainGenericRegister(DefReg, AArch64::GPR64RegClass, MRI);
2210 } else if (Ty.getSizeInBits() == 32) {
2211 I.getOperand(1).ChangeToRegister(AArch64::WZR, false);
2212 RBI.constrainGenericRegister(DefReg, AArch64::GPR32RegClass, MRI);
2213 } else
2214 return false;
2215
2216 I.setDesc(TII.get(TargetOpcode::COPY));
2217 return true;
2218 }
2219
2220 case TargetOpcode::G_ADD: {
2221 // Check if this is being fed by a G_ICMP on either side.
2222 //
2223 // (cmp pred, x, y) + z
2224 //
2225 // In the above case, when the cmp is true, we increment z by 1. So, we can
2226 // fold the add into the cset for the cmp by using cinc.
2227 //
2228 // FIXME: This would probably be a lot nicer in PostLegalizerLowering.
2229 Register AddDst = I.getOperand(0).getReg();
2230 Register AddLHS = I.getOperand(1).getReg();
2231 Register AddRHS = I.getOperand(2).getReg();
2232 // Only handle scalars.
2233 LLT Ty = MRI.getType(AddLHS);
2234 if (Ty.isVector())
2235 return false;
2236 // Since G_ICMP is modeled as ADDS/SUBS/ANDS, we can handle 32 bits or 64
2237 // bits.
2238 unsigned Size = Ty.getSizeInBits();
2239 if (Size != 32 && Size != 64)
2240 return false;
2241 auto MatchCmp = [&](Register Reg) -> MachineInstr * {
2242 if (!MRI.hasOneNonDBGUse(Reg))
2243 return nullptr;
2244 // If the LHS of the add is 32 bits, then we want to fold a 32-bit
2245 // compare.
2246 if (Size == 32)
2247 return getOpcodeDef(TargetOpcode::G_ICMP, Reg, MRI);
2248 // We model scalar compares using 32-bit destinations right now.
2249 // If it's a 64-bit compare, it'll have 64-bit sources.
2250 Register ZExt;
2251 if (!mi_match(Reg, MRI,
2252 m_OneNonDBGUse(m_GZExt(m_OneNonDBGUse(m_Reg(ZExt))))))
2253 return nullptr;
2254 auto *Cmp = getOpcodeDef(TargetOpcode::G_ICMP, ZExt, MRI);
2255 if (!Cmp ||
2256 MRI.getType(Cmp->getOperand(2).getReg()).getSizeInBits() != 64)
2257 return nullptr;
2258 return Cmp;
2259 };
2260 // Try to match
2261 // z + (cmp pred, x, y)
2262 MachineInstr *Cmp = MatchCmp(AddRHS);
2263 if (!Cmp) {
2264 // (cmp pred, x, y) + z
2265 std::swap(AddLHS, AddRHS);
2266 Cmp = MatchCmp(AddRHS);
2267 if (!Cmp)
2268 return false;
2269 }
2270 auto &PredOp = Cmp->getOperand(1);
2271 auto Pred = static_cast<CmpInst::Predicate>(PredOp.getPredicate());
2272 const AArch64CC::CondCode InvCC =
2273 changeICMPPredToAArch64CC(CmpInst::getInversePredicate(Pred));
2274 MIB.setInstrAndDebugLoc(I);
2275 emitIntegerCompare(/*LHS=*/Cmp->getOperand(2),
2276 /*RHS=*/Cmp->getOperand(3), PredOp, MIB);
2277 emitCSINC(/*Dst=*/AddDst, /*Src =*/AddLHS, /*Src2=*/AddLHS, InvCC, MIB);
2278 I.eraseFromParent();
2279 return true;
2280 }
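// Sketch of the fold (operands hypothetical): for
//   %c = G_ICMP eq, %x, %y
//   %d = G_ADD %z, %c
// the compare is emitted as usual and the add becomes
//   CSINC %d, %z, %z, ne            ; i.e. cinc %d, %z, eq
// so %z is incremented exactly when the predicate holds.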
2281 case TargetOpcode::G_OR: {
2282 // Look for operations that take the lower `Width=Size-ShiftImm` bits of
2283 // `ShiftSrc` and insert them into the upper `Width` bits of `MaskSrc` via
2284 // shifting and masking that we can replace with a BFI (encoded as a BFM).
2285 Register Dst = I.getOperand(0).getReg();
2286 LLT Ty = MRI.getType(Dst);
2287
2288 if (!Ty.isScalar())
2289 return false;
2290
2291 unsigned Size = Ty.getSizeInBits();
2292 if (Size != 32 && Size != 64)
2293 return false;
2294
2295 Register ShiftSrc;
2296 int64_t ShiftImm;
2297 Register MaskSrc;
2298 int64_t MaskImm;
2299 if (!mi_match(
2300 Dst, MRI,
2301 m_GOr(m_OneNonDBGUse(m_GShl(m_Reg(ShiftSrc), m_ICst(ShiftImm))),
2302 m_OneNonDBGUse(m_GAnd(m_Reg(MaskSrc), m_ICst(MaskImm))))))
2303 return false;
2304
2305 if (ShiftImm > Size || ((1ULL << ShiftImm) - 1ULL) != uint64_t(MaskImm))
2306 return false;
2307
2308 int64_t Immr = Size - ShiftImm;
2309 int64_t Imms = Size - ShiftImm - 1;
2310 unsigned Opc = Size == 32 ? AArch64::BFMWri : AArch64::BFMXri;
2311 emitInstr(Opc, {Dst}, {MaskSrc, ShiftSrc, Immr, Imms}, MIB);
2312 I.eraseFromParent();
2313 return true;
2314 }
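// Worked example of the pattern above (32-bit, hypothetical registers):
//   %or = G_OR (G_SHL %hi, 8), (G_AND %lo, 0xff)
// has Size = 32 and ShiftImm = 8, so Immr = 24 and Imms = 23, giving
//   BFMWri %or, %lo, %hi, 24, 23    ; the BFI %lo, %hi, #8, #24 alias
// which keeps the low 8 bits of %lo and inserts the low 24 bits of %hi
// above them.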
2315 default:
2316 return false;
2317 }
2318}
2319
2320bool AArch64InstructionSelector::select(MachineInstr &I) {
2321 assert(I.getParent() && "Instruction should be in a basic block!");
2322 assert(I.getParent()->getParent() && "Instruction should be in a function!");
2323
2324 MachineBasicBlock &MBB = *I.getParent();
2325 MachineFunction &MF = *MBB.getParent();
2326 MachineRegisterInfo &MRI = MF.getRegInfo();
2327
2328 const AArch64Subtarget *Subtarget =
2329 &static_cast<const AArch64Subtarget &>(MF.getSubtarget());
2330 if (Subtarget->requiresStrictAlign()) {
2331 // We don't support this feature yet.
2332 LLVM_DEBUG(dbgs() << "AArch64 GISel does not support strict-align yet\n");
2333 return false;
2334 }
2335
2336 MIB.setInstrAndDebugLoc(I);
2337
2338 unsigned Opcode = I.getOpcode();
2339 // G_PHI requires same handling as PHI
2340 if (!I.isPreISelOpcode() || Opcode == TargetOpcode::G_PHI) {
2341 // Certain non-generic instructions also need some special handling.
2342
2343 if (Opcode == TargetOpcode::LOAD_STACK_GUARD)
2344 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2345
2346 if (Opcode == TargetOpcode::PHI || Opcode == TargetOpcode::G_PHI) {
2347 const Register DefReg = I.getOperand(0).getReg();
2348 const LLT DefTy = MRI.getType(DefReg);
2349
2350 const RegClassOrRegBank &RegClassOrBank =
2351 MRI.getRegClassOrRegBank(DefReg);
2352
2353 const TargetRegisterClass *DefRC
2354 = RegClassOrBank.dyn_cast<const TargetRegisterClass *>();
2355 if (!DefRC) {
2356 if (!DefTy.isValid()) {
2357 LLVM_DEBUG(dbgs() << "PHI operand has no type, not a gvreg?\n");
2358 return false;
2359 }
2360 const RegisterBank &RB = *RegClassOrBank.get<const RegisterBank *>();
2361 DefRC = getRegClassForTypeOnBank(DefTy, RB);
2362 if (!DefRC) {
2363 LLVM_DEBUG(dbgs() << "PHI operand has unexpected size/bank\n");
2364 return false;
2365 }
2366 }
2367
2368 I.setDesc(TII.get(TargetOpcode::PHI));
2369
2370 return RBI.constrainGenericRegister(DefReg, *DefRC, MRI);
2371 }
2372
2373 if (I.isCopy())
2374 return selectCopy(I, TII, MRI, TRI, RBI);
2375
2376 return true;
2377 }
2378
2379
2380 if (I.getNumOperands() != I.getNumExplicitOperands()) {
2381 LLVM_DEBUG(
2382 dbgs() << "Generic instruction has unexpected implicit operands\n");
2383 return false;
2384 }
2385
2386 // Try to do some lowering before we start instruction selecting. These
2387 // lowerings are purely transformations on the input G_MIR and so selection
2388 // must continue after any modification of the instruction.
2389 if (preISelLower(I)) {
2390 Opcode = I.getOpcode(); // The opcode may have been modified, refresh it.
2391 }
2392
2393 // There may be patterns where the importer can't deal with them optimally,
2394 // but does select it to a suboptimal sequence so our custom C++ selection
2395 // code later never has a chance to work on it. Therefore, we have an early
2396 // selection attempt here to give priority to certain selection routines
2397 // over the imported ones.
2398 if (earlySelect(I))
2399 return true;
2400
2401 if (selectImpl(I, *CoverageInfo))
2402 return true;
2403
2404 LLT Ty =
2405 I.getOperand(0).isReg() ? MRI.getType(I.getOperand(0).getReg()) : LLT{};
2406
2407 switch (Opcode) {
2408 case TargetOpcode::G_SBFX:
2409 case TargetOpcode::G_UBFX: {
2410 static const unsigned OpcTable[2][2] = {
2411 {AArch64::UBFMWri, AArch64::UBFMXri},
2412 {AArch64::SBFMWri, AArch64::SBFMXri}};
2413 bool IsSigned = Opcode == TargetOpcode::G_SBFX;
2414 unsigned Size = Ty.getSizeInBits();
2415 unsigned Opc = OpcTable[IsSigned][Size == 64];
2416 auto Cst1 =
2417 getIConstantVRegValWithLookThrough(I.getOperand(2).getReg(), MRI);
2418 assert(Cst1 && "Should have gotten a constant for src 1?");
2419 auto Cst2 =
2420 getIConstantVRegValWithLookThrough(I.getOperand(3).getReg(), MRI);
2421 assert(Cst2 && "Should have gotten a constant for src 2?");
2422 auto LSB = Cst1->Value.getZExtValue();
2423 auto Width = Cst2->Value.getZExtValue();
2424 auto BitfieldInst =
2425 MIB.buildInstr(Opc, {I.getOperand(0)}, {I.getOperand(1)})
2426 .addImm(LSB)
2427 .addImm(LSB + Width - 1);
2428 I.eraseFromParent();
2429 return constrainSelectedInstRegOperands(*BitfieldInst, TII, TRI, RBI);
2430 }
2431 case TargetOpcode::G_BRCOND:
2432 return selectCompareBranch(I, MF, MRI);
2433
2434 case TargetOpcode::G_BRINDIRECT: {
2435 I.setDesc(TII.get(AArch64::BR));
2436 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2437 }
2438
2439 case TargetOpcode::G_BRJT:
2440 return selectBrJT(I, MRI);
2441
2442 case AArch64::G_ADD_LOW: {
2443 // This op may have been separated from its ADRP companion by the localizer
2444 // or some other code motion pass. Given that many CPUs will try to
2445 // macro fuse these operations anyway, select this into a MOVaddr pseudo
2446 // which will later be expanded into an ADRP+ADD pair after scheduling.
2447 MachineInstr *BaseMI = MRI.getVRegDef(I.getOperand(1).getReg());
2448 if (BaseMI->getOpcode() != AArch64::ADRP) {
2449 I.setDesc(TII.get(AArch64::ADDXri));
2450 I.addOperand(MachineOperand::CreateImm(0));
2451 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2452 }
2453 assert(TM.getCodeModel() == CodeModel::Small &&
2454 "Expected small code model");
2455 auto Op1 = BaseMI->getOperand(1);
2456 auto Op2 = I.getOperand(2);
2457 auto MovAddr = MIB.buildInstr(AArch64::MOVaddr, {I.getOperand(0)}, {})
2458 .addGlobalAddress(Op1.getGlobal(), Op1.getOffset(),
2459 Op1.getTargetFlags())
2460 .addGlobalAddress(Op2.getGlobal(), Op2.getOffset(),
2461 Op2.getTargetFlags());
2462 I.eraseFromParent();
2463 return constrainSelectedInstRegOperands(*MovAddr, TII, TRI, RBI);
2464 }
2465
2466 case TargetOpcode::G_BSWAP: {
2467 // Handle vector types for G_BSWAP directly.
2468 Register DstReg = I.getOperand(0).getReg();
2469 LLT DstTy = MRI.getType(DstReg);
2470
2471 // We should only get vector types here; everything else is handled by the
2472 // importer right now.
2473 if (!DstTy.isVector() || DstTy.getSizeInBits() > 128) {
2474 LLVM_DEBUG(dbgs() << "Dst type for G_BSWAP currently unsupported.\n");
2475 return false;
2476 }
2477
2478 // Only handle 4 and 2 element vectors for now.
2479 // TODO: 16-bit elements.
2480 unsigned NumElts = DstTy.getNumElements();
2481 if (NumElts != 4 && NumElts != 2) {
2482 LLVM_DEBUG(dbgs() << "Unsupported number of elements for G_BSWAP.\n");
2483 return false;
2484 }
2485
2486 // Choose the correct opcode for the supported types. Right now, that's
2487 // v2s32, v4s32, and v2s64.
2488 unsigned Opc = 0;
2489 unsigned EltSize = DstTy.getElementType().getSizeInBits();
2490 if (EltSize == 32)
2491 Opc = (DstTy.getNumElements() == 2) ? AArch64::REV32v8i8
2492 : AArch64::REV32v16i8;
2493 else if (EltSize == 64)
2494 Opc = AArch64::REV64v16i8;
2495
2496 // We should always get something by the time we get here...
2497 assert(Opc != 0 && "Didn't get an opcode for G_BSWAP?");
2498
2499 I.setDesc(TII.get(Opc));
2500 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2501 }
2502
2503 case TargetOpcode::G_FCONSTANT:
2504 case TargetOpcode::G_CONSTANT: {
2505 const bool isFP = Opcode == TargetOpcode::G_FCONSTANT;
2506
2507 const LLT s8 = LLT::scalar(8);
2508 const LLT s16 = LLT::scalar(16);
2509 const LLT s32 = LLT::scalar(32);
2510 const LLT s64 = LLT::scalar(64);
2511 const LLT s128 = LLT::scalar(128);
2512 const LLT p0 = LLT::pointer(0, 64);
2513
2514 const Register DefReg = I.getOperand(0).getReg();
2515 const LLT DefTy = MRI.getType(DefReg);
2516 const unsigned DefSize = DefTy.getSizeInBits();
2517 const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI);
2518
2519 // FIXME: Redundant check, but even less readable when factored out.
2520 if (isFP) {
2521 if (Ty != s16 && Ty != s32 && Ty != s64 && Ty != s128) {
2522 LLVM_DEBUG(dbgs() << "Unable to materialize FP " << Ty
2523 << " constant, expected: " << s16 << " or " << s32
2524 << " or " << s64 << " or " << s128 << '\n');
2525 return false;
2526 }
2527
2528 if (RB.getID() != AArch64::FPRRegBankID) {
2529 LLVM_DEBUG(dbgs() << "Unable to materialize FP " << Ty
2530 << " constant on bank: " << RB
2531 << ", expected: FPR\n");
2532 return false;
2533 }
2534
2535 // The case when we have 0.0 is covered by tablegen. Reject it here so we
2536 // can be sure tablegen works correctly and isn't rescued by this code.
2537 // 0.0 is not covered by tablegen for FP128, so that case is handled by the
2538 // code here.
2539 if (DefSize != 128 && I.getOperand(1).getFPImm()->isExactlyValue(0.0))
2540 return false;
2541 } else {
2542 // s32 and s64 are covered by tablegen.
2543 if (Ty != p0 && Ty != s8 && Ty != s16) {
2544 LLVM_DEBUG(dbgs() << "Unable to materialize integer " << Ty
2545 << " constant, expected: " << s32 << ", " << s64
2546 << ", or " << p0 << '\n');
2547 return false;
2548 }
2549
2550 if (RB.getID() != AArch64::GPRRegBankID) {
2551 LLVM_DEBUG(dbgs() << "Unable to materialize integer " << Ty
2552 << " constant on bank: " << RB
2553 << ", expected: GPR\n");
2554 return false;
2555 }
2556 }
2557
2558 if (isFP) {
2559 const TargetRegisterClass &FPRRC = *getRegClassForTypeOnBank(DefTy, RB);
2560 // For 16, 64, and 128b values, emit a constant pool load.
2561 switch (DefSize) {
2562 default:
2563 llvm_unreachable("Unexpected destination size for G_FCONSTANT?");
2564 case 32:
2565 // For s32, use a cp load if we have optsize/minsize.
2566 if (!shouldOptForSize(&MF))
2567 break;
2568 LLVM_FALLTHROUGH;
2569 case 16:
2570 case 64:
2571 case 128: {
2572 auto *FPImm = I.getOperand(1).getFPImm();
2573 auto *LoadMI = emitLoadFromConstantPool(FPImm, MIB);
2574 if (!LoadMI) {
2575 LLVM_DEBUG(dbgs() << "Failed to load double constant pool entry\n");
2576 return false;
2577 }
2578 MIB.buildCopy({DefReg}, {LoadMI->getOperand(0).getReg()});
2579 I.eraseFromParent();
2580 return RBI.constrainGenericRegister(DefReg, FPRRC, MRI);
2581 }
2582 }
2583
2584 // Either emit a FMOV, or emit a copy to emit a normal mov.
2585 assert(DefSize == 32 &&
2586 "Expected constant pool loads for all sizes other than 32!");
2587 const Register DefGPRReg =
2588 MRI.createVirtualRegister(&AArch64::GPR32RegClass);
2589 MachineOperand &RegOp = I.getOperand(0);
2590 RegOp.setReg(DefGPRReg);
2591 MIB.setInsertPt(MIB.getMBB(), std::next(I.getIterator()));
2592 MIB.buildCopy({DefReg}, {DefGPRReg});
2593
2594 if (!RBI.constrainGenericRegister(DefReg, FPRRC, MRI)) {
2595 LLVM_DEBUG(dbgs() << "Failed to constrain G_FCONSTANT def operand\n");
2596 return false;
2597 }
2598
2599 MachineOperand &ImmOp = I.getOperand(1);
2600 // FIXME: Is going through int64_t always correct?
2601 ImmOp.ChangeToImmediate(
2602 ImmOp.getFPImm()->getValueAPF().bitcastToAPInt().getZExtValue());
2603 } else if (I.getOperand(1).isCImm()) {
2604 uint64_t Val = I.getOperand(1).getCImm()->getZExtValue();
2605 I.getOperand(1).ChangeToImmediate(Val);
2606 } else if (I.getOperand(1).isImm()) {
2607 uint64_t Val = I.getOperand(1).getImm();
2608 I.getOperand(1).ChangeToImmediate(Val);
2609 }
2610
2611 const unsigned MovOpc =
2612 DefSize == 64 ? AArch64::MOVi64imm : AArch64::MOVi32imm;
2613 I.setDesc(TII.get(MovOpc));
2614 constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2615 return true;
2616 }
2617 case TargetOpcode::G_EXTRACT: {
2618 Register DstReg = I.getOperand(0).getReg();
2619 Register SrcReg = I.getOperand(1).getReg();
2620 LLT SrcTy = MRI.getType(SrcReg);
2621 LLT DstTy = MRI.getType(DstReg);
2622 (void)DstTy;
2623 unsigned SrcSize = SrcTy.getSizeInBits();
2624
2625 if (SrcTy.getSizeInBits() > 64) {
2626 // This should be an extract of an s128, which is like a vector extract.
2627 if (SrcTy.getSizeInBits() != 128)
2628 return false;
2629 // Only support extracting 64 bits from an s128 at the moment.
2630 if (DstTy.getSizeInBits() != 64)
2631 return false;
2632
2633 unsigned Offset = I.getOperand(2).getImm();
2634 if (Offset % 64 != 0)
2635 return false;
2636
2637 // Check we have the right regbank always.
2638 const RegisterBank &SrcRB = *RBI.getRegBank(SrcReg, MRI, TRI);
2639 const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
2640 assert(SrcRB.getID() == DstRB.getID() && "Wrong extract regbank!");
2641
2642 if (SrcRB.getID() == AArch64::GPRRegBankID) {
2643 MIB.buildInstr(TargetOpcode::COPY, {DstReg}, {})
2644 .addUse(SrcReg, 0, Offset == 0 ? AArch64::sube64 : AArch64::subo64);
2645 I.eraseFromParent();
2646 return true;
2647 }
2648
2649 // Emit the same code as a vector extract.
2650 // Offset must be a multiple of 64.
2651 unsigned LaneIdx = Offset / 64;
2652 MachineInstr *Extract = emitExtractVectorElt(
2653 DstReg, DstRB, LLT::scalar(64), SrcReg, LaneIdx, MIB);
2654 if (!Extract)
2655 return false;
2656 I.eraseFromParent();
2657 return true;
2658 }
2659
2660 I.setDesc(TII.get(SrcSize == 64 ? AArch64::UBFMXri : AArch64::UBFMWri));
2661 MachineInstrBuilder(MF, I).addImm(I.getOperand(2).getImm() +
2662 Ty.getSizeInBits() - 1);
2663
2664 if (SrcSize < 64) {
2665 assert(SrcSize == 32 && DstTy.getSizeInBits() == 16 &&
2666 "unexpected G_EXTRACT types");
2667 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2668 }
2669
2670 DstReg = MRI.createGenericVirtualRegister(LLT::scalar(64));
2671 MIB.setInsertPt(MIB.getMBB(), std::next(I.getIterator()));
2672 MIB.buildInstr(TargetOpcode::COPY, {I.getOperand(0).getReg()}, {})
2673 .addReg(DstReg, 0, AArch64::sub_32);
2674 RBI.constrainGenericRegister(I.getOperand(0).getReg(),
2675 AArch64::GPR32RegClass, MRI);
2676 I.getOperand(0).setReg(DstReg);
2677
2678 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2679 }
2680
2681 case TargetOpcode::G_INSERT: {
2682 LLT SrcTy = MRI.getType(I.getOperand(2).getReg());
2683 LLT DstTy = MRI.getType(I.getOperand(0).getReg());
2684 unsigned DstSize = DstTy.getSizeInBits();
2685 // Larger inserts are vectors, same-size ones should be something else by
2686 // now (split up or turned into COPYs).
2687 if (Ty.getSizeInBits() > 64 || SrcTy.getSizeInBits() > 32)
2688 return false;
2689
2690 I.setDesc(TII.get(DstSize == 64 ? AArch64::BFMXri : AArch64::BFMWri));
2691 unsigned LSB = I.getOperand(3).getImm();
2692 unsigned Width = MRI.getType(I.getOperand(2).getReg()).getSizeInBits();
2693 I.getOperand(3).setImm((DstSize - LSB) % DstSize);
2694 MachineInstrBuilder(MF, I).addImm(Width - 1);
2695
2696 if (DstSize < 64) {
2697 assert(DstSize == 32 && SrcTy.getSizeInBits() == 16 &&
2698 "unexpected G_INSERT types");
2699 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2700 }
2701
2702 Register SrcReg = MRI.createGenericVirtualRegister(LLT::scalar(64));
2703 BuildMI(MBB, I.getIterator(), I.getDebugLoc(),
2704 TII.get(AArch64::SUBREG_TO_REG))
2705 .addDef(SrcReg)
2706 .addImm(0)
2707 .addUse(I.getOperand(2).getReg())
2708 .addImm(AArch64::sub_32);
2709 RBI.constrainGenericRegister(I.getOperand(2).getReg(),
2710 AArch64::GPR32RegClass, MRI);
2711 I.getOperand(2).setReg(SrcReg);
2712
2713 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2714 }
2715 case TargetOpcode::G_FRAME_INDEX: {
2716 // allocas and G_FRAME_INDEX are only supported in addrspace(0).
2717 if (Ty != LLT::pointer(0, 64)) {
2718 LLVM_DEBUG(dbgs() << "G_FRAME_INDEX pointer has type: " << Ty
2719 << ", expected: " << LLT::pointer(0, 64) << '\n');
2720 return false;
2721 }
2722 I.setDesc(TII.get(AArch64::ADDXri));
2723
2724 // MOs for a #0 shifted immediate.
2725 I.addOperand(MachineOperand::CreateImm(0));
2726 I.addOperand(MachineOperand::CreateImm(0));
2727
2728 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2729 }
2730
2731 case TargetOpcode::G_GLOBAL_VALUE: {
2732 auto GV = I.getOperand(1).getGlobal();
2733 if (GV->isThreadLocal())
2734 return selectTLSGlobalValue(I, MRI);
2735
2736 unsigned OpFlags = STI.ClassifyGlobalReference(GV, TM);
2737 if (OpFlags & AArch64II::MO_GOT) {
2738 I.setDesc(TII.get(AArch64::LOADgot));
2739 I.getOperand(1).setTargetFlags(OpFlags);
2740 } else if (TM.getCodeModel() == CodeModel::Large) {
2741 // Materialize the global using movz/movk instructions.
2742 materializeLargeCMVal(I, GV, OpFlags);
2743 I.eraseFromParent();
2744 return true;
2745 } else if (TM.getCodeModel() == CodeModel::Tiny) {
2746 I.setDesc(TII.get(AArch64::ADR));
2747 I.getOperand(1).setTargetFlags(OpFlags);
2748 } else {
2749 I.setDesc(TII.get(AArch64::MOVaddr));
2750 I.getOperand(1).setTargetFlags(OpFlags | AArch64II::MO_PAGE);
2751 MachineInstrBuilder MIB(MF, I);
2752 MIB.addGlobalAddress(GV, I.getOperand(1).getOffset(),
2753 OpFlags | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
2754 }
2755 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2756 }
2757
2758 case TargetOpcode::G_ZEXTLOAD:
2759 case TargetOpcode::G_LOAD:
2760 case TargetOpcode::G_STORE: {
2761 GLoadStore &LdSt = cast<GLoadStore>(I);
2762 bool IsZExtLoad = I.getOpcode() == TargetOpcode::G_ZEXTLOAD;
2763 LLT PtrTy = MRI.getType(LdSt.getPointerReg());
2764
2765 if (PtrTy != LLT::pointer(0, 64)) {
2766 LLVM_DEBUG(dbgs() << "Load/Store pointer has type: " << PtrTy
2767 << ", expected: " << LLT::pointer(0, 64) << '\n');
2768 return false;
2769 }
2770
2771 uint64_t MemSizeInBytes = LdSt.getMemSize();
2772 unsigned MemSizeInBits = LdSt.getMemSizeInBits();
2773 AtomicOrdering Order = LdSt.getMMO().getSuccessOrdering();
2774
2775 // Need special instructions for atomics that affect ordering.
2776 if (Order != AtomicOrdering::NotAtomic &&
2777 Order != AtomicOrdering::Unordered &&
2778 Order != AtomicOrdering::Monotonic) {
2779 assert(!isa<GZExtLoad>(LdSt));
2780 if (MemSizeInBytes > 64)
2781 return false;
2782
2783 if (isa<GLoad>(LdSt)) {
2784 static unsigned Opcodes[] = {AArch64::LDARB, AArch64::LDARH,
2785 AArch64::LDARW, AArch64::LDARX};
2786 I.setDesc(TII.get(Opcodes[Log2_32(MemSizeInBytes)]));
2787 } else {
2788 static unsigned Opcodes[] = {AArch64::STLRB, AArch64::STLRH,
2789 AArch64::STLRW, AArch64::STLRX};
2790 Register ValReg = LdSt.getReg(0);
2791 if (MRI.getType(ValReg).getSizeInBits() == 64 && MemSizeInBits != 64) {
2792 // Emit a subreg copy of 32 bits.
2793 Register NewVal = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
2794 MIB.buildInstr(TargetOpcode::COPY, {NewVal}, {})
2795 .addReg(I.getOperand(0).getReg(), 0, AArch64::sub_32);
2796 I.getOperand(0).setReg(NewVal);
2797 }
2798 I.setDesc(TII.get(Opcodes[Log2_32(MemSizeInBytes)]));
2799 }
2800 constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2801 return true;
2802 }
2803
2804#ifndef NDEBUG
2805 const Register PtrReg = LdSt.getPointerReg();
2806 const RegisterBank &PtrRB = *RBI.getRegBank(PtrReg, MRI, TRI);
2807 // Check that the pointer register is valid.
2808 assert(PtrRB.getID() == AArch64::GPRRegBankID &&
2809 "Load/Store pointer operand isn't a GPR");
2810 assert(MRI.getType(PtrReg).isPointer() &&
2811 "Load/Store pointer operand isn't a pointer");
2812#endif
2813
2814 const Register ValReg = LdSt.getReg(0);
2815 const LLT ValTy = MRI.getType(ValReg);
2816 const RegisterBank &RB = *RBI.getRegBank(ValReg, MRI, TRI);
2817
2818 // The code below doesn't support truncating stores, so we need to split it
2819 // again.
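// [Added sketch, hedged, not from the original source] A truncating store such
// as
//   G_STORE %val:fpr(s64), %ptr :: (store (s32))
// is handled below by first copying the low part out with a subregister copy,
// e.g.
//   %narrow:fpr32 = COPY %val.ssub
// and storing %narrow instead, so the selected store matches the memory width.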
2820 if (isa<GStore>(LdSt) && ValTy.getSizeInBits() > MemSizeInBits) {
2821 unsigned SubReg;
2822 LLT MemTy = LdSt.getMMO().getMemoryType();
2823 auto *RC = getRegClassForTypeOnBank(MemTy, RB);
2824 if (!getSubRegForClass(RC, TRI, SubReg))
2825 return false;
2826
2827 // Generate a subreg copy.
2828 auto Copy = MIB.buildInstr(TargetOpcode::COPY, {MemTy}, {})
2829 .addReg(ValReg, 0, SubReg)
2830 .getReg(0);
2831 RBI.constrainGenericRegister(Copy, *RC, MRI);
2832 LdSt.getOperand(0).setReg(Copy);
2833 } else if (isa<GLoad>(LdSt) && ValTy.getSizeInBits() > MemSizeInBits) {
2834 // If this is an any-extending load from the FPR bank, split it into a regular
2835 // load + extend.
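// [Added sketch, hedged, not from the original source] e.g.
//   %val:fpr(s64) = G_LOAD %ptr :: (load (s32))
// becomes, approximately,
//   %narrow:fpr(s32) = G_LOAD %ptr :: (load (s32))
//   %val             = SUBREG_TO_REG 0, %narrow, ssub
// so the load keeps its true memory width and the widening is made explicit.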
2836 if (RB.getID() == AArch64::FPRRegBankID) {
2837 unsigned SubReg;
2838 LLT MemTy = LdSt.getMMO().getMemoryType();
2839 auto *RC = getRegClassForTypeOnBank(MemTy, RB);
2840 if (!getSubRegForClass(RC, TRI, SubReg))
2841 return false;
2842 Register OldDst = LdSt.getReg(0);
2843 Register NewDst =
2844 MRI.createGenericVirtualRegister(LdSt.getMMO().getMemoryType());
2845 LdSt.getOperand(0).setReg(NewDst);
2846 MRI.setRegBank(NewDst, RB);
2847 // Generate a SUBREG_TO_REG to extend it.
2848 MIB.setInsertPt(MIB.getMBB(), std::next(LdSt.getIterator()));
2849 MIB.buildInstr(AArch64::SUBREG_TO_REG, {OldDst}, {})
2850 .addImm(0)
2851 .addUse(NewDst)
2852 .addImm(SubReg);
2853 auto SubRegRC = getRegClassForTypeOnBank(MRI.getType(OldDst), RB);
2854 RBI.constrainGenericRegister(OldDst, *SubRegRC, MRI);
2855 MIB.setInstr(LdSt);
2856 }
2857 }
2858
2859 // Helper lambda for partially selecting I. Either returns the original
2860 // instruction with an updated opcode, or a new instruction.
2861 auto SelectLoadStoreAddressingMode = [&]() -> MachineInstr * {
2862 bool IsStore = isa<GStore>(I);
2863 const unsigned NewOpc =
2864 selectLoadStoreUIOp(I.getOpcode(), RB.getID(), MemSizeInBits);
2865 if (NewOpc == I.getOpcode())
2866 return nullptr;
2867 // Check if we can fold anything into the addressing mode.
2868 auto AddrModeFns =
2869 selectAddrModeIndexed(I.getOperand(1), MemSizeInBytes);
2870 if (!AddrModeFns) {
2871 // Can't fold anything. Use the original instruction.
2872 I.setDesc(TII.get(NewOpc));
2873 I.addOperand(MachineOperand::CreateImm(0));
2874 return &I;
2875 }
2876
2877 // Folded something. Create a new instruction and return it.
2878 auto NewInst = MIB.buildInstr(NewOpc, {}, {}, I.getFlags());
2879 Register CurValReg = I.getOperand(0).getReg();
2880 IsStore ? NewInst.addUse(CurValReg) : NewInst.addDef(CurValReg);
2881 NewInst.cloneMemRefs(I);
2882 for (auto &Fn : *AddrModeFns)
2883 Fn(NewInst);
2884 I.eraseFromParent();
2885 return &*NewInst;
2886 };
2887
2888 MachineInstr *LoadStore = SelectLoadStoreAddressingMode();
2889 if (!LoadStore)
2890 return false;
2891
2892 // If we're storing a 0, use WZR/XZR.
2893 if (Opcode == TargetOpcode::G_STORE) {
2894 auto CVal = getIConstantVRegValWithLookThrough(
2895 LoadStore->getOperand(0).getReg(), MRI);
2896 if (CVal && CVal->Value == 0) {
2897 switch (LoadStore->getOpcode()) {
2898 case AArch64::STRWui:
2899 case AArch64::STRHHui:
2900 case AArch64::STRBBui:
2901 LoadStore->getOperand(0).setReg(AArch64::WZR);
2902 break;
2903 case AArch64::STRXui:
2904 LoadStore->getOperand(0).setReg(AArch64::XZR);
2905 break;
2906 }
2907 }
2908 }
2909
2910 if (IsZExtLoad) {
2911 // The zextload from a smaller type to i32 should be handled by the
2912 // importer.
2913 if (MRI.getType(LoadStore->getOperand(0).getReg()).getSizeInBits() != 64)
2914 return false;
2915 // If we have a ZEXTLOAD then change the load's type to be a narrower reg
2916 // and zero_extend with SUBREG_TO_REG.
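// [Added sketch, hedged, not from the original source] For example
//   %dst(s64) = G_ZEXTLOAD %ptr :: (load (s32))
// ends up roughly as
//   %ld:gpr32 = LDRWui %ptr, 0              ; 32-bit load zero-fills the top
//   %dst      = SUBREG_TO_REG 0, %ld, sub_32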
2917 Register LdReg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
2918 Register DstReg = LoadStore->getOperand(0).getReg();
2919 LoadStore->getOperand(0).setReg(LdReg);
2920
2921 MIB.setInsertPt(MIB.getMBB(), std::next(LoadStore->getIterator()));
2922 MIB.buildInstr(AArch64::SUBREG_TO_REG, {DstReg}, {})
2923 .addImm(0)
2924 .addUse(LdReg)
2925 .addImm(AArch64::sub_32);
2926 constrainSelectedInstRegOperands(*LoadStore, TII, TRI, RBI);
2927 return RBI.constrainGenericRegister(DstReg, AArch64::GPR64allRegClass,
2928 MRI);
2929 }
2930 return constrainSelectedInstRegOperands(*LoadStore, TII, TRI, RBI);
2931 }
2932
2933 case TargetOpcode::G_SMULH:
2934 case TargetOpcode::G_UMULH: {
2935 // Reject the various things we don't support yet.
2936 if (unsupportedBinOp(I, RBI, MRI, TRI))
2937 return false;
2938
2939 const Register DefReg = I.getOperand(0).getReg();
2940 const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI);
2941
2942 if (RB.getID() != AArch64::GPRRegBankID) {
2943 LLVM_DEBUG(dbgs() << "G_[SU]MULH on bank: " << RB << ", expected: GPR\n");
2944 return false;
2945 }
2946
2947 if (Ty != LLT::scalar(64)) {
2948 LLVM_DEBUG(dbgs() << "G_[SU]MULH has type: " << Ty
2949 << ", expected: " << LLT::scalar(64) << '\n');
2950 return false;
2951 }
2952
2953 unsigned NewOpc = I.getOpcode() == TargetOpcode::G_SMULH ? AArch64::SMULHrr
2954 : AArch64::UMULHrr;
2955 I.setDesc(TII.get(NewOpc));
2956
2957 // Now that we selected an opcode, we need to constrain the register
2958 // operands to use appropriate classes.
2959 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2960 }
2961 case TargetOpcode::G_LSHR:
2962 case TargetOpcode::G_ASHR:
2963 if (MRI.getType(I.getOperand(0).getReg()).isVector())
2964 return selectVectorAshrLshr(I, MRI);
2965 LLVM_FALLTHROUGH;
2966 case TargetOpcode::G_SHL:
2967 if (Opcode == TargetOpcode::G_SHL &&
2968 MRI.getType(I.getOperand(0).getReg()).isVector())
2969 return selectVectorSHL(I, MRI);
2970
2971 // These shifts were legalized to have 64 bit shift amounts because we
2972 // want to take advantage of the selection patterns that assume the
2973 // immediates are s64s, however, selectBinaryOp will assume both operands
2974 // will have the same bit size.
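// [Added sketch, hedged, not from the original source] For a 32-bit shift
// whose amount was widened to s64, e.g.
//   %r(s32) = G_SHL %x(s32), %amt(s64)
// the block below inserts a 64->32 truncating copy of %amt
//   %amt32:gpr(s32) = COPY %amt.sub_32
// so both operands have matching widths before the imported patterns run.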
2975 {
2976 Register SrcReg = I.getOperand(1).getReg();
2977 Register ShiftReg = I.getOperand(2).getReg();
2978 const LLT ShiftTy = MRI.getType(ShiftReg);
2979 const LLT SrcTy = MRI.getType(SrcReg);
2980 if (!SrcTy.isVector() && SrcTy.getSizeInBits() == 32 &&
2981 ShiftTy.getSizeInBits() == 64) {
2982 assert(!ShiftTy.isVector() && "unexpected vector shift ty");
2983 // Insert a subregister copy to implement a 64->32 trunc
2984 auto Trunc = MIB.buildInstr(TargetOpcode::COPY, {SrcTy}, {})
2985 .addReg(ShiftReg, 0, AArch64::sub_32);
2986 MRI.setRegBank(Trunc.getReg(0), RBI.getRegBank(AArch64::GPRRegBankID));
2987 I.getOperand(2).setReg(Trunc.getReg(0));
2988 }
2989 }
2990 LLVM_FALLTHROUGH;
2991 case TargetOpcode::G_OR: {
2992 // Reject the various things we don't support yet.
2993 if (unsupportedBinOp(I, RBI, MRI, TRI))
2994 return false;
2995
2996 const unsigned OpSize = Ty.getSizeInBits();
2997
2998 const Register DefReg = I.getOperand(0).getReg();
2999 const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI);
3000
3001 const unsigned NewOpc = selectBinaryOp(I.getOpcode(), RB.getID(), OpSize);
3002 if (NewOpc == I.getOpcode())
3003 return false;
3004
3005 I.setDesc(TII.get(NewOpc));
3006 // FIXME: Should the type be always reset in setDesc?
3007
3008 // Now that we selected an opcode, we need to constrain the register
3009 // operands to use appropriate classes.
3010 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3011 }
3012
3013 case TargetOpcode::G_PTR_ADD: {
3014 emitADD(I.getOperand(0).getReg(), I.getOperand(1), I.getOperand(2), MIB);
3015 I.eraseFromParent();
3016 return true;
3017 }
3018 case TargetOpcode::G_SADDO:
3019 case TargetOpcode::G_UADDO:
3020 case TargetOpcode::G_SSUBO:
3021 case TargetOpcode::G_USUBO: {
3022 // Emit the operation and get the correct condition code.
3023 auto OpAndCC = emitOverflowOp(Opcode, I.getOperand(0).getReg(),
3024 I.getOperand(2), I.getOperand(3), MIB);
3025
3026 // Now, put the overflow result in the register given by the first operand
3027 // to the overflow op. CSINC increments the result when the predicate is
3028 // false, so to get the increment when it's true, we need to use the
3029 // inverse. In this case, we want to increment when carry is set.
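// [Added note, not from the original source] CSINC Wd, WZR, WZR, cc produces 1
// when cc is false and 0 when it is true, so passing the inverted condition
// below yields 1 exactly when the condition code returned by emitOverflowOp is
// satisfied, which is the boolean the G_*O ops expect.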
3030 Register ZReg = AArch64::WZR;
3031 emitCSINC(/*Dst=*/I.getOperand(1).getReg(), /*Src1=*/ZReg, /*Src2=*/ZReg,
3032 getInvertedCondCode(OpAndCC.second), MIB);
3033 I.eraseFromParent();
3034 return true;
3035 }
3036
3037 case TargetOpcode::G_PTRMASK: {
3038 Register MaskReg = I.getOperand(2).getReg();
3039 Optional<int64_t> MaskVal = getIConstantVRegSExtVal(MaskReg, MRI);
3040 // TODO: Implement arbitrary cases
3041 if (!MaskVal || !isShiftedMask_64(*MaskVal))
3042 return false;
3043
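// [Added illustration, hedged, not from the original source] e.g. aligning a
// pointer down to 16 bytes,
//   %aligned = G_PTRMASK %ptr, -16
// selects to
//   %aligned = ANDXri %ptr, <logical-immediate encoding of 0xFFFFFFFFFFFFFFF0>
// since -16 is a contiguous (shifted) run of ones.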
3044 uint64_t Mask = *MaskVal;
3045 I.setDesc(TII.get(AArch64::ANDXri));
3046 I.getOperand(2).ChangeToImmediate(
3047 AArch64_AM::encodeLogicalImmediate(Mask, 64));
3048
3049 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3050 }
3051 case TargetOpcode::G_PTRTOINT:
3052 case TargetOpcode::G_TRUNC: {
3053 const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
3054 const LLT SrcTy = MRI.getType(I.getOperand(1).getReg());
3055
3056 const Register DstReg = I.getOperand(0).getReg();
3057 const Register SrcReg = I.getOperand(1).getReg();
3058
3059 const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
3060 const RegisterBank &SrcRB = *RBI.getRegBank(SrcReg, MRI, TRI);
3061
3062 if (DstRB.getID() != SrcRB.getID()) {
3063 LLVM_DEBUG(
3064 dbgs() << "G_TRUNC/G_PTRTOINT input/output on different banks\n");
3065 return false;
3066 }
3067
3068 if (DstRB.getID() == AArch64::GPRRegBankID) {
3069 const TargetRegisterClass *DstRC = getRegClassForTypeOnBank(DstTy, DstRB);
3070 if (!DstRC)
3071 return false;
3072
3073 const TargetRegisterClass *SrcRC = getRegClassForTypeOnBank(SrcTy, SrcRB);
3074 if (!SrcRC)
3075 return false;
3076
3077 if (!RBI.constrainGenericRegister(SrcReg, *SrcRC, MRI) ||
3078 !RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) {
3079 LLVM_DEBUG(dbgs() << "Failed to constrain G_TRUNC/G_PTRTOINT\n");
3080 return false;
3081 }
3082
3083 if (DstRC == SrcRC) {
3084 // Nothing to be done
3085 } else if (Opcode == TargetOpcode::G_TRUNC && DstTy == LLT::scalar(32) &&
3086 SrcTy == LLT::scalar(64)) {
3087 llvm_unreachable("TableGen can import this case");
3088 return false;
3089 } else if (DstRC == &AArch64::GPR32RegClass &&
3090 SrcRC == &AArch64::GPR64RegClass) {
3091 I.getOperand(1).setSubReg(AArch64::sub_32);
3092 } else {
3093 LLVM_DEBUG(
3094 dbgs() << "Unhandled mismatched classes in G_TRUNC/G_PTRTOINT\n");
3095 return false;
3096 }
3097
3098 I.setDesc(TII.get(TargetOpcode::COPY));
3099 return true;
3100 } else if (DstRB.getID() == AArch64::FPRRegBankID) {
3101 if (DstTy == LLT::fixed_vector(4, 16) &&
3102 SrcTy == LLT::fixed_vector(4, 32)) {
3103 I.setDesc(TII.get(AArch64::XTNv4i16));
3104 constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3105 return true;
3106 }
3107
3108 if (!SrcTy.isVector() && SrcTy.getSizeInBits() == 128) {
3109 MachineInstr *Extract = emitExtractVectorElt(
3110 DstReg, DstRB, LLT::scalar(DstTy.getSizeInBits()), SrcReg, 0, MIB);
3111 if (!Extract)
3112 return false;
3113 I.eraseFromParent();
3114 return true;
3115 }
3116
3117 // We might have a vector G_PTRTOINT, in which case just emit a COPY.
3118 if (Opcode == TargetOpcode::G_PTRTOINT) {
3119 assert(DstTy.isVector() && "Expected an FPR ptrtoint to be a vector");
3120 I.setDesc(TII.get(TargetOpcode::COPY));
3121 return selectCopy(I, TII, MRI, TRI, RBI);
3122 }
3123 }
3124
3125 return false;
3126 }
3127
3128 case TargetOpcode::G_ANYEXT: {
3129 if (selectUSMovFromExtend(I, MRI))
3130 return true;
3131
3132 const Register DstReg = I.getOperand(0).getReg();
3133 const Register SrcReg = I.getOperand(1).getReg();
3134
3135 const RegisterBank &RBDst = *RBI.getRegBank(DstReg, MRI, TRI);
3136 if (RBDst.getID() != AArch64::GPRRegBankID) {
3137 LLVM_DEBUG(dbgs() << "G_ANYEXT on bank: " << RBDst
3138 << ", expected: GPR\n");
3139 return false;
3140 }
3141
3142 const RegisterBank &RBSrc = *RBI.getRegBank(SrcReg, MRI, TRI);
3143 if (RBSrc.getID() != AArch64::GPRRegBankID) {
3144 LLVM_DEBUG(dbgs() << "G_ANYEXT on bank: " << RBSrc
3145 << ", expected: GPR\n");
3146 return false;
3147 }
3148
3149 const unsigned DstSize = MRI.getType(DstReg).getSizeInBits();
3150
3151 if (DstSize == 0) {
3152 LLVM_DEBUG(dbgs() << "G_ANYEXT operand has no size, not a gvreg?\n");
3153 return false;
3154 }
3155
3156 if (DstSize != 64 && DstSize > 32) {
3157 LLVM_DEBUG(dbgs() << "G_ANYEXT to size: " << DstSize
3158 << ", expected: 32 or 64\n");
3159 return false;
3160 }
3161 // At this point G_ANYEXT is just like a plain COPY, but we need
3162 // to explicitly form the 64-bit value if any.
3163 if (DstSize > 32) {
3164 Register ExtSrc = MRI.createVirtualRegister(&AArch64::GPR64allRegClass);
3165 BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::SUBREG_TO_REG))
3166 .addDef(ExtSrc)
3167 .addImm(0)
3168 .addUse(SrcReg)
3169 .addImm(AArch64::sub_32);
3170 I.getOperand(1).setReg(ExtSrc);
3171 }
3172 return selectCopy(I, TII, MRI, TRI, RBI);
3173 }
3174
3175 case TargetOpcode::G_ZEXT:
3176 case TargetOpcode::G_SEXT_INREG:
3177 case TargetOpcode::G_SEXT: {
3178 if (selectUSMovFromExtend(I, MRI))
3179 return true;
3180
3181 unsigned Opcode = I.getOpcode();
3182 const bool IsSigned = Opcode != TargetOpcode::G_ZEXT;
3183 const Register DefReg = I.getOperand(0).getReg();
3184 Register SrcReg = I.getOperand(1).getReg();
3185 const LLT DstTy = MRI.getType(DefReg);
3186 const LLT SrcTy = MRI.getType(SrcReg);
3187 unsigned DstSize = DstTy.getSizeInBits();
3188 unsigned SrcSize = SrcTy.getSizeInBits();
3189
3190 // SEXT_INREG has the same src reg size as dst, the size of the value to be
3191 // extended is encoded in the imm.
3192 if (Opcode == TargetOpcode::G_SEXT_INREG)
3193 SrcSize = I.getOperand(2).getImm();
3194
3195 if (DstTy.isVector())
3196 return false; // Should be handled by imported patterns.
3197
3198 assert((*RBI.getRegBank(DefReg, MRI, TRI)).getID() ==
3199 AArch64::GPRRegBankID &&
3200 "Unexpected ext regbank");
3201
3202 MachineInstr *ExtI;
3203
3204 // First check whether we're extending the result of a load whose dest type is
3205 // smaller than 32 bits; if so, this zext is redundant. GPR32 is the smallest
3206 // GPR register on AArch64 and all loads which are smaller automatically
3207 // zero-extend the upper bits. E.g.
3208 // %v(s8) = G_LOAD %p, :: (load 1)
3209 // %v2(s32) = G_ZEXT %v(s8)
3210 if (!IsSigned) {
3211 auto *LoadMI = getOpcodeDef(TargetOpcode::G_LOAD, SrcReg, MRI);
3212 bool IsGPR =
3213 RBI.getRegBank(SrcReg, MRI, TRI)->getID() == AArch64::GPRRegBankID;
3214 if (LoadMI && IsGPR) {
3215 const MachineMemOperand *MemOp = *LoadMI->memoperands_begin();
3216 unsigned BytesLoaded = MemOp->getSize();
3217 if (BytesLoaded < 4 && SrcTy.getSizeInBytes() == BytesLoaded)
3218 return selectCopy(I, TII, MRI, TRI, RBI);
3219 }
3220
3221 // For the 32-bit -> 64-bit case, we can emit a mov (ORRWrs)
3222 // + SUBREG_TO_REG.
3223 //
3224 // If we are zero extending from 32 bits to 64 bits, it's possible that
3225 // the instruction implicitly does the zero extend for us. In that case,
3226 // we only need the SUBREG_TO_REG.
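// [Added illustration, hedged, not from the original source] The general form
// emitted below is
//   %zero:gpr32 = ORRWrs WZR, %src, 0    ; 32-bit mov, clears bits 63:32
//   %dst:gpr64  = SUBREG_TO_REG 0, %zero, sub_32
// and the ORRWrs is dropped when isDef32 proves the defining instruction
// already zeroed the upper half.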
3227 if (IsGPR && SrcSize == 32 && DstSize == 64) {
3228 // Unlike with the G_LOAD case, we don't want to look through copies
3229 // here. (See isDef32.)
3230 MachineInstr *Def = MRI.getVRegDef(SrcReg);
3231 Register SubregToRegSrc = SrcReg;
3232
3233 // Does the instruction implicitly zero extend?
3234 if (!Def || !isDef32(*Def)) {
3235 // No. Zero out using an OR.
3236 Register OrDst = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
3237 const Register ZReg = AArch64::WZR;
3238 MIB.buildInstr(AArch64::ORRWrs, {OrDst}, {ZReg, SrcReg}).addImm(0);
3239 SubregToRegSrc = OrDst;
3240 }
3241
3242 MIB.buildInstr(AArch64::SUBREG_TO_REG, {DefReg}, {})
3243 .addImm(0)
3244 .addUse(SubregToRegSrc)
3245 .addImm(AArch64::sub_32);
3246
3247 if (!RBI.constrainGenericRegister(DefReg, AArch64::GPR64RegClass,
3248 MRI)) {
3249 LLVM_DEBUG(dbgs() << "Failed to constrain G_ZEXT destination\n");
3250 return false;
3251 }
3252
3253 if (!RBI.constrainGenericRegister(SrcReg, AArch64::GPR32RegClass,
3254 MRI)) {
3255 LLVM_DEBUG(dbgs() << "Failed to constrain G_ZEXT source\n");
3256 return false;
3257 }
3258
3259 I.eraseFromParent();
3260 return true;
3261 }
3262 }
3263
3264 if (DstSize == 64) {
3265 if (Opcode != TargetOpcode::G_SEXT_INREG) {
3266 // FIXME: Can we avoid manually doing this?
3267 if (!RBI.constrainGenericRegister(SrcReg, AArch64::GPR32RegClass,
3268 MRI)) {
3269 LLVM_DEBUG(dbgs() << "Failed to constrain " << TII.getName(Opcode)
3270 << " operand\n");
3271 return false;
3272 }
3273 SrcReg = MIB.buildInstr(AArch64::SUBREG_TO_REG,
3274 {&AArch64::GPR64RegClass}, {})
3275 .addImm(0)
3276 .addUse(SrcReg)
3277 .addImm(AArch64::sub_32)
3278 .getReg(0);
3279 }
3280
3281 ExtI = MIB.buildInstr(IsSigned ? AArch64::SBFMXri : AArch64::UBFMXri,
3282 {DefReg}, {SrcReg})
3283 .addImm(0)
3284 .addImm(SrcSize - 1);
3285 } else if (DstSize <= 32) {
3286 ExtI = MIB.buildInstr(IsSigned ? AArch64::SBFMWri : AArch64::UBFMWri,
3287 {DefReg}, {SrcReg})
3288 .addImm(0)
3289 .addImm(SrcSize - 1);
3290 } else {
3291 return false;
3292 }
3293
3294 constrainSelectedInstRegOperands(*ExtI, TII, TRI, RBI);
3295 I.eraseFromParent();
3296 return true;
3297 }
3298
3299 case TargetOpcode::G_SITOFP:
3300 case TargetOpcode::G_UITOFP:
3301 case TargetOpcode::G_FPTOSI:
3302 case TargetOpcode::G_FPTOUI: {
3303 const LLT DstTy = MRI.getType(I.getOperand(0).getReg()),
3304 SrcTy = MRI.getType(I.getOperand(1).getReg());
3305 const unsigned NewOpc = selectFPConvOpc(Opcode, DstTy, SrcTy);
3306 if (NewOpc == Opcode)
3307 return false;
3308
3309 I.setDesc(TII.get(NewOpc));
3310 constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3311 I.setFlags(MachineInstr::NoFPExcept);
3312
3313 return true;
3314 }
3315
3316 case TargetOpcode::G_FREEZE:
3317 return selectCopy(I, TII, MRI, TRI, RBI);
3318
3319 case TargetOpcode::G_INTTOPTR:
3320 // The importer is currently unable to import pointer types since they
3321 // didn't exist in SelectionDAG.
3322 return selectCopy(I, TII, MRI, TRI, RBI);
3323
3324 case TargetOpcode::G_BITCAST:
3325 // Imported SelectionDAG rules can handle every bitcast except those that
3326 // bitcast from a type to the same type. Ideally, these shouldn't occur
3327 // but we might not run an optimizer that deletes them. The other exception
3328 // is bitcasts involving pointer types, as SelectionDAG has no knowledge
3329 // of them.
3330 return selectCopy(I, TII, MRI, TRI, RBI);
3331
3332 case TargetOpcode::G_SELECT: {
3333 auto &Sel = cast<GSelect>(I);
3334 if (MRI.getType(Sel.getCondReg()) != LLT::scalar(1)) {
3335 LLVM_DEBUG(dbgs() << "G_SELECT cond has type: " << Ty
3336 << ", expected: " << LLT::scalar(1) << '\n');
3337 return false;
3338 }
3339
3340 const Register CondReg = Sel.getCondReg();
3341 const Register TReg = Sel.getTrueReg();
3342 const Register FReg = Sel.getFalseReg();
3343
3344 if (tryOptSelect(Sel))
3345 return true;
3346
3347 // Make sure to use an unused vreg instead of wzr, so that the peephole
3348 // optimizations will be able to optimize these.
3349 Register DeadVReg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
3350 auto TstMI = MIB.buildInstr(AArch64::ANDSWri, {DeadVReg}, {CondReg})
3351 .addImm(AArch64_AM::encodeLogicalImmediate(1, 32));
3352 constrainSelectedInstRegOperands(*TstMI, TII, TRI, RBI);
3353 if (!emitSelect(Sel.getReg(0), TReg, FReg, AArch64CC::NE, MIB))
3354 return false;
3355 Sel.eraseFromParent();
3356 return true;
3357 }
3358 case TargetOpcode::G_ICMP: {
3359 if (Ty.isVector())
3360 return selectVectorICmp(I, MRI);
3361
3362 if (Ty != LLT::scalar(32)) {
3363 LLVM_DEBUG(dbgs() << "G_ICMP result has type: " << Ty
3364 << ", expected: " << LLT::scalar(32) << '\n');
3365 return false;
3366 }
3367
3368 auto Pred = static_cast<CmpInst::Predicate>(I.getOperand(1).getPredicate());
3369 const AArch64CC::CondCode InvCC =
3370 changeICMPPredToAArch64CC(CmpInst::getInversePredicate(Pred));
3371 emitIntegerCompare(I.getOperand(2), I.getOperand(3), I.getOperand(1), MIB);
3372 emitCSINC(/*Dst=*/I.getOperand(0).getReg(), /*Src1=*/AArch64::WZR,
3373 /*Src2=*/AArch64::WZR, InvCC, MIB);
3374 I.eraseFromParent();
3375 return true;
3376 }
3377
3378 case TargetOpcode::G_FCMP: {
3379 CmpInst::Predicate Pred =
3380 static_cast<CmpInst::Predicate>(I.getOperand(1).getPredicate());
3381 if (!emitFPCompare(I.getOperand(2).getReg(), I.getOperand(3).getReg(), MIB,
3382 Pred) ||
3383 !emitCSetForFCmp(I.getOperand(0).getReg(), Pred, MIB))
3384 return false;
3385 I.eraseFromParent();
3386 return true;
3387 }
3388 case TargetOpcode::G_VASTART:
3389 return STI.isTargetDarwin() ? selectVaStartDarwin(I, MF, MRI)
3390 : selectVaStartAAPCS(I, MF, MRI);
3391 case TargetOpcode::G_INTRINSIC:
3392 return selectIntrinsic(I, MRI);
3393 case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS:
3394 return selectIntrinsicWithSideEffects(I, MRI);
3395 case TargetOpcode::G_IMPLICIT_DEF: {
3396 I.setDesc(TII.get(TargetOpcode::IMPLICIT_DEF));
3397 const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
3398 const Register DstReg = I.getOperand(0).getReg();
3399 const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
3400 const TargetRegisterClass *DstRC = getRegClassForTypeOnBank(DstTy, DstRB);
3401 RBI.constrainGenericRegister(DstReg, *DstRC, MRI);
3402 return true;
3403 }
3404 case TargetOpcode::G_BLOCK_ADDR: {
3405 if (TM.getCodeModel() == CodeModel::Large) {
3406 materializeLargeCMVal(I, I.getOperand(1).getBlockAddress(), 0);
3407 I.eraseFromParent();
3408 return true;
3409 } else {
3410 I.setDesc(TII.get(AArch64::MOVaddrBA));
3411 auto MovMI = BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::MOVaddrBA),
3412 I.getOperand(0).getReg())
3413 .addBlockAddress(I.getOperand(1).getBlockAddress(),
3414 /* Offset */ 0, AArch64II::MO_PAGE)
3415 .addBlockAddress(
3416 I.getOperand(1).getBlockAddress(), /* Offset */ 0,
3417 AArch64II::MO_NC | AArch64II::MO_PAGEOFF);
3418 I.eraseFromParent();
3419 return constrainSelectedInstRegOperands(*MovMI, TII, TRI, RBI);
3420 }
3421 }
3422 case AArch64::G_DUP: {
3423 // When the scalar of G_DUP is an s8/s16 gpr, it can't be selected by the
3424 // imported patterns, so do it manually here. Avoiding the s16 gpr in the
3425 // first place is difficult, because at RBS we may end up pessimizing the fpr
3426 // case if we decided to add an anyextend to fix this. Manual selection is the most
3427 // robust solution for now.
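// [Added note, not from the original source] Only the byte/halfword gpr-source
// splats are handled here, e.g.
//   %v(<8 x s8>) = G_DUP %x:gpr(s8)   ->   DUPv8i8gpr
// wider element sizes and fpr-source scalars are left to the imported
// patterns.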
3428 if (RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI)->getID() !=
3429 AArch64::GPRRegBankID)
3430 return false; // We expect the fpr regbank case to be imported.
3431 LLT VecTy = MRI.getType(I.getOperand(0).getReg());
3432 if (VecTy == LLT::fixed_vector(8, 8))
3433 I.setDesc(TII.get(AArch64::DUPv8i8gpr));
3434 else if (VecTy == LLT::fixed_vector(16, 8))
3435 I.setDesc(TII.get(AArch64::DUPv16i8gpr));
3436 else if (VecTy == LLT::fixed_vector(4, 16))
3437 I.setDesc(TII.get(AArch64::DUPv4i16gpr));
3438 else if (VecTy == LLT::fixed_vector(8, 16))
3439 I.setDesc(TII.get(AArch64::DUPv8i16gpr));
3440 else
3441 return false;
3442 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3443 }
3444 case TargetOpcode::G_INTRINSIC_TRUNC:
3445 return selectIntrinsicTrunc(I, MRI);
3446 case TargetOpcode::G_INTRINSIC_ROUND:
3447 return selectIntrinsicRound(I, MRI);
3448 case TargetOpcode::G_BUILD_VECTOR:
3449 return selectBuildVector(I, MRI);
3450 case TargetOpcode::G_MERGE_VALUES:
3451 return selectMergeValues(I, MRI);
3452 case TargetOpcode::G_UNMERGE_VALUES:
3453 return selectUnmergeValues(I, MRI);
3454 case TargetOpcode::G_SHUFFLE_VECTOR:
3455 return selectShuffleVector(I, MRI);
3456 case TargetOpcode::G_EXTRACT_VECTOR_ELT:
3457 return selectExtractElt(I, MRI);
3458 case TargetOpcode::G_INSERT_VECTOR_ELT:
3459 return selectInsertElt(I, MRI);
3460 case TargetOpcode::G_CONCAT_VECTORS:
3461 return selectConcatVectors(I, MRI);
3462 case TargetOpcode::G_JUMP_TABLE:
3463 return selectJumpTable(I, MRI);
3464 case TargetOpcode::G_VECREDUCE_FADD:
3465 case TargetOpcode::G_VECREDUCE_ADD:
3466 return selectReduction(I, MRI);
3467 case TargetOpcode::G_MEMCPY:
3468 case TargetOpcode::G_MEMCPY_INLINE:
3469 case TargetOpcode::G_MEMMOVE:
3470 case TargetOpcode::G_MEMSET:
3471 assert(STI.hasMOPS() && "Shouldn't get here without +mops feature");
3472 return selectMOPS(I, MRI);
3473 }
3474
3475 return false;
3476}
3477
3478bool AArch64InstructionSelector::selectReduction(MachineInstr &I,
3479 MachineRegisterInfo &MRI) {
3480 Register VecReg = I.getOperand(1).getReg();
3481 LLT VecTy = MRI.getType(VecReg);
3482 if (I.getOpcode() == TargetOpcode::G_VECREDUCE_ADD) {
3483 // For <2 x i32> ADDPv2i32 generates an FPR64 value, so we need to emit
3484 // a subregister copy afterwards.
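// [Added sketch, hedged, not from the original source] Roughly:
//   %sum:fpr64 = ADDPv2i32 %vec, %vec    ; pairwise add, result lives in a D reg
//   %dst:fpr32 = COPY %sum.ssub          ; only the low 32-bit lane is the result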
3485 if (VecTy == LLT::fixed_vector(2, 32)) {
3486 Register DstReg = I.getOperand(0).getReg();
3487 auto AddP = MIB.buildInstr(AArch64::ADDPv2i32, {&AArch64::FPR64RegClass},
3488 {VecReg, VecReg});
3489 auto Copy = MIB.buildInstr(TargetOpcode::COPY, {DstReg}, {})
3490 .addReg(AddP.getReg(0), 0, AArch64::ssub)
3491 .getReg(0);
3492 RBI.constrainGenericRegister(Copy, AArch64::FPR32RegClass, MRI);
3493 I.eraseFromParent();
3494 return constrainSelectedInstRegOperands(*AddP, TII, TRI, RBI);
3495 }
3496
3497 unsigned Opc = 0;
3498 if (VecTy == LLT::fixed_vector(16, 8))
3499 Opc = AArch64::ADDVv16i8v;
3500 else if (VecTy == LLT::fixed_vector(8, 16))
3501 Opc = AArch64::ADDVv8i16v;
3502 else if (VecTy == LLT::fixed_vector(4, 32))
3503 Opc = AArch64::ADDVv4i32v;
3504 else if (VecTy == LLT::fixed_vector(2, 64))
3505 Opc = AArch64::ADDPv2i64p;
3506 else {
3507 LLVM_DEBUG(dbgs() << "Unhandled type for add reduction");
3508 return false;
3509 }
3510 I.setDesc(TII.get(Opc));
3511 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3512 }
3513
3514 if (I.getOpcode() == TargetOpcode::G_VECREDUCE_FADD) {
3515 unsigned Opc = 0;
3516 if (VecTy == LLT::fixed_vector(2, 32))
3517 Opc = AArch64::FADDPv2i32p;
3518 else if (VecTy == LLT::fixed_vector(2, 64))
3519 Opc = AArch64::FADDPv2i64p;
3520 else {
3521 LLVM_DEBUG(dbgs() << "Unhandled type for fadd reduction");
3522 return false;
3523 }
3524 I.setDesc(TII.get(Opc));
3525 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3526 }
3527 return false;
3528}
3529
3530bool AArch64InstructionSelector::selectMOPS(MachineInstr &GI,
3531 MachineRegisterInfo &MRI) {
3532 unsigned Mopcode;
3533 switch (GI.getOpcode()) {
3534 case TargetOpcode::G_MEMCPY:
3535 case TargetOpcode::G_MEMCPY_INLINE:
3536 Mopcode = AArch64::MOPSMemoryCopyPseudo;
3537 break;
3538 case TargetOpcode::G_MEMMOVE:
3539 Mopcode = AArch64::MOPSMemoryMovePseudo;
3540 break;
3541 case TargetOpcode::G_MEMSET:
3542 // For tagged memset see llvm.aarch64.mops.memset.tag
3543 Mopcode = AArch64::MOPSMemorySetPseudo;
3544 break;
3545 }
3546
3547 auto &DstPtr = GI.getOperand(0);
3548 auto &SrcOrVal = GI.getOperand(1);
3549 auto &Size = GI.getOperand(2);
3550
3551 // Create copies of the registers that can be clobbered.
3552 const Register DstPtrCopy = MRI.cloneVirtualRegister(DstPtr.getReg());
3553 const Register SrcValCopy = MRI.cloneVirtualRegister(SrcOrVal.getReg());
3554 const Register SizeCopy = MRI.cloneVirtualRegister(Size.getReg());
3555
3556 const bool IsSet = Mopcode == AArch64::MOPSMemorySetPseudo;
3557 const auto &SrcValRegClass =
3558 IsSet ? AArch64::GPR64RegClass : AArch64::GPR64commonRegClass;
3559
3560 // Constrain to specific registers
3561 RBI.constrainGenericRegister(DstPtrCopy, AArch64::GPR64commonRegClass, MRI);
3562 RBI.constrainGenericRegister(SrcValCopy, SrcValRegClass, MRI);
3563 RBI.constrainGenericRegister(SizeCopy, AArch64::GPR64RegClass, MRI);
3564
3565 MIB.buildCopy(DstPtrCopy, DstPtr);
3566 MIB.buildCopy(SrcValCopy, SrcOrVal);
3567 MIB.buildCopy(SizeCopy, Size);
3568
3569 // New instruction uses the copied registers because it must update them.
3570 // The defs are not used since they don't exist in G_MEM*. They are still
3571 // tied.
3572 // Note: order of operands is different from G_MEMSET, G_MEMCPY, G_MEMMOVE
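// [Added note, hedged, not from the original source] For a memset this builds
// something like
//   %dstDef, %sizeDef = MOPSMemorySetPseudo %dstCopy, %sizeCopy, %valCopy
// and the pseudo is believed to be expanded later into the SETP/SETM/SETE
// instruction sequence.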
3573 Register DefDstPtr = MRI.createVirtualRegister(&AArch64::GPR64commonRegClass);
3574 Register DefSize = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
3575 if (IsSet) {
3576 MIB.buildInstr(Mopcode, {DefDstPtr, DefSize},
3577 {DstPtrCopy, SizeCopy, SrcValCopy});
3578 } else {
3579 Register DefSrcPtr = MRI.createVirtualRegister(&SrcValRegClass);
3580 MIB.buildInstr(Mopcode, {DefDstPtr, DefSrcPtr, DefSize},
3581 {DstPtrCopy, SrcValCopy, SizeCopy});
3582 }
3583
3584 GI.eraseFromParent();
3585 return true;
3586}
3587
3588bool AArch64InstructionSelector::selectBrJT(MachineInstr &I,
3589 MachineRegisterInfo &MRI) {
3590 assert(I.getOpcode() == TargetOpcode::G_BRJT && "Expected G_BRJT");
3591 Register JTAddr = I.getOperand(0).getReg();
3592 unsigned JTI = I.getOperand(1).getIndex();
3593 Register Index = I.getOperand(2).getReg();
3594
3595 Register TargetReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
3596 Register ScratchReg = MRI.createVirtualRegister(&AArch64::GPR64spRegClass);
3597
3598 MF->getInfo<AArch64FunctionInfo>()->setJumpTableEntryInfo(JTI, 4, nullptr);
3599 auto JumpTableInst = MIB.buildInstr(AArch64::JumpTableDest32,
3600 {TargetReg, ScratchReg}, {JTAddr, Index})
3601 .addJumpTableIndex(JTI);
3602 // Build the indirect branch.
3603 MIB.buildInstr(AArch64::BR, {}, {TargetReg});
3604 I.eraseFromParent();
3605 return constrainSelectedInstRegOperands(*JumpTableInst, TII, TRI, RBI);
3606}
3607
3608bool AArch64InstructionSelector::selectJumpTable(MachineInstr &I,
3609 MachineRegisterInfo &MRI) {
3610 assert(I.getOpcode() == TargetOpcode::G_JUMP_TABLE && "Expected jump table");
3611 assert(I.getOperand(1).isJTI() && "Jump table op should have a JTI!");
3612
3613 Register DstReg = I.getOperand(0).getReg();
3614 unsigned JTI = I.getOperand(1).getIndex();
3615 // We generate a MOVaddrJT which will get expanded to an ADRP + ADD later.
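// [Added note, not from the original source] The two jump-table operands carry
// the MO_PAGE and MO_NC|MO_PAGEOFF flags respectively, mirroring the
// ADRP + ADD pair the pseudo expands to.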
3616 auto MovMI =
3617 MIB.buildInstr(AArch64::MOVaddrJT, {DstReg}, {})
3618 .addJumpTableIndex(JTI, AArch64II::MO_PAGE)
3619 .addJumpTableIndex(JTI, AArch64II::MO_NC | AArch64II::MO_PAGEOFF);
3620 I.eraseFromParent();
3621 return constrainSelectedInstRegOperands(*MovMI, TII, TRI, RBI);
3622}
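// Illustrative note (not part of the original source): MOVaddrJT is a pseudo
// that later pseudo-expansion splits into an ADRP/ADD pair addressing the
// jump table symbol, as the comment above says. A minimal assembly sketch,
// assuming a hypothetical jump table label .LJTI0_0:
//
//   adrp x8, .LJTI0_0
//   add  x8, x8, :lo12:.LJTI0_0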
3623
3624bool AArch64InstructionSelector::selectTLSGlobalValue(
3625 MachineInstr &I, MachineRegisterInfo &MRI) {
3626 if (!STI.isTargetMachO())
3627 return false;
3628 MachineFunction &MF = *I.getParent()->getParent();
3629 MF.getFrameInfo().setAdjustsStack(true);
3630
3631 const auto &GlobalOp = I.getOperand(1);
3632 assert(GlobalOp.getOffset() == 0 &&
3633        "Shouldn't have an offset on TLS globals!");
3634 const GlobalValue &GV = *GlobalOp.getGlobal();
3635
3636 auto LoadGOT =
3637 MIB.buildInstr(AArch64::LOADgot, {&AArch64::GPR64commonRegClass}, {})
3638 .addGlobalAddress(&GV, 0, AArch64II::MO_TLS);
3639
3640 auto Load = MIB.buildInstr(AArch64::LDRXui, {&AArch64::GPR64commonRegClass},
3641 {LoadGOT.getReg(0)})
3642 .addImm(0);
3643
3644 MIB.buildCopy(Register(AArch64::X0), LoadGOT.getReg(0));
3645 // TLS calls preserve all registers except those that absolutely must be
3646 // trashed: X0 (it takes an argument), LR (it's a call) and NZCV (let's not be
3647 // silly).
3648 MIB.buildInstr(getBLRCallOpcode(MF), {}, {Load})
3649 .addUse(AArch64::X0, RegState::Implicit)
3650 .addDef(AArch64::X0, RegState::Implicit)
3651 .addRegMask(TRI.getTLSCallPreservedMask());
3652
3653 MIB.buildCopy(I.getOperand(0).getReg(), Register(AArch64::X0));
3654 RBI.constrainGenericRegister(I.getOperand(0).getReg(), AArch64::GPR64RegClass,
3655 MRI);
3656 I.eraseFromParent();
3657 return true;
3658}
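// Illustrative note (not part of the original source): on Mach-O this models
// the Darwin TLV access sequence, assuming the usual descriptor layout where
// the first word of the descriptor is the accessor function. Roughly:
//
//   %desc = LOADgot @var[TLS]   ; address of the TLV descriptor
//   %fn   = LDRXui %desc, 0     ; load the accessor from the descriptor
//   $x0   = COPY %desc          ; descriptor pointer is the argument
//   BLR %fn, implicit $x0, implicit-def $x0
//   %res  = COPY $x0            ; accessor returns the variable's address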
3659
3660bool AArch64InstructionSelector::selectIntrinsicTrunc(
3661 MachineInstr &I, MachineRegisterInfo &MRI) const {
3662 const LLT SrcTy = MRI.getType(I.getOperand(0).getReg());
3663
3664 // Select the correct opcode.
3665 unsigned Opc = 0;
3666 if (!SrcTy.isVector()) {
3667 switch (SrcTy.getSizeInBits()) {
3668 default:
3669 case 16:
3670 Opc = AArch64::FRINTZHr;
3671 break;
3672 case 32:
3673 Opc = AArch64::FRINTZSr;
3674 break;
3675 case 64:
3676 Opc = AArch64::FRINTZDr;
3677 break;
3678 }
3679 } else {
3680 unsigned NumElts = SrcTy.getNumElements();
3681 switch (SrcTy.getElementType().getSizeInBits()) {
3682 default:
3683 break;
3684 case 16:
3685 if (NumElts == 4)
3686 Opc = AArch64::FRINTZv4f16;
3687 else if (NumElts == 8)
3688 Opc = AArch64::FRINTZv8f16;
3689 break;
3690 case 32:
3691 if (NumElts == 2)
3692 Opc = AArch64::FRINTZv2f32;
3693 else if (NumElts == 4)
3694 Opc = AArch64::FRINTZv4f32;
3695 break;
3696 case 64:
3697 if (NumElts == 2)
3698 Opc = AArch64::FRINTZv2f64;
3699 break;
3700 }
3701 }
3702
3703 if (!Opc) {
3704 // Didn't get an opcode above, bail.
3705 LLVM_DEBUG(dbgs() << "Unsupported type for G_INTRINSIC_TRUNC!\n");
3706 return false;
3707 }
3708
3709 // Legalization would have set us up perfectly for this; we just need to
3710 // set the opcode and move on.
3711 I.setDesc(TII.get(Opc));
3712 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3713}
3714
3715bool AArch64InstructionSelector::selectIntrinsicRound(
3716 MachineInstr &I, MachineRegisterInfo &MRI) const {
3717 const LLT SrcTy = MRI.getType(I.getOperand(0).getReg());
3718
3719 // Select the correct opcode.
3720 unsigned Opc = 0;
3721 if (!SrcTy.isVector()) {
3722 switch (SrcTy.getSizeInBits()) {
3723 default:
3724 case 16:
3725 Opc = AArch64::FRINTAHr;
3726 break;
3727 case 32:
3728 Opc = AArch64::FRINTASr;
3729 break;
3730 case 64:
3731 Opc = AArch64::FRINTADr;
3732 break;
3733 }
3734 } else {
3735 unsigned NumElts = SrcTy.getNumElements();
3736 switch (SrcTy.getElementType().getSizeInBits()) {
3737 default:
3738 break;
3739 case 16:
3740 if (NumElts == 4)
3741 Opc = AArch64::FRINTAv4f16;
3742 else if (NumElts == 8)
3743 Opc = AArch64::FRINTAv8f16;
3744 break;
3745 case 32:
3746 if (NumElts == 2)
3747 Opc = AArch64::FRINTAv2f32;
3748 else if (NumElts == 4)
3749 Opc = AArch64::FRINTAv4f32;
3750 break;
3751 case 64:
3752 if (NumElts == 2)
3753 Opc = AArch64::FRINTAv2f64;
3754 break;
3755 }
3756 }
3757
3758 if (!Opc) {
3759 // Didn't get an opcode above, bail.
3760 LLVM_DEBUG(dbgs() << "Unsupported type for G_INTRINSIC_ROUND!\n");
3761 return false;
3762 }
3763
3764 // Legalization would have set us up perfectly for this; we just need to
3765 // set the opcode and move on.
3766 I.setDesc(TII.get(Opc));
3767 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3768}
3769
3770bool AArch64InstructionSelector::selectVectorICmp(
3771 MachineInstr &I, MachineRegisterInfo &MRI) {
3772 Register DstReg = I.getOperand(0).getReg();
3773 LLT DstTy = MRI.getType(DstReg);
3774 Register SrcReg = I.getOperand(2).getReg();
3775 Register Src2Reg = I.getOperand(3).getReg();
3776 LLT SrcTy = MRI.getType(SrcReg);
3777
3778 unsigned SrcEltSize = SrcTy.getElementType().getSizeInBits();
3779 unsigned NumElts = DstTy.getNumElements();
3780
3781 // First index is element size, 0 == 8b, 1 == 16b, 2 == 32b, 3 == 64b
3782 // Second index is num elts, 0 == v2, 1 == v4, 2 == v8, 3 == v16
3783 // Third index is cc opcode:
3784 // 0 == eq
3785 // 1 == ugt
3786 // 2 == uge
3787 // 3 == ult
3788 // 4 == ule
3789 // 5 == sgt
3790 // 6 == sge
3791 // 7 == slt
3792 // 8 == sle
3793 // ne is done by negating 'eq' result.
3794
3795 // This table below assumes that for some comparisons the operands will be
3796 // commuted.
3797 // ult op == commute + ugt op
3798 // ule op == commute + uge op
3799 // slt op == commute + sgt op
3800 // sle op == commute + sge op
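// Worked example (illustrative, not part of the original source): for a
// <4 x s32> 'ult' compare, PredIdx becomes 3 with SwapOperands set, and the
// lookup below computes EltIdx = Log2_32(32 / 8) = 2 and
// NumEltsIdx = Log2_32(4 / 2) = 1, selecting
// OpcTable[2][1][3] == AArch64::CMHIv4i32 with commuted operands.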
3801 unsigned PredIdx = 0;
3802 bool SwapOperands = false;
3803 CmpInst::Predicate Pred = (CmpInst::Predicate)I.getOperand(1).getPredicate();
1. Calling 'MachineOperand::getPredicate'
4. Returning from 'MachineOperand::getPredicate'
3804 switch (Pred) {
5. Control jumps to 'case ICMP_SGE:' at line 3826
3805 case CmpInst::ICMP_NE:
3806 case CmpInst::ICMP_EQ:
3807 PredIdx = 0;
3808 break;
3809 case CmpInst::ICMP_UGT:
3810 PredIdx = 1;
3811 break;
3812 case CmpInst::ICMP_UGE:
3813 PredIdx = 2;
3814 break;
3815 case CmpInst::ICMP_ULT:
3816 PredIdx = 3;
3817 SwapOperands = true;
3818 break;
3819 case CmpInst::ICMP_ULE:
3820 PredIdx = 4;
3821 SwapOperands = true;
3822 break;
3823 case CmpInst::ICMP_SGT:
3824 PredIdx = 5;
3825 break;
3826 case CmpInst::ICMP_SGE:
3827 PredIdx = 6;
6. The value 6 is assigned to 'PredIdx'
3828 break;
7. Execution continues on line 3845
3829 case CmpInst::ICMP_SLT:
3830 PredIdx = 7;
3831 SwapOperands = true;
3832 break;
3833 case CmpInst::ICMP_SLE:
3834 PredIdx = 8;
3835 SwapOperands = true;
3836 break;
3837 default:
3838 llvm_unreachable("Unhandled icmp predicate");
3839 return false;
3840 }
3841
3842 // This table obviously should be tablegen'd when we have our GISel native
3843 // tablegen selector.
3844
3845 static const unsigned OpcTable[4][4][9] = {
3846 {
3847 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3848 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3849 0 /* invalid */},
3850 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3851 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3852 0 /* invalid */},
3853 {AArch64::CMEQv8i8, AArch64::CMHIv8i8, AArch64::CMHSv8i8,
3854 AArch64::CMHIv8i8, AArch64::CMHSv8i8, AArch64::CMGTv8i8,
3855 AArch64::CMGEv8i8, AArch64::CMGTv8i8, AArch64::CMGEv8i8},
3856 {AArch64::CMEQv16i8, AArch64::CMHIv16i8, AArch64::CMHSv16i8,
3857 AArch64::CMHIv16i8, AArch64::CMHSv16i8, AArch64::CMGTv16i8,
3858 AArch64::CMGEv16i8, AArch64::CMGTv16i8, AArch64::CMGEv16i8}
3859 },
3860 {
3861 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3862 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3863 0 /* invalid */},
3864 {AArch64::CMEQv4i16, AArch64::CMHIv4i16, AArch64::CMHSv4i16,
3865 AArch64::CMHIv4i16, AArch64::CMHSv4i16, AArch64::CMGTv4i16,
3866 AArch64::CMGEv4i16, AArch64::CMGTv4i16, AArch64::CMGEv4i16},
3867 {AArch64::CMEQv8i16, AArch64::CMHIv8i16, AArch64::CMHSv8i16,
3868 AArch64::CMHIv8i16, AArch64::CMHSv8i16, AArch64::CMGTv8i16,
3869 AArch64::CMGEv8i16, AArch64::CMGTv8i16, AArch64::CMGEv8i16},
3870 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3871 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3872 0 /* invalid */}
3873 },
3874 {
3875 {AArch64::CMEQv2i32, AArch64::CMHIv2i32, AArch64::CMHSv2i32,
3876 AArch64::CMHIv2i32, AArch64::CMHSv2i32, AArch64::CMGTv2i32,
3877 AArch64::CMGEv2i32, AArch64::CMGTv2i32, AArch64::CMGEv2i32},
3878 {AArch64::CMEQv4i32, AArch64::CMHIv4i32, AArch64::CMHSv4i32,
3879 AArch64::CMHIv4i32, AArch64::CMHSv4i32, AArch64::CMGTv4i32,
3880 AArch64::CMGEv4i32, AArch64::CMGTv4i32, AArch64::CMGEv4i32},
3881 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3882 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3883 0 /* invalid */},
3884 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3885 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3886 0 /* invalid */}
3887 },
3888 {
3889 {AArch64::CMEQv2i64, AArch64::CMHIv2i64, AArch64::CMHSv2i64,
3890 AArch64::CMHIv2i64, AArch64::CMHSv2i64, AArch64::CMGTv2i64,
3891 AArch64::CMGEv2i64, AArch64::CMGTv2i64, AArch64::CMGEv2i64},
3892 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3893 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3894 0 /* invalid */},
3895 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3896 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3897 0 /* invalid */},
3898 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3899 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3900 0 /* invalid */}
3901 },
3902 };
3903 unsigned EltIdx = Log2_32(SrcEltSize / 8);
3904 unsigned NumEltsIdx = Log2_32(NumElts / 2);
3905 unsigned Opc = OpcTable[EltIdx][NumEltsIdx][PredIdx];
8. Assigned value is garbage or undefined
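// Note on the analyzer finding (not part of the original source): OpcTable is
// a 4x4x9 constant array, so the read above is well defined only when EltIdx
// and NumEltsIdx stay below 4, i.e. when SrcEltSize is 8/16/32/64 and NumElts
// is 2/4/8/16. Those constraints are not proven on the analyzed path, so the
// indexed load is modeled as potentially reading garbage.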
3906 if (!Opc) {
3907 LLVM_DEBUG(dbgs() << "Could not map G_ICMP to cmp opcode");
3908 return false;
3909 }
3910
3911 const RegisterBank &VecRB = *RBI.getRegBank(SrcReg, MRI, TRI);
3912 const TargetRegisterClass *SrcRC =
3913 getRegClassForTypeOnBank(SrcTy, VecRB, true);
3914 if (!SrcRC) {
3915 LLVM_DEBUG(dbgs() << "Could not determine source register class.\n");
3916 return false;
3917 }
3918
3919 unsigned NotOpc = Pred == ICmpInst::ICMP_NE ? AArch64::NOTv8i8 : 0;
3920 if (SrcTy.getSizeInBits() == 128)
3921 NotOpc = NotOpc ? AArch64::NOTv16i8 : 0;
3922
3923 if (SwapOperands)
3924 std::swap(SrcReg, Src2Reg);
3925
3926 auto Cmp = MIB.buildInstr(Opc, {SrcRC}, {SrcReg, Src2Reg});
3927 constrainSelectedInstRegOperands(*Cmp, TII, TRI, RBI);
3928
3929 // Invert if we had a 'ne' cc.
3930 if (NotOpc) {
3931 Cmp = MIB.buildInstr(NotOpc, {DstReg}, {Cmp});
3932 constrainSelectedInstRegOperands(*Cmp, TII, TRI, RBI);
3933 } else {
3934 MIB.buildCopy(DstReg, Cmp.getReg(0));
3935 }
3936 RBI.constrainGenericRegister(DstReg, *SrcRC, MRI);
3937 I.eraseFromParent();
3938 return true;
3939}
3940
3941MachineInstr *AArch64InstructionSelector::emitScalarToVector(
3942 unsigned EltSize, const TargetRegisterClass *DstRC, Register Scalar,
3943 MachineIRBuilder &MIRBuilder) const {
3944 auto Undef = MIRBuilder.buildInstr(TargetOpcode::IMPLICIT_DEF, {DstRC}, {});
3945
3946 auto BuildFn = [&](unsigned SubregIndex) {
3947 auto Ins =
3948 MIRBuilder
3949 .buildInstr(TargetOpcode::INSERT_SUBREG, {DstRC}, {Undef, Scalar})
3950 .addImm(SubregIndex);
3951 constrainSelectedInstRegOperands(*Undef, TII, TRI, RBI);
3952 constrainSelectedInstRegOperands(*Ins, TII, TRI, RBI);
3953 return &*Ins;
3954 };
3955
3956 switch (EltSize) {
3957 case 16:
3958 return BuildFn(AArch64::hsub);
3959 case 32:
3960 return BuildFn(AArch64::ssub);
3961 case 64:
3962 return BuildFn(AArch64::dsub);
3963 default:
3964 return nullptr;
3965 }
3966}
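// Illustrative MIR sketch (not part of the original source), assuming a
// 32-bit scalar widened into an FPR128 value; the register names are made up:
//
//   %undef:fpr128 = IMPLICIT_DEF
//   %vec:fpr128   = INSERT_SUBREG %undef, %scalar, %subreg.ssub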
3967
3968bool AArch64InstructionSelector::selectMergeValues(
3969 MachineInstr &I, MachineRegisterInfo &MRI) {
3970 assert(I.getOpcode() == TargetOpcode::G_MERGE_VALUES && "unexpected opcode");
3971 const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
3972 const LLT SrcTy = MRI.getType(I.getOperand(1).getReg());
3973 assert(!DstTy.isVector() && !SrcTy.isVector() && "invalid merge operation");
3974 const RegisterBank &RB = *RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI);
3975
3976 if (I.getNumOperands() != 3)
3977 return false;
3978
3979 // Merging 2 s64s into an s128.
3980 if (DstTy == LLT::scalar(128)) {
3981 if (SrcTy.getSizeInBits() != 64)
3982 return false;
3983 Register DstReg = I.getOperand(0).getReg();
3984 Register Src1Reg = I.getOperand(1).getReg();
3985 Register Src2Reg = I.getOperand(2).getReg();
3986 auto Tmp = MIB.buildInstr(TargetOpcode::IMPLICIT_DEF, {DstTy}, {});
3987 MachineInstr *InsMI =
3988 emitLaneInsert(None, Tmp.getReg(0), Src1Reg, /* LaneIdx */ 0, RB, MIB);
3989 if (!InsMI)
3990 return false;
3991 MachineInstr *Ins2MI = emitLaneInsert(DstReg, InsMI->getOperand(0).getReg(),
3992 Src2Reg, /* LaneIdx */ 1, RB, MIB);
3993 if (!Ins2MI)
3994 return false;
3995 constrainSelectedInstRegOperands(*InsMI, TII, TRI, RBI);
3996 constrainSelectedInstRegOperands(*Ins2MI, TII, TRI, RBI);
3997 I.eraseFromParent();
3998 return true;
3999 }
4000
4001 if (RB.getID() != AArch64::GPRRegBankID)
4002 return false;
4003
4004 if (DstTy.getSizeInBits() != 64 || SrcTy.getSizeInBits() != 32)
4005 return false;
4006
4007 auto *DstRC = &AArch64::GPR64RegClass;
4008 Register SubToRegDef = MRI.createVirtualRegister(DstRC);
4009 MachineInstr &SubRegMI = *BuildMI(*I.getParent(), I, I.getDebugLoc(),
4010 TII.get(TargetOpcode::SUBREG_TO_REG))
4011 .addDef(SubToRegDef)
4012 .addImm(0)
4013 .addUse(I.getOperand(1).getReg())
4014 .addImm(AArch64::sub_32);
4015 Register SubToRegDef2 = MRI.createVirtualRegister(DstRC);
4016 // Need to anyext the second scalar before we can use bfm
4017 MachineInstr &SubRegMI2 = *BuildMI(*I.getParent(), I, I.getDebugLoc(),
4018 TII.get(TargetOpcode::SUBREG_TO_REG))
4019 .addDef(SubToRegDef2)
4020 .addImm(0)
4021 .addUse(I.getOperand(2).getReg())
4022 .addImm(AArch64::sub_32);
4023 MachineInstr &BFM =
4024 *BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::BFMXri))
4025 .addDef(I.getOperand(0).getReg())
4026 .addUse(SubToRegDef)
4027 .addUse(SubToRegDef2)
4028 .addImm(32)
4029 .addImm(31);
4030 constrainSelectedInstRegOperands(SubRegMI, TII, TRI, RBI);
4031 constrainSelectedInstRegOperands(SubRegMI2, TII, TRI, RBI);
4032 constrainSelectedInstRegOperands(BFM, TII, TRI, RBI);
4033 I.eraseFromParent();
4034 return true;
4035}
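// Illustrative note (not part of the original source): in the s32+s32 path
// above, each scalar is first widened with SUBREG_TO_REG, and the BFMXri with
// immr=32/imms=31 acts like "bfi xd, xn, #32, #32": bits [31:0] of the second
// operand land in bits [63:32] of the result while bits [31:0] come from the
// tied first operand, producing (hi << 32) | lo for G_MERGE_VALUES(lo, hi).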
4036
4037static bool getLaneCopyOpcode(unsigned &CopyOpc, unsigned &ExtractSubReg,
4038 const unsigned EltSize) {
4039 // Choose a lane copy opcode and subregister based off of the size of the
4040 // vector's elements.
4041 switch (EltSize) {
4042 case 8:
4043 CopyOpc = AArch64::DUPi8;
4044 ExtractSubReg = AArch64::bsub;
4045 break;
4046 case 16:
4047 CopyOpc = AArch64::DUPi16;
4048 ExtractSubReg = AArch64::hsub;
4049 break;
4050 case 32:
4051 CopyOpc = AArch64::DUPi32;
4052 ExtractSubReg = AArch64::ssub;
4053 break;
4054 case 64:
4055 CopyOpc = AArch64::DUPi64;
4056 ExtractSubReg = AArch64::dsub;
4057 break;
4058 default:
4059 // Unknown size, bail out.
4060 LLVM_DEBUG(dbgs() << "Elt size '" << EltSize << "' unsupported.\n");
4061 return false;
4062 }
4063 return true;
4064}
4065
4066MachineInstr *AArch64InstructionSelector::emitExtractVectorElt(
4067 Optional<Register> DstReg, const RegisterBank &DstRB, LLT ScalarTy,
4068 Register VecReg, unsigned LaneIdx, MachineIRBuilder &MIRBuilder) const {
4069 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
4070 unsigned CopyOpc = 0;
4071 unsigned ExtractSubReg = 0;
4072 if (!getLaneCopyOpcode(CopyOpc, ExtractSubReg, ScalarTy.getSizeInBits())) {
4073 LLVM_DEBUG(
4074     dbgs() << "Couldn't determine lane copy opcode for instruction.\n");
4075 return nullptr;
4076 }
4077
4078 const TargetRegisterClass *DstRC =
4079 getRegClassForTypeOnBank(ScalarTy, DstRB, true);
4080 if (!DstRC) {
4081 LLVM_DEBUG(dbgs() << "Could not determine destination register class.\n");
4082 return nullptr;
4083 }
4084
4085 const RegisterBank &VecRB = *RBI.getRegBank(VecReg, MRI, TRI);
4086 const LLT &VecTy = MRI.getType(VecReg);
4087 const TargetRegisterClass *VecRC =
4088 getRegClassForTypeOnBank(VecTy, VecRB, true);
4089 if (!VecRC) {
4090 LLVM_DEBUG(dbgs() << "Could not determine source register class.\n");
4091 return nullptr;
4092 }
4093
4094 // The register that we're going to copy into.
4095 Register InsertReg = VecReg;
4096 if (!DstReg)
4097 DstReg = MRI.createVirtualRegister(DstRC);
4098 // If the lane index is 0, we just use a subregister COPY.
4099 if (LaneIdx == 0) {
4100 auto Copy = MIRBuilder.buildInstr(TargetOpcode::COPY, {*DstReg}, {})
4101 .addReg(VecReg, 0, ExtractSubReg);
4102 RBI.constrainGenericRegister(*DstReg, *DstRC, MRI);
4103 return &*Copy;
4104 }
4105
4106 // Lane copies require 128-bit wide registers. If we're dealing with an
4107 // unpacked vector, then we need to move up to that width. Insert an implicit
4108 // def and a subregister insert to get us there.
4109 if (VecTy.getSizeInBits() != 128) {
4110 MachineInstr *ScalarToVector = emitScalarToVector(
4111 VecTy.getSizeInBits(), &AArch64::FPR128RegClass, VecReg, MIRBuilder);
4112 if (!ScalarToVector)
4113 return nullptr;
4114 InsertReg = ScalarToVector->getOperand(0).getReg();
4115 }
4116
4117 MachineInstr *LaneCopyMI =
4118 MIRBuilder.buildInstr(CopyOpc, {*DstReg}, {InsertReg}).addImm(LaneIdx);
4119 constrainSelectedInstRegOperands(*LaneCopyMI, TII, TRI, RBI);
4120
4121 // Make sure that we actually constrain the initial copy.
4122 RBI.constrainGenericRegister(*DstReg, *DstRC, MRI);
4123 return LaneCopyMI;
4124}
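// Illustrative note (not part of the original source): extracting lane 0
// becomes a plain subregister COPY (roughly "%dst:fpr32 = COPY %vec.ssub"),
// while any other lane uses a DUPi* lane copy such as "DUPi32 %vec, 1", after
// widening sub-128-bit vectors to an FPR128 value first.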
4125
4126bool AArch64InstructionSelector::selectExtractElt(
4127 MachineInstr &I, MachineRegisterInfo &MRI) {
4128 assert(I.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT &&
4129        "unexpected opcode!");
4130 Register DstReg = I.getOperand(0).getReg();
4131 const LLT NarrowTy = MRI.getType(DstReg);
4132 const Register SrcReg = I.getOperand(1).getReg();
4133 const LLT WideTy = MRI.getType(SrcReg);
4134 (void)WideTy;
4135 assert(WideTy.getSizeInBits() >= NarrowTy.getSizeInBits() &&
4136        "source register size too small!");
4137 assert(!NarrowTy.isVector() && "cannot extract vector into vector!");
4138
4139 // Need the lane index to determine the correct copy opcode.
4140 MachineOperand &LaneIdxOp = I.getOperand(2);
4141 assert(LaneIdxOp.isReg() && "Lane index operand was not a register?");
4142
4143 if (RBI.getRegBank(DstReg, MRI, TRI)->getID() != AArch64::FPRRegBankID) {
4144 LLVM_DEBUG(dbgs() << "Cannot extract into GPR.\n");
4145 return false;
4146 }
4147
4148 // Find the index to extract from.
4149 auto VRegAndVal = getIConstantVRegValWithLookThrough(LaneIdxOp.getReg(), MRI);
4150 if (!VRegAndVal)
4151 return false;
4152 unsigned LaneIdx = VRegAndVal->Value.getSExtValue();
4153
4154
4155 const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
4156 MachineInstr *Extract = emitExtractVectorElt(DstReg, DstRB, NarrowTy, SrcReg,
4157 LaneIdx, MIB);
4158 if (!Extract)
4159 return false;
4160
4161 I.eraseFromParent();
4162 return true;
4163}
4164
4165bool AArch64InstructionSelector::selectSplitVectorUnmerge(
4166 MachineInstr &I, MachineRegisterInfo &MRI) {
4167 unsigned NumElts = I.getNumOperands() - 1;
4168 Register SrcReg = I.getOperand(NumElts).getReg();
4169 const LLT NarrowTy = MRI.getType(I.getOperand(0).getReg());
4170 const LLT SrcTy = MRI.getType(SrcReg);
4171
4172 assert(NarrowTy.isVector() && "Expected an unmerge into vectors");
4173 if (SrcTy.getSizeInBits() > 128) {
4174 LLVM_DEBUG(dbgs() << "Unexpected vector type for vec split unmerge");
4175 return false;
4176 }
4177
4178 // We implement a split vector operation by treating the sub-vectors as
4179 // scalars and extracting them.
4180 const RegisterBank &DstRB =
4181 *RBI.getRegBank(I.getOperand(0).getReg(), MRI, TRI);
4182 for (unsigned OpIdx = 0; OpIdx < NumElts; ++OpIdx) {
4183 Register Dst = I.getOperand(OpIdx).getReg();
4184 MachineInstr *Extract =
4185 emitExtractVectorElt(Dst, DstRB, NarrowTy, SrcReg, OpIdx, MIB);
4186 if (!Extract)
4187 return false;
4188 }
4189 I.eraseFromParent();
4190 return true;
4191}
4192
4193bool AArch64InstructionSelector::selectUnmergeValues(MachineInstr &I,
4194 MachineRegisterInfo &MRI) {
4195 assert(I.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
4196        "unexpected opcode");
4197
4198 // TODO: Handle unmerging into GPRs and from scalars to scalars.
4199 if (RBI.getRegBank(I.getOperand(0).getReg(), MRI, TRI)->getID() !=
4200 AArch64::FPRRegBankID ||
4201 RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI)->getID() !=
4202 AArch64::FPRRegBankID) {
4203 LLVM_DEBUG(dbgs() << "Unmerging vector-to-gpr and scalar-to-scalar "
4204                      "currently unsupported.\n");
4205 return false;
4206 }
4207
4208 // The last operand is the vector source register, and every other operand is
4209 // a register to unpack into.
4210 unsigned NumElts = I.getNumOperands() - 1;
4211 Register SrcReg = I.getOperand(NumElts).getReg();
4212 const LLT NarrowTy = MRI.getType(I.getOperand(0).getReg());
4213 const LLT WideTy = MRI.getType(SrcReg);
4214 (void)WideTy;
4215 assert((WideTy.isVector() || WideTy.getSizeInBits() == 128) &&
4216        "can only unmerge from vector or s128 types!");
4217 assert(WideTy.getSizeInBits() > NarrowTy.getSizeInBits() &&
4218        "source register size too small!");
4219
4220 if (!NarrowTy.isScalar())
4221 return selectSplitVectorUnmerge(I, MRI);
4222
4223 // Choose a lane copy opcode and subregister based off of the size of the
4224 // vector's elements.
4225 unsigned CopyOpc = 0;
4226 unsigned ExtractSubReg = 0;
4227 if (!getLaneCopyOpcode(CopyOpc, ExtractSubReg, NarrowTy.getSizeInBits()))
4228 return false;
4229
4230 // Set up for the lane copies.
4231 MachineBasicBlock &MBB = *I.getParent();
4232
4233 // Stores the registers we'll be copying from.
4234 SmallVector<Register, 4> InsertRegs;
4235
4236 // We'll use the first register twice, so we only need NumElts-1 registers.
4237 unsigned NumInsertRegs = NumElts - 1;
4238
4239 // If our elements fit into exactly 128 bits, then we can copy from the source
4240 // directly. Otherwise, we need to do a bit of setup with some subregister
4241 // inserts.
4242 if (NarrowTy.getSizeInBits() * NumElts == 128) {
4243 InsertRegs = SmallVector<Register, 4>(NumInsertRegs, SrcReg);
4244 } else {
4245 // No. We have to perform subregister inserts. For each insert, create an
4246 // implicit def and a subregister insert, and save the register we create.
4247 const TargetRegisterClass *RC = getRegClassForTypeOnBank(
4248 LLT::fixed_vector(NumElts, WideTy.getScalarSizeInBits()),
4249 *RBI.getRegBank(SrcReg, MRI, TRI));
4250 unsigned SubReg = 0;
4251 bool Found = getSubRegForClass(RC, TRI, SubReg);
4252 (void)Found;
4253 assert(Found && "expected to find last operand's subreg idx");
4254 for (unsigned Idx = 0; Idx < NumInsertRegs; ++Idx) {
4255 Register ImpDefReg = MRI.createVirtualRegister(&AArch64::FPR128RegClass);
4256 MachineInstr &ImpDefMI =
4257 *BuildMI(MBB, I, I.getDebugLoc(), TII.get(TargetOpcode::IMPLICIT_DEF),
4258 ImpDefReg);
4259
4260 // Now, create the subregister insert from SrcReg.
4261 Register InsertReg = MRI.createVirtualRegister(&AArch64::FPR128RegClass);
4262 MachineInstr &InsMI =
4263 *BuildMI(MBB, I, I.getDebugLoc(),
4264 TII.get(TargetOpcode::INSERT_SUBREG), InsertReg)
4265 .addUse(ImpDefReg)
4266 .addUse(SrcReg)
4267 .addImm(SubReg);
4268
4269 constrainSelectedInstRegOperands(ImpDefMI, TII, TRI, RBI);
4270 constrainSelectedInstRegOperands(InsMI, TII, TRI, RBI);
4271
4272 // Save the register so that we can copy from it after.
4273 InsertRegs.push_back(InsertReg);
4274 }
4275 }
4276
4277 // Now that we've created any necessary subregister inserts, we can
4278 // create the copies.
4279 //
4280 // Perform the first copy separately as a subregister copy.
4281 Register CopyTo = I.getOperand(0).getReg();
4282 auto FirstCopy = MIB.buildInstr(TargetOpcode::COPY, {CopyTo}, {})
4283 .addReg(InsertRegs[0], 0, ExtractSubReg);
4284 constrainSelectedInstRegOperands(*FirstCopy, TII, TRI, RBI);
4285
4286 // Now, perform the remaining copies as vector lane copies.
4287 unsigned LaneIdx = 1;
4288 for (Register InsReg : InsertRegs) {
4289 Register CopyTo = I.getOperand(LaneIdx).getReg();
4290 MachineInstr &CopyInst =
4291 *BuildMI(MBB, I, I.getDebugLoc(), TII.get(CopyOpc), CopyTo)
4292 .addUse(InsReg)
4293 .addImm(LaneIdx);
4294 constrainSelectedInstRegOperands(CopyInst, TII, TRI, RBI);
4295 ++LaneIdx;
4296 }
4297
4298 // Separately constrain the first copy's destination. Because of the
4299 // limitation in constrainOperandRegClass, we can't guarantee that this will
4300 // actually be constrained. So, do it ourselves using the second operand.
4301 const TargetRegisterClass *RC =
4302 MRI.getRegClassOrNull(I.getOperand(1).getReg());
4303 if (!RC) {
4304 LLVM_DEBUG(dbgs() << "Couldn't constrain copy destination.\n");
4305 return false;
4306 }
4307
4308 RBI.constrainGenericRegister(CopyTo, *RC, MRI);
4309 I.eraseFromParent();
4310 return true;
4311}
4312
4313bool AArch64InstructionSelector::selectConcatVectors(
4314 MachineInstr &I, MachineRegisterInfo &MRI) {
4315 assert(I.getOpcode() == TargetOpcode::G_CONCAT_VECTORS &&
4316        "Unexpected opcode");
4317 Register Dst = I.getOperand(0).getReg();
4318 Register Op1 = I.getOperand(1).getReg();
4319 Register Op2 = I.getOperand(2).getReg();
4320 MachineInstr *ConcatMI = emitVectorConcat(Dst, Op1, Op2, MIB);
4321 if (!ConcatMI)
4322 return false;
4323 I.eraseFromParent();
4324 return true;
4325}
4326
4327unsigned
4328AArch64InstructionSelector::emitConstantPoolEntry(const Constant *CPVal,
4329 MachineFunction &MF) const {
4330 Type *CPTy = CPVal->getType();
4331 Align Alignment = MF.getDataLayout().getPrefTypeAlign(CPTy);
4332
4333 MachineConstantPool *MCP = MF.getConstantPool();
4334 return MCP->getConstantPoolIndex(CPVal, Alignment);
4335}
4336
4337MachineInstr *AArch64InstructionSelector::emitLoadFromConstantPool(
4338 const Constant *CPVal, MachineIRBuilder &MIRBuilder) const {
4339 auto &MF = MIRBuilder.getMF();
4340 unsigned CPIdx = emitConstantPoolEntry(CPVal, MF);
4341
4342 auto Adrp =
4343 MIRBuilder.buildInstr(AArch64::ADRP, {&AArch64::GPR64RegClass}, {})
4344 .addConstantPoolIndex(CPIdx, 0, AArch64II::MO_PAGE);
4345
4346 MachineInstr *LoadMI = nullptr;
4347 MachinePointerInfo PtrInfo = MachinePointerInfo::getConstantPool(MF);
4348 unsigned Size = MIRBuilder.getDataLayout().getTypeStoreSize(CPVal->getType());
4349 switch (Size) {
4350 case 16:
4351 LoadMI =
4352 &*MIRBuilder
4353 .buildInstr(AArch64::LDRQui, {&AArch64::FPR128RegClass}, {Adrp})
4354 .addConstantPoolIndex(CPIdx, 0,
4355 AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
4356 break;
4357 case 8:
4358 LoadMI =
4359 &*MIRBuilder
4360 .buildInstr(AArch64::LDRDui, {&AArch64::FPR64RegClass}, {Adrp})
4361 .addConstantPoolIndex(CPIdx, 0,
4362 AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
4363 break;
4364 case 4:
4365 LoadMI =
4366 &*MIRBuilder
4367 .buildInstr(AArch64::LDRSui, {&AArch64::FPR32RegClass}, {Adrp})
4368 .addConstantPoolIndex(CPIdx, 0,
4369 AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
4370 break;
4371 case 2:
4372 LoadMI =
4373 &*MIRBuilder
4374 .buildInstr(AArch64::LDRHui, {&AArch64::FPR16RegClass}, {Adrp})
4375 .addConstantPoolIndex(CPIdx, 0,
4376 AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
4377 break;
4378 default:
4379 LLVM_DEBUG(dbgs() << "Could not load from constant pool of type "
4380                   << *CPVal->getType());
4381 return nullptr;
4382 }
4383 LoadMI->addMemOperand(MF, MF.getMachineMemOperand(PtrInfo,
4384 MachineMemOperand::MOLoad,
4385 Size, Align(Size)));
4386 constrainSelectedInstRegOperands(*Adrp, TII, TRI, RBI);
4387 constrainSelectedInstRegOperands(*LoadMI, TII, TRI, RBI);
4388 return LoadMI;
4389}
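// Illustrative note (not part of the original source): for a 16-byte constant
// this produces an ADRP of the constant-pool symbol followed by an LDRQui
// using a :lo12: page offset, roughly (.LCPI0_0 is a hypothetical label):
//
//   adrp x8, .LCPI0_0
//   ldr  q0, [x8, :lo12:.LCPI0_0]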
4390
4391/// Return an <Opcode, SubregIndex> pair to do a vector elt insert of a given
4392/// size and RB.
4393static std::pair<unsigned, unsigned>
4394getInsertVecEltOpInfo(const RegisterBank &RB, unsigned EltSize) {
4395 unsigned Opc, SubregIdx;
4396 if (RB.getID() == AArch64::GPRRegBankID) {
4397 if (EltSize == 16) {
4398 Opc = AArch64::INSvi16gpr;
4399 SubregIdx = AArch64::ssub;
4400 } else if (EltSize == 32) {
4401 Opc = AArch64::INSvi32gpr;
4402 SubregIdx = AArch64::ssub;
4403 } else if (EltSize == 64) {
4404 Opc = AArch64::INSvi64gpr;
4405 SubregIdx = AArch64::dsub;
4406 } else {
4407 llvm_unreachable("invalid elt size!");
4408 }
4409 } else {
4410 if (EltSize == 8) {
4411 Opc = AArch64::INSvi8lane;
4412 SubregIdx = AArch64::bsub;
4413 } else if (EltSize == 16) {
4414 Opc = AArch64::INSvi16lane;
4415 SubregIdx = AArch64::hsub;
4416 } else if (EltSize == 32) {
4417 Opc = AArch64::INSvi32lane;
4418 SubregIdx = AArch64::ssub;
4419 } else if (EltSize == 64) {
4420 Opc = AArch64::INSvi64lane;
4421 SubregIdx = AArch64::dsub;
4422 } else {
4423 llvm_unreachable("invalid elt size!");
4424 }
4425 }
4426 return std::make_pair(Opc, SubregIdx);
4427}
4428
4429MachineInstr *AArch64InstructionSelector::emitInstr(
4430 unsigned Opcode, std::initializer_list<llvm::DstOp> DstOps,
4431 std::initializer_list<llvm::SrcOp> SrcOps, MachineIRBuilder &MIRBuilder,
4432 const ComplexRendererFns &RenderFns) const {
4433 assert(Opcode && "Expected an opcode?");
4434 assert(!isPreISelGenericOpcode(Opcode) &&
4435        "Function should only be used to produce selected instructions!");
4436 auto MI = MIRBuilder.buildInstr(Opcode, DstOps, SrcOps);
4437 if (RenderFns)
4438 for (auto &Fn : *RenderFns)
4439 Fn(MI);
4440 constrainSelectedInstRegOperands(*MI, TII, TRI, RBI);
4441 return &*MI;
4442}
4443
4444MachineInstr *AArch64InstructionSelector::emitAddSub(
4445 const std::array<std::array<unsigned, 2>, 5> &AddrModeAndSizeToOpcode,
4446 Register Dst, MachineOperand &LHS, MachineOperand &RHS,
4447 MachineIRBuilder &MIRBuilder) const {
4448 MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
4449 assert(LHS.isReg() && RHS.isReg() && "Expected register operands?");
4450 auto Ty = MRI.getType(LHS.getReg());
4451 assert(!Ty.isVector() && "Expected a scalar or pointer?");
4452 unsigned Size = Ty.getSizeInBits();
4453 assert((Size == 32 || Size == 64) && "Expected a 32-bit or 64-bit type only");
4454 bool Is32Bit = Size == 32;
4455
4456 // INSTRri form with positive arithmetic immediate.
4457 if (auto Fns = selectArithImmed(RHS))
4458 return emitInstr(AddrModeAndSizeToOpcode[0][Is32Bit], {Dst}, {LHS},
4459 MIRBuilder, Fns);
4460
4461 // INSTRri form with negative arithmetic immediate.
4462 if (auto Fns = selectNegArithImmed(RHS))
4463 return emitInstr(AddrModeAndSizeToOpcode[3][Is32Bit], {Dst}, {LHS},
4464 MIRBuilder, Fns);
4465
4466 // INSTRrx form.
4467 if (auto Fns = selectArithExtendedRegister(RHS))
4468 return emitInstr(AddrModeAndSizeToOpcode[4][Is32Bit], {Dst}, {LHS},
4469 MIRBuilder, Fns);
4470
4471 // INSTRrs form.
4472 if (auto Fns = selectShiftedRegister(RHS))
4473 return emitInstr(AddrModeAndSizeToOpcode[1][Is32Bit], {Dst}, {LHS},
4474 MIRBuilder, Fns);
4475 return emitInstr(AddrModeAndSizeToOpcode[2][Is32Bit], {Dst}, {LHS, RHS},
4476 MIRBuilder);
4477}
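// Illustrative note (not part of the original source): the table rows are
// tried in the order ri / negated-ri / rx / rs / rr. For example, assuming a
// 64-bit "G_ADD %x, -16", selectNegArithImmed matches and row [3] emits
// "SUBXri %x, 16, 0" instead of materializing the negative constant.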
4478
4479MachineInstr *
4480AArch64InstructionSelector::emitADD(Register DefReg, MachineOperand &LHS,
4481 MachineOperand &RHS,
4482 MachineIRBuilder &MIRBuilder) const {
4483 const std::array<std::array<unsigned, 2>, 5> OpcTable{
4484 {{AArch64::ADDXri, AArch64::ADDWri},
4485 {AArch64::ADDXrs, AArch64::ADDWrs},
4486 {AArch64::ADDXrr, AArch64::ADDWrr},
4487 {AArch64::SUBXri, AArch64::SUBWri},
4488 {AArch64::ADDXrx, AArch64::ADDWrx}}};
4489 return emitAddSub(OpcTable, DefReg, LHS, RHS, MIRBuilder);
4490}
4491
4492MachineInstr *
4493AArch64InstructionSelector::emitADDS(Register Dst, MachineOperand &LHS,
4494 MachineOperand &RHS,
4495 MachineIRBuilder &MIRBuilder) const {
4496 const std::array<std::array<unsigned, 2>, 5> OpcTable{
4497 {{AArch64::ADDSXri, AArch64::ADDSWri},
4498 {AArch64::ADDSXrs, AArch64::ADDSWrs},
4499 {AArch64::ADDSXrr, AArch64::ADDSWrr},
4500 {AArch64::SUBSXri, AArch64::SUBSWri},
4501 {AArch64::ADDSXrx, AArch64::ADDSWrx}}};
4502 return emitAddSub(OpcTable, Dst, LHS, RHS, MIRBuilder);
4503}
4504
4505MachineInstr *
4506AArch64InstructionSelector::emitSUBS(Register Dst, MachineOperand &LHS,
4507 MachineOperand &RHS,
4508 MachineIRBuilder &MIRBuilder) const {
4509 const std::array<std::array<unsigned, 2>, 5> OpcTable{
4510 {{AArch64::SUBSXri, AArch64::SUBSWri},
4511 {AArch64::SUBSXrs, AArch64::SUBSWrs},
4512 {AArch64::SUBSXrr, AArch64::SUBSWrr},
4513 {AArch64::ADDSXri, AArch64::ADDSWri},
4514 {AArch64::SUBSXrx, AArch64::SUBSWrx}}};
4515 return emitAddSub(OpcTable, Dst, LHS, RHS, MIRBuilder);
4516}
4517
4518MachineInstr *
4519AArch64InstructionSelector::emitCMN(MachineOperand &LHS, MachineOperand &RHS,
4520 MachineIRBuilder &MIRBuilder) const {
4521 MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
4522 bool Is32Bit = (MRI.getType(LHS.getReg()).getSizeInBits() == 32);
4523 auto RC = Is32Bit ? &AArch64::GPR32RegClass : &AArch64::GPR64RegClass;
4524 return emitADDS(MRI.createVirtualRegister(RC), LHS, RHS, MIRBuilder);
4525}
4526
4527MachineInstr *
4528AArch64InstructionSelector::emitTST(MachineOperand &LHS, MachineOperand &RHS,
4529 MachineIRBuilder &MIRBuilder) const {
4530 assert(LHS.isReg() && RHS.isReg() && "Expected register operands?");
4531 MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
4532 LLT Ty = MRI.getType(LHS.getReg());
4533 unsigned RegSize = Ty.getSizeInBits();
4534 bool Is32Bit = (RegSize == 32);
4535 const unsigned OpcTable[3][2] = {{AArch64::ANDSXri, AArch64::ANDSWri},
4536 {AArch64::ANDSXrs, AArch64::ANDSWrs},
4537 {AArch64::ANDSXrr, AArch64::ANDSWrr}};
4538 // ANDS needs a logical immediate for its immediate form. Check if we can
4539 // fold one in.
4540 if (auto ValAndVReg = getIConstantVRegValWithLookThrough(RHS.getReg(), MRI)) {
4541 int64_t Imm = ValAndVReg->Value.getSExtValue();
4542
4543 if (AArch64_AM::isLogicalImmediate(Imm, RegSize)) {
4544 auto TstMI = MIRBuilder.buildInstr(OpcTable[0][Is32Bit], {Ty}, {LHS});
4545 TstMI.addImm(AArch64_AM::encodeLogicalImmediate(Imm, RegSize));
4546 constrainSelectedInstRegOperands(*TstMI, TII, TRI, RBI);
4547 return &*TstMI;
4548 }
4549 }
4550
4551 if (auto Fns = selectLogicalShiftedRegister(RHS))
4552 return emitInstr(OpcTable[1][Is32Bit], {Ty}, {LHS}, MIRBuilder, Fns);
4553 return emitInstr(OpcTable[2][Is32Bit], {Ty}, {LHS, RHS}, MIRBuilder);
4554}
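// Illustrative note (not part of the original source): assuming a 32-bit
// "G_AND %x, 0xff" feeding a compare against zero, the constant is a valid
// logical immediate, so this folds to "ANDSWri %x, <encoded 0xff>" (the
// "tst w0, #0xff" alias) rather than an ANDSWrr with a materialized mask.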
4555
4556MachineInstr *AArch64InstructionSelector::emitIntegerCompare(
4557 MachineOperand &LHS, MachineOperand &RHS, MachineOperand &Predicate,
4558 MachineIRBuilder &MIRBuilder) const {
4559 assert(LHS.isReg() && RHS.isReg() && "Expected LHS and RHS to be registers!");
4560 assert(Predicate.isPredicate() && "Expected predicate?");
4561 MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
4562 LLT CmpTy = MRI.getType(LHS.getReg());
4563 assert(!CmpTy.isVector() && "Expected scalar or pointer");
4564 unsigned Size = CmpTy.getSizeInBits();
4565 (void)Size;
4566 assert((Size == 32 || Size == 64) && "Expected a 32-bit or 64-bit LHS/RHS?");
4567 // Fold the compare into a cmn or tst if possible.
4568 if (auto FoldCmp = tryFoldIntegerCompare(LHS, RHS, Predicate, MIRBuilder))
4569 return FoldCmp;
4570 auto Dst = MRI.cloneVirtualRegister(LHS.getReg());
4571 return emitSUBS(Dst, LHS, RHS, MIRBuilder);
4572}
4573
4574MachineInstr *AArch64InstructionSelector::emitCSetForFCmp(
4575 Register Dst, CmpInst::Predicate Pred, MachineIRBuilder &MIRBuilder) const {
4576 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
4577#ifndef NDEBUG
4578 LLT Ty = MRI.getType(Dst);
4579 assert(!Ty.isVector() && Ty.getSizeInBits() == 32 &&
4580        "Expected a 32-bit scalar register?");
4581#endif
4582 const Register ZReg = AArch64::WZR;
4583 AArch64CC::CondCode CC1, CC2;
4584 changeFCMPPredToAArch64CC(Pred, CC1, CC2);
4585 auto InvCC1 = AArch64CC::getInvertedCondCode(CC1);
4586 if (CC2 == AArch64CC::AL)
4587 return emitCSINC(/*Dst=*/Dst, /*Src1=*/ZReg, /*Src2=*/ZReg, InvCC1,
4588 MIRBuilder);
4589 const TargetRegisterClass *RC = &AArch64::GPR32RegClass;
4590 Register Def1Reg = MRI.createVirtualRegister(RC);
4591 Register Def2Reg = MRI.createVirtualRegister(RC);
4592 auto InvCC2 = AArch64CC::getInvertedCondCode(CC2);
4593 emitCSINC(/*Dst=*/Def1Reg, /*Src1=*/ZReg, /*Src2=*/ZReg, InvCC1, MIRBuilder);
4594 emitCSINC(/*Dst=*/Def2Reg, /*Src1=*/ZReg, /*Src2=*/ZReg, InvCC2, MIRBuilder);
4595 auto OrMI = MIRBuilder.buildInstr(AArch64::ORRWrr, {Dst}, {Def1Reg, Def2Reg});
4596 constrainSelectedInstRegOperands(*OrMI, TII, TRI, RBI);
4597 return &*OrMI;
4598}
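// Illustrative note (not part of the original source): predicates that map to
// a single AArch64 condition code become one CSINC (the "cset" alias), while
// predicates such as FCMP_ONE or FCMP_UEQ map to two condition codes, so two
// csets are emitted and combined with ORRWrr as above.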
4599
4600MachineInstr *
4601AArch64InstructionSelector::emitFPCompare(Register LHS, Register RHS,
4602 MachineIRBuilder &MIRBuilder,
4603 Optional<CmpInst::Predicate> Pred) const {
4604 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
4605 LLT Ty = MRI.getType(LHS);
4606 if (Ty.isVector())
4607 return nullptr;
4608 unsigned OpSize = Ty.getSizeInBits();
4609 if (OpSize != 32 && OpSize != 64)
4610 return nullptr;
4611
4612 // If this is a compare against +0.0, then we don't have
4613 // to explicitly materialize a constant.
4614 const ConstantFP *FPImm = getConstantFPVRegVal(RHS, MRI);
4615 bool ShouldUseImm = FPImm && (FPImm->isZero() && !FPImm->isNegative());
4616
4617 auto IsEqualityPred = [](CmpInst::Predicate P) {
4618 return P == CmpInst::FCMP_OEQ || P == CmpInst::FCMP_ONE ||
4619 P == CmpInst::FCMP_UEQ || P == CmpInst::FCMP_UNE;
4620 };
4621 if (!ShouldUseImm && Pred && IsEqualityPred(*Pred)) {
4622 // Try commutating the operands.
4623 const ConstantFP *LHSImm = getConstantFPVRegVal(LHS, MRI);
4624 if (LHSImm && (LHSImm->isZero() && !LHSImm->isNegative())) {
4625 ShouldUseImm = true;
4626 std::swap(LHS, RHS);
4627 }
4628 }
4629 unsigned CmpOpcTbl[2][2] = {{AArch64::FCMPSrr, AArch64::FCMPDrr},
4630 {AArch64::FCMPSri, AArch64::FCMPDri}};
4631 unsigned CmpOpc = CmpOpcTbl[ShouldUseImm][OpSize == 64];
4632
4633 // Partially build the compare. Decide if we need to add a use for the
4634 // third operand based off whether or not we're comparing against 0.0.
4635 auto CmpMI = MIRBuilder.buildInstr(CmpOpc).addUse(LHS);
4636 CmpMI.setMIFlags(MachineInstr::NoFPExcept);
4637 if (!ShouldUseImm)
4638 CmpMI.addUse(RHS);
4639 constrainSelectedInstRegOperands(*CmpMI, TII, TRI, RBI);
4640 return &*CmpMI;
4641}
4642
4643MachineInstr *AArch64InstructionSelector::emitVectorConcat(
4644 Optional<Register> Dst, Register Op1, Register Op2,
4645 MachineIRBuilder &MIRBuilder) const {
4646 // We implement a vector concat by:
4647 // 1. Use scalar_to_vector to insert the lower vector into the larger dest
4648 // 2. Insert the upper vector into the destination's upper element
4649 // TODO: some of this code is common with G_BUILD_VECTOR handling.
4650 MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
4651
4652 const LLT Op1Ty = MRI.getType(Op1);
4653 const LLT Op2Ty = MRI.getType(Op2);
4654
4655 if (Op1Ty != Op2Ty) {
4656 LLVM_DEBUG(dbgs() << "Could not do vector concat of differing vector tys");
4657 return nullptr;
4658 }
4659 assert(Op1Ty.isVector() && "Expected a vector for vector concat");
4660
4661 if (Op1Ty.getSizeInBits() >= 128) {
4662 LLVM_DEBUG(dbgs() << "Vector concat not supported for full size vectors");
4663 return nullptr;
4664 }
4665
4666 // At the moment we just support 64 bit vector concats.
4667 if (Op1Ty.getSizeInBits() != 64) {
4668 LLVM_DEBUG(dbgs() << "Vector concat only supported for 64b vectors");
4669 return nullptr;
4670 }
4671
4672 const LLT ScalarTy = LLT::scalar(Op1Ty.getSizeInBits());
4673 const RegisterBank &FPRBank = *RBI.getRegBank(Op1, MRI, TRI);
4674 const TargetRegisterClass *DstRC =
4675 getRegClassForTypeOnBank(Op1Ty.multiplyElements(2), FPRBank);
4676
4677 MachineInstr *WidenedOp1 =
4678 emitScalarToVector(ScalarTy.getSizeInBits(), DstRC, Op1, MIRBuilder);
4679 MachineInstr *WidenedOp2 =
4680 emitScalarToVector(ScalarTy.getSizeInBits(), DstRC, Op2, MIRBuilder);
4681 if (!WidenedOp1 || !WidenedOp2) {
4682 LLVM_DEBUG(dbgs() << "Could not emit a vector from scalar value");
4683 return nullptr;
4684 }
4685
4686 // Now do the insert of the upper element.
4687 unsigned InsertOpc, InsSubRegIdx;
4688 std::tie(InsertOpc, InsSubRegIdx) =
4689 getInsertVecEltOpInfo(FPRBank, ScalarTy.getSizeInBits());
4690
4691 if (!Dst)
4692 Dst = MRI.createVirtualRegister(DstRC);
4693 auto InsElt =
4694 MIRBuilder
4695 .buildInstr(InsertOpc, {*Dst}, {WidenedOp1->getOperand(0).getReg()})
4696 .addImm(1) /* Lane index */
4697 .addUse(WidenedOp2->getOperand(0).getReg())
4698 .addImm(0);
4699 constrainSelectedInstRegOperands(*InsElt, TII, TRI, RBI);
4700 return &*InsElt;
4701}
4702
4703MachineInstr *
4704AArch64InstructionSelector::emitCSINC(Register Dst, Register Src1,
4705 Register Src2, AArch64CC::CondCode Pred,
4706 MachineIRBuilder &MIRBuilder) const {
4707 auto &MRI = *MIRBuilder.getMRI();
4708 const RegClassOrRegBank &RegClassOrBank = MRI.getRegClassOrRegBank(Dst);
4709 // If we used a register class, then this won't necessarily have an LLT.
4710 // Compute the size based off whether or not we have a class or bank.
4711 unsigned Size;
4712 if (const auto *RC = RegClassOrBank.dyn_cast<const TargetRegisterClass *>())
4713 Size = TRI.getRegSizeInBits(*RC);
4714 else
4715 Size = MRI.getType(Dst).getSizeInBits();
4716 // Some opcodes use s1.
4717 assert(Size <= 64 && "Expected 64 bits or less only!");
4718 static const unsigned OpcTable[2] = {AArch64::CSINCWr, AArch64::CSINCXr};
4719 unsigned Opc = OpcTable[Size == 64];
4720 auto CSINC = MIRBuilder.buildInstr(Opc, {Dst}, {Src1, Src2}).addImm(Pred);
4721 constrainSelectedInstRegOperands(*CSINC, TII, TRI, RBI);
4722 return &*CSINC;
4723}
4724
4725std::pair<MachineInstr *, AArch64CC::CondCode>
4726AArch64InstructionSelector::emitOverflowOp(unsigned Opcode, Register Dst,
4727 MachineOperand &LHS,
4728 MachineOperand &RHS,
4729 MachineIRBuilder &MIRBuilder) const {
4730 switch (Opcode) {
4731 default:
4732 llvm_unreachable("Unexpected opcode!");
4733 case TargetOpcode::G_SADDO:
4734 return std::make_pair(emitADDS(Dst, LHS, RHS, MIRBuilder), AArch64CC::VS);
4735 case TargetOpcode::G_UADDO:
4736 return std::make_pair(emitADDS(Dst, LHS, RHS, MIRBuilder), AArch64CC::HS);
4737 case TargetOpcode::G_SSUBO:
4738 return std::make_pair(emitSUBS(Dst, LHS, RHS, MIRBuilder), AArch64CC::VS);
4739 case TargetOpcode::G_USUBO:
4740 return std::make_pair(emitSUBS(Dst, LHS, RHS, MIRBuilder), AArch64CC::LO);
4741 }
4742}
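// Illustrative note (not part of the original source): e.g. a G_UADDO is
// lowered to an ADDS plus the HS (carry set) condition code, which the caller
// typically materializes into the overflow result with a CSINC/cset.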
4743
4744/// Returns true if @p Val is a tree of AND/OR/CMP operations that can be
4745/// expressed as a conjunction.
4746/// \param CanNegate Set to true if we can negate the whole sub-tree just by
4747/// changing the conditions on the CMP tests.
4748/// (this means we can call emitConjunctionRec() with
4749/// Negate==true on this sub-tree)
4750/// \param MustBeFirst Set to true if this subtree needs to be negated and we
4751/// cannot do the negation naturally. We are required to
4752/// emit the subtree first in this case.
4753/// \param WillNegate Is true if we are called when the result of this
4754/// subexpression must be negated. This happens when the
4755/// outer expression is an OR. We can use this fact to know
4756/// that we have a double negation (or (or ...) ...) that
4757/// can be implemented for free.
4758static bool canEmitConjunction(Register Val, bool &CanNegate, bool &MustBeFirst,
4759 bool WillNegate, MachineRegisterInfo &MRI,
4760 unsigned Depth = 0) {
4761 if (!MRI.hasOneNonDBGUse(Val))
4762 return false;
4763 MachineInstr *ValDef = MRI.getVRegDef(Val);
4764 unsigned Opcode = ValDef->getOpcode();
4765 if (Opcode == TargetOpcode::G_TRUNC) {
4766 // Look through a trunc.
4767 Val = ValDef->getOperand(1).getReg();
4768 ValDef = MRI.getVRegDef(Val);
4769 Opcode = ValDef->getOpcode();
4770 }
4771 if (isa<GAnyCmp>(ValDef)) {
4772 CanNegate = true;
4773 MustBeFirst = false;
4774 return true;
4775 }
4776 // Protect against exponential runtime and stack overflow.
4777 if (Depth > 6)
4778 return false;
4779 if (Opcode == TargetOpcode::G_AND || Opcode == TargetOpcode::G_OR) {
4780 bool IsOR = Opcode == TargetOpcode::G_OR;
4781 Register O0 = ValDef->getOperand(1).getReg();
4782 Register O1 = ValDef->getOperand(2).getReg();
4783 bool CanNegateL;
4784 bool MustBeFirstL;
4785 if (!canEmitConjunction(O0, CanNegateL, MustBeFirstL, IsOR, MRI, Depth + 1))
4786 return false;
4787 bool CanNegateR;
4788 bool MustBeFirstR;
4789 if (!canEmitConjunction(O1, CanNegateR, MustBeFirstR, IsOR, MRI, Depth + 1))
4790 return false;
4791
4792 if (MustBeFirstL && MustBeFirstR)
4793 return false;
4794
4795 if (IsOR) {
4796 // For an OR expression we need to be able to naturally negate at least
4797 // one side or we cannot do the transformation at all.
4798 if (!CanNegateL && !CanNegateR)
4799 return false;
4800 // If the result of the OR will be negated and we can naturally negate
4801 // the leaves, then this sub-tree as a whole negates naturally.
4802 CanNegate = WillNegate && CanNegateL && CanNegateR;
4803 // If we cannot naturally negate the whole sub-tree, then this must be
4804 // emitted first.
4805 MustBeFirst = !CanNegate;
4806 } else {
4807 assert(Opcode == TargetOpcode::G_AND && "Must be G_AND");
4808 // We cannot naturally negate an AND operation.
4809 CanNegate = false;
4810 MustBeFirst = MustBeFirstL || MustBeFirstR;
4811 }
4812 return true;
4813 }
4814 return false;
4815}
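// Illustrative sketch, not from the upstream file: for a single-use tree such
// as
//
//   %a:_(s1) = G_ICMP intpred(eq), %x, %y
//   %b:_(s1) = G_ICMP intpred(slt), %p, %q
//   %c:_(s1) = G_AND %a, %b
//
// both leaves are compares, so each leaf reports CanNegate = true, and the
// G_AND node reports CanNegate = false (ANDs cannot be negated naturally)
// with MustBeFirst = false; canEmitConjunction therefore returns true and
// emitConjunction can turn the tree into a compare + ccmp chain.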
4816
4817MachineInstr *AArch64InstructionSelector::emitConditionalComparison(
4818 Register LHS, Register RHS, CmpInst::Predicate CC,
4819 AArch64CC::CondCode Predicate, AArch64CC::CondCode OutCC,
4820 MachineIRBuilder &MIB) const {
4821 // TODO: emit CMN as an optimization.
4822 auto &MRI = *MIB.getMRI();
4823 LLT OpTy = MRI.getType(LHS);
4824 assert(OpTy.getSizeInBits() == 32 || OpTy.getSizeInBits() == 64);
4825 unsigned CCmpOpc;
4826 if (CmpInst::isIntPredicate(CC)) {
4827 CCmpOpc = OpTy.getSizeInBits() == 32 ? AArch64::CCMPWr : AArch64::CCMPXr;
4828 } else {
4829 switch (OpTy.getSizeInBits()) {
4830 case 16:
4831 CCmpOpc = AArch64::FCCMPHrr;
4832 break;
4833 case 32:
4834 CCmpOpc = AArch64::FCCMPSrr;
4835 break;
4836 case 64:
4837 CCmpOpc = AArch64::FCCMPDrr;
4838 break;
4839 default:
4840 return nullptr;
4841 }
4842 }
4843 AArch64CC::CondCode InvOutCC = AArch64CC::getInvertedCondCode(OutCC);
4844 unsigned NZCV = AArch64CC::getNZCVToSatisfyCondCode(InvOutCC);
4845 auto CCmp =
4846 MIB.buildInstr(CCmpOpc, {}, {LHS, RHS}).addImm(NZCV).addImm(Predicate);
4847 constrainSelectedInstRegOperands(*CCmp, TII, TRI, RBI);
4848 return &*CCmp;
4849}
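// Illustrative sketch, not from the upstream file: CCMP performs the compare
// only when Predicate holds and otherwise loads the immediate NZCV field.
// Since the immediate is chosen to satisfy the *inverted* OutCC, the whole
// chain reads as "OutCC is false" as soon as an earlier link fails, roughly:
//
//   cmp   w0, w1                // first leaf of the chain
//   ccmp  w2, w3, #<nzcv>, lt   // compare only if "lt" held, else set #<nzcv>
//   b.eq  <target>              // final OutCC tested by the user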
4850
4851MachineInstr *AArch64InstructionSelector::emitConjunctionRec(
4852 Register Val, AArch64CC::CondCode &OutCC, bool Negate, Register CCOp,
4853 AArch64CC::CondCode Predicate, MachineIRBuilder &MIB) const {
4854 // We're at a tree leaf, produce a conditional comparison operation.
4855 auto &MRI = *MIB.getMRI();
4856 MachineInstr *ValDef = MRI.getVRegDef(Val);
4857 unsigned Opcode = ValDef->getOpcode();
4858 if (Opcode == TargetOpcode::G_TRUNC) {
4859 // Look through a trunc.
4860 Val = ValDef->getOperand(1).getReg();
4861 ValDef = MRI.getVRegDef(Val);
4862 Opcode = ValDef->getOpcode();
4863 }
4864 if (auto *Cmp = dyn_cast<GAnyCmp>(ValDef)) {
4865 Register LHS = Cmp->getLHSReg();
4866 Register RHS = Cmp->getRHSReg();
4867 CmpInst::Predicate CC = Cmp->getCond();
4868 if (Negate)
4869 CC = CmpInst::getInversePredicate(CC);
4870 if (isa<GICmp>(Cmp)) {
4871 OutCC = changeICMPPredToAArch64CC(CC);
4872 } else {
4873 // Handle special FP cases.
4874 AArch64CC::CondCode ExtraCC;
4875 changeFPCCToANDAArch64CC(CC, OutCC, ExtraCC);
4876 // Some floating point conditions can't be tested with a single condition
4877 // code. Construct an additional comparison in this case.
4878 if (ExtraCC != AArch64CC::AL) {
4879 MachineInstr *ExtraCmp;
4880 if (!CCOp)
4881 ExtraCmp = emitFPCompare(LHS, RHS, MIB, CC);
4882 else
4883 ExtraCmp =
4884 emitConditionalComparison(LHS, RHS, CC, Predicate, ExtraCC, MIB);
4885 CCOp = ExtraCmp->getOperand(0).getReg();
4886 Predicate = ExtraCC;
4887 }
4888 }
4889
4890 // Produce a normal comparison if we are first in the chain
4891 if (!CCOp) {
4892 auto Dst = MRI.cloneVirtualRegister(LHS);
4893 if (isa<GICmp>(Cmp))
4894 return emitSUBS(Dst, Cmp->getOperand(2), Cmp->getOperand(3), MIB);
4895 return emitFPCompare(Cmp->getOperand(2).getReg(),
4896 Cmp->getOperand(3).getReg(), MIB);
4897 }
4898 // Otherwise produce a ccmp.
4899 return emitConditionalComparison(LHS, RHS, CC, Predicate, OutCC, MIB);
4900 }
4901 assert(MRI.hasOneNonDBGUse(Val) && "Valid conjunction/disjunction tree");
4902
4903 bool IsOR = Opcode == TargetOpcode::G_OR;
4904
4905 Register LHS = ValDef->getOperand(1).getReg();
4906 bool CanNegateL;
4907 bool MustBeFirstL;
4908 bool ValidL = canEmitConjunction(LHS, CanNegateL, MustBeFirstL, IsOR, MRI);
4909 assert(ValidL && "Valid conjunction/disjunction tree");
4910 (void)ValidL;
4911
4912 Register RHS = ValDef->getOperand(2).getReg();
4913 bool CanNegateR;
4914 bool MustBeFirstR;
4915 bool ValidR = canEmitConjunction(RHS, CanNegateR, MustBeFirstR, IsOR, MRI);
4916 assert(ValidR && "Valid conjunction/disjunction tree");
4917 (void)ValidR;
4918
4919 // Swap sub-tree that must come first to the right side.
4920 if (MustBeFirstL) {
4921 assert(!MustBeFirstR && "Valid conjunction/disjunction tree");
4922 std::swap(LHS, RHS);
4923 std::swap(CanNegateL, CanNegateR);
4924 std::swap(MustBeFirstL, MustBeFirstR);
4925 }
4926
4927 bool NegateR;
4928 bool NegateAfterR;
4929 bool NegateL;
4930 bool NegateAfterAll;
4931 if (Opcode == TargetOpcode::G_OR) {
4932 // Swap the sub-tree that we can negate naturally to the left.
4933 if (!CanNegateL) {
4934 assert(CanNegateR && "at least one side must be negatable");
4935 assert(!MustBeFirstR && "invalid conjunction/disjunction tree");
4936 assert(!Negate);
4937 std::swap(LHS, RHS);
4938 NegateR = false;
4939 NegateAfterR = true;
4940 } else {
4941 // Negate the left sub-tree if possible, otherwise negate the result.
4942 NegateR = CanNegateR;
4943 NegateAfterR = !CanNegateR;
4944 }
4945 NegateL = true;
4946 NegateAfterAll = !Negate;
4947 } else {
4948 assert(Opcode == TargetOpcode::G_AND &&
4949        "Valid conjunction/disjunction tree");
4950 assert(!Negate && "Valid conjunction/disjunction tree");
4951
4952 NegateL = false;
4953 NegateR = false;
4954 NegateAfterR = false;
4955 NegateAfterAll = false;
4956 }
4957
4958 // Emit sub-trees.
4959 AArch64CC::CondCode RHSCC;
4960 MachineInstr *CmpR =
4961 emitConjunctionRec(RHS, RHSCC, NegateR, CCOp, Predicate, MIB);
4962 if (NegateAfterR)
4963 RHSCC = AArch64CC::getInvertedCondCode(RHSCC);
4964 MachineInstr *CmpL = emitConjunctionRec(
4965 LHS, OutCC, NegateL, CmpR->getOperand(0).getReg(), RHSCC, MIB);
4966 if (NegateAfterAll)
4967 OutCC = AArch64CC::getInvertedCondCode(OutCC);
4968 return CmpL;
4969}
4970
4971MachineInstr *AArch64InstructionSelector::emitConjunction(
4972 Register Val, AArch64CC::CondCode &OutCC, MachineIRBuilder &MIB) const {
4973 bool DummyCanNegate;
4974 bool DummyMustBeFirst;
4975 if (!canEmitConjunction(Val, DummyCanNegate, DummyMustBeFirst, false,
4976 *MIB.getMRI()))
4977 return nullptr;
4978 return emitConjunctionRec(Val, OutCC, false, Register(), AArch64CC::AL, MIB);
4979}
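// Illustrative sketch, not from the upstream file: putting the two helpers
// together, a condition such as
//
//   %c:_(s1) = G_AND (G_ICMP intpred(eq), %x, %y),
//                    (G_ICMP intpred(slt), %p, %q)
//
// is emitted as one SUBS for one leaf and one CCMP for the other, predicated
// on the first leaf's condition code, with the final code handed back through
// OutCC for the consuming CSEL/B.cond; no intermediate i1 values are
// materialized in general-purpose registers.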
4980
4981bool AArch64InstructionSelector::tryOptSelectConjunction(GSelect &SelI,
4982 MachineInstr &CondMI) {
4983 AArch64CC::CondCode AArch64CC;
4984 MachineInstr *ConjMI = emitConjunction(SelI.getCondReg(), AArch64CC, MIB);
4985 if (!ConjMI)
4986 return false;
4987
4988 emitSelect(SelI.getReg(0), SelI.getTrueReg(), SelI.getFalseReg(), AArch64CC, MIB);
4989 SelI.eraseFromParent();
4990 return true;
4991}
4992
4993bool AArch64InstructionSelector::tryOptSelect(GSelect &I) {
4994 MachineRegisterInfo &MRI = *MIB.getMRI();
4995 // We want to recognize this pattern:
4996 //
4997 // $z = G_FCMP pred, $x, $y
4998 // ...
4999 // $w = G_SELECT $z, $a, $b
5000 //
5001 // Where the value of $z is *only* ever used by the G_SELECT (possibly with
5002 // some copies/truncs in between.)
5003 //
5004 // If we see this, then we can emit something like this:
5005 //
5006 // fcmp $x, $y
5007 // fcsel $w, $a, $b, pred
5008 //
5009 // Rather than emitting both of the rather long sequences in the standard
5010 // G_FCMP/G_SELECT select methods.
5011
5012 // First, check if the condition is defined by a compare.
5013 MachineInstr *CondDef = MRI.getVRegDef(I.getOperand(1).getReg());
5014 while (CondDef) {
5015 // We can only fold if all of the defs have one use.
5016 Register CondDefReg = CondDef->getOperand(0).getReg();
5017 if (!MRI.hasOneNonDBGUse(CondDefReg)) {
5018 // Unless it's another select.
5019 for (const MachineInstr &UI : MRI.use_nodbg_instructions(CondDefReg)) {
5020 if (CondDef == &UI)
5021 continue;
5022 if (UI.getOpcode() != TargetOpcode::G_SELECT)
5023 return false;
5024 }
5025 }
5026
5027 // We can skip over G_TRUNC since the condition is 1-bit.
5028 // Truncating/extending can have no impact on the value.
5029 unsigned Opc = CondDef->getOpcode();
5030 if (Opc != TargetOpcode::COPY && Opc != TargetOpcode::G_TRUNC)
5031 break;
5032
5033 // Can't see past copies from physregs.
5034 if (Opc == TargetOpcode::COPY &&
5035 Register::isPhysicalRegister(CondDef->getOperand(1).getReg()))
5036 return false;
5037
5038 CondDef = MRI.getVRegDef(CondDef->getOperand(1).getReg());
5039 }
5040
5041 // Is the condition defined by a compare?
5042 unsigned CondOpc = CondDef->getOpcode();
5043 if (CondOpc != TargetOpcode::G_ICMP && CondOpc != TargetOpcode::G_FCMP) {
5044 if (tryOptSelectConjunction(I, *CondDef))
5045 return true;
5046 return false;
5047 }
5048
5049 AArch64CC::CondCode CondCode;
5050 if (CondOpc == TargetOpcode::G_ICMP) {
5051 auto Pred =
5052 static_cast<CmpInst::Predicate>(CondDef->getOperand(1).getPredicate());
5053 CondCode = changeICMPPredToAArch64CC(Pred);
5054 emitIntegerCompare(CondDef->getOperand(2), CondDef->getOperand(3),
5055 CondDef->getOperand(1), MIB);
5056 } else {
5057 // Get the condition code for the select.
5058 auto Pred =
5059 static_cast<CmpInst::Predicate>(CondDef->getOperand(1).getPredicate());
5060 AArch64CC::CondCode CondCode2;
5061 changeFCMPPredToAArch64CC(Pred, CondCode, CondCode2);
5062
5063 // changeFCMPPredToAArch64CC sets CondCode2 to a code other than AL when
5064 // we require two instructions to emit the comparison.
5065 // TODO: Handle FCMP_UEQ and FCMP_ONE. After that, this check will be
5066 // unnecessary.
5067 if (CondCode2 != AArch64CC::AL)
5068 return false;
5069
5070 if (!emitFPCompare(CondDef->getOperand(2).getReg(),
5071 CondDef->getOperand(3).getReg(), MIB)) {
5072 LLVM_DEBUG(dbgs() << "Couldn't emit compare for select!\n");
5073 return false;
5074 }
5075 }
5076
5077 // Emit the select.
5078 emitSelect(I.getOperand(0).getReg(), I.getOperand(2).getReg(),
5079 I.getOperand(3).getReg(), CondCode, MIB);
5080 I.eraseFromParent();
5081 return true;
5082}
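// Illustrative sketch, not from the upstream file: the integer flavour of the
// fold above looks roughly like
//
//   %z:_(s1)  = G_ICMP intpred(eq), %x, %y
//   %w:_(s32) = G_SELECT %z, %a, %b
//     ==>
//   subs wzr, wX, wY
//   csel wW, wA, wB, eq
//
// i.e. the compare feeds NZCV directly and emitSelect picks a CSEL-family
// instruction keyed on the translated condition code.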
5083
5084MachineInstr *AArch64InstructionSelector::tryFoldIntegerCompare(
5085 MachineOperand &LHS, MachineOperand &RHS, MachineOperand &Predicate,
5086 MachineIRBuilder &MIRBuilder) const {
5087 assert(LHS.isReg() && RHS.isReg() && Predicate.isPredicate() &&
5088        "Unexpected MachineOperand");
5089 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
5090 // We want to find this sort of thing:
5091 // x = G_SUB 0, y
5092 // G_ICMP z, x
5093 //
5094 // In this case, we can fold the G_SUB into the G_ICMP using a CMN instead.
5095 // e.g:
5096 //
5097 // cmn z, y
5098
5099 // Check if the RHS or LHS of the G_ICMP is defined by a SUB
5100 MachineInstr *LHSDef = getDefIgnoringCopies(LHS.getReg(), MRI);
5101 MachineInstr *RHSDef = getDefIgnoringCopies(RHS.getReg(), MRI);
5102 auto P = static_cast<CmpInst::Predicate>(Predicate.getPredicate());
5103 // Given this:
5104 //
5105 // x = G_SUB 0, y
5106 // G_ICMP x, z
5107 //
5108 // Produce this:
5109 //
5110 // cmn y, z
5111 if (isCMN(LHSDef, P, MRI))
5112 return emitCMN(LHSDef->getOperand(2), RHS, MIRBuilder);
5113
5114 // Same idea here, but with the RHS of the compare instead:
5115 //
5116 // Given this:
5117 //
5118 // x = G_SUB 0, y
5119 // G_ICMP z, x
5120 //
5121 // Produce this:
5122 //
5123 // cmn z, y
5124 if (isCMN(RHSDef, P, MRI))
5125 return emitCMN(LHS, RHSDef->getOperand(2), MIRBuilder);
5126
5127 // Given this:
5128 //
5129 // z = G_AND x, y
5130 // G_ICMP z, 0
5131 //
5132 // Produce this if the compare is an equality or signed compare:
5133 //
5134 // tst x, y
5135 if (!CmpInst::isUnsigned(P) && LHSDef &&
5136 LHSDef->getOpcode() == TargetOpcode::G_AND) {
5137 // Make sure that the RHS is 0.
5138 auto ValAndVReg = getIConstantVRegValWithLookThrough(RHS.getReg(), MRI);
5139 if (!ValAndVReg || ValAndVReg->Value != 0)
5140 return nullptr;
5141
5142 return emitTST(LHSDef->getOperand(1),
5143 LHSDef->getOperand(2), MIRBuilder);
5144 }
5145
5146 return nullptr;
5147}
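// Illustrative sketch, not from the upstream file: the TST fold above covers
// equality and signed predicates (everything CmpInst::isUnsigned rejects),
// e.g.
//
//   %z:_(s32) = G_AND %x, %y
//   %c:_(s1)  = G_ICMP intpred(eq), %z, 0
//     ==>
//   tst wX, wY        // ANDS with the zero register as destination
//   b.eq <target>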
5148
5149bool AArch64InstructionSelector::selectShuffleVector(
5150 MachineInstr &I, MachineRegisterInfo &MRI) {
5151 const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
5152 Register Src1Reg = I.getOperand(1).getReg();
5153 const LLT Src1Ty = MRI.getType(Src1Reg);
5154 Register Src2Reg = I.getOperand(2).getReg();
5155 const LLT Src2Ty = MRI.getType(Src2Reg);
5156 ArrayRef<int> Mask = I.getOperand(3).getShuffleMask();
5157
5158 MachineBasicBlock &MBB = *I.getParent();
5159 MachineFunction &MF = *MBB.getParent();
5160 LLVMContext &Ctx = MF.getFunction().getContext();
5161
5162 // G_SHUFFLE_VECTOR is weird in that the source operands can be scalars, if
5163 // it's originated from a <1 x T> type. Those should have been lowered into
5164 // G_BUILD_VECTOR earlier.
5165 if (!Src1Ty.isVector() || !Src2Ty.isVector()) {
5166 LLVM_DEBUG(dbgs() << "Could not select a \"scalar\" G_SHUFFLE_VECTOR\n");
5167 return false;
5168 }
5169
5170 unsigned BytesPerElt = DstTy.getElementType().getSizeInBits() / 8;
5171
5172 SmallVector<Constant *, 64> CstIdxs;
5173 for (int Val : Mask) {
5174 // For now, we'll just treat any undef indexes as 0. This should be
5175 // optimized in the future, e.g. to select DUP etc.
5176 Val = Val < 0 ? 0 : Val;
5177 for (unsigned Byte = 0; Byte < BytesPerElt; ++Byte) {
5178 unsigned Offset = Byte + Val * BytesPerElt;
5179 CstIdxs.emplace_back(ConstantInt::get(Type::getInt8Ty(Ctx), Offset));
5180 }
5181 }
5182
5183 // Use a constant pool to load the index vector for TBL.
5184 Constant *CPVal = ConstantVector::get(CstIdxs);
5185 MachineInstr *IndexLoad = emitLoadFromConstantPool(CPVal, MIB);
5186 if (!IndexLoad) {
5187 LLVM_DEBUG(dbgs() << "Could not load from a constant pool");
5188 return false;
5189 }
5190
5191 if (DstTy.getSizeInBits() != 128) {
5192 assert(DstTy.getSizeInBits() == 64 && "Unexpected shuffle result ty");
5193 // This case can be done with TBL1.
5194 MachineInstr *Concat = emitVectorConcat(None, Src1Reg, Src2Reg, MIB);
5195 if (!Concat) {
5196 LLVM_DEBUG(dbgs() << "Could not do vector concat for tbl1");
5197 return false;
5198 }
5199
5200 // The constant pool load will be 64 bits, so we need to convert it to FPR128.
5201 IndexLoad = emitScalarToVector(64, &AArch64::FPR128RegClass,
5202 IndexLoad->getOperand(0).getReg(), MIB);
5203
5204 auto TBL1 = MIB.buildInstr(
5205 AArch64::TBLv16i8One, {&AArch64::FPR128RegClass},
5206 {Concat->getOperand(0).getReg(), IndexLoad->getOperand(0).getReg()});
5207 constrainSelectedInstRegOperands(*TBL1, TII, TRI, RBI);
5208
5209 auto Copy =
5210 MIB.buildInstr(TargetOpcode::COPY, {I.getOperand(0).getReg()}, {})
5211 .addReg(TBL1.getReg(0), 0, AArch64::dsub);
5212 RBI.constrainGenericRegister(Copy.getReg(0), AArch64::FPR64RegClass, MRI);
5213 I.eraseFromParent();
5214 return true;
5215 }
5216
5217 // For TBL2 we need to emit a REG_SEQUENCE to tie together two consecutive
5218 // Q registers for regalloc.
5219 SmallVector<Register, 2> Regs = {Src1Reg, Src2Reg};
5220 auto RegSeq = createQTuple(Regs, MIB);
5221 auto TBL2 = MIB.buildInstr(AArch64::TBLv16i8Two, {I.getOperand(0)},
5222 {RegSeq, IndexLoad->getOperand(0)});
5223 constrainSelectedInstRegOperands(*TBL2, TII, TRI, RBI);
5224 I.eraseFromParent();
5225 return true;
5226}
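// Illustrative sketch, not from the upstream file: the TBL index bytes built
// by the loop above, reproduced as a standalone helper so the mapping from
// shuffle mask to table indices is easy to see. The helper name and the use
// of std::vector are local to this example.

#include <cstdint>
#include <vector>

static std::vector<uint8_t> buildTblIndexBytes(const std::vector<int> &Mask,
                                               unsigned BytesPerElt) {
  std::vector<uint8_t> Bytes;
  for (int Val : Mask) {
    if (Val < 0)
      Val = 0; // Undef lanes currently read lane 0, matching the loop above.
    for (unsigned Byte = 0; Byte < BytesPerElt; ++Byte)
      Bytes.push_back(static_cast<uint8_t>(Byte + Val * BytesPerElt));
  }
  return Bytes;
}

// e.g. Mask = {0, 4, 1, 5} with BytesPerElt = 4 (two v4i32 sources) yields
// {0,1,2,3, 16,17,18,19, 4,5,6,7, 20,21,22,23}.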
5227
5228MachineInstr *AArch64InstructionSelector::emitLaneInsert(
5229 Optional<Register> DstReg, Register SrcReg, Register EltReg,
5230 unsigned LaneIdx, const RegisterBank &RB,
5231 MachineIRBuilder &MIRBuilder) const {
5232 MachineInstr *InsElt = nullptr;
5233 const TargetRegisterClass *DstRC = &AArch64::FPR128RegClass;
5234 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
5235
5236 // Create a register to define with the insert if one wasn't passed in.
5237 if (!DstReg)
5238 DstReg = MRI.createVirtualRegister(DstRC);
5239
5240 unsigned EltSize = MRI.getType(EltReg).getSizeInBits();
5241 unsigned Opc = getInsertVecEltOpInfo(RB, EltSize).first;
5242
5243 if (RB.getID() == AArch64::FPRRegBankID) {
5244 auto InsSub = emitScalarToVector(EltSize, DstRC, EltReg, MIRBuilder);
5245 InsElt = MIRBuilder.buildInstr(Opc, {*DstReg}, {SrcReg})
5246 .addImm(LaneIdx)
5247 .addUse(InsSub->getOperand(0).getReg())
5248 .addImm(0);
5249 } else {
5250 InsElt = MIRBuilder.buildInstr(Opc, {*DstReg}, {SrcReg})
5251 .addImm(LaneIdx)
5252 .addUse(EltReg);
5253 }
5254
5255 constrainSelectedInstRegOperands(*InsElt, TII, TRI, RBI);
5256 return InsElt;
5257}
5258
5259bool AArch64InstructionSelector::selectUSMovFromExtend(
5260 MachineInstr &MI, MachineRegisterInfo &MRI) {
5261 if (MI.getOpcode() != TargetOpcode::G_SEXT &&
5262 MI.getOpcode() != TargetOpcode::G_ZEXT &&
5263 MI.getOpcode() != TargetOpcode::G_ANYEXT)
5264 return false;
5265 bool IsSigned = MI.getOpcode() == TargetOpcode::G_SEXT;
5266 const Register DefReg = MI.getOperand(0).getReg();
5267 const LLT DstTy = MRI.getType(DefReg);
5268 unsigned DstSize = DstTy.getSizeInBits();
5269
5270 if (DstSize != 32 && DstSize != 64)
5271 return false;
5272
5273 MachineInstr *Extract = getOpcodeDef(TargetOpcode::G_EXTRACT_VECTOR_ELT,
5274 MI.getOperand(1).getReg(), MRI);
5275 int64_t Lane;
5276 if (!Extract || !mi_match(Extract->getOperand(2).getReg(), MRI, m_ICst(Lane)))
5277 return false;
5278 Register Src0 = Extract->getOperand(1).getReg();
5279
5280 const LLT &VecTy = MRI.getType(Src0);
5281
5282 if (VecTy.getSizeInBits() != 128) {
5283 const MachineInstr *ScalarToVector = emitScalarToVector(
5284 VecTy.getSizeInBits(), &AArch64::FPR128RegClass, Src0, MIB);
5285 assert(ScalarToVector && "Didn't expect emitScalarToVector to fail!");
5286 Src0 = ScalarToVector->getOperand(0).getReg();
5287 }
5288
5289 unsigned Opcode;
5290 if (DstSize == 64 && VecTy.getScalarSizeInBits() == 32)
5291 Opcode = IsSigned ? AArch64::SMOVvi32to64 : AArch64::UMOVvi32;
5292 else if (DstSize == 64 && VecTy.getScalarSizeInBits() == 16)
5293 Opcode = IsSigned ? AArch64::SMOVvi16to64 : AArch64::UMOVvi16;
5294 else if (DstSize == 64 && VecTy.getScalarSizeInBits() == 8)
5295 Opcode = IsSigned ? AArch64::SMOVvi8to64 : AArch64::UMOVvi8;
5296 else if (DstSize == 32 && VecTy.getScalarSizeInBits() == 16)
5297 Opcode = IsSigned ? AArch64::SMOVvi16to32 : AArch64::UMOVvi16;
5298 else if (DstSize == 32 && VecTy.getScalarSizeInBits() == 8)
5299 Opcode = IsSigned ? AArch64::SMOVvi8to32 : AArch64::UMOVvi8;
5300 else
5301 llvm_unreachable("Unexpected type combo for S/UMov!");
5302
5303 // We may need to generate one of these, depending on the type and sign of the
5304 // input:
5305 // DstReg = SMOV Src0, Lane;
5306 // NewReg = UMOV Src0, Lane; DstReg = SUBREG_TO_REG NewReg, sub_32;
5307 MachineInstr *ExtI = nullptr;
5308 if (DstSize == 64 && !IsSigned) {
5309 Register NewReg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
5310 MIB.buildInstr(Opcode, {NewReg}, {Src0}).addImm(Lane);
5311 ExtI = MIB.buildInstr(AArch64::SUBREG_TO_REG, {DefReg}, {})
5312 .addImm(0)
5313 .addUse(NewReg)
5314 .addImm(AArch64::sub_32);
5315 RBI.constrainGenericRegister(DefReg, AArch64::GPR64RegClass, MRI);
5316 } else
5317 ExtI = MIB.buildInstr(Opcode, {DefReg}, {Src0}).addImm(Lane);
5318
5319 constrainSelectedInstRegOperands(*ExtI, TII, TRI, RBI);
5320 MI.eraseFromParent();
5321 return true;
5322}
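// Illustrative sketch, not from the upstream file: a sign-extending lane read
// such as
//
//   %e:_(s16) = G_EXTRACT_VECTOR_ELT %v:_(<8 x s16>), 3
//   %d:_(s32) = G_SEXT %e
//
// selects to a single "smov w0, v0.h[3]", while the unsigned 64-bit cases go
// through UMOV into a W register plus SUBREG_TO_REG, since writing a W
// register already zeroes the upper 32 bits.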
5323
5324bool AArch64InstructionSelector::selectInsertElt(MachineInstr &I,
5325 MachineRegisterInfo &MRI) {
5326 assert(I.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT);
5327
5328 // Get information on the destination.
5329 Register DstReg = I.getOperand(0).getReg();
5330 const LLT DstTy = MRI.getType(DstReg);
5331 unsigned VecSize = DstTy.getSizeInBits();
5332
5333 // Get information on the element we want to insert into the destination.
5334 Register EltReg = I.getOperand(2).getReg();
5335 const LLT EltTy = MRI.getType(EltReg);
5336 unsigned EltSize = EltTy.getSizeInBits();
5337 if (EltSize < 16 || EltSize > 64)
5338 return false; // Don't support all element types yet.
5339
5340 // Find the definition of the index. Bail out if it's not defined by a
5341 // G_CONSTANT.
5342 Register IdxReg = I.getOperand(3).getReg();
5343 auto VRegAndVal = getIConstantVRegValWithLookThrough(IdxReg, MRI);
5344 if (!VRegAndVal)
5345 return false;
5346 unsigned LaneIdx = VRegAndVal->Value.getSExtValue();
5347
5348 // Perform the lane insert.
5349 Register SrcReg = I.getOperand(1).getReg();
5350 const RegisterBank &EltRB = *RBI.getRegBank(EltReg, MRI, TRI);
5351
5352 if (VecSize < 128) {
5353 // If the vector we're inserting into is smaller than 128 bits, widen it
5354 // to 128 to do the insert.
5355 MachineInstr *ScalarToVec =
5356 emitScalarToVector(VecSize, &AArch64::FPR128RegClass, SrcReg, MIB);
5357 if (!ScalarToVec)
5358 return false;
5359 SrcReg = ScalarToVec->getOperand(0).getReg();
5360 }
5361
5362 // Create an insert into a new FPR128 register.
5363 // Note that if our vector is already 128 bits, we end up emitting an extra
5364 // register.
5365 MachineInstr *InsMI =
5366 emitLaneInsert(None, SrcReg, EltReg, LaneIdx, EltRB, MIB);
5367
5368 if (VecSize < 128) {
5369 // If we had to widen to perform the insert, then we have to demote back to
5370 // the original size to get the result we want.
5371 Register DemoteVec = InsMI->getOperand(0).getReg();
5372 const TargetRegisterClass *RC =
5373 getRegClassForTypeOnBank(DstTy, *RBI.getRegBank(DemoteVec, MRI, TRI));
5374 if (RC != &AArch64::FPR32RegClass && RC != &AArch64::FPR64RegClass) {
5375 LLVM_DEBUG(dbgs() << "Unsupported register class!\n");
5376 return false;
5377 }
5378 unsigned SubReg = 0;
5379 if (!getSubRegForClass(RC, TRI, SubReg))
5380 return false;
5381 if (SubReg != AArch64::ssub && SubReg != AArch64::dsub) {
5382 LLVM_DEBUG(dbgs() << "Unsupported destination size! (" << VecSize
5383                   << "\n");
5384 return false;
5385 }
5386 MIB.buildInstr(TargetOpcode::COPY, {DstReg}, {})
5387 .addReg(DemoteVec, 0, SubReg);
5388 RBI.constrainGenericRegister(DstReg, *RC, MRI);
5389 } else {
5390 // No widening needed.
5391 InsMI->getOperand(0).setReg(DstReg);
5392 constrainSelectedInstRegOperands(*InsMI, TII, TRI, RBI);
5393 }
5394
5395 I.eraseFromParent();
5396 return true;
5397}
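// Illustrative sketch, not from the upstream file: inserting into a 64-bit
// vector widens, inserts on a Q register, then narrows again, conceptually:
//
//   %v:fpr(<2 x s32>) = G_INSERT_VECTOR_ELT %src, %elt, 1
//     ==>
//   widen %src into the low half of an FPR128
//   mov   v0.s[1], v1.s[0]     // the actual lane insert (INSvi32lane)
//   copy the dsub half back out as the final <2 x s32> result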
5398
5399MachineInstr *
5400AArch64InstructionSelector::emitConstantVector(Register Dst, Constant *CV,
5401 MachineIRBuilder &MIRBuilder,
5402 MachineRegisterInfo &MRI) {
5403 LLT DstTy = MRI.getType(Dst);
5404 unsigned DstSize = DstTy.getSizeInBits();
5405 if (CV->isNullValue()) {
5406 if (DstSize == 128) {
5407 auto Mov =
5408 MIRBuilder.buildInstr(AArch64::MOVIv2d_ns, {Dst}, {}).addImm(0);
5409 constrainSelectedInstRegOperands(*Mov, TII, TRI, RBI);
5410 return &*Mov;
5411 }
5412
5413 if (DstSize == 64) {
5414 auto Mov =
5415 MIRBuilder
5416 .buildInstr(AArch64::MOVIv2d_ns, {&AArch64::FPR128RegClass}, {})
5417 .addImm(0);
5418 auto Copy = MIRBuilder.buildInstr(TargetOpcode::COPY, {Dst}, {})
5419 .addReg(Mov.getReg(0), 0, AArch64::dsub);
5420 RBI.constrainGenericRegister(Dst, AArch64::FPR64RegClass, MRI);
5421 return &*Copy;
5422 }
5423 }
5424
5425 auto *CPLoad = emitLoadFromConstantPool(CV, MIRBuilder);
5426 if (!CPLoad) {
5427 LLVM_DEBUG(dbgs() << "Could not generate cp load for constant vector!");
5428 return nullptr;
5429 }
5430
5431 auto Copy = MIRBuilder.buildCopy(Dst, CPLoad->getOperand(0));
5432 RBI.constrainGenericRegister(
5433 Dst, *MRI.getRegClass(CPLoad->getOperand(0).getReg()), MRI);
5434 return &*Copy;
5435}
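// Illustrative sketch, not from the upstream file: the three paths above
// correspond roughly to
//
//   all-zero, 128-bit:  movi v0.2d, #0
//   all-zero,  64-bit:  movi v0.2d, #0   followed by a copy of the dsub half
//   anything else:      a load of the whole vector from the constant pool
//
// so only genuinely non-trivial constants pay for a constant-pool load.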
5436
5437bool AArch64InstructionSelector::tryOptConstantBuildVec(
5438 MachineInstr &I, LLT DstTy, MachineRegisterInfo &MRI) {
5439 assert(I.getOpcode() == TargetOpcode::G_BUILD_VECTOR);
5440 unsigned DstSize = DstTy.getSizeInBits();
5441 assert(DstSize <= 128 && "Unexpected build_vec type!");
5442 if (DstSize < 32)
5443 return false;
5444 // Check if we're building a constant vector, in which case we want to
5445 // generate a constant pool load instead of a vector insert sequence.
5446 SmallVector<Constant *, 16> Csts;
5447 for (unsigned Idx = 1; Idx < I.getNumOperands(); ++Idx) {
5448 // Try to find G_CONSTANT or G_FCONSTANT
5449 auto *OpMI =
5450 getOpcodeDef(TargetOpcode::G_CONSTANT, I.getOperand(Idx).getReg(), MRI);
5451 if (OpMI)
5452 Csts.emplace_back(
5453 const_cast<ConstantInt *>(OpMI->getOperand(1).getCImm()));
5454 else if ((OpMI = getOpcodeDef(TargetOpcode::G_FCONSTANT,
5455 I.getOperand(Idx).getReg(), MRI)))
5456 Csts.emplace_back(
5457 const_cast<ConstantFP *>(OpMI->getOperand(1).getFPImm()));
5458 else
5459 return false;
5460 }
5461 Constant *CV = ConstantVector::get(Csts);
5462 if (!emitConstantVector(I.getOperand(0).getReg(), CV, MIB, MRI))
5463 return false;
5464 I.eraseFromParent();
5465 return true;
5466}
5467
5468bool AArch64InstructionSelector::tryOptBuildVecToSubregToReg(
5469 MachineInstr &I, MachineRegisterInfo &MRI) {
5470 // Given:
5471 // %vec = G_BUILD_VECTOR %elt, %undef, %undef, ... %undef
5472 //
5473 // Select the G_BUILD_VECTOR as a SUBREG_TO_REG from %elt.
5474 Register Dst = I.getOperand(0).getReg();
5475 Register EltReg = I.getOperand(1).getReg();
5476 LLT EltTy = MRI.getType(EltReg);
5477 // If the index isn't on the same bank as its elements, then this can't be a
5478 // SUBREG_TO_REG.
5479 const RegisterBank &EltRB = *RBI.getRegBank(EltReg, MRI, TRI);
5480 const RegisterBank &DstRB = *RBI.getRegBank(Dst, MRI, TRI);
5481 if (EltRB != DstRB)
5482 return false;
5483 if (any_of(make_range(I.operands_begin() + 2, I.operands_end()),
5484 [&MRI](const MachineOperand &Op) {
5485 return !getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, Op.getReg(),
5486 MRI);
5487 }))
5488 return false;
5489 unsigned SubReg;
5490 const TargetRegisterClass *EltRC = getRegClassForTypeOnBank(EltTy, EltRB);
5491 if (!EltRC)
5492 return false;
5493 const TargetRegisterClass *DstRC =
5494 getRegClassForTypeOnBank(MRI.getType(Dst), DstRB);
5495 if (!DstRC)
5496 return false;
5497 if (!getSubRegForClass(EltRC, TRI, SubReg))
5498 return false;
5499 auto SubregToReg = MIB.buildInstr(AArch64::SUBREG_TO_REG, {Dst}, {})
5500 .addImm(0)
5501 .addUse(EltReg)
5502 .addImm(SubReg);
5503 I.eraseFromParent();
5504 constrainSelectedInstRegOperands(*SubregToReg, TII, TRI, RBI);
5505 return RBI.constrainGenericRegister(Dst, *DstRC, MRI);
5506}
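// Illustrative sketch, not from the upstream file: when only the first lane is
// defined, e.g.
//
//   %undef:fpr(s32) = G_IMPLICIT_DEF
//   %v:fpr(<2 x s32>) = G_BUILD_VECTOR %x:fpr(s32), %undef
//
// the whole build_vector collapses to "SUBREG_TO_REG %v, 0, %x, ssub": %x
// simply becomes the ssub subregister of the 64-bit destination and no lane
// inserts are emitted for the undef operands.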
5507
5508bool AArch64InstructionSelector::selectBuildVector(MachineInstr &I,
5509 MachineRegisterInfo &MRI) {
5510 assert(I.getOpcode() == TargetOpcode::G_BUILD_VECTOR);
5511 // Until we port more of the optimized selections, for now just use a vector
5512 // insert sequence.
5513 const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
5514 const LLT EltTy = MRI.getType(I.getOperand(1).getReg());
5515 unsigned EltSize = EltTy.getSizeInBits();
5516
5517 if (tryOptConstantBuildVec(I, DstTy, MRI))
5518 return true;
5519 if (tryOptBuildVecToSubregToReg(I, MRI))
5520 return true;
5521
5522 if (EltSize < 16 || EltSize > 64)
5523 return false; // Don't support all element types yet.
5524 const RegisterBank &RB = *RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI);
5525
5526 const TargetRegisterClass *DstRC = &AArch64::FPR128RegClass;
5527 MachineInstr *ScalarToVec =
5528 emitScalarToVector(DstTy.getElementType().getSizeInBits(), DstRC,
5529 I.getOperand(1).getReg(), MIB);
5530 if (!ScalarToVec)
5531 return false;
5532
5533 Register DstVec = ScalarToVec->getOperand(0).getReg();
5534 unsigned DstSize = DstTy.getSizeInBits();
5535
5536 // Keep track of the last MI we inserted. Later on, we might be able to save
5537 // a copy using it.
5538 MachineInstr *PrevMI = nullptr;
5539 for (unsigned i = 2, e = DstSize / EltSize + 1; i < e; ++i) {
5540 // Note that if we don't do a subregister copy, we can end up making an
5541 // extra register.
5542 PrevMI = &*emitLaneInsert(None, DstVec, I.getOperand(i).getReg(), i - 1, RB,
5543 MIB);
5544 DstVec = PrevMI->getOperand(0).getReg();
5545 }
5546
5547 // If DstTy's size in bits is less than 128, then emit a subregister copy
5548 // from DstVec to the last register we've defined.
5549 if (DstSize < 128) {
5550 // Force this to be FPR using the destination vector.
5551 const TargetRegisterClass *RC =
5552 getRegClassForTypeOnBank(DstTy, *RBI.getRegBank(DstVec, MRI, TRI));
5553 if (!RC)
5554 return false;
5555 if (RC != &AArch64::FPR32RegClass && RC != &AArch64::FPR64RegClass) {
5556 LLVM_DEBUG(dbgs() << "Unsupported register class!\n");
5557 return false;
5558 }
5559
5560 unsigned SubReg = 0;
5561 if (!getSubRegForClass(RC, TRI, SubReg))
5562 return false;
5563 if (SubReg != AArch64::ssub && SubReg != AArch64::dsub) {
5564 LLVM_DEBUG(dbgs() << "Unsupported destination size! (" << DstSize
5565                   << "\n");
5566 return false;
5567 }
5568
5569 Register Reg = MRI.createVirtualRegister(RC);
5570 Register DstReg = I.getOperand(0).getReg();
5571
5572 MIB.buildInstr(TargetOpcode::COPY, {DstReg}, {}).addReg(DstVec, 0, SubReg);
5573 MachineOperand &RegOp = I.getOperand(1);
5574 RegOp.setReg(Reg);
5575 RBI.constrainGenericRegister(DstReg, *RC, MRI);
5576 } else {
5577 // We don't need a subregister copy. Save a copy by re-using the
5578 // destination register on the final insert.
5579 assert(PrevMI && "PrevMI was null?");
5580 PrevMI->getOperand(0).setReg(I.getOperand(0).getReg());
5581 constrainSelectedInstRegOperands(*PrevMI, TII, TRI, RBI);
5582 }
5583
5584 I.eraseFromParent();
5585 return true;
5586}
5587
5588bool AArch64InstructionSelector::selectVectorLoadIntrinsic(unsigned Opc,
5589 unsigned NumVecs,
5590 MachineInstr &I) {
5591 assert(I.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS);
5592 assert(Opc && "Expected an opcode?");
5593 assert(NumVecs > 1 && NumVecs < 5 && "Only support 2, 3, or 4 vectors");
5594 auto &MRI = *MIB.getMRI();
5595 LLT Ty = MRI.getType(I.getOperand(0).getReg());
5596 unsigned Size = Ty.getSizeInBits();
5597 assert((Size == 64 || Size == 128) &&
5598        "Destination must be 64 bits or 128 bits?");
5599 unsigned SubReg = Size == 64 ? AArch64::dsub0 : AArch64::qsub0;
5600 auto Ptr = I.getOperand(I.getNumOperands() - 1).getReg();
5601 assert(MRI.getType(Ptr).isPointer() && "Expected a pointer type?");
5602 auto Load = MIB.buildInstr(Opc, {Ty}, {Ptr});
5603 Load.cloneMemRefs(I);
5604 constrainSelectedInstRegOperands(*Load, TII, TRI, RBI);
5605 Register SelectedLoadDst = Load->getOperand(0).getReg();
5606 for (unsigned Idx = 0; Idx < NumVecs; ++Idx) {
5607 auto Vec = MIB.buildInstr(TargetOpcode::COPY, {I.getOperand(Idx)}, {})
5608 .addReg(SelectedLoadDst, 0, SubReg + Idx);
5609 // Emit the subreg copies and immediately select them.
5610 // FIXME: We should refactor our copy code into an emitCopy helper and
5611 // clean up uses of this pattern elsewhere in the selector.
5612 selectCopy(*Vec, TII, MRI, TRI, RBI);
5613 }
5614 return true;
5615}
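// Illustrative sketch, not from the upstream file: a two-vector load of
// <4 x s16> values reaches this helper with Opc = AArch64::LD2Twov4h and
// NumVecs = 2, producing roughly
//
//   ld2 { v0.4h, v1.4h }, [x0]
//
// followed by one COPY per result out of the register tuple (dsub0, dsub1),
// each of which is immediately selected via selectCopy.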
5616
5617bool AArch64InstructionSelector::selectIntrinsicWithSideEffects(
5618 MachineInstr &I, MachineRegisterInfo &MRI) {
5619 // Find the intrinsic ID.
5620 unsigned IntrinID = I.getIntrinsicID();
5621
5622 const LLT S8 = LLT::scalar(8);
5623 const LLT S16 = LLT::scalar(16);
5624 const LLT S32 = LLT::scalar(32);
5625 const LLT S64 = LLT::scalar(64);
5626 const LLT P0 = LLT::pointer(0, 64);
5627 // Select the instruction.
5628 switch (IntrinID) {
5629 default:
5630 return false;
5631 case Intrinsic::aarch64_ldxp:
5632 case Intrinsic::aarch64_ldaxp: {
5633 auto NewI = MIB.buildInstr(
5634 IntrinID == Intrinsic::aarch64_ldxp ? AArch64::LDXPX : AArch64::LDAXPX,
5635 {I.getOperand(0).getReg(), I.getOperand(1).getReg()},
5636 {I.getOperand(3)});
5637 NewI.cloneMemRefs(I);
5638 constrainSelectedInstRegOperands(*NewI, TII, TRI, RBI);
5639 break;
5640 }
5641 case Intrinsic::trap:
5642 MIB.buildInstr(AArch64::BRK, {}, {}).addImm(1);
5643 break;
5644 case Intrinsic::debugtrap:
5645 MIB.buildInstr(AArch64::BRK, {}, {}).addImm(0xF000);
5646 break;
5647 case Intrinsic::ubsantrap:
5648 MIB.buildInstr(AArch64::BRK, {}, {})
5649 .addImm(I.getOperand(1).getImm() | ('U' << 8));
5650 break;
5651 case Intrinsic::aarch64_neon_ld2: {
5652 LLT Ty = MRI.getType(I.getOperand(0).getReg());
5653 unsigned Opc = 0;
5654 if (Ty == LLT::fixed_vector(8, S8))
5655 Opc = AArch64::LD2Twov8b;
5656 else if (Ty == LLT::fixed_vector(16, S8))
5657 Opc = AArch64::LD2Twov16b;
5658 else if (Ty == LLT::fixed_vector(4, S16))
5659 Opc = AArch64::LD2Twov4h;
5660 el