Bug Summary

File: llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
Warning: line 5540, column 67
The result of the left shift is undefined due to shifting by '4294967295', which is greater or equal to the width of type 'int'
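For context, the diagnostic reports undefined behavior: in C++ a shift count must be smaller than the bit width of the promoted left operand, so shifting an 'int' by 4294967295 (a -1 that has wrapped around in an unsigned 32-bit value) is undefined. The code at line 5540 is outside this excerpt; the following is only a minimal sketch of the flagged pattern, with hypothetical names and values, plus the kind of guard that avoids it.

#include <cstdint>

// Hypothetical reduction of the reported pattern. If Imm is 0, the
// subtraction wraps to 4294967295; shifting an 'int' by such a count
// would be undefined behavior, which is what the analyzer warns about.
int encodeShiftMask(uint32_t Imm) {
  uint32_t Count = Imm - 1;   // wraps to 0xFFFFFFFF when Imm == 0
  if (Count >= 32)            // guard: keep the shift count below the width of 'int'
    return 0;
  return 1 << Count;          // well-defined only because of the guard above
}

A bounds check like the one above (or establishing that the analyzer-assumed value cannot occur) is the usual way to address this class of warning.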

Annotated Source Code

clang -cc1 -cc1 -triple x86_64-pc-linux-gnu -analyze -disable-free -disable-llvm-verifier -discard-value-names -main-file-name AArch64InstructionSelector.cpp -analyzer-store=region -analyzer-opt-analyze-nested-blocks -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=cplusplus -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -analyzer-config-compatibility-mode=true -mrelocation-model pic -pic-level 2 -mframe-pointer=none -fmath-errno -fno-rounding-math -mconstructor-aliases -munwind-tables -target-cpu x86-64 -tune-cpu generic -fno-split-dwarf-inlining -debugger-tuning=gdb -ffunction-sections -fdata-sections -resource-dir /usr/lib/llvm-12/lib/clang/12.0.0 -D _DEBUG -D _GNU_SOURCE -D __STDC_CONSTANT_MACROS -D __STDC_FORMAT_MACROS -D __STDC_LIMIT_MACROS -I /build/llvm-toolchain-snapshot-12~++20200926111128+c6c5629f2fb/build-llvm/lib/Target/AArch64 -I /build/llvm-toolchain-snapshot-12~++20200926111128+c6c5629f2fb/llvm/lib/Target/AArch64 -I /build/llvm-toolchain-snapshot-12~++20200926111128+c6c5629f2fb/build-llvm/include -I /build/llvm-toolchain-snapshot-12~++20200926111128+c6c5629f2fb/llvm/include -U NDEBUG -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/c++/6.3.0 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/x86_64-linux-gnu/c++/6.3.0 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/x86_64-linux-gnu/c++/6.3.0 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/c++/6.3.0/backward -internal-isystem /usr/local/include -internal-isystem /usr/lib/llvm-12/lib/clang/12.0.0/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -O2 -Wno-unused-parameter -Wwrite-strings -Wno-missing-field-initializers -Wno-long-long -Wno-maybe-uninitialized -Wno-comment -std=c++14 -fdeprecated-macro -fdebug-compilation-dir /build/llvm-toolchain-snapshot-12~++20200926111128+c6c5629f2fb/build-llvm/lib/Target/AArch64 -fdebug-prefix-map=/build/llvm-toolchain-snapshot-12~++20200926111128+c6c5629f2fb=. -ferror-limit 19 -fvisibility hidden -fvisibility-inlines-hidden -stack-protector 2 -fgnuc-version=4.2.1 -vectorize-loops -vectorize-slp -analyzer-output=html -analyzer-config stable-report-filename=true -faddrsig -o /tmp/scan-build-2020-09-26-161721-17566-1 -x c++ /build/llvm-toolchain-snapshot-12~++20200926111128+c6c5629f2fb/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp

/build/llvm-toolchain-snapshot-12~++20200926111128+c6c5629f2fb/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp

1//===- AArch64InstructionSelector.cpp ----------------------------*- C++ -*-==//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8/// \file
9/// This file implements the targeting of the InstructionSelector class for
10/// AArch64.
11/// \todo This should be generated by TableGen.
12//===----------------------------------------------------------------------===//
13
14#include "AArch64InstrInfo.h"
15#include "AArch64MachineFunctionInfo.h"
16#include "AArch64RegisterBankInfo.h"
17#include "AArch64RegisterInfo.h"
18#include "AArch64Subtarget.h"
19#include "AArch64TargetMachine.h"
20#include "MCTargetDesc/AArch64AddressingModes.h"
21#include "MCTargetDesc/AArch64MCTargetDesc.h"
22#include "llvm/ADT/Optional.h"
23#include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
24#include "llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h"
25#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
26#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
27#include "llvm/CodeGen/GlobalISel/Utils.h"
28#include "llvm/CodeGen/MachineBasicBlock.h"
29#include "llvm/CodeGen/MachineConstantPool.h"
30#include "llvm/CodeGen/MachineFunction.h"
31#include "llvm/CodeGen/MachineInstr.h"
32#include "llvm/CodeGen/MachineInstrBuilder.h"
33#include "llvm/CodeGen/MachineOperand.h"
34#include "llvm/CodeGen/MachineRegisterInfo.h"
35#include "llvm/CodeGen/TargetOpcodes.h"
36#include "llvm/IR/Constants.h"
37#include "llvm/IR/Type.h"
38#include "llvm/IR/IntrinsicsAArch64.h"
39#include "llvm/Pass.h"
40#include "llvm/Support/Debug.h"
41#include "llvm/Support/raw_ostream.h"
42
43#define DEBUG_TYPE "aarch64-isel"
44
45using namespace llvm;
46using namespace MIPatternMatch;
47
48namespace {
49
50#define GET_GLOBALISEL_PREDICATE_BITSET
51#include "AArch64GenGlobalISel.inc"
52#undef GET_GLOBALISEL_PREDICATE_BITSET
53
54class AArch64InstructionSelector : public InstructionSelector {
55public:
56 AArch64InstructionSelector(const AArch64TargetMachine &TM,
57 const AArch64Subtarget &STI,
58 const AArch64RegisterBankInfo &RBI);
59
60 bool select(MachineInstr &I) override;
61 static const char *getName() { return DEBUG_TYPE; }
62
63 void setupMF(MachineFunction &MF, GISelKnownBits &KB,
64 CodeGenCoverage &CoverageInfo) override {
65 InstructionSelector::setupMF(MF, KB, CoverageInfo);
66
67 // hasFnAttribute() is expensive to call on every BRCOND selection, so
68 // cache it here for each run of the selector.
69 ProduceNonFlagSettingCondBr =
70 !MF.getFunction().hasFnAttribute(Attribute::SpeculativeLoadHardening);
71 MFReturnAddr = Register();
72
73 processPHIs(MF);
74 }
75
76private:
77 /// tblgen-erated 'select' implementation, used as the initial selector for
78 /// the patterns that don't require complex C++.
79 bool selectImpl(MachineInstr &I, CodeGenCoverage &CoverageInfo) const;
80
81 // A lowering phase that runs before any selection attempts.
82 // Returns true if the instruction was modified.
83 bool preISelLower(MachineInstr &I);
84
85 // An early selection function that runs before the selectImpl() call.
86 bool earlySelect(MachineInstr &I) const;
87
88 // Do some preprocessing of G_PHIs before we begin selection.
89 void processPHIs(MachineFunction &MF);
90
91 bool earlySelectSHL(MachineInstr &I, MachineRegisterInfo &MRI) const;
92
93 /// Eliminate same-sized cross-bank copies into stores before selectImpl().
94 bool contractCrossBankCopyIntoStore(MachineInstr &I,
95 MachineRegisterInfo &MRI);
96
97 bool convertPtrAddToAdd(MachineInstr &I, MachineRegisterInfo &MRI);
98
99 bool selectVaStartAAPCS(MachineInstr &I, MachineFunction &MF,
100 MachineRegisterInfo &MRI) const;
101 bool selectVaStartDarwin(MachineInstr &I, MachineFunction &MF,
102 MachineRegisterInfo &MRI) const;
103
104 bool tryOptAndIntoCompareBranch(MachineInstr *LHS,
105 int64_t CmpConstant,
106 const CmpInst::Predicate &Pred,
107 MachineBasicBlock *DstMBB,
108 MachineIRBuilder &MIB) const;
109 bool selectCompareBranch(MachineInstr &I, MachineFunction &MF,
110 MachineRegisterInfo &MRI) const;
111
112 bool selectVectorAshrLshr(MachineInstr &I, MachineRegisterInfo &MRI) const;
113 bool selectVectorSHL(MachineInstr &I, MachineRegisterInfo &MRI) const;
114
115 // Helper to generate an equivalent of scalar_to_vector into a new register,
116 // returned via 'Dst'.
117 MachineInstr *emitScalarToVector(unsigned EltSize,
118 const TargetRegisterClass *DstRC,
119 Register Scalar,
120 MachineIRBuilder &MIRBuilder) const;
121
122 /// Emit a lane insert into \p DstReg, or a new vector register if None is
123 /// provided.
124 ///
125 /// The lane inserted into is defined by \p LaneIdx. The vector source
126 /// register is given by \p SrcReg. The register containing the element is
127 /// given by \p EltReg.
128 MachineInstr *emitLaneInsert(Optional<Register> DstReg, Register SrcReg,
129 Register EltReg, unsigned LaneIdx,
130 const RegisterBank &RB,
131 MachineIRBuilder &MIRBuilder) const;
132 bool selectInsertElt(MachineInstr &I, MachineRegisterInfo &MRI) const;
133 bool tryOptConstantBuildVec(MachineInstr &MI, LLT DstTy,
134 MachineRegisterInfo &MRI) const;
135 bool selectBuildVector(MachineInstr &I, MachineRegisterInfo &MRI) const;
136 bool selectMergeValues(MachineInstr &I, MachineRegisterInfo &MRI) const;
137 bool selectUnmergeValues(MachineInstr &I, MachineRegisterInfo &MRI) const;
138
139 bool tryOptShuffleDupLane(MachineInstr &I, LLT DstTy, LLT SrcTy,
140 ArrayRef<int> Mask, MachineRegisterInfo &MRI) const;
141 bool selectShuffleVector(MachineInstr &I, MachineRegisterInfo &MRI) const;
142 bool selectExtractElt(MachineInstr &I, MachineRegisterInfo &MRI) const;
143 bool selectConcatVectors(MachineInstr &I, MachineRegisterInfo &MRI) const;
144 bool selectSplitVectorUnmerge(MachineInstr &I,
145 MachineRegisterInfo &MRI) const;
146 bool selectIntrinsicWithSideEffects(MachineInstr &I,
147 MachineRegisterInfo &MRI) const;
148 bool selectIntrinsic(MachineInstr &I, MachineRegisterInfo &MRI);
149 bool selectVectorICmp(MachineInstr &I, MachineRegisterInfo &MRI) const;
150 bool selectIntrinsicTrunc(MachineInstr &I, MachineRegisterInfo &MRI) const;
151 bool selectIntrinsicRound(MachineInstr &I, MachineRegisterInfo &MRI) const;
152 bool selectJumpTable(MachineInstr &I, MachineRegisterInfo &MRI) const;
153 bool selectBrJT(MachineInstr &I, MachineRegisterInfo &MRI) const;
154 bool selectTLSGlobalValue(MachineInstr &I, MachineRegisterInfo &MRI) const;
155
156 unsigned emitConstantPoolEntry(const Constant *CPVal,
157 MachineFunction &MF) const;
158 MachineInstr *emitLoadFromConstantPool(const Constant *CPVal,
159 MachineIRBuilder &MIRBuilder) const;
160
161 // Emit a vector concat operation.
162 MachineInstr *emitVectorConcat(Optional<Register> Dst, Register Op1,
163 Register Op2,
164 MachineIRBuilder &MIRBuilder) const;
165
166 // Emit an integer compare between LHS and RHS, which checks for Predicate.
167 //
168 // This returns the produced compare instruction, and the predicate which
169 // was ultimately used in the compare. The predicate may differ from what
170 // is passed in \p Predicate due to optimization.
171 std::pair<MachineInstr *, CmpInst::Predicate>
172 emitIntegerCompare(MachineOperand &LHS, MachineOperand &RHS,
173 MachineOperand &Predicate,
174 MachineIRBuilder &MIRBuilder) const;
175 MachineInstr *emitInstr(unsigned Opcode,
176 std::initializer_list<llvm::DstOp> DstOps,
177 std::initializer_list<llvm::SrcOp> SrcOps,
178 MachineIRBuilder &MIRBuilder,
179 const ComplexRendererFns &RenderFns = None) const;
180 /// Helper function to emit a binary operation such as an ADD, ADDS, etc.
181 ///
182 /// This is intended for instructions with the following opcode variants:
183 ///
184 /// - Xri, Wri (arithmetic immediate form)
185 /// - Xrs, Wrs (shifted register form)
186 /// - Xrr, Wrr (register form)
187 ///
188 /// For example, for ADD, we have ADDXri, ADDWri, ADDXrs, etc.
189 ///
190 /// \p AddrModeAndSizeToOpcode must contain each of the opcode variants above
191 /// in a specific order.
192 ///
193 /// Below is an example of the expected input to \p AddrModeAndSizeToOpcode.
194 ///
195 /// \code
196 /// const std::array<std::array<unsigned, 2>, 3> Table {
197 /// {{AArch64::ADDXri, AArch64::ADDWri},
198 /// {AArch64::ADDXrs, AArch64::ADDWrs},
199 /// {AArch64::ADDXrr, AArch64::ADDWrr}}};
200 /// \endcode
201 ///
202 /// Each row in the table corresponds to a different addressing mode. Each
203 /// column corresponds to a different register size.
204 ///
205 /// \attention Rows must be structured as follows:
206 /// - Row 0: The ri opcode variants
207 /// - Row 1: The rs opcode variants
208 /// - Row 2: The rr opcode variants
209 ///
210 /// \attention Columns must be structured as follows:
211 /// - Column 0: The 64-bit opcode variants
212 /// - Column 1: The 32-bit opcode variants
213 ///
214 /// \p Dst is the destination register of the binop to emit.
215 /// \p LHS is the left-hand operand of the binop to emit.
216 /// \p RHS is the right-hand operand of the binop to emit.
217 MachineInstr *emitBinOp(
218 const std::array<std::array<unsigned, 2>, 3> &AddrModeAndSizeToOpcode,
219 Register Dst, MachineOperand &LHS, MachineOperand &RHS,
220 MachineIRBuilder &MIRBuilder) const;
221 MachineInstr *emitADD(Register DefReg, MachineOperand &LHS,
222 MachineOperand &RHS,
223 MachineIRBuilder &MIRBuilder) const;
224 MachineInstr *emitADDS(Register Dst, MachineOperand &LHS, MachineOperand &RHS,
225 MachineIRBuilder &MIRBuilder) const;
226 MachineInstr *emitCMN(MachineOperand &LHS, MachineOperand &RHS,
227 MachineIRBuilder &MIRBuilder) const;
228 MachineInstr *emitTST(const Register &LHS, const Register &RHS,
229 MachineIRBuilder &MIRBuilder) const;
230 MachineInstr *emitExtractVectorElt(Optional<Register> DstReg,
231 const RegisterBank &DstRB, LLT ScalarTy,
232 Register VecReg, unsigned LaneIdx,
233 MachineIRBuilder &MIRBuilder) const;
234
235 /// Helper function for selecting G_FCONSTANT. If the G_FCONSTANT can be
236 /// materialized using a FMOV instruction, then update MI and return it.
237 /// Otherwise, do nothing and return a nullptr.
238 MachineInstr *emitFMovForFConstant(MachineInstr &MI,
239 MachineRegisterInfo &MRI) const;
240
241 /// Emit a CSet for a compare.
242 MachineInstr *emitCSetForICMP(Register DefReg, unsigned Pred,
243 MachineIRBuilder &MIRBuilder) const;
244
245 /// Emit a TB(N)Z instruction which tests \p Bit in \p TestReg.
246 /// \p IsNegative is true if the test should be "not zero".
247 /// This will also optimize the test bit instruction when possible.
248 MachineInstr *emitTestBit(Register TestReg, uint64_t Bit, bool IsNegative,
249 MachineBasicBlock *DstMBB,
250 MachineIRBuilder &MIB) const;
251
252 // Equivalent to the i32shift_a and friends from AArch64InstrInfo.td.
253 // We use these manually instead of using the importer since it doesn't
254 // support SDNodeXForm.
255 ComplexRendererFns selectShiftA_32(const MachineOperand &Root) const;
256 ComplexRendererFns selectShiftB_32(const MachineOperand &Root) const;
257 ComplexRendererFns selectShiftA_64(const MachineOperand &Root) const;
258 ComplexRendererFns selectShiftB_64(const MachineOperand &Root) const;
259
260 ComplexRendererFns select12BitValueWithLeftShift(uint64_t Immed) const;
261 ComplexRendererFns selectArithImmed(MachineOperand &Root) const;
262 ComplexRendererFns selectNegArithImmed(MachineOperand &Root) const;
263
264 ComplexRendererFns selectAddrModeUnscaled(MachineOperand &Root,
265 unsigned Size) const;
266
267 ComplexRendererFns selectAddrModeUnscaled8(MachineOperand &Root) const {
268 return selectAddrModeUnscaled(Root, 1);
269 }
270 ComplexRendererFns selectAddrModeUnscaled16(MachineOperand &Root) const {
271 return selectAddrModeUnscaled(Root, 2);
272 }
273 ComplexRendererFns selectAddrModeUnscaled32(MachineOperand &Root) const {
274 return selectAddrModeUnscaled(Root, 4);
275 }
276 ComplexRendererFns selectAddrModeUnscaled64(MachineOperand &Root) const {
277 return selectAddrModeUnscaled(Root, 8);
278 }
279 ComplexRendererFns selectAddrModeUnscaled128(MachineOperand &Root) const {
280 return selectAddrModeUnscaled(Root, 16);
281 }
282
283 /// Helper to try to fold in a GISEL_ADD_LOW into an immediate, to be used
284 /// from complex pattern matchers like selectAddrModeIndexed().
285 ComplexRendererFns tryFoldAddLowIntoImm(MachineInstr &RootDef, unsigned Size,
286 MachineRegisterInfo &MRI) const;
287
288 ComplexRendererFns selectAddrModeIndexed(MachineOperand &Root,
289 unsigned Size) const;
290 template <int Width>
291 ComplexRendererFns selectAddrModeIndexed(MachineOperand &Root) const {
292 return selectAddrModeIndexed(Root, Width / 8);
293 }
294
295 bool isWorthFoldingIntoExtendedReg(MachineInstr &MI,
296 const MachineRegisterInfo &MRI) const;
297 ComplexRendererFns
298 selectAddrModeShiftedExtendXReg(MachineOperand &Root,
299 unsigned SizeInBytes) const;
300
301 /// Returns a \p ComplexRendererFns which contains a base, offset, and whether
302 /// or not a shift + extend should be folded into an addressing mode. Returns
303 /// None when this is not profitable or possible.
304 ComplexRendererFns
305 selectExtendedSHL(MachineOperand &Root, MachineOperand &Base,
306 MachineOperand &Offset, unsigned SizeInBytes,
307 bool WantsExt) const;
308 ComplexRendererFns selectAddrModeRegisterOffset(MachineOperand &Root) const;
309 ComplexRendererFns selectAddrModeXRO(MachineOperand &Root,
310 unsigned SizeInBytes) const;
311 template <int Width>
312 ComplexRendererFns selectAddrModeXRO(MachineOperand &Root) const {
313 return selectAddrModeXRO(Root, Width / 8);
314 }
315
316 ComplexRendererFns selectAddrModeWRO(MachineOperand &Root,
317 unsigned SizeInBytes) const;
318 template <int Width>
319 ComplexRendererFns selectAddrModeWRO(MachineOperand &Root) const {
320 return selectAddrModeWRO(Root, Width / 8);
321 }
322
323 ComplexRendererFns selectShiftedRegister(MachineOperand &Root) const;
324
325 ComplexRendererFns selectArithShiftedRegister(MachineOperand &Root) const {
326 return selectShiftedRegister(Root);
327 }
328
329 ComplexRendererFns selectLogicalShiftedRegister(MachineOperand &Root) const {
330 // TODO: selectShiftedRegister should allow for rotates on logical shifts.
331 // For now, make them the same. The only difference between the two is that
332 // logical shifts are allowed to fold in rotates. Otherwise, these are
333 // functionally the same.
334 return selectShiftedRegister(Root);
335 }
336
337 /// Given an extend instruction, determine the correct shift-extend type for
338 /// that instruction.
339 ///
340 /// If the instruction is going to be used in a load or store, pass
341 /// \p IsLoadStore = true.
342 AArch64_AM::ShiftExtendType
343 getExtendTypeForInst(MachineInstr &MI, MachineRegisterInfo &MRI,
344 bool IsLoadStore = false) const;
345
346 /// Instructions that accept extend modifiers like UXTW expect the register
347 /// being extended to be a GPR32. Narrow ExtReg to a 32-bit register using a
348 /// subregister copy if necessary. Return either ExtReg, or the result of the
349 /// new copy.
350 Register narrowExtendRegIfNeeded(Register ExtReg,
351 MachineIRBuilder &MIB) const;
352 Register widenGPRBankRegIfNeeded(Register Reg, unsigned Size,
353 MachineIRBuilder &MIB) const;
354 ComplexRendererFns selectArithExtendedRegister(MachineOperand &Root) const;
355
356 void renderTruncImm(MachineInstrBuilder &MIB, const MachineInstr &MI,
357 int OpIdx = -1) const;
358 void renderLogicalImm32(MachineInstrBuilder &MIB, const MachineInstr &I,
359 int OpIdx = -1) const;
360 void renderLogicalImm64(MachineInstrBuilder &MIB, const MachineInstr &I,
361 int OpIdx = -1) const;
362
363 // Materialize a GlobalValue or BlockAddress using a movz+movk sequence.
364 void materializeLargeCMVal(MachineInstr &I, const Value *V,
365 unsigned OpFlags) const;
366
367 // Optimization methods.
368 bool tryOptSelect(MachineInstr &MI) const;
369 MachineInstr *tryFoldIntegerCompare(MachineOperand &LHS, MachineOperand &RHS,
370 MachineOperand &Predicate,
371 MachineIRBuilder &MIRBuilder) const;
372 MachineInstr *tryOptArithImmedIntegerCompare(MachineOperand &LHS,
373 MachineOperand &RHS,
374 CmpInst::Predicate &Predicate,
375 MachineIRBuilder &MIB) const;
376 MachineInstr *tryOptArithShiftedCompare(MachineOperand &LHS,
377 MachineOperand &RHS,
378 MachineIRBuilder &MIB) const;
379
380 /// Return true if \p MI is a load or store of \p NumBytes bytes.
381 bool isLoadStoreOfNumBytes(const MachineInstr &MI, unsigned NumBytes) const;
382
383 /// Returns true if \p MI is guaranteed to have the high-half of a 64-bit
384 /// register zeroed out. In other words, the result of MI has been explicitly
385 /// zero extended.
386 bool isDef32(const MachineInstr &MI) const;
387
388 const AArch64TargetMachine &TM;
389 const AArch64Subtarget &STI;
390 const AArch64InstrInfo &TII;
391 const AArch64RegisterInfo &TRI;
392 const AArch64RegisterBankInfo &RBI;
393
394 bool ProduceNonFlagSettingCondBr = false;
395
396 // Some cached values used during selection.
397 // We use LR as a live-in register, and we keep track of it here as it can be
398 // clobbered by calls.
399 Register MFReturnAddr;
400
401#define GET_GLOBALISEL_PREDICATES_DECL
402#include "AArch64GenGlobalISel.inc"
403#undef GET_GLOBALISEL_PREDICATES_DECL
404
405// We declare the temporaries used by selectImpl() in the class to minimize the
406// cost of constructing placeholder values.
407#define GET_GLOBALISEL_TEMPORARIES_DECL
408#include "AArch64GenGlobalISel.inc"
409#undef GET_GLOBALISEL_TEMPORARIES_DECL
410};
411
412} // end anonymous namespace
413
414#define GET_GLOBALISEL_IMPL
415#include "AArch64GenGlobalISel.inc"
416#undef GET_GLOBALISEL_IMPL
417
418AArch64InstructionSelector::AArch64InstructionSelector(
419 const AArch64TargetMachine &TM, const AArch64Subtarget &STI,
420 const AArch64RegisterBankInfo &RBI)
421 : InstructionSelector(), TM(TM), STI(STI), TII(*STI.getInstrInfo()),
422 TRI(*STI.getRegisterInfo()), RBI(RBI),
423#define GET_GLOBALISEL_PREDICATES_INIT
424#include "AArch64GenGlobalISel.inc"
425#undef GET_GLOBALISEL_PREDICATES_INIT
426#define GET_GLOBALISEL_TEMPORARIES_INIT
427#include "AArch64GenGlobalISel.inc"
428#undef GET_GLOBALISEL_TEMPORARIES_INIT
429{
430}
431
432// FIXME: This should be target-independent, inferred from the types declared
433// for each class in the bank.
434static const TargetRegisterClass *
435getRegClassForTypeOnBank(LLT Ty, const RegisterBank &RB,
436 const RegisterBankInfo &RBI,
437 bool GetAllRegSet = false) {
438 if (RB.getID() == AArch64::GPRRegBankID) {
439 if (Ty.getSizeInBits() <= 32)
440 return GetAllRegSet ? &AArch64::GPR32allRegClass
441 : &AArch64::GPR32RegClass;
442 if (Ty.getSizeInBits() == 64)
443 return GetAllRegSet ? &AArch64::GPR64allRegClass
444 : &AArch64::GPR64RegClass;
445 return nullptr;
446 }
447
448 if (RB.getID() == AArch64::FPRRegBankID) {
449 if (Ty.getSizeInBits() <= 16)
450 return &AArch64::FPR16RegClass;
451 if (Ty.getSizeInBits() == 32)
452 return &AArch64::FPR32RegClass;
453 if (Ty.getSizeInBits() == 64)
454 return &AArch64::FPR64RegClass;
455 if (Ty.getSizeInBits() == 128)
456 return &AArch64::FPR128RegClass;
457 return nullptr;
458 }
459
460 return nullptr;
461}
462
463/// Given a register bank, and size in bits, return the smallest register class
464/// that can represent that combination.
465static const TargetRegisterClass *
466getMinClassForRegBank(const RegisterBank &RB, unsigned SizeInBits,
467 bool GetAllRegSet = false) {
468 unsigned RegBankID = RB.getID();
469
470 if (RegBankID == AArch64::GPRRegBankID) {
471 if (SizeInBits <= 32)
472 return GetAllRegSet ? &AArch64::GPR32allRegClass
473 : &AArch64::GPR32RegClass;
474 if (SizeInBits == 64)
475 return GetAllRegSet ? &AArch64::GPR64allRegClass
476 : &AArch64::GPR64RegClass;
477 }
478
479 if (RegBankID == AArch64::FPRRegBankID) {
480 switch (SizeInBits) {
481 default:
482 return nullptr;
483 case 8:
484 return &AArch64::FPR8RegClass;
485 case 16:
486 return &AArch64::FPR16RegClass;
487 case 32:
488 return &AArch64::FPR32RegClass;
489 case 64:
490 return &AArch64::FPR64RegClass;
491 case 128:
492 return &AArch64::FPR128RegClass;
493 }
494 }
495
496 return nullptr;
497}
498
499/// Returns the correct subregister to use for a given register class.
500static bool getSubRegForClass(const TargetRegisterClass *RC,
501 const TargetRegisterInfo &TRI, unsigned &SubReg) {
502 switch (TRI.getRegSizeInBits(*RC)) {
503 case 8:
504 SubReg = AArch64::bsub;
505 break;
506 case 16:
507 SubReg = AArch64::hsub;
508 break;
509 case 32:
510 if (RC != &AArch64::FPR32RegClass)
511 SubReg = AArch64::sub_32;
512 else
513 SubReg = AArch64::ssub;
514 break;
515 case 64:
516 SubReg = AArch64::dsub;
517 break;
518 default:
519 LLVM_DEBUG(
520 dbgs() << "Couldn't find appropriate subregister for register class.");
521 return false;
522 }
523
524 return true;
525}
526
527/// Returns the minimum size the given register bank can hold.
528static unsigned getMinSizeForRegBank(const RegisterBank &RB) {
529 switch (RB.getID()) {
530 case AArch64::GPRRegBankID:
531 return 32;
532 case AArch64::FPRRegBankID:
533 return 8;
534 default:
535 llvm_unreachable("Tried to get minimum size for unknown register bank.");
536 }
537}
538
539static Optional<uint64_t> getImmedFromMO(const MachineOperand &Root) {
540 auto &MI = *Root.getParent();
541 auto &MBB = *MI.getParent();
542 auto &MF = *MBB.getParent();
543 auto &MRI = MF.getRegInfo();
544 uint64_t Immed;
545 if (Root.isImm())
546 Immed = Root.getImm();
547 else if (Root.isCImm())
548 Immed = Root.getCImm()->getZExtValue();
549 else if (Root.isReg()) {
550 auto ValAndVReg =
551 getConstantVRegValWithLookThrough(Root.getReg(), MRI, true);
552 if (!ValAndVReg)
553 return None;
554 Immed = ValAndVReg->Value;
555 } else
556 return None;
557 return Immed;
558}
559
560/// Check whether \p I is a currently unsupported binary operation:
561/// - it has an unsized type
562/// - an operand is not a vreg
563/// - all operands are not in the same bank
564/// These are checks that should someday live in the verifier, but right now,
565/// these are mostly limitations of the aarch64 selector.
566static bool unsupportedBinOp(const MachineInstr &I,
567 const AArch64RegisterBankInfo &RBI,
568 const MachineRegisterInfo &MRI,
569 const AArch64RegisterInfo &TRI) {
570 LLT Ty = MRI.getType(I.getOperand(0).getReg());
571 if (!Ty.isValid()) {
572 LLVM_DEBUG(dbgs() << "Generic binop register should be typed\n");
573 return true;
574 }
575
576 const RegisterBank *PrevOpBank = nullptr;
577 for (auto &MO : I.operands()) {
578 // FIXME: Support non-register operands.
579 if (!MO.isReg()) {
580 LLVM_DEBUG(dbgs() << "Generic inst non-reg operands are unsupported\n");
581 return true;
582 }
583
584 // FIXME: Can generic operations have physical registers operands? If
585 // so, this will need to be taught about that, and we'll need to get the
586 // bank out of the minimal class for the register.
587 // Either way, this needs to be documented (and possibly verified).
588 if (!Register::isVirtualRegister(MO.getReg())) {
589 LLVM_DEBUG(dbgs() << "Generic inst has physical register operand\n");
590 return true;
591 }
592
593 const RegisterBank *OpBank = RBI.getRegBank(MO.getReg(), MRI, TRI);
594 if (!OpBank) {
595 LLVM_DEBUG(dbgs() << "Generic register has no bank or class\n");
596 return true;
597 }
598
599 if (PrevOpBank && OpBank != PrevOpBank) {
600 LLVM_DEBUG(dbgs() << "Generic inst operands have different banks\n");
601 return true;
602 }
603 PrevOpBank = OpBank;
604 }
605 return false;
606}
607
608/// Select the AArch64 opcode for the basic binary operation \p GenericOpc
609/// (such as G_OR or G_SDIV), appropriate for the register bank \p RegBankID
610/// and of size \p OpSize.
611/// \returns \p GenericOpc if the combination is unsupported.
612static unsigned selectBinaryOp(unsigned GenericOpc, unsigned RegBankID,
613 unsigned OpSize) {
614 switch (RegBankID) {
615 case AArch64::GPRRegBankID:
616 if (OpSize == 32) {
617 switch (GenericOpc) {
618 case TargetOpcode::G_SHL:
619 return AArch64::LSLVWr;
620 case TargetOpcode::G_LSHR:
621 return AArch64::LSRVWr;
622 case TargetOpcode::G_ASHR:
623 return AArch64::ASRVWr;
624 default:
625 return GenericOpc;
626 }
627 } else if (OpSize == 64) {
628 switch (GenericOpc) {
629 case TargetOpcode::G_PTR_ADD:
630 return AArch64::ADDXrr;
631 case TargetOpcode::G_SHL:
632 return AArch64::LSLVXr;
633 case TargetOpcode::G_LSHR:
634 return AArch64::LSRVXr;
635 case TargetOpcode::G_ASHR:
636 return AArch64::ASRVXr;
637 default:
638 return GenericOpc;
639 }
640 }
641 break;
642 case AArch64::FPRRegBankID:
643 switch (OpSize) {
644 case 32:
645 switch (GenericOpc) {
646 case TargetOpcode::G_FADD:
647 return AArch64::FADDSrr;
648 case TargetOpcode::G_FSUB:
649 return AArch64::FSUBSrr;
650 case TargetOpcode::G_FMUL:
651 return AArch64::FMULSrr;
652 case TargetOpcode::G_FDIV:
653 return AArch64::FDIVSrr;
654 default:
655 return GenericOpc;
656 }
657 case 64:
658 switch (GenericOpc) {
659 case TargetOpcode::G_FADD:
660 return AArch64::FADDDrr;
661 case TargetOpcode::G_FSUB:
662 return AArch64::FSUBDrr;
663 case TargetOpcode::G_FMUL:
664 return AArch64::FMULDrr;
665 case TargetOpcode::G_FDIV:
666 return AArch64::FDIVDrr;
667 case TargetOpcode::G_OR:
668 return AArch64::ORRv8i8;
669 default:
670 return GenericOpc;
671 }
672 }
673 break;
674 }
675 return GenericOpc;
676}
677
678/// Select the AArch64 opcode for the G_LOAD or G_STORE operation \p GenericOpc,
679/// appropriate for the (value) register bank \p RegBankID and of memory access
680/// size \p OpSize. This returns the variant with the base+unsigned-immediate
681/// addressing mode (e.g., LDRXui).
682/// \returns \p GenericOpc if the combination is unsupported.
683static unsigned selectLoadStoreUIOp(unsigned GenericOpc, unsigned RegBankID,
684 unsigned OpSize) {
685 const bool isStore = GenericOpc == TargetOpcode::G_STORE;
686 switch (RegBankID) {
687 case AArch64::GPRRegBankID:
688 switch (OpSize) {
689 case 8:
690 return isStore ? AArch64::STRBBui : AArch64::LDRBBui;
691 case 16:
692 return isStore ? AArch64::STRHHui : AArch64::LDRHHui;
693 case 32:
694 return isStore ? AArch64::STRWui : AArch64::LDRWui;
695 case 64:
696 return isStore ? AArch64::STRXui : AArch64::LDRXui;
697 }
698 break;
699 case AArch64::FPRRegBankID:
700 switch (OpSize) {
701 case 8:
702 return isStore ? AArch64::STRBui : AArch64::LDRBui;
703 case 16:
704 return isStore ? AArch64::STRHui : AArch64::LDRHui;
705 case 32:
706 return isStore ? AArch64::STRSui : AArch64::LDRSui;
707 case 64:
708 return isStore ? AArch64::STRDui : AArch64::LDRDui;
709 }
710 break;
711 }
712 return GenericOpc;
713}
714
715#ifndef NDEBUG
716/// Helper function that verifies that we have a valid copy at the end of
717/// selectCopy. Verifies that the source and dest have the expected sizes and
718/// then returns true.
719static bool isValidCopy(const MachineInstr &I, const RegisterBank &DstBank,
720 const MachineRegisterInfo &MRI,
721 const TargetRegisterInfo &TRI,
722 const RegisterBankInfo &RBI) {
723 const Register DstReg = I.getOperand(0).getReg();
724 const Register SrcReg = I.getOperand(1).getReg();
725 const unsigned DstSize = RBI.getSizeInBits(DstReg, MRI, TRI);
726 const unsigned SrcSize = RBI.getSizeInBits(SrcReg, MRI, TRI);
727
728 // Make sure the size of the source and dest line up.
729 assert(
730 (DstSize == SrcSize ||
731 // Copies are a mean to setup initial types, the number of
732 // bits may not exactly match.
733 (Register::isPhysicalRegister(SrcReg) && DstSize <= SrcSize) ||
734 // Copies are a mean to copy bits around, as long as we are
735 // on the same register class, that's fine. Otherwise, that
736 // means we need some SUBREG_TO_REG or AND & co.
737 (((DstSize + 31) / 32 == (SrcSize + 31) / 32) && DstSize > SrcSize)) &&
738 "Copy with different width?!");
739
740 // Check the size of the destination.
741 assert((DstSize <= 64 || DstBank.getID() == AArch64::FPRRegBankID) &&
742 "GPRs cannot get more than 64-bit width values");
743
744 return true;
745}
746#endif
747
748/// Helper function for selectCopy. Inserts a subregister copy from \p SrcReg
749/// to \p *To.
750///
751/// E.g "To = COPY SrcReg:SubReg"
752static bool copySubReg(MachineInstr &I, MachineRegisterInfo &MRI,
753 const RegisterBankInfo &RBI, Register SrcReg,
754 const TargetRegisterClass *To, unsigned SubReg) {
755 assert(SrcReg.isValid() && "Expected a valid source register?");
756 assert(To && "Destination register class cannot be null");
757 assert(SubReg && "Expected a valid subregister");
758
759 MachineIRBuilder MIB(I);
760 auto SubRegCopy =
761 MIB.buildInstr(TargetOpcode::COPY, {To}, {}).addReg(SrcReg, 0, SubReg);
762 MachineOperand &RegOp = I.getOperand(1);
763 RegOp.setReg(SubRegCopy.getReg(0));
764
765 // It's possible that the destination register won't be constrained. Make
766 // sure that happens.
767 if (!Register::isPhysicalRegister(I.getOperand(0).getReg()))
768 RBI.constrainGenericRegister(I.getOperand(0).getReg(), *To, MRI);
769
770 return true;
771}
772
773/// Helper function to get the source and destination register classes for a
774/// copy. Returns a std::pair containing the source register class for the
775/// copy, and the destination register class for the copy. If a register class
776/// cannot be determined, then it will be nullptr.
777static std::pair<const TargetRegisterClass *, const TargetRegisterClass *>
778getRegClassesForCopy(MachineInstr &I, const TargetInstrInfo &TII,
779 MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI,
780 const RegisterBankInfo &RBI) {
781 Register DstReg = I.getOperand(0).getReg();
782 Register SrcReg = I.getOperand(1).getReg();
783 const RegisterBank &DstRegBank = *RBI.getRegBank(DstReg, MRI, TRI);
784 const RegisterBank &SrcRegBank = *RBI.getRegBank(SrcReg, MRI, TRI);
785 unsigned DstSize = RBI.getSizeInBits(DstReg, MRI, TRI);
786 unsigned SrcSize = RBI.getSizeInBits(SrcReg, MRI, TRI);
787
788 // Special casing for cross-bank copies of s1s. We can technically represent
789 // a 1-bit value with any size of register. The minimum size for a GPR is 32
790 // bits. So, we need to put the FPR on 32 bits as well.
791 //
792 // FIXME: I'm not sure if this case holds true outside of copies. If it does,
793 // then we can pull it into the helpers that get the appropriate class for a
794 // register bank. Or make a new helper that carries along some constraint
795 // information.
796 if (SrcRegBank != DstRegBank && (DstSize == 1 && SrcSize == 1))
797 SrcSize = DstSize = 32;
798
799 return {getMinClassForRegBank(SrcRegBank, SrcSize, true),
800 getMinClassForRegBank(DstRegBank, DstSize, true)};
801}
802
803static bool selectCopy(MachineInstr &I, const TargetInstrInfo &TII,
804 MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI,
805 const RegisterBankInfo &RBI) {
806 Register DstReg = I.getOperand(0).getReg();
807 Register SrcReg = I.getOperand(1).getReg();
808 const RegisterBank &DstRegBank = *RBI.getRegBank(DstReg, MRI, TRI);
809 const RegisterBank &SrcRegBank = *RBI.getRegBank(SrcReg, MRI, TRI);
810
811 // Find the correct register classes for the source and destination registers.
812 const TargetRegisterClass *SrcRC;
813 const TargetRegisterClass *DstRC;
814 std::tie(SrcRC, DstRC) = getRegClassesForCopy(I, TII, MRI, TRI, RBI);
815
816 if (!DstRC) {
817 LLVM_DEBUG(dbgs() << "Unexpected dest size "
818 << RBI.getSizeInBits(DstReg, MRI, TRI) << '\n');
819 return false;
820 }
821
822 // A couple helpers below, for making sure that the copy we produce is valid.
823
824 // Set to true if we insert a SUBREG_TO_REG. If we do this, then we don't want
825 // to verify that the src and dst are the same size, since that's handled by
826 // the SUBREG_TO_REG.
827 bool KnownValid = false;
828
829 // Returns true, or asserts if something we don't expect happens. Instead of
830 // returning true, we return isValidCopy() to ensure that we verify the
831 // result.
832 auto CheckCopy = [&]() {
833 // If we have a bitcast or something, we can't have physical registers.
834 assert((I.isCopy() ||
835 (!Register::isPhysicalRegister(I.getOperand(0).getReg()) &&
836 !Register::isPhysicalRegister(I.getOperand(1).getReg()))) &&
837 "No phys reg on generic operator!");
838 bool ValidCopy = true;
839#ifndef NDEBUG
840 ValidCopy = KnownValid || isValidCopy(I, DstRegBank, MRI, TRI, RBI);
841 assert(ValidCopy && "Invalid copy.");
842#endif
843 return ValidCopy;
844 };
845
846 // Is this a copy? If so, then we may need to insert a subregister copy.
847 if (I.isCopy()) {
848 // Yes. Check if there's anything to fix up.
849 if (!SrcRC) {
850 LLVM_DEBUG(dbgs() << "Couldn't determine source register class\n");
851 return false;
852 }
853
854 unsigned SrcSize = TRI.getRegSizeInBits(*SrcRC);
855 unsigned DstSize = TRI.getRegSizeInBits(*DstRC);
856 unsigned SubReg;
857
858 // If the source bank doesn't support a subregister copy small enough,
859 // then we first need to copy to the destination bank.
860 if (getMinSizeForRegBank(SrcRegBank) > DstSize) {
861 const TargetRegisterClass *DstTempRC =
862 getMinClassForRegBank(DstRegBank, SrcSize, /* GetAllRegSet */ true);
863 getSubRegForClass(DstRC, TRI, SubReg);
864
865 MachineIRBuilder MIB(I);
866 auto Copy = MIB.buildCopy({DstTempRC}, {SrcReg});
867 copySubReg(I, MRI, RBI, Copy.getReg(0), DstRC, SubReg);
868 } else if (SrcSize > DstSize) {
869 // If the source register is bigger than the destination we need to
870 // perform a subregister copy.
871 const TargetRegisterClass *SubRegRC =
872 getMinClassForRegBank(SrcRegBank, DstSize, /* GetAllRegSet */ true);
873 getSubRegForClass(SubRegRC, TRI, SubReg);
874 copySubReg(I, MRI, RBI, SrcReg, DstRC, SubReg);
875 } else if (DstSize > SrcSize) {
876 // If the destination register is bigger than the source we need to do
877 // a promotion using SUBREG_TO_REG.
878 const TargetRegisterClass *PromotionRC =
879 getMinClassForRegBank(SrcRegBank, DstSize, /* GetAllRegSet */ true);
880 getSubRegForClass(SrcRC, TRI, SubReg);
881
882 Register PromoteReg = MRI.createVirtualRegister(PromotionRC);
883 BuildMI(*I.getParent(), I, I.getDebugLoc(),
884 TII.get(AArch64::SUBREG_TO_REG), PromoteReg)
885 .addImm(0)
886 .addUse(SrcReg)
887 .addImm(SubReg);
888 MachineOperand &RegOp = I.getOperand(1);
889 RegOp.setReg(PromoteReg);
890
891 // Promise that the copy is implicitly validated by the SUBREG_TO_REG.
892 KnownValid = true;
893 }
894
895 // If the destination is a physical register, then there's nothing to
896 // change, so we're done.
897 if (Register::isPhysicalRegister(DstReg))
898 return CheckCopy();
899 }
900
901 // No need to constrain SrcReg. It will get constrained when we hit another
902 // of its use or its defs. Copies do not have constraints.
903 if (!RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) {
904 LLVM_DEBUG(dbgs() << "Failed to constrain " << TII.getName(I.getOpcode())
905 << " operand\n");
906 return false;
907 }
908 I.setDesc(TII.get(AArch64::COPY));
909 return CheckCopy();
910}
911
912static unsigned selectFPConvOpc(unsigned GenericOpc, LLT DstTy, LLT SrcTy) {
913 if (!DstTy.isScalar() || !SrcTy.isScalar())
914 return GenericOpc;
915
916 const unsigned DstSize = DstTy.getSizeInBits();
917 const unsigned SrcSize = SrcTy.getSizeInBits();
918
919 switch (DstSize) {
920 case 32:
921 switch (SrcSize) {
922 case 32:
923 switch (GenericOpc) {
924 case TargetOpcode::G_SITOFP:
925 return AArch64::SCVTFUWSri;
926 case TargetOpcode::G_UITOFP:
927 return AArch64::UCVTFUWSri;
928 case TargetOpcode::G_FPTOSI:
929 return AArch64::FCVTZSUWSr;
930 case TargetOpcode::G_FPTOUI:
931 return AArch64::FCVTZUUWSr;
932 default:
933 return GenericOpc;
934 }
935 case 64:
936 switch (GenericOpc) {
937 case TargetOpcode::G_SITOFP:
938 return AArch64::SCVTFUXSri;
939 case TargetOpcode::G_UITOFP:
940 return AArch64::UCVTFUXSri;
941 case TargetOpcode::G_FPTOSI:
942 return AArch64::FCVTZSUWDr;
943 case TargetOpcode::G_FPTOUI:
944 return AArch64::FCVTZUUWDr;
945 default:
946 return GenericOpc;
947 }
948 default:
949 return GenericOpc;
950 }
951 case 64:
952 switch (SrcSize) {
953 case 32:
954 switch (GenericOpc) {
955 case TargetOpcode::G_SITOFP:
956 return AArch64::SCVTFUWDri;
957 case TargetOpcode::G_UITOFP:
958 return AArch64::UCVTFUWDri;
959 case TargetOpcode::G_FPTOSI:
960 return AArch64::FCVTZSUXSr;
961 case TargetOpcode::G_FPTOUI:
962 return AArch64::FCVTZUUXSr;
963 default:
964 return GenericOpc;
965 }
966 case 64:
967 switch (GenericOpc) {
968 case TargetOpcode::G_SITOFP:
969 return AArch64::SCVTFUXDri;
970 case TargetOpcode::G_UITOFP:
971 return AArch64::UCVTFUXDri;
972 case TargetOpcode::G_FPTOSI:
973 return AArch64::FCVTZSUXDr;
974 case TargetOpcode::G_FPTOUI:
975 return AArch64::FCVTZUUXDr;
976 default:
977 return GenericOpc;
978 }
979 default:
980 return GenericOpc;
981 }
982 default:
983 return GenericOpc;
984 };
985 return GenericOpc;
986}
987
988static unsigned selectSelectOpc(MachineInstr &I, MachineRegisterInfo &MRI,
989 const RegisterBankInfo &RBI) {
990 const TargetRegisterInfo &TRI = *MRI.getTargetRegisterInfo();
991 bool IsFP = (RBI.getRegBank(I.getOperand(0).getReg(), MRI, TRI)->getID() !=
992 AArch64::GPRRegBankID);
993 LLT Ty = MRI.getType(I.getOperand(0).getReg());
994 if (Ty == LLT::scalar(32))
995 return IsFP ? AArch64::FCSELSrrr : AArch64::CSELWr;
996 else if (Ty == LLT::scalar(64) || Ty == LLT::pointer(0, 64))
997 return IsFP ? AArch64::FCSELDrrr : AArch64::CSELXr;
998 return 0;
999}
1000
1001/// Helper function to select the opcode for a G_FCMP.
1002static unsigned selectFCMPOpc(MachineInstr &I, MachineRegisterInfo &MRI) {
1003 // If this is a compare against +0.0, then we don't have to explicitly
1004 // materialize a constant.
1005 const ConstantFP *FPImm = getConstantFPVRegVal(I.getOperand(3).getReg(), MRI);
1006 bool ShouldUseImm = FPImm && (FPImm->isZero() && !FPImm->isNegative());
1007 unsigned OpSize = MRI.getType(I.getOperand(2).getReg()).getSizeInBits();
1008 if (OpSize != 32 && OpSize != 64)
1009 return 0;
1010 unsigned CmpOpcTbl[2][2] = {{AArch64::FCMPSrr, AArch64::FCMPDrr},
1011 {AArch64::FCMPSri, AArch64::FCMPDri}};
1012 return CmpOpcTbl[ShouldUseImm][OpSize == 64];
1013}
1014
1015/// Returns true if \p P is an unsigned integer comparison predicate.
1016static bool isUnsignedICMPPred(const CmpInst::Predicate P) {
1017 switch (P) {
1018 default:
1019 return false;
1020 case CmpInst::ICMP_UGT:
1021 case CmpInst::ICMP_UGE:
1022 case CmpInst::ICMP_ULT:
1023 case CmpInst::ICMP_ULE:
1024 return true;
1025 }
1026}
1027
1028static AArch64CC::CondCode changeICMPPredToAArch64CC(CmpInst::Predicate P) {
1029 switch (P) {
1030 default:
1031 llvm_unreachable("Unknown condition code!");
1032 case CmpInst::ICMP_NE:
1033 return AArch64CC::NE;
1034 case CmpInst::ICMP_EQ:
1035 return AArch64CC::EQ;
1036 case CmpInst::ICMP_SGT:
1037 return AArch64CC::GT;
1038 case CmpInst::ICMP_SGE:
1039 return AArch64CC::GE;
1040 case CmpInst::ICMP_SLT:
1041 return AArch64CC::LT;
1042 case CmpInst::ICMP_SLE:
1043 return AArch64CC::LE;
1044 case CmpInst::ICMP_UGT:
1045 return AArch64CC::HI;
1046 case CmpInst::ICMP_UGE:
1047 return AArch64CC::HS;
1048 case CmpInst::ICMP_ULT:
1049 return AArch64CC::LO;
1050 case CmpInst::ICMP_ULE:
1051 return AArch64CC::LS;
1052 }
1053}
1054
1055static void changeFCMPPredToAArch64CC(CmpInst::Predicate P,
1056 AArch64CC::CondCode &CondCode,
1057 AArch64CC::CondCode &CondCode2) {
1058 CondCode2 = AArch64CC::AL;
1059 switch (P) {
1060 default:
1061 llvm_unreachable("Unknown FP condition!");
1062 case CmpInst::FCMP_OEQ:
1063 CondCode = AArch64CC::EQ;
1064 break;
1065 case CmpInst::FCMP_OGT:
1066 CondCode = AArch64CC::GT;
1067 break;
1068 case CmpInst::FCMP_OGE:
1069 CondCode = AArch64CC::GE;
1070 break;
1071 case CmpInst::FCMP_OLT:
1072 CondCode = AArch64CC::MI;
1073 break;
1074 case CmpInst::FCMP_OLE:
1075 CondCode = AArch64CC::LS;
1076 break;
1077 case CmpInst::FCMP_ONE:
1078 CondCode = AArch64CC::MI;
1079 CondCode2 = AArch64CC::GT;
1080 break;
1081 case CmpInst::FCMP_ORD:
1082 CondCode = AArch64CC::VC;
1083 break;
1084 case CmpInst::FCMP_UNO:
1085 CondCode = AArch64CC::VS;
1086 break;
1087 case CmpInst::FCMP_UEQ:
1088 CondCode = AArch64CC::EQ;
1089 CondCode2 = AArch64CC::VS;
1090 break;
1091 case CmpInst::FCMP_UGT:
1092 CondCode = AArch64CC::HI;
1093 break;
1094 case CmpInst::FCMP_UGE:
1095 CondCode = AArch64CC::PL;
1096 break;
1097 case CmpInst::FCMP_ULT:
1098 CondCode = AArch64CC::LT;
1099 break;
1100 case CmpInst::FCMP_ULE:
1101 CondCode = AArch64CC::LE;
1102 break;
1103 case CmpInst::FCMP_UNE:
1104 CondCode = AArch64CC::NE;
1105 break;
1106 }
1107}
1108
1109/// Return a register which can be used as a bit to test in a TB(N)Z.
1110static Register getTestBitReg(Register Reg, uint64_t &Bit, bool &Invert,
1111 MachineRegisterInfo &MRI) {
1112  assert(Reg.isValid() && "Expected valid register!");
1113 while (MachineInstr *MI = getDefIgnoringCopies(Reg, MRI)) {
1114 unsigned Opc = MI->getOpcode();
1115
1116 if (!MI->getOperand(0).isReg() ||
1117 !MRI.hasOneNonDBGUse(MI->getOperand(0).getReg()))
1118 break;
1119
1120 // (tbz (any_ext x), b) -> (tbz x, b) if we don't use the extended bits.
1121 //
1122 // (tbz (trunc x), b) -> (tbz x, b) is always safe, because the bit number
1123 // on the truncated x is the same as the bit number on x.
1124 if (Opc == TargetOpcode::G_ANYEXT || Opc == TargetOpcode::G_ZEXT ||
1125 Opc == TargetOpcode::G_TRUNC) {
1126 Register NextReg = MI->getOperand(1).getReg();
1127 // Did we find something worth folding?
1128 if (!NextReg.isValid() || !MRI.hasOneNonDBGUse(NextReg))
1129 break;
1130
1131 // NextReg is worth folding. Keep looking.
1132 Reg = NextReg;
1133 continue;
1134 }
1135
1136 // Attempt to find a suitable operation with a constant on one side.
1137 Optional<uint64_t> C;
1138 Register TestReg;
1139 switch (Opc) {
1140 default:
1141 break;
1142 case TargetOpcode::G_AND:
1143 case TargetOpcode::G_XOR: {
1144 TestReg = MI->getOperand(1).getReg();
1145 Register ConstantReg = MI->getOperand(2).getReg();
1146 auto VRegAndVal = getConstantVRegValWithLookThrough(ConstantReg, MRI);
1147 if (!VRegAndVal) {
1148 // AND commutes, check the other side for a constant.
1149 // FIXME: Can we canonicalize the constant so that it's always on the
1150 // same side at some point earlier?
1151 std::swap(ConstantReg, TestReg);
1152 VRegAndVal = getConstantVRegValWithLookThrough(ConstantReg, MRI);
1153 }
1154 if (VRegAndVal)
1155 C = VRegAndVal->Value;
1156 break;
1157 }
1158 case TargetOpcode::G_ASHR:
1159 case TargetOpcode::G_LSHR:
1160 case TargetOpcode::G_SHL: {
1161 TestReg = MI->getOperand(1).getReg();
1162 auto VRegAndVal =
1163 getConstantVRegValWithLookThrough(MI->getOperand(2).getReg(), MRI);
1164 if (VRegAndVal)
1165 C = VRegAndVal->Value;
1166 break;
1167 }
1168 }
1169
1170 // Didn't find a constant or viable register. Bail out of the loop.
1171 if (!C || !TestReg.isValid())
1172 break;
1173
1174 // We found a suitable instruction with a constant. Check to see if we can
1175 // walk through the instruction.
1176 Register NextReg;
1177 unsigned TestRegSize = MRI.getType(TestReg).getSizeInBits();
1178 switch (Opc) {
1179 default:
1180 break;
1181 case TargetOpcode::G_AND:
1182 // (tbz (and x, m), b) -> (tbz x, b) when the b-th bit of m is set.
1183 if ((*C >> Bit) & 1)
1184 NextReg = TestReg;
1185 break;
1186 case TargetOpcode::G_SHL:
1187 // (tbz (shl x, c), b) -> (tbz x, b-c) when b-c is positive and fits in
1188 // the type of the register.
1189 if (*C <= Bit && (Bit - *C) < TestRegSize) {
1190 NextReg = TestReg;
1191 Bit = Bit - *C;
1192 }
1193 break;
1194 case TargetOpcode::G_ASHR:
1195 // (tbz (ashr x, c), b) -> (tbz x, b+c) or (tbz x, msb) if b+c is > # bits
1196 // in x
1197 NextReg = TestReg;
1198 Bit = Bit + *C;
1199 if (Bit >= TestRegSize)
1200 Bit = TestRegSize - 1;
1201 break;
1202 case TargetOpcode::G_LSHR:
1203 // (tbz (lshr x, c), b) -> (tbz x, b+c) when b + c is < # bits in x
1204 if ((Bit + *C) < TestRegSize) {
1205 NextReg = TestReg;
1206 Bit = Bit + *C;
1207 }
1208 break;
1209 case TargetOpcode::G_XOR:
1210 // We can walk through a G_XOR by inverting whether we use tbz/tbnz when
1211 // appropriate.
1212 //
1213 // e.g. If x' = xor x, c, and the b-th bit is set in c then
1214 //
1215 // tbz x', b -> tbnz x, b
1216 //
1217 // Because x' only has the b-th bit set if x does not.
1218 if ((*C >> Bit) & 1)
1219 Invert = !Invert;
1220 NextReg = TestReg;
1221 break;
1222 }
1223
1224 // Check if we found anything worth folding.
1225 if (!NextReg.isValid())
1226 return Reg;
1227 Reg = NextReg;
1228 }
1229
1230 return Reg;
1231}
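// A worked example of the walk above (illustrative GMIR sketch): to test
// bit 3 of %shl in
//
//   %shl:_(s64) = G_SHL %x, 2
//   TB(N)Z %shl, 3
//
// the G_SHL case rewrites the test to bit 3 - 2 = 1 of %x, since bit 3 of
// (x << 2) is bit 1 of x.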
1232
1233MachineInstr *AArch64InstructionSelector::emitTestBit(
1234 Register TestReg, uint64_t Bit, bool IsNegative, MachineBasicBlock *DstMBB,
1235 MachineIRBuilder &MIB) const {
1236  assert(TestReg.isValid());
1237  assert(ProduceNonFlagSettingCondBr &&
1238         "Cannot emit TB(N)Z with speculation tracking!");
1239 MachineRegisterInfo &MRI = *MIB.getMRI();
1240
1241 // Attempt to optimize the test bit by walking over instructions.
1242 TestReg = getTestBitReg(TestReg, Bit, IsNegative, MRI);
1243 LLT Ty = MRI.getType(TestReg);
1244 unsigned Size = Ty.getSizeInBits();
1245  assert(!Ty.isVector() && "Expected a scalar!");
1246  assert(Bit < 64 && "Bit is too large!");
1247
1248 // When the test register is a 64-bit register, we have to narrow to make
1249 // TBNZW work.
1250 bool UseWReg = Bit < 32;
1251 unsigned NecessarySize = UseWReg ? 32 : 64;
1252 if (Size < NecessarySize)
1253 TestReg = widenGPRBankRegIfNeeded(TestReg, NecessarySize, MIB);
1254 else if (Size > NecessarySize)
1255 TestReg = narrowExtendRegIfNeeded(TestReg, MIB);
1256
1257 static const unsigned OpcTable[2][2] = {{AArch64::TBZX, AArch64::TBNZX},
1258 {AArch64::TBZW, AArch64::TBNZW}};
1259 unsigned Opc = OpcTable[UseWReg][IsNegative];
1260 auto TestBitMI =
1261 MIB.buildInstr(Opc).addReg(TestReg).addImm(Bit).addMBB(DstMBB);
1262 constrainSelectedInstRegOperands(*TestBitMI, TII, TRI, RBI);
1263 return &*TestBitMI;
1264}
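// For reference, the opcode table above is indexed as
// OpcTable[UseWReg][IsNegative]: e.g. Bit = 40 with IsNegative = true gives
// OpcTable[0][1] == AArch64::TBNZX, while Bit = 3 with IsNegative = false
// gives OpcTable[1][0] == AArch64::TBZW (after the register has been widened
// or narrowed to the necessary size).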
1265
1266bool AArch64InstructionSelector::tryOptAndIntoCompareBranch(
1267 MachineInstr *AndInst, int64_t CmpConstant, const CmpInst::Predicate &Pred,
1268 MachineBasicBlock *DstMBB, MachineIRBuilder &MIB) const {
1269 // Given something like this:
1270 //
1271 // %x = ...Something...
1272 // %one = G_CONSTANT i64 1
1273 // %zero = G_CONSTANT i64 0
1274 // %and = G_AND %x, %one
1275 // %cmp = G_ICMP intpred(ne), %and, %zero
1276 // %cmp_trunc = G_TRUNC %cmp
1277 // G_BRCOND %cmp_trunc, %bb.3
1278 //
1279 // We want to try and fold the AND into the G_BRCOND and produce either a
1280 // TBNZ (when we have intpred(ne)) or a TBZ (when we have intpred(eq)).
1281 //
1282 // In this case, we'd get
1283 //
1284 // TBNZ %x %bb.3
1285 //
1286 if (!AndInst || AndInst->getOpcode() != TargetOpcode::G_AND)
1287 return false;
1288
1289 // Need to be comparing against 0 to fold.
1290 if (CmpConstant != 0)
1291 return false;
1292
1293 MachineRegisterInfo &MRI = *MIB.getMRI();
1294
1295 // Only support EQ and NE. If we have LT, then it *is* possible to fold, but
1296 // we don't want to do this. When we have an AND and LT, we need a TST/ANDS,
1297 // so folding would be redundant.
1298 if (Pred != CmpInst::Predicate::ICMP_EQ &&
1299 Pred != CmpInst::Predicate::ICMP_NE)
1300 return false;
1301
1302 // Check if the AND has a constant on its RHS which we can use as a mask.
1303 // If it's a power of 2, then it's the same as checking a specific bit.
1304 // (e.g, ANDing with 8 == ANDing with 000...100 == testing if bit 3 is set)
1305 auto MaybeBit =
1306 getConstantVRegValWithLookThrough(AndInst->getOperand(2).getReg(), MRI);
1307 if (!MaybeBit || !isPowerOf2_64(MaybeBit->Value))
1308 return false;
1309
1310 uint64_t Bit = Log2_64(static_cast<uint64_t>(MaybeBit->Value));
1311 Register TestReg = AndInst->getOperand(1).getReg();
1312 bool Invert = Pred == CmpInst::Predicate::ICMP_NE;
1313
1314 // Emit a TB(N)Z.
1315 emitTestBit(TestReg, Bit, Invert, DstMBB, MIB);
1316 return true;
1317}
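// Concretely, for the mask check above (illustrative vreg names): ANDing with
// 8 == 0b1000 passes isPowerOf2_64 and Log2_64(8) == 3, so
//
//   %and = G_AND %x, 8
//   %cmp = G_ICMP intpred(ne), %and, 0
//   G_BRCOND %cmp, %bb.3
//
// is selected as TBNZ %x, 3, %bb.3.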
1318
1319bool AArch64InstructionSelector::selectCompareBranch(
1320 MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const {
1321
1322 const Register CondReg = I.getOperand(0).getReg();
1323 MachineBasicBlock *DestMBB = I.getOperand(1).getMBB();
1324 MachineInstr *CCMI = MRI.getVRegDef(CondReg);
1325 if (CCMI->getOpcode() == TargetOpcode::G_TRUNC)
1326 CCMI = MRI.getVRegDef(CCMI->getOperand(1).getReg());
1327 if (CCMI->getOpcode() != TargetOpcode::G_ICMP)
1328 return false;
1329
1330 Register LHS = CCMI->getOperand(2).getReg();
1331 Register RHS = CCMI->getOperand(3).getReg();
1332 auto VRegAndVal = getConstantVRegValWithLookThrough(RHS, MRI);
1333 MachineIRBuilder MIB(I);
1334 CmpInst::Predicate Pred =
1335 (CmpInst::Predicate)CCMI->getOperand(1).getPredicate();
1336 MachineInstr *LHSMI = getDefIgnoringCopies(LHS, MRI);
1337
1338 // When we can emit a TB(N)Z, prefer that.
1339 //
1340 // Handle non-commutative condition codes first.
1341 // Note that we don't want to do this when we have a G_AND because it can
1342 // become a tst. The tst will make the test bit in the TB(N)Z redundant.
1343 if (VRegAndVal && LHSMI->getOpcode() != TargetOpcode::G_AND) {
1344 int64_t C = VRegAndVal->Value;
1345
1346 // When we have a greater-than comparison, we can just test if the msb is
1347 // zero.
1348 if (C == -1 && Pred == CmpInst::ICMP_SGT) {
1349 uint64_t Bit = MRI.getType(LHS).getSizeInBits() - 1;
1350 emitTestBit(LHS, Bit, /*IsNegative = */ false, DestMBB, MIB);
1351 I.eraseFromParent();
1352 return true;
1353 }
1354
1355 // When we have a less than comparison, we can just test if the msb is not
1356 // zero.
1357 if (C == 0 && Pred == CmpInst::ICMP_SLT) {
1358 uint64_t Bit = MRI.getType(LHS).getSizeInBits() - 1;
1359 emitTestBit(LHS, Bit, /*IsNegative = */ true, DestMBB, MIB);
1360 I.eraseFromParent();
1361 return true;
1362 }
1363 }
1364
1365 if (!VRegAndVal) {
1366 std::swap(RHS, LHS);
1367 VRegAndVal = getConstantVRegValWithLookThrough(RHS, MRI);
1368 LHSMI = getDefIgnoringCopies(LHS, MRI);
1369 }
1370
1371 if (!VRegAndVal || VRegAndVal->Value != 0) {
1372 // If we can't select a CBZ then emit a cmp + Bcc.
1373 MachineInstr *Cmp;
1374 std::tie(Cmp, Pred) = emitIntegerCompare(
1375 CCMI->getOperand(2), CCMI->getOperand(3), CCMI->getOperand(1), MIB);
1376 if (!Cmp)
1377 return false;
1378 const AArch64CC::CondCode CC = changeICMPPredToAArch64CC(Pred);
1379 MIB.buildInstr(AArch64::Bcc, {}, {}).addImm(CC).addMBB(DestMBB);
1380 I.eraseFromParent();
1381 return true;
1382 }
1383
1384 // Try to emit a TB(N)Z for an eq or ne condition.
1385 if (tryOptAndIntoCompareBranch(LHSMI, VRegAndVal->Value, Pred, DestMBB,
1386 MIB)) {
1387 I.eraseFromParent();
1388 return true;
1389 }
1390
1391 const RegisterBank &RB = *RBI.getRegBank(LHS, MRI, TRI);
1392 if (RB.getID() != AArch64::GPRRegBankID)
1393 return false;
1394 if (Pred != CmpInst::ICMP_NE && Pred != CmpInst::ICMP_EQ)
1395 return false;
1396
1397 const unsigned CmpWidth = MRI.getType(LHS).getSizeInBits();
1398 unsigned CBOpc = 0;
1399 if (CmpWidth <= 32)
1400 CBOpc = (Pred == CmpInst::ICMP_EQ ? AArch64::CBZW : AArch64::CBNZW);
1401 else if (CmpWidth == 64)
1402 CBOpc = (Pred == CmpInst::ICMP_EQ ? AArch64::CBZX : AArch64::CBNZX);
1403 else
1404 return false;
1405
1406 BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(CBOpc))
1407 .addUse(LHS)
1408 .addMBB(DestMBB)
1409 .constrainAllUses(TII, TRI, RBI);
1410
1411 I.eraseFromParent();
1412 return true;
1413}
1414
1415/// Returns the element immediate value of a vector shift operand if found.
1416/// This needs to detect a splat-like operation, e.g. a G_BUILD_VECTOR.
1417static Optional<int64_t> getVectorShiftImm(Register Reg,
1418 MachineRegisterInfo &MRI) {
1419  assert(MRI.getType(Reg).isVector() && "Expected a *vector* shift operand");
1420 MachineInstr *OpMI = MRI.getVRegDef(Reg);
1421  assert(OpMI && "Expected to find a vreg def for vector shift operand");
1422 if (OpMI->getOpcode() != TargetOpcode::G_BUILD_VECTOR)
1423 return None;
1424
1425 // Check all operands are identical immediates.
1426 int64_t ImmVal = 0;
1427 for (unsigned Idx = 1; Idx < OpMI->getNumOperands(); ++Idx) {
1428 auto VRegAndVal = getConstantVRegValWithLookThrough(OpMI->getOperand(Idx).getReg(), MRI);
1429 if (!VRegAndVal)
1430 return None;
1431
1432 if (Idx == 1)
1433 ImmVal = VRegAndVal->Value;
1434 if (ImmVal != VRegAndVal->Value)
1435 return None;
1436 }
1437
1438 return ImmVal;
1439}
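// For example (illustrative), a splat shift amount such as
//
//   %c:_(s32) = G_CONSTANT i32 3
//   %amt:_(<4 x s32>) = G_BUILD_VECTOR %c, %c, %c, %c
//
// yields 3 here, whereas a G_BUILD_VECTOR whose elements differ (or are not
// constants) yields None.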
1440
1441/// Matches and returns the shift immediate value for a SHL instruction given
1442/// a shift operand.
1443static Optional<int64_t> getVectorSHLImm(LLT SrcTy, Register Reg, MachineRegisterInfo &MRI) {
1444 Optional<int64_t> ShiftImm = getVectorShiftImm(Reg, MRI);
1445 if (!ShiftImm)
1446 return None;
1447 // Check the immediate is in range for a SHL.
1448 int64_t Imm = *ShiftImm;
1449 if (Imm < 0)
1450 return None;
1451 switch (SrcTy.getElementType().getSizeInBits()) {
1452 default:
1453    LLVM_DEBUG(dbgs() << "Unhandled element type for vector shift");
1454 return None;
1455 case 8:
1456 if (Imm > 7)
1457 return None;
1458 break;
1459 case 16:
1460 if (Imm > 15)
1461 return None;
1462 break;
1463 case 32:
1464 if (Imm > 31)
1465 return None;
1466 break;
1467 case 64:
1468 if (Imm > 63)
1469 return None;
1470 break;
1471 }
1472 return Imm;
1473}
1474
1475bool AArch64InstructionSelector::selectVectorSHL(
1476 MachineInstr &I, MachineRegisterInfo &MRI) const {
1477  assert(I.getOpcode() == TargetOpcode::G_SHL);
1478 Register DstReg = I.getOperand(0).getReg();
1479 const LLT Ty = MRI.getType(DstReg);
1480 Register Src1Reg = I.getOperand(1).getReg();
1481 Register Src2Reg = I.getOperand(2).getReg();
1482
1483 if (!Ty.isVector())
1484 return false;
1485
1486 // Check if we have a vector of constants on RHS that we can select as the
1487 // immediate form.
1488 Optional<int64_t> ImmVal = getVectorSHLImm(Ty, Src2Reg, MRI);
1489
1490 unsigned Opc = 0;
1491 if (Ty == LLT::vector(2, 64)) {
1492 Opc = ImmVal ? AArch64::SHLv2i64_shift : AArch64::USHLv2i64;
1493 } else if (Ty == LLT::vector(4, 32)) {
1494 Opc = ImmVal ? AArch64::SHLv4i32_shift : AArch64::USHLv4i32;
1495 } else if (Ty == LLT::vector(2, 32)) {
1496 Opc = ImmVal ? AArch64::SHLv2i32_shift : AArch64::USHLv2i32;
1497 } else if (Ty == LLT::vector(4, 16)) {
1498 Opc = ImmVal ? AArch64::SHLv4i16_shift : AArch64::USHLv4i16;
1499 } else if (Ty == LLT::vector(8, 16)) {
1500 Opc = ImmVal ? AArch64::SHLv8i16_shift : AArch64::USHLv8i16;
1501 } else if (Ty == LLT::vector(16, 8)) {
1502 Opc = ImmVal ? AArch64::SHLv16i8_shift : AArch64::USHLv16i8;
1503 } else {
1504    LLVM_DEBUG(dbgs() << "Unhandled G_SHL type");
1505 return false;
1506 }
1507
1508 MachineIRBuilder MIB(I);
1509 auto Shl = MIB.buildInstr(Opc, {DstReg}, {Src1Reg});
1510 if (ImmVal)
1511 Shl.addImm(*ImmVal);
1512 else
1513 Shl.addUse(Src2Reg);
1514 constrainSelectedInstRegOperands(*Shl, TII, TRI, RBI);
1515 I.eraseFromParent();
1516 return true;
1517}
1518
1519bool AArch64InstructionSelector::selectVectorAshrLshr(
1520 MachineInstr &I, MachineRegisterInfo &MRI) const {
1521  assert(I.getOpcode() == TargetOpcode::G_ASHR ||
1522         I.getOpcode() == TargetOpcode::G_LSHR);
1523 Register DstReg = I.getOperand(0).getReg();
1524 const LLT Ty = MRI.getType(DstReg);
1525 Register Src1Reg = I.getOperand(1).getReg();
1526 Register Src2Reg = I.getOperand(2).getReg();
1527
1528 if (!Ty.isVector())
1529 return false;
1530
1531 bool IsASHR = I.getOpcode() == TargetOpcode::G_ASHR;
1532
1533 // We expect the immediate case to be lowered in the PostLegalCombiner to
1534 // AArch64ISD::VASHR or AArch64ISD::VLSHR equivalents.
1535
1536  // There is no shift-right-by-register instruction, but the
1537  // shift-left-by-register instruction takes a signed value, where negative
1538  // amounts specify a right shift.
1539
1540 unsigned Opc = 0;
1541 unsigned NegOpc = 0;
1542 const TargetRegisterClass *RC =
1543 getRegClassForTypeOnBank(Ty, RBI.getRegBank(AArch64::FPRRegBankID), RBI);
1544 if (Ty == LLT::vector(2, 64)) {
1545 Opc = IsASHR ? AArch64::SSHLv2i64 : AArch64::USHLv2i64;
1546 NegOpc = AArch64::NEGv2i64;
1547 } else if (Ty == LLT::vector(4, 32)) {
1548 Opc = IsASHR ? AArch64::SSHLv4i32 : AArch64::USHLv4i32;
1549 NegOpc = AArch64::NEGv4i32;
1550 } else if (Ty == LLT::vector(2, 32)) {
1551 Opc = IsASHR ? AArch64::SSHLv2i32 : AArch64::USHLv2i32;
1552 NegOpc = AArch64::NEGv2i32;
1553 } else if (Ty == LLT::vector(4, 16)) {
1554 Opc = IsASHR ? AArch64::SSHLv4i16 : AArch64::USHLv4i16;
1555 NegOpc = AArch64::NEGv4i16;
1556 } else if (Ty == LLT::vector(8, 16)) {
1557 Opc = IsASHR ? AArch64::SSHLv8i16 : AArch64::USHLv8i16;
1558 NegOpc = AArch64::NEGv8i16;
1559 } else if (Ty == LLT::vector(16, 8)) {
1560 Opc = IsASHR ? AArch64::SSHLv16i8 : AArch64::USHLv16i8;
1561    NegOpc = AArch64::NEGv16i8;
1562 } else {
1563    LLVM_DEBUG(dbgs() << "Unhandled G_ASHR type");
1564 return false;
1565 }
1566
1567 MachineIRBuilder MIB(I);
1568 auto Neg = MIB.buildInstr(NegOpc, {RC}, {Src2Reg});
1569 constrainSelectedInstRegOperands(*Neg, TII, TRI, RBI);
1570 auto SShl = MIB.buildInstr(Opc, {DstReg}, {Src1Reg, Neg});
1571 constrainSelectedInstRegOperands(*SShl, TII, TRI, RBI);
1572 I.eraseFromParent();
1573 return true;
1574}
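// As a sketch of the lowering above for a <4 x s32> arithmetic shift right
// (illustrative register names):
//
//   %neg:fpr(<4 x s32>) = NEGv4i32 %amt
//   %dst:fpr(<4 x s32>) = SSHLv4i32 %src, %neg
//
// The shift amounts are negated so the signed shift-left instruction performs
// the right shift element-wise.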
1575
1576bool AArch64InstructionSelector::selectVaStartAAPCS(
1577 MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const {
1578 return false;
1579}
1580
1581bool AArch64InstructionSelector::selectVaStartDarwin(
1582 MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const {
1583 AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
1584 Register ListReg = I.getOperand(0).getReg();
1585
1586 Register ArgsAddrReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
1587
1588 auto MIB =
1589 BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::ADDXri))
1590 .addDef(ArgsAddrReg)
1591 .addFrameIndex(FuncInfo->getVarArgsStackIndex())
1592 .addImm(0)
1593 .addImm(0);
1594
1595 constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
1596
1597 MIB = BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::STRXui))
1598 .addUse(ArgsAddrReg)
1599 .addUse(ListReg)
1600 .addImm(0)
1601 .addMemOperand(*I.memoperands_begin());
1602
1603 constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
1604 I.eraseFromParent();
1605 return true;
1606}
1607
1608void AArch64InstructionSelector::materializeLargeCMVal(
1609 MachineInstr &I, const Value *V, unsigned OpFlags) const {
1610 MachineBasicBlock &MBB = *I.getParent();
1611 MachineFunction &MF = *MBB.getParent();
1612 MachineRegisterInfo &MRI = MF.getRegInfo();
1613 MachineIRBuilder MIB(I);
1614
1615 auto MovZ = MIB.buildInstr(AArch64::MOVZXi, {&AArch64::GPR64RegClass}, {});
1616 MovZ->addOperand(MF, I.getOperand(1));
1617 MovZ->getOperand(1).setTargetFlags(OpFlags | AArch64II::MO_G0 |
1618 AArch64II::MO_NC);
1619 MovZ->addOperand(MF, MachineOperand::CreateImm(0));
1620 constrainSelectedInstRegOperands(*MovZ, TII, TRI, RBI);
1621
1622 auto BuildMovK = [&](Register SrcReg, unsigned char Flags, unsigned Offset,
1623 Register ForceDstReg) {
1624 Register DstReg = ForceDstReg
1625 ? ForceDstReg
1626 : MRI.createVirtualRegister(&AArch64::GPR64RegClass);
1627 auto MovI = MIB.buildInstr(AArch64::MOVKXi).addDef(DstReg).addUse(SrcReg);
1628 if (auto *GV = dyn_cast<GlobalValue>(V)) {
1629 MovI->addOperand(MF, MachineOperand::CreateGA(
1630 GV, MovZ->getOperand(1).getOffset(), Flags));
1631 } else {
1632 MovI->addOperand(
1633 MF, MachineOperand::CreateBA(cast<BlockAddress>(V),
1634 MovZ->getOperand(1).getOffset(), Flags));
1635 }
1636 MovI->addOperand(MF, MachineOperand::CreateImm(Offset));
1637 constrainSelectedInstRegOperands(*MovI, TII, TRI, RBI);
1638 return DstReg;
1639 };
1640 Register DstReg = BuildMovK(MovZ.getReg(0),
1641 AArch64II::MO_G1 | AArch64II::MO_NC, 16, 0);
1642 DstReg = BuildMovK(DstReg, AArch64II::MO_G2 | AArch64II::MO_NC, 32, 0);
1643 BuildMovK(DstReg, AArch64II::MO_G3, 48, I.getOperand(0).getReg());
1644 return;
1645}
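// As an illustration of the chain built above, materializing the address of a
// global @g under the large code model looks roughly like:
//
//   %0:gpr64 = MOVZXi @g (G0, NC), 0
//   %1:gpr64 = MOVKXi %0, @g (G1, NC), 16
//   %2:gpr64 = MOVKXi %1, @g (G2, NC), 32
//   %dst     = MOVKXi %2, @g (G3), 48
//
// with each instruction supplying one 16-bit chunk of the 64-bit address.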
1646
1647bool AArch64InstructionSelector::preISelLower(MachineInstr &I) {
1648 MachineBasicBlock &MBB = *I.getParent();
1649 MachineFunction &MF = *MBB.getParent();
1650 MachineRegisterInfo &MRI = MF.getRegInfo();
1651
1652 switch (I.getOpcode()) {
1653 case TargetOpcode::G_SHL:
1654 case TargetOpcode::G_ASHR:
1655 case TargetOpcode::G_LSHR: {
1656 // These shifts are legalized to have 64 bit shift amounts because we want
1657 // to take advantage of the existing imported selection patterns that assume
1658 // the immediates are s64s. However, if the shifted type is 32 bits and for
1659 // some reason we receive input GMIR that has an s64 shift amount that's not
1660 // a G_CONSTANT, insert a truncate so that we can still select the s32
1661 // register-register variant.
1662 Register SrcReg = I.getOperand(1).getReg();
1663 Register ShiftReg = I.getOperand(2).getReg();
1664 const LLT ShiftTy = MRI.getType(ShiftReg);
1665 const LLT SrcTy = MRI.getType(SrcReg);
1666 if (SrcTy.isVector())
1667 return false;
1668    assert(!ShiftTy.isVector() && "unexpected vector shift ty");
1669 if (SrcTy.getSizeInBits() != 32 || ShiftTy.getSizeInBits() != 64)
1670 return false;
1671 auto *AmtMI = MRI.getVRegDef(ShiftReg);
1672    assert(AmtMI && "could not find a vreg definition for shift amount");
1673 if (AmtMI->getOpcode() != TargetOpcode::G_CONSTANT) {
1674 // Insert a subregister copy to implement a 64->32 trunc
1675 MachineIRBuilder MIB(I);
1676 auto Trunc = MIB.buildInstr(TargetOpcode::COPY, {SrcTy}, {})
1677 .addReg(ShiftReg, 0, AArch64::sub_32);
1678 MRI.setRegBank(Trunc.getReg(0), RBI.getRegBank(AArch64::GPRRegBankID));
1679 I.getOperand(2).setReg(Trunc.getReg(0));
1680 }
1681 return true;
1682 }
1683 case TargetOpcode::G_STORE:
1684 return contractCrossBankCopyIntoStore(I, MRI);
1685 case TargetOpcode::G_PTR_ADD:
1686 return convertPtrAddToAdd(I, MRI);
1687 case TargetOpcode::G_LOAD: {
1688 // For scalar loads of pointers, we try to convert the dest type from p0
1689 // to s64 so that our imported patterns can match. Like with the G_PTR_ADD
1690 // conversion, this should be ok because all users should have been
1691 // selected already, so the type doesn't matter for them.
1692 Register DstReg = I.getOperand(0).getReg();
1693 const LLT DstTy = MRI.getType(DstReg);
1694 if (!DstTy.isPointer())
1695 return false;
1696 MRI.setType(DstReg, LLT::scalar(64));
1697 return true;
1698 }
1699 default:
1700 return false;
1701 }
1702}
1703
1704/// This lowering tries to look for G_PTR_ADD instructions and then converts
1705/// them to a standard G_ADD with a COPY on the source.
1706///
1707/// The motivation behind this is to expose the add semantics to the imported
1708/// tablegen patterns. We shouldn't need to check for uses being loads/stores,
1709/// because the selector works bottom up, uses before defs. By the time we
1710/// end up trying to select a G_PTR_ADD, we should have already attempted to
1711/// fold this into addressing modes and were therefore unsuccessful.
1712bool AArch64InstructionSelector::convertPtrAddToAdd(
1713 MachineInstr &I, MachineRegisterInfo &MRI) {
1714  assert(I.getOpcode() == TargetOpcode::G_PTR_ADD && "Expected G_PTR_ADD");
1715 Register DstReg = I.getOperand(0).getReg();
1716 Register AddOp1Reg = I.getOperand(1).getReg();
1717 const LLT PtrTy = MRI.getType(DstReg);
1718 if (PtrTy.getAddressSpace() != 0)
1719 return false;
1720
1721 MachineIRBuilder MIB(I);
1722 const LLT CastPtrTy = PtrTy.isVector() ? LLT::vector(2, 64) : LLT::scalar(64);
1723 auto PtrToInt = MIB.buildPtrToInt(CastPtrTy, AddOp1Reg);
1724 // Set regbanks on the registers.
1725 if (PtrTy.isVector())
1726 MRI.setRegBank(PtrToInt.getReg(0), RBI.getRegBank(AArch64::FPRRegBankID));
1727 else
1728 MRI.setRegBank(PtrToInt.getReg(0), RBI.getRegBank(AArch64::GPRRegBankID));
1729
1730 // Now turn the %dst(p0) = G_PTR_ADD %base, off into:
1731 // %dst(intty) = G_ADD %intbase, off
1732 I.setDesc(TII.get(TargetOpcode::G_ADD));
1733 MRI.setType(DstReg, CastPtrTy);
1734 I.getOperand(1).setReg(PtrToInt.getReg(0));
1735 if (!select(*PtrToInt)) {
1736    LLVM_DEBUG(dbgs() << "Failed to select G_PTRTOINT in convertPtrAddToAdd");
1737 return false;
1738 }
1739 return true;
1740}
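// In GMIR terms, the rewrite above turns (illustrative vregs)
//
//   %dst:gpr(p0) = G_PTR_ADD %base(p0), %off(s64)
//
// into
//
//   %intbase:gpr(s64) = G_PTRTOINT %base(p0)
//   %dst:gpr(s64) = G_ADD %intbase, %off
//
// so the imported integer add patterns can match the result.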
1741
1742bool AArch64InstructionSelector::earlySelectSHL(
1743 MachineInstr &I, MachineRegisterInfo &MRI) const {
1744 // We try to match the immediate variant of LSL, which is actually an alias
1745 // for a special case of UBFM. Otherwise, we fall back to the imported
1746 // selector which will match the register variant.
1747  assert(I.getOpcode() == TargetOpcode::G_SHL && "unexpected op");
1748 const auto &MO = I.getOperand(2);
1749 auto VRegAndVal = getConstantVRegVal(MO.getReg(), MRI);
1750 if (!VRegAndVal)
1751 return false;
1752
1753 const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
1754 if (DstTy.isVector())
1755 return false;
1756 bool Is64Bit = DstTy.getSizeInBits() == 64;
1757 auto Imm1Fn = Is64Bit ? selectShiftA_64(MO) : selectShiftA_32(MO);
1758 auto Imm2Fn = Is64Bit ? selectShiftB_64(MO) : selectShiftB_32(MO);
1759 MachineIRBuilder MIB(I);
1760
1761 if (!Imm1Fn || !Imm2Fn)
1762 return false;
1763
1764 auto NewI =
1765 MIB.buildInstr(Is64Bit ? AArch64::UBFMXri : AArch64::UBFMWri,
1766 {I.getOperand(0).getReg()}, {I.getOperand(1).getReg()});
1767
1768 for (auto &RenderFn : *Imm1Fn)
1769 RenderFn(NewI);
1770 for (auto &RenderFn : *Imm2Fn)
1771 RenderFn(NewI);
1772
1773 I.eraseFromParent();
1774 return constrainSelectedInstRegOperands(*NewI, TII, TRI, RBI);
1775}
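// For reference, the alias being matched above is, for a 64-bit LSL by s
// with 0 < s < 64:
//
//   lsl xd, xn, #s  ==  ubfm xd, xn, #((64 - s) % 64), #(63 - s)
//
// (and likewise with 32 for UBFMWri); selectShiftA_* and selectShiftB_*
// render those two immediates onto the UBFM built here.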
1776
1777bool AArch64InstructionSelector::contractCrossBankCopyIntoStore(
1778 MachineInstr &I, MachineRegisterInfo &MRI) {
1779  assert(I.getOpcode() == TargetOpcode::G_STORE && "Expected G_STORE");
1780 // If we're storing a scalar, it doesn't matter what register bank that
1781 // scalar is on. All that matters is the size.
1782 //
1783 // So, if we see something like this (with a 32-bit scalar as an example):
1784 //
1785 // %x:gpr(s32) = ... something ...
1786 // %y:fpr(s32) = COPY %x:gpr(s32)
1787 // G_STORE %y:fpr(s32)
1788 //
1789 // We can fix this up into something like this:
1790 //
1791 // G_STORE %x:gpr(s32)
1792 //
1793 // And then continue the selection process normally.
1794 Register DefDstReg = getSrcRegIgnoringCopies(I.getOperand(0).getReg(), MRI);
1795 if (!DefDstReg.isValid())
1796 return false;
1797 LLT DefDstTy = MRI.getType(DefDstReg);
1798 Register StoreSrcReg = I.getOperand(0).getReg();
1799 LLT StoreSrcTy = MRI.getType(StoreSrcReg);
1800
1801 // If we get something strange like a physical register, then we shouldn't
1802 // go any further.
1803 if (!DefDstTy.isValid())
1804 return false;
1805
1806 // Are the source and dst types the same size?
1807 if (DefDstTy.getSizeInBits() != StoreSrcTy.getSizeInBits())
1808 return false;
1809
1810 if (RBI.getRegBank(StoreSrcReg, MRI, TRI) ==
1811 RBI.getRegBank(DefDstReg, MRI, TRI))
1812 return false;
1813
1814 // We have a cross-bank copy, which is entering a store. Let's fold it.
1815 I.getOperand(0).setReg(DefDstReg);
1816 return true;
1817}
1818
1819bool AArch64InstructionSelector::earlySelect(MachineInstr &I) const {
1820  assert(I.getParent() && "Instruction should be in a basic block!");
1821  assert(I.getParent()->getParent() && "Instruction should be in a function!");
1822
1823 MachineBasicBlock &MBB = *I.getParent();
1824 MachineFunction &MF = *MBB.getParent();
1825 MachineRegisterInfo &MRI = MF.getRegInfo();
1826
1827 switch (I.getOpcode()) {
1828 case TargetOpcode::G_BR: {
1829 // If the branch jumps to the fallthrough block, don't bother emitting it.
1830 // Only do this for -O0 for a good code size improvement, because when
1831 // optimizations are enabled we want to leave this choice to
1832 // MachineBlockPlacement.
1833 bool EnableOpt = MF.getTarget().getOptLevel() != CodeGenOpt::None;
1834 if (EnableOpt || !MBB.isLayoutSuccessor(I.getOperand(0).getMBB()))
1835 return false;
1836 I.eraseFromParent();
1837 return true;
1838 }
1839 case TargetOpcode::G_SHL:
1840 return earlySelectSHL(I, MRI);
1841 case TargetOpcode::G_CONSTANT: {
1842 bool IsZero = false;
1843 if (I.getOperand(1).isCImm())
1844 IsZero = I.getOperand(1).getCImm()->getZExtValue() == 0;
1845 else if (I.getOperand(1).isImm())
1846 IsZero = I.getOperand(1).getImm() == 0;
1847
1848 if (!IsZero)
1849 return false;
1850
1851 Register DefReg = I.getOperand(0).getReg();
1852 LLT Ty = MRI.getType(DefReg);
1853 if (Ty.getSizeInBits() == 64) {
1854 I.getOperand(1).ChangeToRegister(AArch64::XZR, false);
1855 RBI.constrainGenericRegister(DefReg, AArch64::GPR64RegClass, MRI);
1856 } else if (Ty.getSizeInBits() == 32) {
1857 I.getOperand(1).ChangeToRegister(AArch64::WZR, false);
1858 RBI.constrainGenericRegister(DefReg, AArch64::GPR32RegClass, MRI);
1859 } else
1860 return false;
1861
1862 I.setDesc(TII.get(TargetOpcode::COPY));
1863 return true;
1864 }
1865 default:
1866 return false;
1867 }
1868}
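// For the G_CONSTANT case above, a zero constant such as
//
//   %0:gpr(s64) = G_CONSTANT i64 0
//
// is rewritten in place into a COPY from the zero register (roughly
// %0 = COPY $xzr), avoiding the materialization of an all-zero immediate.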
1869
1870bool AArch64InstructionSelector::select(MachineInstr &I) {
1871  assert(I.getParent() && "Instruction should be in a basic block!");
1872  assert(I.getParent()->getParent() && "Instruction should be in a function!");
1873
1874 MachineBasicBlock &MBB = *I.getParent();
1875 MachineFunction &MF = *MBB.getParent();
1876 MachineRegisterInfo &MRI = MF.getRegInfo();
1877
1878 const AArch64Subtarget *Subtarget =
1879 &static_cast<const AArch64Subtarget &>(MF.getSubtarget());
1880 if (Subtarget->requiresStrictAlign()) {
1881 // We don't support this feature yet.
1882    LLVM_DEBUG(dbgs() << "AArch64 GISel does not support strict-align yet\n");
1883 return false;
1884 }
1885
1886 unsigned Opcode = I.getOpcode();
1887  // G_PHI requires the same handling as PHI.
1888 if (!I.isPreISelOpcode() || Opcode == TargetOpcode::G_PHI) {
1889 // Certain non-generic instructions also need some special handling.
1890
1891 if (Opcode == TargetOpcode::LOAD_STACK_GUARD)
1892 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
1893
1894 if (Opcode == TargetOpcode::PHI || Opcode == TargetOpcode::G_PHI) {
1895 const Register DefReg = I.getOperand(0).getReg();
1896 const LLT DefTy = MRI.getType(DefReg);
1897
1898 const RegClassOrRegBank &RegClassOrBank =
1899 MRI.getRegClassOrRegBank(DefReg);
1900
1901 const TargetRegisterClass *DefRC
1902 = RegClassOrBank.dyn_cast<const TargetRegisterClass *>();
1903 if (!DefRC) {
1904 if (!DefTy.isValid()) {
1905          LLVM_DEBUG(dbgs() << "PHI operand has no type, not a gvreg?\n");
1906 return false;
1907 }
1908 const RegisterBank &RB = *RegClassOrBank.get<const RegisterBank *>();
1909 DefRC = getRegClassForTypeOnBank(DefTy, RB, RBI);
1910 if (!DefRC) {
1911          LLVM_DEBUG(dbgs() << "PHI operand has unexpected size/bank\n");
1912 return false;
1913 }
1914 }
1915
1916 I.setDesc(TII.get(TargetOpcode::PHI));
1917
1918 return RBI.constrainGenericRegister(DefReg, *DefRC, MRI);
1919 }
1920
1921 if (I.isCopy())
1922 return selectCopy(I, TII, MRI, TRI, RBI);
1923
1924 return true;
1925 }
1926
1927
1928 if (I.getNumOperands() != I.getNumExplicitOperands()) {
1929    LLVM_DEBUG(
1930        dbgs() << "Generic instruction has unexpected implicit operands\n");
1931 return false;
1932 }
1933
1934 // Try to do some lowering before we start instruction selecting. These
1935 // lowerings are purely transformations on the input G_MIR and so selection
1936 // must continue after any modification of the instruction.
1937 if (preISelLower(I)) {
1938 Opcode = I.getOpcode(); // The opcode may have been modified, refresh it.
1939 }
1940
1941 // There may be patterns where the importer can't deal with them optimally,
1942 // but does select it to a suboptimal sequence so our custom C++ selection
1943 // code later never has a chance to work on it. Therefore, we have an early
1944 // selection attempt here to give priority to certain selection routines
1945 // over the imported ones.
1946 if (earlySelect(I))
1947 return true;
1948
1949 if (selectImpl(I, *CoverageInfo))
1950 return true;
1951
1952 LLT Ty =
1953 I.getOperand(0).isReg() ? MRI.getType(I.getOperand(0).getReg()) : LLT{};
1954
1955 MachineIRBuilder MIB(I);
1956
1957 switch (Opcode) {
1958 case TargetOpcode::G_BRCOND: {
1959 if (Ty.getSizeInBits() > 32) {
1960 // We shouldn't need this on AArch64, but it would be implemented as an
1961 // EXTRACT_SUBREG followed by a TBNZW because TBNZX has no encoding if the
1962 // bit being tested is < 32.
1963      LLVM_DEBUG(dbgs() << "G_BRCOND has type: " << Ty
1964                        << ", expected at most 32-bits");
1965 return false;
1966 }
1967
1968 Register CondReg = I.getOperand(0).getReg();
1969 MachineBasicBlock *DestMBB = I.getOperand(1).getMBB();
1970
1971 // Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z
1972 // instructions will not be produced, as they are conditional branch
1973 // instructions that do not set flags.
1974 if (ProduceNonFlagSettingCondBr && selectCompareBranch(I, MF, MRI))
1975 return true;
1976
1977 if (ProduceNonFlagSettingCondBr) {
1978 unsigned BOpc = AArch64::TBNZW;
1979 // Try to fold a not, i.e. a xor, cond, 1.
1980 Register XorSrc;
1981 int64_t Cst;
1982 if (mi_match(CondReg, MRI,
1983 m_GTrunc(m_GXor(m_Reg(XorSrc), m_ICst(Cst)))) &&
1984 Cst == 1) {
1985 CondReg = XorSrc;
1986 BOpc = AArch64::TBZW;
1987 if (MRI.getType(XorSrc).getSizeInBits() > 32)
1988 BOpc = AArch64::TBZX;
1989 }
1990 auto MIB = BuildMI(MBB, I, I.getDebugLoc(), TII.get(BOpc))
1991 .addUse(CondReg)
1992 .addImm(/*bit offset=*/0)
1993 .addMBB(DestMBB);
1994
1995 I.eraseFromParent();
1996 return constrainSelectedInstRegOperands(*MIB.getInstr(), TII, TRI, RBI);
1997 } else {
1998 auto CMP = BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::ANDSWri))
1999 .addDef(AArch64::WZR)
2000 .addUse(CondReg)
2001 .addImm(1);
2002 constrainSelectedInstRegOperands(*CMP.getInstr(), TII, TRI, RBI);
2003 auto Bcc =
2004 BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::Bcc))
2005 .addImm(AArch64CC::EQ)
2006 .addMBB(DestMBB);
2007
2008 I.eraseFromParent();
2009 return constrainSelectedInstRegOperands(*Bcc.getInstr(), TII, TRI, RBI);
2010 }
2011 }
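// As an example of the not-fold above (illustrative vregs):
//
//   %not = G_XOR %c, 1
//   %t = G_TRUNC %not
//   G_BRCOND %t, %bb.3
//
// selects to TBZW %c, 0, %bb.3, since branching on the inverted condition is
// the same as branching when bit 0 of %c is clear.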
2012
2013 case TargetOpcode::G_BRINDIRECT: {
2014 I.setDesc(TII.get(AArch64::BR));
2015 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2016 }
2017
2018 case TargetOpcode::G_BRJT:
2019 return selectBrJT(I, MRI);
2020
2021 case AArch64::G_ADD_LOW: {
2022    // This op may have been separated from its ADRP companion by the localizer
2023 // or some other code motion pass. Given that many CPUs will try to
2024 // macro fuse these operations anyway, select this into a MOVaddr pseudo
2025 // which will later be expanded into an ADRP+ADD pair after scheduling.
2026 MachineInstr *BaseMI = MRI.getVRegDef(I.getOperand(1).getReg());
2027 if (BaseMI->getOpcode() != AArch64::ADRP) {
2028 I.setDesc(TII.get(AArch64::ADDXri));
2029 I.addOperand(MachineOperand::CreateImm(0));
2030 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2031 }
2032    assert(TM.getCodeModel() == CodeModel::Small &&
2033           "Expected small code model");
2034 MachineIRBuilder MIB(I);
2035 auto Op1 = BaseMI->getOperand(1);
2036 auto Op2 = I.getOperand(2);
2037 auto MovAddr = MIB.buildInstr(AArch64::MOVaddr, {I.getOperand(0)}, {})
2038 .addGlobalAddress(Op1.getGlobal(), Op1.getOffset(),
2039 Op1.getTargetFlags())
2040 .addGlobalAddress(Op2.getGlobal(), Op2.getOffset(),
2041 Op2.getTargetFlags());
2042 I.eraseFromParent();
2043 return constrainSelectedInstRegOperands(*MovAddr, TII, TRI, RBI);
2044 }
2045
2046 case TargetOpcode::G_BSWAP: {
2047 // Handle vector types for G_BSWAP directly.
2048 Register DstReg = I.getOperand(0).getReg();
2049 LLT DstTy = MRI.getType(DstReg);
2050
2051 // We should only get vector types here; everything else is handled by the
2052 // importer right now.
2053 if (!DstTy.isVector() || DstTy.getSizeInBits() > 128) {
2054      LLVM_DEBUG(dbgs() << "Dst type for G_BSWAP currently unsupported.\n");
2055 return false;
2056 }
2057
2058 // Only handle 4 and 2 element vectors for now.
2059 // TODO: 16-bit elements.
2060 unsigned NumElts = DstTy.getNumElements();
2061 if (NumElts != 4 && NumElts != 2) {
2062      LLVM_DEBUG(dbgs() << "Unsupported number of elements for G_BSWAP.\n");
2063 return false;
2064 }
2065
2066 // Choose the correct opcode for the supported types. Right now, that's
2067 // v2s32, v4s32, and v2s64.
2068 unsigned Opc = 0;
2069 unsigned EltSize = DstTy.getElementType().getSizeInBits();
2070 if (EltSize == 32)
2071 Opc = (DstTy.getNumElements() == 2) ? AArch64::REV32v8i8
2072 : AArch64::REV32v16i8;
2073 else if (EltSize == 64)
2074 Opc = AArch64::REV64v16i8;
2075
2076 // We should always get something by the time we get here...
2077    assert(Opc != 0 && "Didn't get an opcode for G_BSWAP?");
2078
2079 I.setDesc(TII.get(Opc));
2080 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2081 }
2082
2083 case TargetOpcode::G_FCONSTANT:
2084 case TargetOpcode::G_CONSTANT: {
2085 const bool isFP = Opcode == TargetOpcode::G_FCONSTANT;
2086
2087 const LLT s8 = LLT::scalar(8);
2088 const LLT s16 = LLT::scalar(16);
2089 const LLT s32 = LLT::scalar(32);
2090 const LLT s64 = LLT::scalar(64);
2091 const LLT p0 = LLT::pointer(0, 64);
2092
2093 const Register DefReg = I.getOperand(0).getReg();
2094 const LLT DefTy = MRI.getType(DefReg);
2095 const unsigned DefSize = DefTy.getSizeInBits();
2096 const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI);
2097
2098 // FIXME: Redundant check, but even less readable when factored out.
2099 if (isFP) {
2100 if (Ty != s32 && Ty != s64) {
2101        LLVM_DEBUG(dbgs() << "Unable to materialize FP " << Ty
2102                          << " constant, expected: " << s32 << " or " << s64
2103                          << '\n');
2104 return false;
2105 }
2106
2107 if (RB.getID() != AArch64::FPRRegBankID) {
2108        LLVM_DEBUG(dbgs() << "Unable to materialize FP " << Ty
2109                          << " constant on bank: " << RB
2110                          << ", expected: FPR\n");
2111 return false;
2112 }
2113
2114 // The case when we have 0.0 is covered by tablegen. Reject it here so we
2115 // can be sure tablegen works correctly and isn't rescued by this code.
2116 if (I.getOperand(1).getFPImm()->getValueAPF().isExactlyValue(0.0))
2117 return false;
2118 } else {
2119 // s32 and s64 are covered by tablegen.
2120 if (Ty != p0 && Ty != s8 && Ty != s16) {
2121        LLVM_DEBUG(dbgs() << "Unable to materialize integer " << Ty
2122                          << " constant, expected: " << s32 << ", " << s64
2123                          << ", or " << p0 << '\n');
2124 return false;
2125 }
2126
2127 if (RB.getID() != AArch64::GPRRegBankID) {
2128        LLVM_DEBUG(dbgs() << "Unable to materialize integer " << Ty
2129                          << " constant on bank: " << RB
2130                          << ", expected: GPR\n");
2131 return false;
2132 }
2133 }
2134
2135 // We allow G_CONSTANT of types < 32b.
2136 const unsigned MovOpc =
2137 DefSize == 64 ? AArch64::MOVi64imm : AArch64::MOVi32imm;
2138
2139 if (isFP) {
2140 // Either emit a FMOV, or emit a copy to emit a normal mov.
2141 const TargetRegisterClass &GPRRC =
2142 DefSize == 32 ? AArch64::GPR32RegClass : AArch64::GPR64RegClass;
2143 const TargetRegisterClass &FPRRC =
2144 DefSize == 32 ? AArch64::FPR32RegClass : AArch64::FPR64RegClass;
2145
2146 // Can we use a FMOV instruction to represent the immediate?
2147 if (emitFMovForFConstant(I, MRI))
2148 return true;
2149
2150 // For 64b values, emit a constant pool load instead.
2151 if (DefSize == 64) {
2152 auto *FPImm = I.getOperand(1).getFPImm();
2153 MachineIRBuilder MIB(I);
2154 auto *LoadMI = emitLoadFromConstantPool(FPImm, MIB);
2155 if (!LoadMI) {
2156          LLVM_DEBUG(dbgs() << "Failed to load double constant pool entry\n");
2157 return false;
2158 }
2159 MIB.buildCopy({DefReg}, {LoadMI->getOperand(0).getReg()});
2160 I.eraseFromParent();
2161 return RBI.constrainGenericRegister(DefReg, FPRRC, MRI);
2162 }
2163
2164 // Nope. Emit a copy and use a normal mov instead.
2165 const Register DefGPRReg = MRI.createVirtualRegister(&GPRRC);
2166 MachineOperand &RegOp = I.getOperand(0);
2167 RegOp.setReg(DefGPRReg);
2168 MIB.setInsertPt(MIB.getMBB(), std::next(I.getIterator()));
2169 MIB.buildCopy({DefReg}, {DefGPRReg});
2170
2171 if (!RBI.constrainGenericRegister(DefReg, FPRRC, MRI)) {
2172      LLVM_DEBUG(dbgs() << "Failed to constrain G_FCONSTANT def operand\n");
2173 return false;
2174 }
2175
2176 MachineOperand &ImmOp = I.getOperand(1);
2177 // FIXME: Is going through int64_t always correct?
2178 ImmOp.ChangeToImmediate(
2179 ImmOp.getFPImm()->getValueAPF().bitcastToAPInt().getZExtValue());
2180 } else if (I.getOperand(1).isCImm()) {
2181 uint64_t Val = I.getOperand(1).getCImm()->getZExtValue();
2182 I.getOperand(1).ChangeToImmediate(Val);
2183 } else if (I.getOperand(1).isImm()) {
2184 uint64_t Val = I.getOperand(1).getImm();
2185 I.getOperand(1).ChangeToImmediate(Val);
2186 }
2187
2188 I.setDesc(TII.get(MovOpc));
2189 constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2190 return true;
2191 }
2192 case TargetOpcode::G_EXTRACT: {
2193 Register DstReg = I.getOperand(0).getReg();
2194 Register SrcReg = I.getOperand(1).getReg();
2195 LLT SrcTy = MRI.getType(SrcReg);
2196 LLT DstTy = MRI.getType(DstReg);
2197 (void)DstTy;
2198 unsigned SrcSize = SrcTy.getSizeInBits();
2199
2200 if (SrcTy.getSizeInBits() > 64) {
2201 // This should be an extract of an s128, which is like a vector extract.
2202 if (SrcTy.getSizeInBits() != 128)
2203 return false;
2204 // Only support extracting 64 bits from an s128 at the moment.
2205 if (DstTy.getSizeInBits() != 64)
2206 return false;
2207
2208 const RegisterBank &SrcRB = *RBI.getRegBank(SrcReg, MRI, TRI);
2209 const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
2210 // Check we have the right regbank always.
2211      assert(SrcRB.getID() == AArch64::FPRRegBankID &&
2212             DstRB.getID() == AArch64::FPRRegBankID &&
2213             "Wrong extract regbank!");
2214 (void)SrcRB;
2215
2216 // Emit the same code as a vector extract.
2217 // Offset must be a multiple of 64.
2218 unsigned Offset = I.getOperand(2).getImm();
2219 if (Offset % 64 != 0)
2220 return false;
2221 unsigned LaneIdx = Offset / 64;
2222 MachineIRBuilder MIB(I);
2223 MachineInstr *Extract = emitExtractVectorElt(
2224 DstReg, DstRB, LLT::scalar(64), SrcReg, LaneIdx, MIB);
2225 if (!Extract)
2226 return false;
2227 I.eraseFromParent();
2228 return true;
2229 }
2230
2231 I.setDesc(TII.get(SrcSize == 64 ? AArch64::UBFMXri : AArch64::UBFMWri));
2232 MachineInstrBuilder(MF, I).addImm(I.getOperand(2).getImm() +
2233 Ty.getSizeInBits() - 1);
2234
2235 if (SrcSize < 64) {
2236      assert(SrcSize == 32 && DstTy.getSizeInBits() == 16 &&
2237             "unexpected G_EXTRACT types");
2238 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2239 }
2240
2241 DstReg = MRI.createGenericVirtualRegister(LLT::scalar(64));
2242 MIB.setInsertPt(MIB.getMBB(), std::next(I.getIterator()));
2243 MIB.buildInstr(TargetOpcode::COPY, {I.getOperand(0).getReg()}, {})
2244 .addReg(DstReg, 0, AArch64::sub_32);
2245 RBI.constrainGenericRegister(I.getOperand(0).getReg(),
2246 AArch64::GPR32RegClass, MRI);
2247 I.getOperand(0).setReg(DstReg);
2248
2249 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2250 }
2251
2252 case TargetOpcode::G_INSERT: {
2253 LLT SrcTy = MRI.getType(I.getOperand(2).getReg());
2254 LLT DstTy = MRI.getType(I.getOperand(0).getReg());
2255 unsigned DstSize = DstTy.getSizeInBits();
2256 // Larger inserts are vectors, same-size ones should be something else by
2257 // now (split up or turned into COPYs).
2258 if (Ty.getSizeInBits() > 64 || SrcTy.getSizeInBits() > 32)
2259 return false;
2260
2261 I.setDesc(TII.get(DstSize == 64 ? AArch64::BFMXri : AArch64::BFMWri));
2262 unsigned LSB = I.getOperand(3).getImm();
2263 unsigned Width = MRI.getType(I.getOperand(2).getReg()).getSizeInBits();
2264 I.getOperand(3).setImm((DstSize - LSB) % DstSize);
2265 MachineInstrBuilder(MF, I).addImm(Width - 1);
2266
2267 if (DstSize < 64) {
2268       assert(DstSize == 32 && SrcTy.getSizeInBits() == 16 &&
2269              "unexpected G_INSERT types");
2270 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2271 }
2272
2273 Register SrcReg = MRI.createGenericVirtualRegister(LLT::scalar(64));
2274 BuildMI(MBB, I.getIterator(), I.getDebugLoc(),
2275 TII.get(AArch64::SUBREG_TO_REG))
2276 .addDef(SrcReg)
2277 .addImm(0)
2278 .addUse(I.getOperand(2).getReg())
2279 .addImm(AArch64::sub_32);
2280 RBI.constrainGenericRegister(I.getOperand(2).getReg(),
2281 AArch64::GPR32RegClass, MRI);
2282 I.getOperand(2).setReg(SrcReg);
2283
2284 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2285 }
2286 case TargetOpcode::G_FRAME_INDEX: {
2287 // allocas and G_FRAME_INDEX are only supported in addrspace(0).
2288 if (Ty != LLT::pointer(0, 64)) {
2289       LLVM_DEBUG(dbgs() << "G_FRAME_INDEX pointer has type: " << Ty
2290                         << ", expected: " << LLT::pointer(0, 64) << '\n');
2291 return false;
2292 }
2293 I.setDesc(TII.get(AArch64::ADDXri));
2294
2295 // MOs for a #0 shifted immediate.
2296 I.addOperand(MachineOperand::CreateImm(0));
2297 I.addOperand(MachineOperand::CreateImm(0));
2298
2299 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2300 }
2301
2302 case TargetOpcode::G_GLOBAL_VALUE: {
2303 auto GV = I.getOperand(1).getGlobal();
2304 if (GV->isThreadLocal())
2305 return selectTLSGlobalValue(I, MRI);
2306
2307 unsigned OpFlags = STI.ClassifyGlobalReference(GV, TM);
2308 if (OpFlags & AArch64II::MO_GOT) {
2309 I.setDesc(TII.get(AArch64::LOADgot));
2310 I.getOperand(1).setTargetFlags(OpFlags);
2311 } else if (TM.getCodeModel() == CodeModel::Large) {
2312 // Materialize the global using movz/movk instructions.
2313 materializeLargeCMVal(I, GV, OpFlags);
2314 I.eraseFromParent();
2315 return true;
2316 } else if (TM.getCodeModel() == CodeModel::Tiny) {
2317 I.setDesc(TII.get(AArch64::ADR));
2318 I.getOperand(1).setTargetFlags(OpFlags);
2319 } else {
2320 I.setDesc(TII.get(AArch64::MOVaddr));
2321 I.getOperand(1).setTargetFlags(OpFlags | AArch64II::MO_PAGE);
2322 MachineInstrBuilder MIB(MF, I);
2323 MIB.addGlobalAddress(GV, I.getOperand(1).getOffset(),
2324 OpFlags | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
2325 }
2326 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2327 }
2328
2329 case TargetOpcode::G_ZEXTLOAD:
2330 case TargetOpcode::G_LOAD:
2331 case TargetOpcode::G_STORE: {
2332 bool IsZExtLoad = I.getOpcode() == TargetOpcode::G_ZEXTLOAD;
2333 MachineIRBuilder MIB(I);
2334
2335 LLT PtrTy = MRI.getType(I.getOperand(1).getReg());
2336
2337 if (PtrTy != LLT::pointer(0, 64)) {
2338       LLVM_DEBUG(dbgs() << "Load/Store pointer has type: " << PtrTy
2339                         << ", expected: " << LLT::pointer(0, 64) << '\n');
2340 return false;
2341 }
2342
2343 auto &MemOp = **I.memoperands_begin();
2344 uint64_t MemSizeInBytes = MemOp.getSize();
2345 if (MemOp.isAtomic()) {
2346 // For now we just support s8 acquire loads to be able to compile stack
2347 // protector code.
2348 if (MemOp.getOrdering() == AtomicOrdering::Acquire &&
2349 MemSizeInBytes == 1) {
2350 I.setDesc(TII.get(AArch64::LDARB));
2351 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2352 }
2353       LLVM_DEBUG(dbgs() << "Atomic load/store not fully supported yet\n");
2354 return false;
2355 }
2356 unsigned MemSizeInBits = MemSizeInBytes * 8;
2357
2358#ifndef NDEBUG
2359 const Register PtrReg = I.getOperand(1).getReg();
2360 const RegisterBank &PtrRB = *RBI.getRegBank(PtrReg, MRI, TRI);
2361 // Sanity-check the pointer register.
2362     assert(PtrRB.getID() == AArch64::GPRRegBankID &&
2363            "Load/Store pointer operand isn't a GPR");
2364     assert(MRI.getType(PtrReg).isPointer() &&
2365            "Load/Store pointer operand isn't a pointer");
2366#endif
2367
2368 const Register ValReg = I.getOperand(0).getReg();
2369 const RegisterBank &RB = *RBI.getRegBank(ValReg, MRI, TRI);
2370
2371 // Helper lambda for partially selecting I. Either returns the original
2372 // instruction with an updated opcode, or a new instruction.
2373 auto SelectLoadStoreAddressingMode = [&]() -> MachineInstr * {
2374 bool IsStore = I.getOpcode() == TargetOpcode::G_STORE;
1. Assuming the condition is true
2375 const unsigned NewOpc =
2376 selectLoadStoreUIOp(I.getOpcode(), RB.getID(), MemSizeInBits);
2377 if (NewOpc == I.getOpcode())
2. Taking false branch
2378 return nullptr;
2379 // Check if we can fold anything into the addressing mode.
2380 auto AddrModeFns =
2381 selectAddrModeIndexed(I.getOperand(1), MemSizeInBytes);
3. Calling 'AArch64InstructionSelector::selectAddrModeIndexed'
2382 if (!AddrModeFns) {
2383 // Can't fold anything. Use the original instruction.
2384 I.setDesc(TII.get(NewOpc));
2385 I.addOperand(MachineOperand::CreateImm(0));
2386 return &I;
2387 }
2388
2389 // Folded something. Create a new instruction and return it.
2390 auto NewInst = MIB.buildInstr(NewOpc, {}, {}, I.getFlags());
2391 IsStore ? NewInst.addUse(ValReg) : NewInst.addDef(ValReg);
2392 NewInst.cloneMemRefs(I);
2393 for (auto &Fn : *AddrModeFns)
2394 Fn(NewInst);
2395 I.eraseFromParent();
2396 return &*NewInst;
2397 };
2398
2399 MachineInstr *LoadStore = SelectLoadStoreAddressingMode();
2400 if (!LoadStore)
2401 return false;
2402
2403 // If we're storing a 0, use WZR/XZR.
2404 if (Opcode == TargetOpcode::G_STORE) {
2405 auto CVal = getConstantVRegValWithLookThrough(
2406 LoadStore->getOperand(0).getReg(), MRI, /*LookThroughInstrs = */ true,
2407 /*HandleFConstants = */ false);
2408 if (CVal && CVal->Value == 0) {
2409 switch (LoadStore->getOpcode()) {
2410 case AArch64::STRWui:
2411 case AArch64::STRHHui:
2412 case AArch64::STRBBui:
2413 LoadStore->getOperand(0).setReg(AArch64::WZR);
2414 break;
2415 case AArch64::STRXui:
2416 LoadStore->getOperand(0).setReg(AArch64::XZR);
2417 break;
2418 }
2419 }
2420 }
2421
2422 if (IsZExtLoad) {
2423 // The zextload from a smaller type to i32 should be handled by the
2424 // importer.
2425 if (MRI.getType(LoadStore->getOperand(0).getReg()).getSizeInBits() != 64)
2426 return false;
2427 // If we have a ZEXTLOAD then change the load's type to be a narrower reg
2428 // and zero_extend with SUBREG_TO_REG.
2429 Register LdReg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
2430 Register DstReg = LoadStore->getOperand(0).getReg();
2431 LoadStore->getOperand(0).setReg(LdReg);
2432
2433 MIB.setInsertPt(MIB.getMBB(), std::next(LoadStore->getIterator()));
2434 MIB.buildInstr(AArch64::SUBREG_TO_REG, {DstReg}, {})
2435 .addImm(0)
2436 .addUse(LdReg)
2437 .addImm(AArch64::sub_32);
2438 constrainSelectedInstRegOperands(*LoadStore, TII, TRI, RBI);
2439 return RBI.constrainGenericRegister(DstReg, AArch64::GPR64allRegClass,
2440 MRI);
2441 }
2442 return constrainSelectedInstRegOperands(*LoadStore, TII, TRI, RBI);
2443 }
2444
2445 case TargetOpcode::G_SMULH:
2446 case TargetOpcode::G_UMULH: {
2447 // Reject the various things we don't support yet.
2448 if (unsupportedBinOp(I, RBI, MRI, TRI))
2449 return false;
2450
2451 const Register DefReg = I.getOperand(0).getReg();
2452 const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI);
2453
2454 if (RB.getID() != AArch64::GPRRegBankID) {
2455       LLVM_DEBUG(dbgs() << "G_[SU]MULH on bank: " << RB << ", expected: GPR\n");
2456 return false;
2457 }
2458
2459 if (Ty != LLT::scalar(64)) {
2460       LLVM_DEBUG(dbgs() << "G_[SU]MULH has type: " << Ty
2461                         << ", expected: " << LLT::scalar(64) << '\n');
2462 return false;
2463 }
2464
2465 unsigned NewOpc = I.getOpcode() == TargetOpcode::G_SMULH ? AArch64::SMULHrr
2466 : AArch64::UMULHrr;
2467 I.setDesc(TII.get(NewOpc));
2468
2469 // Now that we selected an opcode, we need to constrain the register
2470 // operands to use appropriate classes.
2471 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2472 }
2473 case TargetOpcode::G_LSHR:
2474 case TargetOpcode::G_ASHR:
2475 if (MRI.getType(I.getOperand(0).getReg()).isVector())
2476 return selectVectorAshrLshr(I, MRI);
2477     LLVM_FALLTHROUGH;
2478 case TargetOpcode::G_SHL:
2479 if (Opcode == TargetOpcode::G_SHL &&
2480 MRI.getType(I.getOperand(0).getReg()).isVector())
2481 return selectVectorSHL(I, MRI);
2482     LLVM_FALLTHROUGH;
2483 case TargetOpcode::G_FADD:
2484 case TargetOpcode::G_FSUB:
2485 case TargetOpcode::G_FMUL:
2486 case TargetOpcode::G_FDIV:
2487 case TargetOpcode::G_OR: {
2488 // Reject the various things we don't support yet.
2489 if (unsupportedBinOp(I, RBI, MRI, TRI))
2490 return false;
2491
2492 const unsigned OpSize = Ty.getSizeInBits();
2493
2494 const Register DefReg = I.getOperand(0).getReg();
2495 const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI);
2496
2497 const unsigned NewOpc = selectBinaryOp(I.getOpcode(), RB.getID(), OpSize);
2498 if (NewOpc == I.getOpcode())
2499 return false;
2500
2501 I.setDesc(TII.get(NewOpc));
2502 // FIXME: Should the type be always reset in setDesc?
2503
2504 // Now that we selected an opcode, we need to constrain the register
2505 // operands to use appropriate classes.
2506 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2507 }
2508
2509 case TargetOpcode::G_PTR_ADD: {
2510 MachineIRBuilder MIRBuilder(I);
2511 emitADD(I.getOperand(0).getReg(), I.getOperand(1), I.getOperand(2),
2512 MIRBuilder);
2513 I.eraseFromParent();
2514 return true;
2515 }
2516 case TargetOpcode::G_UADDO: {
2517 // TODO: Support other types.
2518 unsigned OpSize = Ty.getSizeInBits();
2519 if (OpSize != 32 && OpSize != 64) {
2520       LLVM_DEBUG(
2521           dbgs()
2522           << "G_UADDO currently only supported for 32 and 64 b types.\n");
2523 return false;
2524 }
2525
2526 // TODO: Support vectors.
2527 if (Ty.isVector()) {
2528       LLVM_DEBUG(dbgs() << "G_UADDO currently only supported for scalars.\n");
2529 return false;
2530 }
2531
2532 // Add and set the set condition flag.
2533 MachineIRBuilder MIRBuilder(I);
2534 emitADDS(I.getOperand(0).getReg(), I.getOperand(2), I.getOperand(3),
2535 MIRBuilder);
2536
2537 // Now, put the overflow result in the register given by the first operand
2538 // to the G_UADDO. CSINC increments the result when the predicate is false,
2539 // so to get the increment when it's true, we need to use the inverse. In
2540 // this case, we want to increment when carry is set.
2541 auto CsetMI = MIRBuilder
2542 .buildInstr(AArch64::CSINCWr, {I.getOperand(1).getReg()},
2543 {Register(AArch64::WZR), Register(AArch64::WZR)})
2544 .addImm(getInvertedCondCode(AArch64CC::HS));
2545 constrainSelectedInstRegOperands(*CsetMI, TII, TRI, RBI);
2546 I.eraseFromParent();
2547 return true;
2548 }
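
A note on the CSINC above: HS ("carry set") is the condition that signals unsigned overflow of the preceding ADDS, and CSINC with the inverted code produces 1 exactly when HS holds. A minimal standalone sketch of that identity, independent of the selector (names here are illustrative):

    // Unsigned add overflow == carry out == the wrapped sum is smaller than
    // either operand; this is the bit the CSINC materializes.
    #include <cassert>
    #include <cstdint>

    static bool uaddOverflows(uint64_t A, uint64_t B) {
      uint64_t Sum = A + B; // wraps modulo 2^64
      return Sum < A;
    }

    int main() {
      assert(!uaddOverflows(1, 2));
      assert(uaddOverflows(~0ULL, 1));
      return 0;
    }
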
2549
2550 case TargetOpcode::G_PTRMASK: {
2551 Register MaskReg = I.getOperand(2).getReg();
2552 Optional<int64_t> MaskVal = getConstantVRegVal(MaskReg, MRI);
2553 // TODO: Implement arbitrary cases
2554 if (!MaskVal || !isShiftedMask_64(*MaskVal))
2555 return false;
2556
2557 uint64_t Mask = *MaskVal;
2558 I.setDesc(TII.get(AArch64::ANDXri));
2559 I.getOperand(2).ChangeToImmediate(
2560 AArch64_AM::encodeLogicalImmediate(Mask, 64));
2561
2562 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2563 }
2564 case TargetOpcode::G_PTRTOINT:
2565 case TargetOpcode::G_TRUNC: {
2566 const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
2567 const LLT SrcTy = MRI.getType(I.getOperand(1).getReg());
2568
2569 const Register DstReg = I.getOperand(0).getReg();
2570 const Register SrcReg = I.getOperand(1).getReg();
2571
2572 const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
2573 const RegisterBank &SrcRB = *RBI.getRegBank(SrcReg, MRI, TRI);
2574
2575 if (DstRB.getID() != SrcRB.getID()) {
2576       LLVM_DEBUG(
2577           dbgs() << "G_TRUNC/G_PTRTOINT input/output on different banks\n");
2578 return false;
2579 }
2580
2581 if (DstRB.getID() == AArch64::GPRRegBankID) {
2582 const TargetRegisterClass *DstRC =
2583 getRegClassForTypeOnBank(DstTy, DstRB, RBI);
2584 if (!DstRC)
2585 return false;
2586
2587 const TargetRegisterClass *SrcRC =
2588 getRegClassForTypeOnBank(SrcTy, SrcRB, RBI);
2589 if (!SrcRC)
2590 return false;
2591
2592 if (!RBI.constrainGenericRegister(SrcReg, *SrcRC, MRI) ||
2593 !RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) {
2594         LLVM_DEBUG(dbgs() << "Failed to constrain G_TRUNC/G_PTRTOINT\n");
2595 return false;
2596 }
2597
2598 if (DstRC == SrcRC) {
2599 // Nothing to be done
2600 } else if (Opcode == TargetOpcode::G_TRUNC && DstTy == LLT::scalar(32) &&
2601 SrcTy == LLT::scalar(64)) {
2602         llvm_unreachable("TableGen can import this case");
2603 return false;
2604 } else if (DstRC == &AArch64::GPR32RegClass &&
2605 SrcRC == &AArch64::GPR64RegClass) {
2606 I.getOperand(1).setSubReg(AArch64::sub_32);
2607 } else {
2608         LLVM_DEBUG(
2609             dbgs() << "Unhandled mismatched classes in G_TRUNC/G_PTRTOINT\n");
2610 return false;
2611 }
2612
2613 I.setDesc(TII.get(TargetOpcode::COPY));
2614 return true;
2615 } else if (DstRB.getID() == AArch64::FPRRegBankID) {
2616 if (DstTy == LLT::vector(4, 16) && SrcTy == LLT::vector(4, 32)) {
2617 I.setDesc(TII.get(AArch64::XTNv4i16));
2618 constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2619 return true;
2620 }
2621
2622 if (!SrcTy.isVector() && SrcTy.getSizeInBits() == 128) {
2623 MachineIRBuilder MIB(I);
2624 MachineInstr *Extract = emitExtractVectorElt(
2625 DstReg, DstRB, LLT::scalar(DstTy.getSizeInBits()), SrcReg, 0, MIB);
2626 if (!Extract)
2627 return false;
2628 I.eraseFromParent();
2629 return true;
2630 }
2631
2632 // We might have a vector G_PTRTOINT, in which case just emit a COPY.
2633 if (Opcode == TargetOpcode::G_PTRTOINT) {
2634         assert(DstTy.isVector() && "Expected an FPR ptrtoint to be a vector");
2635 I.setDesc(TII.get(TargetOpcode::COPY));
2636 return true;
2637 }
2638 }
2639
2640 return false;
2641 }
2642
2643 case TargetOpcode::G_ANYEXT: {
2644 const Register DstReg = I.getOperand(0).getReg();
2645 const Register SrcReg = I.getOperand(1).getReg();
2646
2647 const RegisterBank &RBDst = *RBI.getRegBank(DstReg, MRI, TRI);
2648 if (RBDst.getID() != AArch64::GPRRegBankID) {
2649       LLVM_DEBUG(dbgs() << "G_ANYEXT on bank: " << RBDst
2650                         << ", expected: GPR\n");
2651 return false;
2652 }
2653
2654 const RegisterBank &RBSrc = *RBI.getRegBank(SrcReg, MRI, TRI);
2655 if (RBSrc.getID() != AArch64::GPRRegBankID) {
2656       LLVM_DEBUG(dbgs() << "G_ANYEXT on bank: " << RBSrc
2657                         << ", expected: GPR\n");
2658 return false;
2659 }
2660
2661 const unsigned DstSize = MRI.getType(DstReg).getSizeInBits();
2662
2663 if (DstSize == 0) {
2664       LLVM_DEBUG(dbgs() << "G_ANYEXT operand has no size, not a gvreg?\n");
2665 return false;
2666 }
2667
2668 if (DstSize != 64 && DstSize > 32) {
2669       LLVM_DEBUG(dbgs() << "G_ANYEXT to size: " << DstSize
2670                         << ", expected: 32 or 64\n");
2671 return false;
2672 }
2673 // At this point G_ANYEXT is just like a plain COPY, but we need
2674 // to explicitly form the 64-bit value if any.
2675 if (DstSize > 32) {
2676 Register ExtSrc = MRI.createVirtualRegister(&AArch64::GPR64allRegClass);
2677 BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::SUBREG_TO_REG))
2678 .addDef(ExtSrc)
2679 .addImm(0)
2680 .addUse(SrcReg)
2681 .addImm(AArch64::sub_32);
2682 I.getOperand(1).setReg(ExtSrc);
2683 }
2684 return selectCopy(I, TII, MRI, TRI, RBI);
2685 }
2686
2687 case TargetOpcode::G_ZEXT:
2688 case TargetOpcode::G_SEXT_INREG:
2689 case TargetOpcode::G_SEXT: {
2690 unsigned Opcode = I.getOpcode();
2691 const bool IsSigned = Opcode != TargetOpcode::G_ZEXT;
2692 const Register DefReg = I.getOperand(0).getReg();
2693 Register SrcReg = I.getOperand(1).getReg();
2694 const LLT DstTy = MRI.getType(DefReg);
2695 const LLT SrcTy = MRI.getType(SrcReg);
2696 unsigned DstSize = DstTy.getSizeInBits();
2697 unsigned SrcSize = SrcTy.getSizeInBits();
2698
2699 // SEXT_INREG has the same src reg size as dst, the size of the value to be
2700 // extended is encoded in the imm.
2701 if (Opcode == TargetOpcode::G_SEXT_INREG)
2702 SrcSize = I.getOperand(2).getImm();
2703
2704 if (DstTy.isVector())
2705 return false; // Should be handled by imported patterns.
2706
2707     assert((*RBI.getRegBank(DefReg, MRI, TRI)).getID() ==
2708                AArch64::GPRRegBankID &&
2709            "Unexpected ext regbank");
2710
2711 MachineIRBuilder MIB(I);
2712 MachineInstr *ExtI;
2713
2714 // First check if we're extending the result of a load which has a dest type
2715 // smaller than 32 bits, then this zext is redundant. GPR32 is the smallest
2716 // GPR register on AArch64 and all loads which are smaller automatically
2717 // zero-extend the upper bits. E.g.
2718 // %v(s8) = G_LOAD %p, :: (load 1)
2719 // %v2(s32) = G_ZEXT %v(s8)
2720 if (!IsSigned) {
2721 auto *LoadMI = getOpcodeDef(TargetOpcode::G_LOAD, SrcReg, MRI);
2722 bool IsGPR =
2723 RBI.getRegBank(SrcReg, MRI, TRI)->getID() == AArch64::GPRRegBankID;
2724 if (LoadMI && IsGPR) {
2725 const MachineMemOperand *MemOp = *LoadMI->memoperands_begin();
2726 unsigned BytesLoaded = MemOp->getSize();
2727 if (BytesLoaded < 4 && SrcTy.getSizeInBytes() == BytesLoaded)
2728 return selectCopy(I, TII, MRI, TRI, RBI);
2729 }
2730
2731 // If we are zero extending from 32 bits to 64 bits, it's possible that
2732 // the instruction implicitly does the zero extend for us. In that case,
2733 // we can just emit a SUBREG_TO_REG.
2734 if (IsGPR && SrcSize == 32 && DstSize == 64) {
2735 // Unlike with the G_LOAD case, we don't want to look through copies
2736 // here.
2737 MachineInstr *Def = MRI.getVRegDef(SrcReg);
2738 if (Def && isDef32(*Def)) {
2739 MIB.buildInstr(AArch64::SUBREG_TO_REG, {DefReg}, {})
2740 .addImm(0)
2741 .addUse(SrcReg)
2742 .addImm(AArch64::sub_32);
2743
2744 if (!RBI.constrainGenericRegister(DefReg, AArch64::GPR64RegClass,
2745 MRI)) {
2746             LLVM_DEBUG(dbgs() << "Failed to constrain G_ZEXT destination\n");
2747 return false;
2748 }
2749
2750 if (!RBI.constrainGenericRegister(SrcReg, AArch64::GPR32RegClass,
2751 MRI)) {
2752             LLVM_DEBUG(dbgs() << "Failed to constrain G_ZEXT source\n");
2753 return false;
2754 }
2755
2756 I.eraseFromParent();
2757 return true;
2758 }
2759 }
2760 }
2761
2762 if (DstSize == 64) {
2763 if (Opcode != TargetOpcode::G_SEXT_INREG) {
2764 // FIXME: Can we avoid manually doing this?
2765 if (!RBI.constrainGenericRegister(SrcReg, AArch64::GPR32RegClass,
2766 MRI)) {
2767           LLVM_DEBUG(dbgs() << "Failed to constrain " << TII.getName(Opcode)
2768                             << " operand\n");
2769 return false;
2770 }
2771 SrcReg = MIB.buildInstr(AArch64::SUBREG_TO_REG,
2772 {&AArch64::GPR64RegClass}, {})
2773 .addImm(0)
2774 .addUse(SrcReg)
2775 .addImm(AArch64::sub_32)
2776 .getReg(0);
2777 }
2778
2779 ExtI = MIB.buildInstr(IsSigned ? AArch64::SBFMXri : AArch64::UBFMXri,
2780 {DefReg}, {SrcReg})
2781 .addImm(0)
2782 .addImm(SrcSize - 1);
2783 } else if (DstSize <= 32) {
2784 ExtI = MIB.buildInstr(IsSigned ? AArch64::SBFMWri : AArch64::UBFMWri,
2785 {DefReg}, {SrcReg})
2786 .addImm(0)
2787 .addImm(SrcSize - 1);
2788 } else {
2789 return false;
2790 }
2791
2792 constrainSelectedInstRegOperands(*ExtI, TII, TRI, RBI);
2793 I.eraseFromParent();
2794 return true;
2795 }
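
The SUBREG_TO_REG shortcut in the block above relies on the architectural rule that any write to a W register zeroes bits [63:32] of the corresponding X register, so a 32-to-64-bit zero extend of a value already defined by a 32-bit instruction needs no UBFM. The C++ analogue of that free zero extension, as a trivial standalone sketch:

    #include <cassert>
    #include <cstdint>

    int main() {
      uint32_t W = 0xFFFFFFFFu; // value defined by a "32-bit instruction"
      uint64_t X = W;           // implicit zero extension, no extra work
      assert(X == 0x00000000FFFFFFFFULL);
      return 0;
    }
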
2796
2797 case TargetOpcode::G_SITOFP:
2798 case TargetOpcode::G_UITOFP:
2799 case TargetOpcode::G_FPTOSI:
2800 case TargetOpcode::G_FPTOUI: {
2801 const LLT DstTy = MRI.getType(I.getOperand(0).getReg()),
2802 SrcTy = MRI.getType(I.getOperand(1).getReg());
2803 const unsigned NewOpc = selectFPConvOpc(Opcode, DstTy, SrcTy);
2804 if (NewOpc == Opcode)
2805 return false;
2806
2807 I.setDesc(TII.get(NewOpc));
2808 constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2809
2810 return true;
2811 }
2812
2813 case TargetOpcode::G_FREEZE:
2814 return selectCopy(I, TII, MRI, TRI, RBI);
2815
2816 case TargetOpcode::G_INTTOPTR:
2817 // The importer is currently unable to import pointer types since they
2818 // didn't exist in SelectionDAG.
2819 return selectCopy(I, TII, MRI, TRI, RBI);
2820
2821 case TargetOpcode::G_BITCAST:
2822 // Imported SelectionDAG rules can handle every bitcast except those that
2823 // bitcast from a type to the same type. Ideally, these shouldn't occur
2824 // but we might not run an optimizer that deletes them. The other exception
2825 // is bitcasts involving pointer types, as SelectionDAG has no knowledge
2826 // of them.
2827 return selectCopy(I, TII, MRI, TRI, RBI);
2828
2829 case TargetOpcode::G_SELECT: {
2830 if (MRI.getType(I.getOperand(1).getReg()) != LLT::scalar(1)) {
2831       LLVM_DEBUG(dbgs() << "G_SELECT cond has type: " << Ty
2832                         << ", expected: " << LLT::scalar(1) << '\n');
2833 return false;
2834 }
2835
2836 const Register CondReg = I.getOperand(1).getReg();
2837 const Register TReg = I.getOperand(2).getReg();
2838 const Register FReg = I.getOperand(3).getReg();
2839
2840 if (tryOptSelect(I))
2841 return true;
2842
2843 Register CSelOpc = selectSelectOpc(I, MRI, RBI);
2844 MachineInstr &TstMI =
2845 *BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::ANDSWri))
2846 .addDef(AArch64::WZR)
2847 .addUse(CondReg)
2848 .addImm(AArch64_AM::encodeLogicalImmediate(1, 32));
2849
2850 MachineInstr &CSelMI = *BuildMI(MBB, I, I.getDebugLoc(), TII.get(CSelOpc))
2851 .addDef(I.getOperand(0).getReg())
2852 .addUse(TReg)
2853 .addUse(FReg)
2854 .addImm(AArch64CC::NE);
2855
2856 constrainSelectedInstRegOperands(TstMI, TII, TRI, RBI);
2857 constrainSelectedInstRegOperands(CSelMI, TII, TRI, RBI);
2858
2859 I.eraseFromParent();
2860 return true;
2861 }
2862 case TargetOpcode::G_ICMP: {
2863 if (Ty.isVector())
2864 return selectVectorICmp(I, MRI);
2865
2866 if (Ty != LLT::scalar(32)) {
2867       LLVM_DEBUG(dbgs() << "G_ICMP result has type: " << Ty
2868                         << ", expected: " << LLT::scalar(32) << '\n');
2869 return false;
2870 }
2871
2872 MachineIRBuilder MIRBuilder(I);
2873 MachineInstr *Cmp;
2874 CmpInst::Predicate Pred;
2875 std::tie(Cmp, Pred) = emitIntegerCompare(I.getOperand(2), I.getOperand(3),
2876 I.getOperand(1), MIRBuilder);
2877 if (!Cmp)
2878 return false;
2879 emitCSetForICMP(I.getOperand(0).getReg(), Pred, MIRBuilder);
2880 I.eraseFromParent();
2881 return true;
2882 }
2883
2884 case TargetOpcode::G_FCMP: {
2885 if (Ty != LLT::scalar(32)) {
2886       LLVM_DEBUG(dbgs() << "G_FCMP result has type: " << Ty
2887                         << ", expected: " << LLT::scalar(32) << '\n');
2888 return false;
2889 }
2890
2891 unsigned CmpOpc = selectFCMPOpc(I, MRI);
2892 if (!CmpOpc)
2893 return false;
2894
2895 // FIXME: regbank
2896
2897 AArch64CC::CondCode CC1, CC2;
2898 changeFCMPPredToAArch64CC(
2899 (CmpInst::Predicate)I.getOperand(1).getPredicate(), CC1, CC2);
2900
2901 // Partially build the compare. Decide if we need to add a use for the
2902 // third operand based off whether or not we're comparing against 0.0.
2903 auto CmpMI = BuildMI(MBB, I, I.getDebugLoc(), TII.get(CmpOpc))
2904 .addUse(I.getOperand(2).getReg());
2905
2906 // If we don't have an immediate compare, then we need to add a use of the
2907 // register which wasn't used for the immediate.
2908 // Note that the immediate will always be the last operand.
2909 if (CmpOpc != AArch64::FCMPSri && CmpOpc != AArch64::FCMPDri)
2910 CmpMI = CmpMI.addUse(I.getOperand(3).getReg());
2911
2912 const Register DefReg = I.getOperand(0).getReg();
2913 Register Def1Reg = DefReg;
2914 if (CC2 != AArch64CC::AL)
2915 Def1Reg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
2916
2917 MachineInstr &CSetMI =
2918 *BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::CSINCWr))
2919 .addDef(Def1Reg)
2920 .addUse(AArch64::WZR)
2921 .addUse(AArch64::WZR)
2922 .addImm(getInvertedCondCode(CC1));
2923
2924 if (CC2 != AArch64CC::AL) {
2925 Register Def2Reg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
2926 MachineInstr &CSet2MI =
2927 *BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::CSINCWr))
2928 .addDef(Def2Reg)
2929 .addUse(AArch64::WZR)
2930 .addUse(AArch64::WZR)
2931 .addImm(getInvertedCondCode(CC2));
2932 MachineInstr &OrMI =
2933 *BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::ORRWrr))
2934 .addDef(DefReg)
2935 .addUse(Def1Reg)
2936 .addUse(Def2Reg);
2937 constrainSelectedInstRegOperands(OrMI, TII, TRI, RBI);
2938 constrainSelectedInstRegOperands(CSet2MI, TII, TRI, RBI);
2939 }
2940 constrainSelectedInstRegOperands(*CmpMI, TII, TRI, RBI);
2941 constrainSelectedInstRegOperands(CSetMI, TII, TRI, RBI);
2942
2943 I.eraseFromParent();
2944 return true;
2945 }
2946 case TargetOpcode::G_VASTART:
2947 return STI.isTargetDarwin() ? selectVaStartDarwin(I, MF, MRI)
2948 : selectVaStartAAPCS(I, MF, MRI);
2949 case TargetOpcode::G_INTRINSIC:
2950 return selectIntrinsic(I, MRI);
2951 case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS:
2952 return selectIntrinsicWithSideEffects(I, MRI);
2953 case TargetOpcode::G_IMPLICIT_DEF: {
2954 I.setDesc(TII.get(TargetOpcode::IMPLICIT_DEF));
2955 const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
2956 const Register DstReg = I.getOperand(0).getReg();
2957 const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
2958 const TargetRegisterClass *DstRC =
2959 getRegClassForTypeOnBank(DstTy, DstRB, RBI);
2960 RBI.constrainGenericRegister(DstReg, *DstRC, MRI);
2961 return true;
2962 }
2963 case TargetOpcode::G_BLOCK_ADDR: {
2964 if (TM.getCodeModel() == CodeModel::Large) {
2965 materializeLargeCMVal(I, I.getOperand(1).getBlockAddress(), 0);
2966 I.eraseFromParent();
2967 return true;
2968 } else {
2969 I.setDesc(TII.get(AArch64::MOVaddrBA));
2970 auto MovMI = BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::MOVaddrBA),
2971 I.getOperand(0).getReg())
2972 .addBlockAddress(I.getOperand(1).getBlockAddress(),
2973 /* Offset */ 0, AArch64II::MO_PAGE)
2974 .addBlockAddress(
2975 I.getOperand(1).getBlockAddress(), /* Offset */ 0,
2976 AArch64II::MO_NC | AArch64II::MO_PAGEOFF);
2977 I.eraseFromParent();
2978 return constrainSelectedInstRegOperands(*MovMI, TII, TRI, RBI);
2979 }
2980 }
2981 case AArch64::G_DUP: {
2982 // When the scalar of G_DUP is an s8/s16 gpr, they can't be selected by
2983 // imported patterns. Do it manually here. Avoiding generating s16 gpr is
2984 // difficult because at RBS we may end up pessimizing the fpr case if we
2985 // decided to add an anyextend to fix this. Manual selection is the most
2986 // robust solution for now.
2987 Register SrcReg = I.getOperand(1).getReg();
2988 if (RBI.getRegBank(SrcReg, MRI, TRI)->getID() != AArch64::GPRRegBankID)
2989 return false; // We expect the fpr regbank case to be imported.
2990 LLT SrcTy = MRI.getType(SrcReg);
2991 if (SrcTy.getSizeInBits() == 16)
2992 I.setDesc(TII.get(AArch64::DUPv8i16gpr));
2993 else if (SrcTy.getSizeInBits() == 8)
2994 I.setDesc(TII.get(AArch64::DUPv16i8gpr));
2995 else
2996 return false;
2997 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2998 }
2999 case TargetOpcode::G_INTRINSIC_TRUNC:
3000 return selectIntrinsicTrunc(I, MRI);
3001 case TargetOpcode::G_INTRINSIC_ROUND:
3002 return selectIntrinsicRound(I, MRI);
3003 case TargetOpcode::G_BUILD_VECTOR:
3004 return selectBuildVector(I, MRI);
3005 case TargetOpcode::G_MERGE_VALUES:
3006 return selectMergeValues(I, MRI);
3007 case TargetOpcode::G_UNMERGE_VALUES:
3008 return selectUnmergeValues(I, MRI);
3009 case TargetOpcode::G_SHUFFLE_VECTOR:
3010 return selectShuffleVector(I, MRI);
3011 case TargetOpcode::G_EXTRACT_VECTOR_ELT:
3012 return selectExtractElt(I, MRI);
3013 case TargetOpcode::G_INSERT_VECTOR_ELT:
3014 return selectInsertElt(I, MRI);
3015 case TargetOpcode::G_CONCAT_VECTORS:
3016 return selectConcatVectors(I, MRI);
3017 case TargetOpcode::G_JUMP_TABLE:
3018 return selectJumpTable(I, MRI);
3019 }
3020
3021 return false;
3022}
3023
3024bool AArch64InstructionSelector::selectBrJT(MachineInstr &I,
3025 MachineRegisterInfo &MRI) const {
3026   assert(I.getOpcode() == TargetOpcode::G_BRJT && "Expected G_BRJT");
3027 Register JTAddr = I.getOperand(0).getReg();
3028 unsigned JTI = I.getOperand(1).getIndex();
3029 Register Index = I.getOperand(2).getReg();
3030 MachineIRBuilder MIB(I);
3031
3032 Register TargetReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
3033 Register ScratchReg = MRI.createVirtualRegister(&AArch64::GPR64spRegClass);
3034
3035 MF->getInfo<AArch64FunctionInfo>()->setJumpTableEntryInfo(JTI, 4, nullptr);
3036 auto JumpTableInst = MIB.buildInstr(AArch64::JumpTableDest32,
3037 {TargetReg, ScratchReg}, {JTAddr, Index})
3038 .addJumpTableIndex(JTI);
3039 // Build the indirect branch.
3040 MIB.buildInstr(AArch64::BR, {}, {TargetReg});
3041 I.eraseFromParent();
3042 return constrainSelectedInstRegOperands(*JumpTableInst, TII, TRI, RBI);
3043}
3044
3045bool AArch64InstructionSelector::selectJumpTable(
3046 MachineInstr &I, MachineRegisterInfo &MRI) const {
3047   assert(I.getOpcode() == TargetOpcode::G_JUMP_TABLE && "Expected jump table");
3048   assert(I.getOperand(1).isJTI() && "Jump table op should have a JTI!");
3049
3050 Register DstReg = I.getOperand(0).getReg();
3051 unsigned JTI = I.getOperand(1).getIndex();
3052 // We generate a MOVaddrJT which will get expanded to an ADRP + ADD later.
3053 MachineIRBuilder MIB(I);
3054 auto MovMI =
3055 MIB.buildInstr(AArch64::MOVaddrJT, {DstReg}, {})
3056 .addJumpTableIndex(JTI, AArch64II::MO_PAGE)
3057 .addJumpTableIndex(JTI, AArch64II::MO_NC | AArch64II::MO_PAGEOFF);
3058 I.eraseFromParent();
3059 return constrainSelectedInstRegOperands(*MovMI, TII, TRI, RBI);
3060}
3061
3062bool AArch64InstructionSelector::selectTLSGlobalValue(
3063 MachineInstr &I, MachineRegisterInfo &MRI) const {
3064 if (!STI.isTargetMachO())
3065 return false;
3066 MachineFunction &MF = *I.getParent()->getParent();
3067 MF.getFrameInfo().setAdjustsStack(true);
3068
3069 const GlobalValue &GV = *I.getOperand(1).getGlobal();
3070 MachineIRBuilder MIB(I);
3071
3072 auto LoadGOT =
3073 MIB.buildInstr(AArch64::LOADgot, {&AArch64::GPR64commonRegClass}, {})
3074 .addGlobalAddress(&GV, 0, AArch64II::MO_TLS);
3075
3076 auto Load = MIB.buildInstr(AArch64::LDRXui, {&AArch64::GPR64commonRegClass},
3077 {LoadGOT.getReg(0)})
3078 .addImm(0);
3079
3080 MIB.buildCopy(Register(AArch64::X0), LoadGOT.getReg(0));
3081 // TLS calls preserve all registers except those that absolutely must be
3082 // trashed: X0 (it takes an argument), LR (it's a call) and NZCV (let's not be
3083 // silly).
3084 MIB.buildInstr(getBLRCallOpcode(MF), {}, {Load})
3085 .addUse(AArch64::X0, RegState::Implicit)
3086 .addDef(AArch64::X0, RegState::Implicit)
3087 .addRegMask(TRI.getTLSCallPreservedMask());
3088
3089 MIB.buildCopy(I.getOperand(0).getReg(), Register(AArch64::X0));
3090 RBI.constrainGenericRegister(I.getOperand(0).getReg(), AArch64::GPR64RegClass,
3091 MRI);
3092 I.eraseFromParent();
3093 return true;
3094}
3095
3096bool AArch64InstructionSelector::selectIntrinsicTrunc(
3097 MachineInstr &I, MachineRegisterInfo &MRI) const {
3098 const LLT SrcTy = MRI.getType(I.getOperand(0).getReg());
3099
3100 // Select the correct opcode.
3101 unsigned Opc = 0;
3102 if (!SrcTy.isVector()) {
3103 switch (SrcTy.getSizeInBits()) {
3104 default:
3105 case 16:
3106 Opc = AArch64::FRINTZHr;
3107 break;
3108 case 32:
3109 Opc = AArch64::FRINTZSr;
3110 break;
3111 case 64:
3112 Opc = AArch64::FRINTZDr;
3113 break;
3114 }
3115 } else {
3116 unsigned NumElts = SrcTy.getNumElements();
3117 switch (SrcTy.getElementType().getSizeInBits()) {
3118 default:
3119 break;
3120 case 16:
3121 if (NumElts == 4)
3122 Opc = AArch64::FRINTZv4f16;
3123 else if (NumElts == 8)
3124 Opc = AArch64::FRINTZv8f16;
3125 break;
3126 case 32:
3127 if (NumElts == 2)
3128 Opc = AArch64::FRINTZv2f32;
3129 else if (NumElts == 4)
3130 Opc = AArch64::FRINTZv4f32;
3131 break;
3132 case 64:
3133 if (NumElts == 2)
3134 Opc = AArch64::FRINTZv2f64;
3135 break;
3136 }
3137 }
3138
3139 if (!Opc) {
3140 // Didn't get an opcode above, bail.
3141     LLVM_DEBUG(dbgs() << "Unsupported type for G_INTRINSIC_TRUNC!\n");
3142 return false;
3143 }
3144
3145 // Legalization would have set us up perfectly for this; we just need to
3146 // set the opcode and move on.
3147 I.setDesc(TII.get(Opc));
3148 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3149}
3150
3151bool AArch64InstructionSelector::selectIntrinsicRound(
3152 MachineInstr &I, MachineRegisterInfo &MRI) const {
3153 const LLT SrcTy = MRI.getType(I.getOperand(0).getReg());
3154
3155 // Select the correct opcode.
3156 unsigned Opc = 0;
3157 if (!SrcTy.isVector()) {
3158 switch (SrcTy.getSizeInBits()) {
3159 default:
3160 case 16:
3161 Opc = AArch64::FRINTAHr;
3162 break;
3163 case 32:
3164 Opc = AArch64::FRINTASr;
3165 break;
3166 case 64:
3167 Opc = AArch64::FRINTADr;
3168 break;
3169 }
3170 } else {
3171 unsigned NumElts = SrcTy.getNumElements();
3172 switch (SrcTy.getElementType().getSizeInBits()) {
3173 default:
3174 break;
3175 case 16:
3176 if (NumElts == 4)
3177 Opc = AArch64::FRINTAv4f16;
3178 else if (NumElts == 8)
3179 Opc = AArch64::FRINTAv8f16;
3180 break;
3181 case 32:
3182 if (NumElts == 2)
3183 Opc = AArch64::FRINTAv2f32;
3184 else if (NumElts == 4)
3185 Opc = AArch64::FRINTAv4f32;
3186 break;
3187 case 64:
3188 if (NumElts == 2)
3189 Opc = AArch64::FRINTAv2f64;
3190 break;
3191 }
3192 }
3193
3194 if (!Opc) {
3195 // Didn't get an opcode above, bail.
3196     LLVM_DEBUG(dbgs() << "Unsupported type for G_INTRINSIC_ROUND!\n");
3197 return false;
3198 }
3199
3200 // Legalization would have set us up perfectly for this; we just need to
3201 // set the opcode and move on.
3202 I.setDesc(TII.get(Opc));
3203 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3204}
3205
3206bool AArch64InstructionSelector::selectVectorICmp(
3207 MachineInstr &I, MachineRegisterInfo &MRI) const {
3208 Register DstReg = I.getOperand(0).getReg();
3209 LLT DstTy = MRI.getType(DstReg);
3210 Register SrcReg = I.getOperand(2).getReg();
3211 Register Src2Reg = I.getOperand(3).getReg();
3212 LLT SrcTy = MRI.getType(SrcReg);
3213
3214 unsigned SrcEltSize = SrcTy.getElementType().getSizeInBits();
3215 unsigned NumElts = DstTy.getNumElements();
3216
3217 // First index is element size, 0 == 8b, 1 == 16b, 2 == 32b, 3 == 64b
3218 // Second index is num elts, 0 == v2, 1 == v4, 2 == v8, 3 == v16
3219 // Third index is cc opcode:
3220 // 0 == eq
3221 // 1 == ugt
3222 // 2 == uge
3223 // 3 == ult
3224 // 4 == ule
3225 // 5 == sgt
3226 // 6 == sge
3227 // 7 == slt
3228 // 8 == sle
3229 // ne is done by negating 'eq' result.
3230
3231 // This table below assumes that for some comparisons the operands will be
3232 // commuted.
3233 // ult op == commute + ugt op
3234 // ule op == commute + uge op
3235 // slt op == commute + sgt op
3236 // sle op == commute + sge op
3237 unsigned PredIdx = 0;
3238 bool SwapOperands = false;
3239 CmpInst::Predicate Pred = (CmpInst::Predicate)I.getOperand(1).getPredicate();
3240 switch (Pred) {
3241 case CmpInst::ICMP_NE:
3242 case CmpInst::ICMP_EQ:
3243 PredIdx = 0;
3244 break;
3245 case CmpInst::ICMP_UGT:
3246 PredIdx = 1;
3247 break;
3248 case CmpInst::ICMP_UGE:
3249 PredIdx = 2;
3250 break;
3251 case CmpInst::ICMP_ULT:
3252 PredIdx = 3;
3253 SwapOperands = true;
3254 break;
3255 case CmpInst::ICMP_ULE:
3256 PredIdx = 4;
3257 SwapOperands = true;
3258 break;
3259 case CmpInst::ICMP_SGT:
3260 PredIdx = 5;
3261 break;
3262 case CmpInst::ICMP_SGE:
3263 PredIdx = 6;
3264 break;
3265 case CmpInst::ICMP_SLT:
3266 PredIdx = 7;
3267 SwapOperands = true;
3268 break;
3269 case CmpInst::ICMP_SLE:
3270 PredIdx = 8;
3271 SwapOperands = true;
3272 break;
3273 default:
3274     llvm_unreachable("Unhandled icmp predicate");
3275 return false;
3276 }
3277
3278 // This table obviously should be tablegen'd when we have our GISel native
3279 // tablegen selector.
3280
3281 static const unsigned OpcTable[4][4][9] = {
3282 {
3283 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3284 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3285 0 /* invalid */},
3286 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3287 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3288 0 /* invalid */},
3289 {AArch64::CMEQv8i8, AArch64::CMHIv8i8, AArch64::CMHSv8i8,
3290 AArch64::CMHIv8i8, AArch64::CMHSv8i8, AArch64::CMGTv8i8,
3291 AArch64::CMGEv8i8, AArch64::CMGTv8i8, AArch64::CMGEv8i8},
3292 {AArch64::CMEQv16i8, AArch64::CMHIv16i8, AArch64::CMHSv16i8,
3293 AArch64::CMHIv16i8, AArch64::CMHSv16i8, AArch64::CMGTv16i8,
3294 AArch64::CMGEv16i8, AArch64::CMGTv16i8, AArch64::CMGEv16i8}
3295 },
3296 {
3297 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3298 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3299 0 /* invalid */},
3300 {AArch64::CMEQv4i16, AArch64::CMHIv4i16, AArch64::CMHSv4i16,
3301 AArch64::CMHIv4i16, AArch64::CMHSv4i16, AArch64::CMGTv4i16,
3302 AArch64::CMGEv4i16, AArch64::CMGTv4i16, AArch64::CMGEv4i16},
3303 {AArch64::CMEQv8i16, AArch64::CMHIv8i16, AArch64::CMHSv8i16,
3304 AArch64::CMHIv8i16, AArch64::CMHSv8i16, AArch64::CMGTv8i16,
3305 AArch64::CMGEv8i16, AArch64::CMGTv8i16, AArch64::CMGEv8i16},
3306 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3307 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3308 0 /* invalid */}
3309 },
3310 {
3311 {AArch64::CMEQv2i32, AArch64::CMHIv2i32, AArch64::CMHSv2i32,
3312 AArch64::CMHIv2i32, AArch64::CMHSv2i32, AArch64::CMGTv2i32,
3313 AArch64::CMGEv2i32, AArch64::CMGTv2i32, AArch64::CMGEv2i32},
3314 {AArch64::CMEQv4i32, AArch64::CMHIv4i32, AArch64::CMHSv4i32,
3315 AArch64::CMHIv4i32, AArch64::CMHSv4i32, AArch64::CMGTv4i32,
3316 AArch64::CMGEv4i32, AArch64::CMGTv4i32, AArch64::CMGEv4i32},
3317 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3318 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3319 0 /* invalid */},
3320 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3321 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3322 0 /* invalid */}
3323 },
3324 {
3325 {AArch64::CMEQv2i64, AArch64::CMHIv2i64, AArch64::CMHSv2i64,
3326 AArch64::CMHIv2i64, AArch64::CMHSv2i64, AArch64::CMGTv2i64,
3327 AArch64::CMGEv2i64, AArch64::CMGTv2i64, AArch64::CMGEv2i64},
3328 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3329 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3330 0 /* invalid */},
3331 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3332 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3333 0 /* invalid */},
3334 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3335 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3336 0 /* invalid */}
3337 },
3338 };
3339 unsigned EltIdx = Log2_32(SrcEltSize / 8);
3340 unsigned NumEltsIdx = Log2_32(NumElts / 2);
3341 unsigned Opc = OpcTable[EltIdx][NumEltsIdx][PredIdx];
3342 if (!Opc) {
3343     LLVM_DEBUG(dbgs() << "Could not map G_ICMP to cmp opcode");
3344 return false;
3345 }
3346
3347 const RegisterBank &VecRB = *RBI.getRegBank(SrcReg, MRI, TRI);
3348 const TargetRegisterClass *SrcRC =
3349 getRegClassForTypeOnBank(SrcTy, VecRB, RBI, true);
3350 if (!SrcRC) {
3351     LLVM_DEBUG(dbgs() << "Could not determine source register class.\n");
3352 return false;
3353 }
3354
3355 unsigned NotOpc = Pred == ICmpInst::ICMP_NE ? AArch64::NOTv8i8 : 0;
3356 if (SrcTy.getSizeInBits() == 128)
3357 NotOpc = NotOpc ? AArch64::NOTv16i8 : 0;
3358
3359 if (SwapOperands)
3360 std::swap(SrcReg, Src2Reg);
3361
3362 MachineIRBuilder MIB(I);
3363 auto Cmp = MIB.buildInstr(Opc, {SrcRC}, {SrcReg, Src2Reg});
3364 constrainSelectedInstRegOperands(*Cmp, TII, TRI, RBI);
3365
3366 // Invert if we had a 'ne' cc.
3367 if (NotOpc) {
3368 Cmp = MIB.buildInstr(NotOpc, {DstReg}, {Cmp});
3369 constrainSelectedInstRegOperands(*Cmp, TII, TRI, RBI);
3370 } else {
3371 MIB.buildCopy(DstReg, Cmp.getReg(0));
3372 }
3373 RBI.constrainGenericRegister(DstReg, *SrcRC, MRI);
3374 I.eraseFromParent();
3375 return true;
3376}
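
The OpcTable above is indexed with EltIdx = Log2_32(SrcEltSize / 8) and NumEltsIdx = Log2_32(NumElts / 2). A standalone sketch of that indexing scheme (log2u is an illustrative stand-in for llvm::Log2_32, which the selector uses):

    #include <cassert>

    static unsigned log2u(unsigned V) {
      unsigned R = 0;
      while (V >>= 1)
        ++R;
      return R;
    }

    int main() {
      // v16i8 compares map to OpcTable[0][3][PredIdx].
      assert(log2u(8 / 8) == 0 && log2u(16 / 2) == 3);
      // v4i32 compares map to OpcTable[2][1][PredIdx].
      assert(log2u(32 / 8) == 2 && log2u(4 / 2) == 1);
      return 0;
    }
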
3377
3378MachineInstr *AArch64InstructionSelector::emitScalarToVector(
3379 unsigned EltSize, const TargetRegisterClass *DstRC, Register Scalar,
3380 MachineIRBuilder &MIRBuilder) const {
3381 auto Undef = MIRBuilder.buildInstr(TargetOpcode::IMPLICIT_DEF, {DstRC}, {});
3382
3383 auto BuildFn = [&](unsigned SubregIndex) {
3384 auto Ins =
3385 MIRBuilder
3386 .buildInstr(TargetOpcode::INSERT_SUBREG, {DstRC}, {Undef, Scalar})
3387 .addImm(SubregIndex);
3388 constrainSelectedInstRegOperands(*Undef, TII, TRI, RBI);
3389 constrainSelectedInstRegOperands(*Ins, TII, TRI, RBI);
3390 return &*Ins;
3391 };
3392
3393 switch (EltSize) {
3394 case 16:
3395 return BuildFn(AArch64::hsub);
3396 case 32:
3397 return BuildFn(AArch64::ssub);
3398 case 64:
3399 return BuildFn(AArch64::dsub);
3400 default:
3401 return nullptr;
3402 }
3403}
3404
3405bool AArch64InstructionSelector::selectMergeValues(
3406 MachineInstr &I, MachineRegisterInfo &MRI) const {
3407   assert(I.getOpcode() == TargetOpcode::G_MERGE_VALUES && "unexpected opcode");
3408 const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
3409 const LLT SrcTy = MRI.getType(I.getOperand(1).getReg());
3410   assert(!DstTy.isVector() && !SrcTy.isVector() && "invalid merge operation");
3411 const RegisterBank &RB = *RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI);
3412
3413 if (I.getNumOperands() != 3)
3414 return false;
3415
3416 // Merging 2 s64s into an s128.
3417 if (DstTy == LLT::scalar(128)) {
3418 if (SrcTy.getSizeInBits() != 64)
3419 return false;
3420 MachineIRBuilder MIB(I);
3421 Register DstReg = I.getOperand(0).getReg();
3422 Register Src1Reg = I.getOperand(1).getReg();
3423 Register Src2Reg = I.getOperand(2).getReg();
3424 auto Tmp = MIB.buildInstr(TargetOpcode::IMPLICIT_DEF, {DstTy}, {});
3425 MachineInstr *InsMI =
3426 emitLaneInsert(None, Tmp.getReg(0), Src1Reg, /* LaneIdx */ 0, RB, MIB);
3427 if (!InsMI)
3428 return false;
3429 MachineInstr *Ins2MI = emitLaneInsert(DstReg, InsMI->getOperand(0).getReg(),
3430 Src2Reg, /* LaneIdx */ 1, RB, MIB);
3431 if (!Ins2MI)
3432 return false;
3433 constrainSelectedInstRegOperands(*InsMI, TII, TRI, RBI);
3434 constrainSelectedInstRegOperands(*Ins2MI, TII, TRI, RBI);
3435 I.eraseFromParent();
3436 return true;
3437 }
3438
3439 if (RB.getID() != AArch64::GPRRegBankID)
3440 return false;
3441
3442 if (DstTy.getSizeInBits() != 64 || SrcTy.getSizeInBits() != 32)
3443 return false;
3444
3445 auto *DstRC = &AArch64::GPR64RegClass;
3446 Register SubToRegDef = MRI.createVirtualRegister(DstRC);
3447 MachineInstr &SubRegMI = *BuildMI(*I.getParent(), I, I.getDebugLoc(),
3448 TII.get(TargetOpcode::SUBREG_TO_REG))
3449 .addDef(SubToRegDef)
3450 .addImm(0)
3451 .addUse(I.getOperand(1).getReg())
3452 .addImm(AArch64::sub_32);
3453 Register SubToRegDef2 = MRI.createVirtualRegister(DstRC);
3454 // Need to anyext the second scalar before we can use bfm
3455 MachineInstr &SubRegMI2 = *BuildMI(*I.getParent(), I, I.getDebugLoc(),
3456 TII.get(TargetOpcode::SUBREG_TO_REG))
3457 .addDef(SubToRegDef2)
3458 .addImm(0)
3459 .addUse(I.getOperand(2).getReg())
3460 .addImm(AArch64::sub_32);
3461 MachineInstr &BFM =
3462 *BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::BFMXri))
3463 .addDef(I.getOperand(0).getReg())
3464 .addUse(SubToRegDef)
3465 .addUse(SubToRegDef2)
3466 .addImm(32)
3467 .addImm(31);
3468 constrainSelectedInstRegOperands(SubRegMI, TII, TRI, RBI);
3469 constrainSelectedInstRegOperands(SubRegMI2, TII, TRI, RBI);
3470 constrainSelectedInstRegOperands(BFM, TII, TRI, RBI);
3471 I.eraseFromParent();
3472 return true;
3473}
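
Assuming the standard BFM-to-BFI aliasing, the BFMXri above (immr = 32, imms = 31) acts as BFI dst, src2, #32, #32: it keeps the low 32 bits inherited from the first SUBREG_TO_REG and inserts the low 32 bits of the second operand at bit 32, which is exactly the s32 + s32 -> s64 merge. A small standalone sketch of the resulting value (illustrative names, not selector code):

    #include <cassert>
    #include <cstdint>

    static uint64_t mergeS32(uint32_t Lo, uint32_t Hi) {
      return (uint64_t)Lo | ((uint64_t)Hi << 32);
    }

    int main() {
      assert(mergeS32(0x11223344u, 0xAABBCCDDu) == 0xAABBCCDD11223344ULL);
      return 0;
    }
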
3474
3475static bool getLaneCopyOpcode(unsigned &CopyOpc, unsigned &ExtractSubReg,
3476 const unsigned EltSize) {
3477 // Choose a lane copy opcode and subregister based off of the size of the
3478 // vector's elements.
3479 switch (EltSize) {
3480 case 16:
3481 CopyOpc = AArch64::CPYi16;
3482 ExtractSubReg = AArch64::hsub;
3483 break;
3484 case 32:
3485 CopyOpc = AArch64::CPYi32;
3486 ExtractSubReg = AArch64::ssub;
3487 break;
3488 case 64:
3489 CopyOpc = AArch64::CPYi64;
3490 ExtractSubReg = AArch64::dsub;
3491 break;
3492 default:
3493 // Unknown size, bail out.
3494     LLVM_DEBUG(dbgs() << "Elt size '" << EltSize << "' unsupported.\n");
3495 return false;
3496 }
3497 return true;
3498}
3499
3500MachineInstr *AArch64InstructionSelector::emitExtractVectorElt(
3501 Optional<Register> DstReg, const RegisterBank &DstRB, LLT ScalarTy,
3502 Register VecReg, unsigned LaneIdx, MachineIRBuilder &MIRBuilder) const {
3503 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
3504 unsigned CopyOpc = 0;
3505 unsigned ExtractSubReg = 0;
3506 if (!getLaneCopyOpcode(CopyOpc, ExtractSubReg, ScalarTy.getSizeInBits())) {
3507     LLVM_DEBUG(
3508         dbgs() << "Couldn't determine lane copy opcode for instruction.\n");
3509 return nullptr;
3510 }
3511
3512 const TargetRegisterClass *DstRC =
3513 getRegClassForTypeOnBank(ScalarTy, DstRB, RBI, true);
3514 if (!DstRC) {
3515 LLVM_DEBUG(dbgs() << "Could not determine destination register class.\n");
3516 return nullptr;
3517 }
3518
3519 const RegisterBank &VecRB = *RBI.getRegBank(VecReg, MRI, TRI);
3520 const LLT &VecTy = MRI.getType(VecReg);
3521 const TargetRegisterClass *VecRC =
3522 getRegClassForTypeOnBank(VecTy, VecRB, RBI, true);
3523 if (!VecRC) {
3524 LLVM_DEBUG(dbgs() << "Could not determine source register class.\n");
3525 return nullptr;
3526 }
3527
3528 // The register that we're going to copy into.
3529 Register InsertReg = VecReg;
3530 if (!DstReg)
3531 DstReg = MRI.createVirtualRegister(DstRC);
3532 // If the lane index is 0, we just use a subregister COPY.
3533 if (LaneIdx == 0) {
3534 auto Copy = MIRBuilder.buildInstr(TargetOpcode::COPY, {*DstReg}, {})
3535 .addReg(VecReg, 0, ExtractSubReg);
3536 RBI.constrainGenericRegister(*DstReg, *DstRC, MRI);
3537 return &*Copy;
3538 }
3539
3540 // Lane copies require 128-bit wide registers. If we're dealing with an
3541 // unpacked vector, then we need to move up to that width. Insert an implicit
3542 // def and a subregister insert to get us there.
3543 if (VecTy.getSizeInBits() != 128) {
3544 MachineInstr *ScalarToVector = emitScalarToVector(
3545 VecTy.getSizeInBits(), &AArch64::FPR128RegClass, VecReg, MIRBuilder);
3546 if (!ScalarToVector)
3547 return nullptr;
3548 InsertReg = ScalarToVector->getOperand(0).getReg();
3549 }
3550
3551 MachineInstr *LaneCopyMI =
3552 MIRBuilder.buildInstr(CopyOpc, {*DstReg}, {InsertReg}).addImm(LaneIdx);
3553 constrainSelectedInstRegOperands(*LaneCopyMI, TII, TRI, RBI);
3554
3555 // Make sure that we actually constrain the initial copy.
3556 RBI.constrainGenericRegister(*DstReg, *DstRC, MRI);
3557 return LaneCopyMI;
3558}
3559
3560bool AArch64InstructionSelector::selectExtractElt(
3561 MachineInstr &I, MachineRegisterInfo &MRI) const {
3562 assert(I.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT &&
3563 "unexpected opcode!");
3564 Register DstReg = I.getOperand(0).getReg();
3565 const LLT NarrowTy = MRI.getType(DstReg);
3566 const Register SrcReg = I.getOperand(1).getReg();
3567 const LLT WideTy = MRI.getType(SrcReg);
3568 (void)WideTy;
3569 assert(WideTy.getSizeInBits() >= NarrowTy.getSizeInBits() &&
3570 "source register size too small!");
3571 assert(NarrowTy.isScalar() && "cannot extract vector into vector!");
3572
3573 // Need the lane index to determine the correct copy opcode.
3574 MachineOperand &LaneIdxOp = I.getOperand(2);
3575 assert(LaneIdxOp.isReg() && "Lane index operand was not a register?");
3576
3577 if (RBI.getRegBank(DstReg, MRI, TRI)->getID() != AArch64::FPRRegBankID) {
3578 LLVM_DEBUG(dbgs() << "Cannot extract into GPR.\n");
3579 return false;
3580 }
3581
3582 // Find the index to extract from.
3583 auto VRegAndVal = getConstantVRegValWithLookThrough(LaneIdxOp.getReg(), MRI);
3584 if (!VRegAndVal)
3585 return false;
3586 unsigned LaneIdx = VRegAndVal->Value;
3587
3588 MachineIRBuilder MIRBuilder(I);
3589
3590 const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
3591 MachineInstr *Extract = emitExtractVectorElt(DstReg, DstRB, NarrowTy, SrcReg,
3592 LaneIdx, MIRBuilder);
3593 if (!Extract)
3594 return false;
3595
3596 I.eraseFromParent();
3597 return true;
3598}
3599
3600bool AArch64InstructionSelector::selectSplitVectorUnmerge(
3601 MachineInstr &I, MachineRegisterInfo &MRI) const {
3602 unsigned NumElts = I.getNumOperands() - 1;
3603 Register SrcReg = I.getOperand(NumElts).getReg();
3604 const LLT NarrowTy = MRI.getType(I.getOperand(0).getReg());
3605 const LLT SrcTy = MRI.getType(SrcReg);
3606
3607 assert(NarrowTy.isVector() && "Expected an unmerge into vectors");
3608 if (SrcTy.getSizeInBits() > 128) {
3609 LLVM_DEBUG(dbgs() << "Unexpected vector type for vec split unmerge");
3610 return false;
3611 }
3612
3613 MachineIRBuilder MIB(I);
3614
3615 // We implement a split vector operation by treating the sub-vectors as
3616 // scalars and extracting them.
3617 const RegisterBank &DstRB =
3618 *RBI.getRegBank(I.getOperand(0).getReg(), MRI, TRI);
3619 for (unsigned OpIdx = 0; OpIdx < NumElts; ++OpIdx) {
3620 Register Dst = I.getOperand(OpIdx).getReg();
3621 MachineInstr *Extract =
3622 emitExtractVectorElt(Dst, DstRB, NarrowTy, SrcReg, OpIdx, MIB);
3623 if (!Extract)
3624 return false;
3625 }
3626 I.eraseFromParent();
3627 return true;
3628}
3629
3630bool AArch64InstructionSelector::selectUnmergeValues(
3631 MachineInstr &I, MachineRegisterInfo &MRI) const {
3632 assert(I.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
3633 "unexpected opcode");
3634
3635 // TODO: Handle unmerging into GPRs and from scalars to scalars.
3636 if (RBI.getRegBank(I.getOperand(0).getReg(), MRI, TRI)->getID() !=
3637 AArch64::FPRRegBankID ||
3638 RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI)->getID() !=
3639 AArch64::FPRRegBankID) {
3640 LLVM_DEBUG(dbgs() << "Unmerging vector-to-gpr and scalar-to-scalar "
3641 "currently unsupported.\n");
3642 return false;
3643 }
3644
3645 // The last operand is the vector source register, and every other operand is
3646 // a register to unpack into.
3647 unsigned NumElts = I.getNumOperands() - 1;
3648 Register SrcReg = I.getOperand(NumElts).getReg();
3649 const LLT NarrowTy = MRI.getType(I.getOperand(0).getReg());
3650 const LLT WideTy = MRI.getType(SrcReg);
3651 (void)WideTy;
3652 assert((WideTy.isVector() || WideTy.getSizeInBits() == 128) &&
3653 "can only unmerge from vector or s128 types!");
3654 assert(WideTy.getSizeInBits() > NarrowTy.getSizeInBits() &&
3655 "source register size too small!");
3656
3657 if (!NarrowTy.isScalar())
3658 return selectSplitVectorUnmerge(I, MRI);
3659
3660 MachineIRBuilder MIB(I);
3661
3662 // Choose a lane copy opcode and subregister based off of the size of the
3663 // vector's elements.
3664 unsigned CopyOpc = 0;
3665 unsigned ExtractSubReg = 0;
3666 if (!getLaneCopyOpcode(CopyOpc, ExtractSubReg, NarrowTy.getSizeInBits()))
3667 return false;
3668
3669 // Set up for the lane copies.
3670 MachineBasicBlock &MBB = *I.getParent();
3671
3672 // Stores the registers we'll be copying from.
3673 SmallVector<Register, 4> InsertRegs;
3674
3675 // We'll use the first register twice, so we only need NumElts-1 registers.
3676 unsigned NumInsertRegs = NumElts - 1;
3677
3678 // If our elements fit into exactly 128 bits, then we can copy from the source
3679 // directly. Otherwise, we need to do a bit of setup with some subregister
3680 // inserts.
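// For example (illustrative sketch, not part of the original comments): unmerging a
// <2 x s64> source, which is already 128 bits, can lane-copy straight out of SrcReg,
// while a 64-bit <2 x s32> source is first widened into an FPR128 register with the
// IMPLICIT_DEF + INSERT_SUBREG sequence below.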
3681 if (NarrowTy.getSizeInBits() * NumElts == 128) {
3682 InsertRegs = SmallVector<Register, 4>(NumInsertRegs, SrcReg);
3683 } else {
3684 // No. We have to perform subregister inserts. For each insert, create an
3685 // implicit def and a subregister insert, and save the register we create.
3686 for (unsigned Idx = 0; Idx < NumInsertRegs; ++Idx) {
3687 Register ImpDefReg = MRI.createVirtualRegister(&AArch64::FPR128RegClass);
3688 MachineInstr &ImpDefMI =
3689 *BuildMI(MBB, I, I.getDebugLoc(), TII.get(TargetOpcode::IMPLICIT_DEF),
3690 ImpDefReg);
3691
3692 // Now, create the subregister insert from SrcReg.
3693 Register InsertReg = MRI.createVirtualRegister(&AArch64::FPR128RegClass);
3694 MachineInstr &InsMI =
3695 *BuildMI(MBB, I, I.getDebugLoc(),
3696 TII.get(TargetOpcode::INSERT_SUBREG), InsertReg)
3697 .addUse(ImpDefReg)
3698 .addUse(SrcReg)
3699 .addImm(AArch64::dsub);
3700
3701 constrainSelectedInstRegOperands(ImpDefMI, TII, TRI, RBI);
3702 constrainSelectedInstRegOperands(InsMI, TII, TRI, RBI);
3703
3704 // Save the register so that we can copy from it after.
3705 InsertRegs.push_back(InsertReg);
3706 }
3707 }
3708
3709 // Now that we've created any necessary subregister inserts, we can
3710 // create the copies.
3711 //
3712 // Perform the first copy separately as a subregister copy.
3713 Register CopyTo = I.getOperand(0).getReg();
3714 auto FirstCopy = MIB.buildInstr(TargetOpcode::COPY, {CopyTo}, {})
3715 .addReg(InsertRegs[0], 0, ExtractSubReg);
3716 constrainSelectedInstRegOperands(*FirstCopy, TII, TRI, RBI);
3717
3718 // Now, perform the remaining copies as vector lane copies.
3719 unsigned LaneIdx = 1;
3720 for (Register InsReg : InsertRegs) {
3721 Register CopyTo = I.getOperand(LaneIdx).getReg();
3722 MachineInstr &CopyInst =
3723 *BuildMI(MBB, I, I.getDebugLoc(), TII.get(CopyOpc), CopyTo)
3724 .addUse(InsReg)
3725 .addImm(LaneIdx);
3726 constrainSelectedInstRegOperands(CopyInst, TII, TRI, RBI);
3727 ++LaneIdx;
3728 }
3729
3730 // Separately constrain the first copy's destination. Because of the
3731 // limitation in constrainOperandRegClass, we can't guarantee that this will
3732 // actually be constrained. So, do it ourselves using the second operand.
3733 const TargetRegisterClass *RC =
3734 MRI.getRegClassOrNull(I.getOperand(1).getReg());
3735 if (!RC) {
3736 LLVM_DEBUG(dbgs() << "Couldn't constrain copy destination.\n");
3737 return false;
3738 }
3739
3740 RBI.constrainGenericRegister(CopyTo, *RC, MRI);
3741 I.eraseFromParent();
3742 return true;
3743}
3744
3745bool AArch64InstructionSelector::selectConcatVectors(
3746 MachineInstr &I, MachineRegisterInfo &MRI) const {
3747 assert(I.getOpcode() == TargetOpcode::G_CONCAT_VECTORS &&
3748 "Unexpected opcode");
3749 Register Dst = I.getOperand(0).getReg();
3750 Register Op1 = I.getOperand(1).getReg();
3751 Register Op2 = I.getOperand(2).getReg();
3752 MachineIRBuilder MIRBuilder(I);
3753 MachineInstr *ConcatMI = emitVectorConcat(Dst, Op1, Op2, MIRBuilder);
3754 if (!ConcatMI)
3755 return false;
3756 I.eraseFromParent();
3757 return true;
3758}
3759
3760unsigned
3761AArch64InstructionSelector::emitConstantPoolEntry(const Constant *CPVal,
3762 MachineFunction &MF) const {
3763 Type *CPTy = CPVal->getType();
3764 Align Alignment = MF.getDataLayout().getPrefTypeAlign(CPTy);
3765
3766 MachineConstantPool *MCP = MF.getConstantPool();
3767 return MCP->getConstantPoolIndex(CPVal, Alignment);
3768}
3769
3770MachineInstr *AArch64InstructionSelector::emitLoadFromConstantPool(
3771 const Constant *CPVal, MachineIRBuilder &MIRBuilder) const {
3772 unsigned CPIdx = emitConstantPoolEntry(CPVal, MIRBuilder.getMF());
3773
3774 auto Adrp =
3775 MIRBuilder.buildInstr(AArch64::ADRP, {&AArch64::GPR64RegClass}, {})
3776 .addConstantPoolIndex(CPIdx, 0, AArch64II::MO_PAGE);
3777
3778 MachineInstr *LoadMI = nullptr;
3779 switch (MIRBuilder.getDataLayout().getTypeStoreSize(CPVal->getType())) {
3780 case 16:
3781 LoadMI =
3782 &*MIRBuilder
3783 .buildInstr(AArch64::LDRQui, {&AArch64::FPR128RegClass}, {Adrp})
3784 .addConstantPoolIndex(CPIdx, 0,
3785 AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
3786 break;
3787 case 8:
3788 LoadMI = &*MIRBuilder
3789 .buildInstr(AArch64::LDRDui, {&AArch64::FPR64RegClass}, {Adrp})
3790 .addConstantPoolIndex(
3791 CPIdx, 0, AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
3792 break;
3793 default:
3794 LLVM_DEBUG(dbgs() << "Could not load from constant pool of type "
3795 << *CPVal->getType());
3796 return nullptr;
3797 }
3798 constrainSelectedInstRegOperands(*Adrp, TII, TRI, RBI);
3799 constrainSelectedInstRegOperands(*LoadMI, TII, TRI, RBI);
3800 return LoadMI;
3801}
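// Illustrative sketch of the emitted pattern (the register names and the constant-pool
// label here are hypothetical): for a 16-byte constant the two instructions built above
// correspond roughly to
//   adrp x8, .LCPI0_0
//   ldr  q0, [x8, :lo12:.LCPI0_0]
// with LDRDui and a d register used instead for the 8-byte case.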
3802
3803/// Return an <Opcode, SubregIndex> pair to do a vector elt insert of a given
3804/// size and RB.
3805static std::pair<unsigned, unsigned>
3806getInsertVecEltOpInfo(const RegisterBank &RB, unsigned EltSize) {
3807 unsigned Opc, SubregIdx;
3808 if (RB.getID() == AArch64::GPRRegBankID) {
3809 if (EltSize == 16) {
3810 Opc = AArch64::INSvi16gpr;
3811 SubregIdx = AArch64::ssub;
3812 } else if (EltSize == 32) {
3813 Opc = AArch64::INSvi32gpr;
3814 SubregIdx = AArch64::ssub;
3815 } else if (EltSize == 64) {
3816 Opc = AArch64::INSvi64gpr;
3817 SubregIdx = AArch64::dsub;
3818 } else {
3819 llvm_unreachable("invalid elt size!");
3820 }
3821 } else {
3822 if (EltSize == 8) {
3823 Opc = AArch64::INSvi8lane;
3824 SubregIdx = AArch64::bsub;
3825 } else if (EltSize == 16) {
3826 Opc = AArch64::INSvi16lane;
3827 SubregIdx = AArch64::hsub;
3828 } else if (EltSize == 32) {
3829 Opc = AArch64::INSvi32lane;
3830 SubregIdx = AArch64::ssub;
3831 } else if (EltSize == 64) {
3832 Opc = AArch64::INSvi64lane;
3833 SubregIdx = AArch64::dsub;
3834 } else {
3835 llvm_unreachable("invalid elt size!");
3836 }
3837 }
3838 return std::make_pair(Opc, SubregIdx);
3839}
3840
3841MachineInstr *AArch64InstructionSelector::emitInstr(
3842 unsigned Opcode, std::initializer_list<llvm::DstOp> DstOps,
3843 std::initializer_list<llvm::SrcOp> SrcOps, MachineIRBuilder &MIRBuilder,
3844 const ComplexRendererFns &RenderFns) const {
3845 assert(Opcode && "Expected an opcode?");
3846 assert(!isPreISelGenericOpcode(Opcode) &&
3847 "Function should only be used to produce selected instructions!");
3848 auto MI = MIRBuilder.buildInstr(Opcode, DstOps, SrcOps);
3849 if (RenderFns)
3850 for (auto &Fn : *RenderFns)
3851 Fn(MI);
3852 constrainSelectedInstRegOperands(*MI, TII, TRI, RBI);
3853 return &*MI;
3854}
3855
3856MachineInstr *AArch64InstructionSelector::emitBinOp(
3857 const std::array<std::array<unsigned, 2>, 3> &AddrModeAndSizeToOpcode,
3858 Register Dst, MachineOperand &LHS, MachineOperand &RHS,
3859 MachineIRBuilder &MIRBuilder) const {
3860 MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
3861 assert(LHS.isReg() && RHS.isReg() && "Expected register operands?");
3862 auto Ty = MRI.getType(LHS.getReg());
3863 assert(Ty.isScalar() && "Expected a scalar?");
3864 unsigned Size = Ty.getSizeInBits();
3865 assert((Size == 32 || Size == 64) && "Expected a 32-bit or 64-bit type only");
3866 bool Is32Bit = Size == 32;
3867 if (auto Fns = selectArithImmed(RHS))
3868 return emitInstr(AddrModeAndSizeToOpcode[0][Is32Bit], {Dst}, {LHS},
3869 MIRBuilder, Fns);
3870 if (auto Fns = selectShiftedRegister(RHS))
3871 return emitInstr(AddrModeAndSizeToOpcode[1][Is32Bit], {Dst}, {LHS},
3872 MIRBuilder, Fns);
3873 return emitInstr(AddrModeAndSizeToOpcode[2][Is32Bit], {Dst}, {LHS, RHS},
3874 MIRBuilder);
3875}
3876
3877MachineInstr *
3878AArch64InstructionSelector::emitADD(Register DefReg, MachineOperand &LHS,
3879 MachineOperand &RHS,
3880 MachineIRBuilder &MIRBuilder) const {
3881 const std::array<std::array<unsigned, 2>, 3> OpcTable{
3882 {{AArch64::ADDXri, AArch64::ADDWri},
3883 {AArch64::ADDXrs, AArch64::ADDWrs},
3884 {AArch64::ADDXrr, AArch64::ADDWrr}}};
3885 return emitBinOp(OpcTable, DefReg, LHS, RHS, MIRBuilder);
3886}
3887
3888MachineInstr *
3889AArch64InstructionSelector::emitADDS(Register Dst, MachineOperand &LHS,
3890 MachineOperand &RHS,
3891 MachineIRBuilder &MIRBuilder) const {
3892 const std::array<std::array<unsigned, 2>, 3> OpcTable{
3893 {{AArch64::ADDSXri, AArch64::ADDSWri},
3894 {AArch64::ADDSXrs, AArch64::ADDSWrs},
3895 {AArch64::ADDSXrr, AArch64::ADDSWrr}}};
3896 return emitBinOp(OpcTable, Dst, LHS, RHS, MIRBuilder);
3897}
3898
3899MachineInstr *
3900AArch64InstructionSelector::emitCMN(MachineOperand &LHS, MachineOperand &RHS,
3901 MachineIRBuilder &MIRBuilder) const {
3902 MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
3903 bool Is32Bit = (MRI.getType(LHS.getReg()).getSizeInBits() == 32);
3904 return emitADDS(Is32Bit ? AArch64::WZR : AArch64::XZR, LHS, RHS, MIRBuilder);
3905}
3906
3907MachineInstr *
3908AArch64InstructionSelector::emitTST(const Register &LHS, const Register &RHS,
3909 MachineIRBuilder &MIRBuilder) const {
3910 MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
3911 unsigned RegSize = MRI.getType(LHS).getSizeInBits();
3912 bool Is32Bit = (RegSize == 32);
3913 static const unsigned OpcTable[2][2]{{AArch64::ANDSXrr, AArch64::ANDSXri},
3914 {AArch64::ANDSWrr, AArch64::ANDSWri}};
3915 Register ZReg = Is32Bit ? AArch64::WZR : AArch64::XZR;
3916
3917 // We might be able to fold in an immediate into the TST. We need to make sure
3918 // it's a logical immediate though, since ANDS requires that.
3919 auto ValAndVReg = getConstantVRegValWithLookThrough(RHS, MRI);
3920 bool IsImmForm = ValAndVReg.hasValue() &&
3921 AArch64_AM::isLogicalImmediate(ValAndVReg->Value, RegSize);
3922 unsigned Opc = OpcTable[Is32Bit][IsImmForm];
3923 auto TstMI = MIRBuilder.buildInstr(Opc, {ZReg}, {LHS});
3924
3925 if (IsImmForm)
3926 TstMI.addImm(
3927 AArch64_AM::encodeLogicalImmediate(ValAndVReg->Value, RegSize));
3928 else
3929 TstMI.addUse(RHS);
3930
3931 constrainSelectedInstRegOperands(*TstMI, TII, TRI, RBI);
3932 return &*TstMI;
3933}
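// Illustrative note (example values assumed): a check such as
// "G_ICMP eq, (G_AND %x, 0xff), 0" can use the immediate form built above, roughly
// "ands wzr, w0, #0xff" (i.e. "tst w0, #0xff"), since 0xff is a valid logical
// immediate; a mask that does not encode as a logical immediate falls back to the
// ANDS register form.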
3934
3935std::pair<MachineInstr *, CmpInst::Predicate>
3936AArch64InstructionSelector::emitIntegerCompare(
3937 MachineOperand &LHS, MachineOperand &RHS, MachineOperand &Predicate,
3938 MachineIRBuilder &MIRBuilder) const {
3939 assert(LHS.isReg() && RHS.isReg() && "Expected LHS and RHS to be registers!");
3940 assert(Predicate.isPredicate() && "Expected predicate?");
3941 MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
3942
3943 CmpInst::Predicate P = (CmpInst::Predicate)Predicate.getPredicate();
3944
3945 // Fold the compare if possible.
3946 MachineInstr *FoldCmp =
3947 tryFoldIntegerCompare(LHS, RHS, Predicate, MIRBuilder);
3948 if (FoldCmp)
3949 return {FoldCmp, P};
3950
3951 // Can't fold into a CMN. Just emit a normal compare.
3952 unsigned CmpOpc = 0;
3953 Register ZReg;
3954
3955 LLT CmpTy = MRI.getType(LHS.getReg());
3956 assert((CmpTy.isScalar() || CmpTy.isPointer()) &&
3957 "Expected scalar or pointer");
3958 if (CmpTy == LLT::scalar(32)) {
3959 CmpOpc = AArch64::SUBSWrr;
3960 ZReg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
3961 } else if (CmpTy == LLT::scalar(64) || CmpTy.isPointer()) {
3962 CmpOpc = AArch64::SUBSXrr;
3963 ZReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
3964 } else {
3965 return {nullptr, CmpInst::Predicate::BAD_ICMP_PREDICATE};
3966 }
3967
3968 // Try to match immediate forms.
3969 MachineInstr *ImmedCmp =
3970 tryOptArithImmedIntegerCompare(LHS, RHS, P, MIRBuilder);
3971 if (ImmedCmp)
3972 return {ImmedCmp, P};
3973
3974 // If we don't have an immediate, we may have a shift which can be folded
3975 // into the compare.
3976 MachineInstr *ShiftedCmp = tryOptArithShiftedCompare(LHS, RHS, MIRBuilder);
3977 if (ShiftedCmp)
3978 return {ShiftedCmp, P};
3979
3980 auto CmpMI =
3981 MIRBuilder.buildInstr(CmpOpc, {ZReg}, {LHS.getReg(), RHS.getReg()});
3982 // Make sure that we can constrain the compare that we emitted.
3983 constrainSelectedInstRegOperands(*CmpMI, TII, TRI, RBI);
3984 return {&*CmpMI, P};
3985}
3986
3987MachineInstr *AArch64InstructionSelector::emitVectorConcat(
3988 Optional<Register> Dst, Register Op1, Register Op2,
3989 MachineIRBuilder &MIRBuilder) const {
3990 // We implement a vector concat by:
3991 // 1. Use scalar_to_vector to insert the lower vector into the larger dest
3992 // 2. Insert the upper vector into the destination's upper element
3993 // TODO: some of this code is common with G_BUILD_VECTOR handling.
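 // Illustrative example (values assumed for the sketch): concatenating two <2 x s32>
 // operands, 64 bits each, first widens Op1 into the low half of a 128-bit register,
 // then inserts Op2 into lane 1 with an INSvi64lane-style instruction, yielding the
 // <4 x s32> result.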
3994 MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
3995
3996 const LLT Op1Ty = MRI.getType(Op1);
3997 const LLT Op2Ty = MRI.getType(Op2);
3998
3999 if (Op1Ty != Op2Ty) {
4000 LLVM_DEBUG(dbgs() << "Could not do vector concat of differing vector tys");
4001 return nullptr;
4002 }
4003 assert(Op1Ty.isVector() && "Expected a vector for vector concat");
4004
4005 if (Op1Ty.getSizeInBits() >= 128) {
4006 LLVM_DEBUG(dbgs() << "Vector concat not supported for full size vectors");
4007 return nullptr;
4008 }
4009
4010 // At the moment we just support 64 bit vector concats.
4011 if (Op1Ty.getSizeInBits() != 64) {
4012 LLVM_DEBUG(dbgs() << "Vector concat only supported for 64b vectors");
4013 return nullptr;
4014 }
4015
4016 const LLT ScalarTy = LLT::scalar(Op1Ty.getSizeInBits());
4017 const RegisterBank &FPRBank = *RBI.getRegBank(Op1, MRI, TRI);
4018 const TargetRegisterClass *DstRC =
4019 getMinClassForRegBank(FPRBank, Op1Ty.getSizeInBits() * 2);
4020
4021 MachineInstr *WidenedOp1 =
4022 emitScalarToVector(ScalarTy.getSizeInBits(), DstRC, Op1, MIRBuilder);
4023 MachineInstr *WidenedOp2 =
4024 emitScalarToVector(ScalarTy.getSizeInBits(), DstRC, Op2, MIRBuilder);
4025 if (!WidenedOp1 || !WidenedOp2) {
4026 LLVM_DEBUG(dbgs() << "Could not emit a vector from scalar value");
4027 return nullptr;
4028 }
4029
4030 // Now do the insert of the upper element.
4031 unsigned InsertOpc, InsSubRegIdx;
4032 std::tie(InsertOpc, InsSubRegIdx) =
4033 getInsertVecEltOpInfo(FPRBank, ScalarTy.getSizeInBits());
4034
4035 if (!Dst)
4036 Dst = MRI.createVirtualRegister(DstRC);
4037 auto InsElt =
4038 MIRBuilder
4039 .buildInstr(InsertOpc, {*Dst}, {WidenedOp1->getOperand(0).getReg()})
4040 .addImm(1) /* Lane index */
4041 .addUse(WidenedOp2->getOperand(0).getReg())
4042 .addImm(0);
4043 constrainSelectedInstRegOperands(*InsElt, TII, TRI, RBI);
4044 return &*InsElt;
4045}
4046
4047MachineInstr *AArch64InstructionSelector::emitFMovForFConstant(
4048 MachineInstr &I, MachineRegisterInfo &MRI) const {
4049 assert(I.getOpcode() == TargetOpcode::G_FCONSTANT &&
4050 "Expected a G_FCONSTANT!");
4051 MachineOperand &ImmOp = I.getOperand(1);
4052 unsigned DefSize = MRI.getType(I.getOperand(0).getReg()).getSizeInBits();
4053
4054 // Only handle 32 and 64 bit defs for now.
4055 if (DefSize != 32 && DefSize != 64)
4056 return nullptr;
4057
4058 // Don't handle null values using FMOV.
4059 if (ImmOp.getFPImm()->isNullValue())
4060 return nullptr;
4061
4062 // Get the immediate representation for the FMOV.
4063 const APFloat &ImmValAPF = ImmOp.getFPImm()->getValueAPF();
4064 int Imm = DefSize == 32 ? AArch64_AM::getFP32Imm(ImmValAPF)
4065 : AArch64_AM::getFP64Imm(ImmValAPF);
4066
4067 // If this is -1, it means the immediate can't be represented as the requested
4068 // floating point value. Bail.
4069 if (Imm == -1)
4070 return nullptr;
4071
4072 // Update MI to represent the new FMOV instruction, constrain it, and return.
4073 ImmOp.ChangeToImmediate(Imm);
4074 unsigned MovOpc = DefSize == 32 ? AArch64::FMOVSi : AArch64::FMOVDi;
4075 I.setDesc(TII.get(MovOpc));
4076 constrainSelectedInstRegOperands(I, TII, TRI, RBI);
4077 return &I;
4078}
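// Illustrative note: constants such as 1.0 or 0.5 fit the 8-bit FMOV immediate
// encoding and become roughly "fmov s0, #1.0", whereas a value like 0.1 is not
// representable, getFP32Imm/getFP64Imm return -1, and the caller has to materialize
// the constant some other way.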
4079
4080MachineInstr *
4081AArch64InstructionSelector::emitCSetForICMP(Register DefReg, unsigned Pred,
4082 MachineIRBuilder &MIRBuilder) const {
4083 // CSINC increments the result when the predicate is false. Invert it.
4084 const AArch64CC::CondCode InvCC = changeICMPPredToAArch64CC(
4085 CmpInst::getInversePredicate((CmpInst::Predicate)Pred));
4086 auto I =
4087 MIRBuilder
4088 .buildInstr(AArch64::CSINCWr, {DefReg}, {Register(AArch64::WZR), Register(AArch64::WZR)})
4089 .addImm(InvCC);
4090 constrainSelectedInstRegOperands(*I, TII, TRI, RBI);
4091 return &*I;
4092}
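// Illustrative note: for an equality compare (Pred == ICMP_EQ) the inverted condition
// is NE, so the instruction built above is roughly "csinc wN, wzr, wzr, ne", which is
// the usual expansion of "cset wN, eq".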
4093
4094bool AArch64InstructionSelector::tryOptSelect(MachineInstr &I) const {
4095 MachineIRBuilder MIB(I);
4096 MachineRegisterInfo &MRI = *MIB.getMRI();
4097 const TargetRegisterInfo &TRI = *MRI.getTargetRegisterInfo();
4098
4099 // We want to recognize this pattern:
4100 //
4101 // $z = G_FCMP pred, $x, $y
4102 // ...
4103 // $w = G_SELECT $z, $a, $b
4104 //
4105 // Where the value of $z is *only* ever used by the G_SELECT (possibly with
4106 // some copies/truncs in between.)
4107 //
4108 // If we see this, then we can emit something like this:
4109 //
4110 // fcmp $x, $y
4111 // fcsel $w, $a, $b, pred
4112 //
4113 // Rather than emitting both of the rather long sequences in the standard
4114 // G_FCMP/G_SELECT select methods.
4115
4116 // First, check if the condition is defined by a compare.
4117 MachineInstr *CondDef = MRI.getVRegDef(I.getOperand(1).getReg());
4118 while (CondDef) {
4119 // We can only fold if all of the defs have one use.
4120 Register CondDefReg = CondDef->getOperand(0).getReg();
4121 if (!MRI.hasOneNonDBGUse(CondDefReg)) {
4122 // Unless it's another select.
4123 for (const MachineInstr &UI : MRI.use_nodbg_instructions(CondDefReg)) {
4124 if (CondDef == &UI)
4125 continue;
4126 if (UI.getOpcode() != TargetOpcode::G_SELECT)
4127 return false;
4128 }
4129 }
4130
4131 // We can skip over G_TRUNC since the condition is 1-bit.
4132 // Truncating/extending can have no impact on the value.
4133 unsigned Opc = CondDef->getOpcode();
4134 if (Opc != TargetOpcode::COPY && Opc != TargetOpcode::G_TRUNC)
4135 break;
4136
4137 // Can't see past copies from physregs.
4138 if (Opc == TargetOpcode::COPY &&
4139 Register::isPhysicalRegister(CondDef->getOperand(1).getReg()))
4140 return false;
4141
4142 CondDef = MRI.getVRegDef(CondDef->getOperand(1).getReg());
4143 }
4144
4145 // Is the condition defined by a compare?
4146 if (!CondDef)
4147 return false;
4148
4149 unsigned CondOpc = CondDef->getOpcode();
4150 if (CondOpc != TargetOpcode::G_ICMP && CondOpc != TargetOpcode::G_FCMP)
4151 return false;
4152
4153 AArch64CC::CondCode CondCode;
4154 if (CondOpc == TargetOpcode::G_ICMP) {
4155 MachineInstr *Cmp;
4156 CmpInst::Predicate Pred;
4157
4158 std::tie(Cmp, Pred) =
4159 emitIntegerCompare(CondDef->getOperand(2), CondDef->getOperand(3),
4160 CondDef->getOperand(1), MIB);
4161
4162 if (!Cmp) {
4163 LLVM_DEBUG(dbgs() << "Couldn't emit compare for select!\n");
4164 return false;
4165 }
4166
4167 // Have to collect the CondCode after emitIntegerCompare, since it can
4168 // update the predicate.
4169 CondCode = changeICMPPredToAArch64CC(Pred);
4170 } else {
4171 // Get the condition code for the select.
4172 AArch64CC::CondCode CondCode2;
4173 changeFCMPPredToAArch64CC(
4174 (CmpInst::Predicate)CondDef->getOperand(1).getPredicate(), CondCode,
4175 CondCode2);
4176
4177 // changeFCMPPredToAArch64CC sets CondCode2 to AL when we require two
4178 // instructions to emit the comparison.
4179 // TODO: Handle FCMP_UEQ and FCMP_ONE. After that, this check will be
4180 // unnecessary.
4181 if (CondCode2 != AArch64CC::AL)
4182 return false;
4183
4184 // Make sure we'll be able to select the compare.
4185 unsigned CmpOpc = selectFCMPOpc(*CondDef, MRI);
4186 if (!CmpOpc)
4187 return false;
4188
4189 // Emit a new compare.
4190 auto Cmp = MIB.buildInstr(CmpOpc, {}, {CondDef->getOperand(2).getReg()});
4191 if (CmpOpc != AArch64::FCMPSri && CmpOpc != AArch64::FCMPDri)
4192 Cmp.addUse(CondDef->getOperand(3).getReg());
4193 constrainSelectedInstRegOperands(*Cmp, TII, TRI, RBI);
4194 }
4195
4196 // Emit the select.
4197 unsigned CSelOpc = selectSelectOpc(I, MRI, RBI);
4198 auto CSel =
4199 MIB.buildInstr(CSelOpc, {I.getOperand(0).getReg()},
4200 {I.getOperand(2).getReg(), I.getOperand(3).getReg()})
4201 .addImm(CondCode);
4202 constrainSelectedInstRegOperands(*CSel, TII, TRI, RBI);
4203 I.eraseFromParent();
4204 return true;
4205}
4206
4207MachineInstr *AArch64InstructionSelector::tryFoldIntegerCompare(
4208 MachineOperand &LHS, MachineOperand &RHS, MachineOperand &Predicate,
4209 MachineIRBuilder &MIRBuilder) const {
4210 assert(LHS.isReg() && RHS.isReg() && Predicate.isPredicate() &&
4211 "Unexpected MachineOperand");
4212 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
4213 // We want to find this sort of thing:
4214 // x = G_SUB 0, y
4215 // G_ICMP z, x
4216 //
4217 // In this case, we can fold the G_SUB into the G_ICMP using a CMN instead.
4218 // e.g:
4219 //
4220 // cmn z, y
4221
4222 // Helper lambda to detect the subtract followed by the compare.
4223 // Takes in the def of the LHS or RHS, and checks if it's a subtract from 0.
4224 auto IsCMN = [&](MachineInstr *DefMI, const AArch64CC::CondCode &CC) {
4225 if (!DefMI || DefMI->getOpcode() != TargetOpcode::G_SUB)
4226 return false;
4227
4228 // Need to make sure NZCV is the same at the end of the transformation.
4229 if (CC != AArch64CC::EQ && CC != AArch64CC::NE)
4230 return false;
4231
4232 // We want to match against SUBs.
4233 if (DefMI->getOpcode() != TargetOpcode::G_SUB)
4234 return false;
4235
4236 // Make sure that we're getting
4237 // x = G_SUB 0, y
4238 auto ValAndVReg =
4239 getConstantVRegValWithLookThrough(DefMI->getOperand(1).getReg(), MRI);
4240 if (!ValAndVReg || ValAndVReg->Value != 0)
4241 return false;
4242
4243 // This can safely be represented as a CMN.
4244 return true;
4245 };
4246
4247 // Check if the RHS or LHS of the G_ICMP is defined by a SUB
4248 MachineInstr *LHSDef = getDefIgnoringCopies(LHS.getReg(), MRI);
4249 MachineInstr *RHSDef = getDefIgnoringCopies(RHS.getReg(), MRI);
4250 CmpInst::Predicate P = (CmpInst::Predicate)Predicate.getPredicate();
4251 const AArch64CC::CondCode CC = changeICMPPredToAArch64CC(P);
4252
4253 // Given this:
4254 //
4255 // x = G_SUB 0, y
4256 // G_ICMP x, z
4257 //
4258 // Produce this:
4259 //
4260 // cmn y, z
4261 if (IsCMN(LHSDef, CC))
4262 return emitCMN(LHSDef->getOperand(2), RHS, MIRBuilder);
4263
4264 // Same idea here, but with the RHS of the compare instead:
4265 //
4266 // Given this:
4267 //
4268 // x = G_SUB 0, y
4269 // G_ICMP z, x
4270 //
4271 // Produce this:
4272 //
4273 // cmn z, y
4274 if (IsCMN(RHSDef, CC))
4275 return emitCMN(LHS, RHSDef->getOperand(2), MIRBuilder);
4276
4277 // Given this:
4278 //
4279 // z = G_AND x, y
4280 // G_ICMP z, 0
4281 //
4282 // Produce this if the compare is signed:
4283 //
4284 // tst x, y
4285 if (!isUnsignedICMPPred(P) && LHSDef &&
4286 LHSDef->getOpcode() == TargetOpcode::G_AND) {
4287 // Make sure that the RHS is 0.
4288 auto ValAndVReg = getConstantVRegValWithLookThrough(RHS.getReg(), MRI);
4289 if (!ValAndVReg || ValAndVReg->Value != 0)
4290 return nullptr;
4291
4292 return emitTST(LHSDef->getOperand(1).getReg(),
4293 LHSDef->getOperand(2).getReg(), MIRBuilder);
4294 }
4295
4296 return nullptr;
4297}
4298
4299MachineInstr *AArch64InstructionSelector::tryOptArithImmedIntegerCompare(
4300 MachineOperand &LHS, MachineOperand &RHS, CmpInst::Predicate &P,
4301 MachineIRBuilder &MIB) const {
4302 // Attempt to select the immediate form of an integer compare.
4303 MachineRegisterInfo &MRI = *MIB.getMRI();
4304 auto Ty = MRI.getType(LHS.getReg());
4305 assert(!Ty.isVector() && "Expected scalar or pointer only?");
4306 unsigned Size = Ty.getSizeInBits();
4307 assert((Size == 32 || Size == 64) &&
4308 "Expected 32 bit or 64 bit compare only?");
4309
4310 // Check if this is a case we can already handle.
4311 InstructionSelector::ComplexRendererFns ImmFns;
4312 ImmFns = selectArithImmed(RHS);
4313
4314 if (!ImmFns) {
4315 // We didn't get a rendering function, but we may still have a constant.
4316 auto MaybeImmed = getImmedFromMO(RHS);
4317 if (!MaybeImmed)
4318 return nullptr;
4319
4320 // We have a constant, but it doesn't fit. Try adjusting it by one and
4321 // updating the predicate if possible.
4322 uint64_t C = *MaybeImmed;
4323 CmpInst::Predicate NewP;
4324 switch (P) {
4325 default:
4326 return nullptr;
4327 case CmpInst::ICMP_SLT:
4328 case CmpInst::ICMP_SGE:
4329 // Check for
4330 //
4331 // x slt c => x sle c - 1
4332 // x sge c => x sgt c - 1
4333 //
4334 // When c is not the smallest possible negative number.
4335 if ((Size == 64 && static_cast<int64_t>(C) == INT64_MIN) ||
4336 (Size == 32 && static_cast<int32_t>(C) == INT32_MIN))
4337 return nullptr;
4338 NewP = (P == CmpInst::ICMP_SLT) ? CmpInst::ICMP_SLE : CmpInst::ICMP_SGT;
4339 C -= 1;
4340 break;
4341 case CmpInst::ICMP_ULT:
4342 case CmpInst::ICMP_UGE:
4343 // Check for
4344 //
4345 // x ult c => x ule c - 1
4346 // x uge c => x ugt c - 1
4347 //
4348 // When c is not zero.
4349 if (C == 0)
4350 return nullptr;
4351 NewP = (P == CmpInst::ICMP_ULT) ? CmpInst::ICMP_ULE : CmpInst::ICMP_UGT;
4352 C -= 1;
4353 break;
4354 case CmpInst::ICMP_SLE:
4355 case CmpInst::ICMP_SGT:
4356 // Check for
4357 //
4358 // x sle c => x slt c + 1
4359 // x sgt c => x sge c + 1
4360 //
4361 // When c is not the largest possible signed integer.
4362 if ((Size == 32 && static_cast<int32_t>(C) == INT32_MAX) ||
4363 (Size == 64 && static_cast<int64_t>(C) == INT64_MAX))
4364 return nullptr;
4365 NewP = (P == CmpInst::ICMP_SLE) ? CmpInst::ICMP_SLT : CmpInst::ICMP_SGE;
4366 C += 1;
4367 break;
4368 case CmpInst::ICMP_ULE:
4369 case CmpInst::ICMP_UGT:
4370 // Check for
4371 //
4372 // x ule c => x ult c + 1
4373 // x ugt c => x uge c + 1
4374 //
4375 // When c is not the largest possible unsigned integer.
4376 if ((Size == 32 && static_cast<uint32_t>(C) == UINT32_MAX) ||
4377 (Size == 64 && C == UINT64_MAX))
4378 return nullptr;
4379 NewP = (P == CmpInst::ICMP_ULE) ? CmpInst::ICMP_ULT : CmpInst::ICMP_UGE;
4380 C += 1;
4381 break;
4382 }
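 // Worked example (illustrative, values assumed): a 64-bit "x ult 0x1001" cannot use
 // the immediate form directly, since 0x1001 is not a valid 12-bit (optionally
 // shifted) arithmetic immediate, but C - 1 = 0x1000 encodes as #1, lsl #12, so the
 // compare is rewritten to "x ule 0x1000" and selected as roughly
 // "subs xzr, x0, #1, lsl #12".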
4383
4384 // Check if the new constant is valid.
4385 if (Size == 32)
4386 C = static_cast<uint32_t>(C);
4387 ImmFns = select12BitValueWithLeftShift(C);
4388 if (!ImmFns)
4389 return nullptr;
4390 P = NewP;
4391 }
4392
4393 // At this point, we know we can select an immediate form. Go ahead and do
4394 // that.
4395 Register ZReg;
4396 unsigned Opc;
4397 if (Size == 32) {
4398 ZReg = AArch64::WZR;
4399 Opc = AArch64::SUBSWri;
4400 } else {
4401 ZReg = AArch64::XZR;
4402 Opc = AArch64::SUBSXri;
4403 }
4404
4405 auto CmpMI = MIB.buildInstr(Opc, {ZReg}, {LHS.getReg()});
4406 for (auto &RenderFn : *ImmFns)
4407 RenderFn(CmpMI);
4408 constrainSelectedInstRegOperands(*CmpMI, TII, TRI, RBI);
4409 return &*CmpMI;
4410}
4411
4412MachineInstr *AArch64InstructionSelector::tryOptArithShiftedCompare(
4413 MachineOperand &LHS, MachineOperand &RHS, MachineIRBuilder &MIB) const {
4414 // We are looking for the following pattern:
4415 //
4416 // shift = G_SHL/G_ASHR/G_LSHR y, c
4417 // ...
4418 // cmp = G_ICMP pred, something, shift
4419 //
4420 // Since we will select the G_ICMP to a SUBS, we can potentially fold the
4421 // shift into the subtract.
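 // Illustrative example (register names assumed): if "%shift = G_SHL %y, 3" feeds a
 // 64-bit G_ICMP, the fold emits roughly "subs xzr, x0, x1, lsl #3" instead of a
 // separate shift followed by a compare.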
4422 static const unsigned OpcTable[2] = {AArch64::SUBSWrs, AArch64::SUBSXrs};
4423 static const Register ZRegTable[2] = {AArch64::WZR, AArch64::XZR};
4424 auto ImmFns = selectShiftedRegister(RHS);
4425 if (!ImmFns)
4426 return nullptr;
4427 MachineRegisterInfo &MRI = *MIB.getMRI();
4428 auto Ty = MRI.getType(LHS.getReg());
4429 assert(!Ty.isVector() && "Expected scalar or pointer only?");
4430 unsigned Size = Ty.getSizeInBits();
4431 bool Idx = (Size == 64);
4432 Register ZReg = ZRegTable[Idx];
4433 unsigned Opc = OpcTable[Idx];
4434 auto CmpMI = MIB.buildInstr(Opc, {ZReg}, {LHS.getReg()});
4435 for (auto &RenderFn : *ImmFns)
4436 RenderFn(CmpMI);
4437 constrainSelectedInstRegOperands(*CmpMI, TII, TRI, RBI);
4438 return &*CmpMI;
4439}
4440
4441bool AArch64InstructionSelector::tryOptShuffleDupLane(
4442 MachineInstr &I, LLT DstTy, LLT SrcTy, ArrayRef<int> Mask,
4443 MachineRegisterInfo &MRI) const {
4444 assert(I.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
4445
4446 // We assume that scalar->vector splats have been handled in the
4447 // post-legalizer combiner to G_DUP. However splats of a source vector's
4448 // lane don't fit that pattern, detect it here:
4449 // %res = G_SHUFFLE_VECTOR %src:<n x ty>, undef, <n x i32> splat(lane-idx)
4450 // =>
4451 // %res = DUPv[N][Ty]lane %src, lane-idx
4452 // FIXME: this case should be covered by re-implementing the perfect shuffle
4453 // codegen mechanism.
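 // Illustrative example: a splat mask of <1, 1, 1, 1> on a <4 x s32> source matches
 // this pattern and becomes "%res = DUPv4i32lane %src, 1".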
4454
4455 auto LaneIdx = getSplatIndex(I);
4456 if (!LaneIdx)
4457 return false;
4458
4459 // The lane idx should be within the first source vector.
4460 if (*LaneIdx >= SrcTy.getNumElements())
4461 return false;
4462
4463 if (DstTy != SrcTy)
4464 return false;
4465
4466 LLT ScalarTy = SrcTy.getElementType();
4467 unsigned ScalarSize = ScalarTy.getSizeInBits();
4468
4469 unsigned Opc = 0;
4470 switch (SrcTy.getNumElements()) {
4471 case 2:
4472 if (ScalarSize == 64)
4473 Opc = AArch64::DUPv2i64lane;
4474 break;
4475 case 4:
4476 if (ScalarSize == 32)
4477 Opc = AArch64::DUPv4i32lane;
4478 break;
4479 case 8:
4480 if (ScalarSize == 16)
4481 Opc = AArch64::DUPv8i16lane;
4482 break;
4483 case 16:
4484 if (ScalarSize == 8)
4485 Opc = AArch64::DUPv16i8lane;
4486 break;
4487 default:
4488 break;
4489 }
4490 if (!Opc)
4491 return false;
4492
4493 MachineIRBuilder MIB(I);
4494 auto Dup = MIB.buildInstr(Opc, {I.getOperand(0).getReg()},
4495 {I.getOperand(1).getReg()})
4496 .addImm(*LaneIdx);
4497 constrainSelectedInstRegOperands(*Dup, TII, TRI, RBI);
4498 I.eraseFromParent();
4499 return true;
4500}
4501
4502bool AArch64InstructionSelector::selectShuffleVector(
4503 MachineInstr &I, MachineRegisterInfo &MRI) const {
4504 const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
4505 Register Src1Reg = I.getOperand(1).getReg();
4506 const LLT Src1Ty = MRI.getType(Src1Reg);
4507 Register Src2Reg = I.getOperand(2).getReg();
4508 const LLT Src2Ty = MRI.getType(Src2Reg);
4509 ArrayRef<int> Mask = I.getOperand(3).getShuffleMask();
4510
4511 MachineBasicBlock &MBB = *I.getParent();
4512 MachineFunction &MF = *MBB.getParent();
4513 LLVMContext &Ctx = MF.getFunction().getContext();
4514
4515 // G_SHUFFLE_VECTOR is weird in that the source operands can be scalars, if
4516 // it's originated from a <1 x T> type. Those should have been lowered into
4517 // G_BUILD_VECTOR earlier.
4518 if (!Src1Ty.isVector() || !Src2Ty.isVector()) {
4519 LLVM_DEBUG(dbgs() << "Could not select a \"scalar\" G_SHUFFLE_VECTOR\n");
4520 return false;
4521 }
4522
4523 if (tryOptShuffleDupLane(I, DstTy, Src1Ty, Mask, MRI))
4524 return true;
4525
4526 unsigned BytesPerElt = DstTy.getElementType().getSizeInBits() / 8;
4527
4528 SmallVector<Constant *, 64> CstIdxs;
4529 for (int Val : Mask) {
4530 // For now, any undef indexes we'll just assume to be 0. This should be
4531 // optimized in future, e.g. to select DUP etc.
4532 Val = Val < 0 ? 0 : Val;
4533 for (unsigned Byte = 0; Byte < BytesPerElt; ++Byte) {
4534 unsigned Offset = Byte + Val * BytesPerElt;
4535 CstIdxs.emplace_back(ConstantInt::get(Type::getInt8Ty(Ctx), Offset));
4536 }
4537 }
4538
4539 MachineIRBuilder MIRBuilder(I);
4540
4541 // Use a constant pool to load the index vector for TBL.
4542 Constant *CPVal = ConstantVector::get(CstIdxs);
4543 MachineInstr *IndexLoad = emitLoadFromConstantPool(CPVal, MIRBuilder);
4544 if (!IndexLoad) {
4545 LLVM_DEBUG(dbgs() << "Could not load from a constant pool");
4546 return false;
4547 }
4548
4549 if (DstTy.getSizeInBits() != 128) {
4550 assert(DstTy.getSizeInBits() == 64 && "Unexpected shuffle result ty");
4551 // This case can be done with TBL1.
4552 MachineInstr *Concat = emitVectorConcat(None, Src1Reg, Src2Reg, MIRBuilder);
4553 if (!Concat) {
4554 LLVM_DEBUG(dbgs() << "Could not do vector concat for tbl1");
4555 return false;
4556 }
4557
4558 // The constant pool load will be 64 bits, so need to convert to FPR128 reg.
4559 IndexLoad =
4560 emitScalarToVector(64, &AArch64::FPR128RegClass,
4561 IndexLoad->getOperand(0).getReg(), MIRBuilder);
4562
4563 auto TBL1 = MIRBuilder.buildInstr(
4564 AArch64::TBLv16i8One, {&AArch64::FPR128RegClass},
4565 {Concat->getOperand(0).getReg(), IndexLoad->getOperand(0).getReg()});
4566 constrainSelectedInstRegOperands(*TBL1, TII, TRI, RBI);
4567
4568 auto Copy =
4569 MIRBuilder
4570 .buildInstr(TargetOpcode::COPY, {I.getOperand(0).getReg()}, {})
4571 .addReg(TBL1.getReg(0), 0, AArch64::dsub);
4572 RBI.constrainGenericRegister(Copy.getReg(0), AArch64::FPR64RegClass, MRI);
4573 I.eraseFromParent();
4574 return true;
4575 }
4576
4577 // For TBL2 we need to emit a REG_SEQUENCE to tie together two consecutive
4578 // Q registers for regalloc.
4579 auto RegSeq = MIRBuilder
4580 .buildInstr(TargetOpcode::REG_SEQUENCE,
4581 {&AArch64::QQRegClass}, {Src1Reg})
4582 .addImm(AArch64::qsub0)
4583 .addUse(Src2Reg)
4584 .addImm(AArch64::qsub1);
4585
4586 auto TBL2 = MIRBuilder.buildInstr(AArch64::TBLv16i8Two, {I.getOperand(0)},
4587 {RegSeq, IndexLoad->getOperand(0)});
4588 constrainSelectedInstRegOperands(*RegSeq, TII, TRI, RBI);
4589 constrainSelectedInstRegOperands(*TBL2, TII, TRI, RBI);
4590 I.eraseFromParent();
4591 return true;
4592}
4593
4594MachineInstr *AArch64InstructionSelector::emitLaneInsert(
4595 Optional<Register> DstReg, Register SrcReg, Register EltReg,
4596 unsigned LaneIdx, const RegisterBank &RB,
4597 MachineIRBuilder &MIRBuilder) const {
4598 MachineInstr *InsElt = nullptr;
4599 const TargetRegisterClass *DstRC = &AArch64::FPR128RegClass;
4600 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
4601
4602 // Create a register to define with the insert if one wasn't passed in.
4603 if (!DstReg)
4604 DstReg = MRI.createVirtualRegister(DstRC);
4605
4606 unsigned EltSize = MRI.getType(EltReg).getSizeInBits();
4607 unsigned Opc = getInsertVecEltOpInfo(RB, EltSize).first;
4608
4609 if (RB.getID() == AArch64::FPRRegBankID) {
4610 auto InsSub = emitScalarToVector(EltSize, DstRC, EltReg, MIRBuilder);
4611 InsElt = MIRBuilder.buildInstr(Opc, {*DstReg}, {SrcReg})
4612 .addImm(LaneIdx)
4613 .addUse(InsSub->getOperand(0).getReg())
4614 .addImm(0);
4615 } else {
4616 InsElt = MIRBuilder.buildInstr(Opc, {*DstReg}, {SrcReg})
4617 .addImm(LaneIdx)
4618 .addUse(EltReg);
4619 }
4620
4621 constrainSelectedInstRegOperands(*InsElt, TII, TRI, RBI);
4622 return InsElt;
4623}
4624
4625bool AArch64InstructionSelector::selectInsertElt(
4626 MachineInstr &I, MachineRegisterInfo &MRI) const {
4627 assert(I.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT);
4628
4629 // Get information on the destination.
4630 Register DstReg = I.getOperand(0).getReg();
4631 const LLT DstTy = MRI.getType(DstReg);
4632 unsigned VecSize = DstTy.getSizeInBits();
4633
4634 // Get information on the element we want to insert into the destination.
4635 Register EltReg = I.getOperand(2).getReg();
4636 const LLT EltTy = MRI.getType(EltReg);
4637 unsigned EltSize = EltTy.getSizeInBits();
4638 if (EltSize < 16 || EltSize > 64)
4639 return false; // Don't support all element types yet.
4640
4641 // Find the definition of the index. Bail out if it's not defined by a
4642 // G_CONSTANT.
4643 Register IdxReg = I.getOperand(3).getReg();
4644 auto VRegAndVal = getConstantVRegValWithLookThrough(IdxReg, MRI);
4645 if (!VRegAndVal)
4646 return false;
4647 unsigned LaneIdx = VRegAndVal->Value;
4648
4649 // Perform the lane insert.
4650 Register SrcReg = I.getOperand(1).getReg();
4651 const RegisterBank &EltRB = *RBI.getRegBank(EltReg, MRI, TRI);
4652 MachineIRBuilder MIRBuilder(I);
4653
4654 if (VecSize < 128) {
4655 // If the vector we're inserting into is smaller than 128 bits, widen it
4656 // to 128 to do the insert.
4657 MachineInstr *ScalarToVec = emitScalarToVector(
4658 VecSize, &AArch64::FPR128RegClass, SrcReg, MIRBuilder);
4659 if (!ScalarToVec)
4660 return false;
4661 SrcReg = ScalarToVec->getOperand(0).getReg();
4662 }
4663
4664 // Create an insert into a new FPR128 register.
4665 // Note that if our vector is already 128 bits, we end up emitting an extra
4666 // register.
4667 MachineInstr *InsMI =
4668 emitLaneInsert(None, SrcReg, EltReg, LaneIdx, EltRB, MIRBuilder);
4669
4670 if (VecSize < 128) {
4671 // If we had to widen to perform the insert, then we have to demote back to
4672 // the original size to get the result we want.
4673 Register DemoteVec = InsMI->getOperand(0).getReg();
4674 const TargetRegisterClass *RC =
4675 getMinClassForRegBank(*RBI.getRegBank(DemoteVec, MRI, TRI), VecSize);
4676 if (RC != &AArch64::FPR32RegClass && RC != &AArch64::FPR64RegClass) {
4677 LLVM_DEBUG(dbgs() << "Unsupported register class!\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "Unsupported register class!\n"
; } } while (false)
;
4678 return false;
4679 }
4680 unsigned SubReg = 0;
4681 if (!getSubRegForClass(RC, TRI, SubReg))
4682 return false;
4683 if (SubReg != AArch64::ssub && SubReg != AArch64::dsub) {
4684 LLVM_DEBUG(dbgs() << "Unsupported destination size! (" << VecSizedo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "Unsupported destination size! ("
<< VecSize << "\n"; } } while (false)
4685 << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "Unsupported destination size! ("
<< VecSize << "\n"; } } while (false)
;
4686 return false;
4687 }
4688 MIRBuilder.buildInstr(TargetOpcode::COPY, {DstReg}, {})
4689 .addReg(DemoteVec, 0, SubReg);
4690 RBI.constrainGenericRegister(DstReg, *RC, MRI);
4691 } else {
4692 // No widening needed.
4693 InsMI->getOperand(0).setReg(DstReg);
4694 constrainSelectedInstRegOperands(*InsMI, TII, TRI, RBI);
4695 }
4696
4697 I.eraseFromParent();
4698 return true;
4699}
4700
4701bool AArch64InstructionSelector::tryOptConstantBuildVec(
4702 MachineInstr &I, LLT DstTy, MachineRegisterInfo &MRI) const {
4703 assert(I.getOpcode() == TargetOpcode::G_BUILD_VECTOR)((I.getOpcode() == TargetOpcode::G_BUILD_VECTOR) ? static_cast
<void> (0) : __assert_fail ("I.getOpcode() == TargetOpcode::G_BUILD_VECTOR"
, "/build/llvm-toolchain-snapshot-12~++20200926111128+c6c5629f2fb/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 4703, __PRETTY_FUNCTION__))
;
4704 assert(DstTy.getSizeInBits() <= 128 && "Unexpected build_vec type!")((DstTy.getSizeInBits() <= 128 && "Unexpected build_vec type!"
) ? static_cast<void> (0) : __assert_fail ("DstTy.getSizeInBits() <= 128 && \"Unexpected build_vec type!\""
, "/build/llvm-toolchain-snapshot-12~++20200926111128+c6c5629f2fb/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 4704, __PRETTY_FUNCTION__))
;
4705 if (DstTy.getSizeInBits() < 32)
4706 return false;
4707 // Check if we're building a constant vector, in which case we want to
4708 // generate a constant pool load instead of a vector insert sequence.
4709 SmallVector<Constant *, 16> Csts;
4710 for (unsigned Idx = 1; Idx < I.getNumOperands(); ++Idx) {
4711 // Try to find G_CONSTANT or G_FCONSTANT
4712 auto *OpMI =
4713 getOpcodeDef(TargetOpcode::G_CONSTANT, I.getOperand(Idx).getReg(), MRI);
4714 if (OpMI)
4715 Csts.emplace_back(
4716 const_cast<ConstantInt *>(OpMI->getOperand(1).getCImm()));
4717 else if ((OpMI = getOpcodeDef(TargetOpcode::G_FCONSTANT,
4718 I.getOperand(Idx).getReg(), MRI)))
4719 Csts.emplace_back(
4720 const_cast<ConstantFP *>(OpMI->getOperand(1).getFPImm()));
4721 else
4722 return false;
4723 }
4724 Constant *CV = ConstantVector::get(Csts);
4725 MachineIRBuilder MIB(I);
4726 auto *CPLoad = emitLoadFromConstantPool(CV, MIB);
4727 if (!CPLoad) {
4728 LLVM_DEBUG(dbgs() << "Could not generate cp load for build_vector")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "Could not generate cp load for build_vector"
; } } while (false)
;
4729 return false;
4730 }
4731 MIB.buildCopy(I.getOperand(0), CPLoad->getOperand(0));
4732 RBI.constrainGenericRegister(I.getOperand(0).getReg(),
4733 *MRI.getRegClass(CPLoad->getOperand(0).getReg()),
4734 MRI);
4735 I.eraseFromParent();
4736 return true;
4737}
4738
4739bool AArch64InstructionSelector::selectBuildVector(
4740 MachineInstr &I, MachineRegisterInfo &MRI) const {
4741 assert(I.getOpcode() == TargetOpcode::G_BUILD_VECTOR)((I.getOpcode() == TargetOpcode::G_BUILD_VECTOR) ? static_cast
<void> (0) : __assert_fail ("I.getOpcode() == TargetOpcode::G_BUILD_VECTOR"
, "/build/llvm-toolchain-snapshot-12~++20200926111128+c6c5629f2fb/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 4741, __PRETTY_FUNCTION__))
;
4742 // Until we port more of the optimized selections, for now just use a vector
4743 // insert sequence.
4744 const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
4745 const LLT EltTy = MRI.getType(I.getOperand(1).getReg());
4746 unsigned EltSize = EltTy.getSizeInBits();
4747
4748 if (tryOptConstantBuildVec(I, DstTy, MRI))
4749 return true;
4750 if (EltSize < 16 || EltSize > 64)
4751 return false; // Don't support all element types yet.
4752 const RegisterBank &RB = *RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI);
4753 MachineIRBuilder MIRBuilder(I);
4754
4755 const TargetRegisterClass *DstRC = &AArch64::FPR128RegClass;
4756 MachineInstr *ScalarToVec =
4757 emitScalarToVector(DstTy.getElementType().getSizeInBits(), DstRC,
4758 I.getOperand(1).getReg(), MIRBuilder);
4759 if (!ScalarToVec)
4760 return false;
4761
4762 Register DstVec = ScalarToVec->getOperand(0).getReg();
4763 unsigned DstSize = DstTy.getSizeInBits();
4764
4765 // Keep track of the last MI we inserted. Later on, we might be able to save
4766 // a copy using it.
4767 MachineInstr *PrevMI = nullptr;
4768 for (unsigned i = 2, e = DstSize / EltSize + 1; i < e; ++i) {
4769 // Note that if we don't do a subregister copy, we can end up making an
4770 // extra register.
4771 PrevMI = &*emitLaneInsert(None, DstVec, I.getOperand(i).getReg(), i - 1, RB,
4772 MIRBuilder);
4773 DstVec = PrevMI->getOperand(0).getReg();
4774 }
4775
4776 // If DstTy's size in bits is less than 128, then emit a subregister copy
4777 // from DstVec to the last register we've defined.
4778 if (DstSize < 128) {
4779 // Force this to be FPR using the destination vector.
4780 const TargetRegisterClass *RC =
4781 getMinClassForRegBank(*RBI.getRegBank(DstVec, MRI, TRI), DstSize);
4782 if (!RC)
4783 return false;
4784 if (RC != &AArch64::FPR32RegClass && RC != &AArch64::FPR64RegClass) {
4785 LLVM_DEBUG(dbgs() << "Unsupported register class!\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "Unsupported register class!\n"
; } } while (false)
;
4786 return false;
4787 }
4788
4789 unsigned SubReg = 0;
4790 if (!getSubRegForClass(RC, TRI, SubReg))
4791 return false;
4792 if (SubReg != AArch64::ssub && SubReg != AArch64::dsub) {
4793 LLVM_DEBUG(dbgs() << "Unsupported destination size! (" << DstSizedo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "Unsupported destination size! ("
<< DstSize << "\n"; } } while (false)
4794 << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "Unsupported destination size! ("
<< DstSize << "\n"; } } while (false)
;
4795 return false;
4796 }
4797
4798 Register Reg = MRI.createVirtualRegister(RC);
4799 Register DstReg = I.getOperand(0).getReg();
4800
4801 MIRBuilder.buildInstr(TargetOpcode::COPY, {DstReg}, {})
4802 .addReg(DstVec, 0, SubReg);
4803 MachineOperand &RegOp = I.getOperand(1);
4804 RegOp.setReg(Reg);
4805 RBI.constrainGenericRegister(DstReg, *RC, MRI);
4806 } else {
4807 // We don't need a subregister copy. Save a copy by re-using the
4808 // destination register on the final insert.
4809 assert(PrevMI && "PrevMI was null?")((PrevMI && "PrevMI was null?") ? static_cast<void
> (0) : __assert_fail ("PrevMI && \"PrevMI was null?\""
, "/build/llvm-toolchain-snapshot-12~++20200926111128+c6c5629f2fb/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 4809, __PRETTY_FUNCTION__))
;
4810 PrevMI->getOperand(0).setReg(I.getOperand(0).getReg());
4811 constrainSelectedInstRegOperands(*PrevMI, TII, TRI, RBI);
4812 }
4813
4814 I.eraseFromParent();
4815 return true;
4816}
4817
4818/// Helper function to find an intrinsic ID on a MachineInstr. Returns the
4819/// ID if it exists, and 0 otherwise.
4820static unsigned findIntrinsicID(MachineInstr &I) {
4821 auto IntrinOp = find_if(I.operands(), [&](const MachineOperand &Op) {
4822 return Op.isIntrinsicID();
4823 });
4824 if (IntrinOp == I.operands_end())
4825 return 0;
4826 return IntrinOp->getIntrinsicID();
4827}
4828
4829bool AArch64InstructionSelector::selectIntrinsicWithSideEffects(
4830 MachineInstr &I, MachineRegisterInfo &MRI) const {
4831 // Find the intrinsic ID.
4832 unsigned IntrinID = findIntrinsicID(I);
4833 if (!IntrinID)
4834 return false;
4835 MachineIRBuilder MIRBuilder(I);
4836
4837 // Select the instruction.
4838 switch (IntrinID) {
4839 default:
4840 return false;
4841 case Intrinsic::trap:
4842 MIRBuilder.buildInstr(AArch64::BRK, {}, {}).addImm(1);
4843 break;
4844 case Intrinsic::debugtrap:
4845 MIRBuilder.buildInstr(AArch64::BRK, {}, {}).addImm(0xF000);
4846 break;
4847 }
4848
4849 I.eraseFromParent();
4850 return true;
4851}
4852
4853bool AArch64InstructionSelector::selectIntrinsic(MachineInstr &I,
4854 MachineRegisterInfo &MRI) {
4855 unsigned IntrinID = findIntrinsicID(I);
4856 if (!IntrinID)
4857 return false;
4858 MachineIRBuilder MIRBuilder(I);
4859
4860 switch (IntrinID) {
4861 default:
4862 break;
4863 case Intrinsic::aarch64_crypto_sha1h: {
4864 Register DstReg = I.getOperand(0).getReg();
4865 Register SrcReg = I.getOperand(2).getReg();
4866
4867 // FIXME: Should this be an assert?
4868 if (MRI.getType(DstReg).getSizeInBits() != 32 ||
4869 MRI.getType(SrcReg).getSizeInBits() != 32)
4870 return false;
4871
4872 // The operation has to happen on FPRs. Set up some new FPR registers for
4873 // the source and destination if they are on GPRs.
4874 if (RBI.getRegBank(SrcReg, MRI, TRI)->getID() != AArch64::FPRRegBankID) {
4875 SrcReg = MRI.createVirtualRegister(&AArch64::FPR32RegClass);
4876 MIRBuilder.buildCopy({SrcReg}, {I.getOperand(2)});
4877
4878 // Make sure the copy ends up getting constrained properly.
4879 RBI.constrainGenericRegister(I.getOperand(2).getReg(),
4880 AArch64::GPR32RegClass, MRI);
4881 }
4882
4883 if (RBI.getRegBank(DstReg, MRI, TRI)->getID() != AArch64::FPRRegBankID)
4884 DstReg = MRI.createVirtualRegister(&AArch64::FPR32RegClass);
4885
4886 // Actually insert the instruction.
4887 auto SHA1Inst = MIRBuilder.buildInstr(AArch64::SHA1Hrr, {DstReg}, {SrcReg});
4888 constrainSelectedInstRegOperands(*SHA1Inst, TII, TRI, RBI);
4889
4890 // Did we create a new register for the destination?
4891 if (DstReg != I.getOperand(0).getReg()) {
4892 // Yep. Copy the result of the instruction back into the original
4893 // destination.
4894 MIRBuilder.buildCopy({I.getOperand(0)}, {DstReg});
4895 RBI.constrainGenericRegister(I.getOperand(0).getReg(),
4896 AArch64::GPR32RegClass, MRI);
4897 }
4898
4899 I.eraseFromParent();
4900 return true;
4901 }
4902 case Intrinsic::frameaddress:
4903 case Intrinsic::returnaddress: {
4904 MachineFunction &MF = *I.getParent()->getParent();
4905 MachineFrameInfo &MFI = MF.getFrameInfo();
4906
4907 unsigned Depth = I.getOperand(2).getImm();
4908 Register DstReg = I.getOperand(0).getReg();
4909 RBI.constrainGenericRegister(DstReg, AArch64::GPR64RegClass, MRI);
4910
4911 if (Depth == 0 && IntrinID == Intrinsic::returnaddress) {
4912 if (!MFReturnAddr) {
4913 // Insert the copy from LR/X30 into the entry block, before it can be
4914 // clobbered by anything.
4915 MFI.setReturnAddressIsTaken(true);
4916 MFReturnAddr = getFunctionLiveInPhysReg(MF, TII, AArch64::LR,
4917 AArch64::GPR64RegClass);
4918 }
4919
4920 if (STI.hasV8_3aOps()) {
4921 MIRBuilder.buildInstr(AArch64::XPACI, {DstReg}, {MFReturnAddr});
4922 } else {
4923 MIRBuilder.buildCopy({Register(AArch64::LR)}, {MFReturnAddr});
4924 MIRBuilder.buildInstr(AArch64::XPACLRI);
4925 MIRBuilder.buildCopy({DstReg}, {Register(AArch64::LR)});
4926 }
4927
4928 I.eraseFromParent();
4929 return true;
4930 }
4931
4932 MFI.setFrameAddressIsTaken(true);
4933 Register FrameAddr(AArch64::FP);
4934 while (Depth--) {
4935 Register NextFrame = MRI.createVirtualRegister(&AArch64::GPR64spRegClass);
4936 auto Ldr =
4937 MIRBuilder.buildInstr(AArch64::LDRXui, {NextFrame}, {FrameAddr})
4938 .addImm(0);
4939 constrainSelectedInstRegOperands(*Ldr, TII, TRI, RBI);
4940 FrameAddr = NextFrame;
4941 }
4942
4943 if (IntrinID == Intrinsic::frameaddress)
4944 MIRBuilder.buildCopy({DstReg}, {FrameAddr});
4945 else {
4946 MFI.setReturnAddressIsTaken(true);
4947
4948 if (STI.hasV8_3aOps()) {
4949 Register TmpReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
4950 MIRBuilder.buildInstr(AArch64::LDRXui, {TmpReg}, {FrameAddr}).addImm(1);
4951 MIRBuilder.buildInstr(AArch64::XPACI, {DstReg}, {TmpReg});
4952 } else {
4953 MIRBuilder.buildInstr(AArch64::LDRXui, {Register(AArch64::LR)}, {FrameAddr}).addImm(1);
4954 MIRBuilder.buildInstr(AArch64::XPACLRI);
4955 MIRBuilder.buildCopy({DstReg}, {Register(AArch64::LR)});
4956 }
4957 }
4958
4959 I.eraseFromParent();
4960 return true;
4961 }
4962 }
4963 return false;
4964}
4965
4966InstructionSelector::ComplexRendererFns
4967AArch64InstructionSelector::selectShiftA_32(const MachineOperand &Root) const {
4968 auto MaybeImmed = getImmedFromMO(Root);
4969 if (MaybeImmed == None || *MaybeImmed > 31)
4970 return None;
4971 uint64_t Enc = (32 - *MaybeImmed) & 0x1f;
4972 return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Enc); }}};
4973}
4974
4975InstructionSelector::ComplexRendererFns
4976AArch64InstructionSelector::selectShiftB_32(const MachineOperand &Root) const {
4977 auto MaybeImmed = getImmedFromMO(Root);
4978 if (MaybeImmed == None || *MaybeImmed > 31)
4979 return None;
4980 uint64_t Enc = 31 - *MaybeImmed;
4981 return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Enc); }}};
4982}
4983
4984InstructionSelector::ComplexRendererFns
4985AArch64InstructionSelector::selectShiftA_64(const MachineOperand &Root) const {
4986 auto MaybeImmed = getImmedFromMO(Root);
4987 if (MaybeImmed == None || *MaybeImmed > 63)
4988 return None;
4989 uint64_t Enc = (64 - *MaybeImmed) & 0x3f;
4990 return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Enc); }}};
4991}
4992
4993InstructionSelector::ComplexRendererFns
4994AArch64InstructionSelector::selectShiftB_64(const MachineOperand &Root) const {
4995 auto MaybeImmed = getImmedFromMO(Root);
4996 if (MaybeImmed == None || *MaybeImmed > 63)
4997 return None;
4998 uint64_t Enc = 63 - *MaybeImmed;
4999 return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Enc); }}};
5000}
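A worked example of the four renderers above (illustrative only; the shift amount is hypothetical). For a 32-bit left shift by 3, the A/B pair appears to be the immr/imms operands used when the shift is re-encoded as a bitfield-move instruction, analogous to the i32shift_a/i32shift_b transforms in the SelectionDAG patterns:

//   selectShiftA_32(#3): Enc = (32 - 3) & 0x1f = 29   // candidate immr
//   selectShiftB_32(#3): Enc = 31 - 3          = 28   // candidate imms
// so "lsl w0, w1, #3" can also be written "ubfm w0, w1, #29, #28".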
5001
5002/// Helper to select an immediate value that can be represented as a 12-bit
5003/// value shifted left by either 0 or 12. If it is possible to do so, return
5004/// the immediate and shift value. If not, return None.
5005///
5006/// Used by selectArithImmed and selectNegArithImmed.
5007InstructionSelector::ComplexRendererFns
5008AArch64InstructionSelector::select12BitValueWithLeftShift(
5009 uint64_t Immed) const {
5010 unsigned ShiftAmt;
5011 if (Immed >> 12 == 0) {
5012 ShiftAmt = 0;
5013 } else if ((Immed & 0xfff) == 0 && Immed >> 24 == 0) {
5014 ShiftAmt = 12;
5015 Immed = Immed >> 12;
5016 } else
5017 return None;
5018
5019 unsigned ShVal = AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftAmt);
5020 return {{
5021 [=](MachineInstrBuilder &MIB) { MIB.addImm(Immed); },
5022 [=](MachineInstrBuilder &MIB) { MIB.addImm(ShVal); },
5023 }};
5024}
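A few concrete inputs worked through by hand (the values are hypothetical, shown only to illustrate the two accepted shapes):

//   Immed = 0x123     -> Immed >> 12 == 0                 -> {0x123, LSL #0}
//   Immed = 0xABC000  -> low 12 bits clear, Immed >> 24 == 0 -> {0xABC, LSL #12}
//   Immed = 0x1001    -> fails both tests                 -> None
//   Immed = 0x1234000 -> Immed >> 24 != 0                 -> None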
5025
5026/// SelectArithImmed - Select an immediate value that can be represented as
5027/// a 12-bit value shifted left by either 0 or 12. If so, return true with
5028/// Val set to the 12-bit value and Shift set to the shifter operand.
5029InstructionSelector::ComplexRendererFns
5030AArch64InstructionSelector::selectArithImmed(MachineOperand &Root) const {
5031 // This function is called from the addsub_shifted_imm ComplexPattern,
5032 // which lists [imm] as the list of opcode it's interested in, however
5033 // we still need to check whether the operand is actually an immediate
5034 // here because the ComplexPattern opcode list is only used in
5035 // root-level opcode matching.
5036 auto MaybeImmed = getImmedFromMO(Root);
5037 if (MaybeImmed == None)
5038 return None;
5039 return select12BitValueWithLeftShift(*MaybeImmed);
5040}
5041
5042/// SelectNegArithImmed - As above, but negates the value before trying to
5043/// select it.
5044InstructionSelector::ComplexRendererFns
5045AArch64InstructionSelector::selectNegArithImmed(MachineOperand &Root) const {
5046 // We need a register here, because we need to know if we have a 64 or 32
5047 // bit immediate.
5048 if (!Root.isReg())
5049 return None;
5050 auto MaybeImmed = getImmedFromMO(Root);
5051 if (MaybeImmed == None)
5052 return None;
5053 uint64_t Immed = *MaybeImmed;
5054
5055 // This negation is almost always valid, but "cmp wN, #0" and "cmn wN, #0"
5056 // have the opposite effect on the C flag, so this pattern mustn't match under
5057 // those circumstances.
5058 if (Immed == 0)
5059 return None;
5060
5061 // Check if we're dealing with a 32-bit type on the root or a 64-bit type on
5062 // the root.
5063 MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();
5064 if (MRI.getType(Root.getReg()).getSizeInBits() == 32)
5065 Immed = ~((uint32_t)Immed) + 1;
5066 else
5067 Immed = ~Immed + 1ULL;
5068
5069 if (Immed & 0xFFFFFFFFFF000000ULL)
5070 return None;
5071
5072 Immed &= 0xFFFFFFULL;
5073 return select12BitValueWithLeftShift(Immed);
5074}
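A worked example with a hypothetical value: a 32-bit compare against -5 cannot use the immediate form directly, but its negation can, which is the case this renderer enables (for instance selecting a cmn with #5 in place of a cmp with #-5):

//   Immed = 0xfffffffb                    // -5 seen through a 32-bit root
//   Immed = ~(uint32_t)Immed + 1 = 0x5    // two's-complement negation
//   0x5 & 0xFFFFFFFFFF000000 == 0         // fits in 24 bits, so continue
//   select12BitValueWithLeftShift(0x5)    // renders {5, LSL #0}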
5075
5076/// Return true if it is worth folding MI into an extended register. That is,
5077/// if it's safe to pull it into the addressing mode of a load or store as a
5078/// shift.
5079bool AArch64InstructionSelector::isWorthFoldingIntoExtendedReg(
5080 MachineInstr &MI, const MachineRegisterInfo &MRI) const {
5081 // Always fold if there is one use, or if we're optimizing for size.
5082 Register DefReg = MI.getOperand(0).getReg();
5083 if (MRI.hasOneNonDBGUse(DefReg) ||
5084 MI.getParent()->getParent()->getFunction().hasMinSize())
5085 return true;
5086
5087 // It's better to avoid folding and recomputing shifts when we don't have a
5088 // fastpath.
5089 if (!STI.hasLSLFast())
5090 return false;
5091
5092 // We have a fastpath, so folding a shift in and potentially computing it
5093 // many times may be beneficial. Check if this is only used in memory ops.
5094 // If it is, then we should fold.
5095 return all_of(MRI.use_nodbg_instructions(DefReg),
5096 [](MachineInstr &Use) { return Use.mayLoadOrStore(); });
5097}
5098
5099static bool isSignExtendShiftType(AArch64_AM::ShiftExtendType Type) {
5100 switch (Type) {
5101 case AArch64_AM::SXTB:
5102 case AArch64_AM::SXTH:
5103 case AArch64_AM::SXTW:
5104 return true;
5105 default:
5106 return false;
5107 }
5108}
5109
5110InstructionSelector::ComplexRendererFns
5111AArch64InstructionSelector::selectExtendedSHL(
5112 MachineOperand &Root, MachineOperand &Base, MachineOperand &Offset,
5113 unsigned SizeInBytes, bool WantsExt) const {
5114 assert(Base.isReg() && "Expected base to be a register operand")((Base.isReg() && "Expected base to be a register operand"
) ? static_cast<void> (0) : __assert_fail ("Base.isReg() && \"Expected base to be a register operand\""
, "/build/llvm-toolchain-snapshot-12~++20200926111128+c6c5629f2fb/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 5114, __PRETTY_FUNCTION__))
;
5115 assert(Offset.isReg() && "Expected offset to be a register operand")((Offset.isReg() && "Expected offset to be a register operand"
) ? static_cast<void> (0) : __assert_fail ("Offset.isReg() && \"Expected offset to be a register operand\""
, "/build/llvm-toolchain-snapshot-12~++20200926111128+c6c5629f2fb/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 5115, __PRETTY_FUNCTION__))
;
5116
5117 MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();
5118 MachineInstr *OffsetInst = MRI.getVRegDef(Offset.getReg());
5119 if (!OffsetInst)
5120 return None;
5121
5122 unsigned OffsetOpc = OffsetInst->getOpcode();
5123 if (OffsetOpc != TargetOpcode::G_SHL && OffsetOpc != TargetOpcode::G_MUL)
5124 return None;
5125
5126 // Make sure that the memory op is a valid size.
5127 int64_t LegalShiftVal = Log2_32(SizeInBytes);
5128 if (LegalShiftVal == 0)
5129 return None;
5130 if (!isWorthFoldingIntoExtendedReg(*OffsetInst, MRI))
5131 return None;
5132
5133 // Now, try to find the specific G_CONSTANT. Start by assuming that the
5134 // register we will offset is the LHS, and the register containing the
5135 // constant is the RHS.
5136 Register OffsetReg = OffsetInst->getOperand(1).getReg();
5137 Register ConstantReg = OffsetInst->getOperand(2).getReg();
5138 auto ValAndVReg = getConstantVRegValWithLookThrough(ConstantReg, MRI);
5139 if (!ValAndVReg) {
5140 // We didn't get a constant on the RHS. If the opcode is a shift, then
5141 // we're done.
5142 if (OffsetOpc == TargetOpcode::G_SHL)
5143 return None;
5144
5145 // If we have a G_MUL, we can use either register. Try looking at the RHS.
5146 std::swap(OffsetReg, ConstantReg);
5147 ValAndVReg = getConstantVRegValWithLookThrough(ConstantReg, MRI);
5148 if (!ValAndVReg)
5149 return None;
5150 }
5151
5152 // The value must fit into 3 bits, and must be positive. Make sure that is
5153 // true.
5154 int64_t ImmVal = ValAndVReg->Value;
5155
5156 // Since we're going to pull this into a shift, the constant value must be
5157 // a power of 2. If we got a multiply, then we need to check this.
5158 if (OffsetOpc == TargetOpcode::G_MUL) {
5159 if (!isPowerOf2_32(ImmVal))
5160 return None;
5161
5162 // Got a power of 2. So, the amount we'll shift is the log base-2 of that.
5163 ImmVal = Log2_32(ImmVal);
5164 }
5165
5166 if ((ImmVal & 0x7) != ImmVal)
5167 return None;
5168
5169 // We are only allowed to shift by LegalShiftVal. This shift value is built
5170 // into the instruction, so we can't just use whatever we want.
5171 if (ImmVal != LegalShiftVal)
5172 return None;
5173
5174 unsigned SignExtend = 0;
5175 if (WantsExt) {
5176 // Check if the offset is defined by an extend.
5177 MachineInstr *ExtInst = getDefIgnoringCopies(OffsetReg, MRI);
5178 auto Ext = getExtendTypeForInst(*ExtInst, MRI, true);
5179 if (Ext == AArch64_AM::InvalidShiftExtend)
5180 return None;
5181
5182 SignExtend = isSignExtendShiftType(Ext) ? 1 : 0;
5183 // We only support SXTW for signed extension here.
5184 if (SignExtend && Ext != AArch64_AM::SXTW)
5185 return None;
5186
5187 // Need a 32-bit wide register here.
5188 MachineIRBuilder MIB(*MRI.getVRegDef(Root.getReg()));
5189 OffsetReg = ExtInst->getOperand(1).getReg();
5190 OffsetReg = narrowExtendRegIfNeeded(OffsetReg, MIB);
5191 }
5192
5193 // We can use the LHS of the GEP as the base, and the LHS of the shift as an
5194 // offset. Signify that we are shifting by setting the shift flag to 1.
5195 return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(Base.getReg()); },
5196 [=](MachineInstrBuilder &MIB) { MIB.addUse(OffsetReg); },
5197 [=](MachineInstrBuilder &MIB) {
5198 // Need to add both immediates here to make sure that they are both
5199 // added to the instruction.
5200 MIB.addImm(SignExtend);
5201 MIB.addImm(1);
5202 }}};
5203}
5204
5205/// This is used for computing addresses like this:
5206///
5207/// ldr x1, [x2, x3, lsl #3]
5208///
5209/// Where x2 is the base register, and x3 is an offset register. The shift-left
5210/// is a constant value specific to this load instruction. That is, we'll never
5211/// see anything other than a 3 here (which corresponds to the size of the
5212/// element being loaded.)
5213InstructionSelector::ComplexRendererFns
5214AArch64InstructionSelector::selectAddrModeShiftedExtendXReg(
5215 MachineOperand &Root, unsigned SizeInBytes) const {
5216 if (!Root.isReg())
5217 return None;
5218 MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();
5219
5220 // We want to find something like this:
5221 //
5222 // val = G_CONSTANT LegalShiftVal
5223 // shift = G_SHL off_reg val
5224 // ptr = G_PTR_ADD base_reg shift
5225 // x = G_LOAD ptr
5226 //
5227 // And fold it into this addressing mode:
5228 //
5229 // ldr x, [base_reg, off_reg, lsl #LegalShiftVal]
5230
5231 // Check if we can find the G_PTR_ADD.
5232 MachineInstr *PtrAdd =
5233 getOpcodeDef(TargetOpcode::G_PTR_ADD, Root.getReg(), MRI);
5234 if (!PtrAdd || !isWorthFoldingIntoExtendedReg(*PtrAdd, MRI))
5235 return None;
5236
5237 // Now, try to match an opcode which will match our specific offset.
5238 // We want a G_SHL or a G_MUL.
5239 MachineInstr *OffsetInst =
5240 getDefIgnoringCopies(PtrAdd->getOperand(2).getReg(), MRI);
5241 return selectExtendedSHL(Root, PtrAdd->getOperand(1),
5242 OffsetInst->getOperand(0), SizeInBytes,
5243 /*WantsExt=*/false);
5244}
5245
5246/// This is used for computing addresses like this:
5247///
5248/// ldr x1, [x2, x3]
5249///
5250/// Where x2 is the base register, and x3 is an offset register.
5251///
5252/// When it is possible (or profitable) to fold a G_PTR_ADD into the address calculation,
5253/// this will do so. Otherwise, it will return None.
5254InstructionSelector::ComplexRendererFns
5255AArch64InstructionSelector::selectAddrModeRegisterOffset(
5256 MachineOperand &Root) const {
5257 MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();
5258
5259 // We need a GEP.
5260 MachineInstr *Gep = MRI.getVRegDef(Root.getReg());
5261 if (!Gep || Gep->getOpcode() != TargetOpcode::G_PTR_ADD)
5262 return None;
5263
5264 // If this is used more than once, let's not bother folding.
5265 // TODO: Check if they are memory ops. If they are, then we can still fold
5266 // without having to recompute anything.
5267 if (!MRI.hasOneNonDBGUse(Gep->getOperand(0).getReg()))
5268 return None;
5269
5270 // Base is the GEP's LHS, offset is its RHS.
5271 return {{[=](MachineInstrBuilder &MIB) {
5272 MIB.addUse(Gep->getOperand(1).getReg());
5273 },
5274 [=](MachineInstrBuilder &MIB) {
5275 MIB.addUse(Gep->getOperand(2).getReg());
5276 },
5277 [=](MachineInstrBuilder &MIB) {
5278 // Need to add both immediates here to make sure that they are both
5279 // added to the instruction.
5280 MIB.addImm(0);
5281 MIB.addImm(0);
5282 }}};
5283}
5284
5285/// This is intended to be equivalent to selectAddrModeXRO in
5286/// AArch64ISelDAGtoDAG. It's used for selecting X register offset loads.
5287InstructionSelector::ComplexRendererFns
5288AArch64InstructionSelector::selectAddrModeXRO(MachineOperand &Root,
5289 unsigned SizeInBytes) const {
5290 MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();
5291 if (!Root.isReg())
5292 return None;
5293 MachineInstr *PtrAdd =
5294 getOpcodeDef(TargetOpcode::G_PTR_ADD, Root.getReg(), MRI);
5295 if (!PtrAdd)
5296 return None;
5297
5298 // Check for immediates which cannot be encoded in the [base + imm]
5299 // addressing mode, and can't be encoded in an add/sub. If this happens, we'll
5300 // end up with code like:
5301 //
5302 // mov x0, wide
5303 // add x1 base, x0
5304 // ldr x2, [x1, x0]
5305 //
5306 // In this situation, we can use the [base, xreg] addressing mode to save an
5307 // add/sub:
5308 //
5309 // mov x0, wide
5310 // ldr x2, [base, x0]
5311 auto ValAndVReg =
5312 getConstantVRegValWithLookThrough(PtrAdd->getOperand(2).getReg(), MRI);
5313 if (ValAndVReg) {
5314 unsigned Scale = Log2_32(SizeInBytes);
5315 int64_t ImmOff = ValAndVReg->Value;
5316
5317 // Skip immediates that can be selected in the load/store addressing
5318 // mode.
5319 if (ImmOff % SizeInBytes == 0 && ImmOff >= 0 &&
5320 ImmOff < (0x1000 << Scale))
5321 return None;
5322
5323 // Helper lambda to decide whether or not it is preferable to emit an add.
5324 auto isPreferredADD = [](int64_t ImmOff) {
5325 // Constants in [0x0, 0xfff] can be encoded in an add.
5326 if ((ImmOff & 0xfffffffffffff000LL) == 0x0LL)
5327 return true;
5328
5329 // Can it be encoded in an add lsl #12?
5330 if ((ImmOff & 0xffffffffff000fffLL) != 0x0LL)
5331 return false;
5332
5333 // It can be encoded in an add lsl #12, but we may not want to. If it is
5334 // possible to select this as a single movz, then prefer that. A single
5335 // movz is faster than an add with a shift.
5336 return (ImmOff & 0xffffffffff00ffffLL) != 0x0LL &&
5337 (ImmOff & 0xffffffffffff0fffLL) != 0x0LL;
5338 };
5339
5340 // If the immediate can be encoded in a single add/sub, then bail out.
5341 if (isPreferredADD(ImmOff) || isPreferredADD(-ImmOff))
5342 return None;
5343 }
5344
5345 // Try to fold shifts into the addressing mode.
5346 auto AddrModeFns = selectAddrModeShiftedExtendXReg(Root, SizeInBytes);
5347 if (AddrModeFns)
5348 return AddrModeFns;
5349
5350 // If that doesn't work, see if it's possible to fold in registers from
5351 // a GEP.
5352 return selectAddrModeRegisterOffset(Root);
5353}
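A rough walk-through of the heuristic above with hypothetical offsets, taking SizeInBytes = 8 (so Scale = 3):

//   ImmOff = 0x40     -> valid scaled [base, #imm] offset            -> None
//   ImmOff = 0x456000 -> encodable as "add ..., lsl #12"             -> None
//   ImmOff = 0x123456 -> neither scaled-encodable nor a cheap
//                        add/movz, so reuse the materialized
//                        constant:  ldr x2, [base, x0]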
5354
5355/// This is used for computing addresses like this:
5356///
5357/// ldr x0, [xBase, wOffset, sxtw #LegalShiftVal]
5358///
5359/// Where we have a 64-bit base register, a 32-bit offset register, and an
5360/// extend (which may or may not be signed).
5361InstructionSelector::ComplexRendererFns
5362AArch64InstructionSelector::selectAddrModeWRO(MachineOperand &Root,
5363 unsigned SizeInBytes) const {
5364 MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();
5365
5366 MachineInstr *PtrAdd =
5367 getOpcodeDef(TargetOpcode::G_PTR_ADD, Root.getReg(), MRI);
5368 if (!PtrAdd || !isWorthFoldingIntoExtendedReg(*PtrAdd, MRI))
5369 return None;
5370
5371 MachineOperand &LHS = PtrAdd->getOperand(1);
5372 MachineOperand &RHS = PtrAdd->getOperand(2);
5373 MachineInstr *OffsetInst = getDefIgnoringCopies(RHS.getReg(), MRI);
5374
5375 // The first case is the same as selectAddrModeXRO, except we need an extend.
5376 // In this case, we try to find a shift and extend, and fold them into the
5377 // addressing mode.
5378 //
5379 // E.g.
5380 //
5381 // off_reg = G_Z/S/ANYEXT ext_reg
5382 // val = G_CONSTANT LegalShiftVal
5383 // shift = G_SHL off_reg val
5384 // ptr = G_PTR_ADD base_reg shift
5385 // x = G_LOAD ptr
5386 //
5387 // In this case we can get a load like this:
5388 //
5389 // ldr x0, [base_reg, ext_reg, sxtw #LegalShiftVal]
5390 auto ExtendedShl = selectExtendedSHL(Root, LHS, OffsetInst->getOperand(0),
5391 SizeInBytes, /*WantsExt=*/true);
5392 if (ExtendedShl)
5393 return ExtendedShl;
5394
5395 // There was no shift. We can try and fold a G_Z/S/ANYEXT in alone though.
5396 //
5397 // e.g.
5398 // ldr something, [base_reg, ext_reg, sxtw]
5399 if (!isWorthFoldingIntoExtendedReg(*OffsetInst, MRI))
5400 return None;
5401
5402 // Check if this is an extend. We'll get an extend type if it is.
5403 AArch64_AM::ShiftExtendType Ext =
5404 getExtendTypeForInst(*OffsetInst, MRI, /*IsLoadStore=*/true);
5405 if (Ext == AArch64_AM::InvalidShiftExtend)
5406 return None;
5407
5408 // Need a 32-bit wide register.
5409 MachineIRBuilder MIB(*PtrAdd);
5410 Register ExtReg =
5411 narrowExtendRegIfNeeded(OffsetInst->getOperand(1).getReg(), MIB);
5412 unsigned SignExtend = Ext == AArch64_AM::SXTW;
5413
5414 // Base is LHS, offset is ExtReg.
5415 return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(LHS.getReg()); },
5416 [=](MachineInstrBuilder &MIB) { MIB.addUse(ExtReg); },
5417 [=](MachineInstrBuilder &MIB) {
5418 MIB.addImm(SignExtend);
5419 MIB.addImm(0);
5420 }}};
5421}
5422
5423/// Select a "register plus unscaled signed 9-bit immediate" address. This
5424/// should only match when there is an offset that is not valid for a scaled
5425/// immediate addressing mode. The "Size" argument is the size in bytes of the
5426/// memory reference, which is needed here to know what is valid for a scaled
5427/// immediate.
5428InstructionSelector::ComplexRendererFns
5429AArch64InstructionSelector::selectAddrModeUnscaled(MachineOperand &Root,
5430 unsigned Size) const {
5431 MachineRegisterInfo &MRI =
5432 Root.getParent()->getParent()->getParent()->getRegInfo();
5433
5434 if (!Root.isReg())
5435 return None;
5436
5437 if (!isBaseWithConstantOffset(Root, MRI))
5438 return None;
5439
5440 MachineInstr *RootDef = MRI.getVRegDef(Root.getReg());
5441 if (!RootDef)
5442 return None;
5443
5444 MachineOperand &OffImm = RootDef->getOperand(2);
5445 if (!OffImm.isReg())
5446 return None;
5447 MachineInstr *RHS = MRI.getVRegDef(OffImm.getReg());
5448 if (!RHS || RHS->getOpcode() != TargetOpcode::G_CONSTANT)
5449 return None;
5450 int64_t RHSC;
5451 MachineOperand &RHSOp1 = RHS->getOperand(1);
5452 if (!RHSOp1.isCImm() || RHSOp1.getCImm()->getBitWidth() > 64)
5453 return None;
5454 RHSC = RHSOp1.getCImm()->getSExtValue();
5455
5456 // If the offset is valid as a scaled immediate, don't match here.
5457 if ((RHSC & (Size - 1)) == 0 && RHSC >= 0 && RHSC < (0x1000 << Log2_32(Size)))
5458 return None;
5459 if (RHSC >= -256 && RHSC < 256) {
5460 MachineOperand &Base = RootDef->getOperand(1);
5461 return {{
5462 [=](MachineInstrBuilder &MIB) { MIB.add(Base); },
5463 [=](MachineInstrBuilder &MIB) { MIB.addImm(RHSC); },
5464 }};
5465 }
5466 return None;
5467}
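Illustration with Size = 4, i.e. a 32-bit access (offsets are hypothetical); the matched forms correspond to the unscaled LDUR/STUR-style encodings:

//   RHSC = 8   -> valid scaled offset (e.g. ldr w0, [x1, #8])  -> None here
//   RHSC = -4  -> negative, so the scaled form can't be used;
//                 within [-256, 256)                           -> {Base, -4}
//   RHSC = 3   -> not a multiple of 4; within range            -> {Base, 3}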
5468
5469InstructionSelector::ComplexRendererFns
5470AArch64InstructionSelector::tryFoldAddLowIntoImm(MachineInstr &RootDef,
5471 unsigned Size,
5472 MachineRegisterInfo &MRI) const {
5473 if (RootDef.getOpcode() != AArch64::G_ADD_LOW)
5474 return None;
5475 MachineInstr &Adrp = *MRI.getVRegDef(RootDef.getOperand(1).getReg());
5476 if (Adrp.getOpcode() != AArch64::ADRP)
5477 return None;
5478
5479 // TODO: add heuristics like isWorthFoldingADDlow() from SelectionDAG.
5480 // TODO: Need to check GV's offset % size if doing offset folding into globals.
5481 assert(Adrp.getOperand(1).getOffset() == 0 && "Unexpected offset in global")((Adrp.getOperand(1).getOffset() == 0 && "Unexpected offset in global"
) ? static_cast<void> (0) : __assert_fail ("Adrp.getOperand(1).getOffset() == 0 && \"Unexpected offset in global\""
, "/build/llvm-toolchain-snapshot-12~++20200926111128+c6c5629f2fb/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 5481, __PRETTY_FUNCTION__))
;
5482 auto GV = Adrp.getOperand(1).getGlobal();
5483 if (GV->isThreadLocal())
5484 return None;
5485
5486 auto &MF = *RootDef.getParent()->getParent();
5487 if (GV->getPointerAlignment(MF.getDataLayout()) < Size)
5488 return None;
5489
5490 unsigned OpFlags = STI.ClassifyGlobalReference(GV, MF.getTarget());
5491 MachineIRBuilder MIRBuilder(RootDef);
5492 Register AdrpReg = Adrp.getOperand(0).getReg();
5493 return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(AdrpReg); },
5494 [=](MachineInstrBuilder &MIB) {
5495 MIB.addGlobalAddress(GV, /* Offset */ 0,
5496 OpFlags | AArch64II::MO_PAGEOFF |
5497 AArch64II::MO_NC);
5498 }}};
5499}
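The effect, sketched for a hypothetical global g under the small code model: instead of materializing the full address with ADRP followed by the low-part ADD and then loading, the low-part offset is folded straight into the load's operand list by the renderers above:

//   adrp x8, g
//   add  x8, x8, :lo12:g       // the G_ADD_LOW, before folding
//   ldr  w0, [x8]
// becomes
//   adrp x8, g
//   ldr  w0, [x8, :lo12:g]     // offset rendered with MO_PAGEOFF | MO_NC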
5500
5501/// Select a "register plus scaled unsigned 12-bit immediate" address. The
5502/// "Size" argument is the size in bytes of the memory reference, which
5503/// determines the scale.
5504InstructionSelector::ComplexRendererFns
5505AArch64InstructionSelector::selectAddrModeIndexed(MachineOperand &Root,
5506 unsigned Size) const {
5507 MachineFunction &MF = *Root.getParent()->getParent()->getParent();
5508 MachineRegisterInfo &MRI = MF.getRegInfo();
5509
5510 if (!Root.isReg())
4
Calling 'MachineOperand::isReg'
7
Returning from 'MachineOperand::isReg'
8
Taking false branch
5511 return None;
5512
5513 MachineInstr *RootDef = MRI.getVRegDef(Root.getReg());
5514 if (!RootDef)
9
Assuming 'RootDef' is non-null
10
Taking false branch
5515 return None;
5516
5517 if (RootDef->getOpcode() == TargetOpcode::G_FRAME_INDEX) {
11
Assuming the condition is false
12
Taking false branch
5518 return {{
5519 [=](MachineInstrBuilder &MIB) { MIB.add(RootDef->getOperand(1)); },
5520 [=](MachineInstrBuilder &MIB) { MIB.addImm(0); },
5521 }};
5522 }
5523
5524 CodeModel::Model CM = MF.getTarget().getCodeModel();
5525 // Check if we can fold in the ADD of small code model ADRP + ADD address.
5526 if (CM == CodeModel::Small) {
13
Assuming 'CM' is not equal to Small
14
Taking false branch
5527 auto OpFns = tryFoldAddLowIntoImm(*RootDef, Size, MRI);
5528 if (OpFns)
5529 return OpFns;
5530 }
5531
5532 if (isBaseWithConstantOffset(Root, MRI)) {
15
Assuming the condition is true
16
Taking true branch
5533 MachineOperand &LHS = RootDef->getOperand(1);
5534 MachineOperand &RHS = RootDef->getOperand(2);
5535 MachineInstr *LHSDef = MRI.getVRegDef(LHS.getReg());
5536 MachineInstr *RHSDef = MRI.getVRegDef(RHS.getReg());
5537 if (LHSDef && RHSDef) {
17
Assuming 'LHSDef' is non-null
18
Assuming 'RHSDef' is non-null
19
Taking true branch
5538 int64_t RHSC = (int64_t)RHSDef->getOperand(1).getCImm()->getZExtValue();
5539 unsigned Scale = Log2_32(Size);
20
Calling 'Log2_32'
22
Returning from 'Log2_32'
23
'Scale' initialized to 4294967295
5540 if ((RHSC & (Size - 1)) == 0 && RHSC >= 0 && RHSC < (0x1000 << Scale)) {
24
Assuming the condition is true
24.1
'RHSC' is >= 0
25
The result of the left shift is undefined due to shifting by '4294967295', which is greater or equal to the width of type 'int'
5541 if (LHSDef->getOpcode() == TargetOpcode::G_FRAME_INDEX)
5542 return {{
5543 [=](MachineInstrBuilder &MIB) { MIB.add(LHSDef->getOperand(1)); },
5544 [=](MachineInstrBuilder &MIB) { MIB.addImm(RHSC >> Scale); },
5545 }};
5546
5547 return {{
5548 [=](MachineInstrBuilder &MIB) { MIB.add(LHS); },
5549 [=](MachineInstrBuilder &MIB) { MIB.addImm(RHSC >> Scale); },
5550 }};
5551 }
5552 }
5553 }
5554
5555 // Before falling back to our general case, check if the unscaled
5556 // instructions can handle this. If so, that's preferable.
5557 if (selectAddrModeUnscaled(Root, Size).hasValue())
5558 return None;
5559
5560 return {{
5561 [=](MachineInstrBuilder &MIB) { MIB.add(Root); },
5562 [=](MachineInstrBuilder &MIB) { MIB.addImm(0); },
5563 }};
5564}
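The shift-amount problem the checker flags in this function requires Size to arrive here as 0: Log2_32(0) returns -1, which wraps to 4294967295 in the unsigned Scale, making the later 0x1000 << Scale undefined. A minimal sketch of one possible guard, purely for illustration (this is not the code of the analyzed revision and may not match any eventual upstream fix):

  // Hypothetical guard, shown only to illustrate the defect.
  if (Size == 0)
    return None;                    // Log2_32(0) returns -1u (4294967295)
  unsigned Scale = Log2_32(Size);   // defined shift for the nonzero power-of-two
                                    // access sizes this hook is given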
5565
5566/// Given a shift instruction, return the correct shift type for that
5567/// instruction.
5568static AArch64_AM::ShiftExtendType getShiftTypeForInst(MachineInstr &MI) {
5569 // TODO: Handle AArch64_AM::ROR
5570 switch (MI.getOpcode()) {
5571 default:
5572 return AArch64_AM::InvalidShiftExtend;
5573 case TargetOpcode::G_SHL:
5574 return AArch64_AM::LSL;
5575 case TargetOpcode::G_LSHR:
5576 return AArch64_AM::LSR;
5577 case TargetOpcode::G_ASHR:
5578 return AArch64_AM::ASR;
5579 }
5580}
5581
5582/// Select a "shifted register" operand. If the value is not shifted, set the
5583/// shift operand to a default value of "lsl 0".
5584///
5585/// TODO: Allow shifted register to be rotated in logical instructions.
5586InstructionSelector::ComplexRendererFns
5587AArch64InstructionSelector::selectShiftedRegister(MachineOperand &Root) const {
5588 if (!Root.isReg())
5589 return None;
5590 MachineRegisterInfo &MRI =
5591 Root.getParent()->getParent()->getParent()->getRegInfo();
5592
5593 // Check if the operand is defined by an instruction which corresponds to
5594 // a ShiftExtendType. E.g. a G_SHL, G_LSHR, etc.
5595 //
5596 // TODO: Handle AArch64_AM::ROR for logical instructions.
5597 MachineInstr *ShiftInst = MRI.getVRegDef(Root.getReg());
5598 if (!ShiftInst)
5599 return None;
5600 AArch64_AM::ShiftExtendType ShType = getShiftTypeForInst(*ShiftInst);
5601 if (ShType == AArch64_AM::InvalidShiftExtend)
5602 return None;
5603 if (!isWorthFoldingIntoExtendedReg(*ShiftInst, MRI))
5604 return None;
5605
5606 // Need an immediate on the RHS.
5607 MachineOperand &ShiftRHS = ShiftInst->getOperand(2);
5608 auto Immed = getImmedFromMO(ShiftRHS);
5609 if (!Immed)
5610 return None;
5611
5612 // We have something that we can fold. Fold in the shift's LHS and RHS into
5613 // the instruction.
5614 MachineOperand &ShiftLHS = ShiftInst->getOperand(1);
5615 Register ShiftReg = ShiftLHS.getReg();
5616
5617 unsigned NumBits = MRI.getType(ShiftReg).getSizeInBits();
5618 unsigned Val = *Immed & (NumBits - 1);
5619 unsigned ShiftVal = AArch64_AM::getShifterImm(ShType, Val);
5620
5621 return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(ShiftReg); },
5622 [=](MachineInstrBuilder &MIB) { MIB.addImm(ShiftVal); }}};
5623}
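Illustration with hypothetical operands: for a 64-bit %v = G_SHL %x, 4 feeding an add, ShType is LSL, NumBits is 64, and Val = 4 & 63 = 4, so the renderers add the shift's source register plus a shifter immediate of LSL #4 for the consuming instruction:

//   ShiftInst: %v:gpr(s64) = G_SHL %x, %c   ; %c = G_CONSTANT 4
//   Val       = 4 & (64 - 1) = 4
//   ShiftVal  = AArch64_AM::getShifterImm(AArch64_AM::LSL, 4)
//   rendered operands: { %x, ShiftVal }  ->  e.g. "add x0, xN, xM, lsl #4"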
5624
5625AArch64_AM::ShiftExtendType AArch64InstructionSelector::getExtendTypeForInst(
5626 MachineInstr &MI, MachineRegisterInfo &MRI, bool IsLoadStore) const {
5627 unsigned Opc = MI.getOpcode();
5628
5629 // Handle explicit extend instructions first.
5630 if (Opc == TargetOpcode::G_SEXT || Opc == TargetOpcode::G_SEXT_INREG) {
5631 unsigned Size;
5632 if (Opc == TargetOpcode::G_SEXT)
5633 Size = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
5634 else
5635 Size = MI.getOperand(2).getImm();
5636 assert(Size != 64 && "Extend from 64 bits?")((Size != 64 && "Extend from 64 bits?") ? static_cast
<void> (0) : __assert_fail ("Size != 64 && \"Extend from 64 bits?\""
, "/build/llvm-toolchain-snapshot-12~++20200926111128+c6c5629f2fb/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 5636, __PRETTY_FUNCTION__))
;
5637 switch (Size) {
5638 case 8:
5639 return AArch64_AM::SXTB;
5640 case 16:
5641 return AArch64_AM::SXTH;
5642 case 32:
5643 return AArch64_AM::SXTW;
5644 default:
5645 return AArch64_AM::InvalidShiftExtend;
5646 }
5647 }
5648
5649 if (Opc == TargetOpcode::G_ZEXT || Opc == TargetOpcode::G_ANYEXT) {
5650 unsigned Size = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
5651 assert(Size != 64 && "Extend from 64 bits?")((Size != 64 && "Extend from 64 bits?") ? static_cast
<void> (0) : __assert_fail ("Size != 64 && \"Extend from 64 bits?\""
, "/build/llvm-toolchain-snapshot-12~++20200926111128+c6c5629f2fb/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 5651, __PRETTY_FUNCTION__))
;
5652 switch (Size) {
5653 case 8:
5654 return AArch64_AM::UXTB;
5655 case 16:
5656 return AArch64_AM::UXTH;
5657 case 32:
5658 return AArch64_AM::UXTW;
5659 default:
5660 return AArch64_AM::InvalidShiftExtend;
5661 }
5662 }
5663
5664 // Don't have an explicit extend. Try to handle a G_AND with a constant mask
5665 // on the RHS.
5666 if (Opc != TargetOpcode::G_AND)
5667 return AArch64_AM::InvalidShiftExtend;
5668
5669 Optional<uint64_t> MaybeAndMask = getImmedFromMO(MI.getOperand(2));
5670 if (!MaybeAndMask)
5671 return AArch64_AM::InvalidShiftExtend;
5672 uint64_t AndMask = *MaybeAndMask;
5673 switch (AndMask) {
5674 default:
5675 return AArch64_AM::InvalidShiftExtend;
5676 case 0xFF:
5677 return !IsLoadStore ? AArch64_AM::UXTB : AArch64_AM::InvalidShiftExtend;
5678 case 0xFFFF:
5679 return !IsLoadStore ? AArch64_AM::UXTH : AArch64_AM::InvalidShiftExtend;
5680 case 0xFFFFFFFF:
5681 return AArch64_AM::UXTW;
5682 }
5683}
5684
5685Register AArch64InstructionSelector::narrowExtendRegIfNeeded(
5686 Register ExtReg, MachineIRBuilder &MIB) const {
5687 MachineRegisterInfo &MRI = *MIB.getMRI();
5688 if (MRI.getType(ExtReg).getSizeInBits() == 32)
5689 return ExtReg;
5690
5691 // Insert a copy to move ExtReg to GPR32.
5692 Register NarrowReg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
5693 auto Copy = MIB.buildCopy({NarrowReg}, {ExtReg});
5694
5695 // Select the copy into a subregister copy.
5696 selectCopy(*Copy, TII, MRI, TRI, RBI);
5697 return Copy.getReg(0);
5698}
5699
5700Register AArch64InstructionSelector::widenGPRBankRegIfNeeded(
5701 Register Reg, unsigned WideSize, MachineIRBuilder &MIB) const {
5702 assert(WideSize >= 8 && "WideSize is smaller than all possible registers?")((WideSize >= 8 && "WideSize is smaller than all possible registers?"
) ? static_cast<void> (0) : __assert_fail ("WideSize >= 8 && \"WideSize is smaller than all possible registers?\""
, "/build/llvm-toolchain-snapshot-12~++20200926111128+c6c5629f2fb/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 5702, __PRETTY_FUNCTION__))
;
5703 MachineRegisterInfo &MRI = *MIB.getMRI();
5704 unsigned NarrowSize = MRI.getType(Reg).getSizeInBits();
5705 assert(WideSize >= NarrowSize &&((WideSize >= NarrowSize && "WideSize cannot be smaller than NarrowSize!"
) ? static_cast<void> (0) : __assert_fail ("WideSize >= NarrowSize && \"WideSize cannot be smaller than NarrowSize!\""
, "/build/llvm-toolchain-snapshot-12~++20200926111128+c6c5629f2fb/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 5706, __PRETTY_FUNCTION__))