Bug Summary

File: llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
Warning: line 884, column 7
1st function call argument is an uninitialized value
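
The checker reports this when a value that may never have been written is passed as a function-call argument. Below is a minimal standalone sketch of the pattern, using hypothetical names rather than the LLVM code annotated further down:

#include <cstdio>

// Hypothetical helper mirroring the shape of the defect: it only writes its
// out-parameter on the success path.
static bool computeSubReg(int Size, unsigned &SubReg) {
  if (Size == 32) {
    SubReg = 1;
    return true;
  }
  return false; // Returns without writing to 'SubReg'.
}

static void use(unsigned SubReg) { std::printf("%u\n", SubReg); }

void caller(int Size) {
  unsigned SubReg;             // Declared without an initial value.
  computeSubReg(Size, SubReg); // Return value ignored.
  use(SubReg);                 // 1st function call argument may be uninitialized.
}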

Annotated Source Code


clang -cc1 -cc1 -triple x86_64-pc-linux-gnu -analyze -disable-free -disable-llvm-verifier -discard-value-names -main-file-name AArch64InstructionSelector.cpp -analyzer-store=region -analyzer-opt-analyze-nested-blocks -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=cplusplus -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -analyzer-config-compatibility-mode=true -mrelocation-model pic -pic-level 2 -mframe-pointer=none -fmath-errno -fno-rounding-math -mconstructor-aliases -munwind-tables -target-cpu x86-64 -tune-cpu generic -fno-split-dwarf-inlining -debugger-tuning=gdb -ffunction-sections -fdata-sections -resource-dir /usr/lib/llvm-12/lib/clang/12.0.0 -D _DEBUG -D _GNU_SOURCE -D __STDC_CONSTANT_MACROS -D __STDC_FORMAT_MACROS -D __STDC_LIMIT_MACROS -I /build/llvm-toolchain-snapshot-12~++20201129111111+e987fbdd85d/build-llvm/lib/Target/AArch64 -I /build/llvm-toolchain-snapshot-12~++20201129111111+e987fbdd85d/llvm/lib/Target/AArch64 -I /build/llvm-toolchain-snapshot-12~++20201129111111+e987fbdd85d/build-llvm/include -I /build/llvm-toolchain-snapshot-12~++20201129111111+e987fbdd85d/llvm/include -U NDEBUG -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/c++/6.3.0 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/x86_64-linux-gnu/c++/6.3.0 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/x86_64-linux-gnu/c++/6.3.0 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/c++/6.3.0/backward -internal-isystem /usr/local/include -internal-isystem /usr/lib/llvm-12/lib/clang/12.0.0/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -O2 -Wno-unused-parameter -Wwrite-strings -Wno-missing-field-initializers -Wno-long-long -Wno-maybe-uninitialized -Wno-comment -std=c++14 -fdeprecated-macro -fdebug-compilation-dir /build/llvm-toolchain-snapshot-12~++20201129111111+e987fbdd85d/build-llvm/lib/Target/AArch64 -fdebug-prefix-map=/build/llvm-toolchain-snapshot-12~++20201129111111+e987fbdd85d=. -ferror-limit 19 -fvisibility hidden -fvisibility-inlines-hidden -stack-protector 2 -fgnuc-version=4.2.1 -vectorize-loops -vectorize-slp -analyzer-output=html -analyzer-config stable-report-filename=true -faddrsig -o /tmp/scan-build-2020-11-29-190409-37574-1 -x c++ /build/llvm-toolchain-snapshot-12~++20201129111111+e987fbdd85d/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp

/build/llvm-toolchain-snapshot-12~++20201129111111+e987fbdd85d/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp

1//===- AArch64InstructionSelector.cpp ----------------------------*- C++ -*-==//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8/// \file
9/// This file implements the targeting of the InstructionSelector class for
10/// AArch64.
11/// \todo This should be generated by TableGen.
12//===----------------------------------------------------------------------===//
13
14#include "AArch64InstrInfo.h"
15#include "AArch64MachineFunctionInfo.h"
16#include "AArch64RegisterBankInfo.h"
17#include "AArch64RegisterInfo.h"
18#include "AArch64Subtarget.h"
19#include "AArch64TargetMachine.h"
20#include "MCTargetDesc/AArch64AddressingModes.h"
21#include "MCTargetDesc/AArch64MCTargetDesc.h"
22#include "llvm/ADT/Optional.h"
23#include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
24#include "llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h"
25#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
26#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
27#include "llvm/CodeGen/GlobalISel/Utils.h"
28#include "llvm/CodeGen/MachineBasicBlock.h"
29#include "llvm/CodeGen/MachineConstantPool.h"
30#include "llvm/CodeGen/MachineFunction.h"
31#include "llvm/CodeGen/MachineInstr.h"
32#include "llvm/CodeGen/MachineInstrBuilder.h"
33#include "llvm/CodeGen/MachineOperand.h"
34#include "llvm/CodeGen/MachineRegisterInfo.h"
35#include "llvm/CodeGen/TargetOpcodes.h"
36#include "llvm/IR/Constants.h"
37#include "llvm/IR/PatternMatch.h"
38#include "llvm/IR/Type.h"
39#include "llvm/IR/IntrinsicsAArch64.h"
40#include "llvm/Pass.h"
41#include "llvm/Support/Debug.h"
42#include "llvm/Support/raw_ostream.h"
43
44#define DEBUG_TYPE "aarch64-isel"
45
46using namespace llvm;
47using namespace MIPatternMatch;
48
49namespace {
50
51#define GET_GLOBALISEL_PREDICATE_BITSET
52#include "AArch64GenGlobalISel.inc"
53#undef GET_GLOBALISEL_PREDICATE_BITSET
54
55class AArch64InstructionSelector : public InstructionSelector {
56public:
57 AArch64InstructionSelector(const AArch64TargetMachine &TM,
58 const AArch64Subtarget &STI,
59 const AArch64RegisterBankInfo &RBI);
60
61 bool select(MachineInstr &I) override;
62 static const char *getName() { return DEBUG_TYPE; }
63
64 void setupMF(MachineFunction &MF, GISelKnownBits &KB,
65 CodeGenCoverage &CoverageInfo) override {
66 InstructionSelector::setupMF(MF, KB, CoverageInfo);
67
68 // hasFnAttribute() is expensive to call on every BRCOND selection, so
69 // cache it here for each run of the selector.
70 ProduceNonFlagSettingCondBr =
71 !MF.getFunction().hasFnAttribute(Attribute::SpeculativeLoadHardening);
72 MFReturnAddr = Register();
73
74 processPHIs(MF);
75 }
76
77private:
78 /// tblgen-erated 'select' implementation, used as the initial selector for
79 /// the patterns that don't require complex C++.
80 bool selectImpl(MachineInstr &I, CodeGenCoverage &CoverageInfo) const;
81
82 // A lowering phase that runs before any selection attempts.
83 // Returns true if the instruction was modified.
84 bool preISelLower(MachineInstr &I);
85
86 // An early selection function that runs before the selectImpl() call.
87 bool earlySelect(MachineInstr &I) const;
88
89 // Do some preprocessing of G_PHIs before we begin selection.
90 void processPHIs(MachineFunction &MF);
91
92 bool earlySelectSHL(MachineInstr &I, MachineRegisterInfo &MRI) const;
93
94 /// Eliminate same-sized cross-bank copies into stores before selectImpl().
95 bool contractCrossBankCopyIntoStore(MachineInstr &I,
96 MachineRegisterInfo &MRI);
97
98 bool convertPtrAddToAdd(MachineInstr &I, MachineRegisterInfo &MRI);
99
100 bool selectVaStartAAPCS(MachineInstr &I, MachineFunction &MF,
101 MachineRegisterInfo &MRI) const;
102 bool selectVaStartDarwin(MachineInstr &I, MachineFunction &MF,
103 MachineRegisterInfo &MRI) const;
104
105 bool tryOptAndIntoCompareBranch(MachineInstr *LHS,
106 int64_t CmpConstant,
107 const CmpInst::Predicate &Pred,
108 MachineBasicBlock *DstMBB,
109 MachineIRBuilder &MIB) const;
110 bool selectCompareBranch(MachineInstr &I, MachineFunction &MF,
111 MachineRegisterInfo &MRI) const;
112
113 bool selectVectorAshrLshr(MachineInstr &I, MachineRegisterInfo &MRI) const;
114 bool selectVectorSHL(MachineInstr &I, MachineRegisterInfo &MRI) const;
115
116 // Helper to generate an equivalent of scalar_to_vector into a new register,
117 // returned via 'Dst'.
118 MachineInstr *emitScalarToVector(unsigned EltSize,
119 const TargetRegisterClass *DstRC,
120 Register Scalar,
121 MachineIRBuilder &MIRBuilder) const;
122
123 /// Emit a lane insert into \p DstReg, or a new vector register if None is
124 /// provided.
125 ///
126 /// The lane inserted into is defined by \p LaneIdx. The vector source
127 /// register is given by \p SrcReg. The register containing the element is
128 /// given by \p EltReg.
129 MachineInstr *emitLaneInsert(Optional<Register> DstReg, Register SrcReg,
130 Register EltReg, unsigned LaneIdx,
131 const RegisterBank &RB,
132 MachineIRBuilder &MIRBuilder) const;
133 bool selectInsertElt(MachineInstr &I, MachineRegisterInfo &MRI) const;
134 bool tryOptConstantBuildVec(MachineInstr &MI, LLT DstTy,
135 MachineRegisterInfo &MRI) const;
136 bool selectBuildVector(MachineInstr &I, MachineRegisterInfo &MRI) const;
137 bool selectMergeValues(MachineInstr &I, MachineRegisterInfo &MRI) const;
138 bool selectUnmergeValues(MachineInstr &I, MachineRegisterInfo &MRI) const;
139
140 bool selectShuffleVector(MachineInstr &I, MachineRegisterInfo &MRI) const;
141 bool selectExtractElt(MachineInstr &I, MachineRegisterInfo &MRI) const;
142 bool selectConcatVectors(MachineInstr &I, MachineRegisterInfo &MRI) const;
143 bool selectSplitVectorUnmerge(MachineInstr &I,
144 MachineRegisterInfo &MRI) const;
145 bool selectIntrinsicWithSideEffects(MachineInstr &I,
146 MachineRegisterInfo &MRI) const;
147 bool selectIntrinsic(MachineInstr &I, MachineRegisterInfo &MRI);
148 bool selectVectorICmp(MachineInstr &I, MachineRegisterInfo &MRI) const;
149 bool selectIntrinsicTrunc(MachineInstr &I, MachineRegisterInfo &MRI) const;
150 bool selectIntrinsicRound(MachineInstr &I, MachineRegisterInfo &MRI) const;
151 bool selectJumpTable(MachineInstr &I, MachineRegisterInfo &MRI) const;
152 bool selectBrJT(MachineInstr &I, MachineRegisterInfo &MRI) const;
153 bool selectTLSGlobalValue(MachineInstr &I, MachineRegisterInfo &MRI) const;
154 bool selectReduction(MachineInstr &I, MachineRegisterInfo &MRI) const;
155
156 unsigned emitConstantPoolEntry(const Constant *CPVal,
157 MachineFunction &MF) const;
158 MachineInstr *emitLoadFromConstantPool(const Constant *CPVal,
159 MachineIRBuilder &MIRBuilder) const;
160
161 // Emit a vector concat operation.
162 MachineInstr *emitVectorConcat(Optional<Register> Dst, Register Op1,
163 Register Op2,
164 MachineIRBuilder &MIRBuilder) const;
165
166 // Emit an integer compare between LHS and RHS, which checks for Predicate.
167 MachineInstr *emitIntegerCompare(MachineOperand &LHS, MachineOperand &RHS,
168 MachineOperand &Predicate,
169 MachineIRBuilder &MIRBuilder) const;
170
171 /// Emit a floating point comparison between \p LHS and \p RHS.
172 MachineInstr *emitFPCompare(Register LHS, Register RHS,
173 MachineIRBuilder &MIRBuilder) const;
174
175 MachineInstr *emitInstr(unsigned Opcode,
176 std::initializer_list<llvm::DstOp> DstOps,
177 std::initializer_list<llvm::SrcOp> SrcOps,
178 MachineIRBuilder &MIRBuilder,
179 const ComplexRendererFns &RenderFns = None) const;
180 /// Helper function to emit an add or sub instruction.
181 ///
182 /// \p AddrModeAndSizeToOpcode must contain each of the opcode variants above
183 /// in a specific order.
184 ///
185 /// Below is an example of the expected input to \p AddrModeAndSizeToOpcode.
186 ///
187 /// \code
188 /// const std::array<std::array<unsigned, 2>, 4> Table {
189 /// {{AArch64::ADDXri, AArch64::ADDWri},
190 /// {AArch64::ADDXrs, AArch64::ADDWrs},
191 /// {AArch64::ADDXrr, AArch64::ADDWrr},
192 /// {AArch64::SUBXri, AArch64::SUBWri},
193 /// {AArch64::ADDXrx, AArch64::ADDWrx}}};
194 /// \endcode
195 ///
196 /// Each row in the table corresponds to a different addressing mode. Each
197 /// column corresponds to a different register size.
198 ///
199 /// \attention Rows must be structured as follows:
200 /// - Row 0: The ri opcode variants
201 /// - Row 1: The rs opcode variants
202 /// - Row 2: The rr opcode variants
203 /// - Row 3: The ri opcode variants for negative immediates
204 /// - Row 4: The rx opcode variants
205 ///
206 /// \attention Columns must be structured as follows:
207 /// - Column 0: The 64-bit opcode variants
208 /// - Column 1: The 32-bit opcode variants
209 ///
210 /// \p Dst is the destination register of the binop to emit.
211 /// \p LHS is the left-hand operand of the binop to emit.
212 /// \p RHS is the right-hand operand of the binop to emit.
213 MachineInstr *emitAddSub(
214 const std::array<std::array<unsigned, 2>, 5> &AddrModeAndSizeToOpcode,
215 Register Dst, MachineOperand &LHS, MachineOperand &RHS,
216 MachineIRBuilder &MIRBuilder) const;
217 MachineInstr *emitADD(Register DefReg, MachineOperand &LHS,
218 MachineOperand &RHS,
219 MachineIRBuilder &MIRBuilder) const;
220 MachineInstr *emitADDS(Register Dst, MachineOperand &LHS, MachineOperand &RHS,
221 MachineIRBuilder &MIRBuilder) const;
222 MachineInstr *emitSUBS(Register Dst, MachineOperand &LHS, MachineOperand &RHS,
223 MachineIRBuilder &MIRBuilder) const;
224 MachineInstr *emitCMN(MachineOperand &LHS, MachineOperand &RHS,
225 MachineIRBuilder &MIRBuilder) const;
226 MachineInstr *emitTST(MachineOperand &LHS, MachineOperand &RHS,
227 MachineIRBuilder &MIRBuilder) const;
228 MachineInstr *emitSelect(Register Dst, Register LHS, Register RHS,
229 AArch64CC::CondCode CC,
230 MachineIRBuilder &MIRBuilder) const;
231 MachineInstr *emitExtractVectorElt(Optional<Register> DstReg,
232 const RegisterBank &DstRB, LLT ScalarTy,
233 Register VecReg, unsigned LaneIdx,
234 MachineIRBuilder &MIRBuilder) const;
235
236 /// Helper function for selecting G_FCONSTANT. If the G_FCONSTANT can be
237 /// materialized using a FMOV instruction, then update MI and return it.
238 /// Otherwise, do nothing and return a nullptr.
239 MachineInstr *emitFMovForFConstant(MachineInstr &MI,
240 MachineRegisterInfo &MRI) const;
241
242 /// Emit a CSet for an integer compare.
243 ///
244 /// \p DefReg is expected to be a 32-bit scalar register.
245 MachineInstr *emitCSetForICMP(Register DefReg, unsigned Pred,
246 MachineIRBuilder &MIRBuilder) const;
247 /// Emit a CSet for a FP compare.
248 ///
249 /// \p Dst is expected to be a 32-bit scalar register.
250 MachineInstr *emitCSetForFCmp(Register Dst, CmpInst::Predicate Pred,
251 MachineIRBuilder &MIRBuilder) const;
252
253 /// Emit a TB(N)Z instruction which tests \p Bit in \p TestReg.
254 /// \p IsNegative is true if the test should be "not zero".
255 /// This will also optimize the test bit instruction when possible.
256 MachineInstr *emitTestBit(Register TestReg, uint64_t Bit, bool IsNegative,
257 MachineBasicBlock *DstMBB,
258 MachineIRBuilder &MIB) const;
259
260 // Equivalent to the i32shift_a and friends from AArch64InstrInfo.td.
261 // We use these manually instead of using the importer since it doesn't
262 // support SDNodeXForm.
263 ComplexRendererFns selectShiftA_32(const MachineOperand &Root) const;
264 ComplexRendererFns selectShiftB_32(const MachineOperand &Root) const;
265 ComplexRendererFns selectShiftA_64(const MachineOperand &Root) const;
266 ComplexRendererFns selectShiftB_64(const MachineOperand &Root) const;
267
268 ComplexRendererFns select12BitValueWithLeftShift(uint64_t Immed) const;
269 ComplexRendererFns selectArithImmed(MachineOperand &Root) const;
270 ComplexRendererFns selectNegArithImmed(MachineOperand &Root) const;
271
272 ComplexRendererFns selectAddrModeUnscaled(MachineOperand &Root,
273 unsigned Size) const;
274
275 ComplexRendererFns selectAddrModeUnscaled8(MachineOperand &Root) const {
276 return selectAddrModeUnscaled(Root, 1);
277 }
278 ComplexRendererFns selectAddrModeUnscaled16(MachineOperand &Root) const {
279 return selectAddrModeUnscaled(Root, 2);
280 }
281 ComplexRendererFns selectAddrModeUnscaled32(MachineOperand &Root) const {
282 return selectAddrModeUnscaled(Root, 4);
283 }
284 ComplexRendererFns selectAddrModeUnscaled64(MachineOperand &Root) const {
285 return selectAddrModeUnscaled(Root, 8);
286 }
287 ComplexRendererFns selectAddrModeUnscaled128(MachineOperand &Root) const {
288 return selectAddrModeUnscaled(Root, 16);
289 }
290
291 /// Helper to try to fold in a GISEL_ADD_LOW into an immediate, to be used
292 /// from complex pattern matchers like selectAddrModeIndexed().
293 ComplexRendererFns tryFoldAddLowIntoImm(MachineInstr &RootDef, unsigned Size,
294 MachineRegisterInfo &MRI) const;
295
296 ComplexRendererFns selectAddrModeIndexed(MachineOperand &Root,
297 unsigned Size) const;
298 template <int Width>
299 ComplexRendererFns selectAddrModeIndexed(MachineOperand &Root) const {
300 return selectAddrModeIndexed(Root, Width / 8);
301 }
302
303 bool isWorthFoldingIntoExtendedReg(MachineInstr &MI,
304 const MachineRegisterInfo &MRI) const;
305 ComplexRendererFns
306 selectAddrModeShiftedExtendXReg(MachineOperand &Root,
307 unsigned SizeInBytes) const;
308
309 /// Returns a \p ComplexRendererFns which contains a base, offset, and whether
310 /// or not a shift + extend should be folded into an addressing mode. Returns
311 /// None when this is not profitable or possible.
312 ComplexRendererFns
313 selectExtendedSHL(MachineOperand &Root, MachineOperand &Base,
314 MachineOperand &Offset, unsigned SizeInBytes,
315 bool WantsExt) const;
316 ComplexRendererFns selectAddrModeRegisterOffset(MachineOperand &Root) const;
317 ComplexRendererFns selectAddrModeXRO(MachineOperand &Root,
318 unsigned SizeInBytes) const;
319 template <int Width>
320 ComplexRendererFns selectAddrModeXRO(MachineOperand &Root) const {
321 return selectAddrModeXRO(Root, Width / 8);
322 }
323
324 ComplexRendererFns selectAddrModeWRO(MachineOperand &Root,
325 unsigned SizeInBytes) const;
326 template <int Width>
327 ComplexRendererFns selectAddrModeWRO(MachineOperand &Root) const {
328 return selectAddrModeWRO(Root, Width / 8);
329 }
330
331 ComplexRendererFns selectShiftedRegister(MachineOperand &Root) const;
332
333 ComplexRendererFns selectArithShiftedRegister(MachineOperand &Root) const {
334 return selectShiftedRegister(Root);
335 }
336
337 ComplexRendererFns selectLogicalShiftedRegister(MachineOperand &Root) const {
338 // TODO: selectShiftedRegister should allow for rotates on logical shifts.
339 // For now, make them the same. The only difference between the two is that
340 // logical shifts are allowed to fold in rotates. Otherwise, these are
341 // functionally the same.
342 return selectShiftedRegister(Root);
343 }
344
345 /// Given an extend instruction, determine the correct shift-extend type for
346 /// that instruction.
347 ///
348 /// If the instruction is going to be used in a load or store, pass
349 /// \p IsLoadStore = true.
350 AArch64_AM::ShiftExtendType
351 getExtendTypeForInst(MachineInstr &MI, MachineRegisterInfo &MRI,
352 bool IsLoadStore = false) const;
353
354 /// Instructions that accept extend modifiers like UXTW expect the register
355 /// being extended to be a GPR32. Narrow ExtReg to a 32-bit register using a
356 /// subregister copy if necessary. Return either ExtReg, or the result of the
357 /// new copy.
358 Register narrowExtendRegIfNeeded(Register ExtReg,
359 MachineIRBuilder &MIB) const;
360 Register widenGPRBankRegIfNeeded(Register Reg, unsigned Size,
361 MachineIRBuilder &MIB) const;
362 ComplexRendererFns selectArithExtendedRegister(MachineOperand &Root) const;
363
364 void renderTruncImm(MachineInstrBuilder &MIB, const MachineInstr &MI,
365 int OpIdx = -1) const;
366 void renderLogicalImm32(MachineInstrBuilder &MIB, const MachineInstr &I,
367 int OpIdx = -1) const;
368 void renderLogicalImm64(MachineInstrBuilder &MIB, const MachineInstr &I,
369 int OpIdx = -1) const;
370
371 // Materialize a GlobalValue or BlockAddress using a movz+movk sequence.
372 void materializeLargeCMVal(MachineInstr &I, const Value *V,
373 unsigned OpFlags) const;
374
375 // Optimization methods.
376 bool tryOptSelect(MachineInstr &MI) const;
377 MachineInstr *tryFoldIntegerCompare(MachineOperand &LHS, MachineOperand &RHS,
378 MachineOperand &Predicate,
379 MachineIRBuilder &MIRBuilder) const;
380
381 /// Return true if \p MI is a load or store of \p NumBytes bytes.
382 bool isLoadStoreOfNumBytes(const MachineInstr &MI, unsigned NumBytes) const;
383
384 /// Returns true if \p MI is guaranteed to have the high-half of a 64-bit
385 /// register zeroed out. In other words, the result of MI has been explicitly
386 /// zero extended.
387 bool isDef32(const MachineInstr &MI) const;
388
389 const AArch64TargetMachine &TM;
390 const AArch64Subtarget &STI;
391 const AArch64InstrInfo &TII;
392 const AArch64RegisterInfo &TRI;
393 const AArch64RegisterBankInfo &RBI;
394
395 bool ProduceNonFlagSettingCondBr = false;
396
397 // Some cached values used during selection.
398 // We use LR as a live-in register, and we keep track of it here as it can be
399 // clobbered by calls.
400 Register MFReturnAddr;
401
402#define GET_GLOBALISEL_PREDICATES_DECL
403#include "AArch64GenGlobalISel.inc"
404#undef GET_GLOBALISEL_PREDICATES_DECL
405
406// We declare the temporaries used by selectImpl() in the class to minimize the
407// cost of constructing placeholder values.
408#define GET_GLOBALISEL_TEMPORARIES_DECL
409#include "AArch64GenGlobalISel.inc"
410#undef GET_GLOBALISEL_TEMPORARIES_DECL
411};
412
413} // end anonymous namespace
414
415#define GET_GLOBALISEL_IMPL
416#include "AArch64GenGlobalISel.inc"
417#undef GET_GLOBALISEL_IMPL
418
419AArch64InstructionSelector::AArch64InstructionSelector(
420 const AArch64TargetMachine &TM, const AArch64Subtarget &STI,
421 const AArch64RegisterBankInfo &RBI)
422 : InstructionSelector(), TM(TM), STI(STI), TII(*STI.getInstrInfo()),
423 TRI(*STI.getRegisterInfo()), RBI(RBI),
424#define GET_GLOBALISEL_PREDICATES_INIT
425#include "AArch64GenGlobalISel.inc"
426#undef GET_GLOBALISEL_PREDICATES_INIT
427#define GET_GLOBALISEL_TEMPORARIES_INIT
428#include "AArch64GenGlobalISel.inc"
429#undef GET_GLOBALISEL_TEMPORARIES_INIT
430{
431}
432
433// FIXME: This should be target-independent, inferred from the types declared
434// for each class in the bank.
435static const TargetRegisterClass *
436getRegClassForTypeOnBank(LLT Ty, const RegisterBank &RB,
437 const RegisterBankInfo &RBI,
438 bool GetAllRegSet = false) {
439 if (RB.getID() == AArch64::GPRRegBankID) {
440 if (Ty.getSizeInBits() <= 32)
441 return GetAllRegSet ? &AArch64::GPR32allRegClass
442 : &AArch64::GPR32RegClass;
443 if (Ty.getSizeInBits() == 64)
444 return GetAllRegSet ? &AArch64::GPR64allRegClass
445 : &AArch64::GPR64RegClass;
446 return nullptr;
447 }
448
449 if (RB.getID() == AArch64::FPRRegBankID) {
450 if (Ty.getSizeInBits() <= 16)
451 return &AArch64::FPR16RegClass;
452 if (Ty.getSizeInBits() == 32)
453 return &AArch64::FPR32RegClass;
454 if (Ty.getSizeInBits() == 64)
455 return &AArch64::FPR64RegClass;
456 if (Ty.getSizeInBits() == 128)
457 return &AArch64::FPR128RegClass;
458 return nullptr;
459 }
460
461 return nullptr;
462}
463
464/// Given a register bank, and size in bits, return the smallest register class
465/// that can represent that combination.
466static const TargetRegisterClass *
467getMinClassForRegBank(const RegisterBank &RB, unsigned SizeInBits,
468 bool GetAllRegSet = false) {
469 unsigned RegBankID = RB.getID();
470
471 if (RegBankID == AArch64::GPRRegBankID) {
472 if (SizeInBits <= 32)
473 return GetAllRegSet ? &AArch64::GPR32allRegClass
474 : &AArch64::GPR32RegClass;
475 if (SizeInBits == 64)
476 return GetAllRegSet ? &AArch64::GPR64allRegClass
477 : &AArch64::GPR64RegClass;
478 }
479
480 if (RegBankID == AArch64::FPRRegBankID) {
481 switch (SizeInBits) {
482 default:
483 return nullptr;
484 case 8:
485 return &AArch64::FPR8RegClass;
486 case 16:
487 return &AArch64::FPR16RegClass;
488 case 32:
489 return &AArch64::FPR32RegClass;
490 case 64:
491 return &AArch64::FPR64RegClass;
492 case 128:
493 return &AArch64::FPR128RegClass;
494 }
495 }
496
497 return nullptr;
498}
499
500/// Returns the correct subregister to use for a given register class.
501static bool getSubRegForClass(const TargetRegisterClass *RC,
502 const TargetRegisterInfo &TRI, unsigned &SubReg) {
503 switch (TRI.getRegSizeInBits(*RC)) {
56
Control jumps to the 'default' case at line 519
504 case 8:
505 SubReg = AArch64::bsub;
506 break;
507 case 16:
508 SubReg = AArch64::hsub;
509 break;
510 case 32:
511 if (RC != &AArch64::FPR32RegClass)
512 SubReg = AArch64::sub_32;
513 else
514 SubReg = AArch64::ssub;
515 break;
516 case 64:
517 SubReg = AArch64::dsub;
518 break;
519 default:
520 LLVM_DEBUG(
57
Assuming 'DebugFlag' is false
58
Loop condition is false. Exiting loop
521 dbgs() << "Couldn't find appropriate subregister for register class.");
522 return false;
59
Returning without writing to 'SubReg'
523 }
524
525 return true;
526}
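
As the path events above show, getSubRegForClass only writes 'SubReg' for the register sizes it handles and otherwise returns false. A small sketch of a guarded call, assuming a caller that can simply fail (an illustration, not the selector's actual code):

// Hypothetical call site: treat the boolean result as the validity of 'SubReg'.
unsigned SubReg = 0; // 0 = no subregister index, as a defensive default.
if (!getSubRegForClass(SrcRC, TRI, SubReg))
  return false; // Never read 'SubReg' when the lookup failed.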
527
528/// Returns the minimum size the given register bank can hold.
529static unsigned getMinSizeForRegBank(const RegisterBank &RB) {
530 switch (RB.getID()) {
531 case AArch64::GPRRegBankID:
532 return 32;
533 case AArch64::FPRRegBankID:
534 return 8;
535 default:
536 llvm_unreachable("Tried to get minimum size for unknown register bank.");
537 }
538}
539
540static Optional<uint64_t> getImmedFromMO(const MachineOperand &Root) {
541 auto &MI = *Root.getParent();
542 auto &MBB = *MI.getParent();
543 auto &MF = *MBB.getParent();
544 auto &MRI = MF.getRegInfo();
545 uint64_t Immed;
546 if (Root.isImm())
547 Immed = Root.getImm();
548 else if (Root.isCImm())
549 Immed = Root.getCImm()->getZExtValue();
550 else if (Root.isReg()) {
551 auto ValAndVReg =
552 getConstantVRegValWithLookThrough(Root.getReg(), MRI, true);
553 if (!ValAndVReg)
554 return None;
555 Immed = ValAndVReg->Value;
556 } else
557 return None;
558 return Immed;
559}
560
561/// Check whether \p I is a currently unsupported binary operation:
562/// - it has an unsized type
563/// - an operand is not a vreg
564/// - all operands are not in the same bank
565/// These are checks that should someday live in the verifier, but right now,
566/// these are mostly limitations of the aarch64 selector.
567static bool unsupportedBinOp(const MachineInstr &I,
568 const AArch64RegisterBankInfo &RBI,
569 const MachineRegisterInfo &MRI,
570 const AArch64RegisterInfo &TRI) {
571 LLT Ty = MRI.getType(I.getOperand(0).getReg());
572 if (!Ty.isValid()) {
573 LLVM_DEBUG(dbgs() << "Generic binop register should be typed\n");
574 return true;
575 }
576
577 const RegisterBank *PrevOpBank = nullptr;
578 for (auto &MO : I.operands()) {
579 // FIXME: Support non-register operands.
580 if (!MO.isReg()) {
581 LLVM_DEBUG(dbgs() << "Generic inst non-reg operands are unsupported\n");
582 return true;
583 }
584
585 // FIXME: Can generic operations have physical registers operands? If
586 // so, this will need to be taught about that, and we'll need to get the
587 // bank out of the minimal class for the register.
588 // Either way, this needs to be documented (and possibly verified).
589 if (!Register::isVirtualRegister(MO.getReg())) {
590 LLVM_DEBUG(dbgs() << "Generic inst has physical register operand\n");
591 return true;
592 }
593
594 const RegisterBank *OpBank = RBI.getRegBank(MO.getReg(), MRI, TRI);
595 if (!OpBank) {
596 LLVM_DEBUG(dbgs() << "Generic register has no bank or class\n");
597 return true;
598 }
599
600 if (PrevOpBank && OpBank != PrevOpBank) {
601 LLVM_DEBUG(dbgs() << "Generic inst operands have different banks\n");
602 return true;
603 }
604 PrevOpBank = OpBank;
605 }
606 return false;
607}
608
609/// Select the AArch64 opcode for the basic binary operation \p GenericOpc
610/// (such as G_OR or G_SDIV), appropriate for the register bank \p RegBankID
611/// and of size \p OpSize.
612/// \returns \p GenericOpc if the combination is unsupported.
613static unsigned selectBinaryOp(unsigned GenericOpc, unsigned RegBankID,
614 unsigned OpSize) {
615 switch (RegBankID) {
616 case AArch64::GPRRegBankID:
617 if (OpSize == 32) {
618 switch (GenericOpc) {
619 case TargetOpcode::G_SHL:
620 return AArch64::LSLVWr;
621 case TargetOpcode::G_LSHR:
622 return AArch64::LSRVWr;
623 case TargetOpcode::G_ASHR:
624 return AArch64::ASRVWr;
625 default:
626 return GenericOpc;
627 }
628 } else if (OpSize == 64) {
629 switch (GenericOpc) {
630 case TargetOpcode::G_PTR_ADD:
631 return AArch64::ADDXrr;
632 case TargetOpcode::G_SHL:
633 return AArch64::LSLVXr;
634 case TargetOpcode::G_LSHR:
635 return AArch64::LSRVXr;
636 case TargetOpcode::G_ASHR:
637 return AArch64::ASRVXr;
638 default:
639 return GenericOpc;
640 }
641 }
642 break;
643 case AArch64::FPRRegBankID:
644 switch (OpSize) {
645 case 32:
646 switch (GenericOpc) {
647 case TargetOpcode::G_FADD:
648 return AArch64::FADDSrr;
649 case TargetOpcode::G_FSUB:
650 return AArch64::FSUBSrr;
651 case TargetOpcode::G_FMUL:
652 return AArch64::FMULSrr;
653 case TargetOpcode::G_FDIV:
654 return AArch64::FDIVSrr;
655 default:
656 return GenericOpc;
657 }
658 case 64:
659 switch (GenericOpc) {
660 case TargetOpcode::G_FADD:
661 return AArch64::FADDDrr;
662 case TargetOpcode::G_FSUB:
663 return AArch64::FSUBDrr;
664 case TargetOpcode::G_FMUL:
665 return AArch64::FMULDrr;
666 case TargetOpcode::G_FDIV:
667 return AArch64::FDIVDrr;
668 case TargetOpcode::G_OR:
669 return AArch64::ORRv8i8;
670 default:
671 return GenericOpc;
672 }
673 }
674 break;
675 }
676 return GenericOpc;
677}
678
679/// Select the AArch64 opcode for the G_LOAD or G_STORE operation \p GenericOpc,
680/// appropriate for the (value) register bank \p RegBankID and of memory access
681/// size \p OpSize. This returns the variant with the base+unsigned-immediate
682/// addressing mode (e.g., LDRXui).
683/// \returns \p GenericOpc if the combination is unsupported.
684static unsigned selectLoadStoreUIOp(unsigned GenericOpc, unsigned RegBankID,
685 unsigned OpSize) {
686 const bool isStore = GenericOpc == TargetOpcode::G_STORE;
687 switch (RegBankID) {
688 case AArch64::GPRRegBankID:
689 switch (OpSize) {
690 case 8:
691 return isStore ? AArch64::STRBBui : AArch64::LDRBBui;
692 case 16:
693 return isStore ? AArch64::STRHHui : AArch64::LDRHHui;
694 case 32:
695 return isStore ? AArch64::STRWui : AArch64::LDRWui;
696 case 64:
697 return isStore ? AArch64::STRXui : AArch64::LDRXui;
698 }
699 break;
700 case AArch64::FPRRegBankID:
701 switch (OpSize) {
702 case 8:
703 return isStore ? AArch64::STRBui : AArch64::LDRBui;
704 case 16:
705 return isStore ? AArch64::STRHui : AArch64::LDRHui;
706 case 32:
707 return isStore ? AArch64::STRSui : AArch64::LDRSui;
708 case 64:
709 return isStore ? AArch64::STRDui : AArch64::LDRDui;
710 }
711 break;
712 }
713 return GenericOpc;
714}
715
716#ifndef NDEBUG
717/// Helper function that verifies that we have a valid copy at the end of
718/// selectCopy. Verifies that the source and dest have the expected sizes and
719/// then returns true.
720static bool isValidCopy(const MachineInstr &I, const RegisterBank &DstBank,
721 const MachineRegisterInfo &MRI,
722 const TargetRegisterInfo &TRI,
723 const RegisterBankInfo &RBI) {
724 const Register DstReg = I.getOperand(0).getReg();
725 const Register SrcReg = I.getOperand(1).getReg();
726 const unsigned DstSize = RBI.getSizeInBits(DstReg, MRI, TRI);
727 const unsigned SrcSize = RBI.getSizeInBits(SrcReg, MRI, TRI);
728
729 // Make sure the size of the source and dest line up.
730 assert(
731 (DstSize == SrcSize ||
732 // Copies are a mean to setup initial types, the number of
733 // bits may not exactly match.
734 (Register::isPhysicalRegister(SrcReg) && DstSize <= SrcSize) ||
735 // Copies are a mean to copy bits around, as long as we are
736 // on the same register class, that's fine. Otherwise, that
737 // means we need some SUBREG_TO_REG or AND & co.
738 (((DstSize + 31) / 32 == (SrcSize + 31) / 32) && DstSize > SrcSize)) &&
739 "Copy with different width?!");
740
741 // Check the size of the destination.
742 assert((DstSize <= 64 || DstBank.getID() == AArch64::FPRRegBankID) &&
743 "GPRs cannot get more than 64-bit width values");
744
745 return true;
746}
747#endif
748
749/// Helper function for selectCopy. Inserts a subregister copy from \p SrcReg
750/// to \p *To.
751///
752/// E.g "To = COPY SrcReg:SubReg"
753static bool copySubReg(MachineInstr &I, MachineRegisterInfo &MRI,
754 const RegisterBankInfo &RBI, Register SrcReg,
755 const TargetRegisterClass *To, unsigned SubReg) {
756 assert(SrcReg.isValid() && "Expected a valid source register?");
757 assert(To && "Destination register class cannot be null");
758 assert(SubReg && "Expected a valid subregister");
759
760 MachineIRBuilder MIB(I);
761 auto SubRegCopy =
762 MIB.buildInstr(TargetOpcode::COPY, {To}, {}).addReg(SrcReg, 0, SubReg);
763 MachineOperand &RegOp = I.getOperand(1);
764 RegOp.setReg(SubRegCopy.getReg(0));
765
766 // It's possible that the destination register won't be constrained. Make
767 // sure that happens.
768 if (!Register::isPhysicalRegister(I.getOperand(0).getReg()))
769 RBI.constrainGenericRegister(I.getOperand(0).getReg(), *To, MRI);
770
771 return true;
772}
773
774/// Helper function to get the source and destination register classes for a
775/// copy. Returns a std::pair containing the source register class for the
776/// copy, and the destination register class for the copy. If a register class
777/// cannot be determined, then it will be nullptr.
778static std::pair<const TargetRegisterClass *, const TargetRegisterClass *>
779getRegClassesForCopy(MachineInstr &I, const TargetInstrInfo &TII,
780 MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI,
781 const RegisterBankInfo &RBI) {
782 Register DstReg = I.getOperand(0).getReg();
783 Register SrcReg = I.getOperand(1).getReg();
784 const RegisterBank &DstRegBank = *RBI.getRegBank(DstReg, MRI, TRI);
785 const RegisterBank &SrcRegBank = *RBI.getRegBank(SrcReg, MRI, TRI);
786 unsigned DstSize = RBI.getSizeInBits(DstReg, MRI, TRI);
787 unsigned SrcSize = RBI.getSizeInBits(SrcReg, MRI, TRI);
788
789 // Special casing for cross-bank copies of s1s. We can technically represent
790 // a 1-bit value with any size of register. The minimum size for a GPR is 32
791 // bits. So, we need to put the FPR on 32 bits as well.
792 //
793 // FIXME: I'm not sure if this case holds true outside of copies. If it does,
794 // then we can pull it into the helpers that get the appropriate class for a
795 // register bank. Or make a new helper that carries along some constraint
796 // information.
797 if (SrcRegBank != DstRegBank && (DstSize == 1 && SrcSize == 1))
798 SrcSize = DstSize = 32;
799
800 return {getMinClassForRegBank(SrcRegBank, SrcSize, true),
801 getMinClassForRegBank(DstRegBank, DstSize, true)};
802}
803
804static bool selectCopy(MachineInstr &I, const TargetInstrInfo &TII,
805 MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI,
806 const RegisterBankInfo &RBI) {
807 Register DstReg = I.getOperand(0).getReg();
808 Register SrcReg = I.getOperand(1).getReg();
809 const RegisterBank &DstRegBank = *RBI.getRegBank(DstReg, MRI, TRI);
810 const RegisterBank &SrcRegBank = *RBI.getRegBank(SrcReg, MRI, TRI);
811
812 // Find the correct register classes for the source and destination registers.
813 const TargetRegisterClass *SrcRC;
814 const TargetRegisterClass *DstRC;
815 std::tie(SrcRC, DstRC) = getRegClassesForCopy(I, TII, MRI, TRI, RBI);
23
Calling 'tie<const llvm::TargetRegisterClass *, const llvm::TargetRegisterClass *>'
34
Returning from 'tie<const llvm::TargetRegisterClass *, const llvm::TargetRegisterClass *>'
35
Calling 'tuple::operator='
38
Returning from 'tuple::operator='
816
817 if (!DstRC) {
39
Assuming 'DstRC' is non-null
40
Taking false branch
818 LLVM_DEBUG(dbgs() << "Unexpected dest size "
819 << RBI.getSizeInBits(DstReg, MRI, TRI) << '\n');
820 return false;
821 }
822
823 // A couple helpers below, for making sure that the copy we produce is valid.
824
825 // Set to true if we insert a SUBREG_TO_REG. If we do this, then we don't want
826 // to verify that the src and dst are the same size, since that's handled by
827 // the SUBREG_TO_REG.
828 bool KnownValid = false;
829
830 // Returns true, or asserts if something we don't expect happens. Instead of
831 // returning true, we return isValidCopy() to ensure that we verify the
832 // result.
833 auto CheckCopy = [&]() {
834 // If we have a bitcast or something, we can't have physical registers.
835 assert((I.isCopy() ||
836 (!Register::isPhysicalRegister(I.getOperand(0).getReg()) &&
837 !Register::isPhysicalRegister(I.getOperand(1).getReg()))) &&
838 "No phys reg on generic operator!");
839 bool ValidCopy = true;
840#ifndef NDEBUG
841 ValidCopy = KnownValid || isValidCopy(I, DstRegBank, MRI, TRI, RBI);
842 assert(ValidCopy && "Invalid copy.");
843#endif
844 return ValidCopy;
845 };
846
847 // Is this a copy? If so, then we may need to insert a subregister copy.
848 if (I.isCopy()) {
41
Calling 'MachineInstr::isCopy'
44
Returning from 'MachineInstr::isCopy'
45
Taking true branch
849 // Yes. Check if there's anything to fix up.
850 if (!SrcRC) {
46
Assuming 'SrcRC' is non-null
47
Taking false branch
851 LLVM_DEBUG(dbgs() << "Couldn't determine source register class\n");
852 return false;
853 }
854
855 unsigned SrcSize = TRI.getRegSizeInBits(*SrcRC);
856 unsigned DstSize = TRI.getRegSizeInBits(*DstRC);
857 unsigned SubReg;
48
'SubReg' declared without an initial value
858
859 // If the source bank doesn't support a subregister copy small enough,
860 // then we first need to copy to the destination bank.
861 if (getMinSizeForRegBank(SrcRegBank) > DstSize) {
49
Assuming the condition is false
50
Taking false branch
862 const TargetRegisterClass *DstTempRC =
863 getMinClassForRegBank(DstRegBank, SrcSize, /* GetAllRegSet */ true);
864 getSubRegForClass(DstRC, TRI, SubReg);
865
866 MachineIRBuilder MIB(I);
867 auto Copy = MIB.buildCopy({DstTempRC}, {SrcReg});
868 copySubReg(I, MRI, RBI, Copy.getReg(0), DstRC, SubReg);
869 } else if (SrcSize > DstSize) {
51
Assuming 'SrcSize' is <= 'DstSize'
52
Taking false branch
870 // If the source register is bigger than the destination we need to
871 // perform a subregister copy.
872 const TargetRegisterClass *SubRegRC =
873 getMinClassForRegBank(SrcRegBank, DstSize, /* GetAllRegSet */ true);
874 getSubRegForClass(SubRegRC, TRI, SubReg);
875 copySubReg(I, MRI, RBI, SrcReg, DstRC, SubReg);
876 } else if (DstSize > SrcSize) {
53
Assuming 'DstSize' is > 'SrcSize'
54
Taking true branch
877 // If the destination register is bigger than the source we need to do
878 // a promotion using SUBREG_TO_REG.
879 const TargetRegisterClass *PromotionRC =
880 getMinClassForRegBank(SrcRegBank, DstSize, /* GetAllRegSet */ true);
881 getSubRegForClass(SrcRC, TRI, SubReg);
55
Calling 'getSubRegForClass'
60
Returning from 'getSubRegForClass'
882
883 Register PromoteReg = MRI.createVirtualRegister(PromotionRC);
884 BuildMI(*I.getParent(), I, I.getDebugLoc(),
61
1st function call argument is an uninitialized value
885 TII.get(AArch64::SUBREG_TO_REG), PromoteReg)
886 .addImm(0)
887 .addUse(SrcReg)
888 .addImm(SubReg);
889 MachineOperand &RegOp = I.getOperand(1);
890 RegOp.setReg(PromoteReg);
891
892 // Promise that the copy is implicitly validated by the SUBREG_TO_REG.
893 KnownValid = true;
894 }
895
896 // If the destination is a physical register, then there's nothing to
897 // change, so we're done.
898 if (Register::isPhysicalRegister(DstReg))
899 return CheckCopy();
900 }
901
902 // No need to constrain SrcReg. It will get constrained when we hit another
903 // of its use or its defs. Copies do not have constraints.
904 if (!RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) {
905 LLVM_DEBUG(dbgs() << "Failed to constrain " << TII.getName(I.getOpcode())
906 << " operand\n");
907 return false;
908 }
909 I.setDesc(TII.get(AArch64::COPY));
910 return CheckCopy();
911}
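
The path through selectCopy above reaches the BuildMI call at source line 884 with 'SubReg' still unwritten: the DstSize > SrcSize promotion calls getSubRegForClass(SrcRC, TRI, SubReg) at line 881 but discards its boolean result. One possible guard, sketched under the assumption that bailing out of the copy is acceptable here (an illustration, not necessarily the fix that was committed upstream):

// Sketch: only feed 'SubReg' to the SUBREG_TO_REG when it was actually computed.
unsigned SubReg;
if (!getSubRegForClass(SrcRC, TRI, SubReg)) {
  LLVM_DEBUG(dbgs() << "Couldn't determine subregister for promotion\n");
  return false;
}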
912
913static unsigned selectFPConvOpc(unsigned GenericOpc, LLT DstTy, LLT SrcTy) {
914 if (!DstTy.isScalar() || !SrcTy.isScalar())
915 return GenericOpc;
916
917 const unsigned DstSize = DstTy.getSizeInBits();
918 const unsigned SrcSize = SrcTy.getSizeInBits();
919
920 switch (DstSize) {
921 case 32:
922 switch (SrcSize) {
923 case 32:
924 switch (GenericOpc) {
925 case TargetOpcode::G_SITOFP:
926 return AArch64::SCVTFUWSri;
927 case TargetOpcode::G_UITOFP:
928 return AArch64::UCVTFUWSri;
929 case TargetOpcode::G_FPTOSI:
930 return AArch64::FCVTZSUWSr;
931 case TargetOpcode::G_FPTOUI:
932 return AArch64::FCVTZUUWSr;
933 default:
934 return GenericOpc;
935 }
936 case 64:
937 switch (GenericOpc) {
938 case TargetOpcode::G_SITOFP:
939 return AArch64::SCVTFUXSri;
940 case TargetOpcode::G_UITOFP:
941 return AArch64::UCVTFUXSri;
942 case TargetOpcode::G_FPTOSI:
943 return AArch64::FCVTZSUWDr;
944 case TargetOpcode::G_FPTOUI:
945 return AArch64::FCVTZUUWDr;
946 default:
947 return GenericOpc;
948 }
949 default:
950 return GenericOpc;
951 }
952 case 64:
953 switch (SrcSize) {
954 case 32:
955 switch (GenericOpc) {
956 case TargetOpcode::G_SITOFP:
957 return AArch64::SCVTFUWDri;
958 case TargetOpcode::G_UITOFP:
959 return AArch64::UCVTFUWDri;
960 case TargetOpcode::G_FPTOSI:
961 return AArch64::FCVTZSUXSr;
962 case TargetOpcode::G_FPTOUI:
963 return AArch64::FCVTZUUXSr;
964 default:
965 return GenericOpc;
966 }
967 case 64:
968 switch (GenericOpc) {
969 case TargetOpcode::G_SITOFP:
970 return AArch64::SCVTFUXDri;
971 case TargetOpcode::G_UITOFP:
972 return AArch64::UCVTFUXDri;
973 case TargetOpcode::G_FPTOSI:
974 return AArch64::FCVTZSUXDr;
975 case TargetOpcode::G_FPTOUI:
976 return AArch64::FCVTZUUXDr;
977 default:
978 return GenericOpc;
979 }
980 default:
981 return GenericOpc;
982 }
983 default:
984 return GenericOpc;
985 };
986 return GenericOpc;
987}
988
989MachineInstr *
990AArch64InstructionSelector::emitSelect(Register Dst, Register True,
991 Register False, AArch64CC::CondCode CC,
992 MachineIRBuilder &MIB) const {
993 MachineRegisterInfo &MRI = *MIB.getMRI();
994 assert(RBI.getRegBank(False, MRI, TRI)->getID() ==
995 RBI.getRegBank(True, MRI, TRI)->getID() &&
996 "Expected both select operands to have the same regbank?");
997 LLT Ty = MRI.getType(True);
998 if (Ty.isVector())
999 return nullptr;
1000 const unsigned Size = Ty.getSizeInBits();
1001 assert((Size == 32 || Size == 64) &&
1002 "Expected 32 bit or 64 bit select only?");
1003 const bool Is32Bit = Size == 32;
1004 if (RBI.getRegBank(True, MRI, TRI)->getID() != AArch64::GPRRegBankID) {
1005 unsigned Opc = Is32Bit ? AArch64::FCSELSrrr : AArch64::FCSELDrrr;
1006 auto FCSel = MIB.buildInstr(Opc, {Dst}, {True, False}).addImm(CC);
1007 constrainSelectedInstRegOperands(*FCSel, TII, TRI, RBI);
1008 return &*FCSel;
1009 }
1010
1011 // By default, we'll try and emit a CSEL.
1012 unsigned Opc = Is32Bit ? AArch64::CSELWr : AArch64::CSELXr;
1013 bool Optimized = false;
1014 auto TryFoldBinOpIntoSelect = [&Opc, &False, Is32Bit, &MRI]() {
1015 // Attempt to fold:
1016 //
1017 // sub = G_SUB 0, x
1018 // select = G_SELECT cc, true, sub
1019 //
1020 // Into:
1021 // select = CSNEG true, x, cc
1022 Register MatchReg;
1023 if (mi_match(False, MRI, m_Neg(m_Reg(MatchReg)))) {
1024 Opc = Is32Bit ? AArch64::CSNEGWr : AArch64::CSNEGXr;
1025 False = MatchReg;
1026 return true;
1027 }
1028
1029 // Attempt to fold:
1030 //
1031 // xor = G_XOR x, -1
1032 // select = G_SELECT cc, true, xor
1033 //
1034 // Into:
1035 // select = CSINV true, x, cc
1036 if (mi_match(False, MRI, m_Not(m_Reg(MatchReg)))) {
1037 Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;
1038 False = MatchReg;
1039 return true;
1040 }
1041
1042 return false;
1043 };
1044
1045 // Helper lambda which tries to use CSINC/CSINV for the instruction when its
1046 // true/false values are constants.
1047 // FIXME: All of these patterns already exist in tablegen. We should be
1048 // able to import these.
1049 auto TryOptSelectCst = [&Opc, &True, &False, &CC, Is32Bit, &MRI,
1050 &Optimized]() {
1051 if (Optimized)
1052 return false;
1053 auto TrueCst = getConstantVRegValWithLookThrough(True, MRI);
1054 auto FalseCst = getConstantVRegValWithLookThrough(False, MRI);
1055 if (!TrueCst && !FalseCst)
1056 return false;
1057
1058 Register ZReg = Is32Bit ? AArch64::WZR : AArch64::XZR;
1059 if (TrueCst && FalseCst) {
1060 auto T = TrueCst->Value;
1061 auto F = FalseCst->Value;
1062
1063 if (T == 0 && F == 1) {
1064 // G_SELECT cc, 0, 1 -> CSINC zreg, zreg, cc
1065 Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;
1066 True = ZReg;
1067 False = ZReg;
1068 return true;
1069 }
1070
1071 if (T == 0 && F == -1) {
1072 // G_SELECT cc 0, -1 -> CSINV zreg, zreg cc
1073 Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;
1074 True = ZReg;
1075 False = ZReg;
1076 return true;
1077 }
1078 }
1079
1080 if (TrueCst) {
1081 auto T = TrueCst->Value;
1082 if (T == 1) {
1083 // G_SELECT cc, 1, f -> CSINC f, zreg, inv_cc
1084 Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;
1085 True = False;
1086 False = ZReg;
1087 CC = AArch64CC::getInvertedCondCode(CC);
1088 return true;
1089 }
1090
1091 if (T == -1) {
1092 // G_SELECT cc, -1, f -> CSINV f, zreg, inv_cc
1093 Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;
1094 True = False;
1095 False = ZReg;
1096 CC = AArch64CC::getInvertedCondCode(CC);
1097 return true;
1098 }
1099 }
1100
1101 if (FalseCst) {
1102 auto F = FalseCst->Value;
1103 if (F == 1) {
1104 // G_SELECT cc, t, 1 -> CSINC t, zreg, cc
1105 Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;
1106 False = ZReg;
1107 return true;
1108 }
1109
1110 if (F == -1) {
1111 // G_SELECT cc, t, -1 -> CSINC t, zreg, cc
1112 Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;
1113 False = ZReg;
1114 return true;
1115 }
1116 }
1117 return false;
1118 };
1119
1120 Optimized |= TryFoldBinOpIntoSelect();
1121 Optimized |= TryOptSelectCst();
1122 auto SelectInst = MIB.buildInstr(Opc, {Dst}, {True, False}).addImm(CC);
1123 constrainSelectedInstRegOperands(*SelectInst, TII, TRI, RBI);
1124 return &*SelectInst;
1125}
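As a reading aid for the foldings in emitSelect above, the following is a minimal standalone sketch (not part of the listed file) of the scalar semantics of the conditional-select family the code chooses between; it shows why CSINC/CSINV of the zero register materialize 1 and -1 on the false path.

#include <cstdint>

// cond stands for the already-evaluated AArch64 condition code.
uint64_t csel (bool cond, uint64_t t, uint64_t f) { return cond ? t : f;     }
uint64_t csinc(bool cond, uint64_t t, uint64_t f) { return cond ? t : f + 1; }
uint64_t csinv(bool cond, uint64_t t, uint64_t f) { return cond ? t : ~f;    }
uint64_t csneg(bool cond, uint64_t t, uint64_t f) { return cond ? t : 0 - f; }

// With t == f == 0 (the zero register), csinc yields 0 or 1 and csinv yields
// 0 or -1, matching the "G_SELECT cc, 0, 1" and "G_SELECT cc, 0, -1" cases.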
1126
1127static AArch64CC::CondCode changeICMPPredToAArch64CC(CmpInst::Predicate P) {
1128 switch (P) {
1129 default:
1130 llvm_unreachable("Unknown condition code!")::llvm::llvm_unreachable_internal("Unknown condition code!", "/build/llvm-toolchain-snapshot-12~++20201129111111+e987fbdd85d/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 1130)
;
1131 case CmpInst::ICMP_NE:
1132 return AArch64CC::NE;
1133 case CmpInst::ICMP_EQ:
1134 return AArch64CC::EQ;
1135 case CmpInst::ICMP_SGT:
1136 return AArch64CC::GT;
1137 case CmpInst::ICMP_SGE:
1138 return AArch64CC::GE;
1139 case CmpInst::ICMP_SLT:
1140 return AArch64CC::LT;
1141 case CmpInst::ICMP_SLE:
1142 return AArch64CC::LE;
1143 case CmpInst::ICMP_UGT:
1144 return AArch64CC::HI;
1145 case CmpInst::ICMP_UGE:
1146 return AArch64CC::HS;
1147 case CmpInst::ICMP_ULT:
1148 return AArch64CC::LO;
1149 case CmpInst::ICMP_ULE:
1150 return AArch64CC::LS;
1151 }
1152}
1153
1154static void changeFCMPPredToAArch64CC(CmpInst::Predicate P,
1155 AArch64CC::CondCode &CondCode,
1156 AArch64CC::CondCode &CondCode2) {
1157 CondCode2 = AArch64CC::AL;
1158 switch (P) {
1159 default:
1160 llvm_unreachable("Unknown FP condition!")::llvm::llvm_unreachable_internal("Unknown FP condition!", "/build/llvm-toolchain-snapshot-12~++20201129111111+e987fbdd85d/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 1160)
;
1161 case CmpInst::FCMP_OEQ:
1162 CondCode = AArch64CC::EQ;
1163 break;
1164 case CmpInst::FCMP_OGT:
1165 CondCode = AArch64CC::GT;
1166 break;
1167 case CmpInst::FCMP_OGE:
1168 CondCode = AArch64CC::GE;
1169 break;
1170 case CmpInst::FCMP_OLT:
1171 CondCode = AArch64CC::MI;
1172 break;
1173 case CmpInst::FCMP_OLE:
1174 CondCode = AArch64CC::LS;
1175 break;
1176 case CmpInst::FCMP_ONE:
1177 CondCode = AArch64CC::MI;
1178 CondCode2 = AArch64CC::GT;
1179 break;
1180 case CmpInst::FCMP_ORD:
1181 CondCode = AArch64CC::VC;
1182 break;
1183 case CmpInst::FCMP_UNO:
1184 CondCode = AArch64CC::VS;
1185 break;
1186 case CmpInst::FCMP_UEQ:
1187 CondCode = AArch64CC::EQ;
1188 CondCode2 = AArch64CC::VS;
1189 break;
1190 case CmpInst::FCMP_UGT:
1191 CondCode = AArch64CC::HI;
1192 break;
1193 case CmpInst::FCMP_UGE:
1194 CondCode = AArch64CC::PL;
1195 break;
1196 case CmpInst::FCMP_ULT:
1197 CondCode = AArch64CC::LT;
1198 break;
1199 case CmpInst::FCMP_ULE:
1200 CondCode = AArch64CC::LE;
1201 break;
1202 case CmpInst::FCMP_UNE:
1203 CondCode = AArch64CC::NE;
1204 break;
1205 }
1206}
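For the predicates that also set CondCode2, one AArch64 condition is not enough, which is why callers emit a second branch when CondCode2 != AL. A scalar sketch of the FCMP_UEQ case (illustrative only; after FCMP, the VS condition holds exactly when the operands were unordered):

#include <cmath>

// Unordered-or-equal: true when the values compare equal or a NaN is present,
// so it is covered by testing EQ first and then VS.
bool fcmpUEQ(double A, double B) {
  return A == B || std::isnan(A) || std::isnan(B);
}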
1207
1208/// Return a register which can be used as a bit to test in a TB(N)Z.
1209static Register getTestBitReg(Register Reg, uint64_t &Bit, bool &Invert,
1210 MachineRegisterInfo &MRI) {
1211 assert(Reg.isValid() && "Expected valid register!");
1212 while (MachineInstr *MI = getDefIgnoringCopies(Reg, MRI)) {
1213 unsigned Opc = MI->getOpcode();
1214
1215 if (!MI->getOperand(0).isReg() ||
1216 !MRI.hasOneNonDBGUse(MI->getOperand(0).getReg()))
1217 break;
1218
1219 // (tbz (any_ext x), b) -> (tbz x, b) if we don't use the extended bits.
1220 //
1221 // (tbz (trunc x), b) -> (tbz x, b) is always safe, because the bit number
1222 // on the truncated x is the same as the bit number on x.
1223 if (Opc == TargetOpcode::G_ANYEXT || Opc == TargetOpcode::G_ZEXT ||
1224 Opc == TargetOpcode::G_TRUNC) {
1225 Register NextReg = MI->getOperand(1).getReg();
1226 // Did we find something worth folding?
1227 if (!NextReg.isValid() || !MRI.hasOneNonDBGUse(NextReg))
1228 break;
1229
1230 // NextReg is worth folding. Keep looking.
1231 Reg = NextReg;
1232 continue;
1233 }
1234
1235 // Attempt to find a suitable operation with a constant on one side.
1236 Optional<uint64_t> C;
1237 Register TestReg;
1238 switch (Opc) {
1239 default:
1240 break;
1241 case TargetOpcode::G_AND:
1242 case TargetOpcode::G_XOR: {
1243 TestReg = MI->getOperand(1).getReg();
1244 Register ConstantReg = MI->getOperand(2).getReg();
1245 auto VRegAndVal = getConstantVRegValWithLookThrough(ConstantReg, MRI);
1246 if (!VRegAndVal) {
1247 // AND commutes, check the other side for a constant.
1248 // FIXME: Can we canonicalize the constant so that it's always on the
1249 // same side at some point earlier?
1250 std::swap(ConstantReg, TestReg);
1251 VRegAndVal = getConstantVRegValWithLookThrough(ConstantReg, MRI);
1252 }
1253 if (VRegAndVal)
1254 C = VRegAndVal->Value;
1255 break;
1256 }
1257 case TargetOpcode::G_ASHR:
1258 case TargetOpcode::G_LSHR:
1259 case TargetOpcode::G_SHL: {
1260 TestReg = MI->getOperand(1).getReg();
1261 auto VRegAndVal =
1262 getConstantVRegValWithLookThrough(MI->getOperand(2).getReg(), MRI);
1263 if (VRegAndVal)
1264 C = VRegAndVal->Value;
1265 break;
1266 }
1267 }
1268
1269 // Didn't find a constant or viable register. Bail out of the loop.
1270 if (!C || !TestReg.isValid())
1271 break;
1272
1273 // We found a suitable instruction with a constant. Check to see if we can
1274 // walk through the instruction.
1275 Register NextReg;
1276 unsigned TestRegSize = MRI.getType(TestReg).getSizeInBits();
1277 switch (Opc) {
1278 default:
1279 break;
1280 case TargetOpcode::G_AND:
1281 // (tbz (and x, m), b) -> (tbz x, b) when the b-th bit of m is set.
1282 if ((*C >> Bit) & 1)
1283 NextReg = TestReg;
1284 break;
1285 case TargetOpcode::G_SHL:
1286 // (tbz (shl x, c), b) -> (tbz x, b-c) when b-c is positive and fits in
1287 // the type of the register.
1288 if (*C <= Bit && (Bit - *C) < TestRegSize) {
1289 NextReg = TestReg;
1290 Bit = Bit - *C;
1291 }
1292 break;
1293 case TargetOpcode::G_ASHR:
1294 // (tbz (ashr x, c), b) -> (tbz x, b+c) or (tbz x, msb) if b+c is > # bits
1295 // in x
1296 NextReg = TestReg;
1297 Bit = Bit + *C;
1298 if (Bit >= TestRegSize)
1299 Bit = TestRegSize - 1;
1300 break;
1301 case TargetOpcode::G_LSHR:
1302 // (tbz (lshr x, c), b) -> (tbz x, b+c) when b + c is < # bits in x
1303 if ((Bit + *C) < TestRegSize) {
1304 NextReg = TestReg;
1305 Bit = Bit + *C;
1306 }
1307 break;
1308 case TargetOpcode::G_XOR:
1309 // We can walk through a G_XOR by inverting whether we use tbz/tbnz when
1310 // appropriate.
1311 //
1312 // e.g. If x' = xor x, c, and the b-th bit is set in c then
1313 //
1314 // tbz x', b -> tbnz x, b
1315 //
1316 // Because x' only has the b-th bit set if x does not.
1317 if ((*C >> Bit) & 1)
1318 Invert = !Invert;
1319 NextReg = TestReg;
1320 break;
1321 }
1322
1323 // Check if we found anything worth folding.
1324 if (!NextReg.isValid())
1325 return Reg;
1326 Reg = NextReg;
1327 }
1328
1329 return Reg;
1330}
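The shift cases above only move the tested bit index; a small sketch of that index arithmetic (plain 64-bit values, helper names illustrative, range checks as in the code assumed to have passed):

#include <cstdint>

// Bit b of (x << c) is bit (b - c) of x, valid when c <= b and b - c is still
// inside x (the G_SHL case above).
uint64_t bitAfterShl(uint64_t Bit, uint64_t C) { return Bit - C; }

// Bit b of (x >> c) is bit (b + c) of x (the G_LSHR case); for G_ASHR the code
// additionally clamps to the sign bit when b + c runs past the register width.
uint64_t bitAfterShr(uint64_t Bit, uint64_t C) { return Bit + C; }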
1331
1332MachineInstr *AArch64InstructionSelector::emitTestBit(
1333 Register TestReg, uint64_t Bit, bool IsNegative, MachineBasicBlock *DstMBB,
1334 MachineIRBuilder &MIB) const {
1335 assert(TestReg.isValid());
1336 assert(ProduceNonFlagSettingCondBr &&
1337 "Cannot emit TB(N)Z with speculation tracking!");
1338 MachineRegisterInfo &MRI = *MIB.getMRI();
1339
1340 // Attempt to optimize the test bit by walking over instructions.
1341 TestReg = getTestBitReg(TestReg, Bit, IsNegative, MRI);
1342 LLT Ty = MRI.getType(TestReg);
1343 unsigned Size = Ty.getSizeInBits();
1344 assert(!Ty.isVector() && "Expected a scalar!");
1345 assert(Bit < 64 && "Bit is too large!");
1346
1347 // When the test register is a 64-bit register, we have to narrow to make
1348 // TBNZW work.
1349 bool UseWReg = Bit < 32;
1350 unsigned NecessarySize = UseWReg ? 32 : 64;
1351 if (Size < NecessarySize)
1352 TestReg = widenGPRBankRegIfNeeded(TestReg, NecessarySize, MIB);
1353 else if (Size > NecessarySize)
1354 TestReg = narrowExtendRegIfNeeded(TestReg, MIB);
1355
1356 static const unsigned OpcTable[2][2] = {{AArch64::TBZX, AArch64::TBNZX},
1357 {AArch64::TBZW, AArch64::TBNZW}};
1358 unsigned Opc = OpcTable[UseWReg][IsNegative];
1359 auto TestBitMI =
1360 MIB.buildInstr(Opc).addReg(TestReg).addImm(Bit).addMBB(DstMBB);
1361 constrainSelectedInstRegOperands(*TestBitMI, TII, TRI, RBI);
1362 return &*TestBitMI;
1363}
1364
1365bool AArch64InstructionSelector::tryOptAndIntoCompareBranch(
1366 MachineInstr *AndInst, int64_t CmpConstant, const CmpInst::Predicate &Pred,
1367 MachineBasicBlock *DstMBB, MachineIRBuilder &MIB) const {
1368 // Given something like this:
1369 //
1370 // %x = ...Something...
1371 // %one = G_CONSTANT i64 1
1372 // %zero = G_CONSTANT i64 0
1373 // %and = G_AND %x, %one
1374 // %cmp = G_ICMP intpred(ne), %and, %zero
1375 // %cmp_trunc = G_TRUNC %cmp
1376 // G_BRCOND %cmp_trunc, %bb.3
1377 //
1378 // We want to try and fold the AND into the G_BRCOND and produce either a
1379 // TBNZ (when we have intpred(ne)) or a TBZ (when we have intpred(eq)).
1380 //
1381 // In this case, we'd get
1382 //
1383 // TBNZ %x %bb.3
1384 //
1385 if (!AndInst || AndInst->getOpcode() != TargetOpcode::G_AND)
1386 return false;
1387
1388 // Need to be comparing against 0 to fold.
1389 if (CmpConstant != 0)
1390 return false;
1391
1392 MachineRegisterInfo &MRI = *MIB.getMRI();
1393
1394 // Only support EQ and NE. If we have LT, then it *is* possible to fold, but
1395 // we don't want to do this. When we have an AND and LT, we need a TST/ANDS,
1396 // so folding would be redundant.
1397 if (Pred != CmpInst::Predicate::ICMP_EQ &&
1398 Pred != CmpInst::Predicate::ICMP_NE)
1399 return false;
1400
1401 // Check if the AND has a constant on its RHS which we can use as a mask.
1402 // If it's a power of 2, then it's the same as checking a specific bit.
1403 // (e.g, ANDing with 8 == ANDing with 000...100 == testing if bit 3 is set)
1404 auto MaybeBit =
1405 getConstantVRegValWithLookThrough(AndInst->getOperand(2).getReg(), MRI);
1406 if (!MaybeBit || !isPowerOf2_64(MaybeBit->Value))
1407 return false;
1408
1409 uint64_t Bit = Log2_64(static_cast<uint64_t>(MaybeBit->Value));
1410 Register TestReg = AndInst->getOperand(1).getReg();
1411 bool Invert = Pred == CmpInst::Predicate::ICMP_NE;
1412
1413 // Emit a TB(N)Z.
1414 emitTestBit(TestReg, Bit, Invert, DstMBB, MIB);
1415 return true;
1416}
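The mask-to-bit-index step above relies on the AND mask being a power of two; a self-contained sketch with a concrete value (helper names are illustrative, not from the file):

#include <cassert>
#include <cstdint>

static bool isPow2(uint64_t V) { return V && !(V & (V - 1)); }
static unsigned floorLog2(uint64_t V) {
  unsigned B = 0;
  while (V >>= 1)
    ++B;
  return B;
}

int main() {
  uint64_t Mask = 8; // G_AND RHS constant: 0b1000
  assert(isPow2(Mask));
  unsigned Bit = floorLog2(Mask); // == 3, the bit the TB(N)Z will test
  assert(Bit == 3);
}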
1417
1418bool AArch64InstructionSelector::selectCompareBranch(
1419 MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const {
1420
1421 const Register CondReg = I.getOperand(0).getReg();
1422 MachineBasicBlock *DestMBB = I.getOperand(1).getMBB();
1423 MachineInstr *CCMI = MRI.getVRegDef(CondReg);
1424 if (CCMI->getOpcode() == TargetOpcode::G_TRUNC)
1425 CCMI = MRI.getVRegDef(CCMI->getOperand(1).getReg());
1426
1427 unsigned CCMIOpc = CCMI->getOpcode();
1428 if (CCMIOpc != TargetOpcode::G_ICMP && CCMIOpc != TargetOpcode::G_FCMP)
1429 return false;
1430
1431 MachineIRBuilder MIB(I);
1432 Register LHS = CCMI->getOperand(2).getReg();
1433 Register RHS = CCMI->getOperand(3).getReg();
1434 auto Pred =
1435 static_cast<CmpInst::Predicate>(CCMI->getOperand(1).getPredicate());
1436
1437 if (CCMIOpc == TargetOpcode::G_FCMP) {
1438 // Unfortunately, the mapping of LLVM FP CC's onto AArch64 CC's isn't
1439 // totally clean. Some of them require two branches to implement.
1440 emitFPCompare(LHS, RHS, MIB);
1441 AArch64CC::CondCode CC1, CC2;
1442 changeFCMPPredToAArch64CC(Pred, CC1, CC2);
1443 MIB.buildInstr(AArch64::Bcc, {}, {}).addImm(CC1).addMBB(DestMBB);
1444 if (CC2 != AArch64CC::AL)
1445 MIB.buildInstr(AArch64::Bcc, {}, {}).addImm(CC2).addMBB(DestMBB);
1446 I.eraseFromParent();
1447 return true;
1448 }
1449
1450 auto VRegAndVal = getConstantVRegValWithLookThrough(RHS, MRI);
1451 MachineInstr *LHSMI = getDefIgnoringCopies(LHS, MRI);
1452
1453 // When we can emit a TB(N)Z, prefer that.
1454 //
1455 // Handle non-commutative condition codes first.
1456 // Note that we don't want to do this when we have a G_AND because it can
1457 // become a tst. The tst will make the test bit in the TB(N)Z redundant.
1458 if (VRegAndVal && LHSMI->getOpcode() != TargetOpcode::G_AND) {
1459 int64_t C = VRegAndVal->Value;
1460
1461 // When we have a greater-than comparison, we can just test if the msb is
1462 // zero.
1463 if (C == -1 && Pred == CmpInst::ICMP_SGT) {
1464 uint64_t Bit = MRI.getType(LHS).getSizeInBits() - 1;
1465 emitTestBit(LHS, Bit, /*IsNegative = */ false, DestMBB, MIB);
1466 I.eraseFromParent();
1467 return true;
1468 }
1469
1470 // When we have a less than comparison, we can just test if the msb is not
1471 // zero.
1472 if (C == 0 && Pred == CmpInst::ICMP_SLT) {
1473 uint64_t Bit = MRI.getType(LHS).getSizeInBits() - 1;
1474 emitTestBit(LHS, Bit, /*IsNegative = */ true, DestMBB, MIB);
1475 I.eraseFromParent();
1476 return true;
1477 }
1478 }
1479
1480 if (!VRegAndVal) {
1481 std::swap(RHS, LHS);
1482 VRegAndVal = getConstantVRegValWithLookThrough(RHS, MRI);
1483 LHSMI = getDefIgnoringCopies(LHS, MRI);
1484 }
1485
1486 if (!VRegAndVal || VRegAndVal->Value != 0) {
1487 // If we can't select a CBZ then emit a cmp + Bcc.
1488 auto Pred =
1489 static_cast<CmpInst::Predicate>(CCMI->getOperand(1).getPredicate());
1490 emitIntegerCompare(CCMI->getOperand(2), CCMI->getOperand(3),
1491 CCMI->getOperand(1), MIB);
1492 const AArch64CC::CondCode CC = changeICMPPredToAArch64CC(Pred);
1493 MIB.buildInstr(AArch64::Bcc, {}, {}).addImm(CC).addMBB(DestMBB);
1494 I.eraseFromParent();
1495 return true;
1496 }
1497
1498 // Try to emit a TB(N)Z for an eq or ne condition.
1499 if (tryOptAndIntoCompareBranch(LHSMI, VRegAndVal->Value, Pred, DestMBB,
1500 MIB)) {
1501 I.eraseFromParent();
1502 return true;
1503 }
1504
1505 const RegisterBank &RB = *RBI.getRegBank(LHS, MRI, TRI);
1506 if (RB.getID() != AArch64::GPRRegBankID)
1507 return false;
1508 if (Pred != CmpInst::ICMP_NE && Pred != CmpInst::ICMP_EQ)
1509 return false;
1510
1511 const unsigned CmpWidth = MRI.getType(LHS).getSizeInBits();
1512 unsigned CBOpc = 0;
1513 if (CmpWidth <= 32)
1514 CBOpc = (Pred == CmpInst::ICMP_EQ ? AArch64::CBZW : AArch64::CBNZW);
1515 else if (CmpWidth == 64)
1516 CBOpc = (Pred == CmpInst::ICMP_EQ ? AArch64::CBZX : AArch64::CBNZX);
1517 else
1518 return false;
1519
1520 BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(CBOpc))
1521 .addUse(LHS)
1522 .addMBB(DestMBB)
1523 .constrainAllUses(TII, TRI, RBI);
1524
1525 I.eraseFromParent();
1526 return true;
1527}
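The two most-significant-bit shortcuts above follow directly from two's-complement signs; a minimal scalar sketch of the equivalence (64-bit, illustrative only):

#include <cstdint>

// x > -1 holds exactly when the sign bit is clear, so (icmp sgt x, -1) + branch
// becomes TBZ x, #msb; likewise x < 0 holds exactly when the sign bit is set,
// so (icmp slt x, 0) + branch becomes TBNZ x, #msb.
bool sgtMinusOne(int64_t X) { return X > -1; }
bool signBitClear(int64_t X) { return (static_cast<uint64_t>(X) >> 63) == 0; }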
1528
1529/// Returns the element immediate value of a vector shift operand if found.
1530/// This needs to detect a splat-like operation, e.g. a G_BUILD_VECTOR.
1531static Optional<int64_t> getVectorShiftImm(Register Reg,
1532 MachineRegisterInfo &MRI) {
1533 assert(MRI.getType(Reg).isVector() && "Expected a *vector* shift operand");
1534 MachineInstr *OpMI = MRI.getVRegDef(Reg);
1535 assert(OpMI && "Expected to find a vreg def for vector shift operand");
1536 if (OpMI->getOpcode() != TargetOpcode::G_BUILD_VECTOR)
1537 return None;
1538
1539 // Check all operands are identical immediates.
1540 int64_t ImmVal = 0;
1541 for (unsigned Idx = 1; Idx < OpMI->getNumOperands(); ++Idx) {
1542 auto VRegAndVal = getConstantVRegValWithLookThrough(OpMI->getOperand(Idx).getReg(), MRI);
1543 if (!VRegAndVal)
1544 return None;
1545
1546 if (Idx == 1)
1547 ImmVal = VRegAndVal->Value;
1548 if (ImmVal != VRegAndVal->Value)
1549 return None;
1550 }
1551
1552 return ImmVal;
1553}
1554
1555/// Matches and returns the shift immediate value for a SHL instruction given
1556/// a shift operand.
1557static Optional<int64_t> getVectorSHLImm(LLT SrcTy, Register Reg, MachineRegisterInfo &MRI) {
1558 Optional<int64_t> ShiftImm = getVectorShiftImm(Reg, MRI);
1559 if (!ShiftImm)
1560 return None;
1561 // Check the immediate is in range for a SHL.
1562 int64_t Imm = *ShiftImm;
1563 if (Imm < 0)
1564 return None;
1565 switch (SrcTy.getElementType().getSizeInBits()) {
1566 default:
1567 LLVM_DEBUG(dbgs() << "Unhandled element type for vector shift")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "Unhandled element type for vector shift"
; } } while (false)
;
1568 return None;
1569 case 8:
1570 if (Imm > 7)
1571 return None;
1572 break;
1573 case 16:
1574 if (Imm > 15)
1575 return None;
1576 break;
1577 case 32:
1578 if (Imm > 31)
1579 return None;
1580 break;
1581 case 64:
1582 if (Imm > 63)
1583 return None;
1584 break;
1585 }
1586 return Imm;
1587}
1588
1589bool AArch64InstructionSelector::selectVectorSHL(
1590 MachineInstr &I, MachineRegisterInfo &MRI) const {
1591 assert(I.getOpcode() == TargetOpcode::G_SHL);
1592 Register DstReg = I.getOperand(0).getReg();
1593 const LLT Ty = MRI.getType(DstReg);
1594 Register Src1Reg = I.getOperand(1).getReg();
1595 Register Src2Reg = I.getOperand(2).getReg();
1596
1597 if (!Ty.isVector())
1598 return false;
1599
1600 // Check if we have a vector of constants on RHS that we can select as the
1601 // immediate form.
1602 Optional<int64_t> ImmVal = getVectorSHLImm(Ty, Src2Reg, MRI);
1603
1604 unsigned Opc = 0;
1605 if (Ty == LLT::vector(2, 64)) {
1606 Opc = ImmVal ? AArch64::SHLv2i64_shift : AArch64::USHLv2i64;
1607 } else if (Ty == LLT::vector(4, 32)) {
1608 Opc = ImmVal ? AArch64::SHLv4i32_shift : AArch64::USHLv4i32;
1609 } else if (Ty == LLT::vector(2, 32)) {
1610 Opc = ImmVal ? AArch64::SHLv2i32_shift : AArch64::USHLv2i32;
1611 } else if (Ty == LLT::vector(4, 16)) {
1612 Opc = ImmVal ? AArch64::SHLv4i16_shift : AArch64::USHLv4i16;
1613 } else if (Ty == LLT::vector(8, 16)) {
1614 Opc = ImmVal ? AArch64::SHLv8i16_shift : AArch64::USHLv8i16;
1615 } else if (Ty == LLT::vector(16, 8)) {
1616 Opc = ImmVal ? AArch64::SHLv16i8_shift : AArch64::USHLv16i8;
1617 } else if (Ty == LLT::vector(8, 8)) {
1618 Opc = ImmVal ? AArch64::SHLv8i8_shift : AArch64::USHLv8i8;
1619 } else {
1620 LLVM_DEBUG(dbgs() << "Unhandled G_SHL type")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "Unhandled G_SHL type"; }
} while (false)
;
1621 return false;
1622 }
1623
1624 MachineIRBuilder MIB(I);
1625 auto Shl = MIB.buildInstr(Opc, {DstReg}, {Src1Reg});
1626 if (ImmVal)
1627 Shl.addImm(*ImmVal);
1628 else
1629 Shl.addUse(Src2Reg);
1630 constrainSelectedInstRegOperands(*Shl, TII, TRI, RBI);
1631 I.eraseFromParent();
1632 return true;
1633}
1634
1635bool AArch64InstructionSelector::selectVectorAshrLshr(
1636 MachineInstr &I, MachineRegisterInfo &MRI) const {
1637 assert(I.getOpcode() == TargetOpcode::G_ASHR ||
1638 I.getOpcode() == TargetOpcode::G_LSHR);
1639 Register DstReg = I.getOperand(0).getReg();
1640 const LLT Ty = MRI.getType(DstReg);
1641 Register Src1Reg = I.getOperand(1).getReg();
1642 Register Src2Reg = I.getOperand(2).getReg();
1643
1644 if (!Ty.isVector())
1645 return false;
1646
1647 bool IsASHR = I.getOpcode() == TargetOpcode::G_ASHR;
1648
1649 // We expect the immediate case to be lowered in the PostLegalCombiner to
1650 // AArch64ISD::VASHR or AArch64ISD::VLSHR equivalents.
1651
1652 // There is no shift-right-by-register instruction, but the shift-left-by-
1653 // register instruction takes a signed amount, where a negative amount
1654 // specifies a right shift.
1655
1656 unsigned Opc = 0;
1657 unsigned NegOpc = 0;
1658 const TargetRegisterClass *RC =
1659 getRegClassForTypeOnBank(Ty, RBI.getRegBank(AArch64::FPRRegBankID), RBI);
1660 if (Ty == LLT::vector(2, 64)) {
1661 Opc = IsASHR ? AArch64::SSHLv2i64 : AArch64::USHLv2i64;
1662 NegOpc = AArch64::NEGv2i64;
1663 } else if (Ty == LLT::vector(4, 32)) {
1664 Opc = IsASHR ? AArch64::SSHLv4i32 : AArch64::USHLv4i32;
1665 NegOpc = AArch64::NEGv4i32;
1666 } else if (Ty == LLT::vector(2, 32)) {
1667 Opc = IsASHR ? AArch64::SSHLv2i32 : AArch64::USHLv2i32;
1668 NegOpc = AArch64::NEGv2i32;
1669 } else if (Ty == LLT::vector(4, 16)) {
1670 Opc = IsASHR ? AArch64::SSHLv4i16 : AArch64::USHLv4i16;
1671 NegOpc = AArch64::NEGv4i16;
1672 } else if (Ty == LLT::vector(8, 16)) {
1673 Opc = IsASHR ? AArch64::SSHLv8i16 : AArch64::USHLv8i16;
1674 NegOpc = AArch64::NEGv8i16;
1675 } else if (Ty == LLT::vector(16, 8)) {
1676 Opc = IsASHR ? AArch64::SSHLv16i8 : AArch64::USHLv16i8;
1677 NegOpc = AArch64::NEGv16i8;
1678 } else if (Ty == LLT::vector(8, 8)) {
1679 Opc = IsASHR ? AArch64::SSHLv8i8 : AArch64::USHLv8i8;
1680 NegOpc = AArch64::NEGv8i8;
1681 } else {
1682 LLVM_DEBUG(dbgs() << "Unhandled G_ASHR type")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "Unhandled G_ASHR type"; }
} while (false)
;
1683 return false;
1684 }
1685
1686 MachineIRBuilder MIB(I);
1687 auto Neg = MIB.buildInstr(NegOpc, {RC}, {Src2Reg});
1688 constrainSelectedInstRegOperands(*Neg, TII, TRI, RBI);
1689 auto SShl = MIB.buildInstr(Opc, {DstReg}, {Src1Reg, Neg});
1690 constrainSelectedInstRegOperands(*SShl, TII, TRI, RBI);
1691 I.eraseFromParent();
1692 return true;
1693}
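The NEG + (U|S)SHL pairing works because the vector shift-left-by-register instructions interpret the per-element amount as signed; a scalar model of that semantic (sketch only, unsigned/USHL case):

#include <cstdint>

// Per-element USHL-style semantics: a negative amount shifts right.
uint64_t ushlElem(uint64_t X, int8_t Amt) {
  return Amt >= 0 ? X << Amt : X >> -Amt;
}
// ushlElem(V, -3) == V >> 3, which is what the NEG of the amount followed by
// USHL/SSHL emits above.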
1694
1695bool AArch64InstructionSelector::selectVaStartAAPCS(
1696 MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const {
1697 return false;
1698}
1699
1700bool AArch64InstructionSelector::selectVaStartDarwin(
1701 MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const {
1702 AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
1703 Register ListReg = I.getOperand(0).getReg();
1704
1705 Register ArgsAddrReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
1706
1707 auto MIB =
1708 BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::ADDXri))
1709 .addDef(ArgsAddrReg)
1710 .addFrameIndex(FuncInfo->getVarArgsStackIndex())
1711 .addImm(0)
1712 .addImm(0);
1713
1714 constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
1715
1716 MIB = BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::STRXui))
1717 .addUse(ArgsAddrReg)
1718 .addUse(ListReg)
1719 .addImm(0)
1720 .addMemOperand(*I.memoperands_begin());
1721
1722 constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
1723 I.eraseFromParent();
1724 return true;
1725}
1726
1727void AArch64InstructionSelector::materializeLargeCMVal(
1728 MachineInstr &I, const Value *V, unsigned OpFlags) const {
1729 MachineBasicBlock &MBB = *I.getParent();
1730 MachineFunction &MF = *MBB.getParent();
1731 MachineRegisterInfo &MRI = MF.getRegInfo();
1732 MachineIRBuilder MIB(I);
1733
1734 auto MovZ = MIB.buildInstr(AArch64::MOVZXi, {&AArch64::GPR64RegClass}, {});
1735 MovZ->addOperand(MF, I.getOperand(1));
1736 MovZ->getOperand(1).setTargetFlags(OpFlags | AArch64II::MO_G0 |
1737 AArch64II::MO_NC);
1738 MovZ->addOperand(MF, MachineOperand::CreateImm(0));
1739 constrainSelectedInstRegOperands(*MovZ, TII, TRI, RBI);
1740
1741 auto BuildMovK = [&](Register SrcReg, unsigned char Flags, unsigned Offset,
1742 Register ForceDstReg) {
1743 Register DstReg = ForceDstReg
1744 ? ForceDstReg
1745 : MRI.createVirtualRegister(&AArch64::GPR64RegClass);
1746 auto MovI = MIB.buildInstr(AArch64::MOVKXi).addDef(DstReg).addUse(SrcReg);
1747 if (auto *GV = dyn_cast<GlobalValue>(V)) {
1748 MovI->addOperand(MF, MachineOperand::CreateGA(
1749 GV, MovZ->getOperand(1).getOffset(), Flags));
1750 } else {
1751 MovI->addOperand(
1752 MF, MachineOperand::CreateBA(cast<BlockAddress>(V),
1753 MovZ->getOperand(1).getOffset(), Flags));
1754 }
1755 MovI->addOperand(MF, MachineOperand::CreateImm(Offset));
1756 constrainSelectedInstRegOperands(*MovI, TII, TRI, RBI);
1757 return DstReg;
1758 };
1759 Register DstReg = BuildMovK(MovZ.getReg(0),
1760 AArch64II::MO_G1 | AArch64II::MO_NC, 16, 0);
1761 DstReg = BuildMovK(DstReg, AArch64II::MO_G2 | AArch64II::MO_NC, 32, 0);
1762 BuildMovK(DstReg, AArch64II::MO_G3, 48, I.getOperand(0).getReg());
1763 return;
1764}
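The MOVZ plus three MOVK sequence built above assembles a 64-bit address 16 bits at a time (the MO_G0..MO_G3 flags select the chunk); a plain-integer sketch of the same decomposition, without relocations:

#include <cstdint>

uint64_t materialize64(uint64_t V) {
  uint64_t R = V & 0xffffULL;    // MOVZ: bits [15:0]   (MO_G0)
  R |= V & (0xffffULL << 16);    // MOVK, lsl #16: bits [31:16] (MO_G1)
  R |= V & (0xffffULL << 32);    // MOVK, lsl #32: bits [47:32] (MO_G2)
  R |= V & (0xffffULL << 48);    // MOVK, lsl #48: bits [63:48] (MO_G3)
  return R;                      // == V
}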
1765
1766bool AArch64InstructionSelector::preISelLower(MachineInstr &I) {
1767 MachineBasicBlock &MBB = *I.getParent();
1768 MachineFunction &MF = *MBB.getParent();
1769 MachineRegisterInfo &MRI = MF.getRegInfo();
1770
1771 switch (I.getOpcode()) {
1772 case TargetOpcode::G_SHL:
1773 case TargetOpcode::G_ASHR:
1774 case TargetOpcode::G_LSHR: {
1775 // These shifts are legalized to have 64 bit shift amounts because we want
1776 // to take advantage of the existing imported selection patterns that assume
1777 // the immediates are s64s. However, if the shifted type is 32 bits and for
1778 // some reason we receive input GMIR that has an s64 shift amount that's not
1779 // a G_CONSTANT, insert a truncate so that we can still select the s32
1780 // register-register variant.
1781 Register SrcReg = I.getOperand(1).getReg();
1782 Register ShiftReg = I.getOperand(2).getReg();
1783 const LLT ShiftTy = MRI.getType(ShiftReg);
1784 const LLT SrcTy = MRI.getType(SrcReg);
1785 if (SrcTy.isVector())
1786 return false;
1787 assert(!ShiftTy.isVector() && "unexpected vector shift ty");
1788 if (SrcTy.getSizeInBits() != 32 || ShiftTy.getSizeInBits() != 64)
1789 return false;
1790 auto *AmtMI = MRI.getVRegDef(ShiftReg);
1791 assert(AmtMI && "could not find a vreg definition for shift amount");
1792 if (AmtMI->getOpcode() != TargetOpcode::G_CONSTANT) {
1793 // Insert a subregister copy to implement a 64->32 trunc
1794 MachineIRBuilder MIB(I);
1795 auto Trunc = MIB.buildInstr(TargetOpcode::COPY, {SrcTy}, {})
1796 .addReg(ShiftReg, 0, AArch64::sub_32);
1797 MRI.setRegBank(Trunc.getReg(0), RBI.getRegBank(AArch64::GPRRegBankID));
1798 I.getOperand(2).setReg(Trunc.getReg(0));
1799 }
1800 return true;
1801 }
1802 case TargetOpcode::G_STORE:
1803 return contractCrossBankCopyIntoStore(I, MRI);
1804 case TargetOpcode::G_PTR_ADD:
1805 return convertPtrAddToAdd(I, MRI);
1806 case TargetOpcode::G_LOAD: {
1807 // For scalar loads of pointers, we try to convert the dest type from p0
1808 // to s64 so that our imported patterns can match. Like with the G_PTR_ADD
1809 // conversion, this should be ok because all users should have been
1810 // selected already, so the type doesn't matter for them.
1811 Register DstReg = I.getOperand(0).getReg();
1812 const LLT DstTy = MRI.getType(DstReg);
1813 if (!DstTy.isPointer())
1814 return false;
1815 MRI.setType(DstReg, LLT::scalar(64));
1816 return true;
1817 }
1818 case AArch64::G_DUP: {
1819 // Convert the type from p0 to s64 to help selection.
1820 LLT DstTy = MRI.getType(I.getOperand(0).getReg());
1821 if (!DstTy.getElementType().isPointer())
1822 return false;
1823 MachineIRBuilder MIB(I);
1824 auto NewSrc = MIB.buildCopy(LLT::scalar(64), I.getOperand(1).getReg());
1825 MRI.setType(I.getOperand(0).getReg(),
1826 DstTy.changeElementType(LLT::scalar(64)));
1827 MRI.setRegBank(NewSrc.getReg(0), RBI.getRegBank(AArch64::GPRRegBankID));
1828 I.getOperand(1).setReg(NewSrc.getReg(0));
1829 return true;
1830 }
1831 default:
1832 return false;
1833 }
1834}
1835
1836/// This lowering tries to look for G_PTR_ADD instructions and then converts
1837/// them to a standard G_ADD with a COPY on the source.
1838///
1839/// The motivation behind this is to expose the add semantics to the imported
1840/// tablegen patterns. We shouldn't need to check for uses being loads/stores,
1841/// because the selector works bottom up, uses before defs. By the time we
1842/// end up trying to select a G_PTR_ADD, we should have already attempted to
1843/// fold this into addressing modes and were therefore unsuccessful.
1844bool AArch64InstructionSelector::convertPtrAddToAdd(
1845 MachineInstr &I, MachineRegisterInfo &MRI) {
1846 assert(I.getOpcode() == TargetOpcode::G_PTR_ADD && "Expected G_PTR_ADD");
1847 Register DstReg = I.getOperand(0).getReg();
1848 Register AddOp1Reg = I.getOperand(1).getReg();
1849 const LLT PtrTy = MRI.getType(DstReg);
1850 if (PtrTy.getAddressSpace() != 0)
1851 return false;
1852
1853 MachineIRBuilder MIB(I);
1854 const LLT CastPtrTy = PtrTy.isVector() ? LLT::vector(2, 64) : LLT::scalar(64);
1855 auto PtrToInt = MIB.buildPtrToInt(CastPtrTy, AddOp1Reg);
1856 // Set regbanks on the registers.
1857 if (PtrTy.isVector())
1858 MRI.setRegBank(PtrToInt.getReg(0), RBI.getRegBank(AArch64::FPRRegBankID));
1859 else
1860 MRI.setRegBank(PtrToInt.getReg(0), RBI.getRegBank(AArch64::GPRRegBankID));
1861
1862 // Now turn the %dst(p0) = G_PTR_ADD %base, off into:
1863 // %dst(intty) = G_ADD %intbase, off
1864 I.setDesc(TII.get(TargetOpcode::G_ADD));
1865 MRI.setType(DstReg, CastPtrTy);
1866 I.getOperand(1).setReg(PtrToInt.getReg(0));
1867 if (!select(*PtrToInt)) {
1868 LLVM_DEBUG(dbgs() << "Failed to select G_PTRTOINT in convertPtrAddToAdd")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "Failed to select G_PTRTOINT in convertPtrAddToAdd"
; } } while (false)
;
1869 return false;
1870 }
1871
1872 // Also take the opportunity here to try to do some optimization.
1873 // Try to convert this into a G_SUB if the offset is a 0-x negate idiom.
1874 Register NegatedReg;
1875 if (!mi_match(I.getOperand(2).getReg(), MRI, m_Neg(m_Reg(NegatedReg))))
1876 return true;
1877 I.getOperand(2).setReg(NegatedReg);
1878 I.setDesc(TII.get(TargetOpcode::G_SUB));
1879 return true;
1880}
1881
1882bool AArch64InstructionSelector::earlySelectSHL(
1883 MachineInstr &I, MachineRegisterInfo &MRI) const {
1884 // We try to match the immediate variant of LSL, which is actually an alias
1885 // for a special case of UBFM. Otherwise, we fall back to the imported
1886 // selector which will match the register variant.
1887 assert(I.getOpcode() == TargetOpcode::G_SHL && "unexpected op");
1888 const auto &MO = I.getOperand(2);
1889 auto VRegAndVal = getConstantVRegVal(MO.getReg(), MRI);
1890 if (!VRegAndVal)
1891 return false;
1892
1893 const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
1894 if (DstTy.isVector())
1895 return false;
1896 bool Is64Bit = DstTy.getSizeInBits() == 64;
1897 auto Imm1Fn = Is64Bit ? selectShiftA_64(MO) : selectShiftA_32(MO);
1898 auto Imm2Fn = Is64Bit ? selectShiftB_64(MO) : selectShiftB_32(MO);
1899 MachineIRBuilder MIB(I);
1900
1901 if (!Imm1Fn || !Imm2Fn)
1902 return false;
1903
1904 auto NewI =
1905 MIB.buildInstr(Is64Bit ? AArch64::UBFMXri : AArch64::UBFMWri,
1906 {I.getOperand(0).getReg()}, {I.getOperand(1).getReg()});
1907
1908 for (auto &RenderFn : *Imm1Fn)
1909 RenderFn(NewI);
1910 for (auto &RenderFn : *Imm2Fn)
1911 RenderFn(NewI);
1912
1913 I.eraseFromParent();
1914 return constrainSelectedInstRegOperands(*NewI, TII, TRI, RBI);
1915}
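LSL-by-immediate is an alias of UBFM, which is what the two render functions above end up encoding; a sketch of the standard alias formula (Width is 32 or 64, helper names illustrative):

// LSL xd, xn, #Shift  ==  UBFM xd, xn, #((Width - Shift) % Width), #(Width - 1 - Shift)
unsigned ubfmImmR(unsigned Shift, unsigned Width) { return (Width - Shift) % Width; }
unsigned ubfmImmS(unsigned Shift, unsigned Width) { return Width - 1 - Shift; }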
1916
1917bool AArch64InstructionSelector::contractCrossBankCopyIntoStore(
1918 MachineInstr &I, MachineRegisterInfo &MRI) {
1919 assert(I.getOpcode() == TargetOpcode::G_STORE && "Expected G_STORE");
1920 // If we're storing a scalar, it doesn't matter what register bank that
1921 // scalar is on. All that matters is the size.
1922 //
1923 // So, if we see something like this (with a 32-bit scalar as an example):
1924 //
1925 // %x:gpr(s32) = ... something ...
1926 // %y:fpr(s32) = COPY %x:gpr(s32)
1927 // G_STORE %y:fpr(s32)
1928 //
1929 // We can fix this up into something like this:
1930 //
1931 // G_STORE %x:gpr(s32)
1932 //
1933 // And then continue the selection process normally.
1934 Register DefDstReg = getSrcRegIgnoringCopies(I.getOperand(0).getReg(), MRI);
1935 if (!DefDstReg.isValid())
1936 return false;
1937 LLT DefDstTy = MRI.getType(DefDstReg);
1938 Register StoreSrcReg = I.getOperand(0).getReg();
1939 LLT StoreSrcTy = MRI.getType(StoreSrcReg);
1940
1941 // If we get something strange like a physical register, then we shouldn't
1942 // go any further.
1943 if (!DefDstTy.isValid())
1944 return false;
1945
1946 // Are the source and dst types the same size?
1947 if (DefDstTy.getSizeInBits() != StoreSrcTy.getSizeInBits())
1948 return false;
1949
1950 if (RBI.getRegBank(StoreSrcReg, MRI, TRI) ==
1951 RBI.getRegBank(DefDstReg, MRI, TRI))
1952 return false;
1953
1954 // We have a cross-bank copy, which is entering a store. Let's fold it.
1955 I.getOperand(0).setReg(DefDstReg);
1956 return true;
1957}
1958
1959bool AArch64InstructionSelector::earlySelect(MachineInstr &I) const {
1960 assert(I.getParent() && "Instruction should be in a basic block!");
1961 assert(I.getParent()->getParent() && "Instruction should be in a function!");
1962
1963 MachineBasicBlock &MBB = *I.getParent();
1964 MachineFunction &MF = *MBB.getParent();
1965 MachineRegisterInfo &MRI = MF.getRegInfo();
1966
1967 switch (I.getOpcode()) {
1968 case TargetOpcode::G_BR: {
1969 // If the branch jumps to the fallthrough block, don't bother emitting it.
1970 // Only do this for -O0 for a good code size improvement, because when
1971 // optimizations are enabled we want to leave this choice to
1972 // MachineBlockPlacement.
1973 bool EnableOpt = MF.getTarget().getOptLevel() != CodeGenOpt::None;
1974 if (EnableOpt || !MBB.isLayoutSuccessor(I.getOperand(0).getMBB()))
1975 return false;
1976 I.eraseFromParent();
1977 return true;
1978 }
1979 case TargetOpcode::G_SHL:
1980 return earlySelectSHL(I, MRI);
1981 case TargetOpcode::G_CONSTANT: {
1982 bool IsZero = false;
1983 if (I.getOperand(1).isCImm())
1984 IsZero = I.getOperand(1).getCImm()->getZExtValue() == 0;
1985 else if (I.getOperand(1).isImm())
1986 IsZero = I.getOperand(1).getImm() == 0;
1987
1988 if (!IsZero)
1989 return false;
1990
1991 Register DefReg = I.getOperand(0).getReg();
1992 LLT Ty = MRI.getType(DefReg);
1993 if (Ty.getSizeInBits() == 64) {
1994 I.getOperand(1).ChangeToRegister(AArch64::XZR, false);
1995 RBI.constrainGenericRegister(DefReg, AArch64::GPR64RegClass, MRI);
1996 } else if (Ty.getSizeInBits() == 32) {
1997 I.getOperand(1).ChangeToRegister(AArch64::WZR, false);
1998 RBI.constrainGenericRegister(DefReg, AArch64::GPR32RegClass, MRI);
1999 } else
2000 return false;
2001
2002 I.setDesc(TII.get(TargetOpcode::COPY));
2003 return true;
2004 }
2005 default:
2006 return false;
2007 }
2008}
2009
2010bool AArch64InstructionSelector::select(MachineInstr &I) {
2011 assert(I.getParent() && "Instruction should be in a basic block!");
2012 assert(I.getParent()->getParent() && "Instruction should be in a function!");
2013
2014 MachineBasicBlock &MBB = *I.getParent();
2015 MachineFunction &MF = *MBB.getParent();
2016 MachineRegisterInfo &MRI = MF.getRegInfo();
2017
2018 const AArch64Subtarget *Subtarget =
2019 &static_cast<const AArch64Subtarget &>(MF.getSubtarget());
2020 if (Subtarget->requiresStrictAlign()) {
2021 // We don't support this feature yet.
2022 LLVM_DEBUG(dbgs() << "AArch64 GISel does not support strict-align yet\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "AArch64 GISel does not support strict-align yet\n"
; } } while (false)
;
2023 return false;
2024 }
2025
2026 unsigned Opcode = I.getOpcode();
2027 // G_PHI requires same handling as PHI
2028 if (!I.isPreISelOpcode() || Opcode == TargetOpcode::G_PHI) {
2029 // Certain non-generic instructions also need some special handling.
2030
2031 if (Opcode == TargetOpcode::LOAD_STACK_GUARD)
2032 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2033
2034 if (Opcode == TargetOpcode::PHI || Opcode == TargetOpcode::G_PHI) {
2035 const Register DefReg = I.getOperand(0).getReg();
2036 const LLT DefTy = MRI.getType(DefReg);
2037
2038 const RegClassOrRegBank &RegClassOrBank =
2039 MRI.getRegClassOrRegBank(DefReg);
2040
2041 const TargetRegisterClass *DefRC
2042 = RegClassOrBank.dyn_cast<const TargetRegisterClass *>();
2043 if (!DefRC) {
2044 if (!DefTy.isValid()) {
2045 LLVM_DEBUG(dbgs() << "PHI operand has no type, not a gvreg?\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "PHI operand has no type, not a gvreg?\n"
; } } while (false)
;
2046 return false;
2047 }
2048 const RegisterBank &RB = *RegClassOrBank.get<const RegisterBank *>();
2049 DefRC = getRegClassForTypeOnBank(DefTy, RB, RBI);
2050 if (!DefRC) {
2051 LLVM_DEBUG(dbgs() << "PHI operand has unexpected size/bank\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "PHI operand has unexpected size/bank\n"
; } } while (false)
;
2052 return false;
2053 }
2054 }
2055
2056 I.setDesc(TII.get(TargetOpcode::PHI));
2057
2058 return RBI.constrainGenericRegister(DefReg, *DefRC, MRI);
2059 }
2060
2061 if (I.isCopy())
2062 return selectCopy(I, TII, MRI, TRI, RBI);
2063
2064 return true;
2065 }
2066
2067
2068 if (I.getNumOperands() != I.getNumExplicitOperands()) {
2069 LLVM_DEBUG(
2070 dbgs() << "Generic instruction has unexpected implicit operands\n");
2071 return false;
2072 }
2073
2074 // Try to do some lowering before we start instruction selecting. These
2075 // lowerings are purely transformations on the input G_MIR and so selection
2076 // must continue after any modification of the instruction.
2077 if (preISelLower(I)) {
2078 Opcode = I.getOpcode(); // The opcode may have been modified, refresh it.
2079 }
2080
2081 // There may be patterns where the importer can't deal with them optimally,
2082 // but does select it to a suboptimal sequence so our custom C++ selection
2083 // code later never has a chance to work on it. Therefore, we have an early
2084 // selection attempt here to give priority to certain selection routines
2085 // over the imported ones.
2086 if (earlySelect(I))
2087 return true;
2088
2089 if (selectImpl(I, *CoverageInfo))
2090 return true;
2091
2092 LLT Ty =
2093 I.getOperand(0).isReg() ? MRI.getType(I.getOperand(0).getReg()) : LLT{};
2094
2095 MachineIRBuilder MIB(I);
2096
2097 switch (Opcode) {
2098 case TargetOpcode::G_BRCOND: {
2099 Register CondReg = I.getOperand(0).getReg();
2100 MachineBasicBlock *DestMBB = I.getOperand(1).getMBB();
2101
2102 // Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z
2103 // instructions will not be produced, as they are conditional branch
2104 // instructions that do not set flags.
2105 if (ProduceNonFlagSettingCondBr && selectCompareBranch(I, MF, MRI))
2106 return true;
2107
2108 if (ProduceNonFlagSettingCondBr) {
2109 auto TestBit = emitTestBit(CondReg, /*Bit = */ 0, /*IsNegative = */ true,
2110 DestMBB, MIB);
2111 I.eraseFromParent();
2112 return constrainSelectedInstRegOperands(*TestBit, TII, TRI, RBI);
2113 } else {
2114 auto CMP = BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::ANDSWri))
2115 .addDef(AArch64::WZR)
2116 .addUse(CondReg)
2117 .addImm(1);
2118 constrainSelectedInstRegOperands(*CMP.getInstr(), TII, TRI, RBI);
2119 auto Bcc =
2120 BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::Bcc))
2121 .addImm(AArch64CC::EQ)
2122 .addMBB(DestMBB);
2123
2124 I.eraseFromParent();
2125 return constrainSelectedInstRegOperands(*Bcc.getInstr(), TII, TRI, RBI);
2126 }
2127 }
2128
2129 case TargetOpcode::G_BRINDIRECT: {
2130 I.setDesc(TII.get(AArch64::BR));
2131 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2132 }
2133
2134 case TargetOpcode::G_BRJT:
2135 return selectBrJT(I, MRI);
2136
2137 case AArch64::G_ADD_LOW: {
2138 // This op may have been separated from its ADRP companion by the localizer
2139 // or some other code motion pass. Given that many CPUs will try to
2140 // macro fuse these operations anyway, select this into a MOVaddr pseudo
2141 // which will later be expanded into an ADRP+ADD pair after scheduling.
2142 MachineInstr *BaseMI = MRI.getVRegDef(I.getOperand(1).getReg());
2143 if (BaseMI->getOpcode() != AArch64::ADRP) {
2144 I.setDesc(TII.get(AArch64::ADDXri));
2145 I.addOperand(MachineOperand::CreateImm(0));
2146 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2147 }
2148 assert(TM.getCodeModel() == CodeModel::Small &&
2149 "Expected small code model");
2150 MachineIRBuilder MIB(I);
2151 auto Op1 = BaseMI->getOperand(1);
2152 auto Op2 = I.getOperand(2);
2153 auto MovAddr = MIB.buildInstr(AArch64::MOVaddr, {I.getOperand(0)}, {})
2154 .addGlobalAddress(Op1.getGlobal(), Op1.getOffset(),
2155 Op1.getTargetFlags())
2156 .addGlobalAddress(Op2.getGlobal(), Op2.getOffset(),
2157 Op2.getTargetFlags());
2158 I.eraseFromParent();
2159 return constrainSelectedInstRegOperands(*MovAddr, TII, TRI, RBI);
2160 }
2161
2162 case TargetOpcode::G_BSWAP: {
2163 // Handle vector types for G_BSWAP directly.
2164 Register DstReg = I.getOperand(0).getReg();
2165 LLT DstTy = MRI.getType(DstReg);
2166
2167 // We should only get vector types here; everything else is handled by the
2168 // importer right now.
2169 if (!DstTy.isVector() || DstTy.getSizeInBits() > 128) {
2170 LLVM_DEBUG(dbgs() << "Dst type for G_BSWAP currently unsupported.\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "Dst type for G_BSWAP currently unsupported.\n"
; } } while (false)
;
2171 return false;
2172 }
2173
2174 // Only handle 4 and 2 element vectors for now.
2175 // TODO: 16-bit elements.
2176 unsigned NumElts = DstTy.getNumElements();
2177 if (NumElts != 4 && NumElts != 2) {
2178 LLVM_DEBUG(dbgs() << "Unsupported number of elements for G_BSWAP.\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "Unsupported number of elements for G_BSWAP.\n"
; } } while (false)
;
2179 return false;
2180 }
2181
2182 // Choose the correct opcode for the supported types. Right now, that's
2183 // v2s32, v4s32, and v2s64.
2184 unsigned Opc = 0;
2185 unsigned EltSize = DstTy.getElementType().getSizeInBits();
2186 if (EltSize == 32)
2187 Opc = (DstTy.getNumElements() == 2) ? AArch64::REV32v8i8
2188 : AArch64::REV32v16i8;
2189 else if (EltSize == 64)
2190 Opc = AArch64::REV64v16i8;
2191
2192 // We should always get something by the time we get here...
2193 assert(Opc != 0 && "Didn't get an opcode for G_BSWAP?");
2194
2195 I.setDesc(TII.get(Opc));
2196 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2197 }
2198
2199 case TargetOpcode::G_FCONSTANT:
2200 case TargetOpcode::G_CONSTANT: {
2201 const bool isFP = Opcode == TargetOpcode::G_FCONSTANT;
2202
2203 const LLT s8 = LLT::scalar(8);
2204 const LLT s16 = LLT::scalar(16);
2205 const LLT s32 = LLT::scalar(32);
2206 const LLT s64 = LLT::scalar(64);
2207 const LLT p0 = LLT::pointer(0, 64);
2208
2209 const Register DefReg = I.getOperand(0).getReg();
2210 const LLT DefTy = MRI.getType(DefReg);
2211 const unsigned DefSize = DefTy.getSizeInBits();
2212 const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI);
2213
2214 // FIXME: Redundant check, but even less readable when factored out.
2215 if (isFP) {
2216 if (Ty != s32 && Ty != s64) {
2217 LLVM_DEBUG(dbgs() << "Unable to materialize FP " << Tydo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "Unable to materialize FP "
<< Ty << " constant, expected: " << s32 <<
" or " << s64 << '\n'; } } while (false)
2218 << " constant, expected: " << s32 << " or " << s64do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "Unable to materialize FP "
<< Ty << " constant, expected: " << s32 <<
" or " << s64 << '\n'; } } while (false)
2219 << '\n')do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "Unable to materialize FP "
<< Ty << " constant, expected: " << s32 <<
" or " << s64 << '\n'; } } while (false)
;
2220 return false;
2221 }
2222
2223 if (RB.getID() != AArch64::FPRRegBankID) {
2224 LLVM_DEBUG(dbgs() << "Unable to materialize FP " << Tydo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "Unable to materialize FP "
<< Ty << " constant on bank: " << RB <<
", expected: FPR\n"; } } while (false)
2225 << " constant on bank: " << RBdo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "Unable to materialize FP "
<< Ty << " constant on bank: " << RB <<
", expected: FPR\n"; } } while (false)
2226 << ", expected: FPR\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "Unable to materialize FP "
<< Ty << " constant on bank: " << RB <<
", expected: FPR\n"; } } while (false)
;
2227 return false;
2228 }
2229
2230 // The case when we have 0.0 is covered by tablegen. Reject it here so we
2231 // can be sure tablegen works correctly and isn't rescued by this code.
2232 if (I.getOperand(1).getFPImm()->getValueAPF().isExactlyValue(0.0))
2233 return false;
2234 } else {
2235 // s32 and s64 are covered by tablegen.
2236 if (Ty != p0 && Ty != s8 && Ty != s16) {
2237 LLVM_DEBUG(dbgs() << "Unable to materialize integer " << Tydo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "Unable to materialize integer "
<< Ty << " constant, expected: " << s32 <<
", " << s64 << ", or " << p0 << '\n'
; } } while (false)
2238 << " constant, expected: " << s32 << ", " << s64do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "Unable to materialize integer "
<< Ty << " constant, expected: " << s32 <<
", " << s64 << ", or " << p0 << '\n'
; } } while (false)
2239 << ", or " << p0 << '\n')do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "Unable to materialize integer "
<< Ty << " constant, expected: " << s32 <<
", " << s64 << ", or " << p0 << '\n'
; } } while (false)
;
2240 return false;
2241 }
2242
2243 if (RB.getID() != AArch64::GPRRegBankID) {
2244 LLVM_DEBUG(dbgs() << "Unable to materialize integer " << Tydo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "Unable to materialize integer "
<< Ty << " constant on bank: " << RB <<
", expected: GPR\n"; } } while (false)
2245 << " constant on bank: " << RBdo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "Unable to materialize integer "
<< Ty << " constant on bank: " << RB <<
", expected: GPR\n"; } } while (false)
2246 << ", expected: GPR\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "Unable to materialize integer "
<< Ty << " constant on bank: " << RB <<
", expected: GPR\n"; } } while (false)
;
2247 return false;
2248 }
2249 }
2250
2251 // We allow G_CONSTANT of types < 32b.
2252 const unsigned MovOpc =
2253 DefSize == 64 ? AArch64::MOVi64imm : AArch64::MOVi32imm;
2254
2255 if (isFP) {
2256 // Either emit a FMOV, or emit a copy to emit a normal mov.
2257 const TargetRegisterClass &GPRRC =
2258 DefSize == 32 ? AArch64::GPR32RegClass : AArch64::GPR64RegClass;
2259 const TargetRegisterClass &FPRRC =
2260 DefSize == 32 ? AArch64::FPR32RegClass : AArch64::FPR64RegClass;
2261
2262 // Can we use a FMOV instruction to represent the immediate?
2263 if (emitFMovForFConstant(I, MRI))
2264 return true;
2265
2266 // For 64b values, emit a constant pool load instead.
2267 if (DefSize == 64) {
2268 auto *FPImm = I.getOperand(1).getFPImm();
2269 MachineIRBuilder MIB(I);
2270 auto *LoadMI = emitLoadFromConstantPool(FPImm, MIB);
2271 if (!LoadMI) {
2272 LLVM_DEBUG(dbgs() << "Failed to load double constant pool entry\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "Failed to load double constant pool entry\n"
; } } while (false)
;
2273 return false;
2274 }
2275 MIB.buildCopy({DefReg}, {LoadMI->getOperand(0).getReg()});
2276 I.eraseFromParent();
2277 return RBI.constrainGenericRegister(DefReg, FPRRC, MRI);
2278 }
2279
2280 // Nope. Emit a copy and use a normal mov instead.
2281 const Register DefGPRReg = MRI.createVirtualRegister(&GPRRC);
2282 MachineOperand &RegOp = I.getOperand(0);
2283 RegOp.setReg(DefGPRReg);
2284 MIB.setInsertPt(MIB.getMBB(), std::next(I.getIterator()));
2285 MIB.buildCopy({DefReg}, {DefGPRReg});
2286
2287 if (!RBI.constrainGenericRegister(DefReg, FPRRC, MRI)) {
2288 LLVM_DEBUG(dbgs() << "Failed to constrain G_FCONSTANT def operand\n");
2289 return false;
2290 }
2291
2292 MachineOperand &ImmOp = I.getOperand(1);
2293 // FIXME: Is going through int64_t always correct?
2294 ImmOp.ChangeToImmediate(
2295 ImmOp.getFPImm()->getValueAPF().bitcastToAPInt().getZExtValue());
2296 } else if (I.getOperand(1).isCImm()) {
2297 uint64_t Val = I.getOperand(1).getCImm()->getZExtValue();
2298 I.getOperand(1).ChangeToImmediate(Val);
2299 } else if (I.getOperand(1).isImm()) {
2300 uint64_t Val = I.getOperand(1).getImm();
2301 I.getOperand(1).ChangeToImmediate(Val);
2302 }
2303
2304 I.setDesc(TII.get(MovOpc));
2305 constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2306 return true;
2307 }
2308 case TargetOpcode::G_EXTRACT: {
2309 Register DstReg = I.getOperand(0).getReg();
2310 Register SrcReg = I.getOperand(1).getReg();
2311 LLT SrcTy = MRI.getType(SrcReg);
2312 LLT DstTy = MRI.getType(DstReg);
2313 (void)DstTy;
2314 unsigned SrcSize = SrcTy.getSizeInBits();
2315
2316 if (SrcTy.getSizeInBits() > 64) {
2317 // This should be an extract of an s128, which is like a vector extract.
2318 if (SrcTy.getSizeInBits() != 128)
2319 return false;
2320 // Only support extracting 64 bits from an s128 at the moment.
2321 if (DstTy.getSizeInBits() != 64)
2322 return false;
2323
2324 const RegisterBank &SrcRB = *RBI.getRegBank(SrcReg, MRI, TRI);
2325 const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
2326 // Check we have the right regbank always.
2327 assert(SrcRB.getID() == AArch64::FPRRegBankID &&
2328 DstRB.getID() == AArch64::FPRRegBankID &&
2329 "Wrong extract regbank!");
2330 (void)SrcRB;
2331
2332 // Emit the same code as a vector extract.
2333 // Offset must be a multiple of 64.
2334 unsigned Offset = I.getOperand(2).getImm();
2335 if (Offset % 64 != 0)
2336 return false;
2337 unsigned LaneIdx = Offset / 64;
2338 MachineIRBuilder MIB(I);
2339 MachineInstr *Extract = emitExtractVectorElt(
2340 DstReg, DstRB, LLT::scalar(64), SrcReg, LaneIdx, MIB);
2341 if (!Extract)
2342 return false;
2343 I.eraseFromParent();
2344 return true;
2345 }
2346
2347 I.setDesc(TII.get(SrcSize == 64 ? AArch64::UBFMXri : AArch64::UBFMWri));
2348 MachineInstrBuilder(MF, I).addImm(I.getOperand(2).getImm() +
2349 Ty.getSizeInBits() - 1);
2350
2351 if (SrcSize < 64) {
2352 assert(SrcSize == 32 && DstTy.getSizeInBits() == 16 &&
2353 "unexpected G_EXTRACT types");
2354 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2355 }
2356
2357 DstReg = MRI.createGenericVirtualRegister(LLT::scalar(64));
2358 MIB.setInsertPt(MIB.getMBB(), std::next(I.getIterator()));
2359 MIB.buildInstr(TargetOpcode::COPY, {I.getOperand(0).getReg()}, {})
2360 .addReg(DstReg, 0, AArch64::sub_32);
2361 RBI.constrainGenericRegister(I.getOperand(0).getReg(),
2362 AArch64::GPR32RegClass, MRI);
2363 I.getOperand(0).setReg(DstReg);
2364
2365 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2366 }
2367
2368 case TargetOpcode::G_INSERT: {
2369 LLT SrcTy = MRI.getType(I.getOperand(2).getReg());
2370 LLT DstTy = MRI.getType(I.getOperand(0).getReg());
2371 unsigned DstSize = DstTy.getSizeInBits();
2372 // Larger inserts are vectors, same-size ones should be something else by
2373 // now (split up or turned into COPYs).
2374 if (Ty.getSizeInBits() > 64 || SrcTy.getSizeInBits() > 32)
2375 return false;
2376
2377 I.setDesc(TII.get(DstSize == 64 ? AArch64::BFMXri : AArch64::BFMWri));
2378 unsigned LSB = I.getOperand(3).getImm();
2379 unsigned Width = MRI.getType(I.getOperand(2).getReg()).getSizeInBits();
2380 I.getOperand(3).setImm((DstSize - LSB) % DstSize);
2381 MachineInstrBuilder(MF, I).addImm(Width - 1);
2382
2383 if (DstSize < 64) {
2384 assert(DstSize == 32 && SrcTy.getSizeInBits() == 16 &&
2385 "unexpected G_INSERT types");
2386 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2387 }
2388
2389 Register SrcReg = MRI.createGenericVirtualRegister(LLT::scalar(64));
2390 BuildMI(MBB, I.getIterator(), I.getDebugLoc(),
2391 TII.get(AArch64::SUBREG_TO_REG))
2392 .addDef(SrcReg)
2393 .addImm(0)
2394 .addUse(I.getOperand(2).getReg())
2395 .addImm(AArch64::sub_32);
2396 RBI.constrainGenericRegister(I.getOperand(2).getReg(),
2397 AArch64::GPR32RegClass, MRI);
2398 I.getOperand(2).setReg(SrcReg);
2399
2400 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2401 }
2402 case TargetOpcode::G_FRAME_INDEX: {
2403 // allocas and G_FRAME_INDEX are only supported in addrspace(0).
2404 if (Ty != LLT::pointer(0, 64)) {
2405 LLVM_DEBUG(dbgs() << "G_FRAME_INDEX pointer has type: " << Ty
2406 << ", expected: " << LLT::pointer(0, 64) << '\n');
2407 return false;
2408 }
2409 I.setDesc(TII.get(AArch64::ADDXri));
2410
2411 // MOs for a #0 shifted immediate.
2412 I.addOperand(MachineOperand::CreateImm(0));
2413 I.addOperand(MachineOperand::CreateImm(0));
2414
2415 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2416 }
2417
2418 case TargetOpcode::G_GLOBAL_VALUE: {
2419 auto GV = I.getOperand(1).getGlobal();
2420 if (GV->isThreadLocal())
2421 return selectTLSGlobalValue(I, MRI);
2422
2423 unsigned OpFlags = STI.ClassifyGlobalReference(GV, TM);
2424 if (OpFlags & AArch64II::MO_GOT) {
2425 I.setDesc(TII.get(AArch64::LOADgot));
2426 I.getOperand(1).setTargetFlags(OpFlags);
2427 } else if (TM.getCodeModel() == CodeModel::Large) {
2428 // Materialize the global using movz/movk instructions.
2429 materializeLargeCMVal(I, GV, OpFlags);
2430 I.eraseFromParent();
2431 return true;
2432 } else if (TM.getCodeModel() == CodeModel::Tiny) {
2433 I.setDesc(TII.get(AArch64::ADR));
2434 I.getOperand(1).setTargetFlags(OpFlags);
2435 } else {
2436 I.setDesc(TII.get(AArch64::MOVaddr));
2437 I.getOperand(1).setTargetFlags(OpFlags | AArch64II::MO_PAGE);
2438 MachineInstrBuilder MIB(MF, I);
2439 MIB.addGlobalAddress(GV, I.getOperand(1).getOffset(),
2440 OpFlags | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
2441 }
2442 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2443 }
2444
2445 case TargetOpcode::G_ZEXTLOAD:
2446 case TargetOpcode::G_LOAD:
2447 case TargetOpcode::G_STORE: {
2448 bool IsZExtLoad = I.getOpcode() == TargetOpcode::G_ZEXTLOAD;
2449 MachineIRBuilder MIB(I);
2450
2451 LLT PtrTy = MRI.getType(I.getOperand(1).getReg());
2452
2453 if (PtrTy != LLT::pointer(0, 64)) {
2454 LLVM_DEBUG(dbgs() << "Load/Store pointer has type: " << PtrTy
2455 << ", expected: " << LLT::pointer(0, 64) << '\n');
2456 return false;
2457 }
2458
2459 auto &MemOp = **I.memoperands_begin();
2460 uint64_t MemSizeInBytes = MemOp.getSize();
2461 if (MemOp.isAtomic()) {
2462 // For now we just support s8 acquire loads to be able to compile stack
2463 // protector code.
2464 if (MemOp.getOrdering() == AtomicOrdering::Acquire &&
2465 MemSizeInBytes == 1) {
2466 I.setDesc(TII.get(AArch64::LDARB));
2467 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2468 }
2469 LLVM_DEBUG(dbgs() << "Atomic load/store not fully supported yet\n");
2470 return false;
2471 }
2472 unsigned MemSizeInBits = MemSizeInBytes * 8;
2473
2474#ifndef NDEBUG
2475 const Register PtrReg = I.getOperand(1).getReg();
2476 const RegisterBank &PtrRB = *RBI.getRegBank(PtrReg, MRI, TRI);
2477 // Sanity-check the pointer register.
2478 assert(PtrRB.getID() == AArch64::GPRRegBankID &&
2479 "Load/Store pointer operand isn't a GPR");
2480 assert(MRI.getType(PtrReg).isPointer() &&
2481 "Load/Store pointer operand isn't a pointer");
2482#endif
2483
2484 const Register ValReg = I.getOperand(0).getReg();
2485 const RegisterBank &RB = *RBI.getRegBank(ValReg, MRI, TRI);
2486
2487 // Helper lambda for partially selecting I. Either returns the original
2488 // instruction with an updated opcode, or a new instruction.
2489 auto SelectLoadStoreAddressingMode = [&]() -> MachineInstr * {
2490 bool IsStore = I.getOpcode() == TargetOpcode::G_STORE;
2491 const unsigned NewOpc =
2492 selectLoadStoreUIOp(I.getOpcode(), RB.getID(), MemSizeInBits);
2493 if (NewOpc == I.getOpcode())
2494 return nullptr;
2495 // Check if we can fold anything into the addressing mode.
2496 auto AddrModeFns =
2497 selectAddrModeIndexed(I.getOperand(1), MemSizeInBytes);
2498 if (!AddrModeFns) {
2499 // Can't fold anything. Use the original instruction.
2500 I.setDesc(TII.get(NewOpc));
2501 I.addOperand(MachineOperand::CreateImm(0));
2502 return &I;
2503 }
2504
2505 // Folded something. Create a new instruction and return it.
2506 auto NewInst = MIB.buildInstr(NewOpc, {}, {}, I.getFlags());
2507 IsStore ? NewInst.addUse(ValReg) : NewInst.addDef(ValReg);
2508 NewInst.cloneMemRefs(I);
2509 for (auto &Fn : *AddrModeFns)
2510 Fn(NewInst);
2511 I.eraseFromParent();
2512 return &*NewInst;
2513 };
2514
2515 MachineInstr *LoadStore = SelectLoadStoreAddressingMode();
2516 if (!LoadStore)
2517 return false;
2518
2519 // If we're storing a 0, use WZR/XZR.
2520 if (Opcode == TargetOpcode::G_STORE) {
2521 auto CVal = getConstantVRegValWithLookThrough(
2522 LoadStore->getOperand(0).getReg(), MRI, /*LookThroughInstrs = */ true,
2523 /*HandleFConstants = */ false);
2524 if (CVal && CVal->Value == 0) {
2525 switch (LoadStore->getOpcode()) {
2526 case AArch64::STRWui:
2527 case AArch64::STRHHui:
2528 case AArch64::STRBBui:
2529 LoadStore->getOperand(0).setReg(AArch64::WZR);
2530 break;
2531 case AArch64::STRXui:
2532 LoadStore->getOperand(0).setReg(AArch64::XZR);
2533 break;
2534 }
2535 }
2536 }
2537
2538 if (IsZExtLoad) {
2539 // The zextload from a smaller type to i32 should be handled by the
2540 // importer.
2541 if (MRI.getType(LoadStore->getOperand(0).getReg()).getSizeInBits() != 64)
2542 return false;
2543 // If we have a ZEXTLOAD then change the load's type to be a narrower reg
2544 // and zero_extend with SUBREG_TO_REG.
2545 Register LdReg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
2546 Register DstReg = LoadStore->getOperand(0).getReg();
2547 LoadStore->getOperand(0).setReg(LdReg);
2548
2549 MIB.setInsertPt(MIB.getMBB(), std::next(LoadStore->getIterator()));
2550 MIB.buildInstr(AArch64::SUBREG_TO_REG, {DstReg}, {})
2551 .addImm(0)
2552 .addUse(LdReg)
2553 .addImm(AArch64::sub_32);
2554 constrainSelectedInstRegOperands(*LoadStore, TII, TRI, RBI);
2555 return RBI.constrainGenericRegister(DstReg, AArch64::GPR64allRegClass,
2556 MRI);
2557 }
2558 return constrainSelectedInstRegOperands(*LoadStore, TII, TRI, RBI);
2559 }
2560
2561 case TargetOpcode::G_SMULH:
2562 case TargetOpcode::G_UMULH: {
2563 // Reject the various things we don't support yet.
2564 if (unsupportedBinOp(I, RBI, MRI, TRI))
2565 return false;
2566
2567 const Register DefReg = I.getOperand(0).getReg();
2568 const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI);
2569
2570 if (RB.getID() != AArch64::GPRRegBankID) {
2571 LLVM_DEBUG(dbgs() << "G_[SU]MULH on bank: " << RB << ", expected: GPR\n");
2572 return false;
2573 }
2574
2575 if (Ty != LLT::scalar(64)) {
2576 LLVM_DEBUG(dbgs() << "G_[SU]MULH has type: " << Ty
2577 << ", expected: " << LLT::scalar(64) << '\n');
2578 return false;
2579 }
2580
2581 unsigned NewOpc = I.getOpcode() == TargetOpcode::G_SMULH ? AArch64::SMULHrr
2582 : AArch64::UMULHrr;
2583 I.setDesc(TII.get(NewOpc));
2584
2585 // Now that we selected an opcode, we need to constrain the register
2586 // operands to use appropriate classes.
2587 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2588 }
2589 case TargetOpcode::G_LSHR:
2590 case TargetOpcode::G_ASHR:
2591 if (MRI.getType(I.getOperand(0).getReg()).isVector())
2592 return selectVectorAshrLshr(I, MRI);
2593 LLVM_FALLTHROUGH;
2594 case TargetOpcode::G_SHL:
2595 if (Opcode == TargetOpcode::G_SHL &&
2596 MRI.getType(I.getOperand(0).getReg()).isVector())
2597 return selectVectorSHL(I, MRI);
2598 LLVM_FALLTHROUGH;
2599 case TargetOpcode::G_FADD:
2600 case TargetOpcode::G_FSUB:
2601 case TargetOpcode::G_FMUL:
2602 case TargetOpcode::G_FDIV:
2603 case TargetOpcode::G_OR: {
2604 // Reject the various things we don't support yet.
2605 if (unsupportedBinOp(I, RBI, MRI, TRI))
2606 return false;
2607
2608 const unsigned OpSize = Ty.getSizeInBits();
2609
2610 const Register DefReg = I.getOperand(0).getReg();
2611 const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI);
2612
2613 const unsigned NewOpc = selectBinaryOp(I.getOpcode(), RB.getID(), OpSize);
2614 if (NewOpc == I.getOpcode())
2615 return false;
2616
2617 I.setDesc(TII.get(NewOpc));
2618 // FIXME: Should the type be always reset in setDesc?
2619
2620 // Now that we selected an opcode, we need to constrain the register
2621 // operands to use appropriate classes.
2622 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2623 }
2624
2625 case TargetOpcode::G_PTR_ADD: {
2626 MachineIRBuilder MIRBuilder(I);
2627 emitADD(I.getOperand(0).getReg(), I.getOperand(1), I.getOperand(2),
2628 MIRBuilder);
2629 I.eraseFromParent();
2630 return true;
2631 }
2632 case TargetOpcode::G_UADDO: {
2633 // TODO: Support other types.
2634 unsigned OpSize = Ty.getSizeInBits();
2635 if (OpSize != 32 && OpSize != 64) {
2636 LLVM_DEBUG(
2637 dbgs()
2638 << "G_UADDO currently only supported for 32 and 64 b types.\n");
2639 return false;
2640 }
2641
2642 // TODO: Support vectors.
2643 if (Ty.isVector()) {
2644 LLVM_DEBUG(dbgs() << "G_UADDO currently only supported for scalars.\n");
2645 return false;
2646 }
2647
2648 // Add and set the set condition flag.
2649 MachineIRBuilder MIRBuilder(I);
2650 emitADDS(I.getOperand(0).getReg(), I.getOperand(2), I.getOperand(3),
2651 MIRBuilder);
2652
2653 // Now, put the overflow result in the register given by the first operand
2654 // to the G_UADDO. CSINC increments the result when the predicate is false,
2655 // so to get the increment when it's true, we need to use the inverse. In
2656 // this case, we want to increment when carry is set.
2657 auto CsetMI = MIRBuilder
2658 .buildInstr(AArch64::CSINCWr, {I.getOperand(1).getReg()},
2659 {Register(AArch64::WZR), Register(AArch64::WZR)})
2660 .addImm(getInvertedCondCode(AArch64CC::HS));
2661 constrainSelectedInstRegOperands(*CsetMI, TII, TRI, RBI);
2662 I.eraseFromParent();
2663 return true;
2664 }
2665
2666 case TargetOpcode::G_PTRMASK: {
2667 Register MaskReg = I.getOperand(2).getReg();
2668 Optional<int64_t> MaskVal = getConstantVRegVal(MaskReg, MRI);
2669 // TODO: Implement arbitrary cases
2670 if (!MaskVal || !isShiftedMask_64(*MaskVal))
2671 return false;
2672
2673 uint64_t Mask = *MaskVal;
2674 I.setDesc(TII.get(AArch64::ANDXri));
2675 I.getOperand(2).ChangeToImmediate(
2676 AArch64_AM::encodeLogicalImmediate(Mask, 64));
2677
2678 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2679 }
2680 case TargetOpcode::G_PTRTOINT:
2681 case TargetOpcode::G_TRUNC: {
2682 const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
2683 const LLT SrcTy = MRI.getType(I.getOperand(1).getReg());
2684
2685 const Register DstReg = I.getOperand(0).getReg();
2686 const Register SrcReg = I.getOperand(1).getReg();
2687
2688 const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
2689 const RegisterBank &SrcRB = *RBI.getRegBank(SrcReg, MRI, TRI);
2690
2691 if (DstRB.getID() != SrcRB.getID()) {
2692 LLVM_DEBUG(
2693 dbgs() << "G_TRUNC/G_PTRTOINT input/output on different banks\n");
2694 return false;
2695 }
2696
2697 if (DstRB.getID() == AArch64::GPRRegBankID) {
2698 const TargetRegisterClass *DstRC =
2699 getRegClassForTypeOnBank(DstTy, DstRB, RBI);
2700 if (!DstRC)
2701 return false;
2702
2703 const TargetRegisterClass *SrcRC =
2704 getRegClassForTypeOnBank(SrcTy, SrcRB, RBI);
2705 if (!SrcRC)
2706 return false;
2707
2708 if (!RBI.constrainGenericRegister(SrcReg, *SrcRC, MRI) ||
2709 !RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) {
2710 LLVM_DEBUG(dbgs() << "Failed to constrain G_TRUNC/G_PTRTOINT\n");
2711 return false;
2712 }
2713
2714 if (DstRC == SrcRC) {
2715 // Nothing to be done
2716 } else if (Opcode == TargetOpcode::G_TRUNC && DstTy == LLT::scalar(32) &&
2717 SrcTy == LLT::scalar(64)) {
2718 llvm_unreachable("TableGen can import this case");
2719 return false;
2720 } else if (DstRC == &AArch64::GPR32RegClass &&
2721 SrcRC == &AArch64::GPR64RegClass) {
2722 I.getOperand(1).setSubReg(AArch64::sub_32);
2723 } else {
2724 LLVM_DEBUG(
2725 dbgs() << "Unhandled mismatched classes in G_TRUNC/G_PTRTOINT\n");
2726 return false;
2727 }
2728
2729 I.setDesc(TII.get(TargetOpcode::COPY));
2730 return true;
2731 } else if (DstRB.getID() == AArch64::FPRRegBankID) {
2732 if (DstTy == LLT::vector(4, 16) && SrcTy == LLT::vector(4, 32)) {
2733 I.setDesc(TII.get(AArch64::XTNv4i16));
2734 constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2735 return true;
2736 }
2737
2738 if (!SrcTy.isVector() && SrcTy.getSizeInBits() == 128) {
2739 MachineIRBuilder MIB(I);
2740 MachineInstr *Extract = emitExtractVectorElt(
2741 DstReg, DstRB, LLT::scalar(DstTy.getSizeInBits()), SrcReg, 0, MIB);
2742 if (!Extract)
2743 return false;
2744 I.eraseFromParent();
2745 return true;
2746 }
2747
2748 // We might have a vector G_PTRTOINT, in which case just emit a COPY.
2749 if (Opcode == TargetOpcode::G_PTRTOINT) {
2750 assert(DstTy.isVector() && "Expected an FPR ptrtoint to be a vector");
2751 I.setDesc(TII.get(TargetOpcode::COPY));
2752 return true;
2753 }
2754 }
2755
2756 return false;
2757 }
2758
2759 case TargetOpcode::G_ANYEXT: {
2760 const Register DstReg = I.getOperand(0).getReg();
2761 const Register SrcReg = I.getOperand(1).getReg();
2762
2763 const RegisterBank &RBDst = *RBI.getRegBank(DstReg, MRI, TRI);
2764 if (RBDst.getID() != AArch64::GPRRegBankID) {
2765 LLVM_DEBUG(dbgs() << "G_ANYEXT on bank: " << RBDst
2766 << ", expected: GPR\n");
2767 return false;
2768 }
2769
2770 const RegisterBank &RBSrc = *RBI.getRegBank(SrcReg, MRI, TRI);
2771 if (RBSrc.getID() != AArch64::GPRRegBankID) {
2772 LLVM_DEBUG(dbgs() << "G_ANYEXT on bank: " << RBSrc
2773 << ", expected: GPR\n");
2774 return false;
2775 }
2776
2777 const unsigned DstSize = MRI.getType(DstReg).getSizeInBits();
2778
2779 if (DstSize == 0) {
2780 LLVM_DEBUG(dbgs() << "G_ANYEXT operand has no size, not a gvreg?\n");
2781 return false;
2782 }
2783
2784 if (DstSize != 64 && DstSize > 32) {
2785 LLVM_DEBUG(dbgs() << "G_ANYEXT to size: " << DstSize
2786 << ", expected: 32 or 64\n");
2787 return false;
2788 }
2789 // At this point G_ANYEXT is just like a plain COPY, but we need
2790 // to explicitly form the 64-bit value if any.
2791 if (DstSize > 32) {
2792 Register ExtSrc = MRI.createVirtualRegister(&AArch64::GPR64allRegClass);
2793 BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::SUBREG_TO_REG))
2794 .addDef(ExtSrc)
2795 .addImm(0)
2796 .addUse(SrcReg)
2797 .addImm(AArch64::sub_32);
2798 I.getOperand(1).setReg(ExtSrc);
2799 }
2800 return selectCopy(I, TII, MRI, TRI, RBI);
2801 }
2802
2803 case TargetOpcode::G_ZEXT:
2804 case TargetOpcode::G_SEXT_INREG:
2805 case TargetOpcode::G_SEXT: {
2806 unsigned Opcode = I.getOpcode();
2807 const bool IsSigned = Opcode != TargetOpcode::G_ZEXT;
2808 const Register DefReg = I.getOperand(0).getReg();
2809 Register SrcReg = I.getOperand(1).getReg();
2810 const LLT DstTy = MRI.getType(DefReg);
2811 const LLT SrcTy = MRI.getType(SrcReg);
2812 unsigned DstSize = DstTy.getSizeInBits();
2813 unsigned SrcSize = SrcTy.getSizeInBits();
2814
2815 // SEXT_INREG has the same src reg size as dst, the size of the value to be
2816 // extended is encoded in the imm.
2817 if (Opcode == TargetOpcode::G_SEXT_INREG)
2818 SrcSize = I.getOperand(2).getImm();
2819
2820 if (DstTy.isVector())
2821 return false; // Should be handled by imported patterns.
2822
2823 assert((*RBI.getRegBank(DefReg, MRI, TRI)).getID() ==
2824 AArch64::GPRRegBankID &&
2825 "Unexpected ext regbank");
2826
2827 MachineIRBuilder MIB(I);
2828 MachineInstr *ExtI;
2829
2830 // First check if we're extending the result of a load which has a dest type
2831 // smaller than 32 bits, then this zext is redundant. GPR32 is the smallest
2832 // GPR register on AArch64 and all loads which are smaller automatically
2833 // zero-extend the upper bits. E.g.
2834 // %v(s8) = G_LOAD %p, :: (load 1)
2835 // %v2(s32) = G_ZEXT %v(s8)
2836 if (!IsSigned) {
2837 auto *LoadMI = getOpcodeDef(TargetOpcode::G_LOAD, SrcReg, MRI);
2838 bool IsGPR =
2839 RBI.getRegBank(SrcReg, MRI, TRI)->getID() == AArch64::GPRRegBankID;
2840 if (LoadMI && IsGPR) {
2841 const MachineMemOperand *MemOp = *LoadMI->memoperands_begin();
2842 unsigned BytesLoaded = MemOp->getSize();
2843 if (BytesLoaded < 4 && SrcTy.getSizeInBytes() == BytesLoaded)
2844 return selectCopy(I, TII, MRI, TRI, RBI);
2845 }
2846
2847 // If we are zero extending from 32 bits to 64 bits, it's possible that
2848 // the instruction implicitly does the zero extend for us. In that case,
2849 // we can just emit a SUBREG_TO_REG.
2850 if (IsGPR && SrcSize == 32 && DstSize == 64) {
2851 // Unlike with the G_LOAD case, we don't want to look through copies
2852 // here.
2853 MachineInstr *Def = MRI.getVRegDef(SrcReg);
2854 if (Def && isDef32(*Def)) {
2855 MIB.buildInstr(AArch64::SUBREG_TO_REG, {DefReg}, {})
2856 .addImm(0)
2857 .addUse(SrcReg)
2858 .addImm(AArch64::sub_32);
2859
2860 if (!RBI.constrainGenericRegister(DefReg, AArch64::GPR64RegClass,
2861 MRI)) {
2862 LLVM_DEBUG(dbgs() << "Failed to constrain G_ZEXT destination\n");
2863 return false;
2864 }
2865
2866 if (!RBI.constrainGenericRegister(SrcReg, AArch64::GPR32RegClass,
2867 MRI)) {
2868 LLVM_DEBUG(dbgs() << "Failed to constrain G_ZEXT source\n");
2869 return false;
2870 }
2871
2872 I.eraseFromParent();
2873 return true;
2874 }
2875 }
2876 }
2877
2878 if (DstSize == 64) {
2879 if (Opcode != TargetOpcode::G_SEXT_INREG) {
2880 // FIXME: Can we avoid manually doing this?
2881 if (!RBI.constrainGenericRegister(SrcReg, AArch64::GPR32RegClass,
2882 MRI)) {
2883 LLVM_DEBUG(dbgs() << "Failed to constrain " << TII.getName(Opcode)
2884 << " operand\n");
2885 return false;
2886 }
2887 SrcReg = MIB.buildInstr(AArch64::SUBREG_TO_REG,
2888 {&AArch64::GPR64RegClass}, {})
2889 .addImm(0)
2890 .addUse(SrcReg)
2891 .addImm(AArch64::sub_32)
2892 .getReg(0);
2893 }
2894
2895 ExtI = MIB.buildInstr(IsSigned ? AArch64::SBFMXri : AArch64::UBFMXri,
2896 {DefReg}, {SrcReg})
2897 .addImm(0)
2898 .addImm(SrcSize - 1);
2899 } else if (DstSize <= 32) {
2900 ExtI = MIB.buildInstr(IsSigned ? AArch64::SBFMWri : AArch64::UBFMWri,
2901 {DefReg}, {SrcReg})
2902 .addImm(0)
2903 .addImm(SrcSize - 1);
2904 } else {
2905 return false;
2906 }
2907
2908 constrainSelectedInstRegOperands(*ExtI, TII, TRI, RBI);
2909 I.eraseFromParent();
2910 return true;
2911 }
2912
2913 case TargetOpcode::G_SITOFP:
2914 case TargetOpcode::G_UITOFP:
2915 case TargetOpcode::G_FPTOSI:
2916 case TargetOpcode::G_FPTOUI: {
2917 const LLT DstTy = MRI.getType(I.getOperand(0).getReg()),
2918 SrcTy = MRI.getType(I.getOperand(1).getReg());
2919 const unsigned NewOpc = selectFPConvOpc(Opcode, DstTy, SrcTy);
2920 if (NewOpc == Opcode)
2921 return false;
2922
2923 I.setDesc(TII.get(NewOpc));
2924 constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2925
2926 return true;
2927 }
2928
2929 case TargetOpcode::G_FREEZE:
2930 return selectCopy(I, TII, MRI, TRI, RBI);
2931
2932 case TargetOpcode::G_INTTOPTR:
2933 // The importer is currently unable to import pointer types since they
2934 // didn't exist in SelectionDAG.
2935 return selectCopy(I, TII, MRI, TRI, RBI);
2936
2937 case TargetOpcode::G_BITCAST:
2938 // Imported SelectionDAG rules can handle every bitcast except those that
2939 // bitcast from a type to the same type. Ideally, these shouldn't occur
2940 // but we might not run an optimizer that deletes them. The other exception
2941 // is bitcasts involving pointer types, as SelectionDAG has no knowledge
2942 // of them.
2943 return selectCopy(I, TII, MRI, TRI, RBI);
2944
2945 case TargetOpcode::G_SELECT: {
2946 if (MRI.getType(I.getOperand(1).getReg()) != LLT::scalar(1)) {
2947 LLVM_DEBUG(dbgs() << "G_SELECT cond has type: " << Ty
2948 << ", expected: " << LLT::scalar(1) << '\n');
2949 return false;
2950 }
2951
2952 const Register CondReg = I.getOperand(1).getReg();
2953 const Register TReg = I.getOperand(2).getReg();
2954 const Register FReg = I.getOperand(3).getReg();
2955
2956 if (tryOptSelect(I))
2957 return true;
2958
2959 // Make sure to use an unused vreg instead of wzr, so that the peephole
2960 // optimizations will be able to optimize these.
2961 MachineIRBuilder MIB(I);
2962 Register DeadVReg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
2963 auto TstMI = MIB.buildInstr(AArch64::ANDSWri, {DeadVReg}, {CondReg})
2964 .addImm(AArch64_AM::encodeLogicalImmediate(1, 32));
2965 constrainSelectedInstRegOperands(*TstMI, TII, TRI, RBI);
2966 if (!emitSelect(I.getOperand(0).getReg(), TReg, FReg, AArch64CC::NE, MIB))
2967 return false;
2968 I.eraseFromParent();
2969 return true;
2970 }
2971 case TargetOpcode::G_ICMP: {
2972 if (Ty.isVector())
2973 return selectVectorICmp(I, MRI);
2974
2975 if (Ty != LLT::scalar(32)) {
2976 LLVM_DEBUG(dbgs() << "G_ICMP result has type: " << Ty
2977 << ", expected: " << LLT::scalar(32) << '\n');
2978 return false;
2979 }
2980
2981 MachineIRBuilder MIRBuilder(I);
2982 auto Pred = static_cast<CmpInst::Predicate>(I.getOperand(1).getPredicate());
2983 emitIntegerCompare(I.getOperand(2), I.getOperand(3), I.getOperand(1),
2984 MIRBuilder);
2985 emitCSetForICMP(I.getOperand(0).getReg(), Pred, MIRBuilder);
2986 I.eraseFromParent();
2987 return true;
2988 }
2989
2990 case TargetOpcode::G_FCMP: {
2991 MachineIRBuilder MIRBuilder(I);
2992 CmpInst::Predicate Pred =
2993 static_cast<CmpInst::Predicate>(I.getOperand(1).getPredicate());
2994 if (!emitFPCompare(I.getOperand(2).getReg(), I.getOperand(3).getReg(),
2995 MIRBuilder) ||
2996 !emitCSetForFCmp(I.getOperand(0).getReg(), Pred, MIRBuilder))
2997 return false;
2998 I.eraseFromParent();
2999 return true;
3000 }
3001 case TargetOpcode::G_VASTART:
3002 return STI.isTargetDarwin() ? selectVaStartDarwin(I, MF, MRI)
3003 : selectVaStartAAPCS(I, MF, MRI);
3004 case TargetOpcode::G_INTRINSIC:
3005 return selectIntrinsic(I, MRI);
3006 case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS:
3007 return selectIntrinsicWithSideEffects(I, MRI);
3008 case TargetOpcode::G_IMPLICIT_DEF: {
3009 I.setDesc(TII.get(TargetOpcode::IMPLICIT_DEF));
3010 const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
3011 const Register DstReg = I.getOperand(0).getReg();
3012 const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
3013 const TargetRegisterClass *DstRC =
3014 getRegClassForTypeOnBank(DstTy, DstRB, RBI);
3015 RBI.constrainGenericRegister(DstReg, *DstRC, MRI);
3016 return true;
3017 }
3018 case TargetOpcode::G_BLOCK_ADDR: {
3019 if (TM.getCodeModel() == CodeModel::Large) {
3020 materializeLargeCMVal(I, I.getOperand(1).getBlockAddress(), 0);
3021 I.eraseFromParent();
3022 return true;
3023 } else {
3024 I.setDesc(TII.get(AArch64::MOVaddrBA));
3025 auto MovMI = BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::MOVaddrBA),
3026 I.getOperand(0).getReg())
3027 .addBlockAddress(I.getOperand(1).getBlockAddress(),
3028 /* Offset */ 0, AArch64II::MO_PAGE)
3029 .addBlockAddress(
3030 I.getOperand(1).getBlockAddress(), /* Offset */ 0,
3031 AArch64II::MO_NC | AArch64II::MO_PAGEOFF);
3032 I.eraseFromParent();
3033 return constrainSelectedInstRegOperands(*MovMI, TII, TRI, RBI);
3034 }
3035 }
3036 case AArch64::G_DUP: {
3037 // When the scalar of G_DUP is an s8/s16 gpr, they can't be selected by
3038 // imported patterns. Do it manually here. Avoiding generating s16 gpr is
3039 // difficult because at RBS we may end up pessimizing the fpr case if we
3040 // decided to add an anyextend to fix this. Manual selection is the most
3041 // robust solution for now.
3042 Register SrcReg = I.getOperand(1).getReg();
3043 if (RBI.getRegBank(SrcReg, MRI, TRI)->getID() != AArch64::GPRRegBankID)
3044 return false; // We expect the fpr regbank case to be imported.
3045 LLT SrcTy = MRI.getType(SrcReg);
3046 if (SrcTy.getSizeInBits() == 16)
3047 I.setDesc(TII.get(AArch64::DUPv8i16gpr));
3048 else if (SrcTy.getSizeInBits() == 8)
3049 I.setDesc(TII.get(AArch64::DUPv16i8gpr));
3050 else
3051 return false;
3052 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3053 }
3054 case TargetOpcode::G_INTRINSIC_TRUNC:
3055 return selectIntrinsicTrunc(I, MRI);
3056 case TargetOpcode::G_INTRINSIC_ROUND:
3057 return selectIntrinsicRound(I, MRI);
3058 case TargetOpcode::G_BUILD_VECTOR:
3059 return selectBuildVector(I, MRI);
3060 case TargetOpcode::G_MERGE_VALUES:
3061 return selectMergeValues(I, MRI);
3062 case TargetOpcode::G_UNMERGE_VALUES:
3063 return selectUnmergeValues(I, MRI);
3064 case TargetOpcode::G_SHUFFLE_VECTOR:
3065 return selectShuffleVector(I, MRI);
3066 case TargetOpcode::G_EXTRACT_VECTOR_ELT:
3067 return selectExtractElt(I, MRI);
3068 case TargetOpcode::G_INSERT_VECTOR_ELT:
3069 return selectInsertElt(I, MRI);
3070 case TargetOpcode::G_CONCAT_VECTORS:
3071 return selectConcatVectors(I, MRI);
3072 case TargetOpcode::G_JUMP_TABLE:
3073 return selectJumpTable(I, MRI);
3074 case TargetOpcode::G_VECREDUCE_FADD:
3075 case TargetOpcode::G_VECREDUCE_ADD:
3076 return selectReduction(I, MRI);
3077 }
3078
3079 return false;
3080}
3081
3082bool AArch64InstructionSelector::selectReduction(
3083 MachineInstr &I, MachineRegisterInfo &MRI) const {
3084 Register VecReg = I.getOperand(1).getReg();
3085 LLT VecTy = MRI.getType(VecReg);
3086 if (I.getOpcode() == TargetOpcode::G_VECREDUCE_ADD) {
3087 unsigned Opc = 0;
3088 if (VecTy == LLT::vector(16, 8))
3089 Opc = AArch64::ADDVv16i8v;
3090 else if (VecTy == LLT::vector(8, 16))
3091 Opc = AArch64::ADDVv8i16v;
3092 else if (VecTy == LLT::vector(4, 32))
3093 Opc = AArch64::ADDVv4i32v;
3094 else if (VecTy == LLT::vector(2, 64))
3095 Opc = AArch64::ADDPv2i64p;
3096 else {
3097 LLVM_DEBUG(dbgs() << "Unhandled type for add reduction");
3098 return false;
3099 }
3100 I.setDesc(TII.get(Opc));
3101 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3102 }
3103
3104 if (I.getOpcode() == TargetOpcode::G_VECREDUCE_FADD) {
3105 unsigned Opc = 0;
3106 if (VecTy == LLT::vector(2, 32))
3107 Opc = AArch64::FADDPv2i32p;
3108 else if (VecTy == LLT::vector(2, 64))
3109 Opc = AArch64::FADDPv2i64p;
3110 else {
3111 LLVM_DEBUG(dbgs() << "Unhandled type for fadd reduction");
3112 return false;
3113 }
3114 I.setDesc(TII.get(Opc));
3115 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3116 }
3117 return false;
3118}
3119
3120bool AArch64InstructionSelector::selectBrJT(MachineInstr &I,
3121 MachineRegisterInfo &MRI) const {
3122 assert(I.getOpcode() == TargetOpcode::G_BRJT && "Expected G_BRJT");
3123 Register JTAddr = I.getOperand(0).getReg();
3124 unsigned JTI = I.getOperand(1).getIndex();
3125 Register Index = I.getOperand(2).getReg();
3126 MachineIRBuilder MIB(I);
3127
3128 Register TargetReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
3129 Register ScratchReg = MRI.createVirtualRegister(&AArch64::GPR64spRegClass);
3130
3131 MF->getInfo<AArch64FunctionInfo>()->setJumpTableEntryInfo(JTI, 4, nullptr);
3132 auto JumpTableInst = MIB.buildInstr(AArch64::JumpTableDest32,
3133 {TargetReg, ScratchReg}, {JTAddr, Index})
3134 .addJumpTableIndex(JTI);
3135 // Build the indirect branch.
3136 MIB.buildInstr(AArch64::BR, {}, {TargetReg});
3137 I.eraseFromParent();
3138 return constrainSelectedInstRegOperands(*JumpTableInst, TII, TRI, RBI);
3139}
3140
3141bool AArch64InstructionSelector::selectJumpTable(
3142 MachineInstr &I, MachineRegisterInfo &MRI) const {
3143 assert(I.getOpcode() == TargetOpcode::G_JUMP_TABLE && "Expected jump table");
3144 assert(I.getOperand(1).isJTI() && "Jump table op should have a JTI!");
3145
3146 Register DstReg = I.getOperand(0).getReg();
3147 unsigned JTI = I.getOperand(1).getIndex();
3148 // We generate a MOVaddrJT which will get expanded to an ADRP + ADD later.
3149 MachineIRBuilder MIB(I);
3150 auto MovMI =
3151 MIB.buildInstr(AArch64::MOVaddrJT, {DstReg}, {})
3152 .addJumpTableIndex(JTI, AArch64II::MO_PAGE)
3153 .addJumpTableIndex(JTI, AArch64II::MO_NC | AArch64II::MO_PAGEOFF);
3154 I.eraseFromParent();
3155 return constrainSelectedInstRegOperands(*MovMI, TII, TRI, RBI);
3156}
3157
3158bool AArch64InstructionSelector::selectTLSGlobalValue(
3159 MachineInstr &I, MachineRegisterInfo &MRI) const {
3160 if (!STI.isTargetMachO())
3161 return false;
3162 MachineFunction &MF = *I.getParent()->getParent();
3163 MF.getFrameInfo().setAdjustsStack(true);
3164
3165 const GlobalValue &GV = *I.getOperand(1).getGlobal();
3166 MachineIRBuilder MIB(I);
3167
3168 auto LoadGOT =
3169 MIB.buildInstr(AArch64::LOADgot, {&AArch64::GPR64commonRegClass}, {})
3170 .addGlobalAddress(&GV, 0, AArch64II::MO_TLS);
3171
3172 auto Load = MIB.buildInstr(AArch64::LDRXui, {&AArch64::GPR64commonRegClass},
3173 {LoadGOT.getReg(0)})
3174 .addImm(0);
3175
3176 MIB.buildCopy(Register(AArch64::X0), LoadGOT.getReg(0));
3177 // TLS calls preserve all registers except those that absolutely must be
3178 // trashed: X0 (it takes an argument), LR (it's a call) and NZCV (let's not be
3179 // silly).
3180 MIB.buildInstr(getBLRCallOpcode(MF), {}, {Load})
3181 .addUse(AArch64::X0, RegState::Implicit)
3182 .addDef(AArch64::X0, RegState::Implicit)
3183 .addRegMask(TRI.getTLSCallPreservedMask());
3184
3185 MIB.buildCopy(I.getOperand(0).getReg(), Register(AArch64::X0));
3186 RBI.constrainGenericRegister(I.getOperand(0).getReg(), AArch64::GPR64RegClass,
3187 MRI);
3188 I.eraseFromParent();
3189 return true;
3190}
3191
3192bool AArch64InstructionSelector::selectIntrinsicTrunc(
3193 MachineInstr &I, MachineRegisterInfo &MRI) const {
3194 const LLT SrcTy = MRI.getType(I.getOperand(0).getReg());
3195
3196 // Select the correct opcode.
3197 unsigned Opc = 0;
3198 if (!SrcTy.isVector()) {
3199 switch (SrcTy.getSizeInBits()) {
3200 default:
3201 case 16:
3202 Opc = AArch64::FRINTZHr;
3203 break;
3204 case 32:
3205 Opc = AArch64::FRINTZSr;
3206 break;
3207 case 64:
3208 Opc = AArch64::FRINTZDr;
3209 break;
3210 }
3211 } else {
3212 unsigned NumElts = SrcTy.getNumElements();
3213 switch (SrcTy.getElementType().getSizeInBits()) {
3214 default:
3215 break;
3216 case 16:
3217 if (NumElts == 4)
3218 Opc = AArch64::FRINTZv4f16;
3219 else if (NumElts == 8)
3220 Opc = AArch64::FRINTZv8f16;
3221 break;
3222 case 32:
3223 if (NumElts == 2)
3224 Opc = AArch64::FRINTZv2f32;
3225 else if (NumElts == 4)
3226 Opc = AArch64::FRINTZv4f32;
3227 break;
3228 case 64:
3229 if (NumElts == 2)
3230 Opc = AArch64::FRINTZv2f64;
3231 break;
3232 }
3233 }
3234
3235 if (!Opc) {
3236 // Didn't get an opcode above, bail.
3237 LLVM_DEBUG(dbgs() << "Unsupported type for G_INTRINSIC_TRUNC!\n");
3239 }
3240
3241 // Legalization would have set us up perfectly for this; we just need to
3242 // set the opcode and move on.
3243 I.setDesc(TII.get(Opc));
3244 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3245}
3246
3247bool AArch64InstructionSelector::selectIntrinsicRound(
3248 MachineInstr &I, MachineRegisterInfo &MRI) const {
3249 const LLT SrcTy = MRI.getType(I.getOperand(0).getReg());
3250
3251 // Select the correct opcode.
3252 unsigned Opc = 0;
3253 if (!SrcTy.isVector()) {
3254 switch (SrcTy.getSizeInBits()) {
3255 default:
3256 case 16:
3257 Opc = AArch64::FRINTAHr;
3258 break;
3259 case 32:
3260 Opc = AArch64::FRINTASr;
3261 break;
3262 case 64:
3263 Opc = AArch64::FRINTADr;
3264 break;
3265 }
3266 } else {
3267 unsigned NumElts = SrcTy.getNumElements();
3268 switch (SrcTy.getElementType().getSizeInBits()) {
3269 default:
3270 break;
3271 case 16:
3272 if (NumElts == 4)
3273 Opc = AArch64::FRINTAv4f16;
3274 else if (NumElts == 8)
3275 Opc = AArch64::FRINTAv8f16;
3276 break;
3277 case 32:
3278 if (NumElts == 2)
3279 Opc = AArch64::FRINTAv2f32;
3280 else if (NumElts == 4)
3281 Opc = AArch64::FRINTAv4f32;
3282 break;
3283 case 64:
3284 if (NumElts == 2)
3285 Opc = AArch64::FRINTAv2f64;
3286 break;
3287 }
3288 }
3289
3290 if (!Opc) {
3291 // Didn't get an opcode above, bail.
3292 LLVM_DEBUG(dbgs() << "Unsupported type for G_INTRINSIC_ROUND!\n");
3294 }
3295
3296 // Legalization would have set us up perfectly for this; we just need to
3297 // set the opcode and move on.
3298 I.setDesc(TII.get(Opc));
3299 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3300}
3301
3302bool AArch64InstructionSelector::selectVectorICmp(
3303 MachineInstr &I, MachineRegisterInfo &MRI) const {
3304 Register DstReg = I.getOperand(0).getReg();
3305 LLT DstTy = MRI.getType(DstReg);
3306 Register SrcReg = I.getOperand(2).getReg();
3307 Register Src2Reg = I.getOperand(3).getReg();
3308 LLT SrcTy = MRI.getType(SrcReg);
3309
3310 unsigned SrcEltSize = SrcTy.getElementType().getSizeInBits();
3311 unsigned NumElts = DstTy.getNumElements();
3312
3313 // First index is element size, 0 == 8b, 1 == 16b, 2 == 32b, 3 == 64b
3314 // Second index is num elts, 0 == v2, 1 == v4, 2 == v8, 3 == v16
3315 // Third index is cc opcode:
3316 // 0 == eq
3317 // 1 == ugt
3318 // 2 == uge
3319 // 3 == ult
3320 // 4 == ule
3321 // 5 == sgt
3322 // 6 == sge
3323 // 7 == slt
3324 // 8 == sle
3325 // ne is done by negating 'eq' result.
3326
3327 // This table below assumes that for some comparisons the operands will be
3328 // commuted.
3329 // ult op == commute + ugt op
3330 // ule op == commute + uge op
3331 // slt op == commute + sgt op
3332 // sle op == commute + sge op
3333 unsigned PredIdx = 0;
3334 bool SwapOperands = false;
3335 CmpInst::Predicate Pred = (CmpInst::Predicate)I.getOperand(1).getPredicate();
3336 switch (Pred) {
3337 case CmpInst::ICMP_NE:
3338 case CmpInst::ICMP_EQ:
3339 PredIdx = 0;
3340 break;
3341 case CmpInst::ICMP_UGT:
3342 PredIdx = 1;
3343 break;
3344 case CmpInst::ICMP_UGE:
3345 PredIdx = 2;
3346 break;
3347 case CmpInst::ICMP_ULT:
3348 PredIdx = 3;
3349 SwapOperands = true;
3350 break;
3351 case CmpInst::ICMP_ULE:
3352 PredIdx = 4;
3353 SwapOperands = true;
3354 break;
3355 case CmpInst::ICMP_SGT:
3356 PredIdx = 5;
3357 break;
3358 case CmpInst::ICMP_SGE:
3359 PredIdx = 6;
3360 break;
3361 case CmpInst::ICMP_SLT:
3362 PredIdx = 7;
3363 SwapOperands = true;
3364 break;
3365 case CmpInst::ICMP_SLE:
3366 PredIdx = 8;
3367 SwapOperands = true;
3368 break;
3369 default:
3370 llvm_unreachable("Unhandled icmp predicate");
3372 }
3373
3374 // This table obviously should be tablegen'd when we have our GISel native
3375 // tablegen selector.
3376
3377 static const unsigned OpcTable[4][4][9] = {
3378 {
3379 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3380 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3381 0 /* invalid */},
3382 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3383 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3384 0 /* invalid */},
3385 {AArch64::CMEQv8i8, AArch64::CMHIv8i8, AArch64::CMHSv8i8,
3386 AArch64::CMHIv8i8, AArch64::CMHSv8i8, AArch64::CMGTv8i8,
3387 AArch64::CMGEv8i8, AArch64::CMGTv8i8, AArch64::CMGEv8i8},
3388 {AArch64::CMEQv16i8, AArch64::CMHIv16i8, AArch64::CMHSv16i8,
3389 AArch64::CMHIv16i8, AArch64::CMHSv16i8, AArch64::CMGTv16i8,
3390 AArch64::CMGEv16i8, AArch64::CMGTv16i8, AArch64::CMGEv16i8}
3391 },
3392 {
3393 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3394 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3395 0 /* invalid */},
3396 {AArch64::CMEQv4i16, AArch64::CMHIv4i16, AArch64::CMHSv4i16,
3397 AArch64::CMHIv4i16, AArch64::CMHSv4i16, AArch64::CMGTv4i16,
3398 AArch64::CMGEv4i16, AArch64::CMGTv4i16, AArch64::CMGEv4i16},
3399 {AArch64::CMEQv8i16, AArch64::CMHIv8i16, AArch64::CMHSv8i16,
3400 AArch64::CMHIv8i16, AArch64::CMHSv8i16, AArch64::CMGTv8i16,
3401 AArch64::CMGEv8i16, AArch64::CMGTv8i16, AArch64::CMGEv8i16},
3402 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3403 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3404 0 /* invalid */}
3405 },
3406 {
3407 {AArch64::CMEQv2i32, AArch64::CMHIv2i32, AArch64::CMHSv2i32,
3408 AArch64::CMHIv2i32, AArch64::CMHSv2i32, AArch64::CMGTv2i32,
3409 AArch64::CMGEv2i32, AArch64::CMGTv2i32, AArch64::CMGEv2i32},
3410 {AArch64::CMEQv4i32, AArch64::CMHIv4i32, AArch64::CMHSv4i32,
3411 AArch64::CMHIv4i32, AArch64::CMHSv4i32, AArch64::CMGTv4i32,
3412 AArch64::CMGEv4i32, AArch64::CMGTv4i32, AArch64::CMGEv4i32},
3413 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3414 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3415 0 /* invalid */},
3416 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3417 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3418 0 /* invalid */}
3419 },
3420 {
3421 {AArch64::CMEQv2i64, AArch64::CMHIv2i64, AArch64::CMHSv2i64,
3422 AArch64::CMHIv2i64, AArch64::CMHSv2i64, AArch64::CMGTv2i64,
3423 AArch64::CMGEv2i64, AArch64::CMGTv2i64, AArch64::CMGEv2i64},
3424 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3425 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3426 0 /* invalid */},
3427 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3428 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3429 0 /* invalid */},
3430 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3431 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3432 0 /* invalid */}
3433 },
3434 };
3435 unsigned EltIdx = Log2_32(SrcEltSize / 8);
3436 unsigned NumEltsIdx = Log2_32(NumElts / 2);
3437 unsigned Opc = OpcTable[EltIdx][NumEltsIdx][PredIdx];
3438 if (!Opc) {
3439 LLVM_DEBUG(dbgs() << "Could not map G_ICMP to cmp opcode");
3440 return false;
3441 }
3442
3443 const RegisterBank &VecRB = *RBI.getRegBank(SrcReg, MRI, TRI);
3444 const TargetRegisterClass *SrcRC =
3445 getRegClassForTypeOnBank(SrcTy, VecRB, RBI, true);
3446 if (!SrcRC) {
3447 LLVM_DEBUG(dbgs() << "Could not determine source register class.\n");
3448 return false;
3449 }
3450
3451 unsigned NotOpc = Pred == ICmpInst::ICMP_NE ? AArch64::NOTv8i8 : 0;
3452 if (SrcTy.getSizeInBits() == 128)
3453 NotOpc = NotOpc ? AArch64::NOTv16i8 : 0;
3454
3455 if (SwapOperands)
3456 std::swap(SrcReg, Src2Reg);
3457
3458 MachineIRBuilder MIB(I);
3459 auto Cmp = MIB.buildInstr(Opc, {SrcRC}, {SrcReg, Src2Reg});
3460 constrainSelectedInstRegOperands(*Cmp, TII, TRI, RBI);
3461
3462 // Invert if we had a 'ne' cc.
3463 if (NotOpc) {
3464 Cmp = MIB.buildInstr(NotOpc, {DstReg}, {Cmp});
3465 constrainSelectedInstRegOperands(*Cmp, TII, TRI, RBI);
3466 } else {
3467 MIB.buildCopy(DstReg, Cmp.getReg(0));
3468 }
3469 RBI.constrainGenericRegister(DstReg, *SrcRC, MRI);
3470 I.eraseFromParent();
3471 return true;
3472}
3473
3474MachineInstr *AArch64InstructionSelector::emitScalarToVector(
3475 unsigned EltSize, const TargetRegisterClass *DstRC, Register Scalar,
3476 MachineIRBuilder &MIRBuilder) const {
3477 auto Undef = MIRBuilder.buildInstr(TargetOpcode::IMPLICIT_DEF, {DstRC}, {});
3478
3479 auto BuildFn = [&](unsigned SubregIndex) {
3480 auto Ins =
3481 MIRBuilder
3482 .buildInstr(TargetOpcode::INSERT_SUBREG, {DstRC}, {Undef, Scalar})
3483 .addImm(SubregIndex);
3484 constrainSelectedInstRegOperands(*Undef, TII, TRI, RBI);
3485 constrainSelectedInstRegOperands(*Ins, TII, TRI, RBI);
3486 return &*Ins;
3487 };
3488
3489 switch (EltSize) {
3490 case 16:
3491 return BuildFn(AArch64::hsub);
3492 case 32:
3493 return BuildFn(AArch64::ssub);
3494 case 64:
3495 return BuildFn(AArch64::dsub);
3496 default:
3497 return nullptr;
3498 }
3499}
3500
3501bool AArch64InstructionSelector::selectMergeValues(
3502 MachineInstr &I, MachineRegisterInfo &MRI) const {
3503 assert(I.getOpcode() == TargetOpcode::G_MERGE_VALUES && "unexpected opcode");
3504 const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
3505 const LLT SrcTy = MRI.getType(I.getOperand(1).getReg());
3506 assert(!DstTy.isVector() && !SrcTy.isVector() && "invalid merge operation");
3507 const RegisterBank &RB = *RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI);
3508
3509 if (I.getNumOperands() != 3)
3510 return false;
3511
3512 // Merging 2 s64s into an s128.
3513 if (DstTy == LLT::scalar(128)) {
3514 if (SrcTy.getSizeInBits() != 64)
3515 return false;
3516 MachineIRBuilder MIB(I);
3517 Register DstReg = I.getOperand(0).getReg();
3518 Register Src1Reg = I.getOperand(1).getReg();
3519 Register Src2Reg = I.getOperand(2).getReg();
3520 auto Tmp = MIB.buildInstr(TargetOpcode::IMPLICIT_DEF, {DstTy}, {});
3521 MachineInstr *InsMI =
3522 emitLaneInsert(None, Tmp.getReg(0), Src1Reg, /* LaneIdx */ 0, RB, MIB);
3523 if (!InsMI)
3524 return false;
3525 MachineInstr *Ins2MI = emitLaneInsert(DstReg, InsMI->getOperand(0).getReg(),
3526 Src2Reg, /* LaneIdx */ 1, RB, MIB);
3527 if (!Ins2MI)
3528 return false;
3529 constrainSelectedInstRegOperands(*InsMI, TII, TRI, RBI);
3530 constrainSelectedInstRegOperands(*Ins2MI, TII, TRI, RBI);
3531 I.eraseFromParent();
3532 return true;
3533 }
3534
3535 if (RB.getID() != AArch64::GPRRegBankID)
3536 return false;
3537
3538 if (DstTy.getSizeInBits() != 64 || SrcTy.getSizeInBits() != 32)
3539 return false;
3540
3541 auto *DstRC = &AArch64::GPR64RegClass;
3542 Register SubToRegDef = MRI.createVirtualRegister(DstRC);
3543 MachineInstr &SubRegMI = *BuildMI(*I.getParent(), I, I.getDebugLoc(),
3544 TII.get(TargetOpcode::SUBREG_TO_REG))
3545 .addDef(SubToRegDef)
3546 .addImm(0)
3547 .addUse(I.getOperand(1).getReg())
3548 .addImm(AArch64::sub_32);
3549 Register SubToRegDef2 = MRI.createVirtualRegister(DstRC);
3550 // Need to anyext the second scalar before we can use bfm
3551 MachineInstr &SubRegMI2 = *BuildMI(*I.getParent(), I, I.getDebugLoc(),
3552 TII.get(TargetOpcode::SUBREG_TO_REG))
3553 .addDef(SubToRegDef2)
3554 .addImm(0)
3555 .addUse(I.getOperand(2).getReg())
3556 .addImm(AArch64::sub_32);
3557 MachineInstr &BFM =
3558 *BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::BFMXri))
3559 .addDef(I.getOperand(0).getReg())
3560 .addUse(SubToRegDef)
3561 .addUse(SubToRegDef2)
3562 .addImm(32)
3563 .addImm(31);
3564 constrainSelectedInstRegOperands(SubRegMI, TII, TRI, RBI);
3565 constrainSelectedInstRegOperands(SubRegMI2, TII, TRI, RBI);
3566 constrainSelectedInstRegOperands(BFM, TII, TRI, RBI);
3567 I.eraseFromParent();
3568 return true;
3569}
3570
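selectMergeValues handles the GPR case of merging two s32 values into an s64 by widening each source with SUBREG_TO_REG and then inserting the second value into the upper half with BFMXri (immr=32, imms=31, i.e. a bitfield insert at bit 32). A small scalar sketch of the intended bit-level result follows; it assumes the BFM operands encode exactly that insert and uses plain C++, not the selector's types.

// Minimal sketch, assuming the BFMXri above inserts the low 32 bits of the
// second operand into bits [63:32] of the result: the merged value is just
// lo | (hi << 32).
#include <cstdint>
#include <cstdio>

static uint64_t mergeS32Pair(uint32_t Lo, uint32_t Hi) {
  uint64_t Widened = Lo;                      // first value occupies the low half
  Widened |= static_cast<uint64_t>(Hi) << 32; // BFM immr=32, imms=31
  return Widened;
}

int main() {
  std::printf("%#llx\n", static_cast<unsigned long long>(
                             mergeS32Pair(0xdeadbeef, 0x1234)));
  // prints 0x1234deadbeef
  return 0;
}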
3571static bool getLaneCopyOpcode(unsigned &CopyOpc, unsigned &ExtractSubReg,
3572 const unsigned EltSize) {
3573 // Choose a lane copy opcode and subregister based off of the size of the
3574 // vector's elements.
3575 switch (EltSize) {
3576 case 16:
3577 CopyOpc = AArch64::CPYi16;
3578 ExtractSubReg = AArch64::hsub;
3579 break;
3580 case 32:
3581 CopyOpc = AArch64::CPYi32;
3582 ExtractSubReg = AArch64::ssub;
3583 break;
3584 case 64:
3585 CopyOpc = AArch64::CPYi64;
3586 ExtractSubReg = AArch64::dsub;
3587 break;
3588 default:
3589 // Unknown size, bail out.
3590 LLVM_DEBUG(dbgs() << "Elt size '" << EltSize << "' unsupported.\n");
3591 return false;
3592 }
3593 return true;
3594}
3595
3596MachineInstr *AArch64InstructionSelector::emitExtractVectorElt(
3597 Optional<Register> DstReg, const RegisterBank &DstRB, LLT ScalarTy,
3598 Register VecReg, unsigned LaneIdx, MachineIRBuilder &MIRBuilder) const {
3599 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
3600 unsigned CopyOpc = 0;
3601 unsigned ExtractSubReg = 0;
3602 if (!getLaneCopyOpcode(CopyOpc, ExtractSubReg, ScalarTy.getSizeInBits())) {
3603 LLVM_DEBUG(
3604     dbgs() << "Couldn't determine lane copy opcode for instruction.\n");
3605 return nullptr;
3606 }
3607
3608 const TargetRegisterClass *DstRC =
3609 getRegClassForTypeOnBank(ScalarTy, DstRB, RBI, true);
3610 if (!DstRC) {
3611 LLVM_DEBUG(dbgs() << "Could not determine destination register class.\n");
3612 return nullptr;
3613 }
3614
3615 const RegisterBank &VecRB = *RBI.getRegBank(VecReg, MRI, TRI);
3616 const LLT &VecTy = MRI.getType(VecReg);
3617 const TargetRegisterClass *VecRC =
3618 getRegClassForTypeOnBank(VecTy, VecRB, RBI, true);
3619 if (!VecRC) {
3620 LLVM_DEBUG(dbgs() << "Could not determine source register class.\n");
3621 return nullptr;
3622 }
3623
3624 // The register that we're going to copy into.
3625 Register InsertReg = VecReg;
3626 if (!DstReg)
3627 DstReg = MRI.createVirtualRegister(DstRC);
3628 // If the lane index is 0, we just use a subregister COPY.
3629 if (LaneIdx == 0) {
3630 auto Copy = MIRBuilder.buildInstr(TargetOpcode::COPY, {*DstReg}, {})
3631 .addReg(VecReg, 0, ExtractSubReg);
3632 RBI.constrainGenericRegister(*DstReg, *DstRC, MRI);
3633 return &*Copy;
3634 }
3635
3636 // Lane copies require 128-bit wide registers. If we're dealing with an
3637 // unpacked vector, then we need to move up to that width. Insert an implicit
3638 // def and a subregister insert to get us there.
3639 if (VecTy.getSizeInBits() != 128) {
3640 MachineInstr *ScalarToVector = emitScalarToVector(
3641 VecTy.getSizeInBits(), &AArch64::FPR128RegClass, VecReg, MIRBuilder);
3642 if (!ScalarToVector)
3643 return nullptr;
3644 InsertReg = ScalarToVector->getOperand(0).getReg();
3645 }
3646
3647 MachineInstr *LaneCopyMI =
3648 MIRBuilder.buildInstr(CopyOpc, {*DstReg}, {InsertReg}).addImm(LaneIdx);
3649 constrainSelectedInstRegOperands(*LaneCopyMI, TII, TRI, RBI);
3650
3651 // Make sure that we actually constrain the initial copy.
3652 RBI.constrainGenericRegister(*DstReg, *DstRC, MRI);
3653 return LaneCopyMI;
3654}
3655
3656bool AArch64InstructionSelector::selectExtractElt(
3657 MachineInstr &I, MachineRegisterInfo &MRI) const {
3658 assert(I.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT &&
3659        "unexpected opcode!");
3660 Register DstReg = I.getOperand(0).getReg();
3661 const LLT NarrowTy = MRI.getType(DstReg);
3662 const Register SrcReg = I.getOperand(1).getReg();
3663 const LLT WideTy = MRI.getType(SrcReg);
3664 (void)WideTy;
3665 assert(WideTy.getSizeInBits() >= NarrowTy.getSizeInBits() &&
3666        "source register size too small!");
3667 assert(!NarrowTy.isVector() && "cannot extract vector into vector!");
3668
3669 // Need the lane index to determine the correct copy opcode.
3670 MachineOperand &LaneIdxOp = I.getOperand(2);
3671 assert(LaneIdxOp.isReg() && "Lane index operand was not a register?");
3672
3673 if (RBI.getRegBank(DstReg, MRI, TRI)->getID() != AArch64::FPRRegBankID) {
3674 LLVM_DEBUG(dbgs() << "Cannot extract into GPR.\n");
3675 return false;
3676 }
3677
3678 // Find the index to extract from.
3679 auto VRegAndVal = getConstantVRegValWithLookThrough(LaneIdxOp.getReg(), MRI);
3680 if (!VRegAndVal)
3681 return false;
3682 unsigned LaneIdx = VRegAndVal->Value;
3683
3684 MachineIRBuilder MIRBuilder(I);
3685
3686 const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
3687 MachineInstr *Extract = emitExtractVectorElt(DstReg, DstRB, NarrowTy, SrcReg,
3688 LaneIdx, MIRBuilder);
3689 if (!Extract)
3690 return false;
3691
3692 I.eraseFromParent();
3693 return true;
3694}
3695
3696bool AArch64InstructionSelector::selectSplitVectorUnmerge(
3697 MachineInstr &I, MachineRegisterInfo &MRI) const {
3698 unsigned NumElts = I.getNumOperands() - 1;
3699 Register SrcReg = I.getOperand(NumElts).getReg();
3700 const LLT NarrowTy = MRI.getType(I.getOperand(0).getReg());
3701 const LLT SrcTy = MRI.getType(SrcReg);
3702
3703 assert(NarrowTy.isVector() && "Expected an unmerge into vectors");
3704 if (SrcTy.getSizeInBits() > 128) {
3705 LLVM_DEBUG(dbgs() << "Unexpected vector type for vec split unmerge");
3706 return false;
3707 }
3708
3709 MachineIRBuilder MIB(I);
3710
3711 // We implement a split vector operation by treating the sub-vectors as
3712 // scalars and extracting them.
3713 const RegisterBank &DstRB =
3714 *RBI.getRegBank(I.getOperand(0).getReg(), MRI, TRI);
3715 for (unsigned OpIdx = 0; OpIdx < NumElts; ++OpIdx) {
3716 Register Dst = I.getOperand(OpIdx).getReg();
3717 MachineInstr *Extract =
3718 emitExtractVectorElt(Dst, DstRB, NarrowTy, SrcReg, OpIdx, MIB);
3719 if (!Extract)
3720 return false;
3721 }
3722 I.eraseFromParent();
3723 return true;
3724}
3725
3726bool AArch64InstructionSelector::selectUnmergeValues(
3727 MachineInstr &I, MachineRegisterInfo &MRI) const {
3728 assert(I.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
3729        "unexpected opcode");
3730
3731 // TODO: Handle unmerging into GPRs and from scalars to scalars.
3732 if (RBI.getRegBank(I.getOperand(0).getReg(), MRI, TRI)->getID() !=
3733 AArch64::FPRRegBankID ||
3734 RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI)->getID() !=
3735 AArch64::FPRRegBankID) {
3736 LLVM_DEBUG(dbgs() << "Unmerging vector-to-gpr and scalar-to-scalar "
3737                      "currently unsupported.\n");
3738 return false;
3739 }
3740
3741 // The last operand is the vector source register, and every other operand is
3742 // a register to unpack into.
3743 unsigned NumElts = I.getNumOperands() - 1;
3744 Register SrcReg = I.getOperand(NumElts).getReg();
3745 const LLT NarrowTy = MRI.getType(I.getOperand(0).getReg());
3746 const LLT WideTy = MRI.getType(SrcReg);
3747 (void)WideTy;
3748 assert((WideTy.isVector() || WideTy.getSizeInBits() == 128) &&
3749        "can only unmerge from vector or s128 types!");
3750 assert(WideTy.getSizeInBits() > NarrowTy.getSizeInBits() &&
3751        "source register size too small!");
3752
3753 if (!NarrowTy.isScalar())
3754 return selectSplitVectorUnmerge(I, MRI);
3755
3756 MachineIRBuilder MIB(I);
3757
3758 // Choose a lane copy opcode and subregister based off of the size of the
3759 // vector's elements.
3760 unsigned CopyOpc = 0;
3761 unsigned ExtractSubReg = 0;
3762 if (!getLaneCopyOpcode(CopyOpc, ExtractSubReg, NarrowTy.getSizeInBits()))
3763 return false;
3764
3765 // Set up for the lane copies.
3766 MachineBasicBlock &MBB = *I.getParent();
3767
3768 // Stores the registers we'll be copying from.
3769 SmallVector<Register, 4> InsertRegs;
3770
3771 // We'll use the first register twice, so we only need NumElts-1 registers.
3772 unsigned NumInsertRegs = NumElts - 1;
3773
3774 // If our elements fit into exactly 128 bits, then we can copy from the source
3775 // directly. Otherwise, we need to do a bit of setup with some subregister
3776 // inserts.
3777 if (NarrowTy.getSizeInBits() * NumElts == 128) {
3778 InsertRegs = SmallVector<Register, 4>(NumInsertRegs, SrcReg);
3779 } else {
3780 // No. We have to perform subregister inserts. For each insert, create an
3781 // implicit def and a subregister insert, and save the register we create.
3782 for (unsigned Idx = 0; Idx < NumInsertRegs; ++Idx) {
3783 Register ImpDefReg = MRI.createVirtualRegister(&AArch64::FPR128RegClass);
3784 MachineInstr &ImpDefMI =
3785 *BuildMI(MBB, I, I.getDebugLoc(), TII.get(TargetOpcode::IMPLICIT_DEF),
3786 ImpDefReg);
3787
3788 // Now, create the subregister insert from SrcReg.
3789 Register InsertReg = MRI.createVirtualRegister(&AArch64::FPR128RegClass);
3790 MachineInstr &InsMI =
3791 *BuildMI(MBB, I, I.getDebugLoc(),
3792 TII.get(TargetOpcode::INSERT_SUBREG), InsertReg)
3793 .addUse(ImpDefReg)
3794 .addUse(SrcReg)
3795 .addImm(AArch64::dsub);
3796
3797 constrainSelectedInstRegOperands(ImpDefMI, TII, TRI, RBI);
3798 constrainSelectedInstRegOperands(InsMI, TII, TRI, RBI);
3799
3800 // Save the register so that we can copy from it after.
3801 InsertRegs.push_back(InsertReg);
3802 }
3803 }
3804
3805 // Now that we've created any necessary subregister inserts, we can
3806 // create the copies.
3807 //
3808 // Perform the first copy separately as a subregister copy.
3809 Register CopyTo = I.getOperand(0).getReg();
3810 auto FirstCopy = MIB.buildInstr(TargetOpcode::COPY, {CopyTo}, {})
3811 .addReg(InsertRegs[0], 0, ExtractSubReg);
3812 constrainSelectedInstRegOperands(*FirstCopy, TII, TRI, RBI);
3813
3814 // Now, perform the remaining copies as vector lane copies.
3815 unsigned LaneIdx = 1;
3816 for (Register InsReg : InsertRegs) {
3817 Register CopyTo = I.getOperand(LaneIdx).getReg();
3818 MachineInstr &CopyInst =
3819 *BuildMI(MBB, I, I.getDebugLoc(), TII.get(CopyOpc), CopyTo)
3820 .addUse(InsReg)
3821 .addImm(LaneIdx);
3822 constrainSelectedInstRegOperands(CopyInst, TII, TRI, RBI);
3823 ++LaneIdx;
3824 }
3825
3826 // Separately constrain the first copy's destination. Because of the
3827 // limitation in constrainOperandRegClass, we can't guarantee that this will
3828 // actually be constrained. So, do it ourselves using the second operand.
3829 const TargetRegisterClass *RC =
3830 MRI.getRegClassOrNull(I.getOperand(1).getReg());
3831 if (!RC) {
3832 LLVM_DEBUG(dbgs() << "Couldn't constrain copy destination.\n");
3833 return false;
3834 }
3835
3836 RBI.constrainGenericRegister(CopyTo, *RC, MRI);
3837 I.eraseFromParent();
3838 return true;
3839}
3840
3841bool AArch64InstructionSelector::selectConcatVectors(
3842 MachineInstr &I, MachineRegisterInfo &MRI) const {
3843 assert(I.getOpcode() == TargetOpcode::G_CONCAT_VECTORS &&
3844        "Unexpected opcode");
3845 Register Dst = I.getOperand(0).getReg();
3846 Register Op1 = I.getOperand(1).getReg();
3847 Register Op2 = I.getOperand(2).getReg();
3848 MachineIRBuilder MIRBuilder(I);
3849 MachineInstr *ConcatMI = emitVectorConcat(Dst, Op1, Op2, MIRBuilder);
3850 if (!ConcatMI)
3851 return false;
3852 I.eraseFromParent();
3853 return true;
3854}
3855
3856unsigned
3857AArch64InstructionSelector::emitConstantPoolEntry(const Constant *CPVal,
3858 MachineFunction &MF) const {
3859 Type *CPTy = CPVal->getType();
3860 Align Alignment = MF.getDataLayout().getPrefTypeAlign(CPTy);
3861
3862 MachineConstantPool *MCP = MF.getConstantPool();
3863 return MCP->getConstantPoolIndex(CPVal, Alignment);
3864}
3865
3866MachineInstr *AArch64InstructionSelector::emitLoadFromConstantPool(
3867 const Constant *CPVal, MachineIRBuilder &MIRBuilder) const {
3868 unsigned CPIdx = emitConstantPoolEntry(CPVal, MIRBuilder.getMF());
3869
3870 auto Adrp =
3871 MIRBuilder.buildInstr(AArch64::ADRP, {&AArch64::GPR64RegClass}, {})
3872 .addConstantPoolIndex(CPIdx, 0, AArch64II::MO_PAGE);
3873
3874 MachineInstr *LoadMI = nullptr;
3875 switch (MIRBuilder.getDataLayout().getTypeStoreSize(CPVal->getType())) {
3876 case 16:
3877 LoadMI =
3878 &*MIRBuilder
3879 .buildInstr(AArch64::LDRQui, {&AArch64::FPR128RegClass}, {Adrp})
3880 .addConstantPoolIndex(CPIdx, 0,
3881 AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
3882 break;
3883 case 8:
3884 LoadMI = &*MIRBuilder
3885 .buildInstr(AArch64::LDRDui, {&AArch64::FPR64RegClass}, {Adrp})
3886 .addConstantPoolIndex(
3887 CPIdx, 0, AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
3888 break;
3889 default:
3890 LLVM_DEBUG(dbgs() << "Could not load from constant pool of type "
3891                   << *CPVal->getType());
3892 return nullptr;
3893 }
3894 constrainSelectedInstRegOperands(*Adrp, TII, TRI, RBI);
3895 constrainSelectedInstRegOperands(*LoadMI, TII, TRI, RBI);
3896 return LoadMI;
3897}
3898
3899/// Return an <Opcode, SubregIndex> pair to do a vector elt insert of a given
3900/// size and RB.
3901static std::pair<unsigned, unsigned>
3902getInsertVecEltOpInfo(const RegisterBank &RB, unsigned EltSize) {
3903 unsigned Opc, SubregIdx;
3904 if (RB.getID() == AArch64::GPRRegBankID) {
3905 if (EltSize == 16) {
3906 Opc = AArch64::INSvi16gpr;
3907 SubregIdx = AArch64::ssub;
3908 } else if (EltSize == 32) {
3909 Opc = AArch64::INSvi32gpr;
3910 SubregIdx = AArch64::ssub;
3911 } else if (EltSize == 64) {
3912 Opc = AArch64::INSvi64gpr;
3913 SubregIdx = AArch64::dsub;
3914 } else {
3915 llvm_unreachable("invalid elt size!");
3916 }
3917 } else {
3918 if (EltSize == 8) {
3919 Opc = AArch64::INSvi8lane;
3920 SubregIdx = AArch64::bsub;
3921 } else if (EltSize == 16) {
3922 Opc = AArch64::INSvi16lane;
3923 SubregIdx = AArch64::hsub;
3924 } else if (EltSize == 32) {
3925 Opc = AArch64::INSvi32lane;
3926 SubregIdx = AArch64::ssub;
3927 } else if (EltSize == 64) {
3928 Opc = AArch64::INSvi64lane;
3929 SubregIdx = AArch64::dsub;
3930 } else {
3931 llvm_unreachable("invalid elt size!");
3932 }
3933 }
3934 return std::make_pair(Opc, SubregIdx);
3935}
3936
3937MachineInstr *AArch64InstructionSelector::emitInstr(
3938 unsigned Opcode, std::initializer_list<llvm::DstOp> DstOps,
3939 std::initializer_list<llvm::SrcOp> SrcOps, MachineIRBuilder &MIRBuilder,
3940 const ComplexRendererFns &RenderFns) const {
3941 assert(Opcode && "Expected an opcode?");
3942 assert(!isPreISelGenericOpcode(Opcode) &&
3943        "Function should only be used to produce selected instructions!");
3944 auto MI = MIRBuilder.buildInstr(Opcode, DstOps, SrcOps);
3945 if (RenderFns)
3946 for (auto &Fn : *RenderFns)
3947 Fn(MI);
3948 constrainSelectedInstRegOperands(*MI, TII, TRI, RBI);
3949 return &*MI;
3950}
3951
3952MachineInstr *AArch64InstructionSelector::emitAddSub(
3953 const std::array<std::array<unsigned, 2>, 5> &AddrModeAndSizeToOpcode,
3954 Register Dst, MachineOperand &LHS, MachineOperand &RHS,
3955 MachineIRBuilder &MIRBuilder) const {
3956 MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
3957 assert(LHS.isReg() && RHS.isReg() && "Expected register operands?");
2. '?' condition is true
3958 auto Ty = MRI.getType(LHS.getReg());
3959 assert(!Ty.isVector() && "Expected a scalar or pointer?");
3. '?' condition is true
3960 unsigned Size = Ty.getSizeInBits();
3961 assert((Size == 32 || Size == 64) && "Expected a 32-bit or 64-bit type only");
4. Assuming 'Size' is not equal to 32
5. Assuming 'Size' is equal to 64
6. '?' condition is true
3962 bool Is32Bit = Size == 32;
3963
3964 // INSTRri form with positive arithmetic immediate.
3965 if (auto Fns = selectArithImmed(RHS))
7. Taking false branch
3966 return emitInstr(AddrModeAndSizeToOpcode[0][Is32Bit], {Dst}, {LHS},
3967 MIRBuilder, Fns);
3968
3969 // INSTRri form with negative arithmetic immediate.
3970 if (auto Fns = selectNegArithImmed(RHS))
8. Taking false branch
3971 return emitInstr(AddrModeAndSizeToOpcode[3][Is32Bit], {Dst}, {LHS},
3972 MIRBuilder, Fns);
3973
3974 // INSTRrx form.
3975 if (auto Fns = selectArithExtendedRegister(RHS))
9. Calling 'AArch64InstructionSelector::selectArithExtendedRegister'
3976 return emitInstr(AddrModeAndSizeToOpcode[4][Is32Bit], {Dst}, {LHS},
3977 MIRBuilder, Fns);
3978
3979 // INSTRrs form.
3980 if (auto Fns = selectShiftedRegister(RHS))
3981 return emitInstr(AddrModeAndSizeToOpcode[1][Is32Bit], {Dst}, {LHS},
3982 MIRBuilder, Fns);
3983 return emitInstr(AddrModeAndSizeToOpcode[2][Is32Bit], {Dst}, {LHS, RHS},
3984 MIRBuilder);
3985}
3986
3987MachineInstr *
3988AArch64InstructionSelector::emitADD(Register DefReg, MachineOperand &LHS,
3989 MachineOperand &RHS,
3990 MachineIRBuilder &MIRBuilder) const {
3991 const std::array<std::array<unsigned, 2>, 5> OpcTable{
3992 {{AArch64::ADDXri, AArch64::ADDWri},
3993 {AArch64::ADDXrs, AArch64::ADDWrs},
3994 {AArch64::ADDXrr, AArch64::ADDWrr},
3995 {AArch64::SUBXri, AArch64::SUBWri},
3996 {AArch64::ADDXrx, AArch64::ADDWrx}}};
3997 return emitAddSub(OpcTable, DefReg, LHS, RHS, MIRBuilder);
1. Calling 'AArch64InstructionSelector::emitAddSub'
3998}
3999
4000MachineInstr *
4001AArch64InstructionSelector::emitADDS(Register Dst, MachineOperand &LHS,
4002 MachineOperand &RHS,
4003 MachineIRBuilder &MIRBuilder) const {
4004 const std::array<std::array<unsigned, 2>, 5> OpcTable{
4005 {{AArch64::ADDSXri, AArch64::ADDSWri},
4006 {AArch64::ADDSXrs, AArch64::ADDSWrs},
4007 {AArch64::ADDSXrr, AArch64::ADDSWrr},
4008 {AArch64::SUBSXri, AArch64::SUBSWri},
4009 {AArch64::ADDSXrx, AArch64::ADDSWrx}}};
4010 return emitAddSub(OpcTable, Dst, LHS, RHS, MIRBuilder);
4011}
4012
4013MachineInstr *
4014AArch64InstructionSelector::emitSUBS(Register Dst, MachineOperand &LHS,
4015 MachineOperand &RHS,
4016 MachineIRBuilder &MIRBuilder) const {
4017 const std::array<std::array<unsigned, 2>, 5> OpcTable{
4018 {{AArch64::SUBSXri, AArch64::SUBSWri},
4019 {AArch64::SUBSXrs, AArch64::SUBSWrs},
4020 {AArch64::SUBSXrr, AArch64::SUBSWrr},
4021 {AArch64::ADDSXri, AArch64::ADDSWri},
4022 {AArch64::SUBSXrx, AArch64::SUBSWrx}}};
4023 return emitAddSub(OpcTable, Dst, LHS, RHS, MIRBuilder);
4024}
4025
4026MachineInstr *
4027AArch64InstructionSelector::emitCMN(MachineOperand &LHS, MachineOperand &RHS,
4028 MachineIRBuilder &MIRBuilder) const {
4029 MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
4030 bool Is32Bit = (MRI.getType(LHS.getReg()).getSizeInBits() == 32);
4031 return emitADDS(Is32Bit ? AArch64::WZR : AArch64::XZR, LHS, RHS, MIRBuilder);
4032}
4033
4034MachineInstr *
4035AArch64InstructionSelector::emitTST(MachineOperand &LHS, MachineOperand &RHS,
4036 MachineIRBuilder &MIRBuilder) const {
4037 assert(LHS.isReg() && RHS.isReg() && "Expected register operands?");
4038 MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
4039 LLT Ty = MRI.getType(LHS.getReg());
4040 unsigned RegSize = Ty.getSizeInBits();
4041 bool Is32Bit = (RegSize == 32);
4042 const unsigned OpcTable[3][2] = {{AArch64::ANDSXri, AArch64::ANDSWri},
4043 {AArch64::ANDSXrs, AArch64::ANDSWrs},
4044 {AArch64::ANDSXrr, AArch64::ANDSWrr}};
4045 // ANDS needs a logical immediate for its immediate form. Check if we can
4046 // fold one in.
4047 if (auto ValAndVReg = getConstantVRegValWithLookThrough(RHS.getReg(), MRI)) {
4048 if (AArch64_AM::isLogicalImmediate(ValAndVReg->Value, RegSize)) {
4049 auto TstMI = MIRBuilder.buildInstr(OpcTable[0][Is32Bit], {Ty}, {LHS});
4050 TstMI.addImm(
4051 AArch64_AM::encodeLogicalImmediate(ValAndVReg->Value, RegSize));
4052 constrainSelectedInstRegOperands(*TstMI, TII, TRI, RBI);
4053 return &*TstMI;
4054 }
4055 }
4056
4057 if (auto Fns = selectLogicalShiftedRegister(RHS))
4058 return emitInstr(OpcTable[1][Is32Bit], {Ty}, {LHS}, MIRBuilder, Fns);
4059 return emitInstr(OpcTable[2][Is32Bit], {Ty}, {LHS, RHS}, MIRBuilder);
4060}
4061
4062MachineInstr *AArch64InstructionSelector::emitIntegerCompare(
4063 MachineOperand &LHS, MachineOperand &RHS, MachineOperand &Predicate,
4064 MachineIRBuilder &MIRBuilder) const {
4065 assert(LHS.isReg() && RHS.isReg() && "Expected LHS and RHS to be registers!");
4066 assert(Predicate.isPredicate() && "Expected predicate?");
4067 MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
4068 LLT CmpTy = MRI.getType(LHS.getReg());
4069 assert(!CmpTy.isVector() && "Expected scalar or pointer");
4070 unsigned Size = CmpTy.getSizeInBits();
4071 (void)Size;
4072 assert((Size == 32 || Size == 64) && "Expected a 32-bit or 64-bit LHS/RHS?");
4073 // Fold the compare into a cmn or tst if possible.
4074 if (auto FoldCmp = tryFoldIntegerCompare(LHS, RHS, Predicate, MIRBuilder))
4075 return FoldCmp;
4076 auto Dst = MRI.cloneVirtualRegister(LHS.getReg());
4077 return emitSUBS(Dst, LHS, RHS, MIRBuilder);
4078}
4079
4080MachineInstr *AArch64InstructionSelector::emitCSetForFCmp(
4081 Register Dst, CmpInst::Predicate Pred, MachineIRBuilder &MIRBuilder) const {
4082 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
4083#ifndef NDEBUG
4084 LLT Ty = MRI.getType(Dst);
4085 assert(!Ty.isVector() && Ty.getSizeInBits() == 32 &&
4086        "Expected a 32-bit scalar register?");
4087#endif
4088 const Register ZeroReg = AArch64::WZR;
4089 auto EmitCSet = [&](Register CsetDst, AArch64CC::CondCode CC) {
4090 auto CSet =
4091 MIRBuilder.buildInstr(AArch64::CSINCWr, {CsetDst}, {ZeroReg, ZeroReg})
4092 .addImm(getInvertedCondCode(CC));
4093 constrainSelectedInstRegOperands(*CSet, TII, TRI, RBI);
4094 return &*CSet;
4095 };
4096
4097 AArch64CC::CondCode CC1, CC2;
4098 changeFCMPPredToAArch64CC(Pred, CC1, CC2);
4099 if (CC2 == AArch64CC::AL)
4100 return EmitCSet(Dst, CC1);
4101
4102 const TargetRegisterClass *RC = &AArch64::GPR32RegClass;
4103 Register Def1Reg = MRI.createVirtualRegister(RC);
4104 Register Def2Reg = MRI.createVirtualRegister(RC);
4105 EmitCSet(Def1Reg, CC1);
4106 EmitCSet(Def2Reg, CC2);
4107 auto OrMI = MIRBuilder.buildInstr(AArch64::ORRWrr, {Dst}, {Def1Reg, Def2Reg});
4108 constrainSelectedInstRegOperands(*OrMI, TII, TRI, RBI);
4109 return &*OrMI;
4110}
4111
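emitCSetForFCmp covers floating-point predicates that need two AArch64 condition codes by materializing two csets and ORing them together. The sketch below shows the same split for the "ordered and not equal" case in plain C++; it is an illustration of the predicate semantics, not the selector's code, and assumes IEEE comparison behaviour where any comparison with NaN is false.

// Minimal sketch: "one" (ordered, not equal) decomposes into two simple
// comparisons whose results are ORed, mirroring the two csets + ORRWrr above.
#include <cassert>
#include <cmath>

static bool fcmpONE(double X, double Y) { return (X < Y) || (X > Y); }

int main() {
  assert(fcmpONE(1.0, 2.0));  // ordered, different -> true
  assert(!fcmpONE(2.0, 2.0)); // equal -> false
  assert(!fcmpONE(NAN, 2.0)); // unordered -> false
  return 0;
}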
4112MachineInstr *
4113AArch64InstructionSelector::emitFPCompare(Register LHS, Register RHS,
4114 MachineIRBuilder &MIRBuilder) const {
4115 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
4116 LLT Ty = MRI.getType(LHS);
4117 if (Ty.isVector())
4118 return nullptr;
4119 unsigned OpSize = Ty.getSizeInBits();
4120 if (OpSize != 32 && OpSize != 64)
4121 return nullptr;
4122
4123 // If this is a compare against +0.0, then we don't have
4124 // to explicitly materialize a constant.
4125 const ConstantFP *FPImm = getConstantFPVRegVal(RHS, MRI);
4126 bool ShouldUseImm = FPImm && (FPImm->isZero() && !FPImm->isNegative());
4127 unsigned CmpOpcTbl[2][2] = {{AArch64::FCMPSrr, AArch64::FCMPDrr},
4128 {AArch64::FCMPSri, AArch64::FCMPDri}};
4129 unsigned CmpOpc = CmpOpcTbl[ShouldUseImm][OpSize == 64];
4130
4131 // Partially build the compare. Decide if we need to add a use for the
4132 // third operand based off whether or not we're comparing against 0.0.
4133 auto CmpMI = MIRBuilder.buildInstr(CmpOpc).addUse(LHS);
4134 if (!ShouldUseImm)
4135 CmpMI.addUse(RHS);
4136 constrainSelectedInstRegOperands(*CmpMI, TII, TRI, RBI);
4137 return &*CmpMI;
4138}
4139
4140MachineInstr *AArch64InstructionSelector::emitVectorConcat(
4141 Optional<Register> Dst, Register Op1, Register Op2,
4142 MachineIRBuilder &MIRBuilder) const {
4143 // We implement a vector concat by:
4144 // 1. Use scalar_to_vector to insert the lower vector into the larger dest
4145 // 2. Insert the upper vector into the destination's upper element
4146 // TODO: some of this code is common with G_BUILD_VECTOR handling.
4147 MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
4148
4149 const LLT Op1Ty = MRI.getType(Op1);
4150 const LLT Op2Ty = MRI.getType(Op2);
4151
4152 if (Op1Ty != Op2Ty) {
4153 LLVM_DEBUG(dbgs() << "Could not do vector concat of differing vector tys");
4154 return nullptr;
4155 }
4156 assert(Op1Ty.isVector() && "Expected a vector for vector concat");
4157
4158 if (Op1Ty.getSizeInBits() >= 128) {
4159 LLVM_DEBUG(dbgs() << "Vector concat not supported for full size vectors");
4160 return nullptr;
4161 }
4162
4163 // At the moment we just support 64 bit vector concats.
4164 if (Op1Ty.getSizeInBits() != 64) {
4165 LLVM_DEBUG(dbgs() << "Vector concat supported for 64b vectors");
4166 return nullptr;
4167 }
4168
4169 const LLT ScalarTy = LLT::scalar(Op1Ty.getSizeInBits());
4170 const RegisterBank &FPRBank = *RBI.getRegBank(Op1, MRI, TRI);
4171 const TargetRegisterClass *DstRC =
4172 getMinClassForRegBank(FPRBank, Op1Ty.getSizeInBits() * 2);
4173
4174 MachineInstr *WidenedOp1 =
4175 emitScalarToVector(ScalarTy.getSizeInBits(), DstRC, Op1, MIRBuilder);
4176 MachineInstr *WidenedOp2 =
4177 emitScalarToVector(ScalarTy.getSizeInBits(), DstRC, Op2, MIRBuilder);
4178 if (!WidenedOp1 || !WidenedOp2) {
4179 LLVM_DEBUG(dbgs() << "Could not emit a vector from scalar value");
4180 return nullptr;
4181 }
4182
4183 // Now do the insert of the upper element.
4184 unsigned InsertOpc, InsSubRegIdx;
4185 std::tie(InsertOpc, InsSubRegIdx) =
4186 getInsertVecEltOpInfo(FPRBank, ScalarTy.getSizeInBits());
4187
4188 if (!Dst)
4189 Dst = MRI.createVirtualRegister(DstRC);
4190 auto InsElt =
4191 MIRBuilder
4192 .buildInstr(InsertOpc, {*Dst}, {WidenedOp1->getOperand(0).getReg()})
4193 .addImm(1) /* Lane index */
4194 .addUse(WidenedOp2->getOperand(0).getReg())
4195 .addImm(0);
4196 constrainSelectedInstRegOperands(*InsElt, TII, TRI, RBI);
4197 return &*InsElt;
4198}
4199
4200MachineInstr *AArch64InstructionSelector::emitFMovForFConstant(
4201 MachineInstr &I, MachineRegisterInfo &MRI) const {
4202 assert(I.getOpcode() == TargetOpcode::G_FCONSTANT &&
4203        "Expected a G_FCONSTANT!");
4204 MachineOperand &ImmOp = I.getOperand(1);
4205 unsigned DefSize = MRI.getType(I.getOperand(0).getReg()).getSizeInBits();
4206
4207 // Only handle 32 and 64 bit defs for now.
4208 if (DefSize != 32 && DefSize != 64)
4209 return nullptr;
4210
4211 // Don't handle null values using FMOV.
4212 if (ImmOp.getFPImm()->isNullValue())
4213 return nullptr;
4214
4215 // Get the immediate representation for the FMOV.
4216 const APFloat &ImmValAPF = ImmOp.getFPImm()->getValueAPF();
4217 int Imm = DefSize == 32 ? AArch64_AM::getFP32Imm(ImmValAPF)
4218 : AArch64_AM::getFP64Imm(ImmValAPF);
4219
4220 // If this is -1, it means the immediate can't be represented as the requested
4221 // floating point value. Bail.
4222 if (Imm == -1)
4223 return nullptr;
4224
4225 // Update MI to represent the new FMOV instruction, constrain it, and return.
4226 ImmOp.ChangeToImmediate(Imm);
4227 unsigned MovOpc = DefSize == 32 ? AArch64::FMOVSi : AArch64::FMOVDi;
4228 I.setDesc(TII.get(MovOpc));
4229 constrainSelectedInstRegOperands(I, TII, TRI, RBI);
4230 return &I;
4231}
4232
4233MachineInstr *
4234AArch64InstructionSelector::emitCSetForICMP(Register DefReg, unsigned Pred,
4235 MachineIRBuilder &MIRBuilder) const {
4236 // CSINC increments the result when the predicate is false. Invert it.
4237 const AArch64CC::CondCode InvCC = changeICMPPredToAArch64CC(
4238 CmpInst::getInversePredicate((CmpInst::Predicate)Pred));
4239 auto I =
4240 MIRBuilder
4241 .buildInstr(AArch64::CSINCWr, {DefReg}, {Register(AArch64::WZR), Register(AArch64::WZR)})
4242 .addImm(InvCC);
4243 constrainSelectedInstRegOperands(*I, TII, TRI, RBI);
4244 return &*I;
4245}
4246
4247bool AArch64InstructionSelector::tryOptSelect(MachineInstr &I) const {
4248 MachineIRBuilder MIB(I);
4249 MachineRegisterInfo &MRI = *MIB.getMRI();
4250 // We want to recognize this pattern:
4251 //
4252 // $z = G_FCMP pred, $x, $y
4253 // ...
4254 // $w = G_SELECT $z, $a, $b
4255 //
4256 // Where the value of $z is *only* ever used by the G_SELECT (possibly with
4257 // some copies/truncs in between.)
4258 //
4259 // If we see this, then we can emit something like this:
4260 //
4261 // fcmp $x, $y
4262 // fcsel $w, $a, $b, pred
4263 //
4264 // Rather than emitting both of the rather long sequences in the standard
4265 // G_FCMP/G_SELECT select methods.
4266
4267 // First, check if the condition is defined by a compare.
4268 MachineInstr *CondDef = MRI.getVRegDef(I.getOperand(1).getReg());
4269 while (CondDef) {
4270 // We can only fold if all of the defs have one use.
4271 Register CondDefReg = CondDef->getOperand(0).getReg();
4272 if (!MRI.hasOneNonDBGUse(CondDefReg)) {
4273 // Unless it's another select.
4274 for (const MachineInstr &UI : MRI.use_nodbg_instructions(CondDefReg)) {
4275 if (CondDef == &UI)
4276 continue;
4277 if (UI.getOpcode() != TargetOpcode::G_SELECT)
4278 return false;
4279 }
4280 }
4281
4282 // We can skip over G_TRUNC since the condition is 1-bit.
4283 // Truncating/extending can have no impact on the value.
4284 unsigned Opc = CondDef->getOpcode();
4285 if (Opc != TargetOpcode::COPY && Opc != TargetOpcode::G_TRUNC)
4286 break;
4287
4288 // Can't see past copies from physregs.
4289 if (Opc == TargetOpcode::COPY &&
4290 Register::isPhysicalRegister(CondDef->getOperand(1).getReg()))
4291 return false;
4292
4293 CondDef = MRI.getVRegDef(CondDef->getOperand(1).getReg());
4294 }
4295
4296 // Is the condition defined by a compare?
4297 if (!CondDef)
4298 return false;
4299
4300 unsigned CondOpc = CondDef->getOpcode();
4301 if (CondOpc != TargetOpcode::G_ICMP && CondOpc != TargetOpcode::G_FCMP)
4302 return false;
4303
4304 AArch64CC::CondCode CondCode;
4305 if (CondOpc == TargetOpcode::G_ICMP) {
4306 auto Pred =
4307 static_cast<CmpInst::Predicate>(CondDef->getOperand(1).getPredicate());
4308 CondCode = changeICMPPredToAArch64CC(Pred);
4309 emitIntegerCompare(CondDef->getOperand(2), CondDef->getOperand(3),
4310 CondDef->getOperand(1), MIB);
4311 } else {
4312 // Get the condition code for the select.
4313 auto Pred =
4314 static_cast<CmpInst::Predicate>(CondDef->getOperand(1).getPredicate());
4315 AArch64CC::CondCode CondCode2;
4316 changeFCMPPredToAArch64CC(Pred, CondCode, CondCode2);
4317
4318 // changeFCMPPredToAArch64CC sets CondCode2 to AL when we require two
4319 // instructions to emit the comparison.
4320 // TODO: Handle FCMP_UEQ and FCMP_ONE. After that, this check will be
4321 // unnecessary.
4322 if (CondCode2 != AArch64CC::AL)
4323 return false;
4324
4325 if (!emitFPCompare(CondDef->getOperand(2).getReg(),
4326 CondDef->getOperand(3).getReg(), MIB)) {
4327 LLVM_DEBUG(dbgs() << "Couldn't emit compare for select!\n");
4328 return false;
4329 }
4330 }
4331
4332 // Emit the select.
4333 emitSelect(I.getOperand(0).getReg(), I.getOperand(2).getReg(),
4334 I.getOperand(3).getReg(), CondCode, MIB);
4335 I.eraseFromParent();
4336 return true;
4337}
4338
4339MachineInstr *AArch64InstructionSelector::tryFoldIntegerCompare(
4340 MachineOperand &LHS, MachineOperand &RHS, MachineOperand &Predicate,
4341 MachineIRBuilder &MIRBuilder) const {
4342 assert(LHS.isReg() && RHS.isReg() && Predicate.isPredicate() &&
4343        "Unexpected MachineOperand");
4344 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
4345 // We want to find this sort of thing:
4346 // x = G_SUB 0, y
4347 // G_ICMP z, x
4348 //
4349 // In this case, we can fold the G_SUB into the G_ICMP using a CMN instead.
4350 // e.g:
4351 //
4352 // cmn z, y
4353
4354 // Helper lambda to detect the subtract followed by the compare.
4355 // Takes in the def of the LHS or RHS, and checks if it's a subtract from 0.
4356 auto IsCMN = [&](MachineInstr *DefMI, const AArch64CC::CondCode &CC) {
4357 if (!DefMI || DefMI->getOpcode() != TargetOpcode::G_SUB)
4358 return false;
4359
4360 // Need to make sure NZCV is the same at the end of the transformation.
4361 if (CC != AArch64CC::EQ && CC != AArch64CC::NE)
4362 return false;
4363
4364 // We want to match against SUBs.
4365 if (DefMI->getOpcode() != TargetOpcode::G_SUB)
4366 return false;
4367
4368 // Make sure that we're getting
4369 // x = G_SUB 0, y
4370 auto ValAndVReg =
4371 getConstantVRegValWithLookThrough(DefMI->getOperand(1).getReg(), MRI);
4372 if (!ValAndVReg || ValAndVReg->Value != 0)
4373 return false;
4374
4375 // This can safely be represented as a CMN.
4376 return true;
4377 };
4378
4379 // Check if the RHS or LHS of the G_ICMP is defined by a SUB
4380 MachineInstr *LHSDef = getDefIgnoringCopies(LHS.getReg(), MRI);
4381 MachineInstr *RHSDef = getDefIgnoringCopies(RHS.getReg(), MRI);
4382 CmpInst::Predicate P = (CmpInst::Predicate)Predicate.getPredicate();
4383 const AArch64CC::CondCode CC = changeICMPPredToAArch64CC(P);
4384
4385 // Given this:
4386 //
4387 // x = G_SUB 0, y
4388 // G_ICMP x, z
4389 //
4390 // Produce this:
4391 //
4392 // cmn y, z
4393 if (IsCMN(LHSDef, CC))
4394 return emitCMN(LHSDef->getOperand(2), RHS, MIRBuilder);
4395
4396 // Same idea here, but with the RHS of the compare instead:
4397 //
4398 // Given this:
4399 //
4400 // x = G_SUB 0, y
4401 // G_ICMP z, x
4402 //
4403 // Produce this:
4404 //
4405 // cmn z, y
4406 if (IsCMN(RHSDef, CC))
4407 return emitCMN(LHS, RHSDef->getOperand(2), MIRBuilder);
4408
4409 // Given this:
4410 //
4411 // z = G_AND x, y
4412 // G_ICMP z, 0
4413 //
4414 // Produce this if the compare is signed:
4415 //
4416 // tst x, y
4417 if (!CmpInst::isUnsigned(P) && LHSDef &&
4418 LHSDef->getOpcode() == TargetOpcode::G_AND) {
4419 // Make sure that the RHS is 0.
4420 auto ValAndVReg = getConstantVRegValWithLookThrough(RHS.getReg(), MRI);
4421 if (!ValAndVReg || ValAndVReg->Value != 0)
4422 return nullptr;
4423
4424 return emitTST(LHSDef->getOperand(1),
4425 LHSDef->getOperand(2), MIRBuilder);
4426 }
4427
4428 return nullptr;
4429}
4430
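tryFoldIntegerCompare turns G_ICMP x, (G_SUB 0, y) into cmn x, y, and restricts the fold to EQ/NE because those predicates only depend on whether the two sides are equal, i.e. on whether x + y wraps to zero. The sketch below checks that identity with plain 32-bit unsigned arithmetic; it is a demonstration of the reasoning, not the selector's code.

// Minimal sketch, assuming 32-bit wrap-around arithmetic: "x == 0 - y" holds
// exactly when "x + y == 0", which is the zero-flag condition a CMN computes.
#include <cassert>
#include <cstdint>
#include <initializer_list>

int main() {
  for (uint32_t X : {0u, 1u, 7u, 0xfffffff9u})
    for (uint32_t Y : {0u, 7u, 0x80000000u}) {
      bool CmpAgainstNeg = (X == static_cast<uint32_t>(0u - Y)); // icmp eq x, (sub 0, y)
      bool CmnZeroFlag = (static_cast<uint32_t>(X + Y) == 0u);   // cmn x, y ; check Z
      assert(CmpAgainstNeg == CmnZeroFlag);
    }
  return 0;
}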
4431bool AArch64InstructionSelector::selectShuffleVector(
4432 MachineInstr &I, MachineRegisterInfo &MRI) const {
4433 const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
4434 Register Src1Reg = I.getOperand(1).getReg();
4435 const LLT Src1Ty = MRI.getType(Src1Reg);
4436 Register Src2Reg = I.getOperand(2).getReg();
4437 const LLT Src2Ty = MRI.getType(Src2Reg);
4438 ArrayRef<int> Mask = I.getOperand(3).getShuffleMask();
4439
4440 MachineBasicBlock &MBB = *I.getParent();
4441 MachineFunction &MF = *MBB.getParent();
4442 LLVMContext &Ctx = MF.getFunction().getContext();
4443
4444 // G_SHUFFLE_VECTOR is weird in that the source operands can be scalars, if
4445 // it's originated from a <1 x T> type. Those should have been lowered into
4446 // G_BUILD_VECTOR earlier.
4447 if (!Src1Ty.isVector() || !Src2Ty.isVector()) {
4448 LLVM_DEBUG(dbgs() << "Could not select a \"scalar\" G_SHUFFLE_VECTOR\n");
4449 return false;
4450 }
4451
4452 unsigned BytesPerElt = DstTy.getElementType().getSizeInBits() / 8;
4453
4454 SmallVector<Constant *, 64> CstIdxs;
4455 for (int Val : Mask) {
4456 // For now, any undef indexes we'll just assume to be 0. This should be
4457 // optimized in future, e.g. to select DUP etc.
4458 Val = Val < 0 ? 0 : Val;
4459 for (unsigned Byte = 0; Byte < BytesPerElt; ++Byte) {
4460 unsigned Offset = Byte + Val * BytesPerElt;
4461 CstIdxs.emplace_back(ConstantInt::get(Type::getInt8Ty(Ctx), Offset));
4462 }
4463 }
4464
4465 MachineIRBuilder MIRBuilder(I);
4466
4467 // Use a constant pool to load the index vector for TBL.
4468 Constant *CPVal = ConstantVector::get(CstIdxs);
4469 MachineInstr *IndexLoad = emitLoadFromConstantPool(CPVal, MIRBuilder);
4470 if (!IndexLoad) {
4471 LLVM_DEBUG(dbgs() << "Could not load from a constant pool");
4472 return false;
4473 }
4474
4475 if (DstTy.getSizeInBits() != 128) {
4476 assert(DstTy.getSizeInBits() == 64 && "Unexpected shuffle result ty");
4477 // This case can be done with TBL1.
4478 MachineInstr *Concat = emitVectorConcat(None, Src1Reg, Src2Reg, MIRBuilder);
4479 if (!Concat) {
4480 LLVM_DEBUG(dbgs() << "Could not do vector concat for tbl1");
4481 return false;
4482 }
4483
4484 // The constant pool load will be 64 bits, so need to convert to FPR128 reg.
4485 IndexLoad =
4486 emitScalarToVector(64, &AArch64::FPR128RegClass,
4487 IndexLoad->getOperand(0).getReg(), MIRBuilder);
4488
4489 auto TBL1 = MIRBuilder.buildInstr(
4490 AArch64::TBLv16i8One, {&AArch64::FPR128RegClass},
4491 {Concat->getOperand(0).getReg(), IndexLoad->getOperand(0).getReg()});
4492 constrainSelectedInstRegOperands(*TBL1, TII, TRI, RBI);
4493
4494 auto Copy =
4495 MIRBuilder
4496 .buildInstr(TargetOpcode::COPY, {I.getOperand(0).getReg()}, {})
4497 .addReg(TBL1.getReg(0), 0, AArch64::dsub);
4498 RBI.constrainGenericRegister(Copy.getReg(0), AArch64::FPR64RegClass, MRI);
4499 I.eraseFromParent();
4500 return true;
4501 }
4502
4503 // For TBL2 we need to emit a REG_SEQUENCE to tie together two consecutive
4504 // Q registers for regalloc.
4505 auto RegSeq = MIRBuilder
4506 .buildInstr(TargetOpcode::REG_SEQUENCE,
4507 {&AArch64::QQRegClass}, {Src1Reg})
4508 .addImm(AArch64::qsub0)
4509 .addUse(Src2Reg)
4510 .addImm(AArch64::qsub1);
4511
4512 auto TBL2 = MIRBuilder.buildInstr(AArch64::TBLv16i8Two, {I.getOperand(0)},
4513 {RegSeq, IndexLoad->getOperand(0)});
4514 constrainSelectedInstRegOperands(*RegSeq, TII, TRI, RBI);
4515 constrainSelectedInstRegOperands(*TBL2, TII, TRI, RBI);
4516 I.eraseFromParent();
4517 return true;
4518}
4519
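selectShuffleVector lowers the shuffle to a TBL lookup by expanding every mask entry into BytesPerElt consecutive byte indices, treating undef entries as lane 0 for now. The standalone sketch below mirrors only that index-expansion loop with an assumed example mask; it does not model the constant-pool load or the TBL1/TBL2 emission.

// Minimal sketch: expand a shuffle mask into TBL byte indices, assuming
// 4-byte elements and the illustrative mask {2, -1, 5, 0}.
#include <cstdio>
#include <vector>

static std::vector<unsigned> buildTblByteIndices(const std::vector<int> &Mask,
                                                 unsigned BytesPerElt) {
  std::vector<unsigned> Idxs;
  for (int Val : Mask) {
    if (Val < 0)
      Val = 0; // undef lane -> lane 0, matching the loop above
    for (unsigned Byte = 0; Byte < BytesPerElt; ++Byte)
      Idxs.push_back(Byte + Val * BytesPerElt);
  }
  return Idxs;
}

int main() {
  for (unsigned I : buildTblByteIndices({2, -1, 5, 0}, 4))
    std::printf("%u ", I); // 8 9 10 11 0 1 2 3 20 21 22 23 0 1 2 3
  std::printf("\n");
  return 0;
}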
4520MachineInstr *AArch64InstructionSelector::emitLaneInsert(
4521 Optional<Register> DstReg, Register SrcReg, Register EltReg,
4522 unsigned LaneIdx, const RegisterBank &RB,
4523 MachineIRBuilder &MIRBuilder) const {
4524 MachineInstr *InsElt = nullptr;
4525 const TargetRegisterClass *DstRC = &AArch64::FPR128RegClass;
4526 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
4527
4528 // Create a register to define with the insert if one wasn't passed in.
4529 if (!DstReg)
4530 DstReg = MRI.createVirtualRegister(DstRC);
4531
4532 unsigned EltSize = MRI.getType(EltReg).getSizeInBits();
4533 unsigned Opc = getInsertVecEltOpInfo(RB, EltSize).first;
4534
4535 if (RB.getID() == AArch64::FPRRegBankID) {
4536 auto InsSub = emitScalarToVector(EltSize, DstRC, EltReg, MIRBuilder);
4537 InsElt = MIRBuilder.buildInstr(Opc, {*DstReg}, {SrcReg})
4538 .addImm(LaneIdx)
4539 .addUse(InsSub->getOperand(0).getReg())
4540 .addImm(0);
4541 } else {
4542 InsElt = MIRBuilder.buildInstr(Opc, {*DstReg}, {SrcReg})
4543 .addImm(LaneIdx)
4544 .addUse(EltReg);
4545 }
4546
4547 constrainSelectedInstRegOperands(*InsElt, TII, TRI, RBI);
4548 return InsElt;
4549}
4550
4551bool AArch64InstructionSelector::selectInsertElt(
4552 MachineInstr &I, MachineRegisterInfo &MRI) const {
4553 assert(I.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT);
4554
4555 // Get information on the destination.
4556 Register DstReg = I.getOperand(0).getReg();
4557 const LLT DstTy = MRI.getType(DstReg);
4558 unsigned VecSize = DstTy.getSizeInBits();
4559
4560 // Get information on the element we want to insert into the destination.
4561 Register EltReg = I.getOperand(2).getReg();
4562 const LLT EltTy = MRI.getType(EltReg);
4563 unsigned EltSize = EltTy.getSizeInBits();
4564 if (EltSize < 16 || EltSize > 64)
4565 return false; // Don't support all element types yet.
4566
4567 // Find the definition of the index. Bail out if it's not defined by a
4568 // G_CONSTANT.
4569 Register IdxReg = I.getOperand(3).getReg();
4570 auto VRegAndVal = getConstantVRegValWithLookThrough(IdxReg, MRI);
4571 if (!VRegAndVal)
4572 return false;
4573 unsigned LaneIdx = VRegAndVal->Value;
4574
4575 // Perform the lane insert.
4576 Register SrcReg = I.getOperand(1).getReg();
4577 const RegisterBank &EltRB = *RBI.getRegBank(EltReg, MRI, TRI);
4578 MachineIRBuilder MIRBuilder(I);
4579
4580 if (VecSize < 128) {
4581 // If the vector we're inserting into is smaller than 128 bits, widen it
4582 // to 128 to do the insert.
4583 MachineInstr *ScalarToVec = emitScalarToVector(
4584 VecSize, &AArch64::FPR128RegClass, SrcReg, MIRBuilder);
4585 if (!ScalarToVec)
4586 return false;
4587 SrcReg = ScalarToVec->getOperand(0).getReg();
4588 }
4589
4590 // Create an insert into a new FPR128 register.
4591 // Note that if our vector is already 128 bits, we end up emitting an extra
4592 // register.
4593 MachineInstr *InsMI =
4594 emitLaneInsert(None, SrcReg, EltReg, LaneIdx, EltRB, MIRBuilder);
4595
4596 if (VecSize < 128) {
4597 // If we had to widen to perform the insert, then we have to demote back to
4598 // the original size to get the result we want.
4599 Register DemoteVec = InsMI->getOperand(0).getReg();
4600 const TargetRegisterClass *RC =
4601 getMinClassForRegBank(*RBI.getRegBank(DemoteVec, MRI, TRI), VecSize);
4602 if (RC != &AArch64::FPR32RegClass && RC != &AArch64::FPR64RegClass) {
4603 LLVM_DEBUG(dbgs() << "Unsupported register class!\n");
4604 return false;
4605 }
4606 unsigned SubReg = 0;
4607 if (!getSubRegForClass(RC, TRI, SubReg))
4608 return false;
4609 if (SubReg != AArch64::ssub && SubReg != AArch64::dsub) {
4610 LLVM_DEBUG(dbgs() << "Unsupported destination size! (" << VecSize
4611 << "\n");
4612 return false;
4613 }
4614 MIRBuilder.buildInstr(TargetOpcode::COPY, {DstReg}, {})
4615 .addReg(DemoteVec, 0, SubReg);
4616 RBI.constrainGenericRegister(DstReg, *RC, MRI);
4617 } else {
4618 // No widening needed.
4619 InsMI->getOperand(0).setReg(DstReg);
4620 constrainSelectedInstRegOperands(*InsMI, TII, TRI, RBI);
4621 }
4622
4623 I.eraseFromParent();
4624 return true;
4625}
4626
4627bool AArch64InstructionSelector::tryOptConstantBuildVec(
4628 MachineInstr &I, LLT DstTy, MachineRegisterInfo &MRI) const {
4629 assert(I.getOpcode() == TargetOpcode::G_BUILD_VECTOR);
4630 unsigned DstSize = DstTy.getSizeInBits();
4631 assert(DstSize <= 128 && "Unexpected build_vec type!");
4632 if (DstSize < 32)
4633 return false;
4634 // Check if we're building a constant vector, in which case we want to
4635 // generate a constant pool load instead of a vector insert sequence.
4636 SmallVector<Constant *, 16> Csts;
4637 for (unsigned Idx = 1; Idx < I.getNumOperands(); ++Idx) {
4638 // Try to find G_CONSTANT or G_FCONSTANT
4639 auto *OpMI =
4640 getOpcodeDef(TargetOpcode::G_CONSTANT, I.getOperand(Idx).getReg(), MRI);
4641 if (OpMI)
4642 Csts.emplace_back(
4643 const_cast<ConstantInt *>(OpMI->getOperand(1).getCImm()));
4644 else if ((OpMI = getOpcodeDef(TargetOpcode::G_FCONSTANT,
4645 I.getOperand(Idx).getReg(), MRI)))
4646 Csts.emplace_back(
4647 const_cast<ConstantFP *>(OpMI->getOperand(1).getFPImm()));
4648 else
4649 return false;
4650 }
4651 Constant *CV = ConstantVector::get(Csts);
4652 MachineIRBuilder MIB(I);
4653 if (CV->isNullValue()) {
4654 // Until the importer can support immAllZerosV in pattern leaf nodes,
4655 // select a zero move manually here.
4656 Register DstReg = I.getOperand(0).getReg();
4657 if (DstSize == 128) {
4658 auto Mov = MIB.buildInstr(AArch64::MOVIv2d_ns, {DstReg}, {}).addImm(0);
4659 I.eraseFromParent();
4660 return constrainSelectedInstRegOperands(*Mov, TII, TRI, RBI);
4661 } else if (DstSize == 64) {
4662 auto Mov =
4663 MIB.buildInstr(AArch64::MOVIv2d_ns, {&AArch64::FPR128RegClass}, {})
4664 .addImm(0);
4665 MIB.buildInstr(TargetOpcode::COPY, {DstReg}, {})
4666 .addReg(Mov.getReg(0), 0, AArch64::dsub);
4667 I.eraseFromParent();
4668 return RBI.constrainGenericRegister(DstReg, AArch64::FPR64RegClass, MRI);
4669 }
4670 }
4671 auto *CPLoad = emitLoadFromConstantPool(CV, MIB);
4672 if (!CPLoad) {
4673 LLVM_DEBUG(dbgs() << "Could not generate cp load for build_vector");
4674 return false;
4675 }
4676 MIB.buildCopy(I.getOperand(0), CPLoad->getOperand(0));
4677 RBI.constrainGenericRegister(I.getOperand(0).getReg(),
4678 *MRI.getRegClass(CPLoad->getOperand(0).getReg()),
4679 MRI);
4680 I.eraseFromParent();
4681 return true;
4682}
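// Illustrative sketch (not part of the original source): a build_vector whose
// operands are all G_CONSTANT i32 0 takes the zero fast path above and becomes
// a single "movi v0.2d, #0" (plus a dsub copy for 64-bit results); any other
// all-constant vector falls through to the constant-pool load.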
4683
4684bool AArch64InstructionSelector::selectBuildVector(
4685 MachineInstr &I, MachineRegisterInfo &MRI) const {
4686 assert(I.getOpcode() == TargetOpcode::G_BUILD_VECTOR);
4687 // Until we port more of the optimized selections, for now just use a vector
4688 // insert sequence.
4689 const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
4690 const LLT EltTy = MRI.getType(I.getOperand(1).getReg());
4691 unsigned EltSize = EltTy.getSizeInBits();
4692
4693 if (tryOptConstantBuildVec(I, DstTy, MRI))
4694 return true;
4695 if (EltSize < 16 || EltSize > 64)
4696 return false; // Don't support all element types yet.
4697 const RegisterBank &RB = *RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI);
4698 MachineIRBuilder MIRBuilder(I);
4699
4700 const TargetRegisterClass *DstRC = &AArch64::FPR128RegClass;
4701 MachineInstr *ScalarToVec =
4702 emitScalarToVector(DstTy.getElementType().getSizeInBits(), DstRC,
4703 I.getOperand(1).getReg(), MIRBuilder);
4704 if (!ScalarToVec)
4705 return false;
4706
4707 Register DstVec = ScalarToVec->getOperand(0).getReg();
4708 unsigned DstSize = DstTy.getSizeInBits();
4709
4710 // Keep track of the last MI we inserted. Later on, we might be able to save
4711 // a copy using it.
4712 MachineInstr *PrevMI = nullptr;
4713 for (unsigned i = 2, e = DstSize / EltSize + 1; i < e; ++i) {
4714 // Note that if we don't do a subregister copy, we can end up making an
4715 // extra register.
4716 PrevMI = &*emitLaneInsert(None, DstVec, I.getOperand(i).getReg(), i - 1, RB,
4717 MIRBuilder);
4718 DstVec = PrevMI->getOperand(0).getReg();
4719 }
4720
4721 // If DstTy's size in bits is less than 128, then emit a subregister copy
4722 // from DstVec to the last register we've defined.
4723 if (DstSize < 128) {
4724 // Force this to be FPR using the destination vector.
4725 const TargetRegisterClass *RC =
4726 getMinClassForRegBank(*RBI.getRegBank(DstVec, MRI, TRI), DstSize);
4727 if (!RC)
4728 return false;
4729 if (RC != &AArch64::FPR32RegClass && RC != &AArch64::FPR64RegClass) {
4730 LLVM_DEBUG(dbgs() << "Unsupported register class!\n");
4731 return false;
4732 }
4733
4734 unsigned SubReg = 0;
4735 if (!getSubRegForClass(RC, TRI, SubReg))
4736 return false;
4737 if (SubReg != AArch64::ssub && SubReg != AArch64::dsub) {
4738 LLVM_DEBUG(dbgs() << "Unsupported destination size! (" << DstSize
4739 << "\n");
4740 return false;
4741 }
4742
4743 Register Reg = MRI.createVirtualRegister(RC);
4744 Register DstReg = I.getOperand(0).getReg();
4745
4746 MIRBuilder.buildInstr(TargetOpcode::COPY, {DstReg}, {})
4747 .addReg(DstVec, 0, SubReg);
4748 MachineOperand &RegOp = I.getOperand(1);
4749 RegOp.setReg(Reg);
4750 RBI.constrainGenericRegister(DstReg, *RC, MRI);
4751 } else {
4752 // We don't need a subregister copy. Save a copy by re-using the
4753 // destination register on the final insert.
4754 assert(PrevMI && "PrevMI was null?");
4755 PrevMI->getOperand(0).setReg(I.getOperand(0).getReg());
4756 constrainSelectedInstRegOperands(*PrevMI, TII, TRI, RBI);
4757 }
4758
4759 I.eraseFromParent();
4760 return true;
4761}
4762
4763/// Helper function to find an intrinsic ID on a MachineInstr. Returns the
4764/// ID if it exists, and 0 otherwise.
4765static unsigned findIntrinsicID(MachineInstr &I) {
4766 auto IntrinOp = find_if(I.operands(), [&](const MachineOperand &Op) {
4767 return Op.isIntrinsicID();
4768 });
4769 if (IntrinOp == I.operands_end())
4770 return 0;
4771 return IntrinOp->getIntrinsicID();
4772}
4773
4774bool AArch64InstructionSelector::selectIntrinsicWithSideEffects(
4775 MachineInstr &I, MachineRegisterInfo &MRI) const {
4776 // Find the intrinsic ID.
4777 unsigned IntrinID = findIntrinsicID(I);
4778 if (!IntrinID)
4779 return false;
4780 MachineIRBuilder MIRBuilder(I);
4781
4782 // Select the instruction.
4783 switch (IntrinID) {
4784 default:
4785 return false;
4786 case Intrinsic::trap:
4787 MIRBuilder.buildInstr(AArch64::BRK, {}, {}).addImm(1);
4788 break;
4789 case Intrinsic::debugtrap:
4790 MIRBuilder.buildInstr(AArch64::BRK, {}, {}).addImm(0xF000);
4791 break;
4792 }
4793
4794 I.eraseFromParent();
4795 return true;
4796}
4797
4798bool AArch64InstructionSelector::selectIntrinsic(MachineInstr &I,
4799 MachineRegisterInfo &MRI) {
4800 unsigned IntrinID = findIntrinsicID(I);
4801 if (!IntrinID)
4802 return false;
4803 MachineIRBuilder MIRBuilder(I);
4804
4805 switch (IntrinID) {
4806 default:
4807 break;
4808 case Intrinsic::aarch64_crypto_sha1h: {
4809 Register DstReg = I.getOperand(0).getReg();
4810 Register SrcReg = I.getOperand(2).getReg();
4811
4812 // FIXME: Should this be an assert?
4813 if (MRI.getType(DstReg).getSizeInBits() != 32 ||
4814 MRI.getType(SrcReg).getSizeInBits() != 32)
4815 return false;
4816
4817 // The operation has to happen on FPRs. Set up some new FPR registers for
4818 // the source and destination if they are on GPRs.
4819 if (RBI.getRegBank(SrcReg, MRI, TRI)->getID() != AArch64::FPRRegBankID) {
4820 SrcReg = MRI.createVirtualRegister(&AArch64::FPR32RegClass);
4821 MIRBuilder.buildCopy({SrcReg}, {I.getOperand(2)});
4822
4823 // Make sure the copy ends up getting constrained properly.
4824 RBI.constrainGenericRegister(I.getOperand(2).getReg(),
4825 AArch64::GPR32RegClass, MRI);
4826 }
4827
4828 if (RBI.getRegBank(DstReg, MRI, TRI)->getID() != AArch64::FPRRegBankID)
4829 DstReg = MRI.createVirtualRegister(&AArch64::FPR32RegClass);
4830
4831 // Actually insert the instruction.
4832 auto SHA1Inst = MIRBuilder.buildInstr(AArch64::SHA1Hrr, {DstReg}, {SrcReg});
4833 constrainSelectedInstRegOperands(*SHA1Inst, TII, TRI, RBI);
4834
4835 // Did we create a new register for the destination?
4836 if (DstReg != I.getOperand(0).getReg()) {
4837 // Yep. Copy the result of the instruction back into the original
4838 // destination.
4839 MIRBuilder.buildCopy({I.getOperand(0)}, {DstReg});
4840 RBI.constrainGenericRegister(I.getOperand(0).getReg(),
4841 AArch64::GPR32RegClass, MRI);
4842 }
4843
4844 I.eraseFromParent();
4845 return true;
4846 }
4847 case Intrinsic::frameaddress:
4848 case Intrinsic::returnaddress: {
4849 MachineFunction &MF = *I.getParent()->getParent();
4850 MachineFrameInfo &MFI = MF.getFrameInfo();
4851
4852 unsigned Depth = I.getOperand(2).getImm();
4853 Register DstReg = I.getOperand(0).getReg();
4854 RBI.constrainGenericRegister(DstReg, AArch64::GPR64RegClass, MRI);
4855
4856 if (Depth == 0 && IntrinID == Intrinsic::returnaddress) {
4857 if (!MFReturnAddr) {
4858 // Insert the copy from LR/X30 into the entry block, before it can be
4859 // clobbered by anything.
4860 MFI.setReturnAddressIsTaken(true);
4861 MFReturnAddr = getFunctionLiveInPhysReg(MF, TII, AArch64::LR,
4862 AArch64::GPR64RegClass);
4863 }
4864
4865 if (STI.hasV8_3aOps()) {
4866 MIRBuilder.buildInstr(AArch64::XPACI, {DstReg}, {MFReturnAddr});
4867 } else {
4868 MIRBuilder.buildCopy({Register(AArch64::LR)}, {MFReturnAddr});
4869 MIRBuilder.buildInstr(AArch64::XPACLRI);
4870 MIRBuilder.buildCopy({DstReg}, {Register(AArch64::LR)});
4871 }
4872
4873 I.eraseFromParent();
4874 return true;
4875 }
4876
4877 MFI.setFrameAddressIsTaken(true);
4878 Register FrameAddr(AArch64::FP);
4879 while (Depth--) {
4880 Register NextFrame = MRI.createVirtualRegister(&AArch64::GPR64spRegClass);
4881 auto Ldr =
4882 MIRBuilder.buildInstr(AArch64::LDRXui, {NextFrame}, {FrameAddr})
4883 .addImm(0);
4884 constrainSelectedInstRegOperands(*Ldr, TII, TRI, RBI);
4885 FrameAddr = NextFrame;
4886 }
4887
4888 if (IntrinID == Intrinsic::frameaddress)
4889 MIRBuilder.buildCopy({DstReg}, {FrameAddr});
4890 else {
4891 MFI.setReturnAddressIsTaken(true);
4892
4893 if (STI.hasV8_3aOps()) {
4894 Register TmpReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
4895 MIRBuilder.buildInstr(AArch64::LDRXui, {TmpReg}, {FrameAddr}).addImm(1);
4896 MIRBuilder.buildInstr(AArch64::XPACI, {DstReg}, {TmpReg});
4897 } else {
4898 MIRBuilder.buildInstr(AArch64::LDRXui, {Register(AArch64::LR)}, {FrameAddr}).addImm(1);
4899 MIRBuilder.buildInstr(AArch64::XPACLRI);
4900 MIRBuilder.buildCopy({DstReg}, {Register(AArch64::LR)});
4901 }
4902 }
4903
4904 I.eraseFromParent();
4905 return true;
4906 }
4907 }
4908 return false;
4909}
4910
4911InstructionSelector::ComplexRendererFns
4912AArch64InstructionSelector::selectShiftA_32(const MachineOperand &Root) const {
4913 auto MaybeImmed = getImmedFromMO(Root);
4914 if (MaybeImmed == None || *MaybeImmed > 31)
4915 return None;
4916 uint64_t Enc = (32 - *MaybeImmed) & 0x1f;
4917 return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Enc); }}};
4918}
4919
4920InstructionSelector::ComplexRendererFns
4921AArch64InstructionSelector::selectShiftB_32(const MachineOperand &Root) const {
4922 auto MaybeImmed = getImmedFromMO(Root);
4923 if (MaybeImmed == None || *MaybeImmed > 31)
4924 return None;
4925 uint64_t Enc = 31 - *MaybeImmed;
4926 return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Enc); }}};
4927}
4928
4929InstructionSelector::ComplexRendererFns
4930AArch64InstructionSelector::selectShiftA_64(const MachineOperand &Root) const {
4931 auto MaybeImmed = getImmedFromMO(Root);
4932 if (MaybeImmed == None || *MaybeImmed > 63)
4933 return None;
4934 uint64_t Enc = (64 - *MaybeImmed) & 0x3f;
4935 return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Enc); }}};
4936}
4937
4938InstructionSelector::ComplexRendererFns
4939AArch64InstructionSelector::selectShiftB_64(const MachineOperand &Root) const {
4940 auto MaybeImmed = getImmedFromMO(Root);
4941 if (MaybeImmed == None || *MaybeImmed > 63)
4942 return None;
4943 uint64_t Enc = 63 - *MaybeImmed;
4944 return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Enc); }}};
4945}
4946
4947/// Helper to select an immediate value that can be represented as a 12-bit
4948/// value shifted left by either 0 or 12. If it is possible to do so, return
4949/// the immediate and shift value. If not, return None.
4950///
4951/// Used by selectArithImmed and selectNegArithImmed.
4952InstructionSelector::ComplexRendererFns
4953AArch64InstructionSelector::select12BitValueWithLeftShift(
4954 uint64_t Immed) const {
4955 unsigned ShiftAmt;
4956 if (Immed >> 12 == 0) {
4957 ShiftAmt = 0;
4958 } else if ((Immed & 0xfff) == 0 && Immed >> 24 == 0) {
4959 ShiftAmt = 12;
4960 Immed = Immed >> 12;
4961 } else
4962 return None;
4963
4964 unsigned ShVal = AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftAmt);
4965 return {{
4966 [=](MachineInstrBuilder &MIB) { MIB.addImm(Immed); },
4967 [=](MachineInstrBuilder &MIB) { MIB.addImm(ShVal); },
4968 }};
4969}
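// Worked example (illustrative): Immed = 0x5000 has its low 12 bits clear and
// fits in 24 bits, so it is returned as (Immed = 5, LSL #12); Immed = 0x5001
// has bits set in both halves, and the helper returns None.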
4970
4971/// SelectArithImmed - Select an immediate value that can be represented as
4972/// a 12-bit value shifted left by either 0 or 12. If so, return true with
4973/// Val set to the 12-bit value and Shift set to the shifter operand.
4974InstructionSelector::ComplexRendererFns
4975AArch64InstructionSelector::selectArithImmed(MachineOperand &Root) const {
4976 // This function is called from the addsub_shifted_imm ComplexPattern,
4977 // which lists [imm] as the list of opcode it's interested in, however
4978 // we still need to check whether the operand is actually an immediate
4979 // here because the ComplexPattern opcode list is only used in
4980 // root-level opcode matching.
4981 auto MaybeImmed = getImmedFromMO(Root);
4982 if (MaybeImmed == None)
4983 return None;
4984 return select12BitValueWithLeftShift(*MaybeImmed);
4985}
4986
4987/// SelectNegArithImmed - As above, but negates the value before trying to
4988/// select it.
4989InstructionSelector::ComplexRendererFns
4990AArch64InstructionSelector::selectNegArithImmed(MachineOperand &Root) const {
4991 // We need a register here, because we need to know if we have a 64 or 32
4992 // bit immediate.
4993 if (!Root.isReg())
4994 return None;
4995 auto MaybeImmed = getImmedFromMO(Root);
4996 if (MaybeImmed == None)
4997 return None;
4998 uint64_t Immed = *MaybeImmed;
4999
5000 // This negation is almost always valid, but "cmp wN, #0" and "cmn wN, #0"
5001 // have the opposite effect on the C flag, so this pattern mustn't match under
5002 // those circumstances.
5003 if (Immed == 0)
5004 return None;
5005
5006 // Check if we're dealing with a 32-bit type on the root or a 64-bit type on
5007 // the root.
5008 MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();
5009 if (MRI.getType(Root.getReg()).getSizeInBits() == 32)
5010 Immed = ~((uint32_t)Immed) + 1;
5011 else
5012 Immed = ~Immed + 1ULL;
5013
5014 if (Immed & 0xFFFFFFFFFF000000ULL)
5015 return None;
5016
5017 Immed &= 0xFFFFFFULL;
5018 return select12BitValueWithLeftShift(Immed);
5019}
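// Illustrative example: for a 32-bit root holding -5 (0xFFFFFFFB), the
// negation above yields 5, which encodes as (5, LSL #0); this is how an add
// of a negative constant can be selected as the corresponding
// subtract-immediate, while the Immed == 0 bail-out keeps cmp/cmn #0 from
// being flipped.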
5020
5021/// Return true if it is worth folding MI into an extended register. That is,
5022/// if it's safe to pull it into the addressing mode of a load or store as a
5023/// shift.
5024bool AArch64InstructionSelector::isWorthFoldingIntoExtendedReg(
5025 MachineInstr &MI, const MachineRegisterInfo &MRI) const {
5026 // Always fold if there is one use, or if we're optimizing for size.
5027 Register DefReg = MI.getOperand(0).getReg();
5028 if (MRI.hasOneNonDBGUse(DefReg) ||
5029 MI.getParent()->getParent()->getFunction().hasMinSize())
5030 return true;
5031
5032 // It's better to avoid folding and recomputing shifts when we don't have a
5033 // fastpath.
5034 if (!STI.hasLSLFast())
5035 return false;
5036
5037 // We have a fastpath, so folding a shift in and potentially computing it
5038 // many times may be beneficial. Check if this is only used in memory ops.
5039 // If it is, then we should fold.
5040 return all_of(MRI.use_nodbg_instructions(DefReg),
5041 [](MachineInstr &Use) { return Use.mayLoadOrStore(); });
5042}
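// Example of the multi-use case (sketch): with LSLFast, a G_SHL whose result
// feeds two different G_LOADs is still folded, since every non-debug user is
// a memory op; without the feature the shift is kept as a separate
// instruction and reused.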
5043
5044static bool isSignExtendShiftType(AArch64_AM::ShiftExtendType Type) {
5045 switch (Type) {
5046 case AArch64_AM::SXTB:
5047 case AArch64_AM::SXTH:
5048 case AArch64_AM::SXTW:
5049 return true;
5050 default:
5051 return false;
5052 }
5053}
5054
5055InstructionSelector::ComplexRendererFns
5056AArch64InstructionSelector::selectExtendedSHL(
5057 MachineOperand &Root, MachineOperand &Base, MachineOperand &Offset,
5058 unsigned SizeInBytes, bool WantsExt) const {
5059 assert(Base.isReg() && "Expected base to be a register operand");
5060 assert(Offset.isReg() && "Expected offset to be a register operand");
5061
5062 MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();
5063 MachineInstr *OffsetInst = MRI.getVRegDef(Offset.getReg());
5064 if (!OffsetInst)
5065 return None;
5066
5067 unsigned OffsetOpc = OffsetInst->getOpcode();
5068 bool LookedThroughZExt = false;
5069 if (OffsetOpc != TargetOpcode::G_SHL && OffsetOpc != TargetOpcode::G_MUL) {
5070 // Try to look through a ZEXT.
5071 if (OffsetOpc != TargetOpcode::G_ZEXT || !WantsExt)
5072 return None;
5073
5074 OffsetInst = MRI.getVRegDef(OffsetInst->getOperand(1).getReg());
5075 OffsetOpc = OffsetInst->getOpcode();
5076 LookedThroughZExt = true;
5077
5078 if (OffsetOpc != TargetOpcode::G_SHL && OffsetOpc != TargetOpcode::G_MUL)
5079 return None;
5080 }
5081 // Make sure that the memory op is a valid size.
5082 int64_t LegalShiftVal = Log2_32(SizeInBytes);
5083 if (LegalShiftVal == 0)
5084 return None;
5085 if (!isWorthFoldingIntoExtendedReg(*OffsetInst, MRI))
5086 return None;
5087
5088 // Now, try to find the specific G_CONSTANT. Start by assuming that the
5089 // register we will offset is the LHS, and the register containing the
5090 // constant is the RHS.
5091 Register OffsetReg = OffsetInst->getOperand(1).getReg();
5092 Register ConstantReg = OffsetInst->getOperand(2).getReg();
5093 auto ValAndVReg = getConstantVRegValWithLookThrough(ConstantReg, MRI);
5094 if (!ValAndVReg) {
5095 // We didn't get a constant on the RHS. If the opcode is a shift, then
5096 // we're done.
5097 if (OffsetOpc == TargetOpcode::G_SHL)
5098 return None;
5099
5100 // If we have a G_MUL, we can use either register. Try looking at the RHS.
5101 std::swap(OffsetReg, ConstantReg);
5102 ValAndVReg = getConstantVRegValWithLookThrough(ConstantReg, MRI);
5103 if (!ValAndVReg)
5104 return None;
5105 }
5106
5107 // The value must fit into 3 bits, and must be positive. Make sure that is
5108 // true.
5109 int64_t ImmVal = ValAndVReg->Value;
5110
5111 // Since we're going to pull this into a shift, the constant value must be
5112 // a power of 2. If we got a multiply, then we need to check this.
5113 if (OffsetOpc == TargetOpcode::G_MUL) {
5114 if (!isPowerOf2_32(ImmVal))
5115 return None;
5116
5117 // Got a power of 2. So, the amount we'll shift is the log base-2 of that.
5118 ImmVal = Log2_32(ImmVal);
5119 }
5120
5121 if ((ImmVal & 0x7) != ImmVal)
5122 return None;
5123
5124 // We are only allowed to shift by LegalShiftVal. This shift value is built
5125 // into the instruction, so we can't just use whatever we want.
5126 if (ImmVal != LegalShiftVal)
5127 return None;
5128
5129 unsigned SignExtend = 0;
5130 if (WantsExt) {
5131 // Check if the offset is defined by an extend, unless we looked through a
5132 // G_ZEXT earlier.
5133 if (!LookedThroughZExt) {
5134 MachineInstr *ExtInst = getDefIgnoringCopies(OffsetReg, MRI);
5135 auto Ext = getExtendTypeForInst(*ExtInst, MRI, true);
5136 if (Ext == AArch64_AM::InvalidShiftExtend)
5137 return None;
5138
5139 SignExtend = isSignExtendShiftType(Ext) ? 1 : 0;
5140 // We only support SXTW for signed extension here.
5141 if (SignExtend && Ext != AArch64_AM::SXTW)
5142 return None;
5143 OffsetReg = ExtInst->getOperand(1).getReg();
5144 }
5145
5146 // Need a 32-bit wide register here.
5147 MachineIRBuilder MIB(*MRI.getVRegDef(Root.getReg()));
5148 OffsetReg = narrowExtendRegIfNeeded(OffsetReg, MIB);
5149 }
5150
5151 // We can use the LHS of the GEP as the base, and the LHS of the shift as an
5152 // offset. Signify that we are shifting by setting the shift flag to 1.
5153 return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(Base.getReg()); },
5154 [=](MachineInstrBuilder &MIB) { MIB.addUse(OffsetReg); },
5155 [=](MachineInstrBuilder &MIB) {
5156 // Need to add both immediates here to make sure that they are both
5157 // added to the instruction.
5158 MIB.addImm(SignExtend);
5159 MIB.addImm(1);
5160 }}};
5161}
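// Illustrative example: for an 8-byte access (LegalShiftVal = 3), an offset
// defined by "G_SHL %idx, 3" or "G_MUL %idx, 8" is folded into
// [base, %idx, lsl #3]; a multiply by 4 would imply a shift of 2 and is
// rejected above because it does not match the access size.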
5162
5163/// This is used for computing addresses like this:
5164///
5165/// ldr x1, [x2, x3, lsl #3]
5166///
5167/// Where x2 is the base register, and x3 is an offset register. The shift-left
5168/// is a constant value specific to this load instruction. That is, we'll never
5169/// see anything other than a 3 here (which corresponds to the size of the
5170/// element being loaded.)
5171InstructionSelector::ComplexRendererFns
5172AArch64InstructionSelector::selectAddrModeShiftedExtendXReg(
5173 MachineOperand &Root, unsigned SizeInBytes) const {
5174 if (!Root.isReg())
5175 return None;
5176 MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();
5177
5178 // We want to find something like this:
5179 //
5180 // val = G_CONSTANT LegalShiftVal
5181 // shift = G_SHL off_reg val
5182 // ptr = G_PTR_ADD base_reg shift
5183 // x = G_LOAD ptr
5184 //
5185 // And fold it into this addressing mode:
5186 //
5187 // ldr x, [base_reg, off_reg, lsl #LegalShiftVal]
5188
5189 // Check if we can find the G_PTR_ADD.
5190 MachineInstr *PtrAdd =
5191 getOpcodeDef(TargetOpcode::G_PTR_ADD, Root.getReg(), MRI);
5192 if (!PtrAdd || !isWorthFoldingIntoExtendedReg(*PtrAdd, MRI))
5193 return None;
5194
5195 // Now, try to match an opcode which will match our specific offset.
5196 // We want a G_SHL or a G_MUL.
5197 MachineInstr *OffsetInst =
5198 getDefIgnoringCopies(PtrAdd->getOperand(2).getReg(), MRI);
5199 return selectExtendedSHL(Root, PtrAdd->getOperand(1),
5200 OffsetInst->getOperand(0), SizeInBytes,
5201 /*WantsExt=*/false);
5202}
5203
5204/// This is used for computing addresses like this:
5205///
5206/// ldr x1, [x2, x3]
5207///
5208/// Where x2 is the base register, and x3 is an offset register.
5209///
5210/// When it is possible (or profitable) to fold a G_PTR_ADD into the address
5211/// calculation, this will do so. Otherwise, it will return None.
5212InstructionSelector::ComplexRendererFns
5213AArch64InstructionSelector::selectAddrModeRegisterOffset(
5214 MachineOperand &Root) const {
5215 MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();
5216
5217 // We need a GEP.
5218 MachineInstr *Gep = MRI.getVRegDef(Root.getReg());
5219 if (!Gep || Gep->getOpcode() != TargetOpcode::G_PTR_ADD)
5220 return None;
5221
5222 // If this is used more than once, let's not bother folding.
5223 // TODO: Check if they are memory ops. If they are, then we can still fold
5224 // without having to recompute anything.
5225 if (!MRI.hasOneNonDBGUse(Gep->getOperand(0).getReg()))
5226 return None;
5227
5228 // Base is the GEP's LHS, offset is its RHS.
5229 return {{[=](MachineInstrBuilder &MIB) {
5230 MIB.addUse(Gep->getOperand(1).getReg());
5231 },
5232 [=](MachineInstrBuilder &MIB) {
5233 MIB.addUse(Gep->getOperand(2).getReg());
5234 },
5235 [=](MachineInstrBuilder &MIB) {
5236 // Need to add both immediates here to make sure that they are both
5237 // added to the instruction.
5238 MIB.addImm(0);
5239 MIB.addImm(0);
5240 }}};
5241}
5242
5243/// This is intended to be equivalent to selectAddrModeXRO in
5244/// AArch64ISelDAGtoDAG. It's used for selecting X register offset loads.
5245InstructionSelector::ComplexRendererFns
5246AArch64InstructionSelector::selectAddrModeXRO(MachineOperand &Root,
5247 unsigned SizeInBytes) const {
5248 MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();
5249 if (!Root.isReg())
5250 return None;
5251 MachineInstr *PtrAdd =
5252 getOpcodeDef(TargetOpcode::G_PTR_ADD, Root.getReg(), MRI);
5253 if (!PtrAdd)
5254 return None;
5255
5256 // Check for an immediate which cannot be encoded in the [base + imm]
5257 // addressing mode, and can't be encoded in an add/sub. If this happens, we'll
5258 // end up with code like:
5259 //
5260 // mov x0, wide
5261 // add x1 base, x0
5262 // ldr x2, [x1, x0]
5263 //
5264 // In this situation, we can use the [base, xreg] addressing mode to save an
5265 // add/sub:
5266 //
5267 // mov x0, wide
5268 // ldr x2, [base, x0]
5269 auto ValAndVReg =
5270 getConstantVRegValWithLookThrough(PtrAdd->getOperand(2).getReg(), MRI);
5271 if (ValAndVReg) {
5272 unsigned Scale = Log2_32(SizeInBytes);
5273 int64_t ImmOff = ValAndVReg->Value;
5274
5275 // Skip immediates that can be selected in the load/store addressing
5276 // mode.
5277 if (ImmOff % SizeInBytes == 0 && ImmOff >= 0 &&
5278 ImmOff < (0x1000 << Scale))
5279 return None;
5280
5281 // Helper lambda to decide whether or not it is preferable to emit an add.
5282 auto isPreferredADD = [](int64_t ImmOff) {
5283 // Constants in [0x0, 0xfff] can be encoded in an add.
5284 if ((ImmOff & 0xfffffffffffff000LL) == 0x0LL)
5285 return true;
5286
5287 // Can it be encoded in an add lsl #12?
5288 if ((ImmOff & 0xffffffffff000fffLL) != 0x0LL)
5289 return false;
5290
5291 // It can be encoded in an add lsl #12, but we may not want to. If it is
5292 // possible to select this as a single movz, then prefer that. A single
5293 // movz is faster than an add with a shift.
5294 return (ImmOff & 0xffffffffff00ffffLL) != 0x0LL &&
5295 (ImmOff & 0xffffffffffff0fffLL) != 0x0LL;
5296 };
5297
5298 // If the immediate can be encoded in a single add/sub, then bail out.
5299 if (isPreferredADD(ImmOff) || isPreferredADD(-ImmOff))
5300 return None;
5301 }
5302
5303 // Try to fold shifts into the addressing mode.
5304 auto AddrModeFns = selectAddrModeShiftedExtendXReg(Root, SizeInBytes);
5305 if (AddrModeFns)
5306 return AddrModeFns;
5307
5308 // If that doesn't work, see if it's possible to fold in registers from
5309 // a GEP.
5310 return selectAddrModeRegisterOffset(Root);
5311}
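// Illustrative case (sketch): a constant offset such as 0x123456 on an 8-byte
// access is too large for the scaled [base, #imm] form and cannot be folded
// into a single add or add-lsl-#12, so the code above keeps it in a register
// and falls back to the [base, xN] register-offset form.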
5312
5313/// This is used for computing addresses like this:
5314///
5315/// ldr x0, [xBase, wOffset, sxtw #LegalShiftVal]
5316///
5317/// Where we have a 64-bit base register, a 32-bit offset register, and an
5318/// extend (which may or may not be signed).
5319InstructionSelector::ComplexRendererFns
5320AArch64InstructionSelector::selectAddrModeWRO(MachineOperand &Root,
5321 unsigned SizeInBytes) const {
5322 MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();
5323
5324 MachineInstr *PtrAdd =
5325 getOpcodeDef(TargetOpcode::G_PTR_ADD, Root.getReg(), MRI);
5326 if (!PtrAdd || !isWorthFoldingIntoExtendedReg(*PtrAdd, MRI))
5327 return None;
5328
5329 MachineOperand &LHS = PtrAdd->getOperand(1);
5330 MachineOperand &RHS = PtrAdd->getOperand(2);
5331 MachineInstr *OffsetInst = getDefIgnoringCopies(RHS.getReg(), MRI);
5332
5333 // The first case is the same as selectAddrModeXRO, except we need an extend.
5334 // In this case, we try to find a shift and extend, and fold them into the
5335 // addressing mode.
5336 //
5337 // E.g.
5338 //
5339 // off_reg = G_Z/S/ANYEXT ext_reg
5340 // val = G_CONSTANT LegalShiftVal
5341 // shift = G_SHL off_reg val
5342 // ptr = G_PTR_ADD base_reg shift
5343 // x = G_LOAD ptr
5344 //
5345 // In this case we can get a load like this:
5346 //
5347 // ldr x0, [base_reg, ext_reg, sxtw #LegalShiftVal]
5348 auto ExtendedShl = selectExtendedSHL(Root, LHS, OffsetInst->getOperand(0),
5349 SizeInBytes, /*WantsExt=*/true);
5350 if (ExtendedShl)
5351 return ExtendedShl;
5352
5353 // There was no shift. We can try and fold a G_Z/S/ANYEXT in alone though.
5354 //
5355 // e.g.
5356 // ldr something, [base_reg, ext_reg, sxtw]
5357 if (!isWorthFoldingIntoExtendedReg(*OffsetInst, MRI))
5358 return None;
5359
5360 // Check if this is an extend. We'll get an extend type if it is.
5361 AArch64_AM::ShiftExtendType Ext =
5362 getExtendTypeForInst(*OffsetInst, MRI, /*IsLoadStore=*/true);
5363 if (Ext == AArch64_AM::InvalidShiftExtend)
5364 return None;
5365
5366 // Need a 32-bit wide register.
5367 MachineIRBuilder MIB(*PtrAdd);
5368 Register ExtReg =
5369 narrowExtendRegIfNeeded(OffsetInst->getOperand(1).getReg(), MIB);
5370 unsigned SignExtend = Ext == AArch64_AM::SXTW;
5371
5372 // Base is LHS, offset is ExtReg.
5373 return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(LHS.getReg()); },
5374 [=](MachineInstrBuilder &MIB) { MIB.addUse(ExtReg); },
5375 [=](MachineInstrBuilder &MIB) {
5376 MIB.addImm(SignExtend);
5377 MIB.addImm(0);
5378 }}};
5379}
5380
5381/// Select a "register plus unscaled signed 9-bit immediate" address. This
5382/// should only match when there is an offset that is not valid for a scaled
5383/// immediate addressing mode. The "Size" argument is the size in bytes of the
5384/// memory reference, which is needed here to know what is valid for a scaled
5385/// immediate.
5386InstructionSelector::ComplexRendererFns
5387AArch64InstructionSelector::selectAddrModeUnscaled(MachineOperand &Root,
5388 unsigned Size) const {
5389 MachineRegisterInfo &MRI =
5390 Root.getParent()->getParent()->getParent()->getRegInfo();
5391
5392 if (!Root.isReg())
5393 return None;
5394
5395 if (!isBaseWithConstantOffset(Root, MRI))
5396 return None;
5397
5398 MachineInstr *RootDef = MRI.getVRegDef(Root.getReg());
5399 if (!RootDef)
5400 return None;
5401
5402 MachineOperand &OffImm = RootDef->getOperand(2);
5403 if (!OffImm.isReg())
5404 return None;
5405 MachineInstr *RHS = MRI.getVRegDef(OffImm.getReg());
5406 if (!RHS || RHS->getOpcode() != TargetOpcode::G_CONSTANT)
5407 return None;
5408 int64_t RHSC;
5409 MachineOperand &RHSOp1 = RHS->getOperand(1);
5410 if (!RHSOp1.isCImm() || RHSOp1.getCImm()->getBitWidth() > 64)
5411 return None;
5412 RHSC = RHSOp1.getCImm()->getSExtValue();
5413
5414 // If the offset is valid as a scaled immediate, don't match here.
5415 if ((RHSC & (Size - 1)) == 0 && RHSC >= 0 && RHSC < (0x1000 << Log2_32(Size)))
5416 return None;
5417 if (RHSC >= -256 && RHSC < 256) {
5418 MachineOperand &Base = RootDef->getOperand(1);
5419 return {{
5420 [=](MachineInstrBuilder &MIB) { MIB.add(Base); },
5421 [=](MachineInstrBuilder &MIB) { MIB.addImm(RHSC); },
5422 }};
5423 }
5424 return None;
5425}
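// Illustrative example: for a 4-byte access with offset 3, the offset is not
// a multiple of the access size, so the scaled form is skipped, but 3 lies in
// [-256, 255] and the unscaled (LDUR/STUR-style) form matches with renderers
// {base, #3}.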
5426
5427InstructionSelector::ComplexRendererFns
5428AArch64InstructionSelector::tryFoldAddLowIntoImm(MachineInstr &RootDef,
5429 unsigned Size,
5430 MachineRegisterInfo &MRI) const {
5431 if (RootDef.getOpcode() != AArch64::G_ADD_LOW)
5432 return None;
5433 MachineInstr &Adrp = *MRI.getVRegDef(RootDef.getOperand(1).getReg());
5434 if (Adrp.getOpcode() != AArch64::ADRP)
5435 return None;
5436
5437 // TODO: add heuristics like isWorthFoldingADDlow() from SelectionDAG.
5438 // TODO: Need to check GV's offset % size if doing offset folding into globals.
5439 assert(Adrp.getOperand(1).getOffset() == 0 && "Unexpected offset in global");
5440 auto GV = Adrp.getOperand(1).getGlobal();
5441 if (GV->isThreadLocal())
5442 return None;
5443
5444 auto &MF = *RootDef.getParent()->getParent();
5445 if (GV->getPointerAlignment(MF.getDataLayout()) < Size)
5446 return None;
5447
5448 unsigned OpFlags = STI.ClassifyGlobalReference(GV, MF.getTarget());
5449 MachineIRBuilder MIRBuilder(RootDef);
5450 Register AdrpReg = Adrp.getOperand(0).getReg();
5451 return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(AdrpReg); },
5452 [=](MachineInstrBuilder &MIB) {
5453 MIB.addGlobalAddress(GV, /* Offset */ 0,
5454 OpFlags | AArch64II::MO_PAGEOFF |
5455 AArch64II::MO_NC);
5456 }}};
5457}
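// Sketch of the folded pattern: an ADRP of @g followed by G_ADD_LOW collapses
// into the operand pair {adrp_result, :lo12:g}, so the consuming access can be
// selected as "ldr x0, [xN, :lo12:g]", provided the global's alignment is at
// least the access size (checked above).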
5458
5459/// Select a "register plus scaled unsigned 12-bit immediate" address. The
5460/// "Size" argument is the size in bytes of the memory reference, which
5461/// determines the scale.
5462InstructionSelector::ComplexRendererFns
5463AArch64InstructionSelector::selectAddrModeIndexed(MachineOperand &Root,
5464 unsigned Size) const {
5465 MachineFunction &MF = *Root.getParent()->getParent()->getParent();
5466 MachineRegisterInfo &MRI = MF.getRegInfo();
5467
5468 if (!Root.isReg())
5469 return None;
5470
5471 MachineInstr *RootDef = MRI.getVRegDef(Root.getReg());
5472 if (!RootDef)
5473 return None;
5474
5475 if (RootDef->getOpcode() == TargetOpcode::G_FRAME_INDEX) {
5476 return {{
5477 [=](MachineInstrBuilder &MIB) { MIB.add(RootDef->getOperand(1)); },
5478 [=](MachineInstrBuilder &MIB) { MIB.addImm(0); },
5479 }};
5480 }
5481
5482 CodeModel::Model CM = MF.getTarget().getCodeModel();
5483 // Check if we can fold in the ADD of small code model ADRP + ADD address.
5484 if (CM == CodeModel::Small) {
5485 auto OpFns = tryFoldAddLowIntoImm(*RootDef, Size, MRI);
5486 if (OpFns)
5487 return OpFns;
5488 }
5489
5490 if (isBaseWithConstantOffset(Root, MRI)) {
5491 MachineOperand &LHS = RootDef->getOperand(1);
5492 MachineOperand &RHS = RootDef->getOperand(2);
5493 MachineInstr *LHSDef = MRI.getVRegDef(LHS.getReg());
5494 MachineInstr *RHSDef = MRI.getVRegDef(RHS.getReg());
5495 if (LHSDef && RHSDef) {
5496 int64_t RHSC = (int64_t)RHSDef->getOperand(1).getCImm()->getZExtValue();
5497 unsigned Scale = Log2_32(Size);
5498 if ((RHSC & (Size - 1)) == 0 && RHSC >= 0 && RHSC < (0x1000 << Scale)) {
5499 if (LHSDef->getOpcode() == TargetOpcode::G_FRAME_INDEX)
5500 return {{
5501 [=](MachineInstrBuilder &MIB) { MIB.add(LHSDef->getOperand(1)); },
5502 [=](MachineInstrBuilder &MIB) { MIB.addImm(RHSC >> Scale); },
5503 }};
5504
5505 return {{
5506 [=](MachineInstrBuilder &MIB) { MIB.add(LHS); },
5507 [=](MachineInstrBuilder &MIB) { MIB.addImm(RHSC >> Scale); },
5508 }};
5509 }
5510 }
5511 }
5512
5513 // Before falling back to our general case, check if the unscaled
5514 // instructions can handle this. If so, that's preferable.
5515 if (selectAddrModeUnscaled(Root, Size).hasValue())
5516 return None;
5517
5518 return {{
5519 [=](MachineInstrBuilder &MIB) { MIB.add(Root); },
5520 [=](MachineInstrBuilder &MIB) { MIB.addImm(0); },
5521 }};
5522}
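// Worked example (illustrative): for an 8-byte load from "base + 16", the
// base-with-constant-offset path above returns {base, #2}: the byte offset 16
// is scaled down by Log2(8) = 3 to fit the unsigned scaled 12-bit immediate.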
5523
5524/// Given a shift instruction, return the correct shift type for that
5525/// instruction.
5526static AArch64_AM::ShiftExtendType getShiftTypeForInst(MachineInstr &MI) {
5527 // TODO: Handle AArch64_AM::ROR
5528 switch (MI.getOpcode()) {
5529 default:
5530 return AArch64_AM::InvalidShiftExtend;
5531 case TargetOpcode::G_SHL:
5532 return AArch64_AM::LSL;
5533 case TargetOpcode::G_LSHR:
5534 return AArch64_AM::LSR;
5535 case TargetOpcode::G_ASHR:
5536 return AArch64_AM::ASR;
5537 }
5538}
5539
5540/// Select a "shifted register" operand. If the value is not shifted, set the
5541/// shift operand to a default value of "lsl 0".
5542///
5543/// TODO: Allow shifted register to be rotated in logical instructions.
5544InstructionSelector::ComplexRendererFns
5545AArch64InstructionSelector::selectShiftedRegister(MachineOperand &Root) const {
5546 if (!Root.isReg())
5547 return None;
5548 MachineRegisterInfo &MRI =
5549 Root.getParent()->getParent()->getParent()->getRegInfo();
5550
5551 // Check if the operand is defined by an instruction which corresponds to
5552 // a ShiftExtendType. E.g. a G_SHL, G_LSHR, etc.
5553 //
5554 // TODO: Handle AArch64_AM::ROR for logical instructions.
5555 MachineInstr *ShiftInst = MRI.getVRegDef(Root.getReg());
5556 if (!ShiftInst)
5557 return None;
5558 AArch64_AM::ShiftExtendType ShType = getShiftTypeForInst(*ShiftInst);
5559 if (ShType == AArch64_AM::InvalidShiftExtend)
5560 return None;
5561 if (!isWorthFoldingIntoExtendedReg(*ShiftInst, MRI))
5562 return None;
5563
5564 // Need an immediate on the RHS.
5565 MachineOperand &ShiftRHS = ShiftInst->getOperand(2);
5566 auto Immed = getImmedFromMO(ShiftRHS);
5567 if (!Immed)
5568 return None;
5569
5570 // We have something that we can fold. Fold in the shift's LHS and RHS into
5571 // the instruction.
5572 MachineOperand &ShiftLHS = ShiftInst->getOperand(1);
5573 Register ShiftReg = ShiftLHS.getReg();
5574
5575 unsigned NumBits = MRI.getType(ShiftReg).getSizeInBits();
5576 unsigned Val = *Immed & (NumBits - 1);
5577 unsigned ShiftVal = AArch64_AM::getShifterImm(ShType, Val);
5578
5579 return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(ShiftReg); },
5580 [=](MachineInstrBuilder &MIB) { MIB.addImm(ShiftVal); }}};
5581}
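// Illustrative example: if the root is defined by "G_SHL %x, 12" on a 64-bit
// value, the renderers above add %x and the packed shifter immediate for
// "lsl #12", so a user such as G_ADD can be selected as
// "add xd, xn, xm, lsl #12".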
5582
5583AArch64_AM::ShiftExtendType AArch64InstructionSelector::getExtendTypeForInst(
5584 MachineInstr &MI, MachineRegisterInfo &MRI, bool IsLoadStore) const {
5585 unsigned Opc = MI.getOpcode();
5586
5587 // Handle explicit extend instructions first.
5588 if (Opc == TargetOpcode::G_SEXT || Opc == TargetOpcode::G_SEXT_INREG) {
5589 unsigned Size;
5590 if (Opc == TargetOpcode::G_SEXT)
5591 Size = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
5592 else
5593 Size = MI.getOperand(2).getImm();
5594 assert(Size != 64 && "Extend from 64 bits?");
5595 switch (Size) {
5596 case 8:
5597 return AArch64_AM::SXTB;
5598 case 16:
5599 return AArch64_AM::SXTH;
5600 case 32:
5601 return AArch64_AM::SXTW;
5602 default:
5603 return AArch64_AM::InvalidShiftExtend;
5604 }
5605 }
5606
5607 if (Opc == TargetOpcode::G_ZEXT || Opc == TargetOpcode::G_ANYEXT) {
5608 unsigned Size = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
5609 assert(Size != 64 && "Extend from 64 bits?");
5610 switch (Size) {
5611 case 8:
5612 return AArch64_AM::UXTB;
5613 case 16:
5614 return AArch64_AM::UXTH;
5615 case 32:
5616 return AArch64_AM::UXTW;
5617 default:
5618 return AArch64_AM::InvalidShiftExtend;
5619 }
5620 }
5621
5622 // Don't have an explicit extend. Try to handle a G_AND with a constant mask
5623 // on the RHS.
5624 if (Opc != TargetOpcode::G_AND)
5625 return AArch64_AM::InvalidShiftExtend;
5626
5627 Optional<uint64_t> MaybeAndMask = getImmedFromMO(MI.getOperand(2));
5628 if (!MaybeAndMask)
5629 return AArch64_AM::InvalidShiftExtend;
5630 uint64_t AndMask = *MaybeAndMask;
5631 switch (AndMask) {
5632 default:
5633 return AArch64_AM::InvalidShiftExtend;
5634 case 0xFF:
5635 return !IsLoadStore ? AArch64_AM::UXTB : AArch64_AM::InvalidShiftExtend;
5636 case 0xFFFF:
5637 return !IsLoadStore ? AArch64_AM::UXTH : AArch64_AM::InvalidShiftExtend;
5638 case 0xFFFFFFFF:
5639 return AArch64_AM::UXTW;
5640 }
5641}
5642
5643Register AArch64InstructionSelector::narrowExtendRegIfNeeded(