Bug Summary

File: llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
Warning: line 5717, column 67
The result of the left shift is undefined due to shifting by '4294967295', which is greater or equal to the width of type 'int'
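
In plain terms: the shift amount 4294967295 is UINT_MAX, i.e. typically a signed -1 ("no value" sentinel) that was converted to an unsigned count, and shifting a 32-bit 'int' by any count >= 32 is undefined behaviour in C++. A minimal standalone sketch of the flagged pattern and a guarded variant (hypothetical names, not the LLVM code itself):

#include <cstdint>

// UB sketch: if Imm carries a -1 sentinel, the conversion to an unsigned
// shift count yields 4294967295 and the shift below is undefined.
unsigned shiftByImm(int64_t Imm) {
  uint32_t ShiftAmt = static_cast<uint32_t>(Imm);
  return 1 << ShiftAmt; // undefined when ShiftAmt >= 32
}

// Defined variant: validate the count against the operand width first.
unsigned shiftByImmChecked(int64_t Imm) {
  uint32_t ShiftAmt = static_cast<uint32_t>(Imm);
  if (ShiftAmt >= 32)
    return 0; // reject out-of-range shift amounts
  return 1u << ShiftAmt;
}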

Annotated Source Code


clang -cc1 -cc1 -triple x86_64-pc-linux-gnu -analyze -disable-free -disable-llvm-verifier -discard-value-names -main-file-name AArch64InstructionSelector.cpp -analyzer-store=region -analyzer-opt-analyze-nested-blocks -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=cplusplus -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -analyzer-config-compatibility-mode=true -mrelocation-model pic -pic-level 2 -fhalf-no-semantic-interposition -mframe-pointer=none -fmath-errno -fno-rounding-math -mconstructor-aliases -munwind-tables -target-cpu x86-64 -tune-cpu generic -debugger-tuning=gdb -ffunction-sections -fdata-sections -fcoverage-compilation-dir=/build/llvm-toolchain-snapshot-13~++20210413100635+64c24f493e5f/build-llvm/lib/Target/AArch64 -resource-dir /usr/lib/llvm-13/lib/clang/13.0.0 -D _DEBUG -D _GNU_SOURCE -D __STDC_CONSTANT_MACROS -D __STDC_FORMAT_MACROS -D __STDC_LIMIT_MACROS -I /build/llvm-toolchain-snapshot-13~++20210413100635+64c24f493e5f/build-llvm/lib/Target/AArch64 -I /build/llvm-toolchain-snapshot-13~++20210413100635+64c24f493e5f/llvm/lib/Target/AArch64 -I /build/llvm-toolchain-snapshot-13~++20210413100635+64c24f493e5f/build-llvm/include -I /build/llvm-toolchain-snapshot-13~++20210413100635+64c24f493e5f/llvm/include -U NDEBUG -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/c++/6.3.0 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/x86_64-linux-gnu/c++/6.3.0 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/c++/6.3.0/backward -internal-isystem /usr/lib/llvm-13/lib/clang/13.0.0/include -internal-isystem /usr/local/include -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../x86_64-linux-gnu/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -O2 -Wno-unused-parameter -Wwrite-strings -Wno-missing-field-initializers -Wno-long-long -Wno-maybe-uninitialized -Wno-comment -std=c++14 -fdeprecated-macro -fdebug-compilation-dir=/build/llvm-toolchain-snapshot-13~++20210413100635+64c24f493e5f/build-llvm/lib/Target/AArch64 -fdebug-prefix-map=/build/llvm-toolchain-snapshot-13~++20210413100635+64c24f493e5f=. -ferror-limit 19 -fvisibility hidden -fvisibility-inlines-hidden -stack-protector 2 -fgnuc-version=4.2.1 -vectorize-loops -vectorize-slp -analyzer-output=html -analyzer-config stable-report-filename=true -faddrsig -D__GCC_HAVE_DWARF2_CFI_ASM=1 -o /tmp/scan-build-2021-04-14-063029-18377-1 -x c++ /build/llvm-toolchain-snapshot-13~++20210413100635+64c24f493e5f/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
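
For reference, reports in this format are produced by the Clang Static Analyzer driven by scan-build; a typical invocation (an assumption about the general workflow, not the exact Debian build driver used here) is:

  scan-build -o report-dir/ make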

/build/llvm-toolchain-snapshot-13~++20210413100635+64c24f493e5f/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp

1//===- AArch64InstructionSelector.cpp ----------------------------*- C++ -*-==//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8/// \file
9/// This file implements the targeting of the InstructionSelector class for
10/// AArch64.
11/// \todo This should be generated by TableGen.
12//===----------------------------------------------------------------------===//
13
14#include "AArch64GlobalISelUtils.h"
15#include "AArch64InstrInfo.h"
16#include "AArch64MachineFunctionInfo.h"
17#include "AArch64RegisterBankInfo.h"
18#include "AArch64RegisterInfo.h"
19#include "AArch64Subtarget.h"
20#include "AArch64TargetMachine.h"
21#include "AArch64GlobalISelUtils.h"
22#include "MCTargetDesc/AArch64AddressingModes.h"
23#include "MCTargetDesc/AArch64MCTargetDesc.h"
24#include "llvm/ADT/Optional.h"
25#include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
26#include "llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h"
27#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
28#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
29#include "llvm/CodeGen/MachineBasicBlock.h"
30#include "llvm/CodeGen/MachineConstantPool.h"
31#include "llvm/CodeGen/MachineFunction.h"
32#include "llvm/CodeGen/MachineInstr.h"
33#include "llvm/CodeGen/MachineInstrBuilder.h"
34#include "llvm/CodeGen/MachineOperand.h"
35#include "llvm/CodeGen/MachineRegisterInfo.h"
36#include "llvm/CodeGen/TargetOpcodes.h"
37#include "llvm/IR/Constants.h"
38#include "llvm/IR/DerivedTypes.h"
39#include "llvm/IR/Instructions.h"
40#include "llvm/IR/PatternMatch.h"
41#include "llvm/IR/Type.h"
42#include "llvm/IR/IntrinsicsAArch64.h"
43#include "llvm/Pass.h"
44#include "llvm/Support/Debug.h"
45#include "llvm/Support/raw_ostream.h"
46
47#define DEBUG_TYPE "aarch64-isel"
48
49using namespace llvm;
50using namespace MIPatternMatch;
51using namespace AArch64GISelUtils;
52
53namespace llvm {
54class BlockFrequencyInfo;
55class ProfileSummaryInfo;
56}
57
58namespace {
59
60#define GET_GLOBALISEL_PREDICATE_BITSET
61#include "AArch64GenGlobalISel.inc"
62#undef GET_GLOBALISEL_PREDICATE_BITSET
63
64class AArch64InstructionSelector : public InstructionSelector {
65public:
66 AArch64InstructionSelector(const AArch64TargetMachine &TM,
67 const AArch64Subtarget &STI,
68 const AArch64RegisterBankInfo &RBI);
69
70 bool select(MachineInstr &I) override;
71 static const char *getName() { return DEBUG_TYPE; }
72
73 void setupMF(MachineFunction &MF, GISelKnownBits *KB,
74 CodeGenCoverage &CoverageInfo, ProfileSummaryInfo *PSI,
75 BlockFrequencyInfo *BFI) override {
76 InstructionSelector::setupMF(MF, KB, CoverageInfo, PSI, BFI);
77
78 // hasFnAttribute() is expensive to call on every BRCOND selection, so
79 // cache it here for each run of the selector.
80 ProduceNonFlagSettingCondBr =
81 !MF.getFunction().hasFnAttribute(Attribute::SpeculativeLoadHardening);
82 MFReturnAddr = Register();
83
84 processPHIs(MF);
85 }
86
87private:
88 /// tblgen-erated 'select' implementation, used as the initial selector for
89 /// the patterns that don't require complex C++.
90 bool selectImpl(MachineInstr &I, CodeGenCoverage &CoverageInfo) const;
91
92 // A lowering phase that runs before any selection attempts.
93 // Returns true if the instruction was modified.
94 bool preISelLower(MachineInstr &I);
95
96 // An early selection function that runs before the selectImpl() call.
97 bool earlySelect(MachineInstr &I) const;
98
99 // Do some preprocessing of G_PHIs before we begin selection.
100 void processPHIs(MachineFunction &MF);
101
102 bool earlySelectSHL(MachineInstr &I, MachineRegisterInfo &MRI) const;
103
104 /// Eliminate same-sized cross-bank copies into stores before selectImpl().
105 bool contractCrossBankCopyIntoStore(MachineInstr &I,
106 MachineRegisterInfo &MRI);
107
108 bool convertPtrAddToAdd(MachineInstr &I, MachineRegisterInfo &MRI);
109
110 bool selectVaStartAAPCS(MachineInstr &I, MachineFunction &MF,
111 MachineRegisterInfo &MRI) const;
112 bool selectVaStartDarwin(MachineInstr &I, MachineFunction &MF,
113 MachineRegisterInfo &MRI) const;
114
115 ///@{
116 /// Helper functions for selectCompareBranch.
117 bool selectCompareBranchFedByFCmp(MachineInstr &I, MachineInstr &FCmp,
118 MachineIRBuilder &MIB) const;
119 bool selectCompareBranchFedByICmp(MachineInstr &I, MachineInstr &ICmp,
120 MachineIRBuilder &MIB) const;
121 bool tryOptCompareBranchFedByICmp(MachineInstr &I, MachineInstr &ICmp,
122 MachineIRBuilder &MIB) const;
123 bool tryOptAndIntoCompareBranch(MachineInstr &AndInst, bool Invert,
124 MachineBasicBlock *DstMBB,
125 MachineIRBuilder &MIB) const;
126 ///@}
127
128 bool selectCompareBranch(MachineInstr &I, MachineFunction &MF,
129 MachineRegisterInfo &MRI) const;
130
131 bool selectVectorAshrLshr(MachineInstr &I, MachineRegisterInfo &MRI) const;
132 bool selectVectorSHL(MachineInstr &I, MachineRegisterInfo &MRI) const;
133
134 // Helper to generate an equivalent of scalar_to_vector into a new register,
135 // returned via 'Dst'.
136 MachineInstr *emitScalarToVector(unsigned EltSize,
137 const TargetRegisterClass *DstRC,
138 Register Scalar,
139 MachineIRBuilder &MIRBuilder) const;
140
141 /// Emit a lane insert into \p DstReg, or a new vector register if None is
142 /// provided.
143 ///
144 /// The lane inserted into is defined by \p LaneIdx. The vector source
145 /// register is given by \p SrcReg. The register containing the element is
146 /// given by \p EltReg.
147 MachineInstr *emitLaneInsert(Optional<Register> DstReg, Register SrcReg,
148 Register EltReg, unsigned LaneIdx,
149 const RegisterBank &RB,
150 MachineIRBuilder &MIRBuilder) const;
151
152 /// Emit a sequence of instructions representing a constant \p CV for a
153 /// vector register \p Dst. (E.g. a MOV, or a load from a constant pool.)
154 ///
155 /// \returns the last instruction in the sequence on success, and nullptr
156 /// otherwise.
157 MachineInstr *emitConstantVector(Register Dst, Constant *CV,
158 MachineIRBuilder &MIRBuilder,
159 MachineRegisterInfo &MRI) const;
160
161 bool selectInsertElt(MachineInstr &I, MachineRegisterInfo &MRI) const;
162 bool tryOptConstantBuildVec(MachineInstr &MI, LLT DstTy,
163 MachineRegisterInfo &MRI) const;
164 bool selectBuildVector(MachineInstr &I, MachineRegisterInfo &MRI) const;
165 bool selectMergeValues(MachineInstr &I, MachineRegisterInfo &MRI) const;
166 bool selectUnmergeValues(MachineInstr &I, MachineRegisterInfo &MRI) const;
167
168 bool selectShuffleVector(MachineInstr &I, MachineRegisterInfo &MRI) const;
169 bool selectExtractElt(MachineInstr &I, MachineRegisterInfo &MRI) const;
170 bool selectConcatVectors(MachineInstr &I, MachineRegisterInfo &MRI) const;
171 bool selectSplitVectorUnmerge(MachineInstr &I,
172 MachineRegisterInfo &MRI) const;
173 bool selectIntrinsicWithSideEffects(MachineInstr &I,
174 MachineRegisterInfo &MRI) const;
175 bool selectIntrinsic(MachineInstr &I, MachineRegisterInfo &MRI);
176 bool selectVectorICmp(MachineInstr &I, MachineRegisterInfo &MRI) const;
177 bool selectIntrinsicTrunc(MachineInstr &I, MachineRegisterInfo &MRI) const;
178 bool selectIntrinsicRound(MachineInstr &I, MachineRegisterInfo &MRI) const;
179 bool selectJumpTable(MachineInstr &I, MachineRegisterInfo &MRI) const;
180 bool selectBrJT(MachineInstr &I, MachineRegisterInfo &MRI) const;
181 bool selectTLSGlobalValue(MachineInstr &I, MachineRegisterInfo &MRI) const;
182 bool selectReduction(MachineInstr &I, MachineRegisterInfo &MRI) const;
183
184 unsigned emitConstantPoolEntry(const Constant *CPVal,
185 MachineFunction &MF) const;
186 MachineInstr *emitLoadFromConstantPool(const Constant *CPVal,
187 MachineIRBuilder &MIRBuilder) const;
188
189 // Emit a vector concat operation.
190 MachineInstr *emitVectorConcat(Optional<Register> Dst, Register Op1,
191 Register Op2,
192 MachineIRBuilder &MIRBuilder) const;
193
194 // Emit an integer compare between LHS and RHS, which checks for Predicate.
195 MachineInstr *emitIntegerCompare(MachineOperand &LHS, MachineOperand &RHS,
196 MachineOperand &Predicate,
197 MachineIRBuilder &MIRBuilder) const;
198
199 /// Emit a floating point comparison between \p LHS and \p RHS.
200 /// \p Pred, if given, is the intended predicate to use.
201 MachineInstr *emitFPCompare(Register LHS, Register RHS,
202 MachineIRBuilder &MIRBuilder,
203 Optional<CmpInst::Predicate> = None) const;
204
205 MachineInstr *emitInstr(unsigned Opcode,
206 std::initializer_list<llvm::DstOp> DstOps,
207 std::initializer_list<llvm::SrcOp> SrcOps,
208 MachineIRBuilder &MIRBuilder,
209 const ComplexRendererFns &RenderFns = None) const;
210 /// Helper function to emit an add or sub instruction.
211 ///
212 /// \p AddrModeAndSizeToOpcode must contain each of the opcode variants above
213 /// in a specific order.
214 ///
215 /// Below is an example of the expected input to \p AddrModeAndSizeToOpcode.
216 ///
217 /// \code
218 /// const std::array<std::array<unsigned, 2>, 4> Table {
219 /// {{AArch64::ADDXri, AArch64::ADDWri},
220 /// {AArch64::ADDXrs, AArch64::ADDWrs},
221 /// {AArch64::ADDXrr, AArch64::ADDWrr},
222 /// {AArch64::SUBXri, AArch64::SUBWri},
223 /// {AArch64::ADDXrx, AArch64::ADDWrx}}};
224 /// \endcode
225 ///
226 /// Each row in the table corresponds to a different addressing mode. Each
227 /// column corresponds to a different register size.
228 ///
229 /// \attention Rows must be structured as follows:
230 /// - Row 0: The ri opcode variants
231 /// - Row 1: The rs opcode variants
232 /// - Row 2: The rr opcode variants
233 /// - Row 3: The ri opcode variants for negative immediates
234 /// - Row 4: The rx opcode variants
235 ///
236 /// \attention Columns must be structured as follows:
237 /// - Column 0: The 64-bit opcode variants
238 /// - Column 1: The 32-bit opcode variants
239 ///
240 /// \p Dst is the destination register of the binop to emit.
241 /// \p LHS is the left-hand operand of the binop to emit.
242 /// \p RHS is the right-hand operand of the binop to emit.
243 MachineInstr *emitAddSub(
244 const std::array<std::array<unsigned, 2>, 5> &AddrModeAndSizeToOpcode,
245 Register Dst, MachineOperand &LHS, MachineOperand &RHS,
246 MachineIRBuilder &MIRBuilder) const;
247 MachineInstr *emitADD(Register DefReg, MachineOperand &LHS,
248 MachineOperand &RHS,
249 MachineIRBuilder &MIRBuilder) const;
250 MachineInstr *emitADDS(Register Dst, MachineOperand &LHS, MachineOperand &RHS,
251 MachineIRBuilder &MIRBuilder) const;
252 MachineInstr *emitSUBS(Register Dst, MachineOperand &LHS, MachineOperand &RHS,
253 MachineIRBuilder &MIRBuilder) const;
254 MachineInstr *emitCMN(MachineOperand &LHS, MachineOperand &RHS,
255 MachineIRBuilder &MIRBuilder) const;
256 MachineInstr *emitTST(MachineOperand &LHS, MachineOperand &RHS,
257 MachineIRBuilder &MIRBuilder) const;
258 MachineInstr *emitSelect(Register Dst, Register LHS, Register RHS,
259 AArch64CC::CondCode CC,
260 MachineIRBuilder &MIRBuilder) const;
261 MachineInstr *emitExtractVectorElt(Optional<Register> DstReg,
262 const RegisterBank &DstRB, LLT ScalarTy,
263 Register VecReg, unsigned LaneIdx,
264 MachineIRBuilder &MIRBuilder) const;
265
266 /// Emit a CSet for an integer compare.
267 ///
268 /// \p DefReg and \p SrcReg are expected to be 32-bit scalar registers.
269 MachineInstr *emitCSetForICMP(Register DefReg, unsigned Pred,
270 MachineIRBuilder &MIRBuilder,
271 Register SrcReg = AArch64::WZR) const;
272 /// Emit a CSet for a FP compare.
273 ///
274 /// \p Dst is expected to be a 32-bit scalar register.
275 MachineInstr *emitCSetForFCmp(Register Dst, CmpInst::Predicate Pred,
276 MachineIRBuilder &MIRBuilder) const;
277
278 /// Emit the overflow op for \p Opcode.
279 ///
280 /// \p Opcode is expected to be an overflow op's opcode, e.g. G_UADDO,
281 /// G_USUBO, etc.
282 std::pair<MachineInstr *, AArch64CC::CondCode>
283 emitOverflowOp(unsigned Opcode, Register Dst, MachineOperand &LHS,
284 MachineOperand &RHS, MachineIRBuilder &MIRBuilder) const;
285
286 /// Emit a TB(N)Z instruction which tests \p Bit in \p TestReg.
287 /// \p IsNegative is true if the test should be "not zero".
288 /// This will also optimize the test bit instruction when possible.
289 MachineInstr *emitTestBit(Register TestReg, uint64_t Bit, bool IsNegative,
290 MachineBasicBlock *DstMBB,
291 MachineIRBuilder &MIB) const;
292
293 /// Emit a CB(N)Z instruction which branches to \p DestMBB.
294 MachineInstr *emitCBZ(Register CompareReg, bool IsNegative,
295 MachineBasicBlock *DestMBB,
296 MachineIRBuilder &MIB) const;
297
298 // Equivalent to the i32shift_a and friends from AArch64InstrInfo.td.
299 // We use these manually instead of using the importer since it doesn't
300 // support SDNodeXForm.
301 ComplexRendererFns selectShiftA_32(const MachineOperand &Root) const;
302 ComplexRendererFns selectShiftB_32(const MachineOperand &Root) const;
303 ComplexRendererFns selectShiftA_64(const MachineOperand &Root) const;
304 ComplexRendererFns selectShiftB_64(const MachineOperand &Root) const;
305
306 ComplexRendererFns select12BitValueWithLeftShift(uint64_t Immed) const;
307 ComplexRendererFns selectArithImmed(MachineOperand &Root) const;
308 ComplexRendererFns selectNegArithImmed(MachineOperand &Root) const;
309
310 ComplexRendererFns selectAddrModeUnscaled(MachineOperand &Root,
311 unsigned Size) const;
312
313 ComplexRendererFns selectAddrModeUnscaled8(MachineOperand &Root) const {
314 return selectAddrModeUnscaled(Root, 1);
315 }
316 ComplexRendererFns selectAddrModeUnscaled16(MachineOperand &Root) const {
317 return selectAddrModeUnscaled(Root, 2);
318 }
319 ComplexRendererFns selectAddrModeUnscaled32(MachineOperand &Root) const {
320 return selectAddrModeUnscaled(Root, 4);
321 }
322 ComplexRendererFns selectAddrModeUnscaled64(MachineOperand &Root) const {
323 return selectAddrModeUnscaled(Root, 8);
324 }
325 ComplexRendererFns selectAddrModeUnscaled128(MachineOperand &Root) const {
326 return selectAddrModeUnscaled(Root, 16);
327 }
328
329 /// Helper to try to fold in a GISEL_ADD_LOW into an immediate, to be used
330 /// from complex pattern matchers like selectAddrModeIndexed().
331 ComplexRendererFns tryFoldAddLowIntoImm(MachineInstr &RootDef, unsigned Size,
332 MachineRegisterInfo &MRI) const;
333
334 ComplexRendererFns selectAddrModeIndexed(MachineOperand &Root,
335 unsigned Size) const;
336 template <int Width>
337 ComplexRendererFns selectAddrModeIndexed(MachineOperand &Root) const {
338 return selectAddrModeIndexed(Root, Width / 8);
339 }
340
341 bool isWorthFoldingIntoExtendedReg(MachineInstr &MI,
342 const MachineRegisterInfo &MRI) const;
343 ComplexRendererFns
344 selectAddrModeShiftedExtendXReg(MachineOperand &Root,
345 unsigned SizeInBytes) const;
346
347 /// Returns a \p ComplexRendererFns which contains a base, offset, and whether
348 /// or not a shift + extend should be folded into an addressing mode. Returns
349 /// None when this is not profitable or possible.
350 ComplexRendererFns
351 selectExtendedSHL(MachineOperand &Root, MachineOperand &Base,
352 MachineOperand &Offset, unsigned SizeInBytes,
353 bool WantsExt) const;
354 ComplexRendererFns selectAddrModeRegisterOffset(MachineOperand &Root) const;
355 ComplexRendererFns selectAddrModeXRO(MachineOperand &Root,
356 unsigned SizeInBytes) const;
357 template <int Width>
358 ComplexRendererFns selectAddrModeXRO(MachineOperand &Root) const {
359 return selectAddrModeXRO(Root, Width / 8);
360 }
361
362 ComplexRendererFns selectAddrModeWRO(MachineOperand &Root,
363 unsigned SizeInBytes) const;
364 template <int Width>
365 ComplexRendererFns selectAddrModeWRO(MachineOperand &Root) const {
366 return selectAddrModeWRO(Root, Width / 8);
367 }
368
369 ComplexRendererFns selectShiftedRegister(MachineOperand &Root) const;
370
371 ComplexRendererFns selectArithShiftedRegister(MachineOperand &Root) const {
372 return selectShiftedRegister(Root);
373 }
374
375 ComplexRendererFns selectLogicalShiftedRegister(MachineOperand &Root) const {
376 // TODO: selectShiftedRegister should allow for rotates on logical shifts.
377 // For now, make them the same. The only difference between the two is that
378 // logical shifts are allowed to fold in rotates. Otherwise, these are
379 // functionally the same.
380 return selectShiftedRegister(Root);
381 }
382
383 /// Given an extend instruction, determine the correct shift-extend type for
384 /// that instruction.
385 ///
386 /// If the instruction is going to be used in a load or store, pass
387 /// \p IsLoadStore = true.
388 AArch64_AM::ShiftExtendType
389 getExtendTypeForInst(MachineInstr &MI, MachineRegisterInfo &MRI,
390 bool IsLoadStore = false) const;
391
392 /// Move \p Reg to \p RC if \p Reg is not already on \p RC.
393 ///
394 /// \returns Either \p Reg if no change was necessary, or the new register
395 /// created by moving \p Reg.
396 ///
397 /// Note: This uses emitCopy right now.
398 Register moveScalarRegClass(Register Reg, const TargetRegisterClass &RC,
399 MachineIRBuilder &MIB) const;
400
401 ComplexRendererFns selectArithExtendedRegister(MachineOperand &Root) const;
402
403 void renderTruncImm(MachineInstrBuilder &MIB, const MachineInstr &MI,
404 int OpIdx = -1) const;
405 void renderLogicalImm32(MachineInstrBuilder &MIB, const MachineInstr &I,
406 int OpIdx = -1) const;
407 void renderLogicalImm64(MachineInstrBuilder &MIB, const MachineInstr &I,
408 int OpIdx = -1) const;
409 void renderFPImm16(MachineInstrBuilder &MIB, const MachineInstr &MI,
410 int OpIdx = -1) const;
411 void renderFPImm32(MachineInstrBuilder &MIB, const MachineInstr &MI,
412 int OpIdx = -1) const;
413 void renderFPImm64(MachineInstrBuilder &MIB, const MachineInstr &MI,
414 int OpIdx = -1) const;
415
416 // Materialize a GlobalValue or BlockAddress using a movz+movk sequence.
417 void materializeLargeCMVal(MachineInstr &I, const Value *V,
418 unsigned OpFlags) const;
419
420 // Optimization methods.
421 bool tryOptSelect(MachineInstr &MI) const;
422 MachineInstr *tryFoldIntegerCompare(MachineOperand &LHS, MachineOperand &RHS,
423 MachineOperand &Predicate,
424 MachineIRBuilder &MIRBuilder) const;
425
426 /// Return true if \p MI is a load or store of \p NumBytes bytes.
427 bool isLoadStoreOfNumBytes(const MachineInstr &MI, unsigned NumBytes) const;
428
429 /// Returns true if \p MI is guaranteed to have the high-half of a 64-bit
430 /// register zeroed out. In other words, the result of MI has been explicitly
431 /// zero extended.
432 bool isDef32(const MachineInstr &MI) const;
433
434 const AArch64TargetMachine &TM;
435 const AArch64Subtarget &STI;
436 const AArch64InstrInfo &TII;
437 const AArch64RegisterInfo &TRI;
438 const AArch64RegisterBankInfo &RBI;
439
440 bool ProduceNonFlagSettingCondBr = false;
441
442 // Some cached values used during selection.
443 // We use LR as a live-in register, and we keep track of it here as it can be
444 // clobbered by calls.
445 Register MFReturnAddr;
446
447#define GET_GLOBALISEL_PREDICATES_DECL
448#include "AArch64GenGlobalISel.inc"
449#undef GET_GLOBALISEL_PREDICATES_DECL
450
451// We declare the temporaries used by selectImpl() in the class to minimize the
452// cost of constructing placeholder values.
453#define GET_GLOBALISEL_TEMPORARIES_DECL
454#include "AArch64GenGlobalISel.inc"
455#undef GET_GLOBALISEL_TEMPORARIES_DECL
456};
457
458} // end anonymous namespace
459
460#define GET_GLOBALISEL_IMPL
461#include "AArch64GenGlobalISel.inc"
462#undef GET_GLOBALISEL_IMPL
463
464AArch64InstructionSelector::AArch64InstructionSelector(
465 const AArch64TargetMachine &TM, const AArch64Subtarget &STI,
466 const AArch64RegisterBankInfo &RBI)
467 : InstructionSelector(), TM(TM), STI(STI), TII(*STI.getInstrInfo()),
468 TRI(*STI.getRegisterInfo()), RBI(RBI),
469#define GET_GLOBALISEL_PREDICATES_INIT
470#include "AArch64GenGlobalISel.inc"
471#undef GET_GLOBALISEL_PREDICATES_INIT
472#define GET_GLOBALISEL_TEMPORARIES_INIT
473#include "AArch64GenGlobalISel.inc"
474#undef GET_GLOBALISEL_TEMPORARIES_INIT
475{
476}
477
478// FIXME: This should be target-independent, inferred from the types declared
479// for each class in the bank.
480static const TargetRegisterClass *
481getRegClassForTypeOnBank(LLT Ty, const RegisterBank &RB,
482 const RegisterBankInfo &RBI,
483 bool GetAllRegSet = false) {
484 if (RB.getID() == AArch64::GPRRegBankID) {
485 if (Ty.getSizeInBits() <= 32)
486 return GetAllRegSet ? &AArch64::GPR32allRegClass
487 : &AArch64::GPR32RegClass;
488 if (Ty.getSizeInBits() == 64)
489 return GetAllRegSet ? &AArch64::GPR64allRegClass
490 : &AArch64::GPR64RegClass;
491 return nullptr;
492 }
493
494 if (RB.getID() == AArch64::FPRRegBankID) {
495 if (Ty.getSizeInBits() <= 16)
496 return &AArch64::FPR16RegClass;
497 if (Ty.getSizeInBits() == 32)
498 return &AArch64::FPR32RegClass;
499 if (Ty.getSizeInBits() == 64)
500 return &AArch64::FPR64RegClass;
501 if (Ty.getSizeInBits() == 128)
502 return &AArch64::FPR128RegClass;
503 return nullptr;
504 }
505
506 return nullptr;
507}
508
509/// Given a register bank, and size in bits, return the smallest register class
510/// that can represent that combination.
511static const TargetRegisterClass *
512getMinClassForRegBank(const RegisterBank &RB, unsigned SizeInBits,
513 bool GetAllRegSet = false) {
514 unsigned RegBankID = RB.getID();
515
516 if (RegBankID == AArch64::GPRRegBankID) {
517 if (SizeInBits <= 32)
518 return GetAllRegSet ? &AArch64::GPR32allRegClass
519 : &AArch64::GPR32RegClass;
520 if (SizeInBits == 64)
521 return GetAllRegSet ? &AArch64::GPR64allRegClass
522 : &AArch64::GPR64RegClass;
523 }
524
525 if (RegBankID == AArch64::FPRRegBankID) {
526 switch (SizeInBits) {
527 default:
528 return nullptr;
529 case 8:
530 return &AArch64::FPR8RegClass;
531 case 16:
532 return &AArch64::FPR16RegClass;
533 case 32:
534 return &AArch64::FPR32RegClass;
535 case 64:
536 return &AArch64::FPR64RegClass;
537 case 128:
538 return &AArch64::FPR128RegClass;
539 }
540 }
541
542 return nullptr;
543}
544
545/// Returns the correct subregister to use for a given register class.
546static bool getSubRegForClass(const TargetRegisterClass *RC,
547 const TargetRegisterInfo &TRI, unsigned &SubReg) {
548 switch (TRI.getRegSizeInBits(*RC)) {
549 case 8:
550 SubReg = AArch64::bsub;
551 break;
552 case 16:
553 SubReg = AArch64::hsub;
554 break;
555 case 32:
556 if (RC != &AArch64::FPR32RegClass)
557 SubReg = AArch64::sub_32;
558 else
559 SubReg = AArch64::ssub;
560 break;
561 case 64:
562 SubReg = AArch64::dsub;
563 break;
564 default:
565 LLVM_DEBUG(
566 dbgs() << "Couldn't find appropriate subregister for register class.");
567 return false;
568 }
569
570 return true;
571}
572
573/// Returns the minimum size the given register bank can hold.
574static unsigned getMinSizeForRegBank(const RegisterBank &RB) {
575 switch (RB.getID()) {
576 case AArch64::GPRRegBankID:
577 return 32;
578 case AArch64::FPRRegBankID:
579 return 8;
580 default:
581 llvm_unreachable("Tried to get minimum size for unknown register bank.");
582 }
583}
584
585static Optional<uint64_t> getImmedFromMO(const MachineOperand &Root) {
586 auto &MI = *Root.getParent();
587 auto &MBB = *MI.getParent();
588 auto &MF = *MBB.getParent();
589 auto &MRI = MF.getRegInfo();
590 uint64_t Immed;
591 if (Root.isImm())
592 Immed = Root.getImm();
593 else if (Root.isCImm())
594 Immed = Root.getCImm()->getZExtValue();
595 else if (Root.isReg()) {
596 auto ValAndVReg =
597 getConstantVRegValWithLookThrough(Root.getReg(), MRI, true);
598 if (!ValAndVReg)
599 return None;
600 Immed = ValAndVReg->Value.getSExtValue();
601 } else
602 return None;
603 return Immed;
604}
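
Note that a negative constant (e.g. -1) comes back from this helper as a huge unsigned value via the sign extension above, so callers must range-check the result before using it as a shift amount. A hedged sketch of a careful caller (fitsArith12 is a hypothetical name, not a function in this file):

static bool fitsArith12(const MachineOperand &Root) {
  Optional<uint64_t> MaybeImm = getImmedFromMO(Root);
  if (!MaybeImm)
    return false; // no constant operand: nothing to fold
  uint64_t Immed = *MaybeImm;
  // An AArch64 arithmetic immediate is 12 bits, optionally shifted left 12.
  if (Immed >> 12 == 0)
    return true;
  return (Immed & 0xfff) == 0 && (Immed >> 24) == 0;
}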
605
606/// Check whether \p I is a currently unsupported binary operation:
607/// - it has an unsized type
608/// - an operand is not a vreg
609/// - not all operands are in the same bank
610/// These are checks that should someday live in the verifier, but right now,
611/// these are mostly limitations of the aarch64 selector.
612static bool unsupportedBinOp(const MachineInstr &I,
613 const AArch64RegisterBankInfo &RBI,
614 const MachineRegisterInfo &MRI,
615 const AArch64RegisterInfo &TRI) {
616 LLT Ty = MRI.getType(I.getOperand(0).getReg());
617 if (!Ty.isValid()) {
618 LLVM_DEBUG(dbgs() << "Generic binop register should be typed\n");
619 return true;
620 }
621
622 const RegisterBank *PrevOpBank = nullptr;
623 for (auto &MO : I.operands()) {
624 // FIXME: Support non-register operands.
625 if (!MO.isReg()) {
626 LLVM_DEBUG(dbgs() << "Generic inst non-reg operands are unsupported\n");
627 return true;
628 }
629
630 // FIXME: Can generic operations have physical registers operands? If
631 // so, this will need to be taught about that, and we'll need to get the
632 // bank out of the minimal class for the register.
633 // Either way, this needs to be documented (and possibly verified).
634 if (!Register::isVirtualRegister(MO.getReg())) {
635 LLVM_DEBUG(dbgs() << "Generic inst has physical register operand\n");
636 return true;
637 }
638
639 const RegisterBank *OpBank = RBI.getRegBank(MO.getReg(), MRI, TRI);
640 if (!OpBank) {
641 LLVM_DEBUG(dbgs() << "Generic register has no bank or class\n");
642 return true;
643 }
644
645 if (PrevOpBank && OpBank != PrevOpBank) {
646 LLVM_DEBUG(dbgs() << "Generic inst operands have different banks\n");
647 return true;
648 }
649 PrevOpBank = OpBank;
650 }
651 return false;
652}
653
654/// Select the AArch64 opcode for the basic binary operation \p GenericOpc
655/// (such as G_OR or G_SDIV), appropriate for the register bank \p RegBankID
656/// and of size \p OpSize.
657/// \returns \p GenericOpc if the combination is unsupported.
658static unsigned selectBinaryOp(unsigned GenericOpc, unsigned RegBankID,
659 unsigned OpSize) {
660 switch (RegBankID) {
661 case AArch64::GPRRegBankID:
662 if (OpSize == 32) {
663 switch (GenericOpc) {
664 case TargetOpcode::G_SHL:
665 return AArch64::LSLVWr;
666 case TargetOpcode::G_LSHR:
667 return AArch64::LSRVWr;
668 case TargetOpcode::G_ASHR:
669 return AArch64::ASRVWr;
670 default:
671 return GenericOpc;
672 }
673 } else if (OpSize == 64) {
674 switch (GenericOpc) {
675 case TargetOpcode::G_PTR_ADD:
676 return AArch64::ADDXrr;
677 case TargetOpcode::G_SHL:
678 return AArch64::LSLVXr;
679 case TargetOpcode::G_LSHR:
680 return AArch64::LSRVXr;
681 case TargetOpcode::G_ASHR:
682 return AArch64::ASRVXr;
683 default:
684 return GenericOpc;
685 }
686 }
687 break;
688 case AArch64::FPRRegBankID:
689 switch (OpSize) {
690 case 32:
691 switch (GenericOpc) {
692 case TargetOpcode::G_FADD:
693 return AArch64::FADDSrr;
694 case TargetOpcode::G_FSUB:
695 return AArch64::FSUBSrr;
696 case TargetOpcode::G_FMUL:
697 return AArch64::FMULSrr;
698 case TargetOpcode::G_FDIV:
699 return AArch64::FDIVSrr;
700 default:
701 return GenericOpc;
702 }
703 case 64:
704 switch (GenericOpc) {
705 case TargetOpcode::G_FADD:
706 return AArch64::FADDDrr;
707 case TargetOpcode::G_FSUB:
708 return AArch64::FSUBDrr;
709 case TargetOpcode::G_FMUL:
710 return AArch64::FMULDrr;
711 case TargetOpcode::G_FDIV:
712 return AArch64::FDIVDrr;
713 case TargetOpcode::G_OR:
714 return AArch64::ORRv8i8;
715 default:
716 return GenericOpc;
717 }
718 }
719 break;
720 }
721 return GenericOpc;
722}
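
As a usage illustration (a hypothetical call site, not code from this file), a caller consults the table with the destination's register bank and size, and treats an unchanged opcode as "unsupported":

unsigned NewOpc = selectBinaryOp(I.getOpcode(),
                                 RBI.getRegBank(DstReg, MRI, TRI)->getID(),
                                 Ty.getSizeInBits());
if (NewOpc == I.getOpcode())
  return false; // combination not handled by the table above
I.setDesc(TII.get(NewOpc));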
723
724/// Select the AArch64 opcode for the G_LOAD or G_STORE operation \p GenericOpc,
725/// appropriate for the (value) register bank \p RegBankID and of memory access
726/// size \p OpSize. This returns the variant with the base+unsigned-immediate
727/// addressing mode (e.g., LDRXui).
728/// \returns \p GenericOpc if the combination is unsupported.
729static unsigned selectLoadStoreUIOp(unsigned GenericOpc, unsigned RegBankID,
730 unsigned OpSize) {
731 const bool isStore = GenericOpc == TargetOpcode::G_STORE;
732 switch (RegBankID) {
733 case AArch64::GPRRegBankID:
734 switch (OpSize) {
735 case 8:
736 return isStore ? AArch64::STRBBui : AArch64::LDRBBui;
737 case 16:
738 return isStore ? AArch64::STRHHui : AArch64::LDRHHui;
739 case 32:
740 return isStore ? AArch64::STRWui : AArch64::LDRWui;
741 case 64:
742 return isStore ? AArch64::STRXui : AArch64::LDRXui;
743 }
744 break;
745 case AArch64::FPRRegBankID:
746 switch (OpSize) {
747 case 8:
748 return isStore ? AArch64::STRBui : AArch64::LDRBui;
749 case 16:
750 return isStore ? AArch64::STRHui : AArch64::LDRHui;
751 case 32:
752 return isStore ? AArch64::STRSui : AArch64::LDRSui;
753 case 64:
754 return isStore ? AArch64::STRDui : AArch64::LDRDui;
755 }
756 break;
757 }
758 return GenericOpc;
759}
760
761#ifndef NDEBUG
762/// Helper function that verifies that we have a valid copy at the end of
763/// selectCopy. Verifies that the source and dest have the expected sizes and
764/// then returns true.
765static bool isValidCopy(const MachineInstr &I, const RegisterBank &DstBank,
766 const MachineRegisterInfo &MRI,
767 const TargetRegisterInfo &TRI,
768 const RegisterBankInfo &RBI) {
769 const Register DstReg = I.getOperand(0).getReg();
770 const Register SrcReg = I.getOperand(1).getReg();
771 const unsigned DstSize = RBI.getSizeInBits(DstReg, MRI, TRI);
772 const unsigned SrcSize = RBI.getSizeInBits(SrcReg, MRI, TRI);
773
774 // Make sure the size of the source and dest line up.
775 assert(
776 (DstSize == SrcSize ||
777 // Copies are a mean to setup initial types, the number of
778 // bits may not exactly match.
779 (Register::isPhysicalRegister(SrcReg) && DstSize <= SrcSize) ||
780 // Copies are a mean to copy bits around, as long as we are
781 // on the same register class, that's fine. Otherwise, that
782 // means we need some SUBREG_TO_REG or AND & co.
783 (((DstSize + 31) / 32 == (SrcSize + 31) / 32) && DstSize > SrcSize)) &&
784 "Copy with different width?!");
785
786 // Check the size of the destination.
787 assert((DstSize <= 64 || DstBank.getID() == AArch64::FPRRegBankID) &&
788 "GPRs cannot get more than 64-bit width values");
789
790 return true;
791}
792#endif
793
794/// Helper function for selectCopy. Inserts a subregister copy from \p SrcReg
795/// to \p *To.
796///
797/// E.g "To = COPY SrcReg:SubReg"
798static bool copySubReg(MachineInstr &I, MachineRegisterInfo &MRI,
799 const RegisterBankInfo &RBI, Register SrcReg,
800 const TargetRegisterClass *To, unsigned SubReg) {
801 assert(SrcReg.isValid() && "Expected a valid source register?");
802 assert(To && "Destination register class cannot be null");
803 assert(SubReg && "Expected a valid subregister");
804
805 MachineIRBuilder MIB(I);
806 auto SubRegCopy =
807 MIB.buildInstr(TargetOpcode::COPY, {To}, {}).addReg(SrcReg, 0, SubReg);
808 MachineOperand &RegOp = I.getOperand(1);
809 RegOp.setReg(SubRegCopy.getReg(0));
810
811 // It's possible that the destination register won't be constrained. Make
812 // sure that happens.
813 if (!Register::isPhysicalRegister(I.getOperand(0).getReg()))
814 RBI.constrainGenericRegister(I.getOperand(0).getReg(), *To, MRI);
815
816 return true;
817}
818
819/// Helper function to get the source and destination register classes for a
820/// copy. Returns a std::pair containing the source register class for the
821/// copy, and the destination register class for the copy. If a register class
822/// cannot be determined, then it will be nullptr.
823static std::pair<const TargetRegisterClass *, const TargetRegisterClass *>
824getRegClassesForCopy(MachineInstr &I, const TargetInstrInfo &TII,
825 MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI,
826 const RegisterBankInfo &RBI) {
827 Register DstReg = I.getOperand(0).getReg();
828 Register SrcReg = I.getOperand(1).getReg();
829 const RegisterBank &DstRegBank = *RBI.getRegBank(DstReg, MRI, TRI);
830 const RegisterBank &SrcRegBank = *RBI.getRegBank(SrcReg, MRI, TRI);
831 unsigned DstSize = RBI.getSizeInBits(DstReg, MRI, TRI);
832 unsigned SrcSize = RBI.getSizeInBits(SrcReg, MRI, TRI);
833
834 // Special casing for cross-bank copies of s1s. We can technically represent
835 // a 1-bit value with any size of register. The minimum size for a GPR is 32
836 // bits. So, we need to put the FPR on 32 bits as well.
837 //
838 // FIXME: I'm not sure if this case holds true outside of copies. If it does,
839 // then we can pull it into the helpers that get the appropriate class for a
840 // register bank. Or make a new helper that carries along some constraint
841 // information.
842 if (SrcRegBank != DstRegBank && (DstSize == 1 && SrcSize == 1))
843 SrcSize = DstSize = 32;
844
845 return {getMinClassForRegBank(SrcRegBank, SrcSize, true),
846 getMinClassForRegBank(DstRegBank, DstSize, true)};
847}
848
849static bool selectCopy(MachineInstr &I, const TargetInstrInfo &TII,
850 MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI,
851 const RegisterBankInfo &RBI) {
852 Register DstReg = I.getOperand(0).getReg();
853 Register SrcReg = I.getOperand(1).getReg();
854 const RegisterBank &DstRegBank = *RBI.getRegBank(DstReg, MRI, TRI);
855 const RegisterBank &SrcRegBank = *RBI.getRegBank(SrcReg, MRI, TRI);
856
857 // Find the correct register classes for the source and destination registers.
858 const TargetRegisterClass *SrcRC;
859 const TargetRegisterClass *DstRC;
860 std::tie(SrcRC, DstRC) = getRegClassesForCopy(I, TII, MRI, TRI, RBI);
861
862 if (!DstRC) {
863 LLVM_DEBUG(dbgs() << "Unexpected dest size "
864 << RBI.getSizeInBits(DstReg, MRI, TRI) << '\n');
865 return false;
866 }
867
868 // A couple helpers below, for making sure that the copy we produce is valid.
869
870 // Set to true if we insert a SUBREG_TO_REG. If we do this, then we don't want
871 // to verify that the src and dst are the same size, since that's handled by
872 // the SUBREG_TO_REG.
873 bool KnownValid = false;
874
875 // Returns true, or asserts if something we don't expect happens. Instead of
876 // returning true, we return isValidCopy() to ensure that we verify the
877 // result.
878 auto CheckCopy = [&]() {
879 // If we have a bitcast or something, we can't have physical registers.
880 assert((I.isCopy() ||
881 (!Register::isPhysicalRegister(I.getOperand(0).getReg()) &&
882 !Register::isPhysicalRegister(I.getOperand(1).getReg()))) &&
883 "No phys reg on generic operator!");
884 bool ValidCopy = true;
885#ifndef NDEBUG
886 ValidCopy = KnownValid || isValidCopy(I, DstRegBank, MRI, TRI, RBI);
887 assert(ValidCopy && "Invalid copy.");
888#endif
889 (void)KnownValid;
890 return ValidCopy;
891 };
892
893 // Is this a copy? If so, then we may need to insert a subregister copy.
894 if (I.isCopy()) {
895 // Yes. Check if there's anything to fix up.
896 if (!SrcRC) {
897 LLVM_DEBUG(dbgs() << "Couldn't determine source register class\n");
898 return false;
899 }
900
901 unsigned SrcSize = TRI.getRegSizeInBits(*SrcRC);
902 unsigned DstSize = TRI.getRegSizeInBits(*DstRC);
903 unsigned SubReg;
904
905 // If the source bank doesn't support a subregister copy small enough,
906 // then we first need to copy to the destination bank.
907 if (getMinSizeForRegBank(SrcRegBank) > DstSize) {
908 const TargetRegisterClass *DstTempRC =
909 getMinClassForRegBank(DstRegBank, SrcSize, /* GetAllRegSet */ true);
910 getSubRegForClass(DstRC, TRI, SubReg);
911
912 MachineIRBuilder MIB(I);
913 auto Copy = MIB.buildCopy({DstTempRC}, {SrcReg});
914 copySubReg(I, MRI, RBI, Copy.getReg(0), DstRC, SubReg);
915 } else if (SrcSize > DstSize) {
916 // If the source register is bigger than the destination we need to
917 // perform a subregister copy.
918 const TargetRegisterClass *SubRegRC =
919 getMinClassForRegBank(SrcRegBank, DstSize, /* GetAllRegSet */ true);
920 getSubRegForClass(SubRegRC, TRI, SubReg);
921 copySubReg(I, MRI, RBI, SrcReg, DstRC, SubReg);
922 } else if (DstSize > SrcSize) {
923 // If the destination register is bigger than the source we need to do
924 // a promotion using SUBREG_TO_REG.
925 const TargetRegisterClass *PromotionRC =
926 getMinClassForRegBank(SrcRegBank, DstSize, /* GetAllRegSet */ true);
927 getSubRegForClass(SrcRC, TRI, SubReg);
928
929 Register PromoteReg = MRI.createVirtualRegister(PromotionRC);
930 BuildMI(*I.getParent(), I, I.getDebugLoc(),
931 TII.get(AArch64::SUBREG_TO_REG), PromoteReg)
932 .addImm(0)
933 .addUse(SrcReg)
934 .addImm(SubReg);
935 MachineOperand &RegOp = I.getOperand(1);
936 RegOp.setReg(PromoteReg);
937
938 // Promise that the copy is implicitly validated by the SUBREG_TO_REG.
939 KnownValid = true;
940 }
941
942 // If the destination is a physical register, then there's nothing to
943 // change, so we're done.
944 if (Register::isPhysicalRegister(DstReg))
945 return CheckCopy();
946 }
947
948 // No need to constrain SrcReg. It will get constrained when we hit another
949 // of its use or its defs. Copies do not have constraints.
950 if (!RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) {
951 LLVM_DEBUG(dbgs() << "Failed to constrain " << TII.getName(I.getOpcode())
952 << " operand\n");
953 return false;
954 }
955 I.setDesc(TII.get(AArch64::COPY));
956 return CheckCopy();
957}
958
959static unsigned selectFPConvOpc(unsigned GenericOpc, LLT DstTy, LLT SrcTy) {
960 if (!DstTy.isScalar() || !SrcTy.isScalar())
961 return GenericOpc;
962
963 const unsigned DstSize = DstTy.getSizeInBits();
964 const unsigned SrcSize = SrcTy.getSizeInBits();
965
966 switch (DstSize) {
967 case 32:
968 switch (SrcSize) {
969 case 32:
970 switch (GenericOpc) {
971 case TargetOpcode::G_SITOFP:
972 return AArch64::SCVTFUWSri;
973 case TargetOpcode::G_UITOFP:
974 return AArch64::UCVTFUWSri;
975 case TargetOpcode::G_FPTOSI:
976 return AArch64::FCVTZSUWSr;
977 case TargetOpcode::G_FPTOUI:
978 return AArch64::FCVTZUUWSr;
979 default:
980 return GenericOpc;
981 }
982 case 64:
983 switch (GenericOpc) {
984 case TargetOpcode::G_SITOFP:
985 return AArch64::SCVTFUXSri;
986 case TargetOpcode::G_UITOFP:
987 return AArch64::UCVTFUXSri;
988 case TargetOpcode::G_FPTOSI:
989 return AArch64::FCVTZSUWDr;
990 case TargetOpcode::G_FPTOUI:
991 return AArch64::FCVTZUUWDr;
992 default:
993 return GenericOpc;
994 }
995 default:
996 return GenericOpc;
997 }
998 case 64:
999 switch (SrcSize) {
1000 case 32:
1001 switch (GenericOpc) {
1002 case TargetOpcode::G_SITOFP:
1003 return AArch64::SCVTFUWDri;
1004 case TargetOpcode::G_UITOFP:
1005 return AArch64::UCVTFUWDri;
1006 case TargetOpcode::G_FPTOSI:
1007 return AArch64::FCVTZSUXSr;
1008 case TargetOpcode::G_FPTOUI:
1009 return AArch64::FCVTZUUXSr;
1010 default:
1011 return GenericOpc;
1012 }
1013 case 64:
1014 switch (GenericOpc) {
1015 case TargetOpcode::G_SITOFP:
1016 return AArch64::SCVTFUXDri;
1017 case TargetOpcode::G_UITOFP:
1018 return AArch64::UCVTFUXDri;
1019 case TargetOpcode::G_FPTOSI:
1020 return AArch64::FCVTZSUXDr;
1021 case TargetOpcode::G_FPTOUI:
1022 return AArch64::FCVTZUUXDr;
1023 default:
1024 return GenericOpc;
1025 }
1026 default:
1027 return GenericOpc;
1028 }
1029 default:
1030 return GenericOpc;
1031 };
1032 return GenericOpc;
1033}
1034
1035MachineInstr *
1036AArch64InstructionSelector::emitSelect(Register Dst, Register True,
1037 Register False, AArch64CC::CondCode CC,
1038 MachineIRBuilder &MIB) const {
1039 MachineRegisterInfo &MRI = *MIB.getMRI();
1040 assert(RBI.getRegBank(False, MRI, TRI)->getID() ==
1041 RBI.getRegBank(True, MRI, TRI)->getID() &&
1042 "Expected both select operands to have the same regbank?");
1043 LLT Ty = MRI.getType(True);
1044 if (Ty.isVector())
1045 return nullptr;
1046 const unsigned Size = Ty.getSizeInBits();
1047 assert((Size == 32 || Size == 64) &&
1048 "Expected 32 bit or 64 bit select only?");
1049 const bool Is32Bit = Size == 32;
1050 if (RBI.getRegBank(True, MRI, TRI)->getID() != AArch64::GPRRegBankID) {
1051 unsigned Opc = Is32Bit ? AArch64::FCSELSrrr : AArch64::FCSELDrrr;
1052 auto FCSel = MIB.buildInstr(Opc, {Dst}, {True, False}).addImm(CC);
1053 constrainSelectedInstRegOperands(*FCSel, TII, TRI, RBI);
1054 return &*FCSel;
1055 }
1056
1057 // By default, we'll try and emit a CSEL.
1058 unsigned Opc = Is32Bit ? AArch64::CSELWr : AArch64::CSELXr;
1059 bool Optimized = false;
1060 auto TryFoldBinOpIntoSelect = [&Opc, Is32Bit, &CC, &MRI,
1061 &Optimized](Register &Reg, Register &OtherReg,
1062 bool Invert) {
1063 if (Optimized)
1064 return false;
1065
1066 // Attempt to fold:
1067 //
1068 // %sub = G_SUB 0, %x
1069 // %select = G_SELECT cc, %reg, %sub
1070 //
1071 // Into:
1072 // %select = CSNEG %reg, %x, cc
1073 Register MatchReg;
1074 if (mi_match(Reg, MRI, m_Neg(m_Reg(MatchReg)))) {
1075 Opc = Is32Bit ? AArch64::CSNEGWr : AArch64::CSNEGXr;
1076 Reg = MatchReg;
1077 if (Invert) {
1078 CC = AArch64CC::getInvertedCondCode(CC);
1079 std::swap(Reg, OtherReg);
1080 }
1081 return true;
1082 }
1083
1084 // Attempt to fold:
1085 //
1086 // %xor = G_XOR %x, -1
1087 // %select = G_SELECT cc, %reg, %xor
1088 //
1089 // Into:
1090 // %select = CSINV %reg, %x, cc
1091 if (mi_match(Reg, MRI, m_Not(m_Reg(MatchReg)))) {
1092 Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;
1093 Reg = MatchReg;
1094 if (Invert) {
1095 CC = AArch64CC::getInvertedCondCode(CC);
1096 std::swap(Reg, OtherReg);
1097 }
1098 return true;
1099 }
1100
1101 // Attempt to fold:
1102 //
1103 // %add = G_ADD %x, 1
1104 // %select = G_SELECT cc, %reg, %add
1105 //
1106 // Into:
1107 // %select = CSINC %reg, %x, cc
1108 if (mi_match(Reg, MRI,
1109 m_any_of(m_GAdd(m_Reg(MatchReg), m_SpecificICst(1)),
1110 m_GPtrAdd(m_Reg(MatchReg), m_SpecificICst(1))))) {
1111 Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;
1112 Reg = MatchReg;
1113 if (Invert) {
1114 CC = AArch64CC::getInvertedCondCode(CC);
1115 std::swap(Reg, OtherReg);
1116 }
1117 return true;
1118 }
1119
1120 return false;
1121 };
1122
1123 // Helper lambda which tries to use CSINC/CSINV for the instruction when its
1124 // true/false values are constants.
1125 // FIXME: All of these patterns already exist in tablegen. We should be
1126 // able to import these.
1127 auto TryOptSelectCst = [&Opc, &True, &False, &CC, Is32Bit, &MRI,
1128 &Optimized]() {
1129 if (Optimized)
1130 return false;
1131 auto TrueCst = getConstantVRegValWithLookThrough(True, MRI);
1132 auto FalseCst = getConstantVRegValWithLookThrough(False, MRI);
1133 if (!TrueCst && !FalseCst)
1134 return false;
1135
1136 Register ZReg = Is32Bit ? AArch64::WZR : AArch64::XZR;
1137 if (TrueCst && FalseCst) {
1138 int64_t T = TrueCst->Value.getSExtValue();
1139 int64_t F = FalseCst->Value.getSExtValue();
1140
1141 if (T == 0 && F == 1) {
1142 // G_SELECT cc, 0, 1 -> CSINC zreg, zreg, cc
1143 Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;
1144 True = ZReg;
1145 False = ZReg;
1146 return true;
1147 }
1148
1149 if (T == 0 && F == -1) {
1150 // G_SELECT cc, 0, -1 -> CSINV zreg, zreg, cc
1151 Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;
1152 True = ZReg;
1153 False = ZReg;
1154 return true;
1155 }
1156 }
1157
1158 if (TrueCst) {
1159 int64_t T = TrueCst->Value.getSExtValue();
1160 if (T == 1) {
1161 // G_SELECT cc, 1, f -> CSINC f, zreg, inv_cc
1162 Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;
1163 True = False;
1164 False = ZReg;
1165 CC = AArch64CC::getInvertedCondCode(CC);
1166 return true;
1167 }
1168
1169 if (T == -1) {
1170 // G_SELECT cc, -1, f -> CSINV f, zreg, inv_cc
1171 Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;
1172 True = False;
1173 False = ZReg;
1174 CC = AArch64CC::getInvertedCondCode(CC);
1175 return true;
1176 }
1177 }
1178
1179 if (FalseCst) {
1180 int64_t F = FalseCst->Value.getSExtValue();
1181 if (F == 1) {
1182 // G_SELECT cc, t, 1 -> CSINC t, zreg, cc
1183 Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;
1184 False = ZReg;
1185 return true;
1186 }
1187
1188 if (F == -1) {
1189 // G_SELECT cc, t, -1 -> CSINV t, zreg, cc
1190 Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;
1191 False = ZReg;
1192 return true;
1193 }
1194 }
1195 return false;
1196 };
1197
1198 Optimized |= TryFoldBinOpIntoSelect(False, True, /*Invert = */ false);
1199 Optimized |= TryFoldBinOpIntoSelect(True, False, /*Invert = */ true);
1200 Optimized |= TryOptSelectCst();
1201 auto SelectInst = MIB.buildInstr(Opc, {Dst}, {True, False}).addImm(CC);
1202 constrainSelectedInstRegOperands(*SelectInst, TII, TRI, RBI);
1203 return &*SelectInst;
1204}
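As a side note on the folds above: all three exploit the same shape of the CSEL family, where the false operand passes through one extra unary op. A minimal standalone C++ model of that semantics, for illustration only (the enum and function names here are hypothetical, not LLVM API):

  #include <cstdint>

  enum class CSelKind { Sel, Neg, Inv, Inc };

  // cond ? T : op(F), where op depends on the CSEL-family opcode.
  uint64_t modelCSel(bool Cond, uint64_t T, uint64_t F, CSelKind K) {
    if (Cond)
      return T;
    switch (K) {
    case CSelKind::Sel: return F;      // CSEL
    case CSelKind::Neg: return 0 - F;  // CSNEG, matches %sub = G_SUB 0, %x
    case CSelKind::Inv: return ~F;     // CSINV, matches %xor = G_XOR %x, -1
    case CSelKind::Inc: return F + 1;  // CSINC, matches %add = G_ADD %x, 1
    }
    return F;
  }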
1205
1206static AArch64CC::CondCode changeICMPPredToAArch64CC(CmpInst::Predicate P) {
1207 switch (P) {
1208 default:
1209 llvm_unreachable("Unknown condition code!");
1210 case CmpInst::ICMP_NE:
1211 return AArch64CC::NE;
1212 case CmpInst::ICMP_EQ:
1213 return AArch64CC::EQ;
1214 case CmpInst::ICMP_SGT:
1215 return AArch64CC::GT;
1216 case CmpInst::ICMP_SGE:
1217 return AArch64CC::GE;
1218 case CmpInst::ICMP_SLT:
1219 return AArch64CC::LT;
1220 case CmpInst::ICMP_SLE:
1221 return AArch64CC::LE;
1222 case CmpInst::ICMP_UGT:
1223 return AArch64CC::HI;
1224 case CmpInst::ICMP_UGE:
1225 return AArch64CC::HS;
1226 case CmpInst::ICMP_ULT:
1227 return AArch64CC::LO;
1228 case CmpInst::ICMP_ULE:
1229 return AArch64CC::LS;
1230 }
1231}
1232
1233static void changeFCMPPredToAArch64CC(CmpInst::Predicate P,
1234 AArch64CC::CondCode &CondCode,
1235 AArch64CC::CondCode &CondCode2) {
1236 CondCode2 = AArch64CC::AL;
1237 switch (P) {
1238 default:
1239 llvm_unreachable("Unknown FP condition!");
1240 case CmpInst::FCMP_OEQ:
1241 CondCode = AArch64CC::EQ;
1242 break;
1243 case CmpInst::FCMP_OGT:
1244 CondCode = AArch64CC::GT;
1245 break;
1246 case CmpInst::FCMP_OGE:
1247 CondCode = AArch64CC::GE;
1248 break;
1249 case CmpInst::FCMP_OLT:
1250 CondCode = AArch64CC::MI;
1251 break;
1252 case CmpInst::FCMP_OLE:
1253 CondCode = AArch64CC::LS;
1254 break;
1255 case CmpInst::FCMP_ONE:
1256 CondCode = AArch64CC::MI;
1257 CondCode2 = AArch64CC::GT;
1258 break;
1259 case CmpInst::FCMP_ORD:
1260 CondCode = AArch64CC::VC;
1261 break;
1262 case CmpInst::FCMP_UNO:
1263 CondCode = AArch64CC::VS;
1264 break;
1265 case CmpInst::FCMP_UEQ:
1266 CondCode = AArch64CC::EQ;
1267 CondCode2 = AArch64CC::VS;
1268 break;
1269 case CmpInst::FCMP_UGT:
1270 CondCode = AArch64CC::HI;
1271 break;
1272 case CmpInst::FCMP_UGE:
1273 CondCode = AArch64CC::PL;
1274 break;
1275 case CmpInst::FCMP_ULT:
1276 CondCode = AArch64CC::LT;
1277 break;
1278 case CmpInst::FCMP_ULE:
1279 CondCode = AArch64CC::LE;
1280 break;
1281 case CmpInst::FCMP_UNE:
1282 CondCode = AArch64CC::NE;
1283 break;
1284 }
1285}
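For illustration, here is why a predicate such as FCMP_UEQ needs two AArch64 conditions: FCMP reports an unordered result by setting C and V, so UEQ is covered by EQ (Z set) or VS (V set). A small self-contained sketch, with the flag table paraphrased from the ARM ARM (the struct and function are hypothetical, not LLVM code):

  #include <cassert>
  #include <cmath>
  #include <utility>

  struct NZCV { bool N, Z, C, V; };

  // FCMP flag results: equal -> Z,C; less -> N; greater -> C; unordered -> C,V.
  NZCV fcmpFlags(double A, double B) {
    if (std::isnan(A) || std::isnan(B))
      return {false, false, true, true};
    if (A == B)
      return {false, true, true, false};
    if (A < B)
      return {true, false, false, false};
    return {false, false, true, false};
  }

  int main() {
    const std::pair<double, double> Cases[] = {{1.0, 1.0}, {1.0, 2.0}, {NAN, 1.0}};
    for (const auto &P : Cases) {
      NZCV F = fcmpFlags(P.first, P.second);
      bool UEQ = P.first == P.second || std::isnan(P.first) || std::isnan(P.second);
      assert(UEQ == (F.Z || F.V)); // FCMP_UEQ == (EQ cond) or (VS cond)
    }
  }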
1286
1287/// Return a register which can be used as a bit to test in a TB(N)Z.
1288static Register getTestBitReg(Register Reg, uint64_t &Bit, bool &Invert,
1289 MachineRegisterInfo &MRI) {
1290 assert(Reg.isValid() && "Expected valid register!");
1291 while (MachineInstr *MI = getDefIgnoringCopies(Reg, MRI)) {
1292 unsigned Opc = MI->getOpcode();
1293
1294 if (!MI->getOperand(0).isReg() ||
1295 !MRI.hasOneNonDBGUse(MI->getOperand(0).getReg()))
1296 break;
1297
1298 // (tbz (any_ext x), b) -> (tbz x, b) if we don't use the extended bits.
1299 //
1300 // (tbz (trunc x), b) -> (tbz x, b) is always safe, because the bit number
1301 // on the truncated x is the same as the bit number on x.
1302 if (Opc == TargetOpcode::G_ANYEXT || Opc == TargetOpcode::G_ZEXT ||
1303 Opc == TargetOpcode::G_TRUNC) {
1304 Register NextReg = MI->getOperand(1).getReg();
1305 // Did we find something worth folding?
1306 if (!NextReg.isValid() || !MRI.hasOneNonDBGUse(NextReg))
1307 break;
1308
1309 // NextReg is worth folding. Keep looking.
1310 Reg = NextReg;
1311 continue;
1312 }
1313
1314 // Attempt to find a suitable operation with a constant on one side.
1315 Optional<uint64_t> C;
1316 Register TestReg;
1317 switch (Opc) {
1318 default:
1319 break;
1320 case TargetOpcode::G_AND:
1321 case TargetOpcode::G_XOR: {
1322 TestReg = MI->getOperand(1).getReg();
1323 Register ConstantReg = MI->getOperand(2).getReg();
1324 auto VRegAndVal = getConstantVRegValWithLookThrough(ConstantReg, MRI);
1325 if (!VRegAndVal) {
1326 // AND commutes, check the other side for a constant.
1327 // FIXME: Can we canonicalize the constant so that it's always on the
1328 // same side at some point earlier?
1329 std::swap(ConstantReg, TestReg);
1330 VRegAndVal = getConstantVRegValWithLookThrough(ConstantReg, MRI);
1331 }
1332 if (VRegAndVal)
1333 C = VRegAndVal->Value.getSExtValue();
1334 break;
1335 }
1336 case TargetOpcode::G_ASHR:
1337 case TargetOpcode::G_LSHR:
1338 case TargetOpcode::G_SHL: {
1339 TestReg = MI->getOperand(1).getReg();
1340 auto VRegAndVal =
1341 getConstantVRegValWithLookThrough(MI->getOperand(2).getReg(), MRI);
1342 if (VRegAndVal)
1343 C = VRegAndVal->Value.getSExtValue();
1344 break;
1345 }
1346 }
1347
1348 // Didn't find a constant or viable register. Bail out of the loop.
1349 if (!C || !TestReg.isValid())
1350 break;
1351
1352 // We found a suitable instruction with a constant. Check to see if we can
1353 // walk through the instruction.
1354 Register NextReg;
1355 unsigned TestRegSize = MRI.getType(TestReg).getSizeInBits();
1356 switch (Opc) {
1357 default:
1358 break;
1359 case TargetOpcode::G_AND:
1360 // (tbz (and x, m), b) -> (tbz x, b) when the b-th bit of m is set.
1361 if ((*C >> Bit) & 1)
1362 NextReg = TestReg;
1363 break;
1364 case TargetOpcode::G_SHL:
1365 // (tbz (shl x, c), b) -> (tbz x, b-c) when b-c is non-negative and fits in
1366 // the type of the register.
1367 if (*C <= Bit && (Bit - *C) < TestRegSize) {
1368 NextReg = TestReg;
1369 Bit = Bit - *C;
1370 }
1371 break;
1372 case TargetOpcode::G_ASHR:
1373 // (tbz (ashr x, c), b) -> (tbz x, b+c), or (tbz x, msb) if b+c is >= # bits
1374 // in x.
1375 NextReg = TestReg;
1376 Bit = Bit + *C;
1377 if (Bit >= TestRegSize)
1378 Bit = TestRegSize - 1;
1379 break;
1380 case TargetOpcode::G_LSHR:
1381 // (tbz (lshr x, c), b) -> (tbz x, b+c) when b + c is < # bits in x
1382 if ((Bit + *C) < TestRegSize) {
1383 NextReg = TestReg;
1384 Bit = Bit + *C;
1385 }
1386 break;
1387 case TargetOpcode::G_XOR:
1388 // We can walk through a G_XOR by inverting whether we use tbz/tbnz when
1389 // appropriate.
1390 //
1391 // e.g. If x' = xor x, c, and the b-th bit is set in c then
1392 //
1393 // tbz x', b -> tbnz x, b
1394 //
1395 // Because x' only has the b-th bit set if x does not.
1396 if ((*C >> Bit) & 1)
1397 Invert = !Invert;
1398 NextReg = TestReg;
1399 break;
1400 }
1401
1402 // Check if we found anything worth folding.
1403 if (!NextReg.isValid())
1404 return Reg;
1405 Reg = NextReg;
1406 }
1407
1408 return Reg;
1409}
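The shift cases above rest on simple bit-index algebra: testing bit b of a shifted value is testing a relocated bit of the original. A quick standalone check of the identities, for illustration (not LLVM code):

  #include <cassert>
  #include <cstdint>

  int main() {
    uint64_t X = 0b1010;
    // (tbz (shl x, c), b) -> (tbz x, b - c), valid while b - c >= 0.
    assert(((X << 2) >> 3 & 1) == (X >> (3 - 2) & 1));
    // (tbz (lshr x, c), b) -> (tbz x, b + c), valid while b + c < bit width.
    assert(((X >> 2) >> 1 & 1) == (X >> (1 + 2) & 1));
    // (tbz (xor x, m), b) -> (tbnz x, b) when bit b of m is set.
    uint64_t M = 0b1000;
    assert((((X ^ M) >> 3) & 1) == !((X >> 3) & 1));
  }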
1410
1411MachineInstr *AArch64InstructionSelector::emitTestBit(
1412 Register TestReg, uint64_t Bit, bool IsNegative, MachineBasicBlock *DstMBB,
1413 MachineIRBuilder &MIB) const {
1414 assert(TestReg.isValid());
1415 assert(ProduceNonFlagSettingCondBr &&
1416 "Cannot emit TB(N)Z with speculation tracking!");
1417 MachineRegisterInfo &MRI = *MIB.getMRI();
1418
1419 // Attempt to optimize the test bit by walking over instructions.
1420 TestReg = getTestBitReg(TestReg, Bit, IsNegative, MRI);
1421 LLT Ty = MRI.getType(TestReg);
1422 unsigned Size = Ty.getSizeInBits();
1423 assert(!Ty.isVector() && "Expected a scalar!");
1424 assert(Bit < 64 && "Bit is too large!");
1425
1426 // When the test register is a 64-bit register, we have to narrow to make
1427 // TBNZW work.
1428 bool UseWReg = Bit < 32;
1429 unsigned NecessarySize = UseWReg ? 32 : 64;
1430 if (Size != NecessarySize)
1431 TestReg = moveScalarRegClass(
1432 TestReg, UseWReg ? AArch64::GPR32RegClass : AArch64::GPR64RegClass,
1433 MIB);
1434
1435 static const unsigned OpcTable[2][2] = {{AArch64::TBZX, AArch64::TBNZX},
1436 {AArch64::TBZW, AArch64::TBNZW}};
1437 unsigned Opc = OpcTable[UseWReg][IsNegative];
1438 auto TestBitMI =
1439 MIB.buildInstr(Opc).addReg(TestReg).addImm(Bit).addMBB(DstMBB);
1440 constrainSelectedInstRegOperands(*TestBitMI, TII, TRI, RBI);
1441 return &*TestBitMI;
1442}
1443
1444bool AArch64InstructionSelector::tryOptAndIntoCompareBranch(
1445 MachineInstr &AndInst, bool Invert, MachineBasicBlock *DstMBB,
1446 MachineIRBuilder &MIB) const {
1447 assert(AndInst.getOpcode() == TargetOpcode::G_AND && "Expected G_AND only?");
1448 // Given something like this:
1449 //
1450 // %x = ...Something...
1451 // %one = G_CONSTANT i64 1
1452 // %zero = G_CONSTANT i64 0
1453 // %and = G_AND %x, %one
1454 // %cmp = G_ICMP intpred(ne), %and, %zero
1455 // %cmp_trunc = G_TRUNC %cmp
1456 // G_BRCOND %cmp_trunc, %bb.3
1457 //
1458 // We want to try and fold the AND into the G_BRCOND and produce either a
1459 // TBNZ (when we have intpred(ne)) or a TBZ (when we have intpred(eq)).
1460 //
1461 // In this case, we'd get
1462 //
1463 // TBNZ %x, %bb.3
1464 //
1465
1466 // Check if the AND has a constant on its RHS which we can use as a mask.
1467 // If it's a power of 2, then it's the same as checking a specific bit.
1468 // (e.g, ANDing with 8 == ANDing with 000...100 == testing if bit 3 is set)
1469 auto MaybeBit = getConstantVRegValWithLookThrough(
1470 AndInst.getOperand(2).getReg(), *MIB.getMRI());
1471 if (!MaybeBit)
1472 return false;
1473
1474 int32_t Bit = MaybeBit->Value.exactLogBase2();
1475 if (Bit < 0)
1476 return false;
1477
1478 Register TestReg = AndInst.getOperand(1).getReg();
1479
1480 // Emit a TB(N)Z.
1481 emitTestBit(TestReg, Bit, Invert, DstMBB, MIB);
1482 return true;
1483}
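The mask-to-bit step above turns (x & 2^b) != 0 into a test of bit b. A standalone sanity check of that equivalence, for illustration (__builtin_ctzll stands in for APInt::exactLogBase2 on a known power of two):

  #include <cassert>
  #include <cstdint>
  #include <initializer_list>

  int main() {
    uint64_t Mask = 8; // power of two, so it isolates exactly one bit
    int Bit = __builtin_ctzll(Mask);
    assert(Bit == 3);
    for (uint64_t X : {0ull, 7ull, 8ull, 9ull})
      assert(((X & Mask) != 0) == (((X >> Bit) & 1) != 0));
  }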
1484
1485MachineInstr *AArch64InstructionSelector::emitCBZ(Register CompareReg,
1486 bool IsNegative,
1487 MachineBasicBlock *DestMBB,
1488 MachineIRBuilder &MIB) const {
1489 assert(ProduceNonFlagSettingCondBr && "CBZ does not set flags!");
1490 MachineRegisterInfo &MRI = *MIB.getMRI();
1491 assert(RBI.getRegBank(CompareReg, MRI, TRI)->getID() ==
1492 AArch64::GPRRegBankID &&
1493 "Expected GPRs only?");
1494 auto Ty = MRI.getType(CompareReg);
1495 unsigned Width = Ty.getSizeInBits();
1496 assert(!Ty.isVector() && "Expected scalar only?");
1497 assert(Width <= 64 && "Expected width to be at most 64?");
1498 static const unsigned OpcTable[2][2] = {{AArch64::CBZW, AArch64::CBZX},
1499 {AArch64::CBNZW, AArch64::CBNZX}};
1500 unsigned Opc = OpcTable[IsNegative][Width == 64];
1501 auto BranchMI = MIB.buildInstr(Opc, {}, {CompareReg}).addMBB(DestMBB);
1502 constrainSelectedInstRegOperands(*BranchMI, TII, TRI, RBI);
1503 return &*BranchMI;
1504}
1505
1506bool AArch64InstructionSelector::selectCompareBranchFedByFCmp(
1507 MachineInstr &I, MachineInstr &FCmp, MachineIRBuilder &MIB) const {
1508 assert(FCmp.getOpcode() == TargetOpcode::G_FCMP);
1509 assert(I.getOpcode() == TargetOpcode::G_BRCOND);
1510 // Unfortunately, the mapping of LLVM FP CC's onto AArch64 CC's isn't
1511 // totally clean. Some of them require two branches to implement.
1512 auto Pred = (CmpInst::Predicate)FCmp.getOperand(1).getPredicate();
1513 emitFPCompare(FCmp.getOperand(2).getReg(), FCmp.getOperand(3).getReg(), MIB,
1514 Pred);
1515 AArch64CC::CondCode CC1, CC2;
1516 changeFCMPPredToAArch64CC(static_cast<CmpInst::Predicate>(Pred), CC1, CC2);
1517 MachineBasicBlock *DestMBB = I.getOperand(1).getMBB();
1518 MIB.buildInstr(AArch64::Bcc, {}, {}).addImm(CC1).addMBB(DestMBB);
1519 if (CC2 != AArch64CC::AL)
1520 MIB.buildInstr(AArch64::Bcc, {}, {}).addImm(CC2).addMBB(DestMBB);
1521 I.eraseFromParent();
1522 return true;
1523}
1524
1525bool AArch64InstructionSelector::tryOptCompareBranchFedByICmp(
1526 MachineInstr &I, MachineInstr &ICmp, MachineIRBuilder &MIB) const {
1527 assert(ICmp.getOpcode() == TargetOpcode::G_ICMP);
1528 assert(I.getOpcode() == TargetOpcode::G_BRCOND);
1529 // Attempt to optimize the G_BRCOND + G_ICMP into a TB(N)Z/CB(N)Z.
1530 //
1531 // Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z
1532 // instructions will not be produced, as they are conditional branch
1533 // instructions that do not set flags.
1534 if (!ProduceNonFlagSettingCondBr)
1535 return false;
1536
1537 MachineRegisterInfo &MRI = *MIB.getMRI();
1538 MachineBasicBlock *DestMBB = I.getOperand(1).getMBB();
1539 auto Pred =
1540 static_cast<CmpInst::Predicate>(ICmp.getOperand(1).getPredicate());
1541 Register LHS = ICmp.getOperand(2).getReg();
1542 Register RHS = ICmp.getOperand(3).getReg();
1543
1544 // We're allowed to emit a TB(N)Z/CB(N)Z. Try to do that.
1545 auto VRegAndVal = getConstantVRegValWithLookThrough(RHS, MRI);
1546 MachineInstr *AndInst = getOpcodeDef(TargetOpcode::G_AND, LHS, MRI);
1547
1548 // When we can emit a TB(N)Z, prefer that.
1549 //
1550 // Handle non-commutative condition codes first.
1551 // Note that we don't want to do this when we have a G_AND because it can
1552 // become a tst. The tst will make the test bit in the TB(N)Z redundant.
1553 if (VRegAndVal && !AndInst) {
1554 int64_t C = VRegAndVal->Value.getSExtValue();
1555
1556 // When we have a greater-than comparison, we can just test if the msb is
1557 // zero.
1558 if (C == -1 && Pred == CmpInst::ICMP_SGT) {
1559 uint64_t Bit = MRI.getType(LHS).getSizeInBits() - 1;
1560 emitTestBit(LHS, Bit, /*IsNegative = */ false, DestMBB, MIB);
1561 I.eraseFromParent();
1562 return true;
1563 }
1564
1565 // When we have a less than comparison, we can just test if the msb is not
1566 // zero.
1567 if (C == 0 && Pred == CmpInst::ICMP_SLT) {
1568 uint64_t Bit = MRI.getType(LHS).getSizeInBits() - 1;
1569 emitTestBit(LHS, Bit, /*IsNegative = */ true, DestMBB, MIB);
1570 I.eraseFromParent();
1571 return true;
1572 }
1573 }
1574
1575 // Attempt to handle commutative condition codes. Right now, that's only
1576 // eq/ne.
1577 if (ICmpInst::isEquality(Pred)) {
1578 if (!VRegAndVal) {
1579 std::swap(RHS, LHS);
1580 VRegAndVal = getConstantVRegValWithLookThrough(RHS, MRI);
1581 AndInst = getOpcodeDef(TargetOpcode::G_AND, LHS, MRI);
1582 }
1583
1584 if (VRegAndVal && VRegAndVal->Value == 0) {
1585 // If there's a G_AND feeding into this branch, try to fold it away by
1586 // emitting a TB(N)Z instead.
1587 //
1588 // Note: If we have LT, then it *is* possible to fold, but it wouldn't be
1589 // beneficial. When we have an AND and LT, we need a TST/ANDS, so folding
1590 // would be redundant.
1591 if (AndInst &&
1592 tryOptAndIntoCompareBranch(
1593 *AndInst, /*Invert = */ Pred == CmpInst::ICMP_NE, DestMBB, MIB)) {
1594 I.eraseFromParent();
1595 return true;
1596 }
1597
1598 // Otherwise, try to emit a CB(N)Z instead.
1599 auto LHSTy = MRI.getType(LHS);
1600 if (!LHSTy.isVector() && LHSTy.getSizeInBits() <= 64) {
1601 emitCBZ(LHS, /*IsNegative = */ Pred == CmpInst::ICMP_NE, DestMBB, MIB);
1602 I.eraseFromParent();
1603 return true;
1604 }
1605 }
1606 }
1607
1608 return false;
1609}
1610
1611bool AArch64InstructionSelector::selectCompareBranchFedByICmp(
1612 MachineInstr &I, MachineInstr &ICmp, MachineIRBuilder &MIB) const {
1613 assert(ICmp.getOpcode() == TargetOpcode::G_ICMP);
1614 assert(I.getOpcode() == TargetOpcode::G_BRCOND);
1615 if (tryOptCompareBranchFedByICmp(I, ICmp, MIB))
1616 return true;
1617
1618 // Couldn't optimize. Emit a compare + a Bcc.
1619 MachineBasicBlock *DestMBB = I.getOperand(1).getMBB();
1620 auto PredOp = ICmp.getOperand(1);
1621 emitIntegerCompare(ICmp.getOperand(2), ICmp.getOperand(3), PredOp, MIB);
1622 const AArch64CC::CondCode CC = changeICMPPredToAArch64CC(
1623 static_cast<CmpInst::Predicate>(PredOp.getPredicate()));
1624 MIB.buildInstr(AArch64::Bcc, {}, {}).addImm(CC).addMBB(DestMBB);
1625 I.eraseFromParent();
1626 return true;
1627}
1628
1629bool AArch64InstructionSelector::selectCompareBranch(
1630 MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const {
1631 Register CondReg = I.getOperand(0).getReg();
1632 MachineInstr *CCMI = MRI.getVRegDef(CondReg);
1633 if (CCMI->getOpcode() == TargetOpcode::G_TRUNC) {
1634 CondReg = CCMI->getOperand(1).getReg();
1635 CCMI = MRI.getVRegDef(CondReg);
1636 }
1637
1638 // Try to select the G_BRCOND using whatever is feeding the condition if
1639 // possible.
1640 MachineIRBuilder MIB(I);
1641 unsigned CCMIOpc = CCMI->getOpcode();
1642 if (CCMIOpc == TargetOpcode::G_FCMP)
1643 return selectCompareBranchFedByFCmp(I, *CCMI, MIB);
1644 if (CCMIOpc == TargetOpcode::G_ICMP)
1645 return selectCompareBranchFedByICmp(I, *CCMI, MIB);
1646
1647 // Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z
1648 // instructions will not be produced, as they are conditional branch
1649 // instructions that do not set flags.
1650 if (ProduceNonFlagSettingCondBr) {
1651 emitTestBit(CondReg, /*Bit = */ 0, /*IsNegative = */ true,
1652 I.getOperand(1).getMBB(), MIB);
1653 I.eraseFromParent();
1654 return true;
1655 }
1656
1657 // Can't emit TB(N)Z/CB(N)Z. Emit a tst + bcc instead.
1658 auto TstMI =
1659 MIB.buildInstr(AArch64::ANDSWri, {LLT::scalar(32)}, {CondReg}).addImm(1);
1660 constrainSelectedInstRegOperands(*TstMI, TII, TRI, RBI);
1661 auto Bcc = MIB.buildInstr(AArch64::Bcc)
1662 .addImm(AArch64CC::EQ)
1663 .addMBB(I.getOperand(1).getMBB());
1664 I.eraseFromParent();
1665 return constrainSelectedInstRegOperands(*Bcc, TII, TRI, RBI);
1666}
1667
1668/// Returns the element immediate value of a vector shift operand if found.
1669/// This needs to detect a splat-like operation, e.g. a G_BUILD_VECTOR.
1670static Optional<int64_t> getVectorShiftImm(Register Reg,
1671 MachineRegisterInfo &MRI) {
1672 assert(MRI.getType(Reg).isVector() && "Expected a *vector* shift operand");
1673 MachineInstr *OpMI = MRI.getVRegDef(Reg);
1674 assert(OpMI && "Expected to find a vreg def for vector shift operand");
1675 return getAArch64VectorSplatScalar(*OpMI, MRI);
1676}
1677
1678/// Matches and returns the shift immediate value for a SHL instruction given
1679/// a shift operand.
1680static Optional<int64_t> getVectorSHLImm(LLT SrcTy, Register Reg, MachineRegisterInfo &MRI) {
1681 Optional<int64_t> ShiftImm = getVectorShiftImm(Reg, MRI);
1682 if (!ShiftImm)
1683 return None;
1684 // Check the immediate is in range for a SHL.
1685 int64_t Imm = *ShiftImm;
1686 if (Imm < 0)
1687 return None;
1688 switch (SrcTy.getElementType().getSizeInBits()) {
1689 default:
1690 LLVM_DEBUG(dbgs() << "Unhandled element type for vector shift");
1691 return None;
1692 case 8:
1693 if (Imm > 7)
1694 return None;
1695 break;
1696 case 16:
1697 if (Imm > 15)
1698 return None;
1699 break;
1700 case 32:
1701 if (Imm > 31)
1702 return None;
1703 break;
1704 case 64:
1705 if (Imm > 63)
1706 return None;
1707 break;
1708 }
1709 return Imm;
1710}
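The switch above is a per-element-size range check: a vector SHL immediate must lie in [0, elem_bits - 1]. An equivalent generic predicate, sketched as a hypothetical helper (not the LLVM function):

  #include <cstdint>

  bool isLegalVectorShlImm(int64_t Imm, unsigned ElemBits) {
    return Imm >= 0 && static_cast<uint64_t>(Imm) < ElemBits;
  }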
1711
1712bool AArch64InstructionSelector::selectVectorSHL(
1713 MachineInstr &I, MachineRegisterInfo &MRI) const {
1714 assert(I.getOpcode() == TargetOpcode::G_SHL);
1715 Register DstReg = I.getOperand(0).getReg();
1716 const LLT Ty = MRI.getType(DstReg);
1717 Register Src1Reg = I.getOperand(1).getReg();
1718 Register Src2Reg = I.getOperand(2).getReg();
1719
1720 if (!Ty.isVector())
1721 return false;
1722
1723 // Check if we have a vector of constants on RHS that we can select as the
1724 // immediate form.
1725 Optional<int64_t> ImmVal = getVectorSHLImm(Ty, Src2Reg, MRI);
1726
1727 unsigned Opc = 0;
1728 if (Ty == LLT::vector(2, 64)) {
1729 Opc = ImmVal ? AArch64::SHLv2i64_shift : AArch64::USHLv2i64;
1730 } else if (Ty == LLT::vector(4, 32)) {
1731 Opc = ImmVal ? AArch64::SHLv4i32_shift : AArch64::USHLv4i32;
1732 } else if (Ty == LLT::vector(2, 32)) {
1733 Opc = ImmVal ? AArch64::SHLv2i32_shift : AArch64::USHLv2i32;
1734 } else if (Ty == LLT::vector(4, 16)) {
1735 Opc = ImmVal ? AArch64::SHLv4i16_shift : AArch64::USHLv4i16;
1736 } else if (Ty == LLT::vector(8, 16)) {
1737 Opc = ImmVal ? AArch64::SHLv8i16_shift : AArch64::USHLv8i16;
1738 } else if (Ty == LLT::vector(16, 8)) {
1739 Opc = ImmVal ? AArch64::SHLv16i8_shift : AArch64::USHLv16i8;
1740 } else if (Ty == LLT::vector(8, 8)) {
1741 Opc = ImmVal ? AArch64::SHLv8i8_shift : AArch64::USHLv8i8;
1742 } else {
1743 LLVM_DEBUG(dbgs() << "Unhandled G_SHL type");
1744 return false;
1745 }
1746
1747 MachineIRBuilder MIB(I);
1748 auto Shl = MIB.buildInstr(Opc, {DstReg}, {Src1Reg});
1749 if (ImmVal)
1750 Shl.addImm(*ImmVal);
1751 else
1752 Shl.addUse(Src2Reg);
1753 constrainSelectedInstRegOperands(*Shl, TII, TRI, RBI);
1754 I.eraseFromParent();
1755 return true;
1756}
1757
1758bool AArch64InstructionSelector::selectVectorAshrLshr(
1759 MachineInstr &I, MachineRegisterInfo &MRI) const {
1760 assert(I.getOpcode() == TargetOpcode::G_ASHR ||
1761 I.getOpcode() == TargetOpcode::G_LSHR);
1762 Register DstReg = I.getOperand(0).getReg();
1763 const LLT Ty = MRI.getType(DstReg);
1764 Register Src1Reg = I.getOperand(1).getReg();
1765 Register Src2Reg = I.getOperand(2).getReg();
1766
1767 if (!Ty.isVector())
1768 return false;
1769
1770 bool IsASHR = I.getOpcode() == TargetOpcode::G_ASHR;
1771
1772 // We expect the immediate case to be lowered in the PostLegalCombiner to
1773 // AArch64ISD::VASHR or AArch64ISD::VLSHR equivalents.
1774
1775 // There is no shift-right-by-register instruction, but the shift-left-by-
1776 // register instruction takes a signed value, where negative amounts specify
1777 // a right shift.
1778
1779 unsigned Opc = 0;
1780 unsigned NegOpc = 0;
1781 const TargetRegisterClass *RC =
1782 getRegClassForTypeOnBank(Ty, RBI.getRegBank(AArch64::FPRRegBankID), RBI);
1783 if (Ty == LLT::vector(2, 64)) {
1784 Opc = IsASHR ? AArch64::SSHLv2i64 : AArch64::USHLv2i64;
1785 NegOpc = AArch64::NEGv2i64;
1786 } else if (Ty == LLT::vector(4, 32)) {
1787 Opc = IsASHR ? AArch64::SSHLv4i32 : AArch64::USHLv4i32;
1788 NegOpc = AArch64::NEGv4i32;
1789 } else if (Ty == LLT::vector(2, 32)) {
1790 Opc = IsASHR ? AArch64::SSHLv2i32 : AArch64::USHLv2i32;
1791 NegOpc = AArch64::NEGv2i32;
1792 } else if (Ty == LLT::vector(4, 16)) {
1793 Opc = IsASHR ? AArch64::SSHLv4i16 : AArch64::USHLv4i16;
1794 NegOpc = AArch64::NEGv4i16;
1795 } else if (Ty == LLT::vector(8, 16)) {
1796 Opc = IsASHR ? AArch64::SSHLv8i16 : AArch64::USHLv8i16;
1797 NegOpc = AArch64::NEGv8i16;
1798 } else if (Ty == LLT::vector(16, 8)) {
1799 Opc = IsASHR ? AArch64::SSHLv16i8 : AArch64::USHLv16i8;
1800 NegOpc = AArch64::NEGv16i8;
1801 } else if (Ty == LLT::vector(8, 8)) {
1802 Opc = IsASHR ? AArch64::SSHLv8i8 : AArch64::USHLv8i8;
1803 NegOpc = AArch64::NEGv8i8;
1804 } else {
1805 LLVM_DEBUG(dbgs() << "Unhandled G_ASHR type");
1806 return false;
1807 }
1808
1809 MachineIRBuilder MIB(I);
1810 auto Neg = MIB.buildInstr(NegOpc, {RC}, {Src2Reg});
1811 constrainSelectedInstRegOperands(*Neg, TII, TRI, RBI);
1812 auto SShl = MIB.buildInstr(Opc, {DstReg}, {Src1Reg, Neg});
1813 constrainSelectedInstRegOperands(*SShl, TII, TRI, RBI);
1814 I.eraseFromParent();
1815 return true;
1816}
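A scalar model of the negate-then-shift-left trick used above, for illustration: SSHL/USHL shift left for non-negative amounts and right for negative ones, so a right shift by c becomes a left shift by -c. The sketch assumes a two's-complement arithmetic right shift for the signed case:

  #include <cassert>
  #include <cstdint>

  int64_t modelSShl(int64_t X, int64_t Amt) {
    return Amt >= 0 ? X << Amt : X >> -Amt;
  }

  int main() {
    assert(modelSShl(-16, -2) == -4); // G_ASHR x, 2 lowered as SSHL x, NEG(2)
    assert(modelSShl(3, 4) == 48);
  }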
1817
1818bool AArch64InstructionSelector::selectVaStartAAPCS(
1819 MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const {
1820 return false;
1821}
1822
1823bool AArch64InstructionSelector::selectVaStartDarwin(
1824 MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const {
1825 AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
1826 Register ListReg = I.getOperand(0).getReg();
1827
1828 Register ArgsAddrReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
1829
1830 auto MIB =
1831 BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::ADDXri))
1832 .addDef(ArgsAddrReg)
1833 .addFrameIndex(FuncInfo->getVarArgsStackIndex())
1834 .addImm(0)
1835 .addImm(0);
1836
1837 constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
1838
1839 MIB = BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::STRXui))
1840 .addUse(ArgsAddrReg)
1841 .addUse(ListReg)
1842 .addImm(0)
1843 .addMemOperand(*I.memoperands_begin());
1844
1845 constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
1846 I.eraseFromParent();
1847 return true;
1848}
1849
1850void AArch64InstructionSelector::materializeLargeCMVal(
1851 MachineInstr &I, const Value *V, unsigned OpFlags) const {
1852 MachineBasicBlock &MBB = *I.getParent();
1853 MachineFunction &MF = *MBB.getParent();
1854 MachineRegisterInfo &MRI = MF.getRegInfo();
1855 MachineIRBuilder MIB(I);
1856
1857 auto MovZ = MIB.buildInstr(AArch64::MOVZXi, {&AArch64::GPR64RegClass}, {});
1858 MovZ->addOperand(MF, I.getOperand(1));
1859 MovZ->getOperand(1).setTargetFlags(OpFlags | AArch64II::MO_G0 |
1860 AArch64II::MO_NC);
1861 MovZ->addOperand(MF, MachineOperand::CreateImm(0));
1862 constrainSelectedInstRegOperands(*MovZ, TII, TRI, RBI);
1863
1864 auto BuildMovK = [&](Register SrcReg, unsigned char Flags, unsigned Offset,
1865 Register ForceDstReg) {
1866 Register DstReg = ForceDstReg
1867 ? ForceDstReg
1868 : MRI.createVirtualRegister(&AArch64::GPR64RegClass);
1869 auto MovI = MIB.buildInstr(AArch64::MOVKXi).addDef(DstReg).addUse(SrcReg);
1870 if (auto *GV = dyn_cast<GlobalValue>(V)) {
1871 MovI->addOperand(MF, MachineOperand::CreateGA(
1872 GV, MovZ->getOperand(1).getOffset(), Flags));
1873 } else {
1874 MovI->addOperand(
1875 MF, MachineOperand::CreateBA(cast<BlockAddress>(V),
1876 MovZ->getOperand(1).getOffset(), Flags));
1877 }
1878 MovI->addOperand(MF, MachineOperand::CreateImm(Offset));
1879 constrainSelectedInstRegOperands(*MovI, TII, TRI, RBI);
1880 return DstReg;
1881 };
1882 Register DstReg = BuildMovK(MovZ.getReg(0),
1883 AArch64II::MO_G1 | AArch64II::MO_NC, 16, 0);
1884 DstReg = BuildMovK(DstReg, AArch64II::MO_G2 | AArch64II::MO_NC, 32, 0);
1885 BuildMovK(DstReg, AArch64II::MO_G3, 48, I.getOperand(0).getReg());
1886}
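The chain built above materializes a 64-bit value 16 bits at a time: MOVZ seeds one halfword and each MOVK inserts another without disturbing the rest. A standalone model of that composition, for illustration (relocated symbol addresses replaced by a plain immediate):

  #include <cassert>
  #include <cstdint>

  uint64_t movz(uint64_t Imm16, unsigned Shift) { return Imm16 << Shift; }
  uint64_t movk(uint64_t Old, uint64_t Imm16, unsigned Shift) {
    return (Old & ~(0xffffull << Shift)) | (Imm16 << Shift);
  }

  int main() {
    uint64_t V = 0x0123456789abcdefull;
    uint64_t R = movz(V & 0xffff, 0);     // MOVZ, MO_G0
    R = movk(R, (V >> 16) & 0xffff, 16);  // MOVK, MO_G1
    R = movk(R, (V >> 32) & 0xffff, 32);  // MOVK, MO_G2
    R = movk(R, (V >> 48) & 0xffff, 48);  // MOVK, MO_G3
    assert(R == V);
  }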
1887
1888bool AArch64InstructionSelector::preISelLower(MachineInstr &I) {
1889 MachineBasicBlock &MBB = *I.getParent();
1890 MachineFunction &MF = *MBB.getParent();
1891 MachineRegisterInfo &MRI = MF.getRegInfo();
1892
1893 switch (I.getOpcode()) {
1894 case TargetOpcode::G_SHL:
1895 case TargetOpcode::G_ASHR:
1896 case TargetOpcode::G_LSHR: {
1897 // These shifts are legalized to have 64 bit shift amounts because we want
1898 // to take advantage of the existing imported selection patterns that assume
1899 // the immediates are s64s. However, if the shifted type is 32 bits and for
1900 // some reason we receive input GMIR that has an s64 shift amount that's not
1901 // a G_CONSTANT, insert a truncate so that we can still select the s32
1902 // register-register variant.
1903 Register SrcReg = I.getOperand(1).getReg();
1904 Register ShiftReg = I.getOperand(2).getReg();
1905 const LLT ShiftTy = MRI.getType(ShiftReg);
1906 const LLT SrcTy = MRI.getType(SrcReg);
1907 if (SrcTy.isVector())
1908 return false;
1909 assert(!ShiftTy.isVector() && "unexpected vector shift ty");
1910 if (SrcTy.getSizeInBits() != 32 || ShiftTy.getSizeInBits() != 64)
1911 return false;
1912 auto *AmtMI = MRI.getVRegDef(ShiftReg);
1913 assert(AmtMI && "could not find a vreg definition for shift amount");
1914 if (AmtMI->getOpcode() != TargetOpcode::G_CONSTANT) {
1915 // Insert a subregister copy to implement a 64->32 trunc
1916 MachineIRBuilder MIB(I);
1917 auto Trunc = MIB.buildInstr(TargetOpcode::COPY, {SrcTy}, {})
1918 .addReg(ShiftReg, 0, AArch64::sub_32);
1919 MRI.setRegBank(Trunc.getReg(0), RBI.getRegBank(AArch64::GPRRegBankID));
1920 I.getOperand(2).setReg(Trunc.getReg(0));
1921 }
1922 return true;
1923 }
1924 case TargetOpcode::G_STORE: {
1925 bool Changed = contractCrossBankCopyIntoStore(I, MRI);
1926 MachineOperand &SrcOp = I.getOperand(0);
1927 if (MRI.getType(SrcOp.getReg()).isPointer()) {
1928 // Allow matching with imported patterns for stores of pointers. Unlike
1929 // G_LOAD/G_PTR_ADD, we may not have selected all users. So, emit a copy
1930 // and constrain.
1931 MachineIRBuilder MIB(I);
1932 auto Copy = MIB.buildCopy(LLT::scalar(64), SrcOp);
1933 Register NewSrc = Copy.getReg(0);
1934 SrcOp.setReg(NewSrc);
1935 RBI.constrainGenericRegister(NewSrc, AArch64::GPR64RegClass, MRI);
1936 Changed = true;
1937 }
1938 return Changed;
1939 }
1940 case TargetOpcode::G_PTR_ADD:
1941 return convertPtrAddToAdd(I, MRI);
1942 case TargetOpcode::G_LOAD: {
1943 // For scalar loads of pointers, we try to convert the dest type from p0
1944 // to s64 so that our imported patterns can match. Like with the G_PTR_ADD
1945 // conversion, this should be ok because all users should have been
1946 // selected already, so the type doesn't matter for them.
1947 Register DstReg = I.getOperand(0).getReg();
1948 const LLT DstTy = MRI.getType(DstReg);
1949 if (!DstTy.isPointer())
1950 return false;
1951 MRI.setType(DstReg, LLT::scalar(64));
1952 return true;
1953 }
1954 case AArch64::G_DUP: {
1955 // Convert the type from p0 to s64 to help selection.
1956 LLT DstTy = MRI.getType(I.getOperand(0).getReg());
1957 if (!DstTy.getElementType().isPointer())
1958 return false;
1959 MachineIRBuilder MIB(I);
1960 auto NewSrc = MIB.buildCopy(LLT::scalar(64), I.getOperand(1).getReg());
1961 MRI.setType(I.getOperand(0).getReg(),
1962 DstTy.changeElementType(LLT::scalar(64)));
1963 MRI.setRegClass(NewSrc.getReg(0), &AArch64::GPR64RegClass);
1964 I.getOperand(1).setReg(NewSrc.getReg(0));
1965 return true;
1966 }
1967 case TargetOpcode::G_UITOFP:
1968 case TargetOpcode::G_SITOFP: {
1969 // If both source and destination regbanks are FPR, then convert the opcode
1970 // to G_SITOF so that the importer can select it to an fpr variant.
1971 // Otherwise, it ends up matching an fpr/gpr variant and adding a cross-bank
1972 // copy.
1973 Register SrcReg = I.getOperand(1).getReg();
1974 LLT SrcTy = MRI.getType(SrcReg);
1975 LLT DstTy = MRI.getType(I.getOperand(0).getReg());
1976 if (SrcTy.isVector() || SrcTy.getSizeInBits() != DstTy.getSizeInBits())
1977 return false;
1978
1979 if (RBI.getRegBank(SrcReg, MRI, TRI)->getID() == AArch64::FPRRegBankID) {
1980 if (I.getOpcode() == TargetOpcode::G_SITOFP)
1981 I.setDesc(TII.get(AArch64::G_SITOF));
1982 else
1983 I.setDesc(TII.get(AArch64::G_UITOF));
1984 return true;
1985 }
1986 return false;
1987 }
1988 default:
1989 return false;
1990 }
1991}
1992
1993/// This lowering tries to look for G_PTR_ADD instructions and then converts
1994/// them to a standard G_ADD with a COPY on the source.
1995///
1996/// The motivation behind this is to expose the add semantics to the imported
1997/// tablegen patterns. We shouldn't need to check for uses being loads/stores,
1998/// because the selector works bottom up, uses before defs. By the time we
1999/// end up trying to select a G_PTR_ADD, we should have already attempted to
2000/// fold this into addressing modes and were therefore unsuccessful.
2001bool AArch64InstructionSelector::convertPtrAddToAdd(
2002 MachineInstr &I, MachineRegisterInfo &MRI) {
2003 assert(I.getOpcode() == TargetOpcode::G_PTR_ADD && "Expected G_PTR_ADD");
2004 Register DstReg = I.getOperand(0).getReg();
2005 Register AddOp1Reg = I.getOperand(1).getReg();
2006 const LLT PtrTy = MRI.getType(DstReg);
2007 if (PtrTy.getAddressSpace() != 0)
2008 return false;
2009
2010 MachineIRBuilder MIB(I);
2011 const LLT CastPtrTy = PtrTy.isVector() ? LLT::vector(2, 64) : LLT::scalar(64);
2012 auto PtrToInt = MIB.buildPtrToInt(CastPtrTy, AddOp1Reg);
2013 // Set regbanks on the registers.
2014 if (PtrTy.isVector())
2015 MRI.setRegBank(PtrToInt.getReg(0), RBI.getRegBank(AArch64::FPRRegBankID));
2016 else
2017 MRI.setRegBank(PtrToInt.getReg(0), RBI.getRegBank(AArch64::GPRRegBankID));
2018
2019 // Now turn the %dst(p0) = G_PTR_ADD %base, off into:
2020 // %dst(intty) = G_ADD %intbase, off
2021 I.setDesc(TII.get(TargetOpcode::G_ADD));
2022 MRI.setType(DstReg, CastPtrTy);
2023 I.getOperand(1).setReg(PtrToInt.getReg(0));
2024 if (!select(*PtrToInt)) {
2025 LLVM_DEBUG(dbgs() << "Failed to select G_PTRTOINT in convertPtrAddToAdd");
2026 return false;
2027 }
2028
2029 // Also take the opportunity here to try to do some optimization.
2030 // Try to convert this into a G_SUB if the offset is a 0-x negate idiom.
2031 Register NegatedReg;
2032 if (!mi_match(I.getOperand(2).getReg(), MRI, m_Neg(m_Reg(NegatedReg))))
2033 return true;
2034 I.getOperand(2).setReg(NegatedReg);
2035 I.setDesc(TII.get(TargetOpcode::G_SUB));
2036 return true;
2037}
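Putting both steps together, a hypothetical before/after in the same GMIR-style notation the comments above use (register banks and names are illustrative):

  Before:
    %zero:gpr(s64) = G_CONSTANT i64 0
    %neg:gpr(s64)  = G_SUB %zero, %x
    %dst:gpr(p0)   = G_PTR_ADD %base, %neg

  After:
    %intbase:gpr(s64) = G_PTRTOINT %base
    %dst:gpr(s64)     = G_SUB %intbase, %x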
2038
2039bool AArch64InstructionSelector::earlySelectSHL(
2040 MachineInstr &I, MachineRegisterInfo &MRI) const {
2041 // We try to match the immediate variant of LSL, which is actually an alias
2042 // for a special case of UBFM. Otherwise, we fall back to the imported
2043 // selector which will match the register variant.
2044 assert(I.getOpcode() == TargetOpcode::G_SHL && "unexpected op");
2045 const auto &MO = I.getOperand(2);
2046 auto VRegAndVal = getConstantVRegVal(MO.getReg(), MRI);
2047 if (!VRegAndVal)
2048 return false;
2049
2050 const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
2051 if (DstTy.isVector())
2052 return false;
2053 bool Is64Bit = DstTy.getSizeInBits() == 64;
2054 auto Imm1Fn = Is64Bit ? selectShiftA_64(MO) : selectShiftA_32(MO);
2055 auto Imm2Fn = Is64Bit ? selectShiftB_64(MO) : selectShiftB_32(MO);
2056 MachineIRBuilder MIB(I);
2057
2058 if (!Imm1Fn || !Imm2Fn)
2059 return false;
2060
2061 auto NewI =
2062 MIB.buildInstr(Is64Bit ? AArch64::UBFMXri : AArch64::UBFMWri,
2063 {I.getOperand(0).getReg()}, {I.getOperand(1).getReg()});
2064
2065 for (auto &RenderFn : *Imm1Fn)
2066 RenderFn(NewI);
2067 for (auto &RenderFn : *Imm2Fn)
2068 RenderFn(NewI);
2069
2070 I.eraseFromParent();
2071 return constrainSelectedInstRegOperands(*NewI, TII, TRI, RBI);
2072}
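A standalone check of the LSL-to-UBFM aliasing this routine relies on: for a 64-bit register, lsl #n (n > 0) is ubfm #((64 - n) % 64), #(63 - n), which inserts the low 64 - n bits at position n. This is an illustrative model, not the encoder the selector uses:

  #include <cassert>
  #include <cstdint>
  #include <initializer_list>

  uint64_t lslViaUbfm(uint64_t Src, unsigned N) {
    unsigned ImmR = (64 - N) % 64;
    unsigned ImmS = 63 - N;
    uint64_t Width = ImmS + 1; // for n > 0, ImmS < ImmR selects the insert form
    uint64_t Field = Width == 64 ? Src : Src & ((1ull << Width) - 1);
    return Field << ((64 - ImmR) % 64);
  }

  int main() {
    for (unsigned N : {1u, 5u, 31u, 63u})
      assert(lslViaUbfm(0x123456789abcdef0ull, N) == 0x123456789abcdef0ull << N);
  }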
2073
2074bool AArch64InstructionSelector::contractCrossBankCopyIntoStore(
2075 MachineInstr &I, MachineRegisterInfo &MRI) {
2076 assert(I.getOpcode() == TargetOpcode::G_STORE && "Expected G_STORE");
2077 // If we're storing a scalar, it doesn't matter what register bank that
2078 // scalar is on. All that matters is the size.
2079 //
2080 // So, if we see something like this (with a 32-bit scalar as an example):
2081 //
2082 // %x:gpr(s32) = ... something ...
2083 // %y:fpr(s32) = COPY %x:gpr(s32)
2084 // G_STORE %y:fpr(s32)
2085 //
2086 // We can fix this up into something like this:
2087 //
2088 // G_STORE %x:gpr(s32)
2089 //
2090 // And then continue the selection process normally.
2091 Register DefDstReg = getSrcRegIgnoringCopies(I.getOperand(0).getReg(), MRI);
2092 if (!DefDstReg.isValid())
2093 return false;
2094 LLT DefDstTy = MRI.getType(DefDstReg);
2095 Register StoreSrcReg = I.getOperand(0).getReg();
2096 LLT StoreSrcTy = MRI.getType(StoreSrcReg);
2097
2098 // If we get something strange like a physical register, then we shouldn't
2099 // go any further.
2100 if (!DefDstTy.isValid())
2101 return false;
2102
2103 // Are the source and dst types the same size?
2104 if (DefDstTy.getSizeInBits() != StoreSrcTy.getSizeInBits())
2105 return false;
2106
2107 if (RBI.getRegBank(StoreSrcReg, MRI, TRI) ==
2108 RBI.getRegBank(DefDstReg, MRI, TRI))
2109 return false;
2110
2111 // We have a cross-bank copy, which is entering a store. Let's fold it.
2112 I.getOperand(0).setReg(DefDstReg);
2113 return true;
2114}
2115
2116bool AArch64InstructionSelector::earlySelect(MachineInstr &I) const {
2117 assert(I.getParent() && "Instruction should be in a basic block!");
2118 assert(I.getParent()->getParent() && "Instruction should be in a function!");
2119
2120 MachineBasicBlock &MBB = *I.getParent();
2121 MachineFunction &MF = *MBB.getParent();
2122 MachineRegisterInfo &MRI = MF.getRegInfo();
2123
2124 switch (I.getOpcode()) {
2125 case AArch64::G_DUP: {
2126 // Before selecting a DUP instruction, check if it is better selected as a
2127 // MOV or load from a constant pool.
2128 Register Src = I.getOperand(1).getReg();
2129 auto ValAndVReg = getConstantVRegValWithLookThrough(Src, MRI);
2130 if (!ValAndVReg)
2131 return false;
2132 LLVMContext &Ctx = MF.getFunction().getContext();
2133 Register Dst = I.getOperand(0).getReg();
2134 auto *CV = ConstantDataVector::getSplat(
2135 MRI.getType(Dst).getNumElements(),
2136 ConstantInt::get(Type::getIntNTy(Ctx, MRI.getType(Src).getSizeInBits()),
2137 ValAndVReg->Value));
2138 MachineIRBuilder MIRBuilder(I);
2139 if (!emitConstantVector(Dst, CV, MIRBuilder, MRI))
2140 return false;
2141 I.eraseFromParent();
2142 return true;
2143 }
2144 case TargetOpcode::G_BR: {
2145 // If the branch jumps to the fallthrough block, don't bother emitting it.
2146 // Only do this for -O0 for a good code size improvement, because when
2147 // optimizations are enabled we want to leave this choice to
2148 // MachineBlockPlacement.
2149 bool EnableOpt = MF.getTarget().getOptLevel() != CodeGenOpt::None;
2150 if (EnableOpt || !MBB.isLayoutSuccessor(I.getOperand(0).getMBB()))
2151 return false;
2152 I.eraseFromParent();
2153 return true;
2154 }
2155 case TargetOpcode::G_SHL:
2156 return earlySelectSHL(I, MRI);
2157 case TargetOpcode::G_CONSTANT: {
2158 bool IsZero = false;
2159 if (I.getOperand(1).isCImm())
2160 IsZero = I.getOperand(1).getCImm()->getZExtValue() == 0;
2161 else if (I.getOperand(1).isImm())
2162 IsZero = I.getOperand(1).getImm() == 0;
2163
2164 if (!IsZero)
2165 return false;
2166
2167 Register DefReg = I.getOperand(0).getReg();
2168 LLT Ty = MRI.getType(DefReg);
2169 if (Ty.getSizeInBits() == 64) {
2170 I.getOperand(1).ChangeToRegister(AArch64::XZR, false);
2171 RBI.constrainGenericRegister(DefReg, AArch64::GPR64RegClass, MRI);
2172 } else if (Ty.getSizeInBits() == 32) {
2173 I.getOperand(1).ChangeToRegister(AArch64::WZR, false);
2174 RBI.constrainGenericRegister(DefReg, AArch64::GPR32RegClass, MRI);
2175 } else
2176 return false;
2177
2178 I.setDesc(TII.get(TargetOpcode::COPY));
2179 return true;
2180 }
2181
2182 case TargetOpcode::G_ADD: {
2183 // Check if this is being fed by a G_ICMP on either side.
2184 //
2185 // (cmp pred, x, y) + z
2186 //
2187 // In the above case, when the cmp is true, we increment z by 1. So, we can
2188 // fold the add into the cset for the cmp by using cinc.
2189 //
2190 // FIXME: This would probably be a lot nicer in PostLegalizerLowering.
2191 Register X = I.getOperand(1).getReg();
2192
2193 // Only handle scalars. Scalar G_ICMP is only legal for s32, so bail out
2194 // early if we see it.
2195 LLT Ty = MRI.getType(X);
2196 if (Ty.isVector() || Ty.getSizeInBits() != 32)
2197 return false;
2198
2199 Register CmpReg = I.getOperand(2).getReg();
2200 MachineInstr *Cmp = getOpcodeDef(TargetOpcode::G_ICMP, CmpReg, MRI);
2201 if (!Cmp) {
2202 std::swap(X, CmpReg);
2203 Cmp = getOpcodeDef(TargetOpcode::G_ICMP, CmpReg, MRI);
2204 if (!Cmp)
2205 return false;
2206 }
2207 MachineIRBuilder MIRBuilder(I);
2208 auto Pred =
2209 static_cast<CmpInst::Predicate>(Cmp->getOperand(1).getPredicate());
2210 emitIntegerCompare(Cmp->getOperand(2), Cmp->getOperand(3),
2211 Cmp->getOperand(1), MIRBuilder);
2212 emitCSetForICMP(I.getOperand(0).getReg(), Pred, MIRBuilder, X);
2213 I.eraseFromParent();
2214 return true;
2215 }
2216 default:
2217 return false;
2218 }
2219}
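The G_ADD case above leans on a small identity: adding a boolean compare result to z is a conditional increment, which is exactly what CINC (an alias of CSINC) computes. A trivial standalone check, for illustration:

  #include <cassert>
  #include <cstdint>
  #include <initializer_list>

  int main() {
    for (int32_t Z : {-1, 0, 7})
      for (bool Cmp : {false, true})
        // z + (cmp ? 1 : 0) == CINC z, cond == cmp ? z + 1 : z
        assert(Z + (Cmp ? 1 : 0) == (Cmp ? Z + 1 : Z));
  }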
2220
2221bool AArch64InstructionSelector::select(MachineInstr &I) {
2222 assert(I.getParent() && "Instruction should be in a basic block!");
2223 assert(I.getParent()->getParent() && "Instruction should be in a function!");
2224
2225 MachineBasicBlock &MBB = *I.getParent();
2226 MachineFunction &MF = *MBB.getParent();
2227 MachineRegisterInfo &MRI = MF.getRegInfo();
2228
2229 const AArch64Subtarget *Subtarget =
2230 &static_cast<const AArch64Subtarget &>(MF.getSubtarget());
2231 if (Subtarget->requiresStrictAlign()) {
2232 // We don't support this feature yet.
2233 LLVM_DEBUG(dbgs() << "AArch64 GISel does not support strict-align yet\n");
2234 return false;
2235 }
2236
2237 unsigned Opcode = I.getOpcode();
2238 // G_PHI requires same handling as PHI
2239 if (!I.isPreISelOpcode() || Opcode == TargetOpcode::G_PHI) {
2240 // Certain non-generic instructions also need some special handling.
2241
2242 if (Opcode == TargetOpcode::LOAD_STACK_GUARD)
2243 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2244
2245 if (Opcode == TargetOpcode::PHI || Opcode == TargetOpcode::G_PHI) {
2246 const Register DefReg = I.getOperand(0).getReg();
2247 const LLT DefTy = MRI.getType(DefReg);
2248
2249 const RegClassOrRegBank &RegClassOrBank =
2250 MRI.getRegClassOrRegBank(DefReg);
2251
2252 const TargetRegisterClass *DefRC
2253 = RegClassOrBank.dyn_cast<const TargetRegisterClass *>();
2254 if (!DefRC) {
2255 if (!DefTy.isValid()) {
2256 LLVM_DEBUG(dbgs() << "PHI operand has no type, not a gvreg?\n");
2257 return false;
2258 }
2259 const RegisterBank &RB = *RegClassOrBank.get<const RegisterBank *>();
2260 DefRC = getRegClassForTypeOnBank(DefTy, RB, RBI);
2261 if (!DefRC) {
2262 LLVM_DEBUG(dbgs() << "PHI operand has unexpected size/bank\n");
2263 return false;
2264 }
2265 }
2266
2267 I.setDesc(TII.get(TargetOpcode::PHI));
2268
2269 return RBI.constrainGenericRegister(DefReg, *DefRC, MRI);
2270 }
2271
2272 if (I.isCopy())
2273 return selectCopy(I, TII, MRI, TRI, RBI);
2274
2275 return true;
2276 }
2277
2278
2279 if (I.getNumOperands() != I.getNumExplicitOperands()) {
2280 LLVM_DEBUG(
2281 dbgs() << "Generic instruction has unexpected implicit operands\n");
2282 return false;
2283 }
2284
2285 // Try to do some lowering before we start instruction selecting. These
2286 // lowerings are purely transformations on the input G_MIR and so selection
2287 // must continue after any modification of the instruction.
2288 if (preISelLower(I)) {
2289 Opcode = I.getOpcode(); // The opcode may have been modified, refresh it.
2290 }
2291
2292 // There may be patterns that the importer can't handle optimally: it
2293 // selects them into a suboptimal sequence, so our custom C++ selection
2294 // code never gets a chance to improve them. Therefore, we make an early
2295 // selection attempt here to give priority to certain selection routines
2296 // over the imported ones.
2297 if (earlySelect(I))
2298 return true;
2299
2300 if (selectImpl(I, *CoverageInfo))
2301 return true;
2302
2303 LLT Ty =
2304 I.getOperand(0).isReg() ? MRI.getType(I.getOperand(0).getReg()) : LLT{};
2305
2306 MachineIRBuilder MIB(I);
2307
2308 switch (Opcode) {
2309 case TargetOpcode::G_SBFX:
2310 case TargetOpcode::G_UBFX: {
2311 static const unsigned OpcTable[2][2] = {
2312 {AArch64::UBFMWri, AArch64::UBFMXri},
2313 {AArch64::SBFMWri, AArch64::SBFMXri}};
2314 bool IsSigned = Opcode == TargetOpcode::G_SBFX;
2315 unsigned Size = Ty.getSizeInBits();
2316 unsigned Opc = OpcTable[IsSigned][Size == 64];
2317 auto Cst1 =
2318 getConstantVRegValWithLookThrough(I.getOperand(2).getReg(), MRI);
2319 assert(Cst1 && "Should have gotten a constant for src 1?");
2320 auto Cst2 =
2321 getConstantVRegValWithLookThrough(I.getOperand(3).getReg(), MRI);
2322 assert(Cst2 && "Should have gotten a constant for src 2?");
2323 auto LSB = Cst1->Value.getZExtValue();
2324 auto Width = Cst2->Value.getZExtValue();
2325 MachineIRBuilder MIB(I);
2326 auto BitfieldInst =
2327 MIB.buildInstr(Opc, {I.getOperand(0)}, {I.getOperand(1)})
2328 .addImm(LSB)
2329 .addImm(Width);
2330 I.eraseFromParent();
2331 return constrainSelectedInstRegOperands(*BitfieldInst, TII, TRI, RBI);
2332 }
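Editor's note: a minimal C++ model of the unsigned bitfield-extract semantics being selected above, illustrative only (the helper name ubfx32 and the 32-bit width are assumptions, not part of the selector):

    #include <cstdint>
    // Extract 'width' bits of 'x' starting at bit 'lsb' (G_UBFX-like
    // semantics); assumes 0 < width and lsb + width <= 32.
    uint32_t ubfx32(uint32_t x, unsigned lsb, unsigned width) {
      uint32_t mask = (width >= 32) ? ~0u : ((1u << width) - 1u);
      return (x >> lsb) & mask;
    }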
2333 case TargetOpcode::G_BRCOND:
2334 return selectCompareBranch(I, MF, MRI);
2335
2336 case TargetOpcode::G_BRINDIRECT: {
2337 I.setDesc(TII.get(AArch64::BR));
2338 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2339 }
2340
2341 case TargetOpcode::G_BRJT:
2342 return selectBrJT(I, MRI);
2343
2344 case AArch64::G_ADD_LOW: {
2345 // This op may have been separated from its ADRP companion by the localizer
2346 // or some other code motion pass. Given that many CPUs will try to
2347 // macro fuse these operations anyway, select this into a MOVaddr pseudo
2348 // which will later be expanded into an ADRP+ADD pair after scheduling.
2349 MachineInstr *BaseMI = MRI.getVRegDef(I.getOperand(1).getReg());
2350 if (BaseMI->getOpcode() != AArch64::ADRP) {
2351 I.setDesc(TII.get(AArch64::ADDXri));
2352 I.addOperand(MachineOperand::CreateImm(0));
2353 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2354 }
2355 assert(TM.getCodeModel() == CodeModel::Small &&
2356 "Expected small code model");
2357 MachineIRBuilder MIB(I);
2358 auto Op1 = BaseMI->getOperand(1);
2359 auto Op2 = I.getOperand(2);
2360 auto MovAddr = MIB.buildInstr(AArch64::MOVaddr, {I.getOperand(0)}, {})
2361 .addGlobalAddress(Op1.getGlobal(), Op1.getOffset(),
2362 Op1.getTargetFlags())
2363 .addGlobalAddress(Op2.getGlobal(), Op2.getOffset(),
2364 Op2.getTargetFlags());
2365 I.eraseFromParent();
2366 return constrainSelectedInstRegOperands(*MovAddr, TII, TRI, RBI);
2367 }
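Editor's note: a hedged sketch of the rewrite this case performs when the ADRP companion is found (the generic-MIR shown is illustrative; virtual register names and flag spellings are hypothetical):

    %adrp:gpr64 = ADRP target-flags(aarch64-page) @g
    %dst:gpr64 = G_ADD_LOW %adrp, target-flags(aarch64-pageoff, aarch64-nc) @g
    ; folded into a single pseudo, re-expanded to an adjacent ADRP+ADD after scheduling:
    %dst:gpr64 = MOVaddr target-flags(aarch64-page) @g, target-flags(aarch64-pageoff, aarch64-nc) @g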
2368
2369 case TargetOpcode::G_BSWAP: {
2370 // Handle vector types for G_BSWAP directly.
2371 Register DstReg = I.getOperand(0).getReg();
2372 LLT DstTy = MRI.getType(DstReg);
2373
2374 // We should only get vector types here; everything else is handled by the
2375 // importer right now.
2376 if (!DstTy.isVector() || DstTy.getSizeInBits() > 128) {
2377 LLVM_DEBUG(dbgs() << "Dst type for G_BSWAP currently unsupported.\n");
2378 return false;
2379 }
2380
2381 // Only handle 4 and 2 element vectors for now.
2382 // TODO: 16-bit elements.
2383 unsigned NumElts = DstTy.getNumElements();
2384 if (NumElts != 4 && NumElts != 2) {
2385 LLVM_DEBUG(dbgs() << "Unsupported number of elements for G_BSWAP.\n");
2386 return false;
2387 }
2388
2389 // Choose the correct opcode for the supported types. Right now, that's
2390 // v2s32, v4s32, and v2s64.
2391 unsigned Opc = 0;
2392 unsigned EltSize = DstTy.getElementType().getSizeInBits();
2393 if (EltSize == 32)
2394 Opc = (DstTy.getNumElements() == 2) ? AArch64::REV32v8i8
2395 : AArch64::REV32v16i8;
2396 else if (EltSize == 64)
2397 Opc = AArch64::REV64v16i8;
2398
2399 // We should always get something by the time we get here...
2400 assert(Opc != 0 && "Didn't get an opcode for G_BSWAP?");
2401
2402 I.setDesc(TII.get(Opc));
2403 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2404 }
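Editor's note: REV32/REV64 reverse the bytes within each 32-/64-bit chunk, which is exactly a lane-wise byte swap. A minimal scalar C++ model of one 32-bit lane, illustrative only (the function name is an assumption):

    #include <cstdint>
    // Byte-reverse one 32-bit lane; the REV32 vector forms apply this
    // to every 32-bit element of the register.
    uint32_t bswap32(uint32_t x) {
      return (x >> 24) | ((x >> 8) & 0x0000FF00u) |
             ((x << 8) & 0x00FF0000u) | (x << 24);
    }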
2405
2406 case TargetOpcode::G_FCONSTANT:
2407 case TargetOpcode::G_CONSTANT: {
2408 const bool isFP = Opcode == TargetOpcode::G_FCONSTANT;
2409
2410 const LLT s8 = LLT::scalar(8);
2411 const LLT s16 = LLT::scalar(16);
2412 const LLT s32 = LLT::scalar(32);
2413 const LLT s64 = LLT::scalar(64);
2414 const LLT s128 = LLT::scalar(128);
2415 const LLT p0 = LLT::pointer(0, 64);
2416
2417 const Register DefReg = I.getOperand(0).getReg();
2418 const LLT DefTy = MRI.getType(DefReg);
2419 const unsigned DefSize = DefTy.getSizeInBits();
2420 const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI);
2421
2422 // FIXME: Redundant check, but even less readable when factored out.
2423 if (isFP) {
2424 if (Ty != s32 && Ty != s64 && Ty != s128) {
2425 LLVM_DEBUG(dbgs() << "Unable to materialize FP " << Ty
2426 << " constant, expected: " << s32 << " or " << s64
2427 << " or " << s128 << '\n');
2428 return false;
2429 }
2430
2431 if (RB.getID() != AArch64::FPRRegBankID) {
2432 LLVM_DEBUG(dbgs() << "Unable to materialize FP " << Ty
2433 << " constant on bank: " << RB
2434 << ", expected: FPR\n");
2435 return false;
2436 }
2437
2438 // The case when we have 0.0 is covered by tablegen. Reject it here so we
2439 // can be sure tablegen works correctly and isn't rescued by this code.
2440 // 0.0 is not covered by tablegen for FP128. So we will handle this
2441 // scenario in the code here.
2442 if (DefSize != 128 && I.getOperand(1).getFPImm()->isExactlyValue(0.0))
2443 return false;
2444 } else {
2445 // s32 and s64 are covered by tablegen.
2446 if (Ty != p0 && Ty != s8 && Ty != s16) {
2447 LLVM_DEBUG(dbgs() << "Unable to materialize integer " << Ty
2448 << " constant, expected: " << s32 << ", " << s64
2449 << ", or " << p0 << '\n');
2450 return false;
2451 }
2452
2453 if (RB.getID() != AArch64::GPRRegBankID) {
2454 LLVM_DEBUG(dbgs() << "Unable to materialize integer " << Ty
2455 << " constant on bank: " << RB
2456 << ", expected: GPR\n");
2457 return false;
2458 }
2459 }
2460
2461 // We allow G_CONSTANT of types < 32b.
2462 const unsigned MovOpc =
2463 DefSize == 64 ? AArch64::MOVi64imm : AArch64::MOVi32imm;
2464
2465 if (isFP) {
2466 // Either emit a FMOV, or emit a copy to emit a normal mov.
2467 const TargetRegisterClass &GPRRC =
2468 DefSize == 32 ? AArch64::GPR32RegClass : AArch64::GPR64RegClass;
2469 const TargetRegisterClass &FPRRC =
2470 DefSize == 32 ? AArch64::FPR32RegClass
2471 : (DefSize == 64 ? AArch64::FPR64RegClass
2472 : AArch64::FPR128RegClass);
2473
2474 // For 64b values, emit a constant pool load instead.
2475 // For s32, use a cp load if we have optsize/minsize.
2476 if (DefSize == 64 || DefSize == 128 ||
2477 (DefSize == 32 && shouldOptForSize(&MF))) {
2478 auto *FPImm = I.getOperand(1).getFPImm();
2479 MachineIRBuilder MIB(I);
2480 auto *LoadMI = emitLoadFromConstantPool(FPImm, MIB);
2481 if (!LoadMI) {
2482 LLVM_DEBUG(dbgs() << "Failed to load double constant pool entry\n");
2483 return false;
2484 }
2485 MIB.buildCopy({DefReg}, {LoadMI->getOperand(0).getReg()});
2486 I.eraseFromParent();
2487 return RBI.constrainGenericRegister(DefReg, FPRRC, MRI);
2488 }
2489
2490 // Nope. Emit a copy and use a normal mov instead.
2491 const Register DefGPRReg = MRI.createVirtualRegister(&GPRRC);
2492 MachineOperand &RegOp = I.getOperand(0);
2493 RegOp.setReg(DefGPRReg);
2494 MIB.setInsertPt(MIB.getMBB(), std::next(I.getIterator()));
2495 MIB.buildCopy({DefReg}, {DefGPRReg});
2496
2497 if (!RBI.constrainGenericRegister(DefReg, FPRRC, MRI)) {
2498 LLVM_DEBUG(dbgs() << "Failed to constrain G_FCONSTANT def operand\n");
2499 return false;
2500 }
2501
2502 MachineOperand &ImmOp = I.getOperand(1);
2503 // FIXME: Is going through int64_t always correct?
2504 ImmOp.ChangeToImmediate(
2505 ImmOp.getFPImm()->getValueAPF().bitcastToAPInt().getZExtValue());
2506 } else if (I.getOperand(1).isCImm()) {
2507 uint64_t Val = I.getOperand(1).getCImm()->getZExtValue();
2508 I.getOperand(1).ChangeToImmediate(Val);
2509 } else if (I.getOperand(1).isImm()) {
2510 uint64_t Val = I.getOperand(1).getImm();
2511 I.getOperand(1).ChangeToImmediate(Val);
2512 }
2513
2514 I.setDesc(TII.get(MovOpc));
2515 constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2516 return true;
2517 }
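Editor's note: a hedged worked example of the non-constant-pool FPR path above for a small float (the bit pattern is the IEEE-754 encoding of 1.0f; register names are hypothetical):

    %c:fpr(s32) = G_FCONSTANT float 1.000000e+00
    ; is rewritten to an integer move into a GPR plus a cross-bank copy:
    %tmp:gpr32 = MOVi32imm 1065353216   ; 0x3F800000, the bits of 1.0f
    %c:fpr32 = COPY %tmp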
2518 case TargetOpcode::G_EXTRACT: {
2519 Register DstReg = I.getOperand(0).getReg();
2520 Register SrcReg = I.getOperand(1).getReg();
2521 LLT SrcTy = MRI.getType(SrcReg);
2522 LLT DstTy = MRI.getType(DstReg);
2523 (void)DstTy;
2524 unsigned SrcSize = SrcTy.getSizeInBits();
2525
2526 if (SrcTy.getSizeInBits() > 64) {
2527 // This should be an extract of an s128, which is like a vector extract.
2528 if (SrcTy.getSizeInBits() != 128)
2529 return false;
2530 // Only support extracting 64 bits from an s128 at the moment.
2531 if (DstTy.getSizeInBits() != 64)
2532 return false;
2533
2534 const RegisterBank &SrcRB = *RBI.getRegBank(SrcReg, MRI, TRI);
2535 const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
2536 // Check we have the right regbank always.
2537 assert(SrcRB.getID() == AArch64::FPRRegBankID &&
2538 DstRB.getID() == AArch64::FPRRegBankID &&
2539 "Wrong extract regbank!");
2540 (void)SrcRB;
2541
2542 // Emit the same code as a vector extract.
2543 // Offset must be a multiple of 64.
2544 unsigned Offset = I.getOperand(2).getImm();
2545 if (Offset % 64 != 0)
2546 return false;
2547 unsigned LaneIdx = Offset / 64;
2548 MachineIRBuilder MIB(I);
2549 MachineInstr *Extract = emitExtractVectorElt(
2550 DstReg, DstRB, LLT::scalar(64), SrcReg, LaneIdx, MIB);
2551 if (!Extract)
2552 return false;
2553 I.eraseFromParent();
2554 return true;
2555 }
2556
2557 I.setDesc(TII.get(SrcSize == 64 ? AArch64::UBFMXri : AArch64::UBFMWri));
2558 MachineInstrBuilder(MF, I).addImm(I.getOperand(2).getImm() +
2559 Ty.getSizeInBits() - 1);
2560
2561 if (SrcSize < 64) {
2562 assert(SrcSize == 32 && DstTy.getSizeInBits() == 16 &&
2563 "unexpected G_EXTRACT types");
2564 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2565 }
2566
2567 DstReg = MRI.createGenericVirtualRegister(LLT::scalar(64));
2568 MIB.setInsertPt(MIB.getMBB(), std::next(I.getIterator()));
2569 MIB.buildInstr(TargetOpcode::COPY, {I.getOperand(0).getReg()}, {})
2570 .addReg(DstReg, 0, AArch64::sub_32);
2571 RBI.constrainGenericRegister(I.getOperand(0).getReg(),
2572 AArch64::GPR32RegClass, MRI);
2573 I.getOperand(0).setReg(DstReg);
2574
2575 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2576 }
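Editor's note: a hedged sketch of the sub-128-bit path above, which reuses UBFM as a shift-and-mask; immediates follow the code's immr = offset, imms = offset + dst-size - 1 scheme (register names hypothetical):

    %d:gpr(s32) = G_EXTRACT %s:gpr(s64), 32
    ; becomes roughly:
    %t:gpr64 = UBFMXri %s, 32, 63
    %d:gpr32 = COPY %t.sub_32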
2577
2578 case TargetOpcode::G_INSERT: {
2579 LLT SrcTy = MRI.getType(I.getOperand(2).getReg());
2580 LLT DstTy = MRI.getType(I.getOperand(0).getReg());
2581 unsigned DstSize = DstTy.getSizeInBits();
2582 // Larger inserts are vectors, same-size ones should be something else by
2583 // now (split up or turned into COPYs).
2584 if (Ty.getSizeInBits() > 64 || SrcTy.getSizeInBits() > 32)
2585 return false;
2586
2587 I.setDesc(TII.get(DstSize == 64 ? AArch64::BFMXri : AArch64::BFMWri));
2588 unsigned LSB = I.getOperand(3).getImm();
2589 unsigned Width = MRI.getType(I.getOperand(2).getReg()).getSizeInBits();
2590 I.getOperand(3).setImm((DstSize - LSB) % DstSize);
2591 MachineInstrBuilder(MF, I).addImm(Width - 1);
2592
2593 if (DstSize < 64) {
2594 assert(DstSize == 32 && SrcTy.getSizeInBits() == 16 &&
2595 "unexpected G_INSERT types");
2596 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2597 }
2598
2599 Register SrcReg = MRI.createGenericVirtualRegister(LLT::scalar(64));
2600 BuildMI(MBB, I.getIterator(), I.getDebugLoc(),
2601 TII.get(AArch64::SUBREG_TO_REG))
2602 .addDef(SrcReg)
2603 .addImm(0)
2604 .addUse(I.getOperand(2).getReg())
2605 .addImm(AArch64::sub_32);
2606 RBI.constrainGenericRegister(I.getOperand(2).getReg(),
2607 AArch64::GPR32RegClass, MRI);
2608 I.getOperand(2).setReg(SrcReg);
2609
2610 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2611 }
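Editor's note: a hedged worked example of the BFM immediates computed above, for inserting an s16 at bit 8 of an s32 (register names hypothetical):

    %d:gpr(s32) = G_INSERT %old:gpr(s32), %val:gpr(s16), 8
    ; becomes roughly:
    %d:gpr32 = BFMWri %old, %val, 24, 15   ; immr = (32 - 8) % 32, imms = 16 - 1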
2612 case TargetOpcode::G_FRAME_INDEX: {
2613 // allocas and G_FRAME_INDEX are only supported in addrspace(0).
2614 if (Ty != LLT::pointer(0, 64)) {
2615 LLVM_DEBUG(dbgs() << "G_FRAME_INDEX pointer has type: " << Ty
2616 << ", expected: " << LLT::pointer(0, 64) << '\n');
2617 return false;
2618 }
2619 I.setDesc(TII.get(AArch64::ADDXri));
2620
2621 // MOs for a #0 shifted immediate.
2622 I.addOperand(MachineOperand::CreateImm(0));
2623 I.addOperand(MachineOperand::CreateImm(0));
2624
2625 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2626 }
2627
2628 case TargetOpcode::G_GLOBAL_VALUE: {
2629 auto GV = I.getOperand(1).getGlobal();
2630 if (GV->isThreadLocal())
2631 return selectTLSGlobalValue(I, MRI);
2632
2633 unsigned OpFlags = STI.ClassifyGlobalReference(GV, TM);
2634 if (OpFlags & AArch64II::MO_GOT) {
2635 I.setDesc(TII.get(AArch64::LOADgot));
2636 I.getOperand(1).setTargetFlags(OpFlags);
2637 } else if (TM.getCodeModel() == CodeModel::Large) {
2638 // Materialize the global using movz/movk instructions.
2639 materializeLargeCMVal(I, GV, OpFlags);
2640 I.eraseFromParent();
2641 return true;
2642 } else if (TM.getCodeModel() == CodeModel::Tiny) {
2643 I.setDesc(TII.get(AArch64::ADR));
2644 I.getOperand(1).setTargetFlags(OpFlags);
2645 } else {
2646 I.setDesc(TII.get(AArch64::MOVaddr));
2647 I.getOperand(1).setTargetFlags(OpFlags | AArch64II::MO_PAGE);
2648 MachineInstrBuilder MIB(MF, I);
2649 MIB.addGlobalAddress(GV, I.getOperand(1).getOffset(),
2650 OpFlags | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
2651 }
2652 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2653 }
2654
2655 case TargetOpcode::G_ZEXTLOAD:
2656 case TargetOpcode::G_LOAD:
2657 case TargetOpcode::G_STORE: {
2658 bool IsZExtLoad = I.getOpcode() == TargetOpcode::G_ZEXTLOAD;
2659 MachineIRBuilder MIB(I);
2660
2661 LLT PtrTy = MRI.getType(I.getOperand(1).getReg());
2662
2663 if (PtrTy != LLT::pointer(0, 64)) {
2664 LLVM_DEBUG(dbgs() << "Load/Store pointer has type: " << PtrTy
2665 << ", expected: " << LLT::pointer(0, 64) << '\n');
2666 return false;
2667 }
2668
2669 auto &MemOp = **I.memoperands_begin();
2670 uint64_t MemSizeInBytes = MemOp.getSize();
2671 if (MemOp.isAtomic()) {
2672 // For now we just support s8 acquire loads to be able to compile stack
2673 // protector code.
2674 if (MemOp.getOrdering() == AtomicOrdering::Acquire &&
2675 MemSizeInBytes == 1) {
2676 I.setDesc(TII.get(AArch64::LDARB));
2677 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2678 }
2679 LLVM_DEBUG(dbgs() << "Atomic load/store not fully supported yet\n");
2680 return false;
2681 }
2682 unsigned MemSizeInBits = MemSizeInBytes * 8;
2683
2684#ifndef NDEBUG
2685 const Register PtrReg = I.getOperand(1).getReg();
2686 const RegisterBank &PtrRB = *RBI.getRegBank(PtrReg, MRI, TRI);
2687 // Sanity-check the pointer register.
2688 assert(PtrRB.getID() == AArch64::GPRRegBankID &&
2689 "Load/Store pointer operand isn't a GPR");
2690 assert(MRI.getType(PtrReg).isPointer() &&
2691 "Load/Store pointer operand isn't a pointer");
2692#endif
2693
2694 const Register ValReg = I.getOperand(0).getReg();
2695 const RegisterBank &RB = *RBI.getRegBank(ValReg, MRI, TRI);
2696
2697 // Helper lambda for partially selecting I. Either returns the original
2698 // instruction with an updated opcode, or a new instruction.
2699 auto SelectLoadStoreAddressingMode = [&]() -> MachineInstr * {
2700 bool IsStore = I.getOpcode() == TargetOpcode::G_STORE;
1. Assuming the condition is true
2701 const unsigned NewOpc =
2702 selectLoadStoreUIOp(I.getOpcode(), RB.getID(), MemSizeInBits);
2703 if (NewOpc == I.getOpcode())
2. Taking false branch
2704 return nullptr;
2705 // Check if we can fold anything into the addressing mode.
2706 auto AddrModeFns =
2707 selectAddrModeIndexed(I.getOperand(1), MemSizeInBytes);
3. Calling 'AArch64InstructionSelector::selectAddrModeIndexed'
2708 if (!AddrModeFns) {
2709 // Can't fold anything. Use the original instruction.
2710 I.setDesc(TII.get(NewOpc));
2711 I.addOperand(MachineOperand::CreateImm(0));
2712 return &I;
2713 }
2714
2715 // Folded something. Create a new instruction and return it.
2716 auto NewInst = MIB.buildInstr(NewOpc, {}, {}, I.getFlags());
2717 IsStore ? NewInst.addUse(ValReg) : NewInst.addDef(ValReg);
2718 NewInst.cloneMemRefs(I);
2719 for (auto &Fn : *AddrModeFns)
2720 Fn(NewInst);
2721 I.eraseFromParent();
2722 return &*NewInst;
2723 };
2724
2725 MachineInstr *LoadStore = SelectLoadStoreAddressingMode();
2726 if (!LoadStore)
2727 return false;
2728
2729 // If we're storing a 0, use WZR/XZR.
2730 if (Opcode == TargetOpcode::G_STORE) {
2731 auto CVal = getConstantVRegValWithLookThrough(
2732 LoadStore->getOperand(0).getReg(), MRI, /*LookThroughInstrs = */ true,
2733 /*HandleFConstants = */ false);
2734 if (CVal && CVal->Value == 0) {
2735 switch (LoadStore->getOpcode()) {
2736 case AArch64::STRWui:
2737 case AArch64::STRHHui:
2738 case AArch64::STRBBui:
2739 LoadStore->getOperand(0).setReg(AArch64::WZR);
2740 break;
2741 case AArch64::STRXui:
2742 LoadStore->getOperand(0).setReg(AArch64::XZR);
2743 break;
2744 }
2745 }
2746 }
2747
2748 if (IsZExtLoad) {
2749 // The zextload from a smaller type to i32 should be handled by the
2750 // importer.
2751 if (MRI.getType(LoadStore->getOperand(0).getReg()).getSizeInBits() != 64)
2752 return false;
2753 // If we have a ZEXTLOAD then change the load's type to be a narrower reg
2754 // and zero_extend with SUBREG_TO_REG.
2755 Register LdReg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
2756 Register DstReg = LoadStore->getOperand(0).getReg();
2757 LoadStore->getOperand(0).setReg(LdReg);
2758
2759 MIB.setInsertPt(MIB.getMBB(), std::next(LoadStore->getIterator()));
2760 MIB.buildInstr(AArch64::SUBREG_TO_REG, {DstReg}, {})
2761 .addImm(0)
2762 .addUse(LdReg)
2763 .addImm(AArch64::sub_32);
2764 constrainSelectedInstRegOperands(*LoadStore, TII, TRI, RBI);
2765 return RBI.constrainGenericRegister(DstReg, AArch64::GPR64allRegClass,
2766 MRI);
2767 }
2768 return constrainSelectedInstRegOperands(*LoadStore, TII, TRI, RBI);
2769 }
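Editor's note: a hedged illustration of the store-of-zero peephole above: a G_STORE of a known zero constant keeps its selected opcode but has its value operand rewired to the zero register (register names hypothetical):

    %z:gpr(s32) = G_CONSTANT i32 0
    G_STORE %z(s32), %p(p0) :: (store 4)
    ; becomes roughly:
    STRWui $wzr, %p, 0 :: (store 4)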
2770
2771 case TargetOpcode::G_SMULH:
2772 case TargetOpcode::G_UMULH: {
2773 // Reject the various things we don't support yet.
2774 if (unsupportedBinOp(I, RBI, MRI, TRI))
2775 return false;
2776
2777 const Register DefReg = I.getOperand(0).getReg();
2778 const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI);
2779
2780 if (RB.getID() != AArch64::GPRRegBankID) {
2781 LLVM_DEBUG(dbgs() << "G_[SU]MULH on bank: " << RB << ", expected: GPR\n");
2782 return false;
2783 }
2784
2785 if (Ty != LLT::scalar(64)) {
2786 LLVM_DEBUG(dbgs() << "G_[SU]MULH has type: " << Ty
2787 << ", expected: " << LLT::scalar(64) << '\n');
2788 return false;
2789 }
2790
2791 unsigned NewOpc = I.getOpcode() == TargetOpcode::G_SMULH ? AArch64::SMULHrr
2792 : AArch64::UMULHrr;
2793 I.setDesc(TII.get(NewOpc));
2794
2795 // Now that we selected an opcode, we need to constrain the register
2796 // operands to use appropriate classes.
2797 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2798 }
2799 case TargetOpcode::G_LSHR:
2800 case TargetOpcode::G_ASHR:
2801 if (MRI.getType(I.getOperand(0).getReg()).isVector())
2802 return selectVectorAshrLshr(I, MRI);
2803 LLVM_FALLTHROUGH;
2804 case TargetOpcode::G_SHL:
2805 if (Opcode == TargetOpcode::G_SHL &&
2806 MRI.getType(I.getOperand(0).getReg()).isVector())
2807 return selectVectorSHL(I, MRI);
2808 LLVM_FALLTHROUGH;
2809 case TargetOpcode::G_FADD:
2810 case TargetOpcode::G_FSUB:
2811 case TargetOpcode::G_FMUL:
2812 case TargetOpcode::G_FDIV:
2813 case TargetOpcode::G_OR: {
2814 // Reject the various things we don't support yet.
2815 if (unsupportedBinOp(I, RBI, MRI, TRI))
2816 return false;
2817
2818 const unsigned OpSize = Ty.getSizeInBits();
2819
2820 const Register DefReg = I.getOperand(0).getReg();
2821 const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI);
2822
2823 const unsigned NewOpc = selectBinaryOp(I.getOpcode(), RB.getID(), OpSize);
2824 if (NewOpc == I.getOpcode())
2825 return false;
2826
2827 I.setDesc(TII.get(NewOpc));
2828 // FIXME: Should the type be always reset in setDesc?
2829
2830 // Now that we selected an opcode, we need to constrain the register
2831 // operands to use appropriate classes.
2832 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2833 }
2834
2835 case TargetOpcode::G_PTR_ADD: {
2836 MachineIRBuilder MIRBuilder(I);
2837 emitADD(I.getOperand(0).getReg(), I.getOperand(1), I.getOperand(2),
2838 MIRBuilder);
2839 I.eraseFromParent();
2840 return true;
2841 }
2842 case TargetOpcode::G_SADDO:
2843 case TargetOpcode::G_UADDO:
2844 case TargetOpcode::G_SSUBO:
2845 case TargetOpcode::G_USUBO: {
2846 // Emit the operation and get the correct condition code.
2847 MachineIRBuilder MIRBuilder(I);
2848 auto OpAndCC = emitOverflowOp(Opcode, I.getOperand(0).getReg(),
2849 I.getOperand(2), I.getOperand(3), MIRBuilder);
2850
2851 // Now, put the overflow result in the register given by the first operand
2852 // to the overflow op. CSINC increments the result when the predicate is
2853 // false, so to get the increment when it's true, we need to use the
2854 // inverse. In this case, we want to increment when carry is set.
2855 Register ZReg = AArch64::WZR;
2856 auto CsetMI = MIRBuilder
2857 .buildInstr(AArch64::CSINCWr, {I.getOperand(1).getReg()},
2858 {ZReg, ZReg})
2859 .addImm(getInvertedCondCode(OpAndCC.second));
2860 constrainSelectedInstRegOperands(*CsetMI, TII, TRI, RBI);
2861 I.eraseFromParent();
2862 return true;
2863 }
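Editor's note: a minimal C++ model of the CSINC semantics relied on above, illustrative only (the helper name csinc is an assumption):

    #include <cstdint>
    // CSINC Wd, Wn, Wm, cond computes: cond-holds ? Wn : Wm + 1.
    uint32_t csinc(bool cond_holds, uint32_t n, uint32_t m) {
      return cond_holds ? n : m + 1;
    }
    // With n = m = 0 (WZR) and the *inverted* condition, the result is 1
    // exactly when the original condition (e.g. carry/overflow) holds.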
2864
2865 case TargetOpcode::G_PTRMASK: {
2866 Register MaskReg = I.getOperand(2).getReg();
2867 Optional<int64_t> MaskVal = getConstantVRegSExtVal(MaskReg, MRI);
2868 // TODO: Implement arbitrary cases
2869 if (!MaskVal || !isShiftedMask_64(*MaskVal))
2870 return false;
2871
2872 uint64_t Mask = *MaskVal;
2873 I.setDesc(TII.get(AArch64::ANDXri));
2874 I.getOperand(2).ChangeToImmediate(
2875 AArch64_AM::encodeLogicalImmediate(Mask, 64));
2876
2877 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2878 }
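Editor's note: a hedged worked example of the shifted-mask case above: an alignment mask such as -16 is a contiguous run of set bits, so it passes isShiftedMask_64 and encodes as a logical immediate (register names hypothetical; the encoded immediate is left symbolic):

    %m:gpr(s64) = G_CONSTANT i64 -16   ; 0xFFFFFFFFFFFFFFF0
    %d:gpr(p0) = G_PTRMASK %p(p0), %m(s64)
    ; becomes roughly:
    %d:gpr64 = ANDXri %p, <logical-imm encoding of 0xFFFFFFFFFFFFFFF0>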
2879 case TargetOpcode::G_PTRTOINT:
2880 case TargetOpcode::G_TRUNC: {
2881 const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
2882 const LLT SrcTy = MRI.getType(I.getOperand(1).getReg());
2883
2884 const Register DstReg = I.getOperand(0).getReg();
2885 const Register SrcReg = I.getOperand(1).getReg();
2886
2887 const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
2888 const RegisterBank &SrcRB = *RBI.getRegBank(SrcReg, MRI, TRI);
2889
2890 if (DstRB.getID() != SrcRB.getID()) {
2891 LLVM_DEBUG(
2892 dbgs() << "G_TRUNC/G_PTRTOINT input/output on different banks\n");
2893 return false;
2894 }
2895
2896 if (DstRB.getID() == AArch64::GPRRegBankID) {
2897 const TargetRegisterClass *DstRC =
2898 getRegClassForTypeOnBank(DstTy, DstRB, RBI);
2899 if (!DstRC)
2900 return false;
2901
2902 const TargetRegisterClass *SrcRC =
2903 getRegClassForTypeOnBank(SrcTy, SrcRB, RBI);
2904 if (!SrcRC)
2905 return false;
2906
2907 if (!RBI.constrainGenericRegister(SrcReg, *SrcRC, MRI) ||
2908 !RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) {
2909 LLVM_DEBUG(dbgs() << "Failed to constrain G_TRUNC/G_PTRTOINT\n");
2910 return false;
2911 }
2912
2913 if (DstRC == SrcRC) {
2914 // Nothing to be done
2915 } else if (Opcode == TargetOpcode::G_TRUNC && DstTy == LLT::scalar(32) &&
2916 SrcTy == LLT::scalar(64)) {
2917 llvm_unreachable("TableGen can import this case");
2918 return false;
2919 } else if (DstRC == &AArch64::GPR32RegClass &&
2920 SrcRC == &AArch64::GPR64RegClass) {
2921 I.getOperand(1).setSubReg(AArch64::sub_32);
2922 } else {
2923 LLVM_DEBUG(
2924 dbgs() << "Unhandled mismatched classes in G_TRUNC/G_PTRTOINT\n");
2925 return false;
2926 }
2927
2928 I.setDesc(TII.get(TargetOpcode::COPY));
2929 return true;
2930 } else if (DstRB.getID() == AArch64::FPRRegBankID) {
2931 if (DstTy == LLT::vector(4, 16) && SrcTy == LLT::vector(4, 32)) {
2932 I.setDesc(TII.get(AArch64::XTNv4i16));
2933 constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2934 return true;
2935 }
2936
2937 if (!SrcTy.isVector() && SrcTy.getSizeInBits() == 128) {
2938 MachineIRBuilder MIB(I);
2939 MachineInstr *Extract = emitExtractVectorElt(
2940 DstReg, DstRB, LLT::scalar(DstTy.getSizeInBits()), SrcReg, 0, MIB);
2941 if (!Extract)
2942 return false;
2943 I.eraseFromParent();
2944 return true;
2945 }
2946
2947 // We might have a vector G_PTRTOINT, in which case just emit a COPY.
2948 if (Opcode == TargetOpcode::G_PTRTOINT) {
2949 assert(DstTy.isVector() && "Expected an FPR ptrtoint to be a vector");
2950 I.setDesc(TII.get(TargetOpcode::COPY));
2951 return true;
2952 }
2953 }
2954
2955 return false;
2956 }
2957
2958 case TargetOpcode::G_ANYEXT: {
2959 const Register DstReg = I.getOperand(0).getReg();
2960 const Register SrcReg = I.getOperand(1).getReg();
2961
2962 const RegisterBank &RBDst = *RBI.getRegBank(DstReg, MRI, TRI);
2963 if (RBDst.getID() != AArch64::GPRRegBankID) {
2964 LLVM_DEBUG(dbgs() << "G_ANYEXT on bank: " << RBDst
2965 << ", expected: GPR\n");
2966 return false;
2967 }
2968
2969 const RegisterBank &RBSrc = *RBI.getRegBank(SrcReg, MRI, TRI);
2970 if (RBSrc.getID() != AArch64::GPRRegBankID) {
2971 LLVM_DEBUG(dbgs() << "G_ANYEXT on bank: " << RBSrc
2972 << ", expected: GPR\n");
2973 return false;
2974 }
2975
2976 const unsigned DstSize = MRI.getType(DstReg).getSizeInBits();
2977
2978 if (DstSize == 0) {
2979 LLVM_DEBUG(dbgs() << "G_ANYEXT operand has no size, not a gvreg?\n");
2980 return false;
2981 }
2982
2983 if (DstSize != 64 && DstSize > 32) {
2984 LLVM_DEBUG(dbgs() << "G_ANYEXT to size: " << DstSize
2985 << ", expected: 32 or 64\n");
2986 return false;
2987 }
2988 // At this point G_ANYEXT is just like a plain COPY, but we need
2989 // to explicitly form the 64-bit value if any.
2990 if (DstSize > 32) {
2991 Register ExtSrc = MRI.createVirtualRegister(&AArch64::GPR64allRegClass);
2992 BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::SUBREG_TO_REG))
2993 .addDef(ExtSrc)
2994 .addImm(0)
2995 .addUse(SrcReg)
2996 .addImm(AArch64::sub_32);
2997 I.getOperand(1).setReg(ExtSrc);
2998 }
2999 return selectCopy(I, TII, MRI, TRI, RBI);
3000 }
3001
3002 case TargetOpcode::G_ZEXT:
3003 case TargetOpcode::G_SEXT_INREG:
3004 case TargetOpcode::G_SEXT: {
3005 unsigned Opcode = I.getOpcode();
3006 const bool IsSigned = Opcode != TargetOpcode::G_ZEXT;
3007 const Register DefReg = I.getOperand(0).getReg();
3008 Register SrcReg = I.getOperand(1).getReg();
3009 const LLT DstTy = MRI.getType(DefReg);
3010 const LLT SrcTy = MRI.getType(SrcReg);
3011 unsigned DstSize = DstTy.getSizeInBits();
3012 unsigned SrcSize = SrcTy.getSizeInBits();
3013
3014 // SEXT_INREG has the same src reg size as dst, the size of the value to be
3015 // extended is encoded in the imm.
3016 if (Opcode == TargetOpcode::G_SEXT_INREG)
3017 SrcSize = I.getOperand(2).getImm();
3018
3019 if (DstTy.isVector())
3020 return false; // Should be handled by imported patterns.
3021
3022 assert((*RBI.getRegBank(DefReg, MRI, TRI)).getID() ==
3023 AArch64::GPRRegBankID &&
3024 "Unexpected ext regbank");
3025
3026 MachineIRBuilder MIB(I);
3027 MachineInstr *ExtI;
3028
3029 // First check if we're extending the result of a load which has a dest type
3030 // smaller than 32 bits, then this zext is redundant. GPR32 is the smallest
3031 // GPR register on AArch64 and all loads which are smaller automatically
3032 // zero-extend the upper bits. E.g.
3033 // %v(s8) = G_LOAD %p, :: (load 1)
3034 // %v2(s32) = G_ZEXT %v(s8)
3035 if (!IsSigned) {
3036 auto *LoadMI = getOpcodeDef(TargetOpcode::G_LOAD, SrcReg, MRI);
3037 bool IsGPR =
3038 RBI.getRegBank(SrcReg, MRI, TRI)->getID() == AArch64::GPRRegBankID;
3039 if (LoadMI && IsGPR) {
3040 const MachineMemOperand *MemOp = *LoadMI->memoperands_begin();
3041 unsigned BytesLoaded = MemOp->getSize();
3042 if (BytesLoaded < 4 && SrcTy.getSizeInBytes() == BytesLoaded)
3043 return selectCopy(I, TII, MRI, TRI, RBI);
3044 }
3045
3046 // If we are zero extending from 32 bits to 64 bits, it's possible that
3047 // the instruction implicitly does the zero extend for us. In that case,
3048 // we can just emit a SUBREG_TO_REG.
3049 if (IsGPR && SrcSize == 32 && DstSize == 64) {
3050 // Unlike with the G_LOAD case, we don't want to look through copies
3051 // here.
3052 MachineInstr *Def = MRI.getVRegDef(SrcReg);
3053 if (Def && isDef32(*Def)) {
3054 MIB.buildInstr(AArch64::SUBREG_TO_REG, {DefReg}, {})
3055 .addImm(0)
3056 .addUse(SrcReg)
3057 .addImm(AArch64::sub_32);
3058
3059 if (!RBI.constrainGenericRegister(DefReg, AArch64::GPR64RegClass,
3060 MRI)) {
3061 LLVM_DEBUG(dbgs() << "Failed to constrain G_ZEXT destination\n");
3062 return false;
3063 }
3064
3065 if (!RBI.constrainGenericRegister(SrcReg, AArch64::GPR32RegClass,
3066 MRI)) {
3067 LLVM_DEBUG(dbgs() << "Failed to constrain G_ZEXT source\n");
3068 return false;
3069 }
3070
3071 I.eraseFromParent();
3072 return true;
3073 }
3074 }
3075 }
3076
3077 if (DstSize == 64) {
3078 if (Opcode != TargetOpcode::G_SEXT_INREG) {
3079 // FIXME: Can we avoid manually doing this?
3080 if (!RBI.constrainGenericRegister(SrcReg, AArch64::GPR32RegClass,
3081 MRI)) {
3082 LLVM_DEBUG(dbgs() << "Failed to constrain " << TII.getName(Opcode)
3083 << " operand\n");
3084 return false;
3085 }
3086 SrcReg = MIB.buildInstr(AArch64::SUBREG_TO_REG,
3087 {&AArch64::GPR64RegClass}, {})
3088 .addImm(0)
3089 .addUse(SrcReg)
3090 .addImm(AArch64::sub_32)
3091 .getReg(0);
3092 }
3093
3094 ExtI = MIB.buildInstr(IsSigned ? AArch64::SBFMXri : AArch64::UBFMXri,
3095 {DefReg}, {SrcReg})
3096 .addImm(0)
3097 .addImm(SrcSize - 1);
3098 } else if (DstSize <= 32) {
3099 ExtI = MIB.buildInstr(IsSigned ? AArch64::SBFMWri : AArch64::UBFMWri,
3100 {DefReg}, {SrcReg})
3101 .addImm(0)
3102 .addImm(SrcSize - 1);
3103 } else {
3104 return false;
3105 }
3106
3107 constrainSelectedInstRegOperands(*ExtI, TII, TRI, RBI);
3108 I.eraseFromParent();
3109 return true;
3110 }
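Editor's note: a hedged sketch of the SUBREG_TO_REG fast path above, which exploits the fact that 32-bit AArch64 instructions implicitly zero bits 63:32 of their destination (register names hypothetical):

    %w:gpr(s32) = G_ADD %a, %b        ; any instruction for which isDef32 holds
    %d:gpr(s64) = G_ZEXT %w(s32)
    ; becomes roughly:
    %d:gpr64 = SUBREG_TO_REG 0, %w, %subreg.sub_32   ; no UBFM needed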
3111
3112 case TargetOpcode::G_SITOFP:
3113 case TargetOpcode::G_UITOFP:
3114 case TargetOpcode::G_FPTOSI:
3115 case TargetOpcode::G_FPTOUI: {
3116 const LLT DstTy = MRI.getType(I.getOperand(0).getReg()),
3117 SrcTy = MRI.getType(I.getOperand(1).getReg());
3118 const unsigned NewOpc = selectFPConvOpc(Opcode, DstTy, SrcTy);
3119 if (NewOpc == Opcode)
3120 return false;
3121
3122 I.setDesc(TII.get(NewOpc));
3123 constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3124
3125 return true;
3126 }
3127
3128 case TargetOpcode::G_FREEZE:
3129 return selectCopy(I, TII, MRI, TRI, RBI);
3130
3131 case TargetOpcode::G_INTTOPTR:
3132 // The importer is currently unable to import pointer types since they
3133 // didn't exist in SelectionDAG.
3134 return selectCopy(I, TII, MRI, TRI, RBI);
3135
3136 case TargetOpcode::G_BITCAST:
3137 // Imported SelectionDAG rules can handle every bitcast except those that
3138 // bitcast from a type to the same type. Ideally, these shouldn't occur
3139 // but we might not run an optimizer that deletes them. The other exception
3140 // is bitcasts involving pointer types, as SelectionDAG has no knowledge
3141 // of them.
3142 return selectCopy(I, TII, MRI, TRI, RBI);
3143
3144 case TargetOpcode::G_SELECT: {
3145 if (MRI.getType(I.getOperand(1).getReg()) != LLT::scalar(1)) {
3146 LLVM_DEBUG(dbgs() << "G_SELECT cond has type: " << Ty
3147 << ", expected: " << LLT::scalar(1) << '\n');
3148 return false;
3149 }
3150
3151 const Register CondReg = I.getOperand(1).getReg();
3152 const Register TReg = I.getOperand(2).getReg();
3153 const Register FReg = I.getOperand(3).getReg();
3154
3155 if (tryOptSelect(I))
3156 return true;
3157
3158 // Make sure to use an unused vreg instead of wzr, so that the peephole
3159 // optimizations will be able to optimize these.
3160 MachineIRBuilder MIB(I);
3161 Register DeadVReg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
3162 auto TstMI = MIB.buildInstr(AArch64::ANDSWri, {DeadVReg}, {CondReg})
3163 .addImm(AArch64_AM::encodeLogicalImmediate(1, 32));
3164 constrainSelectedInstRegOperands(*TstMI, TII, TRI, RBI);
3165 if (!emitSelect(I.getOperand(0).getReg(), TReg, FReg, AArch64CC::NE, MIB))
3166 return false;
3167 I.eraseFromParent();
3168 return true;
3169 }
3170 case TargetOpcode::G_ICMP: {
3171 if (Ty.isVector())
3172 return selectVectorICmp(I, MRI);
3173
3174 if (Ty != LLT::scalar(32)) {
3175 LLVM_DEBUG(dbgs() << "G_ICMP result has type: " << Ty
3176 << ", expected: " << LLT::scalar(32) << '\n');
3177 return false;
3178 }
3179
3180 MachineIRBuilder MIRBuilder(I);
3181 auto Pred = static_cast<CmpInst::Predicate>(I.getOperand(1).getPredicate());
3182 emitIntegerCompare(I.getOperand(2), I.getOperand(3), I.getOperand(1),
3183 MIRBuilder);
3184 emitCSetForICMP(I.getOperand(0).getReg(), Pred, MIRBuilder);
3185 I.eraseFromParent();
3186 return true;
3187 }
3188
3189 case TargetOpcode::G_FCMP: {
3190 MachineIRBuilder MIRBuilder(I);
3191 CmpInst::Predicate Pred =
3192 static_cast<CmpInst::Predicate>(I.getOperand(1).getPredicate());
3193 if (!emitFPCompare(I.getOperand(2).getReg(), I.getOperand(3).getReg(),
3194 MIRBuilder, Pred) ||
3195 !emitCSetForFCmp(I.getOperand(0).getReg(), Pred, MIRBuilder))
3196 return false;
3197 I.eraseFromParent();
3198 return true;
3199 }
3200 case TargetOpcode::G_VASTART:
3201 return STI.isTargetDarwin() ? selectVaStartDarwin(I, MF, MRI)
3202 : selectVaStartAAPCS(I, MF, MRI);
3203 case TargetOpcode::G_INTRINSIC:
3204 return selectIntrinsic(I, MRI);
3205 case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS:
3206 return selectIntrinsicWithSideEffects(I, MRI);
3207 case TargetOpcode::G_IMPLICIT_DEF: {
3208 I.setDesc(TII.get(TargetOpcode::IMPLICIT_DEF));
3209 const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
3210 const Register DstReg = I.getOperand(0).getReg();
3211 const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
3212 const TargetRegisterClass *DstRC =
3213 getRegClassForTypeOnBank(DstTy, DstRB, RBI);
3214 RBI.constrainGenericRegister(DstReg, *DstRC, MRI);
3215 return true;
3216 }
3217 case TargetOpcode::G_BLOCK_ADDR: {
3218 if (TM.getCodeModel() == CodeModel::Large) {
3219 materializeLargeCMVal(I, I.getOperand(1).getBlockAddress(), 0);
3220 I.eraseFromParent();
3221 return true;
3222 } else {
3223 I.setDesc(TII.get(AArch64::MOVaddrBA));
3224 auto MovMI = BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::MOVaddrBA),
3225 I.getOperand(0).getReg())
3226 .addBlockAddress(I.getOperand(1).getBlockAddress(),
3227 /* Offset */ 0, AArch64II::MO_PAGE)
3228 .addBlockAddress(
3229 I.getOperand(1).getBlockAddress(), /* Offset */ 0,
3230 AArch64II::MO_NC | AArch64II::MO_PAGEOFF);
3231 I.eraseFromParent();
3232 return constrainSelectedInstRegOperands(*MovMI, TII, TRI, RBI);
3233 }
3234 }
3235 case AArch64::G_DUP: {
3236 // When the scalar of G_DUP is an s8/s16 gpr, they can't be selected by
3237 // imported patterns. Do it manually here. Avoiding generating s16 gpr is
3238 // difficult because at RBS we may end up pessimizing the fpr case if we
3239 // decided to add an anyextend to fix this. Manual selection is the most
3240 // robust solution for now.
3241 if (RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI)->getID() !=
3242 AArch64::GPRRegBankID)
3243 return false; // We expect the fpr regbank case to be imported.
3244 LLT VecTy = MRI.getType(I.getOperand(0).getReg());
3245 if (VecTy == LLT::vector(8, 8))
3246 I.setDesc(TII.get(AArch64::DUPv8i8gpr));
3247 else if (VecTy == LLT::vector(16, 8))
3248 I.setDesc(TII.get(AArch64::DUPv16i8gpr));
3249 else if (VecTy == LLT::vector(4, 16))
3250 I.setDesc(TII.get(AArch64::DUPv4i16gpr));
3251 else if (VecTy == LLT::vector(8, 16))
3252 I.setDesc(TII.get(AArch64::DUPv8i16gpr));
3253 else
3254 return false;
3255 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3256 }
3257 case TargetOpcode::G_INTRINSIC_TRUNC:
3258 return selectIntrinsicTrunc(I, MRI);
3259 case TargetOpcode::G_INTRINSIC_ROUND:
3260 return selectIntrinsicRound(I, MRI);
3261 case TargetOpcode::G_BUILD_VECTOR:
3262 return selectBuildVector(I, MRI);
3263 case TargetOpcode::G_MERGE_VALUES:
3264 return selectMergeValues(I, MRI);
3265 case TargetOpcode::G_UNMERGE_VALUES:
3266 return selectUnmergeValues(I, MRI);
3267 case TargetOpcode::G_SHUFFLE_VECTOR:
3268 return selectShuffleVector(I, MRI);
3269 case TargetOpcode::G_EXTRACT_VECTOR_ELT:
3270 return selectExtractElt(I, MRI);
3271 case TargetOpcode::G_INSERT_VECTOR_ELT:
3272 return selectInsertElt(I, MRI);
3273 case TargetOpcode::G_CONCAT_VECTORS:
3274 return selectConcatVectors(I, MRI);
3275 case TargetOpcode::G_JUMP_TABLE:
3276 return selectJumpTable(I, MRI);
3277 case TargetOpcode::G_VECREDUCE_FADD:
3278 case TargetOpcode::G_VECREDUCE_ADD:
3279 return selectReduction(I, MRI);
3280 }
3281
3282 return false;
3283}
3284
3285bool AArch64InstructionSelector::selectReduction(
3286 MachineInstr &I, MachineRegisterInfo &MRI) const {
3287 Register VecReg = I.getOperand(1).getReg();
3288 LLT VecTy = MRI.getType(VecReg);
3289 if (I.getOpcode() == TargetOpcode::G_VECREDUCE_ADD) {
3290 // For <2 x i32> ADDPv2i32 generates an FPR64 value, so we need to emit
3291 // a subregister copy afterwards.
3292 if (VecTy == LLT::vector(2, 32)) {
3293 MachineIRBuilder MIB(I);
3294 Register DstReg = I.getOperand(0).getReg();
3295 auto AddP = MIB.buildInstr(AArch64::ADDPv2i32, {&AArch64::FPR64RegClass},
3296 {VecReg, VecReg});
3297 auto Copy = MIB.buildInstr(TargetOpcode::COPY, {DstReg}, {})
3298 .addReg(AddP.getReg(0), 0, AArch64::ssub)
3299 .getReg(0);
3300 RBI.constrainGenericRegister(Copy, AArch64::FPR32RegClass, MRI);
3301 I.eraseFromParent();
3302 return constrainSelectedInstRegOperands(*AddP, TII, TRI, RBI);
3303 }
3304
3305 unsigned Opc = 0;
3306 if (VecTy == LLT::vector(16, 8))
3307 Opc = AArch64::ADDVv16i8v;
3308 else if (VecTy == LLT::vector(8, 16))
3309 Opc = AArch64::ADDVv8i16v;
3310 else if (VecTy == LLT::vector(4, 32))
3311 Opc = AArch64::ADDVv4i32v;
3312 else if (VecTy == LLT::vector(2, 64))
3313 Opc = AArch64::ADDPv2i64p;
3314 else {
3315 LLVM_DEBUG(dbgs() << "Unhandled type for add reduction");
3316 return false;
3317 }
3318 I.setDesc(TII.get(Opc));
3319 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3320 }
3321
3322 if (I.getOpcode() == TargetOpcode::G_VECREDUCE_FADD) {
3323 unsigned Opc = 0;
3324 if (VecTy == LLT::vector(2, 32))
3325 Opc = AArch64::FADDPv2i32p;
3326 else if (VecTy == LLT::vector(2, 64))
3327 Opc = AArch64::FADDPv2i64p;
3328 else {
3329 LLVM_DEBUG(dbgs() << "Unhandled type for fadd reduction");
3330 return false;
3331 }
3332 I.setDesc(TII.get(Opc));
3333 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3334 }
3335 return false;
3336}
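Editor's note: a hedged worked example of the add-reduction selection above (register names hypothetical):

    %s:fpr(s32) = G_VECREDUCE_ADD %v:fpr(<4 x s32>)
    ; becomes roughly:
    %s:fpr32 = ADDVv4i32v %v
    ; while the <2 x s32> case instead uses ADDPv2i32 plus a copy of the
    ; low 32 bits (the ssub subregister), as the code above shows.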
3337
3338bool AArch64InstructionSelector::selectBrJT(MachineInstr &I,
3339 MachineRegisterInfo &MRI) const {
3340 assert(I.getOpcode() == TargetOpcode::G_BRJT && "Expected G_BRJT");
3341 Register JTAddr = I.getOperand(0).getReg();
3342 unsigned JTI = I.getOperand(1).getIndex();
3343 Register Index = I.getOperand(2).getReg();
3344 MachineIRBuilder MIB(I);
3345
3346 Register TargetReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
3347 Register ScratchReg = MRI.createVirtualRegister(&AArch64::GPR64spRegClass);
3348
3349 MF->getInfo<AArch64FunctionInfo>()->setJumpTableEntryInfo(JTI, 4, nullptr);
3350 auto JumpTableInst = MIB.buildInstr(AArch64::JumpTableDest32,
3351 {TargetReg, ScratchReg}, {JTAddr, Index})
3352 .addJumpTableIndex(JTI);
3353 // Build the indirect branch.
3354 MIB.buildInstr(AArch64::BR, {}, {TargetReg});
3355 I.eraseFromParent();
3356 return constrainSelectedInstRegOperands(*JumpTableInst, TII, TRI, RBI);
3357}
3358
3359bool AArch64InstructionSelector::selectJumpTable(
3360 MachineInstr &I, MachineRegisterInfo &MRI) const {
3361  assert(I.getOpcode() == TargetOpcode::G_JUMP_TABLE && "Expected jump table");
3362  assert(I.getOperand(1).isJTI() && "Jump table op should have a JTI!");
3363
3364 Register DstReg = I.getOperand(0).getReg();
3365 unsigned JTI = I.getOperand(1).getIndex();
3366 // We generate a MOVaddrJT which will get expanded to an ADRP + ADD later.
3367 MachineIRBuilder MIB(I);
3368 auto MovMI =
3369 MIB.buildInstr(AArch64::MOVaddrJT, {DstReg}, {})
3370 .addJumpTableIndex(JTI, AArch64II::MO_PAGE)
3371 .addJumpTableIndex(JTI, AArch64II::MO_NC | AArch64II::MO_PAGEOFF);
3372 I.eraseFromParent();
3373 return constrainSelectedInstRegOperands(*MovMI, TII, TRI, RBI);
3374}
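// Expected expansion (illustrative sketch; the label .LJTI0_0 is
// hypothetical): with the MO_PAGE and MO_PAGEOFF | MO_NC operands above,
// MOVaddrJT later becomes the usual page-plus-offset pair:
//   adrp x0, .LJTI0_0
//   add  x0, x0, :lo12:.LJTI0_0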
3375
3376bool AArch64InstructionSelector::selectTLSGlobalValue(
3377 MachineInstr &I, MachineRegisterInfo &MRI) const {
3378 if (!STI.isTargetMachO())
3379 return false;
3380 MachineFunction &MF = *I.getParent()->getParent();
3381 MF.getFrameInfo().setAdjustsStack(true);
3382
3383 const GlobalValue &GV = *I.getOperand(1).getGlobal();
3384 MachineIRBuilder MIB(I);
3385
3386 auto LoadGOT =
3387 MIB.buildInstr(AArch64::LOADgot, {&AArch64::GPR64commonRegClass}, {})
3388 .addGlobalAddress(&GV, 0, AArch64II::MO_TLS);
3389
3390 auto Load = MIB.buildInstr(AArch64::LDRXui, {&AArch64::GPR64commonRegClass},
3391 {LoadGOT.getReg(0)})
3392 .addImm(0);
3393
3394 MIB.buildCopy(Register(AArch64::X0), LoadGOT.getReg(0));
3395 // TLS calls preserve all registers except those that absolutely must be
3396 // trashed: X0 (it takes an argument), LR (it's a call) and NZCV (let's not be
3397 // silly).
3398 MIB.buildInstr(getBLRCallOpcode(MF), {}, {Load})
3399 .addUse(AArch64::X0, RegState::Implicit)
3400 .addDef(AArch64::X0, RegState::Implicit)
3401 .addRegMask(TRI.getTLSCallPreservedMask());
3402
3403 MIB.buildCopy(I.getOperand(0).getReg(), Register(AArch64::X0));
3404 RBI.constrainGenericRegister(I.getOperand(0).getReg(), AArch64::GPR64RegClass,
3405 MRI);
3406 I.eraseFromParent();
3407 return true;
3408}
3409
3410bool AArch64InstructionSelector::selectIntrinsicTrunc(
3411 MachineInstr &I, MachineRegisterInfo &MRI) const {
3412 const LLT SrcTy = MRI.getType(I.getOperand(0).getReg());
3413
3414 // Select the correct opcode.
3415 unsigned Opc = 0;
3416 if (!SrcTy.isVector()) {
3417 switch (SrcTy.getSizeInBits()) {
3418 default:
3419 case 16:
3420 Opc = AArch64::FRINTZHr;
3421 break;
3422 case 32:
3423 Opc = AArch64::FRINTZSr;
3424 break;
3425 case 64:
3426 Opc = AArch64::FRINTZDr;
3427 break;
3428 }
3429 } else {
3430 unsigned NumElts = SrcTy.getNumElements();
3431 switch (SrcTy.getElementType().getSizeInBits()) {
3432 default:
3433 break;
3434 case 16:
3435 if (NumElts == 4)
3436 Opc = AArch64::FRINTZv4f16;
3437 else if (NumElts == 8)
3438 Opc = AArch64::FRINTZv8f16;
3439 break;
3440 case 32:
3441 if (NumElts == 2)
3442 Opc = AArch64::FRINTZv2f32;
3443 else if (NumElts == 4)
3444 Opc = AArch64::FRINTZv4f32;
3445 break;
3446 case 64:
3447 if (NumElts == 2)
3448 Opc = AArch64::FRINTZv2f64;
3449 break;
3450 }
3451 }
3452
3453 if (!Opc) {
3454 // Didn't get an opcode above, bail.
3455    LLVM_DEBUG(dbgs() << "Unsupported type for G_INTRINSIC_TRUNC!\n");
3456 return false;
3457 }
3458
3459 // Legalization would have set us up perfectly for this; we just need to
3460 // set the opcode and move on.
3461 I.setDesc(TII.get(Opc));
3462 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3463}
3464
3465bool AArch64InstructionSelector::selectIntrinsicRound(
3466 MachineInstr &I, MachineRegisterInfo &MRI) const {
3467 const LLT SrcTy = MRI.getType(I.getOperand(0).getReg());
3468
3469 // Select the correct opcode.
3470 unsigned Opc = 0;
3471 if (!SrcTy.isVector()) {
3472 switch (SrcTy.getSizeInBits()) {
3473 default:
3474 case 16:
3475 Opc = AArch64::FRINTAHr;
3476 break;
3477 case 32:
3478 Opc = AArch64::FRINTASr;
3479 break;
3480 case 64:
3481 Opc = AArch64::FRINTADr;
3482 break;
3483 }
3484 } else {
3485 unsigned NumElts = SrcTy.getNumElements();
3486 switch (SrcTy.getElementType().getSizeInBits()) {
3487 default:
3488 break;
3489 case 16:
3490 if (NumElts == 4)
3491 Opc = AArch64::FRINTAv4f16;
3492 else if (NumElts == 8)
3493 Opc = AArch64::FRINTAv8f16;
3494 break;
3495 case 32:
3496 if (NumElts == 2)
3497 Opc = AArch64::FRINTAv2f32;
3498 else if (NumElts == 4)
3499 Opc = AArch64::FRINTAv4f32;
3500 break;
3501 case 64:
3502 if (NumElts == 2)
3503 Opc = AArch64::FRINTAv2f64;
3504 break;
3505 }
3506 }
3507
3508 if (!Opc) {
3509 // Didn't get an opcode above, bail.
3510    LLVM_DEBUG(dbgs() << "Unsupported type for G_INTRINSIC_ROUND!\n");
3511 return false;
3512 }
3513
3514 // Legalization would have set us up perfectly for this; we just need to
3515 // set the opcode and move on.
3516 I.setDesc(TII.get(Opc));
3517 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3518}
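// Note (annotation, not from the source file): the FRINTZ forms selected in
// selectIntrinsicTrunc above round toward zero, matching G_INTRINSIC_TRUNC,
// while the FRINTA forms here round to nearest with ties away from zero,
// matching G_INTRINSIC_ROUND; e.g. "frinta d0, d1" for the scalar f64 case.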
3519
3520bool AArch64InstructionSelector::selectVectorICmp(
3521 MachineInstr &I, MachineRegisterInfo &MRI) const {
3522 Register DstReg = I.getOperand(0).getReg();
3523 LLT DstTy = MRI.getType(DstReg);
3524 Register SrcReg = I.getOperand(2).getReg();
3525 Register Src2Reg = I.getOperand(3).getReg();
3526 LLT SrcTy = MRI.getType(SrcReg);
3527
3528 unsigned SrcEltSize = SrcTy.getElementType().getSizeInBits();
3529 unsigned NumElts = DstTy.getNumElements();
3530
3531 // First index is element size, 0 == 8b, 1 == 16b, 2 == 32b, 3 == 64b
3532 // Second index is num elts, 0 == v2, 1 == v4, 2 == v8, 3 == v16
3533 // Third index is cc opcode:
3534 // 0 == eq
3535 // 1 == ugt
3536 // 2 == uge
3537 // 3 == ult
3538 // 4 == ule
3539 // 5 == sgt
3540 // 6 == sge
3541 // 7 == slt
3542 // 8 == sle
3543 // ne is done by negating 'eq' result.
3544
3545 // This table below assumes that for some comparisons the operands will be
3546 // commuted.
3547 // ult op == commute + ugt op
3548 // ule op == commute + uge op
3549 // slt op == commute + sgt op
3550 // sle op == commute + sge op
3551 unsigned PredIdx = 0;
3552 bool SwapOperands = false;
3553 CmpInst::Predicate Pred = (CmpInst::Predicate)I.getOperand(1).getPredicate();
3554 switch (Pred) {
3555 case CmpInst::ICMP_NE:
3556 case CmpInst::ICMP_EQ:
3557 PredIdx = 0;
3558 break;
3559 case CmpInst::ICMP_UGT:
3560 PredIdx = 1;
3561 break;
3562 case CmpInst::ICMP_UGE:
3563 PredIdx = 2;
3564 break;
3565 case CmpInst::ICMP_ULT:
3566 PredIdx = 3;
3567 SwapOperands = true;
3568 break;
3569 case CmpInst::ICMP_ULE:
3570 PredIdx = 4;
3571 SwapOperands = true;
3572 break;
3573 case CmpInst::ICMP_SGT:
3574 PredIdx = 5;
3575 break;
3576 case CmpInst::ICMP_SGE:
3577 PredIdx = 6;
3578 break;
3579 case CmpInst::ICMP_SLT:
3580 PredIdx = 7;
3581 SwapOperands = true;
3582 break;
3583 case CmpInst::ICMP_SLE:
3584 PredIdx = 8;
3585 SwapOperands = true;
3586 break;
3587 default:
3588    llvm_unreachable("Unhandled icmp predicate");
3589 return false;
3590 }
3591
3592 // This table obviously should be tablegen'd when we have our GISel native
3593 // tablegen selector.
3594
3595 static const unsigned OpcTable[4][4][9] = {
3596 {
3597 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3598 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3599 0 /* invalid */},
3600 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3601 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3602 0 /* invalid */},
3603 {AArch64::CMEQv8i8, AArch64::CMHIv8i8, AArch64::CMHSv8i8,
3604 AArch64::CMHIv8i8, AArch64::CMHSv8i8, AArch64::CMGTv8i8,
3605 AArch64::CMGEv8i8, AArch64::CMGTv8i8, AArch64::CMGEv8i8},
3606 {AArch64::CMEQv16i8, AArch64::CMHIv16i8, AArch64::CMHSv16i8,
3607 AArch64::CMHIv16i8, AArch64::CMHSv16i8, AArch64::CMGTv16i8,
3608 AArch64::CMGEv16i8, AArch64::CMGTv16i8, AArch64::CMGEv16i8}
3609 },
3610 {
3611 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3612 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3613 0 /* invalid */},
3614 {AArch64::CMEQv4i16, AArch64::CMHIv4i16, AArch64::CMHSv4i16,
3615 AArch64::CMHIv4i16, AArch64::CMHSv4i16, AArch64::CMGTv4i16,
3616 AArch64::CMGEv4i16, AArch64::CMGTv4i16, AArch64::CMGEv4i16},
3617 {AArch64::CMEQv8i16, AArch64::CMHIv8i16, AArch64::CMHSv8i16,
3618 AArch64::CMHIv8i16, AArch64::CMHSv8i16, AArch64::CMGTv8i16,
3619 AArch64::CMGEv8i16, AArch64::CMGTv8i16, AArch64::CMGEv8i16},
3620 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3621 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3622 0 /* invalid */}
3623 },
3624 {
3625 {AArch64::CMEQv2i32, AArch64::CMHIv2i32, AArch64::CMHSv2i32,
3626 AArch64::CMHIv2i32, AArch64::CMHSv2i32, AArch64::CMGTv2i32,
3627 AArch64::CMGEv2i32, AArch64::CMGTv2i32, AArch64::CMGEv2i32},
3628 {AArch64::CMEQv4i32, AArch64::CMHIv4i32, AArch64::CMHSv4i32,
3629 AArch64::CMHIv4i32, AArch64::CMHSv4i32, AArch64::CMGTv4i32,
3630 AArch64::CMGEv4i32, AArch64::CMGTv4i32, AArch64::CMGEv4i32},
3631 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3632 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3633 0 /* invalid */},
3634 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3635 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3636 0 /* invalid */}
3637 },
3638 {
3639 {AArch64::CMEQv2i64, AArch64::CMHIv2i64, AArch64::CMHSv2i64,
3640 AArch64::CMHIv2i64, AArch64::CMHSv2i64, AArch64::CMGTv2i64,
3641 AArch64::CMGEv2i64, AArch64::CMGTv2i64, AArch64::CMGEv2i64},
3642 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3643 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3644 0 /* invalid */},
3645 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3646 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3647 0 /* invalid */},
3648 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3649 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3650 0 /* invalid */}
3651 },
3652 };
3653 unsigned EltIdx = Log2_32(SrcEltSize / 8);
3654 unsigned NumEltsIdx = Log2_32(NumElts / 2);
3655 unsigned Opc = OpcTable[EltIdx][NumEltsIdx][PredIdx];
3656 if (!Opc) {
3657    LLVM_DEBUG(dbgs() << "Could not map G_ICMP to cmp opcode");
3658 return false;
3659 }
3660
3661 const RegisterBank &VecRB = *RBI.getRegBank(SrcReg, MRI, TRI);
3662 const TargetRegisterClass *SrcRC =
3663 getRegClassForTypeOnBank(SrcTy, VecRB, RBI, true);
3664 if (!SrcRC) {
3665    LLVM_DEBUG(dbgs() << "Could not determine source register class.\n");
3666 return false;
3667 }
3668
3669 unsigned NotOpc = Pred == ICmpInst::ICMP_NE ? AArch64::NOTv8i8 : 0;
3670 if (SrcTy.getSizeInBits() == 128)
3671 NotOpc = NotOpc ? AArch64::NOTv16i8 : 0;
3672
3673 if (SwapOperands)
3674 std::swap(SrcReg, Src2Reg);
3675
3676 MachineIRBuilder MIB(I);
3677 auto Cmp = MIB.buildInstr(Opc, {SrcRC}, {SrcReg, Src2Reg});
3678 constrainSelectedInstRegOperands(*Cmp, TII, TRI, RBI);
3679
3680 // Invert if we had a 'ne' cc.
3681 if (NotOpc) {
3682 Cmp = MIB.buildInstr(NotOpc, {DstReg}, {Cmp});
3683 constrainSelectedInstRegOperands(*Cmp, TII, TRI, RBI);
3684 } else {
3685 MIB.buildCopy(DstReg, Cmp.getReg(0));
3686 }
3687 RBI.constrainGenericRegister(DstReg, *SrcRC, MRI);
3688 I.eraseFromParent();
3689 return true;
3690}
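// Sketch of the table indexing above (annotation; log2u is a stand-in for
// llvm::Log2_32, not part of this file; assumes power-of-two shapes):
//   constexpr unsigned log2u(unsigned V) {
//     unsigned L = 0;
//     while (V >>= 1)
//       ++L;
//     return L;
//   }
//   static_assert(log2u(8 / 8) == 0, "s8 elements pick EltIdx 0");
//   static_assert(log2u(8 / 2) == 2, "v8 vectors pick NumEltsIdx 2");
// So an ICMP_NE on <8 x s8> reads OpcTable[0][2][0] (CMEQv8i8) and, the
// source being 64 bits wide, inverts the result with NOTv8i8.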
3691
3692MachineInstr *AArch64InstructionSelector::emitScalarToVector(
3693 unsigned EltSize, const TargetRegisterClass *DstRC, Register Scalar,
3694 MachineIRBuilder &MIRBuilder) const {
3695 auto Undef = MIRBuilder.buildInstr(TargetOpcode::IMPLICIT_DEF, {DstRC}, {});
3696
3697 auto BuildFn = [&](unsigned SubregIndex) {
3698 auto Ins =
3699 MIRBuilder
3700 .buildInstr(TargetOpcode::INSERT_SUBREG, {DstRC}, {Undef, Scalar})
3701 .addImm(SubregIndex);
3702 constrainSelectedInstRegOperands(*Undef, TII, TRI, RBI);
3703 constrainSelectedInstRegOperands(*Ins, TII, TRI, RBI);
3704 return &*Ins;
3705 };
3706
3707 switch (EltSize) {
3708 case 16:
3709 return BuildFn(AArch64::hsub);
3710 case 32:
3711 return BuildFn(AArch64::ssub);
3712 case 64:
3713 return BuildFn(AArch64::dsub);
3714 default:
3715 return nullptr;
3716 }
3717}
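// Illustrative MIR (annotation, not from the source file): for a 32-bit
// scalar this emits the standard undef-plus-subregister-insert idiom,
// roughly:
//   %undef:fpr128 = IMPLICIT_DEF
//   %vec:fpr128   = INSERT_SUBREG %undef, %scalar, %subreg.ssub
// which typically folds away after register allocation, since the scalar
// already occupies the low lane of an enclosing Q register.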
3718
3719bool AArch64InstructionSelector::selectMergeValues(
3720 MachineInstr &I, MachineRegisterInfo &MRI) const {
3721  assert(I.getOpcode() == TargetOpcode::G_MERGE_VALUES && "unexpected opcode");
3722 const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
3723 const LLT SrcTy = MRI.getType(I.getOperand(1).getReg());
3724  assert(!DstTy.isVector() && !SrcTy.isVector() && "invalid merge operation");
3725 const RegisterBank &RB = *RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI);
3726
3727 if (I.getNumOperands() != 3)
3728 return false;
3729
3730 // Merging 2 s64s into an s128.
3731 if (DstTy == LLT::scalar(128)) {
3732 if (SrcTy.getSizeInBits() != 64)
3733 return false;
3734 MachineIRBuilder MIB(I);
3735 Register DstReg = I.getOperand(0).getReg();
3736 Register Src1Reg = I.getOperand(1).getReg();
3737 Register Src2Reg = I.getOperand(2).getReg();
3738 auto Tmp = MIB.buildInstr(TargetOpcode::IMPLICIT_DEF, {DstTy}, {});
3739 MachineInstr *InsMI =
3740 emitLaneInsert(None, Tmp.getReg(0), Src1Reg, /* LaneIdx */ 0, RB, MIB);
3741 if (!InsMI)
3742 return false;
3743 MachineInstr *Ins2MI = emitLaneInsert(DstReg, InsMI->getOperand(0).getReg(),
3744 Src2Reg, /* LaneIdx */ 1, RB, MIB);
3745 if (!Ins2MI)
3746 return false;
3747 constrainSelectedInstRegOperands(*InsMI, TII, TRI, RBI);
3748 constrainSelectedInstRegOperands(*Ins2MI, TII, TRI, RBI);
3749 I.eraseFromParent();
3750 return true;
3751 }
3752
3753 if (RB.getID() != AArch64::GPRRegBankID)
3754 return false;
3755
3756 if (DstTy.getSizeInBits() != 64 || SrcTy.getSizeInBits() != 32)
3757 return false;
3758
3759 auto *DstRC = &AArch64::GPR64RegClass;
3760 Register SubToRegDef = MRI.createVirtualRegister(DstRC);
3761 MachineInstr &SubRegMI = *BuildMI(*I.getParent(), I, I.getDebugLoc(),
3762 TII.get(TargetOpcode::SUBREG_TO_REG))
3763 .addDef(SubToRegDef)
3764 .addImm(0)
3765 .addUse(I.getOperand(1).getReg())
3766 .addImm(AArch64::sub_32);
3767 Register SubToRegDef2 = MRI.createVirtualRegister(DstRC);
3768 // Need to anyext the second scalar before we can use bfm
3769 MachineInstr &SubRegMI2 = *BuildMI(*I.getParent(), I, I.getDebugLoc(),
3770 TII.get(TargetOpcode::SUBREG_TO_REG))
3771 .addDef(SubToRegDef2)
3772 .addImm(0)
3773 .addUse(I.getOperand(2).getReg())
3774 .addImm(AArch64::sub_32);
3775 MachineInstr &BFM =
3776 *BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::BFMXri))
3777 .addDef(I.getOperand(0).getReg())
3778 .addUse(SubToRegDef)
3779 .addUse(SubToRegDef2)
3780 .addImm(32)
3781 .addImm(31);
3782 constrainSelectedInstRegOperands(SubRegMI, TII, TRI, RBI);
3783 constrainSelectedInstRegOperands(SubRegMI2, TII, TRI, RBI);
3784 constrainSelectedInstRegOperands(BFM, TII, TRI, RBI);
3785 I.eraseFromParent();
3786 return true;
3787}
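// Worked example (illustrative, not from the source file): merging two s32
// values x and y into an s64 zero-extends both via SUBREG_TO_REG, then the
// BFMXri with immr=32, imms=31 is the "bfi xd, xm, #32, #32" alias, which
// inserts y's low 32 bits into bits [63:32]:
//   result = zext(x) | (zext(y) << 32)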
3788
3789static bool getLaneCopyOpcode(unsigned &CopyOpc, unsigned &ExtractSubReg,
3790 const unsigned EltSize) {
3791  // Choose a lane copy opcode and subregister based on the size of the
3792 // vector's elements.
3793 switch (EltSize) {
3794 case 16:
3795 CopyOpc = AArch64::CPYi16;
3796 ExtractSubReg = AArch64::hsub;
3797 break;
3798 case 32:
3799 CopyOpc = AArch64::CPYi32;
3800 ExtractSubReg = AArch64::ssub;
3801 break;
3802 case 64:
3803 CopyOpc = AArch64::CPYi64;
3804 ExtractSubReg = AArch64::dsub;
3805 break;
3806 default:
3807 // Unknown size, bail out.
3808    LLVM_DEBUG(dbgs() << "Elt size '" << EltSize << "' unsupported.\n");
3809 return false;
3810 }
3811 return true;
3812}
3813
3814MachineInstr *AArch64InstructionSelector::emitExtractVectorElt(
3815 Optional<Register> DstReg, const RegisterBank &DstRB, LLT ScalarTy,
3816 Register VecReg, unsigned LaneIdx, MachineIRBuilder &MIRBuilder) const {
3817 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
3818 unsigned CopyOpc = 0;
3819 unsigned ExtractSubReg = 0;
3820 if (!getLaneCopyOpcode(CopyOpc, ExtractSubReg, ScalarTy.getSizeInBits())) {
3821    LLVM_DEBUG(
3822        dbgs() << "Couldn't determine lane copy opcode for instruction.\n");
3823 return nullptr;
3824 }
3825
3826 const TargetRegisterClass *DstRC =
3827 getRegClassForTypeOnBank(ScalarTy, DstRB, RBI, true);
3828 if (!DstRC) {
3829    LLVM_DEBUG(dbgs() << "Could not determine destination register class.\n");
3830 return nullptr;
3831 }
3832
3833 const RegisterBank &VecRB = *RBI.getRegBank(VecReg, MRI, TRI);
3834 const LLT &VecTy = MRI.getType(VecReg);
3835 const TargetRegisterClass *VecRC =
3836 getRegClassForTypeOnBank(VecTy, VecRB, RBI, true);
3837 if (!VecRC) {
3838    LLVM_DEBUG(dbgs() << "Could not determine source register class.\n");
3839 return nullptr;
3840 }
3841
3842 // The register that we're going to copy into.
3843 Register InsertReg = VecReg;
3844 if (!DstReg)
3845 DstReg = MRI.createVirtualRegister(DstRC);
3846 // If the lane index is 0, we just use a subregister COPY.
3847 if (LaneIdx == 0) {
3848 auto Copy = MIRBuilder.buildInstr(TargetOpcode::COPY, {*DstReg}, {})
3849 .addReg(VecReg, 0, ExtractSubReg);
3850 RBI.constrainGenericRegister(*DstReg, *DstRC, MRI);
3851 return &*Copy;
3852 }
3853
3854 // Lane copies require 128-bit wide registers. If we're dealing with an
3855 // unpacked vector, then we need to move up to that width. Insert an implicit
3856 // def and a subregister insert to get us there.
3857 if (VecTy.getSizeInBits() != 128) {
3858 MachineInstr *ScalarToVector = emitScalarToVector(
3859 VecTy.getSizeInBits(), &AArch64::FPR128RegClass, VecReg, MIRBuilder);
3860 if (!ScalarToVector)
3861 return nullptr;
3862 InsertReg = ScalarToVector->getOperand(0).getReg();
3863 }
3864
3865 MachineInstr *LaneCopyMI =
3866 MIRBuilder.buildInstr(CopyOpc, {*DstReg}, {InsertReg}).addImm(LaneIdx);
3867 constrainSelectedInstRegOperands(*LaneCopyMI, TII, TRI, RBI);
3868
3869 // Make sure that we actually constrain the initial copy.
3870 RBI.constrainGenericRegister(*DstReg, *DstRC, MRI);
3871 return LaneCopyMI;
3872}
3873
3874bool AArch64InstructionSelector::selectExtractElt(
3875 MachineInstr &I, MachineRegisterInfo &MRI) const {
3876  assert(I.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT &&
3877         "unexpected opcode!");
3878 Register DstReg = I.getOperand(0).getReg();
3879 const LLT NarrowTy = MRI.getType(DstReg);
3880 const Register SrcReg = I.getOperand(1).getReg();
3881 const LLT WideTy = MRI.getType(SrcReg);
3882 (void)WideTy;
3883  assert(WideTy.getSizeInBits() >= NarrowTy.getSizeInBits() &&
3884         "source register size too small!");
3885  assert(!NarrowTy.isVector() && "cannot extract vector into vector!");
3886
3887 // Need the lane index to determine the correct copy opcode.
3888 MachineOperand &LaneIdxOp = I.getOperand(2);
3889  assert(LaneIdxOp.isReg() && "Lane index operand was not a register?");
3890
3891 if (RBI.getRegBank(DstReg, MRI, TRI)->getID() != AArch64::FPRRegBankID) {
3892    LLVM_DEBUG(dbgs() << "Cannot extract into GPR.\n");
3893 return false;
3894 }
3895
3896 // Find the index to extract from.
3897 auto VRegAndVal = getConstantVRegValWithLookThrough(LaneIdxOp.getReg(), MRI);
3898 if (!VRegAndVal)
3899 return false;
3900 unsigned LaneIdx = VRegAndVal->Value.getSExtValue();
3901
3902 MachineIRBuilder MIRBuilder(I);
3903
3904 const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
3905 MachineInstr *Extract = emitExtractVectorElt(DstReg, DstRB, NarrowTy, SrcReg,
3906 LaneIdx, MIRBuilder);
3907 if (!Extract)
3908 return false;
3909
3910 I.eraseFromParent();
3911 return true;
3912}
3913
3914bool AArch64InstructionSelector::selectSplitVectorUnmerge(
3915 MachineInstr &I, MachineRegisterInfo &MRI) const {
3916 unsigned NumElts = I.getNumOperands() - 1;
3917 Register SrcReg = I.getOperand(NumElts).getReg();
3918 const LLT NarrowTy = MRI.getType(I.getOperand(0).getReg());
3919 const LLT SrcTy = MRI.getType(SrcReg);
3920
3921  assert(NarrowTy.isVector() && "Expected an unmerge into vectors");
3922 if (SrcTy.getSizeInBits() > 128) {
3923    LLVM_DEBUG(dbgs() << "Unexpected vector type for vec split unmerge");
3924 return false;
3925 }
3926
3927 MachineIRBuilder MIB(I);
3928
3929 // We implement a split vector operation by treating the sub-vectors as
3930 // scalars and extracting them.
3931 const RegisterBank &DstRB =
3932 *RBI.getRegBank(I.getOperand(0).getReg(), MRI, TRI);
3933 for (unsigned OpIdx = 0; OpIdx < NumElts; ++OpIdx) {
3934 Register Dst = I.getOperand(OpIdx).getReg();
3935 MachineInstr *Extract =
3936 emitExtractVectorElt(Dst, DstRB, NarrowTy, SrcReg, OpIdx, MIB);
3937 if (!Extract)
3938 return false;
3939 }
3940 I.eraseFromParent();
3941 return true;
3942}
3943
3944bool AArch64InstructionSelector::selectUnmergeValues(
3945 MachineInstr &I, MachineRegisterInfo &MRI) const {
3946  assert(I.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
3947         "unexpected opcode");
3948
3949 // TODO: Handle unmerging into GPRs and from scalars to scalars.
3950 if (RBI.getRegBank(I.getOperand(0).getReg(), MRI, TRI)->getID() !=
3951 AArch64::FPRRegBankID ||
3952 RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI)->getID() !=
3953 AArch64::FPRRegBankID) {
3954    LLVM_DEBUG(dbgs() << "Unmerging vector-to-gpr and scalar-to-scalar "
3955                         "currently unsupported.\n");
3956 return false;
3957 }
3958
3959 // The last operand is the vector source register, and every other operand is
3960 // a register to unpack into.
3961 unsigned NumElts = I.getNumOperands() - 1;
3962 Register SrcReg = I.getOperand(NumElts).getReg();
3963 const LLT NarrowTy = MRI.getType(I.getOperand(0).getReg());
3964 const LLT WideTy = MRI.getType(SrcReg);
3965 (void)WideTy;
3966  assert((WideTy.isVector() || WideTy.getSizeInBits() == 128) &&
3967         "can only unmerge from vector or s128 types!");
3968  assert(WideTy.getSizeInBits() > NarrowTy.getSizeInBits() &&
3969         "source register size too small!");
3970
3971 if (!NarrowTy.isScalar())
3972 return selectSplitVectorUnmerge(I, MRI);
3973
3974 MachineIRBuilder MIB(I);
3975
3976  // Choose a lane copy opcode and subregister based on the size of the
3977 // vector's elements.
3978 unsigned CopyOpc = 0;
3979 unsigned ExtractSubReg = 0;
3980 if (!getLaneCopyOpcode(CopyOpc, ExtractSubReg, NarrowTy.getSizeInBits()))
3981 return false;
3982
3983 // Set up for the lane copies.
3984 MachineBasicBlock &MBB = *I.getParent();
3985
3986 // Stores the registers we'll be copying from.
3987 SmallVector<Register, 4> InsertRegs;
3988
3989 // We'll use the first register twice, so we only need NumElts-1 registers.
3990 unsigned NumInsertRegs = NumElts - 1;
3991
3992 // If our elements fit into exactly 128 bits, then we can copy from the source
3993 // directly. Otherwise, we need to do a bit of setup with some subregister
3994 // inserts.
3995 if (NarrowTy.getSizeInBits() * NumElts == 128) {
3996 InsertRegs = SmallVector<Register, 4>(NumInsertRegs, SrcReg);
3997 } else {
3998 // No. We have to perform subregister inserts. For each insert, create an
3999 // implicit def and a subregister insert, and save the register we create.
4000 for (unsigned Idx = 0; Idx < NumInsertRegs; ++Idx) {
4001 Register ImpDefReg = MRI.createVirtualRegister(&AArch64::FPR128RegClass);
4002 MachineInstr &ImpDefMI =
4003 *BuildMI(MBB, I, I.getDebugLoc(), TII.get(TargetOpcode::IMPLICIT_DEF),
4004 ImpDefReg);
4005
4006 // Now, create the subregister insert from SrcReg.
4007 Register InsertReg = MRI.createVirtualRegister(&AArch64::FPR128RegClass);
4008 MachineInstr &InsMI =
4009 *BuildMI(MBB, I, I.getDebugLoc(),
4010 TII.get(TargetOpcode::INSERT_SUBREG), InsertReg)
4011 .addUse(ImpDefReg)
4012 .addUse(SrcReg)
4013 .addImm(AArch64::dsub);
4014
4015 constrainSelectedInstRegOperands(ImpDefMI, TII, TRI, RBI);
4016 constrainSelectedInstRegOperands(InsMI, TII, TRI, RBI);
4017
4018 // Save the register so that we can copy from it after.
4019 InsertRegs.push_back(InsertReg);
4020 }
4021 }
4022
4023 // Now that we've created any necessary subregister inserts, we can
4024 // create the copies.
4025 //
4026 // Perform the first copy separately as a subregister copy.
4027 Register CopyTo = I.getOperand(0).getReg();
4028 auto FirstCopy = MIB.buildInstr(TargetOpcode::COPY, {CopyTo}, {})
4029 .addReg(InsertRegs[0], 0, ExtractSubReg);
4030 constrainSelectedInstRegOperands(*FirstCopy, TII, TRI, RBI);
4031
4032 // Now, perform the remaining copies as vector lane copies.
4033 unsigned LaneIdx = 1;
4034 for (Register InsReg : InsertRegs) {
4035 Register CopyTo = I.getOperand(LaneIdx).getReg();
4036 MachineInstr &CopyInst =
4037 *BuildMI(MBB, I, I.getDebugLoc(), TII.get(CopyOpc), CopyTo)
4038 .addUse(InsReg)
4039 .addImm(LaneIdx);
4040 constrainSelectedInstRegOperands(CopyInst, TII, TRI, RBI);
4041 ++LaneIdx;
4042 }
4043
4044 // Separately constrain the first copy's destination. Because of the
4045 // limitation in constrainOperandRegClass, we can't guarantee that this will
4046 // actually be constrained. So, do it ourselves using the second operand.
4047 const TargetRegisterClass *RC =
4048 MRI.getRegClassOrNull(I.getOperand(1).getReg());
4049 if (!RC) {
4050    LLVM_DEBUG(dbgs() << "Couldn't constrain copy destination.\n");
4051 return false;
4052 }
4053
4054 RBI.constrainGenericRegister(CopyTo, *RC, MRI);
4055 I.eraseFromParent();
4056 return true;
4057}
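// Illustrative output (annotation, not from the source file): unmerging a
// <4 x s32> held in q0 becomes one ssub subregister copy for lane 0 plus
// CPYi32 lane copies such as "mov s1, v0.s[1]"; 64-bit sources are first
// widened to 128 bits with the IMPLICIT_DEF/INSERT_SUBREG sequence because
// the lane-copy instructions only read Q registers.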
4058
4059bool AArch64InstructionSelector::selectConcatVectors(
4060 MachineInstr &I, MachineRegisterInfo &MRI) const {
4061  assert(I.getOpcode() == TargetOpcode::G_CONCAT_VECTORS &&
4062         "Unexpected opcode");
4063 Register Dst = I.getOperand(0).getReg();
4064 Register Op1 = I.getOperand(1).getReg();
4065 Register Op2 = I.getOperand(2).getReg();
4066 MachineIRBuilder MIRBuilder(I);
4067 MachineInstr *ConcatMI = emitVectorConcat(Dst, Op1, Op2, MIRBuilder);
4068 if (!ConcatMI)
4069 return false;
4070 I.eraseFromParent();
4071 return true;
4072}
4073
4074unsigned
4075AArch64InstructionSelector::emitConstantPoolEntry(const Constant *CPVal,
4076 MachineFunction &MF) const {
4077 Type *CPTy = CPVal->getType();
4078 Align Alignment = MF.getDataLayout().getPrefTypeAlign(CPTy);
4079
4080 MachineConstantPool *MCP = MF.getConstantPool();
4081 return MCP->getConstantPoolIndex(CPVal, Alignment);
4082}
4083
4084MachineInstr *AArch64InstructionSelector::emitLoadFromConstantPool(
4085 const Constant *CPVal, MachineIRBuilder &MIRBuilder) const {
4086 unsigned CPIdx = emitConstantPoolEntry(CPVal, MIRBuilder.getMF());
4087
4088 auto Adrp =
4089 MIRBuilder.buildInstr(AArch64::ADRP, {&AArch64::GPR64RegClass}, {})
4090 .addConstantPoolIndex(CPIdx, 0, AArch64II::MO_PAGE);
4091
4092 MachineInstr *LoadMI = nullptr;
4093 switch (MIRBuilder.getDataLayout().getTypeStoreSize(CPVal->getType())) {
4094 case 16:
4095 LoadMI =
4096 &*MIRBuilder
4097 .buildInstr(AArch64::LDRQui, {&AArch64::FPR128RegClass}, {Adrp})
4098 .addConstantPoolIndex(CPIdx, 0,
4099 AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
4100 break;
4101 case 8:
4102 LoadMI =
4103 &*MIRBuilder
4104 .buildInstr(AArch64::LDRDui, {&AArch64::FPR64RegClass}, {Adrp})
4105 .addConstantPoolIndex(CPIdx, 0,
4106 AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
4107 break;
4108 case 4:
4109 LoadMI =
4110 &*MIRBuilder
4111 .buildInstr(AArch64::LDRSui, {&AArch64::FPR32RegClass}, {Adrp})
4112 .addConstantPoolIndex(CPIdx, 0,
4113 AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
4114 break;
4115 default:
4116    LLVM_DEBUG(dbgs() << "Could not load from constant pool of type "
4117                      << *CPVal->getType());
4118 return nullptr;
4119 }
4120 constrainSelectedInstRegOperands(*Adrp, TII, TRI, RBI);
4121 constrainSelectedInstRegOperands(*LoadMI, TII, TRI, RBI);
4122 return LoadMI;
4123}
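// Illustrative expansion (annotation; the label .LCPI0_0 is hypothetical):
// a 16-byte constant becomes a page address plus an offset load,
//   adrp x8, .LCPI0_0
//   ldr  q0, [x8, :lo12:.LCPI0_0]
// with the type's store size selecting between the q/d/s load forms above.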
4124
4125/// Return an <Opcode, SubregIndex> pair to do a vector elt insert of a given
4126/// size and RB.
4127static std::pair<unsigned, unsigned>
4128getInsertVecEltOpInfo(const RegisterBank &RB, unsigned EltSize) {
4129 unsigned Opc, SubregIdx;
4130 if (RB.getID() == AArch64::GPRRegBankID) {
4131 if (EltSize == 16) {
4132 Opc = AArch64::INSvi16gpr;
4133 SubregIdx = AArch64::ssub;
4134 } else if (EltSize == 32) {
4135 Opc = AArch64::INSvi32gpr;
4136 SubregIdx = AArch64::ssub;
4137 } else if (EltSize == 64) {
4138 Opc = AArch64::INSvi64gpr;
4139 SubregIdx = AArch64::dsub;
4140 } else {
4141      llvm_unreachable("invalid elt size!");
4142 }
4143 } else {
4144 if (EltSize == 8) {
4145 Opc = AArch64::INSvi8lane;
4146 SubregIdx = AArch64::bsub;
4147 } else if (EltSize == 16) {
4148 Opc = AArch64::INSvi16lane;
4149 SubregIdx = AArch64::hsub;
4150 } else if (EltSize == 32) {
4151 Opc = AArch64::INSvi32lane;
4152 SubregIdx = AArch64::ssub;
4153 } else if (EltSize == 64) {
4154 Opc = AArch64::INSvi64lane;
4155 SubregIdx = AArch64::dsub;
4156 } else {
4157      llvm_unreachable("invalid elt size!");
4158 }
4159 }
4160 return std::make_pair(Opc, SubregIdx);
4161}
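// Illustrative encodings (annotation, not from the source file): INSvi32gpr
// is "mov v0.s[lane], w1" (insert from a GPR) and INSvi32lane is
// "mov v0.s[lane], v1.s[0]" (insert from another vector's element); the
// returned subregister index names where such a scalar sits inside the
// 128-bit Q register.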
4162
4163MachineInstr *AArch64InstructionSelector::emitInstr(
4164 unsigned Opcode, std::initializer_list<llvm::DstOp> DstOps,
4165 std::initializer_list<llvm::SrcOp> SrcOps, MachineIRBuilder &MIRBuilder,
4166 const ComplexRendererFns &RenderFns) const {
4167  assert(Opcode && "Expected an opcode?");
4168  assert(!isPreISelGenericOpcode(Opcode) &&
4169         "Function should only be used to produce selected instructions!");
4170 auto MI = MIRBuilder.buildInstr(Opcode, DstOps, SrcOps);
4171 if (RenderFns)
4172 for (auto &Fn : *RenderFns)
4173 Fn(MI);
4174 constrainSelectedInstRegOperands(*MI, TII, TRI, RBI);
4175 return &*MI;
4176}
4177
4178MachineInstr *AArch64InstructionSelector::emitAddSub(
4179 const std::array<std::array<unsigned, 2>, 5> &AddrModeAndSizeToOpcode,
4180 Register Dst, MachineOperand &LHS, MachineOperand &RHS,
4181 MachineIRBuilder &MIRBuilder) const {
4182 MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
4183  assert(LHS.isReg() && RHS.isReg() && "Expected register operands?");
4184 auto Ty = MRI.getType(LHS.getReg());
4185  assert(!Ty.isVector() && "Expected a scalar or pointer?");
4186 unsigned Size = Ty.getSizeInBits();
4187  assert((Size == 32 || Size == 64) && "Expected a 32-bit or 64-bit type only");
4188 bool Is32Bit = Size == 32;
4189
4190 // INSTRri form with positive arithmetic immediate.
4191 if (auto Fns = selectArithImmed(RHS))
4192 return emitInstr(AddrModeAndSizeToOpcode[0][Is32Bit], {Dst}, {LHS},
4193 MIRBuilder, Fns);
4194
4195 // INSTRri form with negative arithmetic immediate.
4196 if (auto Fns = selectNegArithImmed(RHS))
4197 return emitInstr(AddrModeAndSizeToOpcode[3][Is32Bit], {Dst}, {LHS},
4198 MIRBuilder, Fns);
4199
4200 // INSTRrx form.
4201 if (auto Fns = selectArithExtendedRegister(RHS))
4202 return emitInstr(AddrModeAndSizeToOpcode[4][Is32Bit], {Dst}, {LHS},
4203 MIRBuilder, Fns);
4204
4205 // INSTRrs form.
4206 if (auto Fns = selectShiftedRegister(RHS))
4207 return emitInstr(AddrModeAndSizeToOpcode[1][Is32Bit], {Dst}, {LHS},
4208 MIRBuilder, Fns);
4209 return emitInstr(AddrModeAndSizeToOpcode[2][Is32Bit], {Dst}, {LHS, RHS},
4210 MIRBuilder);
4211}
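// Selection order above, illustrated (annotation, not from the source file)
// for "%d = G_ADD %x, %rhs", tried best-first by how the RHS encodes:
//   add x0, x1, #4           // arithmetic immediate       (table row 0)
//   sub x0, x1, #4           // negated immediate, rhs=-4  (row 3)
//   add x0, x1, w2, sxtw     // extended register          (row 4)
//   add x0, x1, x2, lsl #3   // shifted register           (row 1)
//   add x0, x1, x2           // register-register fallback (row 2)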
4212
4213MachineInstr *
4214AArch64InstructionSelector::emitADD(Register DefReg, MachineOperand &LHS,
4215 MachineOperand &RHS,
4216 MachineIRBuilder &MIRBuilder) const {
4217 const std::array<std::array<unsigned, 2>, 5> OpcTable{
4218 {{AArch64::ADDXri, AArch64::ADDWri},
4219 {AArch64::ADDXrs, AArch64::ADDWrs},
4220 {AArch64::ADDXrr, AArch64::ADDWrr},
4221 {AArch64::SUBXri, AArch64::SUBWri},
4222 {AArch64::ADDXrx, AArch64::ADDWrx}}};
4223 return emitAddSub(OpcTable, DefReg, LHS, RHS, MIRBuilder);
4224}
4225
4226MachineInstr *
4227AArch64InstructionSelector::emitADDS(Register Dst, MachineOperand &LHS,
4228 MachineOperand &RHS,
4229 MachineIRBuilder &MIRBuilder) const {
4230 const std::array<std::array<unsigned, 2>, 5> OpcTable{
4231 {{AArch64::ADDSXri, AArch64::ADDSWri},
4232 {AArch64::ADDSXrs, AArch64::ADDSWrs},
4233 {AArch64::ADDSXrr, AArch64::ADDSWrr},
4234 {AArch64::SUBSXri, AArch64::SUBSWri},
4235 {AArch64::ADDSXrx, AArch64::ADDSWrx}}};
4236 return emitAddSub(OpcTable, Dst, LHS, RHS, MIRBuilder);
4237}
4238
4239MachineInstr *
4240AArch64InstructionSelector::emitSUBS(Register Dst, MachineOperand &LHS,
4241 MachineOperand &RHS,
4242 MachineIRBuilder &MIRBuilder) const {
4243 const std::array<std::array<unsigned, 2>, 5> OpcTable{
4244 {{AArch64::SUBSXri, AArch64::SUBSWri},
4245 {AArch64::SUBSXrs, AArch64::SUBSWrs},
4246 {AArch64::SUBSXrr, AArch64::SUBSWrr},
4247 {AArch64::ADDSXri, AArch64::ADDSWri},
4248 {AArch64::SUBSXrx, AArch64::SUBSWrx}}};
4249 return emitAddSub(OpcTable, Dst, LHS, RHS, MIRBuilder);
4250}
4251
4252MachineInstr *
4253AArch64InstructionSelector::emitCMN(MachineOperand &LHS, MachineOperand &RHS,
4254 MachineIRBuilder &MIRBuilder) const {
4255 MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
4256 bool Is32Bit = (MRI.getType(LHS.getReg()).getSizeInBits() == 32);
4257 auto RC = Is32Bit ? &AArch64::GPR32RegClass : &AArch64::GPR64RegClass;
4258 return emitADDS(MRI.createVirtualRegister(RC), LHS, RHS, MIRBuilder);
4259}
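// Example (illustrative): CMN is "compare negative" -- an ADDS kept only for
// its flags -- so a compare against a negative constant such as
// "G_ICMP eq, %x, -5" can fold to "cmn x0, #5" rather than materializing -5.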
4260
4261MachineInstr *
4262AArch64InstructionSelector::emitTST(MachineOperand &LHS, MachineOperand &RHS,
4263 MachineIRBuilder &MIRBuilder) const {
4264  assert(LHS.isReg() && RHS.isReg() && "Expected register operands?");
4265 MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
4266 LLT Ty = MRI.getType(LHS.getReg());
4267 unsigned RegSize = Ty.getSizeInBits();
4268 bool Is32Bit = (RegSize == 32);
4269 const unsigned OpcTable[3][2] = {{AArch64::ANDSXri, AArch64::ANDSWri},
4270 {AArch64::ANDSXrs, AArch64::ANDSWrs},
4271 {AArch64::ANDSXrr, AArch64::ANDSWrr}};
4272 // ANDS needs a logical immediate for its immediate form. Check if we can
4273 // fold one in.
4274 if (auto ValAndVReg = getConstantVRegValWithLookThrough(RHS.getReg(), MRI)) {
4275 int64_t Imm = ValAndVReg->Value.getSExtValue();
4276
4277 if (AArch64_AM::isLogicalImmediate(Imm, RegSize)) {
4278 auto TstMI = MIRBuilder.buildInstr(OpcTable[0][Is32Bit], {Ty}, {LHS});
4279 TstMI.addImm(AArch64_AM::encodeLogicalImmediate(Imm, RegSize));
4280 constrainSelectedInstRegOperands(*TstMI, TII, TRI, RBI);
4281 return &*TstMI;
4282 }
4283 }
4284
4285 if (auto Fns = selectLogicalShiftedRegister(RHS))
4286 return emitInstr(OpcTable[1][Is32Bit], {Ty}, {LHS}, MIRBuilder, Fns);
4287 return emitInstr(OpcTable[2][Is32Bit], {Ty}, {LHS, RHS}, MIRBuilder);
4288}
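// Example (illustrative): TST is an ANDS that discards its result. A pattern
// like "G_ICMP ne, (G_AND %x, 0xff), 0" can thus select "tst x0, #0xff",
// provided the mask encodes as an AArch64 logical immediate (the
// isLogicalImmediate check above); otherwise the shifted- or plain-register
// forms are used.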
4289
4290MachineInstr *AArch64InstructionSelector::emitIntegerCompare(
4291 MachineOperand &LHS, MachineOperand &RHS, MachineOperand &Predicate,
4292 MachineIRBuilder &MIRBuilder) const {
4293  assert(LHS.isReg() && RHS.isReg() && "Expected LHS and RHS to be registers!");
4294  assert(Predicate.isPredicate() && "Expected predicate?");
4295 MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
4296 LLT CmpTy = MRI.getType(LHS.getReg());
4297  assert(!CmpTy.isVector() && "Expected scalar or pointer");
4298 unsigned Size = CmpTy.getSizeInBits();
4299 (void)Size;
4300  assert((Size == 32 || Size == 64) && "Expected a 32-bit or 64-bit LHS/RHS?");
4301 // Fold the compare into a cmn or tst if possible.
4302 if (auto FoldCmp = tryFoldIntegerCompare(LHS, RHS, Predicate, MIRBuilder))
4303 return FoldCmp;
4304 auto Dst = MRI.cloneVirtualRegister(LHS.getReg());
4305 return emitSUBS(Dst, LHS, RHS, MIRBuilder);
4306}
4307
4308MachineInstr *AArch64InstructionSelector::emitCSetForFCmp(
4309 Register Dst, CmpInst::Predicate Pred, MachineIRBuilder &MIRBuilder) const {
4310 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
4311#ifndef NDEBUG
4312 LLT Ty = MRI.getType(Dst);
4313  assert(!Ty.isVector() && Ty.getSizeInBits() == 32 &&
4314         "Expected a 32-bit scalar register?");
4315#endif
4316 const Register ZeroReg = AArch64::WZR;
4317 auto EmitCSet = [&](Register CsetDst, AArch64CC::CondCode CC) {
4318 auto CSet =
4319 MIRBuilder.buildInstr(AArch64::CSINCWr, {CsetDst}, {ZeroReg, ZeroReg})
4320 .addImm(getInvertedCondCode(CC));
4321 constrainSelectedInstRegOperands(*CSet, TII, TRI, RBI);
4322 return &*CSet;
4323 };
4324
4325 AArch64CC::CondCode CC1, CC2;
4326 changeFCMPPredToAArch64CC(Pred, CC1, CC2);
4327 if (CC2 == AArch64CC::AL)
4328 return EmitCSet(Dst, CC1);
4329
4330 const TargetRegisterClass *RC = &AArch64::GPR32RegClass;
4331 Register Def1Reg = MRI.createVirtualRegister(RC);
4332 Register Def2Reg = MRI.createVirtualRegister(RC);
4333 EmitCSet(Def1Reg, CC1);
4334 EmitCSet(Def2Reg, CC2);
4335 auto OrMI = MIRBuilder.buildInstr(AArch64::ORRWrr, {Dst}, {Def1Reg, Def2Reg});
4336 constrainSelectedInstRegOperands(*OrMI, TII, TRI, RBI);
4337 return &*OrMI;
4338}
4339
4340MachineInstr *
4341AArch64InstructionSelector::emitFPCompare(Register LHS, Register RHS,
4342 MachineIRBuilder &MIRBuilder,
4343 Optional<CmpInst::Predicate> Pred) const {
4344 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
4345 LLT Ty = MRI.getType(LHS);
4346 if (Ty.isVector())
4347 return nullptr;
4348 unsigned OpSize = Ty.getSizeInBits();
4349 if (OpSize != 32 && OpSize != 64)
4350 return nullptr;
4351
4352 // If this is a compare against +0.0, then we don't have
4353 // to explicitly materialize a constant.
4354 const ConstantFP *FPImm = getConstantFPVRegVal(RHS, MRI);
4355 bool ShouldUseImm = FPImm && (FPImm->isZero() && !FPImm->isNegative());
4356
4357 auto IsEqualityPred = [](CmpInst::Predicate P) {
4358 return P == CmpInst::FCMP_OEQ || P == CmpInst::FCMP_ONE ||
4359 P == CmpInst::FCMP_UEQ || P == CmpInst::FCMP_UNE;
4360 };
4361 if (!ShouldUseImm && Pred && IsEqualityPred(*Pred)) {
4362 // Try commutating the operands.
4363 const ConstantFP *LHSImm = getConstantFPVRegVal(LHS, MRI);
4364 if (LHSImm && (LHSImm->isZero() && !LHSImm->isNegative())) {
4365 ShouldUseImm = true;
4366 std::swap(LHS, RHS);
4367 }
4368 }
4369 unsigned CmpOpcTbl[2][2] = {{AArch64::FCMPSrr, AArch64::FCMPDrr},
4370 {AArch64::FCMPSri, AArch64::FCMPDri}};
4371 unsigned CmpOpc = CmpOpcTbl[ShouldUseImm][OpSize == 64];
4372
4373 // Partially build the compare. Decide if we need to add a use for the
4374  // third operand based on whether we're comparing against 0.0.
4375 auto CmpMI = MIRBuilder.buildInstr(CmpOpc).addUse(LHS);
4376 if (!ShouldUseImm)
4377 CmpMI.addUse(RHS);
4378 constrainSelectedInstRegOperands(*CmpMI, TII, TRI, RBI);
4379 return &*CmpMI;
4380}
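// Example (illustrative): a compare against +0.0 selects the immediate form
// "fcmp s0, #0.0" (FCMPSri/FCMPDri), so no constant is materialized; for the
// equality predicates a zero on the LHS is commuted across first, since
// oeq(0.0, x) == oeq(x, 0.0).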
4381
4382MachineInstr *AArch64InstructionSelector::emitVectorConcat(
4383 Optional<Register> Dst, Register Op1, Register Op2,
4384 MachineIRBuilder &MIRBuilder) const {
4385 // We implement a vector concat by:
4386 // 1. Use scalar_to_vector to insert the lower vector into the larger dest
4387 // 2. Insert the upper vector into the destination's upper element
4388 // TODO: some of this code is common with G_BUILD_VECTOR handling.
4389 MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
4390
4391 const LLT Op1Ty = MRI.getType(Op1);
4392 const LLT Op2Ty = MRI.getType(Op2);
4393
4394 if (Op1Ty != Op2Ty) {
4395    LLVM_DEBUG(dbgs() << "Could not do vector concat of differing vector tys");
4396 return nullptr;
4397 }
4398  assert(Op1Ty.isVector() && "Expected a vector for vector concat");
4399
4400 if (Op1Ty.getSizeInBits() >= 128) {
4401    LLVM_DEBUG(dbgs() << "Vector concat not supported for full size vectors");
4402 return nullptr;
4403 }
4404
4405 // At the moment we just support 64 bit vector concats.
4406 if (Op1Ty.getSizeInBits() != 64) {
4407    LLVM_DEBUG(dbgs() << "Vector concat only supported for 64b vectors");
4408 return nullptr;
4409 }
4410
4411 const LLT ScalarTy = LLT::scalar(Op1Ty.getSizeInBits());
4412 const RegisterBank &FPRBank = *RBI.getRegBank(Op1, MRI, TRI);
4413 const TargetRegisterClass *DstRC =
4414 getMinClassForRegBank(FPRBank, Op1Ty.getSizeInBits() * 2);
4415
4416 MachineInstr *WidenedOp1 =
4417 emitScalarToVector(ScalarTy.getSizeInBits(), DstRC, Op1, MIRBuilder);
4418 MachineInstr *WidenedOp2 =
4419 emitScalarToVector(ScalarTy.getSizeInBits(), DstRC, Op2, MIRBuilder);
4420 if (!WidenedOp1 || !WidenedOp2) {
4421    LLVM_DEBUG(dbgs() << "Could not emit a vector from scalar value");
4422 return nullptr;
4423 }
4424
4425 // Now do the insert of the upper element.
4426 unsigned InsertOpc, InsSubRegIdx;
4427 std::tie(InsertOpc, InsSubRegIdx) =
4428 getInsertVecEltOpInfo(FPRBank, ScalarTy.getSizeInBits());
4429
4430 if (!Dst)
4431 Dst = MRI.createVirtualRegister(DstRC);
4432 auto InsElt =
4433 MIRBuilder
4434 .buildInstr(InsertOpc, {*Dst}, {WidenedOp1->getOperand(0).getReg()})
4435 .addImm(1) /* Lane index */
4436 .addUse(WidenedOp2->getOperand(0).getReg())
4437 .addImm(0);
4438 constrainSelectedInstRegOperands(*InsElt, TII, TRI, RBI);
4439 return &*InsElt;
4440}
4441
4442MachineInstr *
4443AArch64InstructionSelector::emitCSetForICMP(Register DefReg, unsigned Pred,
4444 MachineIRBuilder &MIRBuilder,
4445 Register SrcReg) const {
4446 // CSINC increments the result when the predicate is false. Invert it.
4447 const AArch64CC::CondCode InvCC = changeICMPPredToAArch64CC(
4448 CmpInst::getInversePredicate((CmpInst::Predicate)Pred));
4449 auto I = MIRBuilder.buildInstr(AArch64::CSINCWr, {DefReg}, {SrcReg, SrcReg})
4450 .addImm(InvCC);
4451 constrainSelectedInstRegOperands(*I, TII, TRI, RBI);
4452 return &*I;
4453}
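// Example (illustrative): with SrcReg = WZR this produces the "cset" alias,
// e.g. "cset w0, eq" == "csinc w0, wzr, wzr, ne". CSINC returns its first
// source when the condition holds and its second source plus one otherwise,
// so inverting the predicate makes the result 1 exactly when the original
// predicate is true.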
4454
4455std::pair<MachineInstr *, AArch64CC::CondCode>
4456AArch64InstructionSelector::emitOverflowOp(unsigned Opcode, Register Dst,
4457 MachineOperand &LHS,
4458 MachineOperand &RHS,
4459 MachineIRBuilder &MIRBuilder) const {
4460 switch (Opcode) {
4461 default:
4462    llvm_unreachable("Unexpected opcode!");
4463 case TargetOpcode::G_SADDO:
4464 return std::make_pair(emitADDS(Dst, LHS, RHS, MIRBuilder), AArch64CC::VS);
4465 case TargetOpcode::G_UADDO:
4466 return std::make_pair(emitADDS(Dst, LHS, RHS, MIRBuilder), AArch64CC::HS);
4467 case TargetOpcode::G_SSUBO:
4468 return std::make_pair(emitSUBS(Dst, LHS, RHS, MIRBuilder), AArch64CC::VS);
4469 case TargetOpcode::G_USUBO:
4470 return std::make_pair(emitSUBS(Dst, LHS, RHS, MIRBuilder), AArch64CC::LO);
4471 }
4472}
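
The condition codes above read the flags ADDS/SUBS set: VS for signed overflow, HS (carry set) for unsigned add overflow, and LO (carry clear) for an unsigned subtract borrow. A sketch using compiler builtins to model the same overflow conditions:

  #include <cassert>
  #include <cstdint>
  #include <limits>

  int main() {
    int32_t S;
    uint32_t U;
    // G_SADDO -> ADDS + VS: signed wrap sets V.
    assert(__builtin_add_overflow(std::numeric_limits<int32_t>::max(), 1, &S));
    // G_UADDO -> ADDS + HS: unsigned wrap sets C.
    assert(__builtin_add_overflow(std::numeric_limits<uint32_t>::max(), 1u, &U));
    // G_USUBO -> SUBS + LO: unsigned underflow produces a borrow (C clear).
    assert(__builtin_sub_overflow(0u, 1u, &U));
  }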
4473
4474bool AArch64InstructionSelector::tryOptSelect(MachineInstr &I) const {
4475 MachineIRBuilder MIB(I);
4476 MachineRegisterInfo &MRI = *MIB.getMRI();
4477 // We want to recognize this pattern:
4478 //
4479 // $z = G_FCMP pred, $x, $y
4480 // ...
4481 // $w = G_SELECT $z, $a, $b
4482 //
4483 // Where the value of $z is *only* ever used by the G_SELECT (possibly with
4484 // some copies/truncs in between).
4485 //
4486 // If we see this, then we can emit something like this:
4487 //
4488 // fcmp $x, $y
4489 // fcsel $w, $a, $b, pred
4490 //
4491 // Rather than emitting both of the rather long sequences in the standard
4492 // G_FCMP/G_SELECT select methods.
4493
4494 // First, check if the condition is defined by a compare.
4495 MachineInstr *CondDef = MRI.getVRegDef(I.getOperand(1).getReg());
4496 while (CondDef) {
4497 // We can only fold if all of the defs have one use.
4498 Register CondDefReg = CondDef->getOperand(0).getReg();
4499 if (!MRI.hasOneNonDBGUse(CondDefReg)) {
4500 // Unless it's another select.
4501 for (const MachineInstr &UI : MRI.use_nodbg_instructions(CondDefReg)) {
4502 if (CondDef == &UI)
4503 continue;
4504 if (UI.getOpcode() != TargetOpcode::G_SELECT)
4505 return false;
4506 }
4507 }
4508
4509 // We can skip over G_TRUNC since the condition is 1-bit.
4510 // Truncating/extending can have no impact on the value.
4511 unsigned Opc = CondDef->getOpcode();
4512 if (Opc != TargetOpcode::COPY && Opc != TargetOpcode::G_TRUNC)
4513 break;
4514
4515 // Can't see past copies from physregs.
4516 if (Opc == TargetOpcode::COPY &&
4517 Register::isPhysicalRegister(CondDef->getOperand(1).getReg()))
4518 return false;
4519
4520 CondDef = MRI.getVRegDef(CondDef->getOperand(1).getReg());
4521 }
4522
4523 // Is the condition defined by a compare?
4524 if (!CondDef)
4525 return false;
4526
4527 unsigned CondOpc = CondDef->getOpcode();
4528 if (CondOpc != TargetOpcode::G_ICMP && CondOpc != TargetOpcode::G_FCMP)
4529 return false;
4530
4531 AArch64CC::CondCode CondCode;
4532 if (CondOpc == TargetOpcode::G_ICMP) {
4533 auto Pred =
4534 static_cast<CmpInst::Predicate>(CondDef->getOperand(1).getPredicate());
4535 CondCode = changeICMPPredToAArch64CC(Pred);
4536 emitIntegerCompare(CondDef->getOperand(2), CondDef->getOperand(3),
4537 CondDef->getOperand(1), MIB);
4538 } else {
4539 // Get the condition code for the select.
4540 auto Pred =
4541 static_cast<CmpInst::Predicate>(CondDef->getOperand(1).getPredicate());
4542 AArch64CC::CondCode CondCode2;
4543 changeFCMPPredToAArch64CC(Pred, CondCode, CondCode2);
4544
4545 // changeFCMPPredToAArch64CC sets CondCode2 to AL when we require two
4546 // instructions to emit the comparison.
4547 // TODO: Handle FCMP_UEQ and FCMP_ONE. After that, this check will be
4548 // unnecessary.
4549 if (CondCode2 != AArch64CC::AL)
4550 return false;
4551
4552 if (!emitFPCompare(CondDef->getOperand(2).getReg(),
4553 CondDef->getOperand(3).getReg(), MIB)) {
4554 LLVM_DEBUG(dbgs() << "Couldn't emit compare for select!\n");
4555 return false;
4556 }
4557 }
4558
4559 // Emit the select.
4560 emitSelect(I.getOperand(0).getReg(), I.getOperand(2).getReg(),
4561 I.getOperand(3).getReg(), CondCode, MIB);
4562 I.eraseFromParent();
4563 return true;
4564}
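
For reference, a source-level shape that lowers to the G_FCMP/G_SELECT pair this fold targets; with the optimization it can become a bare fcmp followed by fcsel rather than materializing the i1 condition (illustrative only, not a guaranteed codegen claim):

  // Expected to select roughly to: fcmp d0, d1; fcsel d0, d2, d3, mi
  double sel(double X, double Y, double A, double B) {
    return (X < Y) ? A : B;
  }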
4565
4566MachineInstr *AArch64InstructionSelector::tryFoldIntegerCompare(
4567 MachineOperand &LHS, MachineOperand &RHS, MachineOperand &Predicate,
4568 MachineIRBuilder &MIRBuilder) const {
4569 assert(LHS.isReg() && RHS.isReg() && Predicate.isPredicate() &&
4570 "Unexpected MachineOperand");
4571 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
4572 // We want to find this sort of thing:
4573 // x = G_SUB 0, y
4574 // G_ICMP z, x
4575 //
4576 // In this case, we can fold the G_SUB into the G_ICMP using a CMN instead.
4577 // e.g:
4578 //
4579 // cmn z, y
4580
4581 // Check if the RHS or LHS of the G_ICMP is defined by a SUB
4582 MachineInstr *LHSDef = getDefIgnoringCopies(LHS.getReg(), MRI);
4583 MachineInstr *RHSDef = getDefIgnoringCopies(RHS.getReg(), MRI);
4584 auto P = static_cast<CmpInst::Predicate>(Predicate.getPredicate());
4585 // Given this:
4586 //
4587 // x = G_SUB 0, y
4588 // G_ICMP x, z
4589 //
4590 // Produce this:
4591 //
4592 // cmn y, z
4593 if (isCMN(LHSDef, P, MRI))
4594 return emitCMN(LHSDef->getOperand(2), RHS, MIRBuilder);
4595
4596 // Same idea here, but with the RHS of the compare instead:
4597 //
4598 // Given this:
4599 //
4600 // x = G_SUB 0, y
4601 // G_ICMP z, x
4602 //
4603 // Produce this:
4604 //
4605 // cmn z, y
4606 if (isCMN(RHSDef, P, MRI))
4607 return emitCMN(LHS, RHSDef->getOperand(2), MIRBuilder);
4608
4609 // Given this:
4610 //
4611 // z = G_AND x, y
4612 // G_ICMP z, 0
4613 //
4614 // Produce this if the compare is signed:
4615 //
4616 // tst x, y
4617 if (!CmpInst::isUnsigned(P) && LHSDef &&
4618 LHSDef->getOpcode() == TargetOpcode::G_AND) {
4619 // Make sure that the RHS is 0.
4620 auto ValAndVReg = getConstantVRegValWithLookThrough(RHS.getReg(), MRI);
4621 if (!ValAndVReg || ValAndVReg->Value != 0)
4622 return nullptr;
4623
4624 return emitTST(LHSDef->getOperand(1),
4625 LHSDef->getOperand(2), MIRBuilder);
4626 }
4627
4628 return nullptr;
4629}
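
The CMN folds above use the fact that comparing z against 0 - y sets the same flags as the ADDS of z and y (CMN is an ADDS that discards its result). A quick equality-predicate check of that identity in plain C++ (small values, so no signed-overflow UB):

  #include <cassert>
  #include <cstdint>

  int main() {
    for (int32_t Z : {-7, 0, 7})
      for (int32_t Y : {-7, 0, 7})
        assert((Z == 0 - Y) == (Z + Y == 0));
  }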
4630
4631bool AArch64InstructionSelector::selectShuffleVector(
4632 MachineInstr &I, MachineRegisterInfo &MRI) const {
4633 const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
4634 Register Src1Reg = I.getOperand(1).getReg();
4635 const LLT Src1Ty = MRI.getType(Src1Reg);
4636 Register Src2Reg = I.getOperand(2).getReg();
4637 const LLT Src2Ty = MRI.getType(Src2Reg);
4638 ArrayRef<int> Mask = I.getOperand(3).getShuffleMask();
4639
4640 MachineBasicBlock &MBB = *I.getParent();
4641 MachineFunction &MF = *MBB.getParent();
4642 LLVMContext &Ctx = MF.getFunction().getContext();
4643
4644 // G_SHUFFLE_VECTOR is weird in that the source operands can be scalars, if
4645 // it's originated from a <1 x T> type. Those should have been lowered into
4646 // G_BUILD_VECTOR earlier.
4647 if (!Src1Ty.isVector() || !Src2Ty.isVector()) {
4648 LLVM_DEBUG(dbgs() << "Could not select a \"scalar\" G_SHUFFLE_VECTOR\n");
4649 return false;
4650 }
4651
4652 unsigned BytesPerElt = DstTy.getElementType().getSizeInBits() / 8;
4653
4654 SmallVector<Constant *, 64> CstIdxs;
4655 for (int Val : Mask) {
4656 // For now, any undef indexes we'll just assume to be 0. This should be
4657 // optimized in future, e.g. to select DUP etc.
4658 Val = Val < 0 ? 0 : Val;
4659 for (unsigned Byte = 0; Byte < BytesPerElt; ++Byte) {
4660 unsigned Offset = Byte + Val * BytesPerElt;
4661 CstIdxs.emplace_back(ConstantInt::get(Type::getInt8Ty(Ctx), Offset));
4662 }
4663 }
4664
4665 MachineIRBuilder MIRBuilder(I);
4666
4667 // Use a constant pool to load the index vector for TBL.
4668 Constant *CPVal = ConstantVector::get(CstIdxs);
4669 MachineInstr *IndexLoad = emitLoadFromConstantPool(CPVal, MIRBuilder);
4670 if (!IndexLoad) {
4671 LLVM_DEBUG(dbgs() << "Could not load from a constant pool");
4672 return false;
4673 }
4674
4675 if (DstTy.getSizeInBits() != 128) {
4676 assert(DstTy.getSizeInBits() == 64 && "Unexpected shuffle result ty");
4677 // This case can be done with TBL1.
4678 MachineInstr *Concat = emitVectorConcat(None, Src1Reg, Src2Reg, MIRBuilder);
4679 if (!Concat) {
4680 LLVM_DEBUG(dbgs() << "Could not do vector concat for tbl1");
4681 return false;
4682 }
4683
4684 // The constant pool load will be 64 bits, so need to convert to FPR128 reg.
4685 IndexLoad =
4686 emitScalarToVector(64, &AArch64::FPR128RegClass,
4687 IndexLoad->getOperand(0).getReg(), MIRBuilder);
4688
4689 auto TBL1 = MIRBuilder.buildInstr(
4690 AArch64::TBLv16i8One, {&AArch64::FPR128RegClass},
4691 {Concat->getOperand(0).getReg(), IndexLoad->getOperand(0).getReg()});
4692 constrainSelectedInstRegOperands(*TBL1, TII, TRI, RBI);
4693
4694 auto Copy =
4695 MIRBuilder
4696 .buildInstr(TargetOpcode::COPY, {I.getOperand(0).getReg()}, {})
4697 .addReg(TBL1.getReg(0), 0, AArch64::dsub);
4698 RBI.constrainGenericRegister(Copy.getReg(0), AArch64::FPR64RegClass, MRI);
4699 I.eraseFromParent();
4700 return true;
4701 }
4702
4703 // For TBL2 we need to emit a REG_SEQUENCE to tie together two consecutive
4704 // Q registers for regalloc.
4705 auto RegSeq = MIRBuilder
4706 .buildInstr(TargetOpcode::REG_SEQUENCE,
4707 {&AArch64::QQRegClass}, {Src1Reg})
4708 .addImm(AArch64::qsub0)
4709 .addUse(Src2Reg)
4710 .addImm(AArch64::qsub1);
4711
4712 auto TBL2 = MIRBuilder.buildInstr(AArch64::TBLv16i8Two, {I.getOperand(0)},
4713 {RegSeq, IndexLoad->getOperand(0)});
4714 constrainSelectedInstRegOperands(*RegSeq, TII, TRI, RBI);
4715 constrainSelectedInstRegOperands(*TBL2, TII, TRI, RBI);
4716 I.eraseFromParent();
4717 return true;
4718}
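
The index-vector construction in the loop above can be restated standalone: each element-level mask entry expands to BytesPerElt consecutive byte indices for TBL, with undef entries pinned to lane 0. A sketch (tblIndices is an illustrative name):

  #include <cassert>
  #include <cstdint>
  #include <vector>

  static std::vector<uint8_t> tblIndices(const std::vector<int> &Mask,
                                         unsigned BytesPerElt) {
    std::vector<uint8_t> Idxs;
    for (int Val : Mask) {
      Val = Val < 0 ? 0 : Val; // undef -> lane 0, as in the loop above
      for (unsigned Byte = 0; Byte < BytesPerElt; ++Byte)
        Idxs.push_back(static_cast<uint8_t>(Byte + Val * BytesPerElt));
    }
    return Idxs;
  }

  int main() {
    // A <2 x s32> mask <1, 0> swaps the elements: bytes {4..7, 0..3}.
    assert((tblIndices({1, 0}, 4) ==
            std::vector<uint8_t>{4, 5, 6, 7, 0, 1, 2, 3}));
  }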
4719
4720MachineInstr *AArch64InstructionSelector::emitLaneInsert(
4721 Optional<Register> DstReg, Register SrcReg, Register EltReg,
4722 unsigned LaneIdx, const RegisterBank &RB,
4723 MachineIRBuilder &MIRBuilder) const {
4724 MachineInstr *InsElt = nullptr;
4725 const TargetRegisterClass *DstRC = &AArch64::FPR128RegClass;
4726 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
4727
4728 // Create a register to define with the insert if one wasn't passed in.
4729 if (!DstReg)
4730 DstReg = MRI.createVirtualRegister(DstRC);
4731
4732 unsigned EltSize = MRI.getType(EltReg).getSizeInBits();
4733 unsigned Opc = getInsertVecEltOpInfo(RB, EltSize).first;
4734
4735 if (RB.getID() == AArch64::FPRRegBankID) {
4736 auto InsSub = emitScalarToVector(EltSize, DstRC, EltReg, MIRBuilder);
4737 InsElt = MIRBuilder.buildInstr(Opc, {*DstReg}, {SrcReg})
4738 .addImm(LaneIdx)
4739 .addUse(InsSub->getOperand(0).getReg())
4740 .addImm(0);
4741 } else {
4742 InsElt = MIRBuilder.buildInstr(Opc, {*DstReg}, {SrcReg})
4743 .addImm(LaneIdx)
4744 .addUse(EltReg);
4745 }
4746
4747 constrainSelectedInstRegOperands(*InsElt, TII, TRI, RBI);
4748 return InsElt;
4749}
4750
4751bool AArch64InstructionSelector::selectInsertElt(
4752 MachineInstr &I, MachineRegisterInfo &MRI) const {
4753 assert(I.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT);
4754
4755 // Get information on the destination.
4756 Register DstReg = I.getOperand(0).getReg();
4757 const LLT DstTy = MRI.getType(DstReg);
4758 unsigned VecSize = DstTy.getSizeInBits();
4759
4760 // Get information on the element we want to insert into the destination.
4761 Register EltReg = I.getOperand(2).getReg();
4762 const LLT EltTy = MRI.getType(EltReg);
4763 unsigned EltSize = EltTy.getSizeInBits();
4764 if (EltSize < 16 || EltSize > 64)
4765 return false; // Don't support all element types yet.
4766
4767 // Find the definition of the index. Bail out if it's not defined by a
4768 // G_CONSTANT.
4769 Register IdxReg = I.getOperand(3).getReg();
4770 auto VRegAndVal = getConstantVRegValWithLookThrough(IdxReg, MRI);
4771 if (!VRegAndVal)
4772 return false;
4773 unsigned LaneIdx = VRegAndVal->Value.getSExtValue();
4774
4775 // Perform the lane insert.
4776 Register SrcReg = I.getOperand(1).getReg();
4777 const RegisterBank &EltRB = *RBI.getRegBank(EltReg, MRI, TRI);
4778 MachineIRBuilder MIRBuilder(I);
4779
4780 if (VecSize < 128) {
4781 // If the vector we're inserting into is smaller than 128 bits, widen it
4782 // to 128 to do the insert.
4783 MachineInstr *ScalarToVec = emitScalarToVector(
4784 VecSize, &AArch64::FPR128RegClass, SrcReg, MIRBuilder);
4785 if (!ScalarToVec)
4786 return false;
4787 SrcReg = ScalarToVec->getOperand(0).getReg();
4788 }
4789
4790 // Create an insert into a new FPR128 register.
4791 // Note that if our vector is already 128 bits, we end up emitting an extra
4792 // register.
4793 MachineInstr *InsMI =
4794 emitLaneInsert(None, SrcReg, EltReg, LaneIdx, EltRB, MIRBuilder);
4795
4796 if (VecSize < 128) {
4797 // If we had to widen to perform the insert, then we have to demote back to
4798 // the original size to get the result we want.
4799 Register DemoteVec = InsMI->getOperand(0).getReg();
4800 const TargetRegisterClass *RC =
4801 getMinClassForRegBank(*RBI.getRegBank(DemoteVec, MRI, TRI), VecSize);
4802 if (RC != &AArch64::FPR32RegClass && RC != &AArch64::FPR64RegClass) {
4803 LLVM_DEBUG(dbgs() << "Unsupported register class!\n");
4804 return false;
4805 }
4806 unsigned SubReg = 0;
4807 if (!getSubRegForClass(RC, TRI, SubReg))
4808 return false;
4809 if (SubReg != AArch64::ssub && SubReg != AArch64::dsub) {
4810 LLVM_DEBUG(dbgs() << "Unsupported destination size! (" << VecSize
4811 << ")\n");
4812 return false;
4813 }
4814 MIRBuilder.buildInstr(TargetOpcode::COPY, {DstReg}, {})
4815 .addReg(DemoteVec, 0, SubReg);
4816 RBI.constrainGenericRegister(DstReg, *RC, MRI);
4817 } else {
4818 // No widening needed.
4819 InsMI->getOperand(0).setReg(DstReg);
4820 constrainSelectedInstRegOperands(*InsMI, TII, TRI, RBI);
4821 }
4822
4823 I.eraseFromParent();
4824 return true;
4825}
4826
4827MachineInstr *
4828AArch64InstructionSelector::emitConstantVector(Register Dst, Constant *CV,
4829 MachineIRBuilder &MIRBuilder,
4830 MachineRegisterInfo &MRI) const {
4831 LLT DstTy = MRI.getType(Dst);
4832 unsigned DstSize = DstTy.getSizeInBits();
4833 if (CV->isNullValue()) {
4834 if (DstSize == 128) {
4835 auto Mov =
4836 MIRBuilder.buildInstr(AArch64::MOVIv2d_ns, {Dst}, {}).addImm(0);
4837 constrainSelectedInstRegOperands(*Mov, TII, TRI, RBI);
4838 return &*Mov;
4839 }
4840
4841 if (DstSize == 64) {
4842 auto Mov =
4843 MIRBuilder
4844 .buildInstr(AArch64::MOVIv2d_ns, {&AArch64::FPR128RegClass}, {})
4845 .addImm(0);
4846 auto Copy = MIRBuilder.buildInstr(TargetOpcode::COPY, {Dst}, {})
4847 .addReg(Mov.getReg(0), 0, AArch64::dsub);
4848 RBI.constrainGenericRegister(Dst, AArch64::FPR64RegClass, MRI);
4849 return &*Copy;
4850 }
4851 }
4852
4853 auto *CPLoad = emitLoadFromConstantPool(CV, MIRBuilder);
4854 if (!CPLoad) {
4855 LLVM_DEBUG(dbgs() << "Could not generate cp load for constant vector!");
4856 return nullptr;
4857 }
4858
4859 auto Copy = MIRBuilder.buildCopy(Dst, CPLoad->getOperand(0));
4860 RBI.constrainGenericRegister(
4861 Dst, *MRI.getRegClass(CPLoad->getOperand(0).getReg()), MRI);
4862 return &*Copy;
4863}
4864
4865bool AArch64InstructionSelector::tryOptConstantBuildVec(
4866 MachineInstr &I, LLT DstTy, MachineRegisterInfo &MRI) const {
4867 assert(I.getOpcode() == TargetOpcode::G_BUILD_VECTOR);
4868 unsigned DstSize = DstTy.getSizeInBits();
4869 assert(DstSize <= 128 && "Unexpected build_vec type!");
4870 if (DstSize < 32)
4871 return false;
4872 // Check if we're building a constant vector, in which case we want to
4873 // generate a constant pool load instead of a vector insert sequence.
4874 SmallVector<Constant *, 16> Csts;
4875 for (unsigned Idx = 1; Idx < I.getNumOperands(); ++Idx) {
4876 // Try to find G_CONSTANT or G_FCONSTANT
4877 auto *OpMI =
4878 getOpcodeDef(TargetOpcode::G_CONSTANT, I.getOperand(Idx).getReg(), MRI);
4879 if (OpMI)
4880 Csts.emplace_back(
4881 const_cast<ConstantInt *>(OpMI->getOperand(1).getCImm()));
4882 else if ((OpMI = getOpcodeDef(TargetOpcode::G_FCONSTANT,
4883 I.getOperand(Idx).getReg(), MRI)))
4884 Csts.emplace_back(
4885 const_cast<ConstantFP *>(OpMI->getOperand(1).getFPImm()));
4886 else
4887 return false;
4888 }
4889 Constant *CV = ConstantVector::get(Csts);
4890 MachineIRBuilder MIB(I);
4891 if (!emitConstantVector(I.getOperand(0).getReg(), CV, MIB, MRI))
4892 return false;
4893 I.eraseFromParent();
4894 return true;
4895}
4896
4897bool AArch64InstructionSelector::selectBuildVector(
4898 MachineInstr &I, MachineRegisterInfo &MRI) const {
4899 assert(I.getOpcode() == TargetOpcode::G_BUILD_VECTOR);
4900 // Until we port more of the optimized selections, for now just use a vector
4901 // insert sequence.
4902 const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
4903 const LLT EltTy = MRI.getType(I.getOperand(1).getReg());
4904 unsigned EltSize = EltTy.getSizeInBits();
4905
4906 if (tryOptConstantBuildVec(I, DstTy, MRI))
4907 return true;
4908 if (EltSize < 16 || EltSize > 64)
4909 return false; // Don't support all element types yet.
4910 const RegisterBank &RB = *RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI);
4911 MachineIRBuilder MIRBuilder(I);
4912
4913 const TargetRegisterClass *DstRC = &AArch64::FPR128RegClass;
4914 MachineInstr *ScalarToVec =
4915 emitScalarToVector(DstTy.getElementType().getSizeInBits(), DstRC,
4916 I.getOperand(1).getReg(), MIRBuilder);
4917 if (!ScalarToVec)
4918 return false;
4919
4920 Register DstVec = ScalarToVec->getOperand(0).getReg();
4921 unsigned DstSize = DstTy.getSizeInBits();
4922
4923 // Keep track of the last MI we inserted. Later on, we might be able to save
4924 // a copy using it.
4925 MachineInstr *PrevMI = nullptr;
4926 for (unsigned i = 2, e = DstSize / EltSize + 1; i < e; ++i) {
4927 // Note that if we don't do a subregister copy, we can end up making an
4928 // extra register.
4929 PrevMI = &*emitLaneInsert(None, DstVec, I.getOperand(i).getReg(), i - 1, RB,
4930 MIRBuilder);
4931 DstVec = PrevMI->getOperand(0).getReg();
4932 }
4933
4934 // If DstTy's size in bits is less than 128, then emit a subregister copy
4935 // from DstVec to the last register we've defined.
4936 if (DstSize < 128) {
4937 // Force this to be FPR using the destination vector.
4938 const TargetRegisterClass *RC =
4939 getMinClassForRegBank(*RBI.getRegBank(DstVec, MRI, TRI), DstSize);
4940 if (!RC)
4941 return false;
4942 if (RC != &AArch64::FPR32RegClass && RC != &AArch64::FPR64RegClass) {
4943 LLVM_DEBUG(dbgs() << "Unsupported register class!\n");
4944 return false;
4945 }
4946
4947 unsigned SubReg = 0;
4948 if (!getSubRegForClass(RC, TRI, SubReg))
4949 return false;
4950 if (SubReg != AArch64::ssub && SubReg != AArch64::dsub) {
4951 LLVM_DEBUG(dbgs() << "Unsupported destination size! (" << DstSize
4952 << ")\n");
4953 return false;
4954 }
4955
4956 Register Reg = MRI.createVirtualRegister(RC);
4957 Register DstReg = I.getOperand(0).getReg();
4958
4959 MIRBuilder.buildInstr(TargetOpcode::COPY, {DstReg}, {})
4960 .addReg(DstVec, 0, SubReg);
4961 MachineOperand &RegOp = I.getOperand(1);
4962 RegOp.setReg(Reg);
4963 RBI.constrainGenericRegister(DstReg, *RC, MRI);
4964 } else {
4965 // We don't need a subregister copy. Save a copy by re-using the
4966 // destination register on the final insert.
4967 assert(PrevMI && "PrevMI was null?");
4968 PrevMI->getOperand(0).setReg(I.getOperand(0).getReg());
4969 constrainSelectedInstRegOperands(*PrevMI, TII, TRI, RBI);
4970 }
4971
4972 I.eraseFromParent();
4973 return true;
4974}
4975
4976/// Helper function to find an intrinsic ID on a MachineInstr. Returns the
4977/// ID if it exists, and 0 otherwise.
4978static unsigned findIntrinsicID(MachineInstr &I) {
4979 auto IntrinOp = find_if(I.operands(), [&](const MachineOperand &Op) {
4980 return Op.isIntrinsicID();
4981 });
4982 if (IntrinOp == I.operands_end())
4983 return 0;
4984 return IntrinOp->getIntrinsicID();
4985}
4986
4987bool AArch64InstructionSelector::selectIntrinsicWithSideEffects(
4988 MachineInstr &I, MachineRegisterInfo &MRI) const {
4989 // Find the intrinsic ID.
4990 unsigned IntrinID = findIntrinsicID(I);
4991 if (!IntrinID)
4992 return false;
4993 MachineIRBuilder MIRBuilder(I);
4994
4995 // Select the instruction.
4996 switch (IntrinID) {
4997 default:
4998 return false;
4999 case Intrinsic::trap:
5000 MIRBuilder.buildInstr(AArch64::BRK, {}, {}).addImm(1);
5001 break;
5002 case Intrinsic::debugtrap:
5003 MIRBuilder.buildInstr(AArch64::BRK, {}, {}).addImm(0xF000);
5004 break;
5005 case Intrinsic::ubsantrap:
5006 MIRBuilder.buildInstr(AArch64::BRK, {}, {})
5007 .addImm(I.getOperand(1).getImm() | ('U' << 8));
5008 break;
5009 }
5010
5011 I.eraseFromParent();
5012 return true;
5013}
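
The ubsantrap case above packs the check kind into BRK's 16-bit immediate with 'U' in the high byte. A one-line sanity check of that encoding (0x2A is a hypothetical check kind):

  #include <cassert>

  int main() {
    unsigned CheckKind = 0x2A;                // hypothetical check kind
    unsigned BrkImm = CheckKind | ('U' << 8); // same expression as above
    assert(BrkImm == 0x552A);                 // 'U' == 0x55
  }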
5014
5015bool AArch64InstructionSelector::selectIntrinsic(MachineInstr &I,
5016 MachineRegisterInfo &MRI) {
5017 unsigned IntrinID = findIntrinsicID(I);
5018 if (!IntrinID)
5019 return false;
5020 MachineIRBuilder MIRBuilder(I);
5021
5022 switch (IntrinID) {
5023 default:
5024 break;
5025 case Intrinsic::aarch64_crypto_sha1h: {
5026 Register DstReg = I.getOperand(0).getReg();
5027 Register SrcReg = I.getOperand(2).getReg();
5028
5029 // FIXME: Should this be an assert?
5030 if (MRI.getType(DstReg).getSizeInBits() != 32 ||
5031 MRI.getType(SrcReg).getSizeInBits() != 32)
5032 return false;
5033
5034 // The operation has to happen on FPRs. Set up some new FPR registers for
5035 // the source and destination if they are on GPRs.
5036 if (RBI.getRegBank(SrcReg, MRI, TRI)->getID() != AArch64::FPRRegBankID) {
5037 SrcReg = MRI.createVirtualRegister(&AArch64::FPR32RegClass);
5038 MIRBuilder.buildCopy({SrcReg}, {I.getOperand(2)});
5039
5040 // Make sure the copy ends up getting constrained properly.
5041 RBI.constrainGenericRegister(I.getOperand(2).getReg(),
5042 AArch64::GPR32RegClass, MRI);
5043 }
5044
5045 if (RBI.getRegBank(DstReg, MRI, TRI)->getID() != AArch64::FPRRegBankID)
5046 DstReg = MRI.createVirtualRegister(&AArch64::FPR32RegClass);
5047
5048 // Actually insert the instruction.
5049 auto SHA1Inst = MIRBuilder.buildInstr(AArch64::SHA1Hrr, {DstReg}, {SrcReg});
5050 constrainSelectedInstRegOperands(*SHA1Inst, TII, TRI, RBI);
5051
5052 // Did we create a new register for the destination?
5053 if (DstReg != I.getOperand(0).getReg()) {
5054 // Yep. Copy the result of the instruction back into the original
5055 // destination.
5056 MIRBuilder.buildCopy({I.getOperand(0)}, {DstReg});
5057 RBI.constrainGenericRegister(I.getOperand(0).getReg(),
5058 AArch64::GPR32RegClass, MRI);
5059 }
5060
5061 I.eraseFromParent();
5062 return true;
5063 }
5064 case Intrinsic::frameaddress:
5065 case Intrinsic::returnaddress: {
5066 MachineFunction &MF = *I.getParent()->getParent();
5067 MachineFrameInfo &MFI = MF.getFrameInfo();
5068
5069 unsigned Depth = I.getOperand(2).getImm();
5070 Register DstReg = I.getOperand(0).getReg();
5071 RBI.constrainGenericRegister(DstReg, AArch64::GPR64RegClass, MRI);
5072
5073 if (Depth == 0 && IntrinID == Intrinsic::returnaddress) {
5074 if (!MFReturnAddr) {
5075 // Insert the copy from LR/X30 into the entry block, before it can be
5076 // clobbered by anything.
5077 MFI.setReturnAddressIsTaken(true);
5078 MFReturnAddr = getFunctionLiveInPhysReg(MF, TII, AArch64::LR,
5079 AArch64::GPR64RegClass);
5080 }
5081
5082 if (STI.hasPAuth()) {
5083 MIRBuilder.buildInstr(AArch64::XPACI, {DstReg}, {MFReturnAddr});
5084 } else {
5085 MIRBuilder.buildCopy({Register(AArch64::LR)}, {MFReturnAddr});
5086 MIRBuilder.buildInstr(AArch64::XPACLRI);
5087 MIRBuilder.buildCopy({DstReg}, {Register(AArch64::LR)});
5088 }
5089
5090 I.eraseFromParent();
5091 return true;
5092 }
5093
5094 MFI.setFrameAddressIsTaken(true);
5095 Register FrameAddr(AArch64::FP);
5096 while (Depth--) {
5097 Register NextFrame = MRI.createVirtualRegister(&AArch64::GPR64spRegClass);
5098 auto Ldr =
5099 MIRBuilder.buildInstr(AArch64::LDRXui, {NextFrame}, {FrameAddr})
5100 .addImm(0);
5101 constrainSelectedInstRegOperands(*Ldr, TII, TRI, RBI);
5102 FrameAddr = NextFrame;
5103 }
5104
5105 if (IntrinID == Intrinsic::frameaddress)
5106 MIRBuilder.buildCopy({DstReg}, {FrameAddr});
5107 else {
5108 MFI.setReturnAddressIsTaken(true);
5109
5110 if (STI.hasPAuth()) {
5111 Register TmpReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
5112 MIRBuilder.buildInstr(AArch64::LDRXui, {TmpReg}, {FrameAddr}).addImm(1);
5113 MIRBuilder.buildInstr(AArch64::XPACI, {DstReg}, {TmpReg});
5114 } else {
5115 MIRBuilder.buildInstr(AArch64::LDRXui, {Register(AArch64::LR)}, {FrameAddr}).addImm(1);
5116 MIRBuilder.buildInstr(AArch64::XPACLRI);
5117 MIRBuilder.buildCopy({DstReg}, {Register(AArch64::LR)});
5118 }
5119 }
5120
5121 I.eraseFromParent();
5122 return true;
5123 }
5124 }
5125 return false;
5126}
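
The frameaddress/returnaddress lowering above walks AArch64 frame records: each record is {previous FP, saved LR}, so [fp, #0] chains to the caller's frame and [fp, #8] holds the return address (LDRXui's immediate is scaled by 8, hence addImm(1)). A host-side sketch of that walk (FrameRecord and returnAddrAtDepth are illustrative names):

  #include <cassert>

  struct FrameRecord {
    const FrameRecord *PrevFP; // loaded by LDRXui ..., #0
    const void *SavedLR;       // loaded by LDRXui ..., #1 (byte offset 8)
  };

  static const void *returnAddrAtDepth(const FrameRecord *FP, unsigned Depth) {
    while (Depth--)            // mirrors the "while (Depth--)" loop above
      FP = FP->PrevFP;
    return FP->SavedLR;
  }

  int main() {
    FrameRecord Outer{nullptr, (const void *)0x1000};
    FrameRecord Inner{&Outer, (const void *)0x2000};
    assert(returnAddrAtDepth(&Inner, 1) == (const void *)0x1000);
  }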
5127
5128InstructionSelector::ComplexRendererFns
5129AArch64InstructionSelector::selectShiftA_32(const MachineOperand &Root) const {
5130 auto MaybeImmed = getImmedFromMO(Root);
5131 if (MaybeImmed == None || *MaybeImmed > 31)
5132 return None;
5133 uint64_t Enc = (32 - *MaybeImmed) & 0x1f;
5134 return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Enc); }}};
5135}
5136
5137InstructionSelector::ComplexRendererFns
5138AArch64InstructionSelector::selectShiftB_32(const MachineOperand &Root) const {
5139 auto MaybeImmed = getImmedFromMO(Root);
5140 if (MaybeImmed == None || *MaybeImmed > 31)
5141 return None;
5142 uint64_t Enc = 31 - *MaybeImmed;
5143 return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Enc); }}};
5144}
5145
5146InstructionSelector::ComplexRendererFns
5147AArch64InstructionSelector::selectShiftA_64(const MachineOperand &Root) const {
5148 auto MaybeImmed = getImmedFromMO(Root);
5149 if (MaybeImmed == None || *MaybeImmed > 63)
5150 return None;
5151 uint64_t Enc = (64 - *MaybeImmed) & 0x3f;
5152 return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Enc); }}};
5153}
5154
5155InstructionSelector::ComplexRendererFns
5156AArch64InstructionSelector::selectShiftB_64(const MachineOperand &Root) const {
5157 auto MaybeImmed = getImmedFromMO(Root);
5158 if (MaybeImmed == None || *MaybeImmed > 63)
5159 return None;
5160 uint64_t Enc = 63 - *MaybeImmed;
5161 return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Enc); }}};
5162}
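
The four renderers above re-encode shift amounts into the bitfield-move form AArch64 uses (assumed here: a 32-bit LSL #s becomes UBFM with immr = (32 - s) & 31 and imms = 31 - s). A worked check of one case:

  #include <cassert>
  #include <cstdint>

  int main() {
    uint64_t S = 3;                  // e.g. lsl w0, w1, #3
    uint64_t EncA = (32 - S) & 0x1f; // selectShiftA_32 -> immr
    uint64_t EncB = 31 - S;          // selectShiftB_32 -> imms
    assert(EncA == 29 && EncB == 28);
  }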
5163
5164/// Helper to select an immediate value that can be represented as a 12-bit
5165/// value shifted left by either 0 or 12. If it is possible to do so, return
5166/// the immediate and shift value. If not, return None.
5167///
5168/// Used by selectArithImmed and selectNegArithImmed.
5169InstructionSelector::ComplexRendererFns
5170AArch64InstructionSelector::select12BitValueWithLeftShift(
5171 uint64_t Immed) const {
5172 unsigned ShiftAmt;
5173 if (Immed >> 12 == 0) {
5174 ShiftAmt = 0;
5175 } else if ((Immed & 0xfff) == 0 && Immed >> 24 == 0) {
5176 ShiftAmt = 12;
5177 Immed = Immed >> 12;
5178 } else
5179 return None;
5180
5181 unsigned ShVal = AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftAmt);
5182 return {{
5183 [=](MachineInstrBuilder &MIB) { MIB.addImm(Immed); },
5184 [=](MachineInstrBuilder &MIB) { MIB.addImm(ShVal); },
5185 }};
5186}
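
Restated standalone, the check above accepts exactly the ADD/SUB immediates AArch64 can encode: a 12-bit value, optionally shifted left by 12. A sketch (encodeArithImm is an illustrative name):

  #include <cassert>
  #include <cstdint>
  #include <utility>

  // Returns {imm12, shift} on success, {0, ~0u} on failure.
  static std::pair<uint64_t, unsigned> encodeArithImm(uint64_t Immed) {
    if (Immed >> 12 == 0)
      return {Immed, 0};
    if ((Immed & 0xfff) == 0 && Immed >> 24 == 0)
      return {Immed >> 12, 12};
    return {0, ~0u};
  }

  int main() {
    assert(encodeArithImm(0xabc) == std::make_pair(uint64_t(0xabc), 0u));
    assert(encodeArithImm(0xabc000) == std::make_pair(uint64_t(0xabc), 12u));
    assert(encodeArithImm(0xabc001).second == ~0u); // bits in both halves: reject
  }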
5187
5188/// SelectArithImmed - Select an immediate value that can be represented as
5189/// a 12-bit value shifted left by either 0 or 12. If so, return true with
5190/// Val set to the 12-bit value and Shift set to the shifter operand.
5191InstructionSelector::ComplexRendererFns
5192AArch64InstructionSelector::selectArithImmed(MachineOperand &Root) const {
5193 // This function is called from the addsub_shifted_imm ComplexPattern,
5194 // which lists [imm] as the list of opcodes it's interested in; however,
5195 // we still need to check whether the operand is actually an immediate
5196 // here because the ComplexPattern opcode list is only used in
5197 // root-level opcode matching.
5198 auto MaybeImmed = getImmedFromMO(Root);
5199 if (MaybeImmed == None)
5200 return None;
5201 return select12BitValueWithLeftShift(*MaybeImmed);
5202}
5203
5204/// SelectNegArithImmed - As above, but negates the value before trying to
5205/// select it.
5206InstructionSelector::ComplexRendererFns
5207AArch64InstructionSelector::selectNegArithImmed(MachineOperand &Root) const {
5208 // We need a register here, because we need to know if we have a 64 or 32
5209 // bit immediate.
5210 if (!Root.isReg())
5211 return None;
5212 auto MaybeImmed = getImmedFromMO(Root);
5213 if (MaybeImmed == None)
5214 return None;
5215 uint64_t Immed = *MaybeImmed;
5216
5217 // This negation is almost always valid, but "cmp wN, #0" and "cmn wN, #0"
5218 // have the opposite effect on the C flag, so this pattern mustn't match under
5219 // those circumstances.
5220 if (Immed == 0)
5221 return None;
5222
5223 // Check if we're dealing with a 32-bit type on the root or a 64-bit type on
5224 // the root.
5225 MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();
5226 if (MRI.getType(Root.getReg()).getSizeInBits() == 32)
5227 Immed = ~((uint32_t)Immed) + 1;
5228 else
5229 Immed = ~Immed + 1ULL;
5230
5231 if (Immed & 0xFFFFFFFFFF000000ULL)
5232 return None;
5233
5234 Immed &= 0xFFFFFFULL;
5235 return select12BitValueWithLeftShift(Immed);
5236}
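
The negation above must respect the operand width: a 32-bit immediate wraps at 32 bits before the 24-bit range check, while a 64-bit one does not. A worked example for the "cmp wN, #-1 -> cmn wN, #1" case:

  #include <cassert>
  #include <cstdint>

  int main() {
    uint64_t Immed = 0xFFFFFFFFULL;          // -1 as a 32-bit immediate
    uint64_t Neg32 = ~((uint32_t)Immed) + 1; // 32-bit wrap -> 1, encodable
    assert(Neg32 == 1 && (Neg32 & 0xFFFFFFFFFF000000ULL) == 0);

    uint64_t Neg64 = ~Immed + 1ULL;          // 64-bit negation does not fit
    assert((Neg64 & 0xFFFFFFFFFF000000ULL) != 0);
  }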
5237
5238/// Return true if it is worth folding MI into an extended register. That is,
5239/// if it's safe to pull it into the addressing mode of a load or store as a
5240/// shift.
5241bool AArch64InstructionSelector::isWorthFoldingIntoExtendedReg(
5242 MachineInstr &MI, const MachineRegisterInfo &MRI) const {
5243 // Always fold if there is one use, or if we're optimizing for size.
5244 Register DefReg = MI.getOperand(0).getReg();
5245 if (MRI.hasOneNonDBGUse(DefReg) ||
5246 MI.getParent()->getParent()->getFunction().hasOptSize())
5247 return true;
5248
5249 // It's better to avoid folding and recomputing shifts when we don't have a
5250 // fastpath.
5251 if (!STI.hasLSLFast())
5252 return false;
5253
5254 // We have a fastpath, so folding a shift in and potentially computing it
5255 // many times may be beneficial. Check if this is only used in memory ops.
5256 // If it is, then we should fold.
5257 return all_of(MRI.use_nodbg_instructions(DefReg),
5258 [](MachineInstr &Use) { return Use.mayLoadOrStore(); });
5259}
5260
5261static bool isSignExtendShiftType(AArch64_AM::ShiftExtendType Type) {
5262 switch (Type) {
5263 case AArch64_AM::SXTB:
5264 case AArch64_AM::SXTH:
5265 case AArch64_AM::SXTW:
5266 return true;
5267 default:
5268 return false;
5269 }
5270}
5271
5272InstructionSelector::ComplexRendererFns
5273AArch64InstructionSelector::selectExtendedSHL(
5274 MachineOperand &Root, MachineOperand &Base, MachineOperand &Offset,
5275 unsigned SizeInBytes, bool WantsExt) const {
5276 assert(Base.isReg() && "Expected base to be a register operand");
5277 assert(Offset.isReg() && "Expected offset to be a register operand");
5278
5279 MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();
5280 MachineInstr *OffsetInst = MRI.getVRegDef(Offset.getReg());
5281 if (!OffsetInst)
5282 return None;
5283
5284 unsigned OffsetOpc = OffsetInst->getOpcode();
5285 bool LookedThroughZExt = false;
5286 if (OffsetOpc != TargetOpcode::G_SHL && OffsetOpc != TargetOpcode::G_MUL) {
5287 // Try to look through a ZEXT.
5288 if (OffsetOpc != TargetOpcode::G_ZEXT || !WantsExt)
5289 return None;
5290
5291 OffsetInst = MRI.getVRegDef(OffsetInst->getOperand(1).getReg());
5292 OffsetOpc = OffsetInst->getOpcode();
5293 LookedThroughZExt = true;
5294
5295 if (OffsetOpc != TargetOpcode::G_SHL && OffsetOpc != TargetOpcode::G_MUL)
5296 return None;
5297 }
5298 // Make sure that the memory op is a valid size.
5299 int64_t LegalShiftVal = Log2_32(SizeInBytes);
5300 if (LegalShiftVal == 0)
5301 return None;
5302 if (!isWorthFoldingIntoExtendedReg(*OffsetInst, MRI))
5303 return None;
5304
5305 // Now, try to find the specific G_CONSTANT. Start by assuming that the
5306 // register we will offset is the LHS, and the register containing the
5307 // constant is the RHS.
5308 Register OffsetReg = OffsetInst->getOperand(1).getReg();
5309 Register ConstantReg = OffsetInst->getOperand(2).getReg();
5310 auto ValAndVReg = getConstantVRegValWithLookThrough(ConstantReg, MRI);
5311 if (!ValAndVReg) {
5312 // We didn't get a constant on the RHS. If the opcode is a shift, then
5313 // we're done.
5314 if (OffsetOpc == TargetOpcode::G_SHL)
5315 return None;
5316
5317 // If we have a G_MUL, we can use either register. Try looking at the RHS.
5318 std::swap(OffsetReg, ConstantReg);
5319 ValAndVReg = getConstantVRegValWithLookThrough(ConstantReg, MRI);
5320 if (!ValAndVReg)
5321 return None;
5322 }
5323
5324 // The value must fit into 3 bits, and must be positive. Make sure that is
5325 // true.
5326 int64_t ImmVal = ValAndVReg->Value.getSExtValue();
5327
5328 // Since we're going to pull this into a shift, the constant value must be
5329 // a power of 2. If we got a multiply, then we need to check this.
5330 if (OffsetOpc == TargetOpcode::G_MUL) {
5331 if (!isPowerOf2_32(ImmVal))
5332 return None;
5333
5334 // Got a power of 2. So, the amount we'll shift is the log base-2 of that.
5335 ImmVal = Log2_32(ImmVal);
5336 }
5337
5338 if ((ImmVal & 0x7) != ImmVal)
5339 return None;
5340
5341 // We are only allowed to shift by LegalShiftVal. This shift value is built
5342 // into the instruction, so we can't just use whatever we want.
5343 if (ImmVal != LegalShiftVal)
5344 return None;
5345
5346 unsigned SignExtend = 0;
5347 if (WantsExt) {
5348 // Check if the offset is defined by an extend, unless we looked through a
5349 // G_ZEXT earlier.
5350 if (!LookedThroughZExt) {
5351 MachineInstr *ExtInst = getDefIgnoringCopies(OffsetReg, MRI);
5352 auto Ext = getExtendTypeForInst(*ExtInst, MRI, true);
5353 if (Ext == AArch64_AM::InvalidShiftExtend)
5354 return None;
5355
5356 SignExtend = isSignExtendShiftType(Ext) ? 1 : 0;
5357 // We only support SXTW for signed extension here.
5358 if (SignExtend && Ext != AArch64_AM::SXTW)
5359 return None;
5360 OffsetReg = ExtInst->getOperand(1).getReg();
5361 }
5362
5363 // Need a 32-bit wide register here.
5364 MachineIRBuilder MIB(*MRI.getVRegDef(Root.getReg()));
5365 OffsetReg = moveScalarRegClass(OffsetReg, AArch64::GPR32RegClass, MIB);
5366 }
5367
5368 // We can use the LHS of the GEP as the base, and the LHS of the shift as an
5369 // offset. Signify that we are shifting by setting the shift flag to 1.
5370 return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(Base.getReg()); },
5371 [=](MachineInstrBuilder &MIB) { MIB.addUse(OffsetReg); },
5372 [=](MachineInstrBuilder &MIB) {
5373 // Need to add both immediates here to make sure that they are both
5374 // added to the instruction.
5375 MIB.addImm(SignExtend);
5376 MIB.addImm(1);
5377 }}};
5378}
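
A standalone restatement of the offset-scaling rules enforced above: the folded shift amount must equal log2 of the (power-of-two) access size, and a multiply only qualifies when its constant is the matching power of two (canFoldScaledOffset is an illustrative name):

  #include <cassert>
  #include <cstdint>

  static bool canFoldScaledOffset(unsigned SizeInBytes, int64_t ImmVal,
                                  bool IsMul) {
    int64_t LegalShiftVal = __builtin_ctz(SizeInBytes); // log2 for powers of 2
    if (LegalShiftVal == 0)
      return false; // byte-sized accesses have no shifted form
    if (IsMul) {
      if (ImmVal <= 0 || (ImmVal & (ImmVal - 1)) != 0)
        return false;                   // constant must be a power of 2
      ImmVal = __builtin_ctzll(ImmVal); // shift amount = log2(constant)
    }
    return (ImmVal & 0x7) == ImmVal && ImmVal == LegalShiftVal;
  }

  int main() {
    assert(canFoldScaledOffset(8, 3, /*IsMul=*/false));  // ldr x, [b, o, lsl #3]
    assert(canFoldScaledOffset(8, 8, /*IsMul=*/true));   // G_MUL by 8 == lsl #3
    assert(!canFoldScaledOffset(8, 2, /*IsMul=*/false)); // wrong scale
  }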
5379
5380/// This is used for computing addresses like this:
5381///
5382/// ldr x1, [x2, x3, lsl #3]
5383///
5384/// Where x2 is the base register, and x3 is an offset register. The shift-left
5385/// is a constant value specific to this load instruction. That is, we'll never
5386/// see anything other than a 3 here (which corresponds to the size of the
5387/// element being loaded.)
5388InstructionSelector::ComplexRendererFns
5389AArch64InstructionSelector::selectAddrModeShiftedExtendXReg(
5390 MachineOperand &Root, unsigned SizeInBytes) const {
5391 if (!Root.isReg())
5392 return None;
5393 MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();
5394
5395 // We want to find something like this:
5396 //
5397 // val = G_CONSTANT LegalShiftVal
5398 // shift = G_SHL off_reg val
5399 // ptr = G_PTR_ADD base_reg shift
5400 // x = G_LOAD ptr
5401 //
5402 // And fold it into this addressing mode:
5403 //
5404 // ldr x, [base_reg, off_reg, lsl #LegalShiftVal]
5405
5406 // Check if we can find the G_PTR_ADD.
5407 MachineInstr *PtrAdd =
5408 getOpcodeDef(TargetOpcode::G_PTR_ADD, Root.getReg(), MRI);
5409 if (!PtrAdd || !isWorthFoldingIntoExtendedReg(*PtrAdd, MRI))
5410 return None;
5411
5412 // Now, try to match an opcode which will match our specific offset.
5413 // We want a G_SHL or a G_MUL.
5414 MachineInstr *OffsetInst =
5415 getDefIgnoringCopies(PtrAdd->getOperand(2).getReg(), MRI);
5416 return selectExtendedSHL(Root, PtrAdd->getOperand(1),
5417 OffsetInst->getOperand(0), SizeInBytes,
5418 /*WantsExt=*/false);
5419}
5420
5421/// This is used for computing addresses like this:
5422///
5423/// ldr x1, [x2, x3]
5424///
5425/// Where x2 is the base register, and x3 is an offset register.
5426///
5427/// When possible (or profitable), this will fold a G_PTR_ADD into the
5428/// address calculation. Otherwise, it will return None.
5429InstructionSelector::ComplexRendererFns
5430AArch64InstructionSelector::selectAddrModeRegisterOffset(
5431 MachineOperand &Root) const {
5432 MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();
5433
5434 // We need a GEP.
5435 MachineInstr *Gep = MRI.getVRegDef(Root.getReg());
5436 if (!Gep || Gep->getOpcode() != TargetOpcode::G_PTR_ADD)
5437 return None;
5438
5439 // If this is used more than once, let's not bother folding.
5440 // TODO: Check if they are memory ops. If they are, then we can still fold
5441 // without having to recompute anything.
5442 if (!MRI.hasOneNonDBGUse(Gep->getOperand(0).getReg()))
5443 return None;
5444
5445 // Base is the GEP's LHS, offset is its RHS.
5446 return {{[=](MachineInstrBuilder &MIB) {
5447 MIB.addUse(Gep->getOperand(1).getReg());
5448 },
5449 [=](MachineInstrBuilder &MIB) {
5450 MIB.addUse(Gep->getOperand(2).getReg());
5451 },
5452 [=](MachineInstrBuilder &MIB) {
5453 // Need to add both immediates here to make sure that they are both
5454 // added to the instruction.
5455 MIB.addImm(0);
5456 MIB.addImm(0);
5457 }}};
5458}
5459
5460/// This is intended to be equivalent to selectAddrModeXRO in
5461/// AArch64ISelDAGtoDAG. It's used for selecting X register offset loads.
5462InstructionSelector::ComplexRendererFns
5463AArch64InstructionSelector::selectAddrModeXRO(MachineOperand &Root,
5464 unsigned SizeInBytes) const {
5465 MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();
5466 if (!Root.isReg())
5467 return None;
5468 MachineInstr *PtrAdd =
5469 getOpcodeDef(TargetOpcode::G_PTR_ADD, Root.getReg(), MRI);
5470 if (!PtrAdd)
5471 return None;
5472
5473 // Check for an immediate which cannot be encoded in the [base + imm]
5474 // addressing mode, and can't be encoded in an add/sub. If this happens, we'll
5475 // end up with code like:
5476 //
5477 // mov x0, wide
5478 // add x1 base, x0
5479 // ldr x2, [x1, x0]
5480 //
5481 // In this situation, we can use the [base, xreg] addressing mode to save an
5482 // add/sub:
5483 //
5484 // mov x0, wide
5485 // ldr x2, [base, x0]
5486 auto ValAndVReg =
5487 getConstantVRegValWithLookThrough(PtrAdd->getOperand(2).getReg(), MRI);
5488 if (ValAndVReg) {
5489 unsigned Scale = Log2_32(SizeInBytes);
5490 int64_t ImmOff = ValAndVReg->Value.getSExtValue();
5491
5492 // Skip immediates that can be selected in the load/store addressing
5493 // mode.
5494 if (ImmOff % SizeInBytes == 0 && ImmOff >= 0 &&
5495 ImmOff < (0x1000 << Scale))
5496 return None;
5497
5498 // Helper lambda to decide whether or not it is preferable to emit an add.
5499 auto isPreferredADD = [](int64_t ImmOff) {
5500 // Constants in [0x0, 0xfff] can be encoded in an add.
5501 if ((ImmOff & 0xfffffffffffff000LL) == 0x0LL)
5502 return true;
5503
5504 // Can it be encoded in an add lsl #12?
5505 if ((ImmOff & 0xffffffffff000fffLL) != 0x0LL)
5506 return false;
5507
5508 // It can be encoded in an add lsl #12, but we may not want to. If it is
5509 // possible to select this as a single movz, then prefer that. A single
5510 // movz is faster than an add with a shift.
5511 return (ImmOff & 0xffffffffff00ffffLL) != 0x0LL &&
5512 (ImmOff & 0xffffffffffff0fffLL) != 0x0LL;
5513 };
5514
5515 // If the immediate can be encoded in a single add/sub, then bail out.
5516 if (isPreferredADD(ImmOff) || isPreferredADD(-ImmOff))
5517 return None;
5518 }
5519
5520 // Try to fold shifts into the addressing mode.
5521 auto AddrModeFns = selectAddrModeShiftedExtendXReg(Root, SizeInBytes);
5522 if (AddrModeFns)
5523 return AddrModeFns;
5524
5525 // If that doesn't work, see if it's possible to fold in registers from
5526 // a GEP.
5527 return selectAddrModeRegisterOffset(Root);
5528}
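
The isPreferredADD masks above are easiest to see on concrete offsets: a 12-bit value is a single ADD, and a value with bits only in [12, 24) is an ADD lsl #12 unless it also fits a single MOVZ halfword. A worked check of those cases (the helper duplicates the lambda above):

  #include <cassert>
  #include <cstdint>

  static bool isPreferredADD(int64_t ImmOff) {
    if ((ImmOff & 0xfffffffffffff000LL) == 0x0LL)
      return true;  // fits the plain 12-bit ADD immediate
    if ((ImmOff & 0xffffffffff000fffLL) != 0x0LL)
      return false; // not expressible as ADD ..., lsl #12 either
    // Encodable as ADD lsl #12; prefer it only when a single MOVZ
    // cannot materialize the value.
    return (ImmOff & 0xffffffffff00ffffLL) != 0x0LL &&
           (ImmOff & 0xffffffffffff0fffLL) != 0x0LL;
  }

  int main() {
    assert(isPreferredADD(0xabc));    // one ADD
    assert(isPreferredADD(0x123000)); // ADD ..., lsl #12, not a single MOVZ
    assert(!isPreferredADD(0x3000));  // MOVZ #0x3000 is cheaper
  }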
5529
5530/// This is used for computing addresses like this:
5531///
5532/// ldr x0, [xBase, wOffset, sxtw #LegalShiftVal]
5533///
5534/// Where we have a 64-bit base register, a 32-bit offset register, and an
5535/// extend (which may or may not be signed).
5536InstructionSelector::ComplexRendererFns
5537AArch64InstructionSelector::selectAddrModeWRO(MachineOperand &Root,
5538 unsigned SizeInBytes) const {
5539 MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();
5540
5541 MachineInstr *PtrAdd =
5542 getOpcodeDef(TargetOpcode::G_PTR_ADD, Root.getReg(), MRI);
5543 if (!PtrAdd || !isWorthFoldingIntoExtendedReg(*PtrAdd, MRI))
5544 return None;
5545
5546 MachineOperand &LHS = PtrAdd->getOperand(1);
5547 MachineOperand &RHS = PtrAdd->getOperand(2);
5548 MachineInstr *OffsetInst = getDefIgnoringCopies(RHS.getReg(), MRI);
5549
5550 // The first case is the same as selectAddrModeXRO, except we need an extend.
5551 // In this case, we try to find a shift and extend, and fold them into the
5552 // addressing mode.
5553 //
5554 // E.g.
5555 //
5556 // off_reg = G_Z/S/ANYEXT ext_reg
5557 // val = G_CONSTANT LegalShiftVal
5558 // shift = G_SHL off_reg val
5559 // ptr = G_PTR_ADD base_reg shift
5560 // x = G_LOAD ptr
5561 //
5562 // In this case we can get a load like this:
5563 //
5564 // ldr x0, [base_reg, ext_reg, sxtw #LegalShiftVal]
5565 auto ExtendedShl = selectExtendedSHL(Root, LHS, OffsetInst->getOperand(0),
5566 SizeInBytes, /*WantsExt=*/true);
5567 if (ExtendedShl)
5568 return ExtendedShl;
5569
5570 // There was no shift. We can try and fold a G_Z/S/ANYEXT in alone though.
5571 //
5572 // e.g.
5573 // ldr something, [base_reg, ext_reg, sxtw]
5574 if (!isWorthFoldingIntoExtendedReg(*OffsetInst, MRI))
5575 return None;
5576
5577 // Check if this is an extend. We'll get an extend type if it is.
5578 AArch64_AM::ShiftExtendType Ext =
5579 getExtendTypeForInst(*OffsetInst, MRI, /*IsLoadStore=*/true);
5580 if (Ext == AArch64_AM::InvalidShiftExtend)
5581 return None;
5582
5583 // Need a 32-bit wide register.
5584 MachineIRBuilder MIB(*PtrAdd);
5585 Register ExtReg = moveScalarRegClass(OffsetInst->getOperand(1).getReg(),
5586 AArch64::GPR32RegClass, MIB);
5587 unsigned SignExtend = Ext == AArch64_AM::SXTW;
5588
5589 // Base is LHS, offset is ExtReg.
5590 return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(LHS.getReg()); },
5591 [=](MachineInstrBuilder &MIB) { MIB.addUse(ExtReg); },
5592 [=](MachineInstrBuilder &MIB) {
5593 MIB.addImm(SignExtend);
5594 MIB.addImm(0);
5595 }}};
5596}
5597
5598/// Select a "register plus unscaled signed 9-bit immediate" address. This
5599/// should only match when there is an offset that is not valid for a scaled
5600/// immediate addressing mode. The "Size" argument is the size in bytes of the
5601/// memory reference, which is needed here to know what is valid for a scaled
5602/// immediate.
5603InstructionSelector::ComplexRendererFns
5604AArch64InstructionSelector::selectAddrModeUnscaled(MachineOperand &Root,
5605 unsigned Size) const {
5606 MachineRegisterInfo &MRI =
5607 Root.getParent()->getParent()->getParent()->getRegInfo();
5608
5609 if (!Root.isReg())
5610 return None;
5611
5612 if (!isBaseWithConstantOffset(Root, MRI))
5613 return None;
5614
5615 MachineInstr *RootDef = MRI.getVRegDef(Root.getReg());
5616 if (!RootDef)
5617 return None;
5618
5619 MachineOperand &OffImm = RootDef->getOperand(2);
5620 if (!OffImm.isReg())
5621 return None;
5622 MachineInstr *RHS = MRI.getVRegDef(OffImm.getReg());
5623 if (!RHS || RHS->getOpcode() != TargetOpcode::G_CONSTANT)
5624 return None;
5625 int64_t RHSC;
5626 MachineOperand &RHSOp1 = RHS->getOperand(1);
5627 if (!RHSOp1.isCImm() || RHSOp1.getCImm()->getBitWidth() > 64)
5628 return None;
5629 RHSC = RHSOp1.getCImm()->getSExtValue();
5630
5631 // If the offset is valid as a scaled immediate, don't match here.
5632 if ((RHSC & (Size - 1)) == 0 && RHSC >= 0 && RHSC < (0x1000 << Log2_32(Size)))
5633 return None;
5634 if (RHSC >= -256 && RHSC < 256) {
5635 MachineOperand &Base = RootDef->getOperand(1);
5636 return {{
5637 [=](MachineInstrBuilder &MIB) { MIB.add(Base); },
5638 [=](MachineInstrBuilder &MIB) { MIB.addImm(RHSC); },
5639 }};
5640 }
5641 return None;
5642}
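
The two immediate forms distinguished above, restated: offsets that are non-negative, size-aligned, and below 0x1000 << log2(Size) use the scaled unsigned 12-bit encoding (LDR/STR), and only the leftovers in [-256, 256) match this unscaled signed 9-bit form (LDUR/STUR). A sketch for an 8-byte access:

  #include <cassert>
  #include <cstdint>

  static bool fitsScaled(int64_t C, unsigned Size) { // LDR/STR ui form
    return (C & (Size - 1)) == 0 && C >= 0 &&
           C < (0x1000LL << __builtin_ctz(Size));
  }
  static bool fitsUnscaled(int64_t C) { return C >= -256 && C < 256; } // LDUR

  int main() {
    assert(fitsScaled(16, 8));                      // scaled wins: no match here
    assert(!fitsScaled(-8, 8) && fitsUnscaled(-8)); // negative -> unscaled
    assert(!fitsScaled(12, 8) && fitsUnscaled(12)); // misaligned -> unscaled
  }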
5643
5644InstructionSelector::ComplexRendererFns
5645AArch64InstructionSelector::tryFoldAddLowIntoImm(MachineInstr &RootDef,
5646 unsigned Size,
5647 MachineRegisterInfo &MRI) const {
5648 if (RootDef.getOpcode() != AArch64::G_ADD_LOW)
5649 return None;
5650 MachineInstr &Adrp = *MRI.getVRegDef(RootDef.getOperand(1).getReg());
5651 if (Adrp.getOpcode() != AArch64::ADRP)
5652 return None;
5653
5654 // TODO: add heuristics like isWorthFoldingADDlow() from SelectionDAG.
5655 auto Offset = Adrp.getOperand(1).getOffset();
5656 if (Offset % Size != 0)
5657 return None;
5658
5659 auto GV = Adrp.getOperand(1).getGlobal();
5660 if (GV->isThreadLocal())
5661 return None;
5662
5663 auto &MF = *RootDef.getParent()->getParent();
5664 if (GV->getPointerAlignment(MF.getDataLayout()) < Size)
5665 return None;
5666
5667 unsigned OpFlags = STI.ClassifyGlobalReference(GV, MF.getTarget());
5668 MachineIRBuilder MIRBuilder(RootDef);
5669 Register AdrpReg = Adrp.getOperand(0).getReg();
5670 return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(AdrpReg); },
5671 [=](MachineInstrBuilder &MIB) {
5672 MIB.addGlobalAddress(GV, Offset,
5673 OpFlags | AArch64II::MO_PAGEOFF |
5674 AArch64II::MO_NC);
5675 }}};
5676}
5677
5678/// Select a "register plus scaled unsigned 12-bit immediate" address. The
5679/// "Size" argument is the size in bytes of the memory reference, which
5680/// determines the scale.
5681InstructionSelector::ComplexRendererFns
5682AArch64InstructionSelector::selectAddrModeIndexed(MachineOperand &Root,
5683 unsigned Size) const {
5684 MachineFunction &MF = *Root.getParent()->getParent()->getParent();
5685 MachineRegisterInfo &MRI = MF.getRegInfo();
5686
5687 if (!Root.isReg())
4. Calling 'MachineOperand::isReg'
7. Returning from 'MachineOperand::isReg'
8. Taking false branch
5688 return None;
5689
5690 MachineInstr *RootDef = MRI.getVRegDef(Root.getReg());
5691 if (!RootDef)
9. Assuming 'RootDef' is non-null
10. Taking false branch
5692 return None;
5693
5694 if (RootDef->getOpcode() == TargetOpcode::G_FRAME_INDEX) {
11. Assuming the condition is false
12. Taking false branch
5695 return {{
5696 [=](MachineInstrBuilder &MIB) { MIB.add(RootDef->getOperand(1)); },
5697 [=](MachineInstrBuilder &MIB) { MIB.addImm(0); },
5698 }};
5699 }
5700
5701 CodeModel::Model CM = MF.getTarget().getCodeModel();
5702 // Check if we can fold in the ADD of small code model ADRP + ADD address.