Bug Summary

File:llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
Warning:line 6122, column 67
The result of the left shift is undefined due to shifting by '4294967295', which is greater or equal to the width of type 'int'

Annotated Source Code

Press '?' to see keyboard shortcuts

clang -cc1 -cc1 -triple x86_64-pc-linux-gnu -analyze -disable-free -clear-ast-before-backend -disable-llvm-verifier -discard-value-names -main-file-name AArch64InstructionSelector.cpp -analyzer-store=region -analyzer-opt-analyze-nested-blocks -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=cplusplus -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -analyzer-config-compatibility-mode=true -mrelocation-model pic -pic-level 2 -mframe-pointer=none -fmath-errno -ffp-contract=on -fno-rounding-math -mconstructor-aliases -funwind-tables=2 -target-cpu x86-64 -tune-cpu generic -debugger-tuning=gdb -ffunction-sections -fdata-sections -fcoverage-compilation-dir=/build/llvm-toolchain-snapshot-14~++20220116100644+5f782d25a742/build-llvm/tools/clang/stage2-bins -resource-dir /usr/lib/llvm-14/lib/clang/14.0.0 -D _DEBUG -D _GNU_SOURCE -D __STDC_CONSTANT_MACROS -D __STDC_FORMAT_MACROS -D __STDC_LIMIT_MACROS -I lib/Target/AArch64 -I /build/llvm-toolchain-snapshot-14~++20220116100644+5f782d25a742/llvm/lib/Target/AArch64 -I include -I /build/llvm-toolchain-snapshot-14~++20220116100644+5f782d25a742/llvm/include -D _FORTIFY_SOURCE=2 -D NDEBUG -U NDEBUG -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/c++/10 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/x86_64-linux-gnu/c++/10 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/c++/10/backward -internal-isystem /usr/lib/llvm-14/lib/clang/14.0.0/include -internal-isystem /usr/local/include -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../x86_64-linux-gnu/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -fmacro-prefix-map=/build/llvm-toolchain-snapshot-14~++20220116100644+5f782d25a742/build-llvm/tools/clang/stage2-bins=build-llvm/tools/clang/stage2-bins -fmacro-prefix-map=/build/llvm-toolchain-snapshot-14~++20220116100644+5f782d25a742/= -fcoverage-prefix-map=/build/llvm-toolchain-snapshot-14~++20220116100644+5f782d25a742/build-llvm/tools/clang/stage2-bins=build-llvm/tools/clang/stage2-bins -fcoverage-prefix-map=/build/llvm-toolchain-snapshot-14~++20220116100644+5f782d25a742/= -O3 -Wno-unused-command-line-argument -Wno-unused-parameter -Wwrite-strings -Wno-missing-field-initializers -Wno-long-long -Wno-maybe-uninitialized -Wno-class-memaccess -Wno-redundant-move -Wno-pessimizing-move -Wno-noexcept-type -Wno-comment -std=c++14 -fdeprecated-macro -fdebug-compilation-dir=/build/llvm-toolchain-snapshot-14~++20220116100644+5f782d25a742/build-llvm/tools/clang/stage2-bins -fdebug-prefix-map=/build/llvm-toolchain-snapshot-14~++20220116100644+5f782d25a742/build-llvm/tools/clang/stage2-bins=build-llvm/tools/clang/stage2-bins -fdebug-prefix-map=/build/llvm-toolchain-snapshot-14~++20220116100644+5f782d25a742/= -ferror-limit 19 -fvisibility hidden -fvisibility-inlines-hidden -stack-protector 2 -fgnuc-version=4.2.1 -fcolor-diagnostics -vectorize-loops -vectorize-slp -analyzer-output=html -analyzer-config stable-report-filename=true -faddrsig -D__GCC_HAVE_DWARF2_CFI_ASM=1 -o /tmp/scan-build-2022-01-16-232930-107970-1 -x c++ /build/llvm-toolchain-snapshot-14~++20220116100644+5f782d25a742/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp

/build/llvm-toolchain-snapshot-14~++20220116100644+5f782d25a742/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp

1//===- AArch64InstructionSelector.cpp ----------------------------*- C++ -*-==//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8/// \file
9/// This file implements the targeting of the InstructionSelector class for
10/// AArch64.
11/// \todo This should be generated by TableGen.
12//===----------------------------------------------------------------------===//
13
14#include "AArch64GlobalISelUtils.h"
15#include "AArch64InstrInfo.h"
16#include "AArch64MachineFunctionInfo.h"
17#include "AArch64RegisterBankInfo.h"
18#include "AArch64RegisterInfo.h"
19#include "AArch64Subtarget.h"
20#include "AArch64TargetMachine.h"
21#include "AArch64GlobalISelUtils.h"
22#include "MCTargetDesc/AArch64AddressingModes.h"
23#include "MCTargetDesc/AArch64MCTargetDesc.h"
24#include "llvm/ADT/Optional.h"
25#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
26#include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
27#include "llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h"
28#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
29#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
30#include "llvm/CodeGen/MachineBasicBlock.h"
31#include "llvm/CodeGen/MachineConstantPool.h"
32#include "llvm/CodeGen/MachineFunction.h"
33#include "llvm/CodeGen/MachineInstr.h"
34#include "llvm/CodeGen/MachineInstrBuilder.h"
35#include "llvm/CodeGen/MachineMemOperand.h"
36#include "llvm/CodeGen/MachineOperand.h"
37#include "llvm/CodeGen/MachineRegisterInfo.h"
38#include "llvm/CodeGen/TargetOpcodes.h"
39#include "llvm/IR/Constants.h"
40#include "llvm/IR/DerivedTypes.h"
41#include "llvm/IR/Instructions.h"
42#include "llvm/IR/PatternMatch.h"
43#include "llvm/IR/Type.h"
44#include "llvm/IR/IntrinsicsAArch64.h"
45#include "llvm/Pass.h"
46#include "llvm/Support/Debug.h"
47#include "llvm/Support/raw_ostream.h"
48
49#define DEBUG_TYPE"aarch64-isel" "aarch64-isel"
50
51using namespace llvm;
52using namespace MIPatternMatch;
53using namespace AArch64GISelUtils;
54
55namespace llvm {
56class BlockFrequencyInfo;
57class ProfileSummaryInfo;
58}
59
60namespace {
61
62#define GET_GLOBALISEL_PREDICATE_BITSET
63#include "AArch64GenGlobalISel.inc"
64#undef GET_GLOBALISEL_PREDICATE_BITSET
65
66class AArch64InstructionSelector : public InstructionSelector {
67public:
68 AArch64InstructionSelector(const AArch64TargetMachine &TM,
69 const AArch64Subtarget &STI,
70 const AArch64RegisterBankInfo &RBI);
71
72 bool select(MachineInstr &I) override;
73 static const char *getName() { return DEBUG_TYPE"aarch64-isel"; }
74
75 void setupMF(MachineFunction &MF, GISelKnownBits *KB,
76 CodeGenCoverage &CoverageInfo, ProfileSummaryInfo *PSI,
77 BlockFrequencyInfo *BFI) override {
78 InstructionSelector::setupMF(MF, KB, CoverageInfo, PSI, BFI);
79 MIB.setMF(MF);
80
81 // hasFnAttribute() is expensive to call on every BRCOND selection, so
82 // cache it here for each run of the selector.
83 ProduceNonFlagSettingCondBr =
84 !MF.getFunction().hasFnAttribute(Attribute::SpeculativeLoadHardening);
85 MFReturnAddr = Register();
86
87 processPHIs(MF);
88 }
89
90private:
91 /// tblgen-erated 'select' implementation, used as the initial selector for
92 /// the patterns that don't require complex C++.
93 bool selectImpl(MachineInstr &I, CodeGenCoverage &CoverageInfo) const;
94
95 // A lowering phase that runs before any selection attempts.
96 // Returns true if the instruction was modified.
97 bool preISelLower(MachineInstr &I);
98
99 // An early selection function that runs before the selectImpl() call.
100 bool earlySelect(MachineInstr &I);
101
102 // Do some preprocessing of G_PHIs before we begin selection.
103 void processPHIs(MachineFunction &MF);
104
105 bool earlySelectSHL(MachineInstr &I, MachineRegisterInfo &MRI);
106
107 /// Eliminate same-sized cross-bank copies into stores before selectImpl().
108 bool contractCrossBankCopyIntoStore(MachineInstr &I,
109 MachineRegisterInfo &MRI);
110
111 bool convertPtrAddToAdd(MachineInstr &I, MachineRegisterInfo &MRI);
112
113 bool selectVaStartAAPCS(MachineInstr &I, MachineFunction &MF,
114 MachineRegisterInfo &MRI) const;
115 bool selectVaStartDarwin(MachineInstr &I, MachineFunction &MF,
116 MachineRegisterInfo &MRI) const;
117
118 ///@{
119 /// Helper functions for selectCompareBranch.
120 bool selectCompareBranchFedByFCmp(MachineInstr &I, MachineInstr &FCmp,
121 MachineIRBuilder &MIB) const;
122 bool selectCompareBranchFedByICmp(MachineInstr &I, MachineInstr &ICmp,
123 MachineIRBuilder &MIB) const;
124 bool tryOptCompareBranchFedByICmp(MachineInstr &I, MachineInstr &ICmp,
125 MachineIRBuilder &MIB) const;
126 bool tryOptAndIntoCompareBranch(MachineInstr &AndInst, bool Invert,
127 MachineBasicBlock *DstMBB,
128 MachineIRBuilder &MIB) const;
129 ///@}
130
131 bool selectCompareBranch(MachineInstr &I, MachineFunction &MF,
132 MachineRegisterInfo &MRI);
133
134 bool selectVectorAshrLshr(MachineInstr &I, MachineRegisterInfo &MRI);
135 bool selectVectorSHL(MachineInstr &I, MachineRegisterInfo &MRI);
136
137 // Helper to generate an equivalent of scalar_to_vector into a new register,
138 // returned via 'Dst'.
139 MachineInstr *emitScalarToVector(unsigned EltSize,
140 const TargetRegisterClass *DstRC,
141 Register Scalar,
142 MachineIRBuilder &MIRBuilder) const;
143
144 /// Emit a lane insert into \p DstReg, or a new vector register if None is
145 /// provided.
146 ///
147 /// The lane inserted into is defined by \p LaneIdx. The vector source
148 /// register is given by \p SrcReg. The register containing the element is
149 /// given by \p EltReg.
150 MachineInstr *emitLaneInsert(Optional<Register> DstReg, Register SrcReg,
151 Register EltReg, unsigned LaneIdx,
152 const RegisterBank &RB,
153 MachineIRBuilder &MIRBuilder) const;
154
155 /// Emit a sequence of instructions representing a constant \p CV for a
156 /// vector register \p Dst. (E.g. a MOV, or a load from a constant pool.)
157 ///
158 /// \returns the last instruction in the sequence on success, and nullptr
159 /// otherwise.
160 MachineInstr *emitConstantVector(Register Dst, Constant *CV,
161 MachineIRBuilder &MIRBuilder,
162 MachineRegisterInfo &MRI);
163
164 bool selectInsertElt(MachineInstr &I, MachineRegisterInfo &MRI);
165 bool tryOptConstantBuildVec(MachineInstr &MI, LLT DstTy,
166 MachineRegisterInfo &MRI);
167 /// \returns true if a G_BUILD_VECTOR instruction \p MI can be selected as a
168 /// SUBREG_TO_REG.
169 bool tryOptBuildVecToSubregToReg(MachineInstr &MI, MachineRegisterInfo &MRI);
170 bool selectBuildVector(MachineInstr &I, MachineRegisterInfo &MRI);
171 bool selectMergeValues(MachineInstr &I, MachineRegisterInfo &MRI);
172 bool selectUnmergeValues(MachineInstr &I, MachineRegisterInfo &MRI);
173
174 bool selectShuffleVector(MachineInstr &I, MachineRegisterInfo &MRI);
175 bool selectExtractElt(MachineInstr &I, MachineRegisterInfo &MRI);
176 bool selectConcatVectors(MachineInstr &I, MachineRegisterInfo &MRI);
177 bool selectSplitVectorUnmerge(MachineInstr &I, MachineRegisterInfo &MRI);
178
179 /// Helper function to select vector load intrinsics like
180 /// @llvm.aarch64.neon.ld2.*, @llvm.aarch64.neon.ld4.*, etc.
181 /// \p Opc is the opcode that the selected instruction should use.
182 /// \p NumVecs is the number of vector destinations for the instruction.
183 /// \p I is the original G_INTRINSIC_W_SIDE_EFFECTS instruction.
184 bool selectVectorLoadIntrinsic(unsigned Opc, unsigned NumVecs,
185 MachineInstr &I);
186 bool selectIntrinsicWithSideEffects(MachineInstr &I,
187 MachineRegisterInfo &MRI);
188 bool selectIntrinsic(MachineInstr &I, MachineRegisterInfo &MRI);
189 bool selectVectorICmp(MachineInstr &I, MachineRegisterInfo &MRI);
190 bool selectIntrinsicTrunc(MachineInstr &I, MachineRegisterInfo &MRI) const;
191 bool selectIntrinsicRound(MachineInstr &I, MachineRegisterInfo &MRI) const;
192 bool selectJumpTable(MachineInstr &I, MachineRegisterInfo &MRI);
193 bool selectBrJT(MachineInstr &I, MachineRegisterInfo &MRI);
194 bool selectTLSGlobalValue(MachineInstr &I, MachineRegisterInfo &MRI);
195 bool selectReduction(MachineInstr &I, MachineRegisterInfo &MRI);
196 bool selectUSMovFromExtend(MachineInstr &I, MachineRegisterInfo &MRI);
197
198 unsigned emitConstantPoolEntry(const Constant *CPVal,
199 MachineFunction &MF) const;
200 MachineInstr *emitLoadFromConstantPool(const Constant *CPVal,
201 MachineIRBuilder &MIRBuilder) const;
202
203 // Emit a vector concat operation.
204 MachineInstr *emitVectorConcat(Optional<Register> Dst, Register Op1,
205 Register Op2,
206 MachineIRBuilder &MIRBuilder) const;
207
208 // Emit an integer compare between LHS and RHS, which checks for Predicate.
209 MachineInstr *emitIntegerCompare(MachineOperand &LHS, MachineOperand &RHS,
210 MachineOperand &Predicate,
211 MachineIRBuilder &MIRBuilder) const;
212
213 /// Emit a floating point comparison between \p LHS and \p RHS.
214 /// \p Pred if given is the intended predicate to use.
215 MachineInstr *emitFPCompare(Register LHS, Register RHS,
216 MachineIRBuilder &MIRBuilder,
217 Optional<CmpInst::Predicate> = None) const;
218
219 MachineInstr *emitInstr(unsigned Opcode,
220 std::initializer_list<llvm::DstOp> DstOps,
221 std::initializer_list<llvm::SrcOp> SrcOps,
222 MachineIRBuilder &MIRBuilder,
223 const ComplexRendererFns &RenderFns = None) const;
224 /// Helper function to emit an add or sub instruction.
225 ///
226 /// \p AddrModeAndSizeToOpcode must contain each of the opcode variants above
227 /// in a specific order.
228 ///
229 /// Below is an example of the expected input to \p AddrModeAndSizeToOpcode.
230 ///
231 /// \code
232 /// const std::array<std::array<unsigned, 2>, 4> Table {
233 /// {{AArch64::ADDXri, AArch64::ADDWri},
234 /// {AArch64::ADDXrs, AArch64::ADDWrs},
235 /// {AArch64::ADDXrr, AArch64::ADDWrr},
236 /// {AArch64::SUBXri, AArch64::SUBWri},
237 /// {AArch64::ADDXrx, AArch64::ADDWrx}}};
238 /// \endcode
239 ///
240 /// Each row in the table corresponds to a different addressing mode. Each
241 /// column corresponds to a different register size.
242 ///
243 /// \attention Rows must be structured as follows:
244 /// - Row 0: The ri opcode variants
245 /// - Row 1: The rs opcode variants
246 /// - Row 2: The rr opcode variants
247 /// - Row 3: The ri opcode variants for negative immediates
248 /// - Row 4: The rx opcode variants
249 ///
250 /// \attention Columns must be structured as follows:
251 /// - Column 0: The 64-bit opcode variants
252 /// - Column 1: The 32-bit opcode variants
253 ///
254 /// \p Dst is the destination register of the binop to emit.
255 /// \p LHS is the left-hand operand of the binop to emit.
256 /// \p RHS is the right-hand operand of the binop to emit.
257 MachineInstr *emitAddSub(
258 const std::array<std::array<unsigned, 2>, 5> &AddrModeAndSizeToOpcode,
259 Register Dst, MachineOperand &LHS, MachineOperand &RHS,
260 MachineIRBuilder &MIRBuilder) const;
261 MachineInstr *emitADD(Register DefReg, MachineOperand &LHS,
262 MachineOperand &RHS,
263 MachineIRBuilder &MIRBuilder) const;
264 MachineInstr *emitADDS(Register Dst, MachineOperand &LHS, MachineOperand &RHS,
265 MachineIRBuilder &MIRBuilder) const;
266 MachineInstr *emitSUBS(Register Dst, MachineOperand &LHS, MachineOperand &RHS,
267 MachineIRBuilder &MIRBuilder) const;
268 MachineInstr *emitCMN(MachineOperand &LHS, MachineOperand &RHS,
269 MachineIRBuilder &MIRBuilder) const;
270 MachineInstr *emitTST(MachineOperand &LHS, MachineOperand &RHS,
271 MachineIRBuilder &MIRBuilder) const;
272 MachineInstr *emitSelect(Register Dst, Register LHS, Register RHS,
273 AArch64CC::CondCode CC,
274 MachineIRBuilder &MIRBuilder) const;
275 MachineInstr *emitExtractVectorElt(Optional<Register> DstReg,
276 const RegisterBank &DstRB, LLT ScalarTy,
277 Register VecReg, unsigned LaneIdx,
278 MachineIRBuilder &MIRBuilder) const;
279 MachineInstr *emitCSINC(Register Dst, Register Src1, Register Src2,
280 AArch64CC::CondCode Pred,
281 MachineIRBuilder &MIRBuilder) const;
282 /// Emit a CSet for a FP compare.
283 ///
284 /// \p Dst is expected to be a 32-bit scalar register.
285 MachineInstr *emitCSetForFCmp(Register Dst, CmpInst::Predicate Pred,
286 MachineIRBuilder &MIRBuilder) const;
287
288 /// Emit the overflow op for \p Opcode.
289 ///
290 /// \p Opcode is expected to be an overflow op's opcode, e.g. G_UADDO,
291 /// G_USUBO, etc.
292 std::pair<MachineInstr *, AArch64CC::CondCode>
293 emitOverflowOp(unsigned Opcode, Register Dst, MachineOperand &LHS,
294 MachineOperand &RHS, MachineIRBuilder &MIRBuilder) const;
295
296 /// Emit a TB(N)Z instruction which tests \p Bit in \p TestReg.
297 /// \p IsNegative is true if the test should be "not zero".
298 /// This will also optimize the test bit instruction when possible.
299 MachineInstr *emitTestBit(Register TestReg, uint64_t Bit, bool IsNegative,
300 MachineBasicBlock *DstMBB,
301 MachineIRBuilder &MIB) const;
302
303 /// Emit a CB(N)Z instruction which branches to \p DestMBB.
304 MachineInstr *emitCBZ(Register CompareReg, bool IsNegative,
305 MachineBasicBlock *DestMBB,
306 MachineIRBuilder &MIB) const;
307
308 // Equivalent to the i32shift_a and friends from AArch64InstrInfo.td.
309 // We use these manually instead of using the importer since it doesn't
310 // support SDNodeXForm.
311 ComplexRendererFns selectShiftA_32(const MachineOperand &Root) const;
312 ComplexRendererFns selectShiftB_32(const MachineOperand &Root) const;
313 ComplexRendererFns selectShiftA_64(const MachineOperand &Root) const;
314 ComplexRendererFns selectShiftB_64(const MachineOperand &Root) const;
315
316 ComplexRendererFns select12BitValueWithLeftShift(uint64_t Immed) const;
317 ComplexRendererFns selectArithImmed(MachineOperand &Root) const;
318 ComplexRendererFns selectNegArithImmed(MachineOperand &Root) const;
319
320 ComplexRendererFns selectAddrModeUnscaled(MachineOperand &Root,
321 unsigned Size) const;
322
323 ComplexRendererFns selectAddrModeUnscaled8(MachineOperand &Root) const {
324 return selectAddrModeUnscaled(Root, 1);
325 }
326 ComplexRendererFns selectAddrModeUnscaled16(MachineOperand &Root) const {
327 return selectAddrModeUnscaled(Root, 2);
328 }
329 ComplexRendererFns selectAddrModeUnscaled32(MachineOperand &Root) const {
330 return selectAddrModeUnscaled(Root, 4);
331 }
332 ComplexRendererFns selectAddrModeUnscaled64(MachineOperand &Root) const {
333 return selectAddrModeUnscaled(Root, 8);
334 }
335 ComplexRendererFns selectAddrModeUnscaled128(MachineOperand &Root) const {
336 return selectAddrModeUnscaled(Root, 16);
337 }
338
339 /// Helper to try to fold in a GISEL_ADD_LOW into an immediate, to be used
340 /// from complex pattern matchers like selectAddrModeIndexed().
341 ComplexRendererFns tryFoldAddLowIntoImm(MachineInstr &RootDef, unsigned Size,
342 MachineRegisterInfo &MRI) const;
343
344 ComplexRendererFns selectAddrModeIndexed(MachineOperand &Root,
345 unsigned Size) const;
346 template <int Width>
347 ComplexRendererFns selectAddrModeIndexed(MachineOperand &Root) const {
348 return selectAddrModeIndexed(Root, Width / 8);
349 }
350
351 bool isWorthFoldingIntoExtendedReg(MachineInstr &MI,
352 const MachineRegisterInfo &MRI) const;
353 ComplexRendererFns
354 selectAddrModeShiftedExtendXReg(MachineOperand &Root,
355 unsigned SizeInBytes) const;
356
357 /// Returns a \p ComplexRendererFns which contains a base, offset, and whether
358 /// or not a shift + extend should be folded into an addressing mode. Returns
359 /// None when this is not profitable or possible.
360 ComplexRendererFns
361 selectExtendedSHL(MachineOperand &Root, MachineOperand &Base,
362 MachineOperand &Offset, unsigned SizeInBytes,
363 bool WantsExt) const;
364 ComplexRendererFns selectAddrModeRegisterOffset(MachineOperand &Root) const;
365 ComplexRendererFns selectAddrModeXRO(MachineOperand &Root,
366 unsigned SizeInBytes) const;
367 template <int Width>
368 ComplexRendererFns selectAddrModeXRO(MachineOperand &Root) const {
369 return selectAddrModeXRO(Root, Width / 8);
370 }
371
372 ComplexRendererFns selectAddrModeWRO(MachineOperand &Root,
373 unsigned SizeInBytes) const;
374 template <int Width>
375 ComplexRendererFns selectAddrModeWRO(MachineOperand &Root) const {
376 return selectAddrModeWRO(Root, Width / 8);
377 }
378
379 ComplexRendererFns selectShiftedRegister(MachineOperand &Root,
380 bool AllowROR = false) const;
381
382 ComplexRendererFns selectArithShiftedRegister(MachineOperand &Root) const {
383 return selectShiftedRegister(Root);
384 }
385
386 ComplexRendererFns selectLogicalShiftedRegister(MachineOperand &Root) const {
387 return selectShiftedRegister(Root, true);
388 }
389
390 /// Given an extend instruction, determine the correct shift-extend type for
391 /// that instruction.
392 ///
393 /// If the instruction is going to be used in a load or store, pass
394 /// \p IsLoadStore = true.
395 AArch64_AM::ShiftExtendType
396 getExtendTypeForInst(MachineInstr &MI, MachineRegisterInfo &MRI,
397 bool IsLoadStore = false) const;
398
399 /// Move \p Reg to \p RC if \p Reg is not already on \p RC.
400 ///
401 /// \returns Either \p Reg if no change was necessary, or the new register
402 /// created by moving \p Reg.
403 ///
404 /// Note: This uses emitCopy right now.
405 Register moveScalarRegClass(Register Reg, const TargetRegisterClass &RC,
406 MachineIRBuilder &MIB) const;
407
408 ComplexRendererFns selectArithExtendedRegister(MachineOperand &Root) const;
409
410 void renderTruncImm(MachineInstrBuilder &MIB, const MachineInstr &MI,
411 int OpIdx = -1) const;
412 void renderLogicalImm32(MachineInstrBuilder &MIB, const MachineInstr &I,
413 int OpIdx = -1) const;
414 void renderLogicalImm64(MachineInstrBuilder &MIB, const MachineInstr &I,
415 int OpIdx = -1) const;
416 void renderFPImm16(MachineInstrBuilder &MIB, const MachineInstr &MI,
417 int OpIdx = -1) const;
418 void renderFPImm32(MachineInstrBuilder &MIB, const MachineInstr &MI,
419 int OpIdx = -1) const;
420 void renderFPImm64(MachineInstrBuilder &MIB, const MachineInstr &MI,
421 int OpIdx = -1) const;
422
423 // Materialize a GlobalValue or BlockAddress using a movz+movk sequence.
424 void materializeLargeCMVal(MachineInstr &I, const Value *V, unsigned OpFlags);
425
426 // Optimization methods.
427 bool tryOptSelect(MachineInstr &MI);
428 MachineInstr *tryFoldIntegerCompare(MachineOperand &LHS, MachineOperand &RHS,
429 MachineOperand &Predicate,
430 MachineIRBuilder &MIRBuilder) const;
431
432 /// Return true if \p MI is a load or store of \p NumBytes bytes.
433 bool isLoadStoreOfNumBytes(const MachineInstr &MI, unsigned NumBytes) const;
434
435 /// Returns true if \p MI is guaranteed to have the high-half of a 64-bit
436 /// register zeroed out. In other words, the result of MI has been explicitly
437 /// zero extended.
438 bool isDef32(const MachineInstr &MI) const;
439
440 const AArch64TargetMachine &TM;
441 const AArch64Subtarget &STI;
442 const AArch64InstrInfo &TII;
443 const AArch64RegisterInfo &TRI;
444 const AArch64RegisterBankInfo &RBI;
445
446 bool ProduceNonFlagSettingCondBr = false;
447
448 // Some cached values used during selection.
449 // We use LR as a live-in register, and we keep track of it here as it can be
450 // clobbered by calls.
451 Register MFReturnAddr;
452
453 MachineIRBuilder MIB;
454
455#define GET_GLOBALISEL_PREDICATES_DECL
456#include "AArch64GenGlobalISel.inc"
457#undef GET_GLOBALISEL_PREDICATES_DECL
458
459// We declare the temporaries used by selectImpl() in the class to minimize the
460// cost of constructing placeholder values.
461#define GET_GLOBALISEL_TEMPORARIES_DECL
462#include "AArch64GenGlobalISel.inc"
463#undef GET_GLOBALISEL_TEMPORARIES_DECL
464};
465
466} // end anonymous namespace
467
468#define GET_GLOBALISEL_IMPL
469#include "AArch64GenGlobalISel.inc"
470#undef GET_GLOBALISEL_IMPL
471
472AArch64InstructionSelector::AArch64InstructionSelector(
473 const AArch64TargetMachine &TM, const AArch64Subtarget &STI,
474 const AArch64RegisterBankInfo &RBI)
475 : TM(TM), STI(STI), TII(*STI.getInstrInfo()), TRI(*STI.getRegisterInfo()),
476 RBI(RBI),
477#define GET_GLOBALISEL_PREDICATES_INIT
478#include "AArch64GenGlobalISel.inc"
479#undef GET_GLOBALISEL_PREDICATES_INIT
480#define GET_GLOBALISEL_TEMPORARIES_INIT
481#include "AArch64GenGlobalISel.inc"
482#undef GET_GLOBALISEL_TEMPORARIES_INIT
483{
484}
485
486// FIXME: This should be target-independent, inferred from the types declared
487// for each class in the bank.
488static const TargetRegisterClass *
489getRegClassForTypeOnBank(LLT Ty, const RegisterBank &RB,
490 const RegisterBankInfo &RBI,
491 bool GetAllRegSet = false) {
492 if (RB.getID() == AArch64::GPRRegBankID) {
493 if (Ty.getSizeInBits() <= 32)
494 return GetAllRegSet ? &AArch64::GPR32allRegClass
495 : &AArch64::GPR32RegClass;
496 if (Ty.getSizeInBits() == 64)
497 return GetAllRegSet ? &AArch64::GPR64allRegClass
498 : &AArch64::GPR64RegClass;
499 if (Ty.getSizeInBits() == 128)
500 return &AArch64::XSeqPairsClassRegClass;
501 return nullptr;
502 }
503
504 if (RB.getID() == AArch64::FPRRegBankID) {
505 switch (Ty.getSizeInBits()) {
506 case 8:
507 return &AArch64::FPR8RegClass;
508 case 16:
509 return &AArch64::FPR16RegClass;
510 case 32:
511 return &AArch64::FPR32RegClass;
512 case 64:
513 return &AArch64::FPR64RegClass;
514 case 128:
515 return &AArch64::FPR128RegClass;
516 }
517 return nullptr;
518 }
519
520 return nullptr;
521}
522
523/// Given a register bank, and size in bits, return the smallest register class
524/// that can represent that combination.
525static const TargetRegisterClass *
526getMinClassForRegBank(const RegisterBank &RB, unsigned SizeInBits,
527 bool GetAllRegSet = false) {
528 unsigned RegBankID = RB.getID();
529
530 if (RegBankID == AArch64::GPRRegBankID) {
531 if (SizeInBits <= 32)
532 return GetAllRegSet ? &AArch64::GPR32allRegClass
533 : &AArch64::GPR32RegClass;
534 if (SizeInBits == 64)
535 return GetAllRegSet ? &AArch64::GPR64allRegClass
536 : &AArch64::GPR64RegClass;
537 if (SizeInBits == 128)
538 return &AArch64::XSeqPairsClassRegClass;
539 }
540
541 if (RegBankID == AArch64::FPRRegBankID) {
542 switch (SizeInBits) {
543 default:
544 return nullptr;
545 case 8:
546 return &AArch64::FPR8RegClass;
547 case 16:
548 return &AArch64::FPR16RegClass;
549 case 32:
550 return &AArch64::FPR32RegClass;
551 case 64:
552 return &AArch64::FPR64RegClass;
553 case 128:
554 return &AArch64::FPR128RegClass;
555 }
556 }
557
558 return nullptr;
559}
560
561/// Returns the correct subregister to use for a given register class.
562static bool getSubRegForClass(const TargetRegisterClass *RC,
563 const TargetRegisterInfo &TRI, unsigned &SubReg) {
564 switch (TRI.getRegSizeInBits(*RC)) {
565 case 8:
566 SubReg = AArch64::bsub;
567 break;
568 case 16:
569 SubReg = AArch64::hsub;
570 break;
571 case 32:
572 if (RC != &AArch64::FPR32RegClass)
573 SubReg = AArch64::sub_32;
574 else
575 SubReg = AArch64::ssub;
576 break;
577 case 64:
578 SubReg = AArch64::dsub;
579 break;
580 default:
581 LLVM_DEBUG(do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "Couldn't find appropriate subregister for register class."
; } } while (false)
582 dbgs() << "Couldn't find appropriate subregister for register class.")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "Couldn't find appropriate subregister for register class."
; } } while (false)
;
583 return false;
584 }
585
586 return true;
587}
588
589/// Returns the minimum size the given register bank can hold.
590static unsigned getMinSizeForRegBank(const RegisterBank &RB) {
591 switch (RB.getID()) {
592 case AArch64::GPRRegBankID:
593 return 32;
594 case AArch64::FPRRegBankID:
595 return 8;
596 default:
597 llvm_unreachable("Tried to get minimum size for unknown register bank.")::llvm::llvm_unreachable_internal("Tried to get minimum size for unknown register bank."
, "llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 597)
;
598 }
599}
600
601/// Create a REG_SEQUENCE instruction using the registers in \p Regs.
602/// Helper function for functions like createDTuple and createQTuple.
603///
604/// \p RegClassIDs - The list of register class IDs available for some tuple of
605/// a scalar class. E.g. QQRegClassID, QQQRegClassID, QQQQRegClassID. This is
606/// expected to contain between 2 and 4 tuple classes.
607///
608/// \p SubRegs - The list of subregister classes associated with each register
609/// class ID in \p RegClassIDs. E.g., QQRegClassID should use the qsub0
610/// subregister class. The index of each subregister class is expected to
611/// correspond with the index of each register class.
612///
613/// \returns Either the destination register of REG_SEQUENCE instruction that
614/// was created, or the 0th element of \p Regs if \p Regs contains a single
615/// element.
616static Register createTuple(ArrayRef<Register> Regs,
617 const unsigned RegClassIDs[],
618 const unsigned SubRegs[], MachineIRBuilder &MIB) {
619 unsigned NumRegs = Regs.size();
620 if (NumRegs == 1)
621 return Regs[0];
622 assert(NumRegs >= 2 && NumRegs <= 4 &&(static_cast <bool> (NumRegs >= 2 && NumRegs
<= 4 && "Only support between two and 4 registers in a tuple!"
) ? void (0) : __assert_fail ("NumRegs >= 2 && NumRegs <= 4 && \"Only support between two and 4 registers in a tuple!\""
, "llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 623, __extension__ __PRETTY_FUNCTION__))
623 "Only support between two and 4 registers in a tuple!")(static_cast <bool> (NumRegs >= 2 && NumRegs
<= 4 && "Only support between two and 4 registers in a tuple!"
) ? void (0) : __assert_fail ("NumRegs >= 2 && NumRegs <= 4 && \"Only support between two and 4 registers in a tuple!\""
, "llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 623, __extension__ __PRETTY_FUNCTION__))
;
624 const TargetRegisterInfo *TRI = MIB.getMF().getSubtarget().getRegisterInfo();
625 auto *DesiredClass = TRI->getRegClass(RegClassIDs[NumRegs - 2]);
626 auto RegSequence =
627 MIB.buildInstr(TargetOpcode::REG_SEQUENCE, {DesiredClass}, {});
628 for (unsigned I = 0, E = Regs.size(); I < E; ++I) {
629 RegSequence.addUse(Regs[I]);
630 RegSequence.addImm(SubRegs[I]);
631 }
632 return RegSequence.getReg(0);
633}
634
635/// Create a tuple of D-registers using the registers in \p Regs.
636static Register createDTuple(ArrayRef<Register> Regs, MachineIRBuilder &MIB) {
637 static const unsigned RegClassIDs[] = {
638 AArch64::DDRegClassID, AArch64::DDDRegClassID, AArch64::DDDDRegClassID};
639 static const unsigned SubRegs[] = {AArch64::dsub0, AArch64::dsub1,
640 AArch64::dsub2, AArch64::dsub3};
641 return createTuple(Regs, RegClassIDs, SubRegs, MIB);
642}
643
644/// Create a tuple of Q-registers using the registers in \p Regs.
645static Register createQTuple(ArrayRef<Register> Regs, MachineIRBuilder &MIB) {
646 static const unsigned RegClassIDs[] = {
647 AArch64::QQRegClassID, AArch64::QQQRegClassID, AArch64::QQQQRegClassID};
648 static const unsigned SubRegs[] = {AArch64::qsub0, AArch64::qsub1,
649 AArch64::qsub2, AArch64::qsub3};
650 return createTuple(Regs, RegClassIDs, SubRegs, MIB);
651}
652
653static Optional<uint64_t> getImmedFromMO(const MachineOperand &Root) {
654 auto &MI = *Root.getParent();
655 auto &MBB = *MI.getParent();
656 auto &MF = *MBB.getParent();
657 auto &MRI = MF.getRegInfo();
658 uint64_t Immed;
659 if (Root.isImm())
660 Immed = Root.getImm();
661 else if (Root.isCImm())
662 Immed = Root.getCImm()->getZExtValue();
663 else if (Root.isReg()) {
664 auto ValAndVReg =
665 getIConstantVRegValWithLookThrough(Root.getReg(), MRI, true);
666 if (!ValAndVReg)
667 return None;
668 Immed = ValAndVReg->Value.getSExtValue();
669 } else
670 return None;
671 return Immed;
672}
673
674/// Check whether \p I is a currently unsupported binary operation:
675/// - it has an unsized type
676/// - an operand is not a vreg
677/// - all operands are not in the same bank
678/// These are checks that should someday live in the verifier, but right now,
679/// these are mostly limitations of the aarch64 selector.
680static bool unsupportedBinOp(const MachineInstr &I,
681 const AArch64RegisterBankInfo &RBI,
682 const MachineRegisterInfo &MRI,
683 const AArch64RegisterInfo &TRI) {
684 LLT Ty = MRI.getType(I.getOperand(0).getReg());
685 if (!Ty.isValid()) {
686 LLVM_DEBUG(dbgs() << "Generic binop register should be typed\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "Generic binop register should be typed\n"
; } } while (false)
;
687 return true;
688 }
689
690 const RegisterBank *PrevOpBank = nullptr;
691 for (auto &MO : I.operands()) {
692 // FIXME: Support non-register operands.
693 if (!MO.isReg()) {
694 LLVM_DEBUG(dbgs() << "Generic inst non-reg operands are unsupported\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "Generic inst non-reg operands are unsupported\n"
; } } while (false)
;
695 return true;
696 }
697
698 // FIXME: Can generic operations have physical registers operands? If
699 // so, this will need to be taught about that, and we'll need to get the
700 // bank out of the minimal class for the register.
701 // Either way, this needs to be documented (and possibly verified).
702 if (!Register::isVirtualRegister(MO.getReg())) {
703 LLVM_DEBUG(dbgs() << "Generic inst has physical register operand\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "Generic inst has physical register operand\n"
; } } while (false)
;
704 return true;
705 }
706
707 const RegisterBank *OpBank = RBI.getRegBank(MO.getReg(), MRI, TRI);
708 if (!OpBank) {
709 LLVM_DEBUG(dbgs() << "Generic register has no bank or class\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "Generic register has no bank or class\n"
; } } while (false)
;
710 return true;
711 }
712
713 if (PrevOpBank && OpBank != PrevOpBank) {
714 LLVM_DEBUG(dbgs() << "Generic inst operands have different banks\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "Generic inst operands have different banks\n"
; } } while (false)
;
715 return true;
716 }
717 PrevOpBank = OpBank;
718 }
719 return false;
720}
721
722/// Select the AArch64 opcode for the basic binary operation \p GenericOpc
723/// (such as G_OR or G_SDIV), appropriate for the register bank \p RegBankID
724/// and of size \p OpSize.
725/// \returns \p GenericOpc if the combination is unsupported.
726static unsigned selectBinaryOp(unsigned GenericOpc, unsigned RegBankID,
727 unsigned OpSize) {
728 switch (RegBankID) {
729 case AArch64::GPRRegBankID:
730 if (OpSize == 32) {
731 switch (GenericOpc) {
732 case TargetOpcode::G_SHL:
733 return AArch64::LSLVWr;
734 case TargetOpcode::G_LSHR:
735 return AArch64::LSRVWr;
736 case TargetOpcode::G_ASHR:
737 return AArch64::ASRVWr;
738 default:
739 return GenericOpc;
740 }
741 } else if (OpSize == 64) {
742 switch (GenericOpc) {
743 case TargetOpcode::G_PTR_ADD:
744 return AArch64::ADDXrr;
745 case TargetOpcode::G_SHL:
746 return AArch64::LSLVXr;
747 case TargetOpcode::G_LSHR:
748 return AArch64::LSRVXr;
749 case TargetOpcode::G_ASHR:
750 return AArch64::ASRVXr;
751 default:
752 return GenericOpc;
753 }
754 }
755 break;
756 case AArch64::FPRRegBankID:
757 switch (OpSize) {
758 case 32:
759 switch (GenericOpc) {
760 case TargetOpcode::G_FADD:
761 return AArch64::FADDSrr;
762 case TargetOpcode::G_FSUB:
763 return AArch64::FSUBSrr;
764 case TargetOpcode::G_FMUL:
765 return AArch64::FMULSrr;
766 case TargetOpcode::G_FDIV:
767 return AArch64::FDIVSrr;
768 default:
769 return GenericOpc;
770 }
771 case 64:
772 switch (GenericOpc) {
773 case TargetOpcode::G_FADD:
774 return AArch64::FADDDrr;
775 case TargetOpcode::G_FSUB:
776 return AArch64::FSUBDrr;
777 case TargetOpcode::G_FMUL:
778 return AArch64::FMULDrr;
779 case TargetOpcode::G_FDIV:
780 return AArch64::FDIVDrr;
781 case TargetOpcode::G_OR:
782 return AArch64::ORRv8i8;
783 default:
784 return GenericOpc;
785 }
786 }
787 break;
788 }
789 return GenericOpc;
790}
791
792/// Select the AArch64 opcode for the G_LOAD or G_STORE operation \p GenericOpc,
793/// appropriate for the (value) register bank \p RegBankID and of memory access
794/// size \p OpSize. This returns the variant with the base+unsigned-immediate
795/// addressing mode (e.g., LDRXui).
796/// \returns \p GenericOpc if the combination is unsupported.
797static unsigned selectLoadStoreUIOp(unsigned GenericOpc, unsigned RegBankID,
798 unsigned OpSize) {
799 const bool isStore = GenericOpc == TargetOpcode::G_STORE;
800 switch (RegBankID) {
801 case AArch64::GPRRegBankID:
802 switch (OpSize) {
803 case 8:
804 return isStore ? AArch64::STRBBui : AArch64::LDRBBui;
805 case 16:
806 return isStore ? AArch64::STRHHui : AArch64::LDRHHui;
807 case 32:
808 return isStore ? AArch64::STRWui : AArch64::LDRWui;
809 case 64:
810 return isStore ? AArch64::STRXui : AArch64::LDRXui;
811 }
812 break;
813 case AArch64::FPRRegBankID:
814 switch (OpSize) {
815 case 8:
816 return isStore ? AArch64::STRBui : AArch64::LDRBui;
817 case 16:
818 return isStore ? AArch64::STRHui : AArch64::LDRHui;
819 case 32:
820 return isStore ? AArch64::STRSui : AArch64::LDRSui;
821 case 64:
822 return isStore ? AArch64::STRDui : AArch64::LDRDui;
823 case 128:
824 return isStore ? AArch64::STRQui : AArch64::LDRQui;
825 }
826 break;
827 }
828 return GenericOpc;
829}
830
831#ifndef NDEBUG
832/// Helper function that verifies that we have a valid copy at the end of
833/// selectCopy. Verifies that the source and dest have the expected sizes and
834/// then returns true.
835static bool isValidCopy(const MachineInstr &I, const RegisterBank &DstBank,
836 const MachineRegisterInfo &MRI,
837 const TargetRegisterInfo &TRI,
838 const RegisterBankInfo &RBI) {
839 const Register DstReg = I.getOperand(0).getReg();
840 const Register SrcReg = I.getOperand(1).getReg();
841 const unsigned DstSize = RBI.getSizeInBits(DstReg, MRI, TRI);
842 const unsigned SrcSize = RBI.getSizeInBits(SrcReg, MRI, TRI);
843
844 // Make sure the size of the source and dest line up.
845 assert((static_cast <bool> ((DstSize == SrcSize || (Register::
isPhysicalRegister(SrcReg) && DstSize <= SrcSize) ||
(((DstSize + 31) / 32 == (SrcSize + 31) / 32) && DstSize
> SrcSize)) && "Copy with different width?!") ? void
(0) : __assert_fail ("(DstSize == SrcSize || (Register::isPhysicalRegister(SrcReg) && DstSize <= SrcSize) || (((DstSize + 31) / 32 == (SrcSize + 31) / 32) && DstSize > SrcSize)) && \"Copy with different width?!\""
, "llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 854, __extension__ __PRETTY_FUNCTION__))
846 (DstSize == SrcSize ||(static_cast <bool> ((DstSize == SrcSize || (Register::
isPhysicalRegister(SrcReg) && DstSize <= SrcSize) ||
(((DstSize + 31) / 32 == (SrcSize + 31) / 32) && DstSize
> SrcSize)) && "Copy with different width?!") ? void
(0) : __assert_fail ("(DstSize == SrcSize || (Register::isPhysicalRegister(SrcReg) && DstSize <= SrcSize) || (((DstSize + 31) / 32 == (SrcSize + 31) / 32) && DstSize > SrcSize)) && \"Copy with different width?!\""
, "llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 854, __extension__ __PRETTY_FUNCTION__))
847 // Copies are a mean to setup initial types, the number of(static_cast <bool> ((DstSize == SrcSize || (Register::
isPhysicalRegister(SrcReg) && DstSize <= SrcSize) ||
(((DstSize + 31) / 32 == (SrcSize + 31) / 32) && DstSize
> SrcSize)) && "Copy with different width?!") ? void
(0) : __assert_fail ("(DstSize == SrcSize || (Register::isPhysicalRegister(SrcReg) && DstSize <= SrcSize) || (((DstSize + 31) / 32 == (SrcSize + 31) / 32) && DstSize > SrcSize)) && \"Copy with different width?!\""
, "llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 854, __extension__ __PRETTY_FUNCTION__))
848 // bits may not exactly match.(static_cast <bool> ((DstSize == SrcSize || (Register::
isPhysicalRegister(SrcReg) && DstSize <= SrcSize) ||
(((DstSize + 31) / 32 == (SrcSize + 31) / 32) && DstSize
> SrcSize)) && "Copy with different width?!") ? void
(0) : __assert_fail ("(DstSize == SrcSize || (Register::isPhysicalRegister(SrcReg) && DstSize <= SrcSize) || (((DstSize + 31) / 32 == (SrcSize + 31) / 32) && DstSize > SrcSize)) && \"Copy with different width?!\""
, "llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 854, __extension__ __PRETTY_FUNCTION__))
849 (Register::isPhysicalRegister(SrcReg) && DstSize <= SrcSize) ||(static_cast <bool> ((DstSize == SrcSize || (Register::
isPhysicalRegister(SrcReg) && DstSize <= SrcSize) ||
(((DstSize + 31) / 32 == (SrcSize + 31) / 32) && DstSize
> SrcSize)) && "Copy with different width?!") ? void
(0) : __assert_fail ("(DstSize == SrcSize || (Register::isPhysicalRegister(SrcReg) && DstSize <= SrcSize) || (((DstSize + 31) / 32 == (SrcSize + 31) / 32) && DstSize > SrcSize)) && \"Copy with different width?!\""
, "llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 854, __extension__ __PRETTY_FUNCTION__))
850 // Copies are a mean to copy bits around, as long as we are(static_cast <bool> ((DstSize == SrcSize || (Register::
isPhysicalRegister(SrcReg) && DstSize <= SrcSize) ||
(((DstSize + 31) / 32 == (SrcSize + 31) / 32) && DstSize
> SrcSize)) && "Copy with different width?!") ? void
(0) : __assert_fail ("(DstSize == SrcSize || (Register::isPhysicalRegister(SrcReg) && DstSize <= SrcSize) || (((DstSize + 31) / 32 == (SrcSize + 31) / 32) && DstSize > SrcSize)) && \"Copy with different width?!\""
, "llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 854, __extension__ __PRETTY_FUNCTION__))
851 // on the same register class, that's fine. Otherwise, that(static_cast <bool> ((DstSize == SrcSize || (Register::
isPhysicalRegister(SrcReg) && DstSize <= SrcSize) ||
(((DstSize + 31) / 32 == (SrcSize + 31) / 32) && DstSize
> SrcSize)) && "Copy with different width?!") ? void
(0) : __assert_fail ("(DstSize == SrcSize || (Register::isPhysicalRegister(SrcReg) && DstSize <= SrcSize) || (((DstSize + 31) / 32 == (SrcSize + 31) / 32) && DstSize > SrcSize)) && \"Copy with different width?!\""
, "llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 854, __extension__ __PRETTY_FUNCTION__))
852 // means we need some SUBREG_TO_REG or AND & co.(static_cast <bool> ((DstSize == SrcSize || (Register::
isPhysicalRegister(SrcReg) && DstSize <= SrcSize) ||
(((DstSize + 31) / 32 == (SrcSize + 31) / 32) && DstSize
> SrcSize)) && "Copy with different width?!") ? void
(0) : __assert_fail ("(DstSize == SrcSize || (Register::isPhysicalRegister(SrcReg) && DstSize <= SrcSize) || (((DstSize + 31) / 32 == (SrcSize + 31) / 32) && DstSize > SrcSize)) && \"Copy with different width?!\""
, "llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 854, __extension__ __PRETTY_FUNCTION__))
853 (((DstSize + 31) / 32 == (SrcSize + 31) / 32) && DstSize > SrcSize)) &&(static_cast <bool> ((DstSize == SrcSize || (Register::
isPhysicalRegister(SrcReg) && DstSize <= SrcSize) ||
(((DstSize + 31) / 32 == (SrcSize + 31) / 32) && DstSize
> SrcSize)) && "Copy with different width?!") ? void
(0) : __assert_fail ("(DstSize == SrcSize || (Register::isPhysicalRegister(SrcReg) && DstSize <= SrcSize) || (((DstSize + 31) / 32 == (SrcSize + 31) / 32) && DstSize > SrcSize)) && \"Copy with different width?!\""
, "llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 854, __extension__ __PRETTY_FUNCTION__))
854 "Copy with different width?!")(static_cast <bool> ((DstSize == SrcSize || (Register::
isPhysicalRegister(SrcReg) && DstSize <= SrcSize) ||
(((DstSize + 31) / 32 == (SrcSize + 31) / 32) && DstSize
> SrcSize)) && "Copy with different width?!") ? void
(0) : __assert_fail ("(DstSize == SrcSize || (Register::isPhysicalRegister(SrcReg) && DstSize <= SrcSize) || (((DstSize + 31) / 32 == (SrcSize + 31) / 32) && DstSize > SrcSize)) && \"Copy with different width?!\""
, "llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 854, __extension__ __PRETTY_FUNCTION__))
;
855
856 // Check the size of the destination.
857 assert((DstSize <= 64 || DstBank.getID() == AArch64::FPRRegBankID) &&(static_cast <bool> ((DstSize <= 64 || DstBank.getID
() == AArch64::FPRRegBankID) && "GPRs cannot get more than 64-bit width values"
) ? void (0) : __assert_fail ("(DstSize <= 64 || DstBank.getID() == AArch64::FPRRegBankID) && \"GPRs cannot get more than 64-bit width values\""
, "llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 858, __extension__ __PRETTY_FUNCTION__))
858 "GPRs cannot get more than 64-bit width values")(static_cast <bool> ((DstSize <= 64 || DstBank.getID
() == AArch64::FPRRegBankID) && "GPRs cannot get more than 64-bit width values"
) ? void (0) : __assert_fail ("(DstSize <= 64 || DstBank.getID() == AArch64::FPRRegBankID) && \"GPRs cannot get more than 64-bit width values\""
, "llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 858, __extension__ __PRETTY_FUNCTION__))
;
859
860 return true;
861}
862#endif
863
864/// Helper function for selectCopy. Inserts a subregister copy from \p SrcReg
865/// to \p *To.
866///
867/// E.g "To = COPY SrcReg:SubReg"
868static bool copySubReg(MachineInstr &I, MachineRegisterInfo &MRI,
869 const RegisterBankInfo &RBI, Register SrcReg,
870 const TargetRegisterClass *To, unsigned SubReg) {
871 assert(SrcReg.isValid() && "Expected a valid source register?")(static_cast <bool> (SrcReg.isValid() && "Expected a valid source register?"
) ? void (0) : __assert_fail ("SrcReg.isValid() && \"Expected a valid source register?\""
, "llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 871, __extension__ __PRETTY_FUNCTION__))
;
872 assert(To && "Destination register class cannot be null")(static_cast <bool> (To && "Destination register class cannot be null"
) ? void (0) : __assert_fail ("To && \"Destination register class cannot be null\""
, "llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 872, __extension__ __PRETTY_FUNCTION__))
;
873 assert(SubReg && "Expected a valid subregister")(static_cast <bool> (SubReg && "Expected a valid subregister"
) ? void (0) : __assert_fail ("SubReg && \"Expected a valid subregister\""
, "llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 873, __extension__ __PRETTY_FUNCTION__))
;
874
875 MachineIRBuilder MIB(I);
876 auto SubRegCopy =
877 MIB.buildInstr(TargetOpcode::COPY, {To}, {}).addReg(SrcReg, 0, SubReg);
878 MachineOperand &RegOp = I.getOperand(1);
879 RegOp.setReg(SubRegCopy.getReg(0));
880
881 // It's possible that the destination register won't be constrained. Make
882 // sure that happens.
883 if (!Register::isPhysicalRegister(I.getOperand(0).getReg()))
884 RBI.constrainGenericRegister(I.getOperand(0).getReg(), *To, MRI);
885
886 return true;
887}
888
889/// Helper function to get the source and destination register classes for a
890/// copy. Returns a std::pair containing the source register class for the
891/// copy, and the destination register class for the copy. If a register class
892/// cannot be determined, then it will be nullptr.
893static std::pair<const TargetRegisterClass *, const TargetRegisterClass *>
894getRegClassesForCopy(MachineInstr &I, const TargetInstrInfo &TII,
895 MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI,
896 const RegisterBankInfo &RBI) {
897 Register DstReg = I.getOperand(0).getReg();
898 Register SrcReg = I.getOperand(1).getReg();
899 const RegisterBank &DstRegBank = *RBI.getRegBank(DstReg, MRI, TRI);
900 const RegisterBank &SrcRegBank = *RBI.getRegBank(SrcReg, MRI, TRI);
901 unsigned DstSize = RBI.getSizeInBits(DstReg, MRI, TRI);
902 unsigned SrcSize = RBI.getSizeInBits(SrcReg, MRI, TRI);
903
904 // Special casing for cross-bank copies of s1s. We can technically represent
905 // a 1-bit value with any size of register. The minimum size for a GPR is 32
906 // bits. So, we need to put the FPR on 32 bits as well.
907 //
908 // FIXME: I'm not sure if this case holds true outside of copies. If it does,
909 // then we can pull it into the helpers that get the appropriate class for a
910 // register bank. Or make a new helper that carries along some constraint
911 // information.
912 if (SrcRegBank != DstRegBank && (DstSize == 1 && SrcSize == 1))
913 SrcSize = DstSize = 32;
914
915 return {getMinClassForRegBank(SrcRegBank, SrcSize, true),
916 getMinClassForRegBank(DstRegBank, DstSize, true)};
917}
918
919static bool selectCopy(MachineInstr &I, const TargetInstrInfo &TII,
920 MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI,
921 const RegisterBankInfo &RBI) {
922 Register DstReg = I.getOperand(0).getReg();
923 Register SrcReg = I.getOperand(1).getReg();
924 const RegisterBank &DstRegBank = *RBI.getRegBank(DstReg, MRI, TRI);
925 const RegisterBank &SrcRegBank = *RBI.getRegBank(SrcReg, MRI, TRI);
926
927 // Find the correct register classes for the source and destination registers.
928 const TargetRegisterClass *SrcRC;
929 const TargetRegisterClass *DstRC;
930 std::tie(SrcRC, DstRC) = getRegClassesForCopy(I, TII, MRI, TRI, RBI);
931
932 if (!DstRC) {
933 LLVM_DEBUG(dbgs() << "Unexpected dest size "do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "Unexpected dest size " <<
RBI.getSizeInBits(DstReg, MRI, TRI) << '\n'; } } while
(false)
934 << RBI.getSizeInBits(DstReg, MRI, TRI) << '\n')do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "Unexpected dest size " <<
RBI.getSizeInBits(DstReg, MRI, TRI) << '\n'; } } while
(false)
;
935 return false;
936 }
937
938 // A couple helpers below, for making sure that the copy we produce is valid.
939
940 // Set to true if we insert a SUBREG_TO_REG. If we do this, then we don't want
941 // to verify that the src and dst are the same size, since that's handled by
942 // the SUBREG_TO_REG.
943 bool KnownValid = false;
944
945 // Returns true, or asserts if something we don't expect happens. Instead of
946 // returning true, we return isValidCopy() to ensure that we verify the
947 // result.
948 auto CheckCopy = [&]() {
949 // If we have a bitcast or something, we can't have physical registers.
950 assert((I.isCopy() ||(static_cast <bool> ((I.isCopy() || (!Register::isPhysicalRegister
(I.getOperand(0).getReg()) && !Register::isPhysicalRegister
(I.getOperand(1).getReg()))) && "No phys reg on generic operator!"
) ? void (0) : __assert_fail ("(I.isCopy() || (!Register::isPhysicalRegister(I.getOperand(0).getReg()) && !Register::isPhysicalRegister(I.getOperand(1).getReg()))) && \"No phys reg on generic operator!\""
, "llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 953, __extension__ __PRETTY_FUNCTION__))
951 (!Register::isPhysicalRegister(I.getOperand(0).getReg()) &&(static_cast <bool> ((I.isCopy() || (!Register::isPhysicalRegister
(I.getOperand(0).getReg()) && !Register::isPhysicalRegister
(I.getOperand(1).getReg()))) && "No phys reg on generic operator!"
) ? void (0) : __assert_fail ("(I.isCopy() || (!Register::isPhysicalRegister(I.getOperand(0).getReg()) && !Register::isPhysicalRegister(I.getOperand(1).getReg()))) && \"No phys reg on generic operator!\""
, "llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 953, __extension__ __PRETTY_FUNCTION__))
952 !Register::isPhysicalRegister(I.getOperand(1).getReg()))) &&(static_cast <bool> ((I.isCopy() || (!Register::isPhysicalRegister
(I.getOperand(0).getReg()) && !Register::isPhysicalRegister
(I.getOperand(1).getReg()))) && "No phys reg on generic operator!"
) ? void (0) : __assert_fail ("(I.isCopy() || (!Register::isPhysicalRegister(I.getOperand(0).getReg()) && !Register::isPhysicalRegister(I.getOperand(1).getReg()))) && \"No phys reg on generic operator!\""
, "llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 953, __extension__ __PRETTY_FUNCTION__))
953 "No phys reg on generic operator!")(static_cast <bool> ((I.isCopy() || (!Register::isPhysicalRegister
(I.getOperand(0).getReg()) && !Register::isPhysicalRegister
(I.getOperand(1).getReg()))) && "No phys reg on generic operator!"
) ? void (0) : __assert_fail ("(I.isCopy() || (!Register::isPhysicalRegister(I.getOperand(0).getReg()) && !Register::isPhysicalRegister(I.getOperand(1).getReg()))) && \"No phys reg on generic operator!\""
, "llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 953, __extension__ __PRETTY_FUNCTION__))
;
954 bool ValidCopy = true;
955#ifndef NDEBUG
956 ValidCopy = KnownValid || isValidCopy(I, DstRegBank, MRI, TRI, RBI);
957 assert(ValidCopy && "Invalid copy.")(static_cast <bool> (ValidCopy && "Invalid copy."
) ? void (0) : __assert_fail ("ValidCopy && \"Invalid copy.\""
, "llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 957, __extension__ __PRETTY_FUNCTION__))
;
958#endif
959 (void)KnownValid;
960 return ValidCopy;
961 };
962
963 // Is this a copy? If so, then we may need to insert a subregister copy.
964 if (I.isCopy()) {
965 // Yes. Check if there's anything to fix up.
966 if (!SrcRC) {
967 LLVM_DEBUG(dbgs() << "Couldn't determine source register class\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "Couldn't determine source register class\n"
; } } while (false)
;
968 return false;
969 }
970
971 unsigned SrcSize = TRI.getRegSizeInBits(*SrcRC);
972 unsigned DstSize = TRI.getRegSizeInBits(*DstRC);
973 unsigned SubReg;
974
975 // If the source bank doesn't support a subregister copy small enough,
976 // then we first need to copy to the destination bank.
977 if (getMinSizeForRegBank(SrcRegBank) > DstSize) {
978 const TargetRegisterClass *DstTempRC =
979 getMinClassForRegBank(DstRegBank, SrcSize, /* GetAllRegSet */ true);
980 getSubRegForClass(DstRC, TRI, SubReg);
981
982 MachineIRBuilder MIB(I);
983 auto Copy = MIB.buildCopy({DstTempRC}, {SrcReg});
984 copySubReg(I, MRI, RBI, Copy.getReg(0), DstRC, SubReg);
985 } else if (SrcSize > DstSize) {
986 // If the source register is bigger than the destination we need to
987 // perform a subregister copy.
988 const TargetRegisterClass *SubRegRC =
989 getMinClassForRegBank(SrcRegBank, DstSize, /* GetAllRegSet */ true);
990 getSubRegForClass(SubRegRC, TRI, SubReg);
991 copySubReg(I, MRI, RBI, SrcReg, DstRC, SubReg);
992 } else if (DstSize > SrcSize) {
993 // If the destination register is bigger than the source we need to do
994 // a promotion using SUBREG_TO_REG.
995 const TargetRegisterClass *PromotionRC =
996 getMinClassForRegBank(SrcRegBank, DstSize, /* GetAllRegSet */ true);
997 getSubRegForClass(SrcRC, TRI, SubReg);
998
999 Register PromoteReg = MRI.createVirtualRegister(PromotionRC);
1000 BuildMI(*I.getParent(), I, I.getDebugLoc(),
1001 TII.get(AArch64::SUBREG_TO_REG), PromoteReg)
1002 .addImm(0)
1003 .addUse(SrcReg)
1004 .addImm(SubReg);
1005 MachineOperand &RegOp = I.getOperand(1);
1006 RegOp.setReg(PromoteReg);
1007
1008 // Promise that the copy is implicitly validated by the SUBREG_TO_REG.
1009 KnownValid = true;
1010 }
1011
1012 // If the destination is a physical register, then there's nothing to
1013 // change, so we're done.
1014 if (Register::isPhysicalRegister(DstReg))
1015 return CheckCopy();
1016 }
1017
1018 // No need to constrain SrcReg. It will get constrained when we hit another
1019 // of its use or its defs. Copies do not have constraints.
1020 if (!RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) {
1021 LLVM_DEBUG(dbgs() << "Failed to constrain " << TII.getName(I.getOpcode())do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "Failed to constrain " <<
TII.getName(I.getOpcode()) << " operand\n"; } } while (
false)
1022 << " operand\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "Failed to constrain " <<
TII.getName(I.getOpcode()) << " operand\n"; } } while (
false)
;
1023 return false;
1024 }
1025
1026 // If this a GPR ZEXT that we want to just reduce down into a copy.
1027 // The sizes will be mismatched with the source < 32b but that's ok.
1028 if (I.getOpcode() == TargetOpcode::G_ZEXT) {
1029 I.setDesc(TII.get(AArch64::COPY));
1030 assert(SrcRegBank.getID() == AArch64::GPRRegBankID)(static_cast <bool> (SrcRegBank.getID() == AArch64::GPRRegBankID
) ? void (0) : __assert_fail ("SrcRegBank.getID() == AArch64::GPRRegBankID"
, "llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 1030, __extension__ __PRETTY_FUNCTION__))
;
1031 return selectCopy(I, TII, MRI, TRI, RBI);
1032 }
1033
1034 I.setDesc(TII.get(AArch64::COPY));
1035 return CheckCopy();
1036}
1037
1038static unsigned selectFPConvOpc(unsigned GenericOpc, LLT DstTy, LLT SrcTy) {
1039 if (!DstTy.isScalar() || !SrcTy.isScalar())
1040 return GenericOpc;
1041
1042 const unsigned DstSize = DstTy.getSizeInBits();
1043 const unsigned SrcSize = SrcTy.getSizeInBits();
1044
1045 switch (DstSize) {
1046 case 32:
1047 switch (SrcSize) {
1048 case 32:
1049 switch (GenericOpc) {
1050 case TargetOpcode::G_SITOFP:
1051 return AArch64::SCVTFUWSri;
1052 case TargetOpcode::G_UITOFP:
1053 return AArch64::UCVTFUWSri;
1054 case TargetOpcode::G_FPTOSI:
1055 return AArch64::FCVTZSUWSr;
1056 case TargetOpcode::G_FPTOUI:
1057 return AArch64::FCVTZUUWSr;
1058 default:
1059 return GenericOpc;
1060 }
1061 case 64:
1062 switch (GenericOpc) {
1063 case TargetOpcode::G_SITOFP:
1064 return AArch64::SCVTFUXSri;
1065 case TargetOpcode::G_UITOFP:
1066 return AArch64::UCVTFUXSri;
1067 case TargetOpcode::G_FPTOSI:
1068 return AArch64::FCVTZSUWDr;
1069 case TargetOpcode::G_FPTOUI:
1070 return AArch64::FCVTZUUWDr;
1071 default:
1072 return GenericOpc;
1073 }
1074 default:
1075 return GenericOpc;
1076 }
1077 case 64:
1078 switch (SrcSize) {
1079 case 32:
1080 switch (GenericOpc) {
1081 case TargetOpcode::G_SITOFP:
1082 return AArch64::SCVTFUWDri;
1083 case TargetOpcode::G_UITOFP:
1084 return AArch64::UCVTFUWDri;
1085 case TargetOpcode::G_FPTOSI:
1086 return AArch64::FCVTZSUXSr;
1087 case TargetOpcode::G_FPTOUI:
1088 return AArch64::FCVTZUUXSr;
1089 default:
1090 return GenericOpc;
1091 }
1092 case 64:
1093 switch (GenericOpc) {
1094 case TargetOpcode::G_SITOFP:
1095 return AArch64::SCVTFUXDri;
1096 case TargetOpcode::G_UITOFP:
1097 return AArch64::UCVTFUXDri;
1098 case TargetOpcode::G_FPTOSI:
1099 return AArch64::FCVTZSUXDr;
1100 case TargetOpcode::G_FPTOUI:
1101 return AArch64::FCVTZUUXDr;
1102 default:
1103 return GenericOpc;
1104 }
1105 default:
1106 return GenericOpc;
1107 }
1108 default:
1109 return GenericOpc;
1110 };
1111 return GenericOpc;
1112}
1113
1114MachineInstr *
1115AArch64InstructionSelector::emitSelect(Register Dst, Register True,
1116 Register False, AArch64CC::CondCode CC,
1117 MachineIRBuilder &MIB) const {
1118 MachineRegisterInfo &MRI = *MIB.getMRI();
1119 assert(RBI.getRegBank(False, MRI, TRI)->getID() ==(static_cast <bool> (RBI.getRegBank(False, MRI, TRI)->
getID() == RBI.getRegBank(True, MRI, TRI)->getID() &&
"Expected both select operands to have the same regbank?") ?
void (0) : __assert_fail ("RBI.getRegBank(False, MRI, TRI)->getID() == RBI.getRegBank(True, MRI, TRI)->getID() && \"Expected both select operands to have the same regbank?\""
, "llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 1121, __extension__ __PRETTY_FUNCTION__))
1120 RBI.getRegBank(True, MRI, TRI)->getID() &&(static_cast <bool> (RBI.getRegBank(False, MRI, TRI)->
getID() == RBI.getRegBank(True, MRI, TRI)->getID() &&
"Expected both select operands to have the same regbank?") ?
void (0) : __assert_fail ("RBI.getRegBank(False, MRI, TRI)->getID() == RBI.getRegBank(True, MRI, TRI)->getID() && \"Expected both select operands to have the same regbank?\""
, "llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 1121, __extension__ __PRETTY_FUNCTION__))
1121 "Expected both select operands to have the same regbank?")(static_cast <bool> (RBI.getRegBank(False, MRI, TRI)->
getID() == RBI.getRegBank(True, MRI, TRI)->getID() &&
"Expected both select operands to have the same regbank?") ?
void (0) : __assert_fail ("RBI.getRegBank(False, MRI, TRI)->getID() == RBI.getRegBank(True, MRI, TRI)->getID() && \"Expected both select operands to have the same regbank?\""
, "llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 1121, __extension__ __PRETTY_FUNCTION__))
;
1122 LLT Ty = MRI.getType(True);
1123 if (Ty.isVector())
1124 return nullptr;
1125 const unsigned Size = Ty.getSizeInBits();
1126 assert((Size == 32 || Size == 64) &&(static_cast <bool> ((Size == 32 || Size == 64) &&
"Expected 32 bit or 64 bit select only?") ? void (0) : __assert_fail
("(Size == 32 || Size == 64) && \"Expected 32 bit or 64 bit select only?\""
, "llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 1127, __extension__ __PRETTY_FUNCTION__))
1127 "Expected 32 bit or 64 bit select only?")(static_cast <bool> ((Size == 32 || Size == 64) &&
"Expected 32 bit or 64 bit select only?") ? void (0) : __assert_fail
("(Size == 32 || Size == 64) && \"Expected 32 bit or 64 bit select only?\""
, "llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 1127, __extension__ __PRETTY_FUNCTION__))
;
1128 const bool Is32Bit = Size == 32;
1129 if (RBI.getRegBank(True, MRI, TRI)->getID() != AArch64::GPRRegBankID) {
1130 unsigned Opc = Is32Bit ? AArch64::FCSELSrrr : AArch64::FCSELDrrr;
1131 auto FCSel = MIB.buildInstr(Opc, {Dst}, {True, False}).addImm(CC);
1132 constrainSelectedInstRegOperands(*FCSel, TII, TRI, RBI);
1133 return &*FCSel;
1134 }
1135
1136 // By default, we'll try and emit a CSEL.
1137 unsigned Opc = Is32Bit ? AArch64::CSELWr : AArch64::CSELXr;
1138 bool Optimized = false;
1139 auto TryFoldBinOpIntoSelect = [&Opc, Is32Bit, &CC, &MRI,
1140 &Optimized](Register &Reg, Register &OtherReg,
1141 bool Invert) {
1142 if (Optimized)
1143 return false;
1144
1145 // Attempt to fold:
1146 //
1147 // %sub = G_SUB 0, %x
1148 // %select = G_SELECT cc, %reg, %sub
1149 //
1150 // Into:
1151 // %select = CSNEG %reg, %x, cc
1152 Register MatchReg;
1153 if (mi_match(Reg, MRI, m_Neg(m_Reg(MatchReg)))) {
1154 Opc = Is32Bit ? AArch64::CSNEGWr : AArch64::CSNEGXr;
1155 Reg = MatchReg;
1156 if (Invert) {
1157 CC = AArch64CC::getInvertedCondCode(CC);
1158 std::swap(Reg, OtherReg);
1159 }
1160 return true;
1161 }
1162
1163 // Attempt to fold:
1164 //
1165 // %xor = G_XOR %x, -1
1166 // %select = G_SELECT cc, %reg, %xor
1167 //
1168 // Into:
1169 // %select = CSINV %reg, %x, cc
1170 if (mi_match(Reg, MRI, m_Not(m_Reg(MatchReg)))) {
1171 Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;
1172 Reg = MatchReg;
1173 if (Invert) {
1174 CC = AArch64CC::getInvertedCondCode(CC);
1175 std::swap(Reg, OtherReg);
1176 }
1177 return true;
1178 }
1179
1180 // Attempt to fold:
1181 //
1182 // %add = G_ADD %x, 1
1183 // %select = G_SELECT cc, %reg, %add
1184 //
1185 // Into:
1186 // %select = CSINC %reg, %x, cc
1187 if (mi_match(Reg, MRI,
1188 m_any_of(m_GAdd(m_Reg(MatchReg), m_SpecificICst(1)),
1189 m_GPtrAdd(m_Reg(MatchReg), m_SpecificICst(1))))) {
1190 Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;
1191 Reg = MatchReg;
1192 if (Invert) {
1193 CC = AArch64CC::getInvertedCondCode(CC);
1194 std::swap(Reg, OtherReg);
1195 }
1196 return true;
1197 }
1198
1199 return false;
1200 };
1201
1202 // Helper lambda which tries to use CSINC/CSINV for the instruction when its
1203 // true/false values are constants.
1204 // FIXME: All of these patterns already exist in tablegen. We should be
1205 // able to import these.
1206 auto TryOptSelectCst = [&Opc, &True, &False, &CC, Is32Bit, &MRI,
1207 &Optimized]() {
1208 if (Optimized)
1209 return false;
1210 auto TrueCst = getIConstantVRegValWithLookThrough(True, MRI);
1211 auto FalseCst = getIConstantVRegValWithLookThrough(False, MRI);
1212 if (!TrueCst && !FalseCst)
1213 return false;
1214
1215 Register ZReg = Is32Bit ? AArch64::WZR : AArch64::XZR;
1216 if (TrueCst && FalseCst) {
1217 int64_t T = TrueCst->Value.getSExtValue();
1218 int64_t F = FalseCst->Value.getSExtValue();
1219
1220 if (T == 0 && F == 1) {
1221 // G_SELECT cc, 0, 1 -> CSINC zreg, zreg, cc
1222 Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;
1223 True = ZReg;
1224 False = ZReg;
1225 return true;
1226 }
1227
1228 if (T == 0 && F == -1) {
1229 // G_SELECT cc 0, -1 -> CSINV zreg, zreg cc
1230 Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;
1231 True = ZReg;
1232 False = ZReg;
1233 return true;
1234 }
1235 }
1236
1237 if (TrueCst) {
1238 int64_t T = TrueCst->Value.getSExtValue();
1239 if (T == 1) {
1240 // G_SELECT cc, 1, f -> CSINC f, zreg, inv_cc
1241 Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;
1242 True = False;
1243 False = ZReg;
1244 CC = AArch64CC::getInvertedCondCode(CC);
1245 return true;
1246 }
1247
1248 if (T == -1) {
1249 // G_SELECT cc, -1, f -> CSINV f, zreg, inv_cc
1250 Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;
1251 True = False;
1252 False = ZReg;
1253 CC = AArch64CC::getInvertedCondCode(CC);
1254 return true;
1255 }
1256 }
1257
1258 if (FalseCst) {
1259 int64_t F = FalseCst->Value.getSExtValue();
1260 if (F == 1) {
1261 // G_SELECT cc, t, 1 -> CSINC t, zreg, cc
1262 Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;
1263 False = ZReg;
1264 return true;
1265 }
1266
1267 if (F == -1) {
1268 // G_SELECT cc, t, -1 -> CSINC t, zreg, cc
1269 Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;
1270 False = ZReg;
1271 return true;
1272 }
1273 }
1274 return false;
1275 };
1276
1277 Optimized |= TryFoldBinOpIntoSelect(False, True, /*Invert = */ false);
1278 Optimized |= TryFoldBinOpIntoSelect(True, False, /*Invert = */ true);
1279 Optimized |= TryOptSelectCst();
1280 auto SelectInst = MIB.buildInstr(Opc, {Dst}, {True, False}).addImm(CC);
1281 constrainSelectedInstRegOperands(*SelectInst, TII, TRI, RBI);
1282 return &*SelectInst;
1283}
1284
1285static AArch64CC::CondCode changeICMPPredToAArch64CC(CmpInst::Predicate P) {
1286 switch (P) {
1287 default:
1288 llvm_unreachable("Unknown condition code!")::llvm::llvm_unreachable_internal("Unknown condition code!", "llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 1288)
;
1289 case CmpInst::ICMP_NE:
1290 return AArch64CC::NE;
1291 case CmpInst::ICMP_EQ:
1292 return AArch64CC::EQ;
1293 case CmpInst::ICMP_SGT:
1294 return AArch64CC::GT;
1295 case CmpInst::ICMP_SGE:
1296 return AArch64CC::GE;
1297 case CmpInst::ICMP_SLT:
1298 return AArch64CC::LT;
1299 case CmpInst::ICMP_SLE:
1300 return AArch64CC::LE;
1301 case CmpInst::ICMP_UGT:
1302 return AArch64CC::HI;
1303 case CmpInst::ICMP_UGE:
1304 return AArch64CC::HS;
1305 case CmpInst::ICMP_ULT:
1306 return AArch64CC::LO;
1307 case CmpInst::ICMP_ULE:
1308 return AArch64CC::LS;
1309 }
1310}
1311
1312/// Return a register which can be used as a bit to test in a TB(N)Z.
1313static Register getTestBitReg(Register Reg, uint64_t &Bit, bool &Invert,
1314 MachineRegisterInfo &MRI) {
1315 assert(Reg.isValid() && "Expected valid register!")(static_cast <bool> (Reg.isValid() && "Expected valid register!"
) ? void (0) : __assert_fail ("Reg.isValid() && \"Expected valid register!\""
, "llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 1315, __extension__ __PRETTY_FUNCTION__))
;
1316 bool HasZext = false;
1317 while (MachineInstr *MI = getDefIgnoringCopies(Reg, MRI)) {
1318 unsigned Opc = MI->getOpcode();
1319
1320 if (!MI->getOperand(0).isReg() ||
1321 !MRI.hasOneNonDBGUse(MI->getOperand(0).getReg()))
1322 break;
1323
1324 // (tbz (any_ext x), b) -> (tbz x, b) if we don't use the extended bits.
1325 //
1326 // (tbz (trunc x), b) -> (tbz x, b) is always safe, because the bit number
1327 // on the truncated x is the same as the bit number on x.
1328 if (Opc == TargetOpcode::G_ANYEXT || Opc == TargetOpcode::G_ZEXT ||
1329 Opc == TargetOpcode::G_TRUNC) {
1330 if (Opc == TargetOpcode::G_ZEXT)
1331 HasZext = true;
1332
1333 Register NextReg = MI->getOperand(1).getReg();
1334 // Did we find something worth folding?
1335 if (!NextReg.isValid() || !MRI.hasOneNonDBGUse(NextReg))
1336 break;
1337
1338 // NextReg is worth folding. Keep looking.
1339 Reg = NextReg;
1340 continue;
1341 }
1342
1343 // Attempt to find a suitable operation with a constant on one side.
1344 Optional<uint64_t> C;
1345 Register TestReg;
1346 switch (Opc) {
1347 default:
1348 break;
1349 case TargetOpcode::G_AND:
1350 case TargetOpcode::G_XOR: {
1351 TestReg = MI->getOperand(1).getReg();
1352 Register ConstantReg = MI->getOperand(2).getReg();
1353 auto VRegAndVal = getIConstantVRegValWithLookThrough(ConstantReg, MRI);
1354 if (!VRegAndVal) {
1355 // AND commutes, check the other side for a constant.
1356 // FIXME: Can we canonicalize the constant so that it's always on the
1357 // same side at some point earlier?
1358 std::swap(ConstantReg, TestReg);
1359 VRegAndVal = getIConstantVRegValWithLookThrough(ConstantReg, MRI);
1360 }
1361 if (VRegAndVal) {
1362 if (HasZext)
1363 C = VRegAndVal->Value.getZExtValue();
1364 else
1365 C = VRegAndVal->Value.getSExtValue();
1366 }
1367 break;
1368 }
1369 case TargetOpcode::G_ASHR:
1370 case TargetOpcode::G_LSHR:
1371 case TargetOpcode::G_SHL: {
1372 TestReg = MI->getOperand(1).getReg();
1373 auto VRegAndVal =
1374 getIConstantVRegValWithLookThrough(MI->getOperand(2).getReg(), MRI);
1375 if (VRegAndVal)
1376 C = VRegAndVal->Value.getSExtValue();
1377 break;
1378 }
1379 }
1380
1381 // Didn't find a constant or viable register. Bail out of the loop.
1382 if (!C || !TestReg.isValid())
1383 break;
1384
1385 // We found a suitable instruction with a constant. Check to see if we can
1386 // walk through the instruction.
1387 Register NextReg;
1388 unsigned TestRegSize = MRI.getType(TestReg).getSizeInBits();
1389 switch (Opc) {
1390 default:
1391 break;
1392 case TargetOpcode::G_AND:
1393 // (tbz (and x, m), b) -> (tbz x, b) when the b-th bit of m is set.
1394 if ((*C >> Bit) & 1)
1395 NextReg = TestReg;
1396 break;
1397 case TargetOpcode::G_SHL:
1398 // (tbz (shl x, c), b) -> (tbz x, b-c) when b-c is positive and fits in
1399 // the type of the register.
1400 if (*C <= Bit && (Bit - *C) < TestRegSize) {
1401 NextReg = TestReg;
1402 Bit = Bit - *C;
1403 }
1404 break;
1405 case TargetOpcode::G_ASHR:
1406 // (tbz (ashr x, c), b) -> (tbz x, b+c) or (tbz x, msb) if b+c is > # bits
1407 // in x
1408 NextReg = TestReg;
1409 Bit = Bit + *C;
1410 if (Bit >= TestRegSize)
1411 Bit = TestRegSize - 1;
1412 break;
1413 case TargetOpcode::G_LSHR:
1414 // (tbz (lshr x, c), b) -> (tbz x, b+c) when b + c is < # bits in x
1415 if ((Bit + *C) < TestRegSize) {
1416 NextReg = TestReg;
1417 Bit = Bit + *C;
1418 }
1419 break;
1420 case TargetOpcode::G_XOR:
1421 // We can walk through a G_XOR by inverting whether we use tbz/tbnz when
1422 // appropriate.
1423 //
1424 // e.g. If x' = xor x, c, and the b-th bit is set in c then
1425 //
1426 // tbz x', b -> tbnz x, b
1427 //
1428 // Because x' only has the b-th bit set if x does not.
1429 if ((*C >> Bit) & 1)
1430 Invert = !Invert;
1431 NextReg = TestReg;
1432 break;
1433 }
1434
1435 // Check if we found anything worth folding.
1436 if (!NextReg.isValid())
1437 return Reg;
1438 Reg = NextReg;
1439 }
1440
1441 return Reg;
1442}
1443
1444MachineInstr *AArch64InstructionSelector::emitTestBit(
1445 Register TestReg, uint64_t Bit, bool IsNegative, MachineBasicBlock *DstMBB,
1446 MachineIRBuilder &MIB) const {
1447 assert(TestReg.isValid())(static_cast <bool> (TestReg.isValid()) ? void (0) : __assert_fail
("TestReg.isValid()", "llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 1447, __extension__ __PRETTY_FUNCTION__))
;
1448 assert(ProduceNonFlagSettingCondBr &&(static_cast <bool> (ProduceNonFlagSettingCondBr &&
"Cannot emit TB(N)Z with speculation tracking!") ? void (0) :
__assert_fail ("ProduceNonFlagSettingCondBr && \"Cannot emit TB(N)Z with speculation tracking!\""
, "llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 1449, __extension__ __PRETTY_FUNCTION__))
1449 "Cannot emit TB(N)Z with speculation tracking!")(static_cast <bool> (ProduceNonFlagSettingCondBr &&
"Cannot emit TB(N)Z with speculation tracking!") ? void (0) :
__assert_fail ("ProduceNonFlagSettingCondBr && \"Cannot emit TB(N)Z with speculation tracking!\""
, "llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 1449, __extension__ __PRETTY_FUNCTION__))
;
1450 MachineRegisterInfo &MRI = *MIB.getMRI();
1451
1452 // Attempt to optimize the test bit by walking over instructions.
1453 TestReg = getTestBitReg(TestReg, Bit, IsNegative, MRI);
1454 LLT Ty = MRI.getType(TestReg);
1455 unsigned Size = Ty.getSizeInBits();
1456 assert(!Ty.isVector() && "Expected a scalar!")(static_cast <bool> (!Ty.isVector() && "Expected a scalar!"
) ? void (0) : __assert_fail ("!Ty.isVector() && \"Expected a scalar!\""
, "llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 1456, __extension__ __PRETTY_FUNCTION__))
;
1457 assert(Bit < 64 && "Bit is too large!")(static_cast <bool> (Bit < 64 && "Bit is too large!"
) ? void (0) : __assert_fail ("Bit < 64 && \"Bit is too large!\""
, "llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 1457, __extension__ __PRETTY_FUNCTION__))
;
1458
1459 // When the test register is a 64-bit register, we have to narrow to make
1460 // TBNZW work.
1461 bool UseWReg = Bit < 32;
1462 unsigned NecessarySize = UseWReg ? 32 : 64;
1463 if (Size != NecessarySize)
1464 TestReg = moveScalarRegClass(
1465 TestReg, UseWReg ? AArch64::GPR32RegClass : AArch64::GPR64RegClass,
1466 MIB);
1467
1468 static const unsigned OpcTable[2][2] = {{AArch64::TBZX, AArch64::TBNZX},
1469 {AArch64::TBZW, AArch64::TBNZW}};
1470 unsigned Opc = OpcTable[UseWReg][IsNegative];
1471 auto TestBitMI =
1472 MIB.buildInstr(Opc).addReg(TestReg).addImm(Bit).addMBB(DstMBB);
1473 constrainSelectedInstRegOperands(*TestBitMI, TII, TRI, RBI);
1474 return &*TestBitMI;
1475}
1476
1477bool AArch64InstructionSelector::tryOptAndIntoCompareBranch(
1478 MachineInstr &AndInst, bool Invert, MachineBasicBlock *DstMBB,
1479 MachineIRBuilder &MIB) const {
1480 assert(AndInst.getOpcode() == TargetOpcode::G_AND && "Expected G_AND only?")(static_cast <bool> (AndInst.getOpcode() == TargetOpcode
::G_AND && "Expected G_AND only?") ? void (0) : __assert_fail
("AndInst.getOpcode() == TargetOpcode::G_AND && \"Expected G_AND only?\""
, "llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 1480, __extension__ __PRETTY_FUNCTION__))
;
1481 // Given something like this:
1482 //
1483 // %x = ...Something...
1484 // %one = G_CONSTANT i64 1
1485 // %zero = G_CONSTANT i64 0
1486 // %and = G_AND %x, %one
1487 // %cmp = G_ICMP intpred(ne), %and, %zero
1488 // %cmp_trunc = G_TRUNC %cmp
1489 // G_BRCOND %cmp_trunc, %bb.3
1490 //
1491 // We want to try and fold the AND into the G_BRCOND and produce either a
1492 // TBNZ (when we have intpred(ne)) or a TBZ (when we have intpred(eq)).
1493 //
1494 // In this case, we'd get
1495 //
1496 // TBNZ %x %bb.3
1497 //
1498
1499 // Check if the AND has a constant on its RHS which we can use as a mask.
1500 // If it's a power of 2, then it's the same as checking a specific bit.
1501 // (e.g, ANDing with 8 == ANDing with 000...100 == testing if bit 3 is set)
1502 auto MaybeBit = getIConstantVRegValWithLookThrough(
1503 AndInst.getOperand(2).getReg(), *MIB.getMRI());
1504 if (!MaybeBit)
1505 return false;
1506
1507 int32_t Bit = MaybeBit->Value.exactLogBase2();
1508 if (Bit < 0)
1509 return false;
1510
1511 Register TestReg = AndInst.getOperand(1).getReg();
1512
1513 // Emit a TB(N)Z.
1514 emitTestBit(TestReg, Bit, Invert, DstMBB, MIB);
1515 return true;
1516}
1517
1518MachineInstr *AArch64InstructionSelector::emitCBZ(Register CompareReg,
1519 bool IsNegative,
1520 MachineBasicBlock *DestMBB,
1521 MachineIRBuilder &MIB) const {
1522 assert(ProduceNonFlagSettingCondBr && "CBZ does not set flags!")(static_cast <bool> (ProduceNonFlagSettingCondBr &&
"CBZ does not set flags!") ? void (0) : __assert_fail ("ProduceNonFlagSettingCondBr && \"CBZ does not set flags!\""
, "llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 1522, __extension__ __PRETTY_FUNCTION__))
;
1523 MachineRegisterInfo &MRI = *MIB.getMRI();
1524 assert(RBI.getRegBank(CompareReg, MRI, TRI)->getID() ==(static_cast <bool> (RBI.getRegBank(CompareReg, MRI, TRI
)->getID() == AArch64::GPRRegBankID && "Expected GPRs only?"
) ? void (0) : __assert_fail ("RBI.getRegBank(CompareReg, MRI, TRI)->getID() == AArch64::GPRRegBankID && \"Expected GPRs only?\""
, "llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 1526, __extension__ __PRETTY_FUNCTION__))
1525 AArch64::GPRRegBankID &&(static_cast <bool> (RBI.getRegBank(CompareReg, MRI, TRI
)->getID() == AArch64::GPRRegBankID && "Expected GPRs only?"
) ? void (0) : __assert_fail ("RBI.getRegBank(CompareReg, MRI, TRI)->getID() == AArch64::GPRRegBankID && \"Expected GPRs only?\""
, "llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 1526, __extension__ __PRETTY_FUNCTION__))
1526 "Expected GPRs only?")(static_cast <bool> (RBI.getRegBank(CompareReg, MRI, TRI
)->getID() == AArch64::GPRRegBankID && "Expected GPRs only?"
) ? void (0) : __assert_fail ("RBI.getRegBank(CompareReg, MRI, TRI)->getID() == AArch64::GPRRegBankID && \"Expected GPRs only?\""
, "llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 1526, __extension__ __PRETTY_FUNCTION__))
;
1527 auto Ty = MRI.getType(CompareReg);
1528 unsigned Width = Ty.getSizeInBits();
1529 assert(!Ty.isVector() && "Expected scalar only?")(static_cast <bool> (!Ty.isVector() && "Expected scalar only?"
) ? void (0) : __assert_fail ("!Ty.isVector() && \"Expected scalar only?\""
, "llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 1529, __extension__ __PRETTY_FUNCTION__))
;
1530 assert(Width <= 64 && "Expected width to be at most 64?")(static_cast <bool> (Width <= 64 && "Expected width to be at most 64?"
) ? void (0) : __assert_fail ("Width <= 64 && \"Expected width to be at most 64?\""
, "llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 1530, __extension__ __PRETTY_FUNCTION__))
;
1531 static const unsigned OpcTable[2][2] = {{AArch64::CBZW, AArch64::CBZX},
1532 {AArch64::CBNZW, AArch64::CBNZX}};
1533 unsigned Opc = OpcTable[IsNegative][Width == 64];
1534 auto BranchMI = MIB.buildInstr(Opc, {}, {CompareReg}).addMBB(DestMBB);
1535 constrainSelectedInstRegOperands(*BranchMI, TII, TRI, RBI);
1536 return &*BranchMI;
1537}
1538
1539bool AArch64InstructionSelector::selectCompareBranchFedByFCmp(
1540 MachineInstr &I, MachineInstr &FCmp, MachineIRBuilder &MIB) const {
1541 assert(FCmp.getOpcode() == TargetOpcode::G_FCMP)(static_cast <bool> (FCmp.getOpcode() == TargetOpcode::
G_FCMP) ? void (0) : __assert_fail ("FCmp.getOpcode() == TargetOpcode::G_FCMP"
, "llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 1541, __extension__ __PRETTY_FUNCTION__))
;
1542 assert(I.getOpcode() == TargetOpcode::G_BRCOND)(static_cast <bool> (I.getOpcode() == TargetOpcode::G_BRCOND
) ? void (0) : __assert_fail ("I.getOpcode() == TargetOpcode::G_BRCOND"
, "llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 1542, __extension__ __PRETTY_FUNCTION__))
;
1543 // Unfortunately, the mapping of LLVM FP CC's onto AArch64 CC's isn't
1544 // totally clean. Some of them require two branches to implement.
1545 auto Pred = (CmpInst::Predicate)FCmp.getOperand(1).getPredicate();
1546 emitFPCompare(FCmp.getOperand(2).getReg(), FCmp.getOperand(3).getReg(), MIB,
1547 Pred);
1548 AArch64CC::CondCode CC1, CC2;
1549 changeFCMPPredToAArch64CC(static_cast<CmpInst::Predicate>(Pred), CC1, CC2);
1550 MachineBasicBlock *DestMBB = I.getOperand(1).getMBB();
1551 MIB.buildInstr(AArch64::Bcc, {}, {}).addImm(CC1).addMBB(DestMBB);
1552 if (CC2 != AArch64CC::AL)
1553 MIB.buildInstr(AArch64::Bcc, {}, {}).addImm(CC2).addMBB(DestMBB);
1554 I.eraseFromParent();
1555 return true;
1556}
1557
1558bool AArch64InstructionSelector::tryOptCompareBranchFedByICmp(
1559 MachineInstr &I, MachineInstr &ICmp, MachineIRBuilder &MIB) const {
1560 assert(ICmp.getOpcode() == TargetOpcode::G_ICMP)(static_cast <bool> (ICmp.getOpcode() == TargetOpcode::
G_ICMP) ? void (0) : __assert_fail ("ICmp.getOpcode() == TargetOpcode::G_ICMP"
, "llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 1560, __extension__ __PRETTY_FUNCTION__))
;
1561 assert(I.getOpcode() == TargetOpcode::G_BRCOND)(static_cast <bool> (I.getOpcode() == TargetOpcode::G_BRCOND
) ? void (0) : __assert_fail ("I.getOpcode() == TargetOpcode::G_BRCOND"
, "llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 1561, __extension__ __PRETTY_FUNCTION__))
;
1562 // Attempt to optimize the G_BRCOND + G_ICMP into a TB(N)Z/CB(N)Z.
1563 //
1564 // Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z
1565 // instructions will not be produced, as they are conditional branch
1566 // instructions that do not set flags.
1567 if (!ProduceNonFlagSettingCondBr)
1568 return false;
1569
1570 MachineRegisterInfo &MRI = *MIB.getMRI();
1571 MachineBasicBlock *DestMBB = I.getOperand(1).getMBB();
1572 auto Pred =
1573 static_cast<CmpInst::Predicate>(ICmp.getOperand(1).getPredicate());
1574 Register LHS = ICmp.getOperand(2).getReg();
1575 Register RHS = ICmp.getOperand(3).getReg();
1576
1577 // We're allowed to emit a TB(N)Z/CB(N)Z. Try to do that.
1578 auto VRegAndVal = getIConstantVRegValWithLookThrough(RHS, MRI);
1579 MachineInstr *AndInst = getOpcodeDef(TargetOpcode::G_AND, LHS, MRI);
1580
1581 // When we can emit a TB(N)Z, prefer that.
1582 //
1583 // Handle non-commutative condition codes first.
1584 // Note that we don't want to do this when we have a G_AND because it can
1585 // become a tst. The tst will make the test bit in the TB(N)Z redundant.
1586 if (VRegAndVal && !AndInst) {
1587 int64_t C = VRegAndVal->Value.getSExtValue();
1588
1589 // When we have a greater-than comparison, we can just test if the msb is
1590 // zero.
1591 if (C == -1 && Pred == CmpInst::ICMP_SGT) {
1592 uint64_t Bit = MRI.getType(LHS).getSizeInBits() - 1;
1593 emitTestBit(LHS, Bit, /*IsNegative = */ false, DestMBB, MIB);
1594 I.eraseFromParent();
1595 return true;
1596 }
1597
1598 // When we have a less than comparison, we can just test if the msb is not
1599 // zero.
1600 if (C == 0 && Pred == CmpInst::ICMP_SLT) {
1601 uint64_t Bit = MRI.getType(LHS).getSizeInBits() - 1;
1602 emitTestBit(LHS, Bit, /*IsNegative = */ true, DestMBB, MIB);
1603 I.eraseFromParent();
1604 return true;
1605 }
1606 }
1607
1608 // Attempt to handle commutative condition codes. Right now, that's only
1609 // eq/ne.
1610 if (ICmpInst::isEquality(Pred)) {
1611 if (!VRegAndVal) {
1612 std::swap(RHS, LHS);
1613 VRegAndVal = getIConstantVRegValWithLookThrough(RHS, MRI);
1614 AndInst = getOpcodeDef(TargetOpcode::G_AND, LHS, MRI);
1615 }
1616
1617 if (VRegAndVal && VRegAndVal->Value == 0) {
1618 // If there's a G_AND feeding into this branch, try to fold it away by
1619 // emitting a TB(N)Z instead.
1620 //
1621 // Note: If we have LT, then it *is* possible to fold, but it wouldn't be
1622 // beneficial. When we have an AND and LT, we need a TST/ANDS, so folding
1623 // would be redundant.
1624 if (AndInst &&
1625 tryOptAndIntoCompareBranch(
1626 *AndInst, /*Invert = */ Pred == CmpInst::ICMP_NE, DestMBB, MIB)) {
1627 I.eraseFromParent();
1628 return true;
1629 }
1630
1631 // Otherwise, try to emit a CB(N)Z instead.
1632 auto LHSTy = MRI.getType(LHS);
1633 if (!LHSTy.isVector() && LHSTy.getSizeInBits() <= 64) {
1634 emitCBZ(LHS, /*IsNegative = */ Pred == CmpInst::ICMP_NE, DestMBB, MIB);
1635 I.eraseFromParent();
1636 return true;
1637 }
1638 }
1639 }
1640
1641 return false;
1642}
1643
1644bool AArch64InstructionSelector::selectCompareBranchFedByICmp(
1645 MachineInstr &I, MachineInstr &ICmp, MachineIRBuilder &MIB) const {
1646 assert(ICmp.getOpcode() == TargetOpcode::G_ICMP)(static_cast <bool> (ICmp.getOpcode() == TargetOpcode::
G_ICMP) ? void (0) : __assert_fail ("ICmp.getOpcode() == TargetOpcode::G_ICMP"
, "llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 1646, __extension__ __PRETTY_FUNCTION__))
;
1647 assert(I.getOpcode() == TargetOpcode::G_BRCOND)(static_cast <bool> (I.getOpcode() == TargetOpcode::G_BRCOND
) ? void (0) : __assert_fail ("I.getOpcode() == TargetOpcode::G_BRCOND"
, "llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 1647, __extension__ __PRETTY_FUNCTION__))
;
1648 if (tryOptCompareBranchFedByICmp(I, ICmp, MIB))
1649 return true;
1650
1651 // Couldn't optimize. Emit a compare + a Bcc.
1652 MachineBasicBlock *DestMBB = I.getOperand(1).getMBB();
1653 auto PredOp = ICmp.getOperand(1);
1654 emitIntegerCompare(ICmp.getOperand(2), ICmp.getOperand(3), PredOp, MIB);
1655 const AArch64CC::CondCode CC = changeICMPPredToAArch64CC(
1656 static_cast<CmpInst::Predicate>(PredOp.getPredicate()));
1657 MIB.buildInstr(AArch64::Bcc, {}, {}).addImm(CC).addMBB(DestMBB);
1658 I.eraseFromParent();
1659 return true;
1660}
1661
1662bool AArch64InstructionSelector::selectCompareBranch(
1663 MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) {
1664 Register CondReg = I.getOperand(0).getReg();
1665 MachineInstr *CCMI = MRI.getVRegDef(CondReg);
1666 if (CCMI->getOpcode() == TargetOpcode::G_TRUNC) {
1667 CondReg = CCMI->getOperand(1).getReg();
1668 CCMI = MRI.getVRegDef(CondReg);
1669 }
1670
1671 // Try to select the G_BRCOND using whatever is feeding the condition if
1672 // possible.
1673 unsigned CCMIOpc = CCMI->getOpcode();
1674 if (CCMIOpc == TargetOpcode::G_FCMP)
1675 return selectCompareBranchFedByFCmp(I, *CCMI, MIB);
1676 if (CCMIOpc == TargetOpcode::G_ICMP)
1677 return selectCompareBranchFedByICmp(I, *CCMI, MIB);
1678
1679 // Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z
1680 // instructions will not be produced, as they are conditional branch
1681 // instructions that do not set flags.
1682 if (ProduceNonFlagSettingCondBr) {
1683 emitTestBit(CondReg, /*Bit = */ 0, /*IsNegative = */ true,
1684 I.getOperand(1).getMBB(), MIB);
1685 I.eraseFromParent();
1686 return true;
1687 }
1688
1689 // Can't emit TB(N)Z/CB(N)Z. Emit a tst + bcc instead.
1690 auto TstMI =
1691 MIB.buildInstr(AArch64::ANDSWri, {LLT::scalar(32)}, {CondReg}).addImm(1);
1692 constrainSelectedInstRegOperands(*TstMI, TII, TRI, RBI);
1693 auto Bcc = MIB.buildInstr(AArch64::Bcc)
1694 .addImm(AArch64CC::EQ)
1695 .addMBB(I.getOperand(1).getMBB());
1696 I.eraseFromParent();
1697 return constrainSelectedInstRegOperands(*Bcc, TII, TRI, RBI);
1698}
1699
1700/// Returns the element immediate value of a vector shift operand if found.
1701/// This needs to detect a splat-like operation, e.g. a G_BUILD_VECTOR.
1702static Optional<int64_t> getVectorShiftImm(Register Reg,
1703 MachineRegisterInfo &MRI) {
1704 assert(MRI.getType(Reg).isVector() && "Expected a *vector* shift operand")(static_cast <bool> (MRI.getType(Reg).isVector() &&
"Expected a *vector* shift operand") ? void (0) : __assert_fail
("MRI.getType(Reg).isVector() && \"Expected a *vector* shift operand\""
, "llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 1704, __extension__ __PRETTY_FUNCTION__))
;
1705 MachineInstr *OpMI = MRI.getVRegDef(Reg);
1706 assert(OpMI && "Expected to find a vreg def for vector shift operand")(static_cast <bool> (OpMI && "Expected to find a vreg def for vector shift operand"
) ? void (0) : __assert_fail ("OpMI && \"Expected to find a vreg def for vector shift operand\""
, "llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 1706, __extension__ __PRETTY_FUNCTION__))
;
1707 return getAArch64VectorSplatScalar(*OpMI, MRI);
1708}
1709
1710/// Matches and returns the shift immediate value for a SHL instruction given
1711/// a shift operand.
1712static Optional<int64_t> getVectorSHLImm(LLT SrcTy, Register Reg, MachineRegisterInfo &MRI) {
1713 Optional<int64_t> ShiftImm = getVectorShiftImm(Reg, MRI);
1714 if (!ShiftImm)
1715 return None;
1716 // Check the immediate is in range for a SHL.
1717 int64_t Imm = *ShiftImm;
1718 if (Imm < 0)
1719 return None;
1720 switch (SrcTy.getElementType().getSizeInBits()) {
1721 default:
1722 LLVM_DEBUG(dbgs() << "Unhandled element type for vector shift")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "Unhandled element type for vector shift"
; } } while (false)
;
1723 return None;
1724 case 8:
1725 if (Imm > 7)
1726 return None;
1727 break;
1728 case 16:
1729 if (Imm > 15)
1730 return None;
1731 break;
1732 case 32:
1733 if (Imm > 31)
1734 return None;
1735 break;
1736 case 64:
1737 if (Imm > 63)
1738 return None;
1739 break;
1740 }
1741 return Imm;
1742}
1743
1744bool AArch64InstructionSelector::selectVectorSHL(MachineInstr &I,
1745 MachineRegisterInfo &MRI) {
1746 assert(I.getOpcode() == TargetOpcode::G_SHL)(static_cast <bool> (I.getOpcode() == TargetOpcode::G_SHL
) ? void (0) : __assert_fail ("I.getOpcode() == TargetOpcode::G_SHL"
, "llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 1746, __extension__ __PRETTY_FUNCTION__))
;
1747 Register DstReg = I.getOperand(0).getReg();
1748 const LLT Ty = MRI.getType(DstReg);
1749 Register Src1Reg = I.getOperand(1).getReg();
1750 Register Src2Reg = I.getOperand(2).getReg();
1751
1752 if (!Ty.isVector())
1753 return false;
1754
1755 // Check if we have a vector of constants on RHS that we can select as the
1756 // immediate form.
1757 Optional<int64_t> ImmVal = getVectorSHLImm(Ty, Src2Reg, MRI);
1758
1759 unsigned Opc = 0;
1760 if (Ty == LLT::fixed_vector(2, 64)) {
1761 Opc = ImmVal ? AArch64::SHLv2i64_shift : AArch64::USHLv2i64;
1762 } else if (Ty == LLT::fixed_vector(4, 32)) {
1763 Opc = ImmVal ? AArch64::SHLv4i32_shift : AArch64::USHLv4i32;
1764 } else if (Ty == LLT::fixed_vector(2, 32)) {
1765 Opc = ImmVal ? AArch64::SHLv2i32_shift : AArch64::USHLv2i32;
1766 } else if (Ty == LLT::fixed_vector(4, 16)) {
1767 Opc = ImmVal ? AArch64::SHLv4i16_shift : AArch64::USHLv4i16;
1768 } else if (Ty == LLT::fixed_vector(8, 16)) {
1769 Opc = ImmVal ? AArch64::SHLv8i16_shift : AArch64::USHLv8i16;
1770 } else if (Ty == LLT::fixed_vector(16, 8)) {
1771 Opc = ImmVal ? AArch64::SHLv16i8_shift : AArch64::USHLv16i8;
1772 } else if (Ty == LLT::fixed_vector(8, 8)) {
1773 Opc = ImmVal ? AArch64::SHLv8i8_shift : AArch64::USHLv8i8;
1774 } else {
1775 LLVM_DEBUG(dbgs() << "Unhandled G_SHL type")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "Unhandled G_SHL type"; }
} while (false)
;
1776 return false;
1777 }
1778
1779 auto Shl = MIB.buildInstr(Opc, {DstReg}, {Src1Reg});
1780 if (ImmVal)
1781 Shl.addImm(*ImmVal);
1782 else
1783 Shl.addUse(Src2Reg);
1784 constrainSelectedInstRegOperands(*Shl, TII, TRI, RBI);
1785 I.eraseFromParent();
1786 return true;
1787}
1788
1789bool AArch64InstructionSelector::selectVectorAshrLshr(
1790 MachineInstr &I, MachineRegisterInfo &MRI) {
1791 assert(I.getOpcode() == TargetOpcode::G_ASHR ||(static_cast <bool> (I.getOpcode() == TargetOpcode::G_ASHR
|| I.getOpcode() == TargetOpcode::G_LSHR) ? void (0) : __assert_fail
("I.getOpcode() == TargetOpcode::G_ASHR || I.getOpcode() == TargetOpcode::G_LSHR"
, "llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 1792, __extension__ __PRETTY_FUNCTION__))
1792 I.getOpcode() == TargetOpcode::G_LSHR)(static_cast <bool> (I.getOpcode() == TargetOpcode::G_ASHR
|| I.getOpcode() == TargetOpcode::G_LSHR) ? void (0) : __assert_fail
("I.getOpcode() == TargetOpcode::G_ASHR || I.getOpcode() == TargetOpcode::G_LSHR"
, "llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 1792, __extension__ __PRETTY_FUNCTION__))
;
1793 Register DstReg = I.getOperand(0).getReg();
1794 const LLT Ty = MRI.getType(DstReg);
1795 Register Src1Reg = I.getOperand(1).getReg();
1796 Register Src2Reg = I.getOperand(2).getReg();
1797
1798 if (!Ty.isVector())
1799 return false;
1800
1801 bool IsASHR = I.getOpcode() == TargetOpcode::G_ASHR;
1802
1803 // We expect the immediate case to be lowered in the PostLegalCombiner to
1804 // AArch64ISD::VASHR or AArch64ISD::VLSHR equivalents.
1805
1806 // There is not a shift right register instruction, but the shift left
1807 // register instruction takes a signed value, where negative numbers specify a
1808 // right shift.
1809
1810 unsigned Opc = 0;
1811 unsigned NegOpc = 0;
1812 const TargetRegisterClass *RC =
1813 getRegClassForTypeOnBank(Ty, RBI.getRegBank(AArch64::FPRRegBankID), RBI);
1814 if (Ty == LLT::fixed_vector(2, 64)) {
1815 Opc = IsASHR ? AArch64::SSHLv2i64 : AArch64::USHLv2i64;
1816 NegOpc = AArch64::NEGv2i64;
1817 } else if (Ty == LLT::fixed_vector(4, 32)) {
1818 Opc = IsASHR ? AArch64::SSHLv4i32 : AArch64::USHLv4i32;
1819 NegOpc = AArch64::NEGv4i32;
1820 } else if (Ty == LLT::fixed_vector(2, 32)) {
1821 Opc = IsASHR ? AArch64::SSHLv2i32 : AArch64::USHLv2i32;
1822 NegOpc = AArch64::NEGv2i32;
1823 } else if (Ty == LLT::fixed_vector(4, 16)) {
1824 Opc = IsASHR ? AArch64::SSHLv4i16 : AArch64::USHLv4i16;
1825 NegOpc = AArch64::NEGv4i16;
1826 } else if (Ty == LLT::fixed_vector(8, 16)) {
1827 Opc = IsASHR ? AArch64::SSHLv8i16 : AArch64::USHLv8i16;
1828 NegOpc = AArch64::NEGv8i16;
1829 } else if (Ty == LLT::fixed_vector(16, 8)) {
1830 Opc = IsASHR ? AArch64::SSHLv16i8 : AArch64::USHLv16i8;
1831 NegOpc = AArch64::NEGv16i8;
1832 } else if (Ty == LLT::fixed_vector(8, 8)) {
1833 Opc = IsASHR ? AArch64::SSHLv8i8 : AArch64::USHLv8i8;
1834 NegOpc = AArch64::NEGv8i8;
1835 } else {
1836 LLVM_DEBUG(dbgs() << "Unhandled G_ASHR type")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "Unhandled G_ASHR type"; }
} while (false)
;
1837 return false;
1838 }
1839
1840 auto Neg = MIB.buildInstr(NegOpc, {RC}, {Src2Reg});
1841 constrainSelectedInstRegOperands(*Neg, TII, TRI, RBI);
1842 auto SShl = MIB.buildInstr(Opc, {DstReg}, {Src1Reg, Neg});
1843 constrainSelectedInstRegOperands(*SShl, TII, TRI, RBI);
1844 I.eraseFromParent();
1845 return true;
1846}
1847
1848bool AArch64InstructionSelector::selectVaStartAAPCS(
1849 MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const {
1850 return false;
1851}
1852
1853bool AArch64InstructionSelector::selectVaStartDarwin(
1854 MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const {
1855 AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
1856 Register ListReg = I.getOperand(0).getReg();
1857
1858 Register ArgsAddrReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
1859
1860 auto MIB =
1861 BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::ADDXri))
1862 .addDef(ArgsAddrReg)
1863 .addFrameIndex(FuncInfo->getVarArgsStackIndex())
1864 .addImm(0)
1865 .addImm(0);
1866
1867 constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
1868
1869 MIB = BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::STRXui))
1870 .addUse(ArgsAddrReg)
1871 .addUse(ListReg)
1872 .addImm(0)
1873 .addMemOperand(*I.memoperands_begin());
1874
1875 constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
1876 I.eraseFromParent();
1877 return true;
1878}
1879
1880void AArch64InstructionSelector::materializeLargeCMVal(
1881 MachineInstr &I, const Value *V, unsigned OpFlags) {
1882 MachineBasicBlock &MBB = *I.getParent();
1883 MachineFunction &MF = *MBB.getParent();
1884 MachineRegisterInfo &MRI = MF.getRegInfo();
1885
1886 auto MovZ = MIB.buildInstr(AArch64::MOVZXi, {&AArch64::GPR64RegClass}, {});
1887 MovZ->addOperand(MF, I.getOperand(1));
1888 MovZ->getOperand(1).setTargetFlags(OpFlags | AArch64II::MO_G0 |
1889 AArch64II::MO_NC);
1890 MovZ->addOperand(MF, MachineOperand::CreateImm(0));
1891 constrainSelectedInstRegOperands(*MovZ, TII, TRI, RBI);
1892
1893 auto BuildMovK = [&](Register SrcReg, unsigned char Flags, unsigned Offset,
1894 Register ForceDstReg) {
1895 Register DstReg = ForceDstReg
1896 ? ForceDstReg
1897 : MRI.createVirtualRegister(&AArch64::GPR64RegClass);
1898 auto MovI = MIB.buildInstr(AArch64::MOVKXi).addDef(DstReg).addUse(SrcReg);
1899 if (auto *GV = dyn_cast<GlobalValue>(V)) {
1900 MovI->addOperand(MF, MachineOperand::CreateGA(
1901 GV, MovZ->getOperand(1).getOffset(), Flags));
1902 } else {
1903 MovI->addOperand(
1904 MF, MachineOperand::CreateBA(cast<BlockAddress>(V),
1905 MovZ->getOperand(1).getOffset(), Flags));
1906 }
1907 MovI->addOperand(MF, MachineOperand::CreateImm(Offset));
1908 constrainSelectedInstRegOperands(*MovI, TII, TRI, RBI);
1909 return DstReg;
1910 };
1911 Register DstReg = BuildMovK(MovZ.getReg(0),
1912 AArch64II::MO_G1 | AArch64II::MO_NC, 16, 0);
1913 DstReg = BuildMovK(DstReg, AArch64II::MO_G2 | AArch64II::MO_NC, 32, 0);
1914 BuildMovK(DstReg, AArch64II::MO_G3, 48, I.getOperand(0).getReg());
1915}
1916
1917bool AArch64InstructionSelector::preISelLower(MachineInstr &I) {
1918 MachineBasicBlock &MBB = *I.getParent();
1919 MachineFunction &MF = *MBB.getParent();
1920 MachineRegisterInfo &MRI = MF.getRegInfo();
1921
1922 switch (I.getOpcode()) {
1923 case TargetOpcode::G_STORE: {
1924 bool Changed = contractCrossBankCopyIntoStore(I, MRI);
1925 MachineOperand &SrcOp = I.getOperand(0);
1926 if (MRI.getType(SrcOp.getReg()).isPointer()) {
1927 // Allow matching with imported patterns for stores of pointers. Unlike
1928 // G_LOAD/G_PTR_ADD, we may not have selected all users. So, emit a copy
1929 // and constrain.
1930 auto Copy = MIB.buildCopy(LLT::scalar(64), SrcOp);
1931 Register NewSrc = Copy.getReg(0);
1932 SrcOp.setReg(NewSrc);
1933 RBI.constrainGenericRegister(NewSrc, AArch64::GPR64RegClass, MRI);
1934 Changed = true;
1935 }
1936 return Changed;
1937 }
1938 case TargetOpcode::G_PTR_ADD:
1939 return convertPtrAddToAdd(I, MRI);
1940 case TargetOpcode::G_LOAD: {
1941 // For scalar loads of pointers, we try to convert the dest type from p0
1942 // to s64 so that our imported patterns can match. Like with the G_PTR_ADD
1943 // conversion, this should be ok because all users should have been
1944 // selected already, so the type doesn't matter for them.
1945 Register DstReg = I.getOperand(0).getReg();
1946 const LLT DstTy = MRI.getType(DstReg);
1947 if (!DstTy.isPointer())
1948 return false;
1949 MRI.setType(DstReg, LLT::scalar(64));
1950 return true;
1951 }
1952 case AArch64::G_DUP: {
1953 // Convert the type from p0 to s64 to help selection.
1954 LLT DstTy = MRI.getType(I.getOperand(0).getReg());
1955 if (!DstTy.getElementType().isPointer())
1956 return false;
1957 auto NewSrc = MIB.buildCopy(LLT::scalar(64), I.getOperand(1).getReg());
1958 MRI.setType(I.getOperand(0).getReg(),
1959 DstTy.changeElementType(LLT::scalar(64)));
1960 MRI.setRegClass(NewSrc.getReg(0), &AArch64::GPR64RegClass);
1961 I.getOperand(1).setReg(NewSrc.getReg(0));
1962 return true;
1963 }
1964 case TargetOpcode::G_UITOFP:
1965 case TargetOpcode::G_SITOFP: {
1966 // If both source and destination regbanks are FPR, then convert the opcode
1967 // to G_SITOF so that the importer can select it to an fpr variant.
1968 // Otherwise, it ends up matching an fpr/gpr variant and adding a cross-bank
1969 // copy.
1970 Register SrcReg = I.getOperand(1).getReg();
1971 LLT SrcTy = MRI.getType(SrcReg);
1972 LLT DstTy = MRI.getType(I.getOperand(0).getReg());
1973 if (SrcTy.isVector() || SrcTy.getSizeInBits() != DstTy.getSizeInBits())
1974 return false;
1975
1976 if (RBI.getRegBank(SrcReg, MRI, TRI)->getID() == AArch64::FPRRegBankID) {
1977 if (I.getOpcode() == TargetOpcode::G_SITOFP)
1978 I.setDesc(TII.get(AArch64::G_SITOF));
1979 else
1980 I.setDesc(TII.get(AArch64::G_UITOF));
1981 return true;
1982 }
1983 return false;
1984 }
1985 default:
1986 return false;
1987 }
1988}
1989
1990/// This lowering tries to look for G_PTR_ADD instructions and then converts
1991/// them to a standard G_ADD with a COPY on the source.
1992///
1993/// The motivation behind this is to expose the add semantics to the imported
1994/// tablegen patterns. We shouldn't need to check for uses being loads/stores,
1995/// because the selector works bottom up, uses before defs. By the time we
1996/// end up trying to select a G_PTR_ADD, we should have already attempted to
1997/// fold this into addressing modes and were therefore unsuccessful.
1998bool AArch64InstructionSelector::convertPtrAddToAdd(
1999 MachineInstr &I, MachineRegisterInfo &MRI) {
2000 assert(I.getOpcode() == TargetOpcode::G_PTR_ADD && "Expected G_PTR_ADD")(static_cast <bool> (I.getOpcode() == TargetOpcode::G_PTR_ADD
&& "Expected G_PTR_ADD") ? void (0) : __assert_fail (
"I.getOpcode() == TargetOpcode::G_PTR_ADD && \"Expected G_PTR_ADD\""
, "llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 2000, __extension__ __PRETTY_FUNCTION__))
;
2001 Register DstReg = I.getOperand(0).getReg();
2002 Register AddOp1Reg = I.getOperand(1).getReg();
2003 const LLT PtrTy = MRI.getType(DstReg);
2004 if (PtrTy.getAddressSpace() != 0)
2005 return false;
2006
2007 const LLT CastPtrTy =
2008 PtrTy.isVector() ? LLT::fixed_vector(2, 64) : LLT::scalar(64);
2009 auto PtrToInt = MIB.buildPtrToInt(CastPtrTy, AddOp1Reg);
2010 // Set regbanks on the registers.
2011 if (PtrTy.isVector())
2012 MRI.setRegBank(PtrToInt.getReg(0), RBI.getRegBank(AArch64::FPRRegBankID));
2013 else
2014 MRI.setRegBank(PtrToInt.getReg(0), RBI.getRegBank(AArch64::GPRRegBankID));
2015
2016 // Now turn the %dst(p0) = G_PTR_ADD %base, off into:
2017 // %dst(intty) = G_ADD %intbase, off
2018 I.setDesc(TII.get(TargetOpcode::G_ADD));
2019 MRI.setType(DstReg, CastPtrTy);
2020 I.getOperand(1).setReg(PtrToInt.getReg(0));
2021 if (!select(*PtrToInt)) {
2022 LLVM_DEBUG(dbgs() << "Failed to select G_PTRTOINT in convertPtrAddToAdd")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "Failed to select G_PTRTOINT in convertPtrAddToAdd"
; } } while (false)
;
2023 return false;
2024 }
2025
2026 // Also take the opportunity here to try to do some optimization.
2027 // Try to convert this into a G_SUB if the offset is a 0-x negate idiom.
2028 Register NegatedReg;
2029 if (!mi_match(I.getOperand(2).getReg(), MRI, m_Neg(m_Reg(NegatedReg))))
2030 return true;
2031 I.getOperand(2).setReg(NegatedReg);
2032 I.setDesc(TII.get(TargetOpcode::G_SUB));
2033 return true;
2034}
2035
2036bool AArch64InstructionSelector::earlySelectSHL(MachineInstr &I,
2037 MachineRegisterInfo &MRI) {
2038 // We try to match the immediate variant of LSL, which is actually an alias
2039 // for a special case of UBFM. Otherwise, we fall back to the imported
2040 // selector which will match the register variant.
2041 assert(I.getOpcode() == TargetOpcode::G_SHL && "unexpected op")(static_cast <bool> (I.getOpcode() == TargetOpcode::G_SHL
&& "unexpected op") ? void (0) : __assert_fail ("I.getOpcode() == TargetOpcode::G_SHL && \"unexpected op\""
, "llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 2041, __extension__ __PRETTY_FUNCTION__))
;
2042 const auto &MO = I.getOperand(2);
2043 auto VRegAndVal = getIConstantVRegVal(MO.getReg(), MRI);
2044 if (!VRegAndVal)
2045 return false;
2046
2047 const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
2048 if (DstTy.isVector())
2049 return false;
2050 bool Is64Bit = DstTy.getSizeInBits() == 64;
2051 auto Imm1Fn = Is64Bit ? selectShiftA_64(MO) : selectShiftA_32(MO);
2052 auto Imm2Fn = Is64Bit ? selectShiftB_64(MO) : selectShiftB_32(MO);
2053
2054 if (!Imm1Fn || !Imm2Fn)
2055 return false;
2056
2057 auto NewI =
2058 MIB.buildInstr(Is64Bit ? AArch64::UBFMXri : AArch64::UBFMWri,
2059 {I.getOperand(0).getReg()}, {I.getOperand(1).getReg()});
2060
2061 for (auto &RenderFn : *Imm1Fn)
2062 RenderFn(NewI);
2063 for (auto &RenderFn : *Imm2Fn)
2064 RenderFn(NewI);
2065
2066 I.eraseFromParent();
2067 return constrainSelectedInstRegOperands(*NewI, TII, TRI, RBI);
2068}
2069
2070bool AArch64InstructionSelector::contractCrossBankCopyIntoStore(
2071 MachineInstr &I, MachineRegisterInfo &MRI) {
2072 assert(I.getOpcode() == TargetOpcode::G_STORE && "Expected G_STORE")(static_cast <bool> (I.getOpcode() == TargetOpcode::G_STORE
&& "Expected G_STORE") ? void (0) : __assert_fail ("I.getOpcode() == TargetOpcode::G_STORE && \"Expected G_STORE\""
, "llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 2072, __extension__ __PRETTY_FUNCTION__))
;
2073 // If we're storing a scalar, it doesn't matter what register bank that
2074 // scalar is on. All that matters is the size.
2075 //
2076 // So, if we see something like this (with a 32-bit scalar as an example):
2077 //
2078 // %x:gpr(s32) = ... something ...
2079 // %y:fpr(s32) = COPY %x:gpr(s32)
2080 // G_STORE %y:fpr(s32)
2081 //
2082 // We can fix this up into something like this:
2083 //
2084 // G_STORE %x:gpr(s32)
2085 //
2086 // And then continue the selection process normally.
2087 Register DefDstReg = getSrcRegIgnoringCopies(I.getOperand(0).getReg(), MRI);
2088 if (!DefDstReg.isValid())
2089 return false;
2090 LLT DefDstTy = MRI.getType(DefDstReg);
2091 Register StoreSrcReg = I.getOperand(0).getReg();
2092 LLT StoreSrcTy = MRI.getType(StoreSrcReg);
2093
2094 // If we get something strange like a physical register, then we shouldn't
2095 // go any further.
2096 if (!DefDstTy.isValid())
2097 return false;
2098
2099 // Are the source and dst types the same size?
2100 if (DefDstTy.getSizeInBits() != StoreSrcTy.getSizeInBits())
2101 return false;
2102
2103 if (RBI.getRegBank(StoreSrcReg, MRI, TRI) ==
2104 RBI.getRegBank(DefDstReg, MRI, TRI))
2105 return false;
2106
2107 // We have a cross-bank copy, which is entering a store. Let's fold it.
2108 I.getOperand(0).setReg(DefDstReg);
2109 return true;
2110}
2111
2112bool AArch64InstructionSelector::earlySelect(MachineInstr &I) {
2113 assert(I.getParent() && "Instruction should be in a basic block!")(static_cast <bool> (I.getParent() && "Instruction should be in a basic block!"
) ? void (0) : __assert_fail ("I.getParent() && \"Instruction should be in a basic block!\""
, "llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 2113, __extension__ __PRETTY_FUNCTION__))
;
2114 assert(I.getParent()->getParent() && "Instruction should be in a function!")(static_cast <bool> (I.getParent()->getParent() &&
"Instruction should be in a function!") ? void (0) : __assert_fail
("I.getParent()->getParent() && \"Instruction should be in a function!\""
, "llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 2114, __extension__ __PRETTY_FUNCTION__))
;
2115
2116 MachineBasicBlock &MBB = *I.getParent();
2117 MachineFunction &MF = *MBB.getParent();
2118 MachineRegisterInfo &MRI = MF.getRegInfo();
2119
2120 switch (I.getOpcode()) {
2121 case AArch64::G_DUP: {
2122 // Before selecting a DUP instruction, check if it is better selected as a
2123 // MOV or load from a constant pool.
2124 Register Src = I.getOperand(1).getReg();
2125 auto ValAndVReg = getIConstantVRegValWithLookThrough(Src, MRI);
2126 if (!ValAndVReg)
2127 return false;
2128 LLVMContext &Ctx = MF.getFunction().getContext();
2129 Register Dst = I.getOperand(0).getReg();
2130 auto *CV = ConstantDataVector::getSplat(
2131 MRI.getType(Dst).getNumElements(),
2132 ConstantInt::get(Type::getIntNTy(Ctx, MRI.getType(Src).getSizeInBits()),
2133 ValAndVReg->Value));
2134 if (!emitConstantVector(Dst, CV, MIB, MRI))
2135 return false;
2136 I.eraseFromParent();
2137 return true;
2138 }
2139 case TargetOpcode::G_SEXT:
2140 // Check for i64 sext(i32 vector_extract) prior to tablegen to select SMOV
2141 // over a normal extend.
2142 if (selectUSMovFromExtend(I, MRI))
2143 return true;
2144 return false;
2145 case TargetOpcode::G_BR:
2146 return false;
2147 case TargetOpcode::G_SHL:
2148 return earlySelectSHL(I, MRI);
2149 case TargetOpcode::G_CONSTANT: {
2150 bool IsZero = false;
2151 if (I.getOperand(1).isCImm())
2152 IsZero = I.getOperand(1).getCImm()->getZExtValue() == 0;
2153 else if (I.getOperand(1).isImm())
2154 IsZero = I.getOperand(1).getImm() == 0;
2155
2156 if (!IsZero)
2157 return false;
2158
2159 Register DefReg = I.getOperand(0).getReg();
2160 LLT Ty = MRI.getType(DefReg);
2161 if (Ty.getSizeInBits() == 64) {
2162 I.getOperand(1).ChangeToRegister(AArch64::XZR, false);
2163 RBI.constrainGenericRegister(DefReg, AArch64::GPR64RegClass, MRI);
2164 } else if (Ty.getSizeInBits() == 32) {
2165 I.getOperand(1).ChangeToRegister(AArch64::WZR, false);
2166 RBI.constrainGenericRegister(DefReg, AArch64::GPR32RegClass, MRI);
2167 } else
2168 return false;
2169
2170 I.setDesc(TII.get(TargetOpcode::COPY));
2171 return true;
2172 }
2173
2174 case TargetOpcode::G_ADD: {
2175 // Check if this is being fed by a G_ICMP on either side.
2176 //
2177 // (cmp pred, x, y) + z
2178 //
2179 // In the above case, when the cmp is true, we increment z by 1. So, we can
2180 // fold the add into the cset for the cmp by using cinc.
2181 //
2182 // FIXME: This would probably be a lot nicer in PostLegalizerLowering.
2183 Register AddDst = I.getOperand(0).getReg();
2184 Register AddLHS = I.getOperand(1).getReg();
2185 Register AddRHS = I.getOperand(2).getReg();
2186 // Only handle scalars.
2187 LLT Ty = MRI.getType(AddLHS);
2188 if (Ty.isVector())
2189 return false;
2190 // Since G_ICMP is modeled as ADDS/SUBS/ANDS, we can handle 32 bits or 64
2191 // bits.
2192 unsigned Size = Ty.getSizeInBits();
2193 if (Size != 32 && Size != 64)
2194 return false;
2195 auto MatchCmp = [&](Register Reg) -> MachineInstr * {
2196 if (!MRI.hasOneNonDBGUse(Reg))
2197 return nullptr;
2198 // If the LHS of the add is 32 bits, then we want to fold a 32-bit
2199 // compare.
2200 if (Size == 32)
2201 return getOpcodeDef(TargetOpcode::G_ICMP, Reg, MRI);
2202 // We model scalar compares using 32-bit destinations right now.
2203 // If it's a 64-bit compare, it'll have 64-bit sources.
2204 Register ZExt;
2205 if (!mi_match(Reg, MRI,
2206 m_OneNonDBGUse(m_GZExt(m_OneNonDBGUse(m_Reg(ZExt))))))
2207 return nullptr;
2208 auto *Cmp = getOpcodeDef(TargetOpcode::G_ICMP, ZExt, MRI);
2209 if (!Cmp ||
2210 MRI.getType(Cmp->getOperand(2).getReg()).getSizeInBits() != 64)
2211 return nullptr;
2212 return Cmp;
2213 };
2214 // Try to match
2215 // z + (cmp pred, x, y)
2216 MachineInstr *Cmp = MatchCmp(AddRHS);
2217 if (!Cmp) {
2218 // (cmp pred, x, y) + z
2219 std::swap(AddLHS, AddRHS);
2220 Cmp = MatchCmp(AddRHS);
2221 if (!Cmp)
2222 return false;
2223 }
2224 auto &PredOp = Cmp->getOperand(1);
2225 auto Pred = static_cast<CmpInst::Predicate>(PredOp.getPredicate());
2226 const AArch64CC::CondCode InvCC =
2227 changeICMPPredToAArch64CC(CmpInst::getInversePredicate(Pred));
2228 MIB.setInstrAndDebugLoc(I);
2229 emitIntegerCompare(/*LHS=*/Cmp->getOperand(2),
2230 /*RHS=*/Cmp->getOperand(3), PredOp, MIB);
2231 emitCSINC(/*Dst=*/AddDst, /*Src =*/AddLHS, /*Src2=*/AddLHS, InvCC, MIB);
2232 I.eraseFromParent();
2233 return true;
2234 }
2235 case TargetOpcode::G_OR: {
2236 // Look for operations that take the lower `Width=Size-ShiftImm` bits of
2237 // `ShiftSrc` and insert them into the upper `Width` bits of `MaskSrc` via
2238 // shifting and masking that we can replace with a BFI (encoded as a BFM).
2239 Register Dst = I.getOperand(0).getReg();
2240 LLT Ty = MRI.getType(Dst);
2241
2242 if (!Ty.isScalar())
2243 return false;
2244
2245 unsigned Size = Ty.getSizeInBits();
2246 if (Size != 32 && Size != 64)
2247 return false;
2248
2249 Register ShiftSrc;
2250 int64_t ShiftImm;
2251 Register MaskSrc;
2252 int64_t MaskImm;
2253 if (!mi_match(
2254 Dst, MRI,
2255 m_GOr(m_OneNonDBGUse(m_GShl(m_Reg(ShiftSrc), m_ICst(ShiftImm))),
2256 m_OneNonDBGUse(m_GAnd(m_Reg(MaskSrc), m_ICst(MaskImm))))))
2257 return false;
2258
2259 if (ShiftImm > Size || ((1ULL << ShiftImm) - 1ULL) != uint64_t(MaskImm))
2260 return false;
2261
2262 int64_t Immr = Size - ShiftImm;
2263 int64_t Imms = Size - ShiftImm - 1;
2264 unsigned Opc = Size == 32 ? AArch64::BFMWri : AArch64::BFMXri;
2265 emitInstr(Opc, {Dst}, {MaskSrc, ShiftSrc, Immr, Imms}, MIB);
2266 I.eraseFromParent();
2267 return true;
2268 }
2269 default:
2270 return false;
2271 }
2272}
2273
2274bool AArch64InstructionSelector::select(MachineInstr &I) {
2275 assert(I.getParent() && "Instruction should be in a basic block!")(static_cast <bool> (I.getParent() && "Instruction should be in a basic block!"
) ? void (0) : __assert_fail ("I.getParent() && \"Instruction should be in a basic block!\""
, "llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 2275, __extension__ __PRETTY_FUNCTION__))
;
2276 assert(I.getParent()->getParent() && "Instruction should be in a function!")(static_cast <bool> (I.getParent()->getParent() &&
"Instruction should be in a function!") ? void (0) : __assert_fail
("I.getParent()->getParent() && \"Instruction should be in a function!\""
, "llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 2276, __extension__ __PRETTY_FUNCTION__))
;
2277
2278 MachineBasicBlock &MBB = *I.getParent();
2279 MachineFunction &MF = *MBB.getParent();
2280 MachineRegisterInfo &MRI = MF.getRegInfo();
2281
2282 const AArch64Subtarget *Subtarget =
2283 &static_cast<const AArch64Subtarget &>(MF.getSubtarget());
2284 if (Subtarget->requiresStrictAlign()) {
2285 // We don't support this feature yet.
2286 LLVM_DEBUG(dbgs() << "AArch64 GISel does not support strict-align yet\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "AArch64 GISel does not support strict-align yet\n"
; } } while (false)
;
2287 return false;
2288 }
2289
2290 MIB.setInstrAndDebugLoc(I);
2291
2292 unsigned Opcode = I.getOpcode();
2293 // G_PHI requires same handling as PHI
2294 if (!I.isPreISelOpcode() || Opcode == TargetOpcode::G_PHI) {
2295 // Certain non-generic instructions also need some special handling.
2296
2297 if (Opcode == TargetOpcode::LOAD_STACK_GUARD)
2298 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2299
2300 if (Opcode == TargetOpcode::PHI || Opcode == TargetOpcode::G_PHI) {
2301 const Register DefReg = I.getOperand(0).getReg();
2302 const LLT DefTy = MRI.getType(DefReg);
2303
2304 const RegClassOrRegBank &RegClassOrBank =
2305 MRI.getRegClassOrRegBank(DefReg);
2306
2307 const TargetRegisterClass *DefRC
2308 = RegClassOrBank.dyn_cast<const TargetRegisterClass *>();
2309 if (!DefRC) {
2310 if (!DefTy.isValid()) {
2311 LLVM_DEBUG(dbgs() << "PHI operand has no type, not a gvreg?\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "PHI operand has no type, not a gvreg?\n"
; } } while (false)
;
2312 return false;
2313 }
2314 const RegisterBank &RB = *RegClassOrBank.get<const RegisterBank *>();
2315 DefRC = getRegClassForTypeOnBank(DefTy, RB, RBI);
2316 if (!DefRC) {
2317 LLVM_DEBUG(dbgs() << "PHI operand has unexpected size/bank\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "PHI operand has unexpected size/bank\n"
; } } while (false)
;
2318 return false;
2319 }
2320 }
2321
2322 I.setDesc(TII.get(TargetOpcode::PHI));
2323
2324 return RBI.constrainGenericRegister(DefReg, *DefRC, MRI);
2325 }
2326
2327 if (I.isCopy())
2328 return selectCopy(I, TII, MRI, TRI, RBI);
2329
2330 return true;
2331 }
2332
2333
2334 if (I.getNumOperands() != I.getNumExplicitOperands()) {
2335 LLVM_DEBUG(do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "Generic instruction has unexpected implicit operands\n"
; } } while (false)
2336 dbgs() << "Generic instruction has unexpected implicit operands\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "Generic instruction has unexpected implicit operands\n"
; } } while (false)
;
2337 return false;
2338 }
2339
2340 // Try to do some lowering before we start instruction selecting. These
2341 // lowerings are purely transformations on the input G_MIR and so selection
2342 // must continue after any modification of the instruction.
2343 if (preISelLower(I)) {
2344 Opcode = I.getOpcode(); // The opcode may have been modified, refresh it.
2345 }
2346
2347 // There may be patterns where the importer can't deal with them optimally,
2348 // but does select it to a suboptimal sequence so our custom C++ selection
2349 // code later never has a chance to work on it. Therefore, we have an early
2350 // selection attempt here to give priority to certain selection routines
2351 // over the imported ones.
2352 if (earlySelect(I))
2353 return true;
2354
2355 if (selectImpl(I, *CoverageInfo))
2356 return true;
2357
2358 LLT Ty =
2359 I.getOperand(0).isReg() ? MRI.getType(I.getOperand(0).getReg()) : LLT{};
2360
2361 switch (Opcode) {
2362 case TargetOpcode::G_SBFX:
2363 case TargetOpcode::G_UBFX: {
2364 static const unsigned OpcTable[2][2] = {
2365 {AArch64::UBFMWri, AArch64::UBFMXri},
2366 {AArch64::SBFMWri, AArch64::SBFMXri}};
2367 bool IsSigned = Opcode == TargetOpcode::G_SBFX;
2368 unsigned Size = Ty.getSizeInBits();
2369 unsigned Opc = OpcTable[IsSigned][Size == 64];
2370 auto Cst1 =
2371 getIConstantVRegValWithLookThrough(I.getOperand(2).getReg(), MRI);
2372 assert(Cst1 && "Should have gotten a constant for src 1?")(static_cast <bool> (Cst1 && "Should have gotten a constant for src 1?"
) ? void (0) : __assert_fail ("Cst1 && \"Should have gotten a constant for src 1?\""
, "llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 2372, __extension__ __PRETTY_FUNCTION__))
;
2373 auto Cst2 =
2374 getIConstantVRegValWithLookThrough(I.getOperand(3).getReg(), MRI);
2375 assert(Cst2 && "Should have gotten a constant for src 2?")(static_cast <bool> (Cst2 && "Should have gotten a constant for src 2?"
) ? void (0) : __assert_fail ("Cst2 && \"Should have gotten a constant for src 2?\""
, "llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 2375, __extension__ __PRETTY_FUNCTION__))
;
2376 auto LSB = Cst1->Value.getZExtValue();
2377 auto Width = Cst2->Value.getZExtValue();
2378 auto BitfieldInst =
2379 MIB.buildInstr(Opc, {I.getOperand(0)}, {I.getOperand(1)})
2380 .addImm(LSB)
2381 .addImm(LSB + Width - 1);
2382 I.eraseFromParent();
2383 return constrainSelectedInstRegOperands(*BitfieldInst, TII, TRI, RBI);
2384 }
2385 case TargetOpcode::G_BRCOND:
2386 return selectCompareBranch(I, MF, MRI);
2387
2388 case TargetOpcode::G_BRINDIRECT: {
2389 I.setDesc(TII.get(AArch64::BR));
2390 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2391 }
2392
2393 case TargetOpcode::G_BRJT:
2394 return selectBrJT(I, MRI);
2395
2396 case AArch64::G_ADD_LOW: {
2397 // This op may have been separated from it's ADRP companion by the localizer
2398 // or some other code motion pass. Given that many CPUs will try to
2399 // macro fuse these operations anyway, select this into a MOVaddr pseudo
2400 // which will later be expanded into an ADRP+ADD pair after scheduling.
2401 MachineInstr *BaseMI = MRI.getVRegDef(I.getOperand(1).getReg());
2402 if (BaseMI->getOpcode() != AArch64::ADRP) {
2403 I.setDesc(TII.get(AArch64::ADDXri));
2404 I.addOperand(MachineOperand::CreateImm(0));
2405 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2406 }
2407 assert(TM.getCodeModel() == CodeModel::Small &&(static_cast <bool> (TM.getCodeModel() == CodeModel::Small
&& "Expected small code model") ? void (0) : __assert_fail
("TM.getCodeModel() == CodeModel::Small && \"Expected small code model\""
, "llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 2408, __extension__ __PRETTY_FUNCTION__))
2408 "Expected small code model")(static_cast <bool> (TM.getCodeModel() == CodeModel::Small
&& "Expected small code model") ? void (0) : __assert_fail
("TM.getCodeModel() == CodeModel::Small && \"Expected small code model\""
, "llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 2408, __extension__ __PRETTY_FUNCTION__))
;
2409 auto Op1 = BaseMI->getOperand(1);
2410 auto Op2 = I.getOperand(2);
2411 auto MovAddr = MIB.buildInstr(AArch64::MOVaddr, {I.getOperand(0)}, {})
2412 .addGlobalAddress(Op1.getGlobal(), Op1.getOffset(),
2413 Op1.getTargetFlags())
2414 .addGlobalAddress(Op2.getGlobal(), Op2.getOffset(),
2415 Op2.getTargetFlags());
2416 I.eraseFromParent();
2417 return constrainSelectedInstRegOperands(*MovAddr, TII, TRI, RBI);
2418 }
2419
2420 case TargetOpcode::G_BSWAP: {
2421 // Handle vector types for G_BSWAP directly.
2422 Register DstReg = I.getOperand(0).getReg();
2423 LLT DstTy = MRI.getType(DstReg);
2424
2425 // We should only get vector types here; everything else is handled by the
2426 // importer right now.
2427 if (!DstTy.isVector() || DstTy.getSizeInBits() > 128) {
2428 LLVM_DEBUG(dbgs() << "Dst type for G_BSWAP currently unsupported.\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "Dst type for G_BSWAP currently unsupported.\n"
; } } while (false)
;
2429 return false;
2430 }
2431
2432 // Only handle 4 and 2 element vectors for now.
2433 // TODO: 16-bit elements.
2434 unsigned NumElts = DstTy.getNumElements();
2435 if (NumElts != 4 && NumElts != 2) {
2436 LLVM_DEBUG(dbgs() << "Unsupported number of elements for G_BSWAP.\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "Unsupported number of elements for G_BSWAP.\n"
; } } while (false)
;
2437 return false;
2438 }
2439
2440 // Choose the correct opcode for the supported types. Right now, that's
2441 // v2s32, v4s32, and v2s64.
2442 unsigned Opc = 0;
2443 unsigned EltSize = DstTy.getElementType().getSizeInBits();
2444 if (EltSize == 32)
2445 Opc = (DstTy.getNumElements() == 2) ? AArch64::REV32v8i8
2446 : AArch64::REV32v16i8;
2447 else if (EltSize == 64)
2448 Opc = AArch64::REV64v16i8;
2449
2450 // We should always get something by the time we get here...
2451 assert(Opc != 0 && "Didn't get an opcode for G_BSWAP?")(static_cast <bool> (Opc != 0 && "Didn't get an opcode for G_BSWAP?"
) ? void (0) : __assert_fail ("Opc != 0 && \"Didn't get an opcode for G_BSWAP?\""
, "llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 2451, __extension__ __PRETTY_FUNCTION__))
;
2452
2453 I.setDesc(TII.get(Opc));
2454 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2455 }
2456
2457 case TargetOpcode::G_FCONSTANT:
2458 case TargetOpcode::G_CONSTANT: {
2459 const bool isFP = Opcode == TargetOpcode::G_FCONSTANT;
2460
2461 const LLT s8 = LLT::scalar(8);
2462 const LLT s16 = LLT::scalar(16);
2463 const LLT s32 = LLT::scalar(32);
2464 const LLT s64 = LLT::scalar(64);
2465 const LLT s128 = LLT::scalar(128);
2466 const LLT p0 = LLT::pointer(0, 64);
2467
2468 const Register DefReg = I.getOperand(0).getReg();
2469 const LLT DefTy = MRI.getType(DefReg);
2470 const unsigned DefSize = DefTy.getSizeInBits();
2471 const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI);
2472
2473 // FIXME: Redundant check, but even less readable when factored out.
2474 if (isFP) {
2475 if (Ty != s16 && Ty != s32 && Ty != s64 && Ty != s128) {
2476 LLVM_DEBUG(dbgs() << "Unable to materialize FP " << Tydo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "Unable to materialize FP "
<< Ty << " constant, expected: " << s16 <<
" or " << s32 << " or " << s64 << " or "
<< s128 << '\n'; } } while (false)
2477 << " constant, expected: " << s16 << " or " << s32do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "Unable to materialize FP "
<< Ty << " constant, expected: " << s16 <<
" or " << s32 << " or " << s64 << " or "
<< s128 << '\n'; } } while (false)
2478 << " or " << s64 << " or " << s128 << '\n')do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "Unable to materialize FP "
<< Ty << " constant, expected: " << s16 <<
" or " << s32 << " or " << s64 << " or "
<< s128 << '\n'; } } while (false)
;
2479 return false;
2480 }
2481
2482 if (RB.getID() != AArch64::FPRRegBankID) {
2483 LLVM_DEBUG(dbgs() << "Unable to materialize FP " << Tydo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "Unable to materialize FP "
<< Ty << " constant on bank: " << RB <<
", expected: FPR\n"; } } while (false)
2484 << " constant on bank: " << RBdo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "Unable to materialize FP "
<< Ty << " constant on bank: " << RB <<
", expected: FPR\n"; } } while (false)
2485 << ", expected: FPR\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "Unable to materialize FP "
<< Ty << " constant on bank: " << RB <<
", expected: FPR\n"; } } while (false)
;
2486 return false;
2487 }
2488
2489 // The case when we have 0.0 is covered by tablegen. Reject it here so we
2490 // can be sure tablegen works correctly and isn't rescued by this code.
2491 // 0.0 is not covered by tablegen for FP128. So we will handle this
2492 // scenario in the code here.
2493 if (DefSize != 128 && I.getOperand(1).getFPImm()->isExactlyValue(0.0))
2494 return false;
2495 } else {
2496 // s32 and s64 are covered by tablegen.
2497 if (Ty != p0 && Ty != s8 && Ty != s16) {
2498 LLVM_DEBUG(dbgs() << "Unable to materialize integer " << Tydo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "Unable to materialize integer "
<< Ty << " constant, expected: " << s32 <<
", " << s64 << ", or " << p0 << '\n'
; } } while (false)
2499 << " constant, expected: " << s32 << ", " << s64do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "Unable to materialize integer "
<< Ty << " constant, expected: " << s32 <<
", " << s64 << ", or " << p0 << '\n'
; } } while (false)
2500 << ", or " << p0 << '\n')do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "Unable to materialize integer "
<< Ty << " constant, expected: " << s32 <<
", " << s64 << ", or " << p0 << '\n'
; } } while (false)
;
2501 return false;
2502 }
2503
2504 if (RB.getID() != AArch64::GPRRegBankID) {
2505 LLVM_DEBUG(dbgs() << "Unable to materialize integer " << Tydo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "Unable to materialize integer "
<< Ty << " constant on bank: " << RB <<
", expected: GPR\n"; } } while (false)
2506 << " constant on bank: " << RBdo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "Unable to materialize integer "
<< Ty << " constant on bank: " << RB <<
", expected: GPR\n"; } } while (false)
2507 << ", expected: GPR\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "Unable to materialize integer "
<< Ty << " constant on bank: " << RB <<
", expected: GPR\n"; } } while (false)
;
2508 return false;
2509 }
2510 }
2511
2512 if (isFP) {
2513 const TargetRegisterClass &FPRRC = *getMinClassForRegBank(RB, DefSize);
2514 // For 16, 64, and 128b values, emit a constant pool load.
2515 switch (DefSize) {
2516 default:
2517 llvm_unreachable("Unexpected destination size for G_FCONSTANT?")::llvm::llvm_unreachable_internal("Unexpected destination size for G_FCONSTANT?"
, "llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 2517)
;
2518 case 32:
2519 // For s32, use a cp load if we have optsize/minsize.
2520 if (!shouldOptForSize(&MF))
2521 break;
2522 LLVM_FALLTHROUGH[[gnu::fallthrough]];
2523 case 16:
2524 case 64:
2525 case 128: {
2526 auto *FPImm = I.getOperand(1).getFPImm();
2527 auto *LoadMI = emitLoadFromConstantPool(FPImm, MIB);
2528 if (!LoadMI) {
2529 LLVM_DEBUG(dbgs() << "Failed to load double constant pool entry\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "Failed to load double constant pool entry\n"
; } } while (false)
;
2530 return false;
2531 }
2532 MIB.buildCopy({DefReg}, {LoadMI->getOperand(0).getReg()});
2533 I.eraseFromParent();
2534 return RBI.constrainGenericRegister(DefReg, FPRRC, MRI);
2535 }
2536 }
2537
2538 // Either emit a FMOV, or emit a copy to emit a normal mov.
2539 assert(DefSize == 32 &&(static_cast <bool> (DefSize == 32 && "Expected constant pool loads for all sizes other than 32!"
) ? void (0) : __assert_fail ("DefSize == 32 && \"Expected constant pool loads for all sizes other than 32!\""
, "llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 2540, __extension__ __PRETTY_FUNCTION__))
2540 "Expected constant pool loads for all sizes other than 32!")(static_cast <bool> (DefSize == 32 && "Expected constant pool loads for all sizes other than 32!"
) ? void (0) : __assert_fail ("DefSize == 32 && \"Expected constant pool loads for all sizes other than 32!\""
, "llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 2540, __extension__ __PRETTY_FUNCTION__))
;
2541 const Register DefGPRReg =
2542 MRI.createVirtualRegister(&AArch64::GPR32RegClass);
2543 MachineOperand &RegOp = I.getOperand(0);
2544 RegOp.setReg(DefGPRReg);
2545 MIB.setInsertPt(MIB.getMBB(), std::next(I.getIterator()));
2546 MIB.buildCopy({DefReg}, {DefGPRReg});
2547
2548 if (!RBI.constrainGenericRegister(DefReg, FPRRC, MRI)) {
2549 LLVM_DEBUG(dbgs() << "Failed to constrain G_FCONSTANT def operand\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "Failed to constrain G_FCONSTANT def operand\n"
; } } while (false)
;
2550 return false;
2551 }
2552
2553 MachineOperand &ImmOp = I.getOperand(1);
2554 // FIXME: Is going through int64_t always correct?
2555 ImmOp.ChangeToImmediate(
2556 ImmOp.getFPImm()->getValueAPF().bitcastToAPInt().getZExtValue());
2557 } else if (I.getOperand(1).isCImm()) {
2558 uint64_t Val = I.getOperand(1).getCImm()->getZExtValue();
2559 I.getOperand(1).ChangeToImmediate(Val);
2560 } else if (I.getOperand(1).isImm()) {
2561 uint64_t Val = I.getOperand(1).getImm();
2562 I.getOperand(1).ChangeToImmediate(Val);
2563 }
2564
2565 const unsigned MovOpc =
2566 DefSize == 64 ? AArch64::MOVi64imm : AArch64::MOVi32imm;
2567 I.setDesc(TII.get(MovOpc));
2568 constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2569 return true;
2570 }
2571 case TargetOpcode::G_EXTRACT: {
2572 Register DstReg = I.getOperand(0).getReg();
2573 Register SrcReg = I.getOperand(1).getReg();
2574 LLT SrcTy = MRI.getType(SrcReg);
2575 LLT DstTy = MRI.getType(DstReg);
2576 (void)DstTy;
2577 unsigned SrcSize = SrcTy.getSizeInBits();
2578
2579 if (SrcTy.getSizeInBits() > 64) {
2580 // This should be an extract of an s128, which is like a vector extract.
2581 if (SrcTy.getSizeInBits() != 128)
2582 return false;
2583 // Only support extracting 64 bits from an s128 at the moment.
2584 if (DstTy.getSizeInBits() != 64)
2585 return false;
2586
2587 unsigned Offset = I.getOperand(2).getImm();
2588 if (Offset % 64 != 0)
2589 return false;
2590
2591 // Check we have the right regbank always.
2592 const RegisterBank &SrcRB = *RBI.getRegBank(SrcReg, MRI, TRI);
2593 const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
2594 assert(SrcRB.getID() == DstRB.getID() && "Wrong extract regbank!")(static_cast <bool> (SrcRB.getID() == DstRB.getID() &&
"Wrong extract regbank!") ? void (0) : __assert_fail ("SrcRB.getID() == DstRB.getID() && \"Wrong extract regbank!\""
, "llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 2594, __extension__ __PRETTY_FUNCTION__))
;
2595
2596 if (SrcRB.getID() == AArch64::GPRRegBankID) {
2597 MIB.buildInstr(TargetOpcode::COPY, {DstReg}, {})
2598 .addUse(SrcReg, 0, Offset == 0 ? AArch64::sube64 : AArch64::subo64);
2599 I.eraseFromParent();
2600 return true;
2601 }
2602
2603 // Emit the same code as a vector extract.
2604 // Offset must be a multiple of 64.
2605 unsigned LaneIdx = Offset / 64;
2606 MachineInstr *Extract = emitExtractVectorElt(
2607 DstReg, DstRB, LLT::scalar(64), SrcReg, LaneIdx, MIB);
2608 if (!Extract)
2609 return false;
2610 I.eraseFromParent();
2611 return true;
2612 }
2613
2614 I.setDesc(TII.get(SrcSize == 64 ? AArch64::UBFMXri : AArch64::UBFMWri));
2615 MachineInstrBuilder(MF, I).addImm(I.getOperand(2).getImm() +
2616 Ty.getSizeInBits() - 1);
2617
2618 if (SrcSize < 64) {
2619 assert(SrcSize == 32 && DstTy.getSizeInBits() == 16 &&(static_cast <bool> (SrcSize == 32 && DstTy.getSizeInBits
() == 16 && "unexpected G_EXTRACT types") ? void (0) :
__assert_fail ("SrcSize == 32 && DstTy.getSizeInBits() == 16 && \"unexpected G_EXTRACT types\""
, "llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 2620, __extension__ __PRETTY_FUNCTION__))
2620 "unexpected G_EXTRACT types")(static_cast <bool> (SrcSize == 32 && DstTy.getSizeInBits
() == 16 && "unexpected G_EXTRACT types") ? void (0) :
__assert_fail ("SrcSize == 32 && DstTy.getSizeInBits() == 16 && \"unexpected G_EXTRACT types\""
, "llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 2620, __extension__ __PRETTY_FUNCTION__))
;
2621 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2622 }
2623
2624 DstReg = MRI.createGenericVirtualRegister(LLT::scalar(64));
2625 MIB.setInsertPt(MIB.getMBB(), std::next(I.getIterator()));
2626 MIB.buildInstr(TargetOpcode::COPY, {I.getOperand(0).getReg()}, {})
2627 .addReg(DstReg, 0, AArch64::sub_32);
2628 RBI.constrainGenericRegister(I.getOperand(0).getReg(),
2629 AArch64::GPR32RegClass, MRI);
2630 I.getOperand(0).setReg(DstReg);
2631
2632 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2633 }
2634
2635 case TargetOpcode::G_INSERT: {
2636 LLT SrcTy = MRI.getType(I.getOperand(2).getReg());
2637 LLT DstTy = MRI.getType(I.getOperand(0).getReg());
2638 unsigned DstSize = DstTy.getSizeInBits();
2639 // Larger inserts are vectors, same-size ones should be something else by
2640 // now (split up or turned into COPYs).
2641 if (Ty.getSizeInBits() > 64 || SrcTy.getSizeInBits() > 32)
2642 return false;
2643
2644 I.setDesc(TII.get(DstSize == 64 ? AArch64::BFMXri : AArch64::BFMWri));
2645 unsigned LSB = I.getOperand(3).getImm();
2646 unsigned Width = MRI.getType(I.getOperand(2).getReg()).getSizeInBits();
2647 I.getOperand(3).setImm((DstSize - LSB) % DstSize);
2648 MachineInstrBuilder(MF, I).addImm(Width - 1);
2649
2650 if (DstSize < 64) {
2651 assert(DstSize == 32 && SrcTy.getSizeInBits() == 16 &&(static_cast <bool> (DstSize == 32 && SrcTy.getSizeInBits
() == 16 && "unexpected G_INSERT types") ? void (0) :
__assert_fail ("DstSize == 32 && SrcTy.getSizeInBits() == 16 && \"unexpected G_INSERT types\""
, "llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 2652, __extension__ __PRETTY_FUNCTION__))
2652 "unexpected G_INSERT types")(static_cast <bool> (DstSize == 32 && SrcTy.getSizeInBits
() == 16 && "unexpected G_INSERT types") ? void (0) :
__assert_fail ("DstSize == 32 && SrcTy.getSizeInBits() == 16 && \"unexpected G_INSERT types\""
, "llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 2652, __extension__ __PRETTY_FUNCTION__))
;
2653 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2654 }
2655
2656 Register SrcReg = MRI.createGenericVirtualRegister(LLT::scalar(64));
2657 BuildMI(MBB, I.getIterator(), I.getDebugLoc(),
2658 TII.get(AArch64::SUBREG_TO_REG))
2659 .addDef(SrcReg)
2660 .addImm(0)
2661 .addUse(I.getOperand(2).getReg())
2662 .addImm(AArch64::sub_32);
2663 RBI.constrainGenericRegister(I.getOperand(2).getReg(),
2664 AArch64::GPR32RegClass, MRI);
2665 I.getOperand(2).setReg(SrcReg);
2666
2667 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2668 }
2669 case TargetOpcode::G_FRAME_INDEX: {
2670 // allocas and G_FRAME_INDEX are only supported in addrspace(0).
2671 if (Ty != LLT::pointer(0, 64)) {
2672 LLVM_DEBUG(dbgs() << "G_FRAME_INDEX pointer has type: " << Tydo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "G_FRAME_INDEX pointer has type: "
<< Ty << ", expected: " << LLT::pointer(0,
64) << '\n'; } } while (false)
2673 << ", expected: " << LLT::pointer(0, 64) << '\n')do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "G_FRAME_INDEX pointer has type: "
<< Ty << ", expected: " << LLT::pointer(0,
64) << '\n'; } } while (false)
;
2674 return false;
2675 }
2676 I.setDesc(TII.get(AArch64::ADDXri));
2677
2678 // MOs for a #0 shifted immediate.
2679 I.addOperand(MachineOperand::CreateImm(0));
2680 I.addOperand(MachineOperand::CreateImm(0));
2681
2682 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2683 }
2684
2685 case TargetOpcode::G_GLOBAL_VALUE: {
2686 auto GV = I.getOperand(1).getGlobal();
2687 if (GV->isThreadLocal())
2688 return selectTLSGlobalValue(I, MRI);
2689
2690 unsigned OpFlags = STI.ClassifyGlobalReference(GV, TM);
2691 if (OpFlags & AArch64II::MO_GOT) {
2692 I.setDesc(TII.get(AArch64::LOADgot));
2693 I.getOperand(1).setTargetFlags(OpFlags);
2694 } else if (TM.getCodeModel() == CodeModel::Large) {
2695 // Materialize the global using movz/movk instructions.
2696 materializeLargeCMVal(I, GV, OpFlags);
2697 I.eraseFromParent();
2698 return true;
2699 } else if (TM.getCodeModel() == CodeModel::Tiny) {
2700 I.setDesc(TII.get(AArch64::ADR));
2701 I.getOperand(1).setTargetFlags(OpFlags);
2702 } else {
2703 I.setDesc(TII.get(AArch64::MOVaddr));
2704 I.getOperand(1).setTargetFlags(OpFlags | AArch64II::MO_PAGE);
2705 MachineInstrBuilder MIB(MF, I);
2706 MIB.addGlobalAddress(GV, I.getOperand(1).getOffset(),
2707 OpFlags | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
2708 }
2709 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2710 }
2711
2712 case TargetOpcode::G_ZEXTLOAD:
2713 case TargetOpcode::G_LOAD:
2714 case TargetOpcode::G_STORE: {
2715 GLoadStore &LdSt = cast<GLoadStore>(I);
2716 bool IsZExtLoad = I.getOpcode() == TargetOpcode::G_ZEXTLOAD;
2717 LLT PtrTy = MRI.getType(LdSt.getPointerReg());
2718
2719 if (PtrTy != LLT::pointer(0, 64)) {
2720 LLVM_DEBUG(dbgs() << "Load/Store pointer has type: " << PtrTydo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "Load/Store pointer has type: "
<< PtrTy << ", expected: " << LLT::pointer
(0, 64) << '\n'; } } while (false)
2721 << ", expected: " << LLT::pointer(0, 64) << '\n')do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "Load/Store pointer has type: "
<< PtrTy << ", expected: " << LLT::pointer
(0, 64) << '\n'; } } while (false)
;
2722 return false;
2723 }
2724
2725 uint64_t MemSizeInBytes = LdSt.getMemSize();
2726 unsigned MemSizeInBits = LdSt.getMemSizeInBits();
2727 AtomicOrdering Order = LdSt.getMMO().getSuccessOrdering();
2728
2729 // Need special instructions for atomics that affect ordering.
2730 if (Order != AtomicOrdering::NotAtomic &&
2731 Order != AtomicOrdering::Unordered &&
2732 Order != AtomicOrdering::Monotonic) {
2733 assert(!isa<GZExtLoad>(LdSt))(static_cast <bool> (!isa<GZExtLoad>(LdSt)) ? void
(0) : __assert_fail ("!isa<GZExtLoad>(LdSt)", "llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 2733, __extension__ __PRETTY_FUNCTION__))
;
2734 if (MemSizeInBytes > 64)
2735 return false;
2736
2737 if (isa<GLoad>(LdSt)) {
2738 static unsigned Opcodes[] = {AArch64::LDARB, AArch64::LDARH,
2739 AArch64::LDARW, AArch64::LDARX};
2740 I.setDesc(TII.get(Opcodes[Log2_32(MemSizeInBytes)]));
2741 } else {
2742 static unsigned Opcodes[] = {AArch64::STLRB, AArch64::STLRH,
2743 AArch64::STLRW, AArch64::STLRX};
2744 Register ValReg = LdSt.getReg(0);
2745 if (MRI.getType(ValReg).getSizeInBits() == 64 && MemSizeInBits != 64) {
2746 // Emit a subreg copy of 32 bits.
2747 Register NewVal = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
2748 MIB.buildInstr(TargetOpcode::COPY, {NewVal}, {})
2749 .addReg(I.getOperand(0).getReg(), 0, AArch64::sub_32);
2750 I.getOperand(0).setReg(NewVal);
2751 }
2752 I.setDesc(TII.get(Opcodes[Log2_32(MemSizeInBytes)]));
2753 }
2754 constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2755 return true;
2756 }
2757
2758#ifndef NDEBUG
2759 const Register PtrReg = LdSt.getPointerReg();
2760 const RegisterBank &PtrRB = *RBI.getRegBank(PtrReg, MRI, TRI);
2761 // Check that the pointer register is valid.
2762 assert(PtrRB.getID() == AArch64::GPRRegBankID &&(static_cast <bool> (PtrRB.getID() == AArch64::GPRRegBankID
&& "Load/Store pointer operand isn't a GPR") ? void (
0) : __assert_fail ("PtrRB.getID() == AArch64::GPRRegBankID && \"Load/Store pointer operand isn't a GPR\""
, "llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 2763, __extension__ __PRETTY_FUNCTION__))
2763 "Load/Store pointer operand isn't a GPR")(static_cast <bool> (PtrRB.getID() == AArch64::GPRRegBankID
&& "Load/Store pointer operand isn't a GPR") ? void (
0) : __assert_fail ("PtrRB.getID() == AArch64::GPRRegBankID && \"Load/Store pointer operand isn't a GPR\""
, "llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 2763, __extension__ __PRETTY_FUNCTION__))
;
2764 assert(MRI.getType(PtrReg).isPointer() &&(static_cast <bool> (MRI.getType(PtrReg).isPointer() &&
"Load/Store pointer operand isn't a pointer") ? void (0) : __assert_fail
("MRI.getType(PtrReg).isPointer() && \"Load/Store pointer operand isn't a pointer\""
, "llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 2765, __extension__ __PRETTY_FUNCTION__))
2765 "Load/Store pointer operand isn't a pointer")(static_cast <bool> (MRI.getType(PtrReg).isPointer() &&
"Load/Store pointer operand isn't a pointer") ? void (0) : __assert_fail
("MRI.getType(PtrReg).isPointer() && \"Load/Store pointer operand isn't a pointer\""
, "llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 2765, __extension__ __PRETTY_FUNCTION__))
;
2766#endif
2767
2768 const Register ValReg = LdSt.getReg(0);
2769 const LLT ValTy = MRI.getType(ValReg);
2770 const RegisterBank &RB = *RBI.getRegBank(ValReg, MRI, TRI);
2771
2772 // The code below doesn't support truncating stores, so we need to split it
2773 // again.
2774 if (isa<GStore>(LdSt) && ValTy.getSizeInBits() > MemSizeInBits) {
2775 unsigned SubReg;
2776 LLT MemTy = LdSt.getMMO().getMemoryType();
2777 auto *RC = getRegClassForTypeOnBank(MemTy, RB, RBI);
2778 if (!getSubRegForClass(RC, TRI, SubReg))
2779 return false;
2780
2781 // Generate a subreg copy.
2782 auto Copy = MIB.buildInstr(TargetOpcode::COPY, {MemTy}, {})
2783 .addReg(ValReg, 0, SubReg)
2784 .getReg(0);
2785 RBI.constrainGenericRegister(Copy, *RC, MRI);
2786 LdSt.getOperand(0).setReg(Copy);
2787 } else if (isa<GLoad>(LdSt) && ValTy.getSizeInBits() > MemSizeInBits) {
2788 // If this is an any-extending load from the FPR bank, split it into a regular
2789 // load + extend.
2790 if (RB.getID() == AArch64::FPRRegBankID) {
2791 unsigned SubReg;
2792 LLT MemTy = LdSt.getMMO().getMemoryType();
2793 auto *RC = getRegClassForTypeOnBank(MemTy, RB, RBI);
2794 if (!getSubRegForClass(RC, TRI, SubReg))
2795 return false;
2796 Register OldDst = LdSt.getReg(0);
2797 Register NewDst =
2798 MRI.createGenericVirtualRegister(LdSt.getMMO().getMemoryType());
2799 LdSt.getOperand(0).setReg(NewDst);
2800 MRI.setRegBank(NewDst, RB);
2801 // Generate a SUBREG_TO_REG to extend it.
2802 MIB.setInsertPt(MIB.getMBB(), std::next(LdSt.getIterator()));
2803 MIB.buildInstr(AArch64::SUBREG_TO_REG, {OldDst}, {})
2804 .addImm(0)
2805 .addUse(NewDst)
2806 .addImm(SubReg);
2807 auto SubRegRC = getRegClassForTypeOnBank(MRI.getType(OldDst), RB, RBI);
2808 RBI.constrainGenericRegister(OldDst, *SubRegRC, MRI);
2809 MIB.setInstr(LdSt);
2810 }
2811 }
2812
2813 // Helper lambda for partially selecting I. Either returns the original
2814 // instruction with an updated opcode, or a new instruction.
2815 auto SelectLoadStoreAddressingMode = [&]() -> MachineInstr * {
2816 bool IsStore = isa<GStore>(I);
1
Assuming 'I' is not a 'GStore'
2817 const unsigned NewOpc =
2818 selectLoadStoreUIOp(I.getOpcode(), RB.getID(), MemSizeInBits);
2819 if (NewOpc == I.getOpcode())
2
Taking false branch
2820 return nullptr;
2821 // Check if we can fold anything into the addressing mode.
2822 auto AddrModeFns =
2823 selectAddrModeIndexed(I.getOperand(1), MemSizeInBytes);
3
Calling 'AArch64InstructionSelector::selectAddrModeIndexed'
2824 if (!AddrModeFns) {
2825 // Can't fold anything. Use the original instruction.
2826 I.setDesc(TII.get(NewOpc));
2827 I.addOperand(MachineOperand::CreateImm(0));
2828 return &I;
2829 }
2830
2831 // Folded something. Create a new instruction and return it.
2832 auto NewInst = MIB.buildInstr(NewOpc, {}, {}, I.getFlags());
2833 Register CurValReg = I.getOperand(0).getReg();
2834 IsStore ? NewInst.addUse(CurValReg) : NewInst.addDef(CurValReg);
2835 NewInst.cloneMemRefs(I);
2836 for (auto &Fn : *AddrModeFns)
2837 Fn(NewInst);
2838 I.eraseFromParent();
2839 return &*NewInst;
2840 };
2841
2842 MachineInstr *LoadStore = SelectLoadStoreAddressingMode();
2843 if (!LoadStore)
2844 return false;
2845
2846 // If we're storing a 0, use WZR/XZR.
2847 if (Opcode == TargetOpcode::G_STORE) {
2848 auto CVal = getIConstantVRegValWithLookThrough(
2849 LoadStore->getOperand(0).getReg(), MRI);
2850 if (CVal && CVal->Value == 0) {
2851 switch (LoadStore->getOpcode()) {
2852 case AArch64::STRWui:
2853 case AArch64::STRHHui:
2854 case AArch64::STRBBui:
2855 LoadStore->getOperand(0).setReg(AArch64::WZR);
2856 break;
2857 case AArch64::STRXui:
2858 LoadStore->getOperand(0).setReg(AArch64::XZR);
2859 break;
2860 }
2861 }
2862 }
2863
2864 if (IsZExtLoad) {
2865 // The zextload from a smaller type to i32 should be handled by the
2866 // importer.
2867 if (MRI.getType(LoadStore->getOperand(0).getReg()).getSizeInBits() != 64)
2868 return false;
2869 // If we have a ZEXTLOAD then change the load's type to be a narrower reg
2870 // and zero_extend with SUBREG_TO_REG.
2871 Register LdReg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
2872 Register DstReg = LoadStore->getOperand(0).getReg();
2873 LoadStore->getOperand(0).setReg(LdReg);
2874
2875 MIB.setInsertPt(MIB.getMBB(), std::next(LoadStore->getIterator()));
2876 MIB.buildInstr(AArch64::SUBREG_TO_REG, {DstReg}, {})
2877 .addImm(0)
2878 .addUse(LdReg)
2879 .addImm(AArch64::sub_32);
2880 constrainSelectedInstRegOperands(*LoadStore, TII, TRI, RBI);
2881 return RBI.constrainGenericRegister(DstReg, AArch64::GPR64allRegClass,
2882 MRI);
2883 }
2884 return constrainSelectedInstRegOperands(*LoadStore, TII, TRI, RBI);
2885 }
2886
2887 case TargetOpcode::G_SMULH:
2888 case TargetOpcode::G_UMULH: {
2889 // Reject the various things we don't support yet.
2890 if (unsupportedBinOp(I, RBI, MRI, TRI))
2891 return false;
2892
2893 const Register DefReg = I.getOperand(0).getReg();
2894 const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI);
2895
2896 if (RB.getID() != AArch64::GPRRegBankID) {
2897 LLVM_DEBUG(dbgs() << "G_[SU]MULH on bank: " << RB << ", expected: GPR\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "G_[SU]MULH on bank: " <<
RB << ", expected: GPR\n"; } } while (false)
;
2898 return false;
2899 }
2900
2901 if (Ty != LLT::scalar(64)) {
2902 LLVM_DEBUG(dbgs() << "G_[SU]MULH has type: " << Tydo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "G_[SU]MULH has type: " <<
Ty << ", expected: " << LLT::scalar(64) <<
'\n'; } } while (false)
2903 << ", expected: " << LLT::scalar(64) << '\n')do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "G_[SU]MULH has type: " <<
Ty << ", expected: " << LLT::scalar(64) <<
'\n'; } } while (false)
;
2904 return false;
2905 }
2906
2907 unsigned NewOpc = I.getOpcode() == TargetOpcode::G_SMULH ? AArch64::SMULHrr
2908 : AArch64::UMULHrr;
2909 I.setDesc(TII.get(NewOpc));
2910
2911 // Now that we selected an opcode, we need to constrain the register
2912 // operands to use appropriate classes.
2913 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2914 }
2915 case TargetOpcode::G_LSHR:
2916 case TargetOpcode::G_ASHR:
2917 if (MRI.getType(I.getOperand(0).getReg()).isVector())
2918 return selectVectorAshrLshr(I, MRI);
2919 LLVM_FALLTHROUGH[[gnu::fallthrough]];
2920 case TargetOpcode::G_SHL:
2921 if (Opcode == TargetOpcode::G_SHL &&
2922 MRI.getType(I.getOperand(0).getReg()).isVector())
2923 return selectVectorSHL(I, MRI);
2924
2925 // These shifts were legalized to have 64 bit shift amounts because we
2926 // want to take advantage of the selection patterns that assume the
2927 // immediates are s64s, however, selectBinaryOp will assume both operands
2928 // will have the same bit size.
2929 {
2930 Register SrcReg = I.getOperand(1).getReg();
2931 Register ShiftReg = I.getOperand(2).getReg();
2932 const LLT ShiftTy = MRI.getType(ShiftReg);
2933 const LLT SrcTy = MRI.getType(SrcReg);
2934 if (!SrcTy.isVector() && SrcTy.getSizeInBits() == 32 &&
2935 ShiftTy.getSizeInBits() == 64) {
2936 assert(!ShiftTy.isVector() && "unexpected vector shift ty")(static_cast <bool> (!ShiftTy.isVector() && "unexpected vector shift ty"
) ? void (0) : __assert_fail ("!ShiftTy.isVector() && \"unexpected vector shift ty\""
, "llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 2936, __extension__ __PRETTY_FUNCTION__))
;
2937 assert(MRI.getVRegDef(ShiftReg) &&(static_cast <bool> (MRI.getVRegDef(ShiftReg) &&
"could not find a vreg definition for shift amount") ? void (
0) : __assert_fail ("MRI.getVRegDef(ShiftReg) && \"could not find a vreg definition for shift amount\""
, "llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 2938, __extension__ __PRETTY_FUNCTION__))
2938 "could not find a vreg definition for shift amount")(static_cast <bool> (MRI.getVRegDef(ShiftReg) &&
"could not find a vreg definition for shift amount") ? void (
0) : __assert_fail ("MRI.getVRegDef(ShiftReg) && \"could not find a vreg definition for shift amount\""
, "llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 2938, __extension__ __PRETTY_FUNCTION__))
;
2939 // Insert a subregister copy to implement a 64->32 trunc
2940 auto Trunc = MIB.buildInstr(TargetOpcode::COPY, {SrcTy}, {})
2941 .addReg(ShiftReg, 0, AArch64::sub_32);
2942 MRI.setRegBank(Trunc.getReg(0), RBI.getRegBank(AArch64::GPRRegBankID));
2943 I.getOperand(2).setReg(Trunc.getReg(0));
2944 }
2945 }
2946 LLVM_FALLTHROUGH[[gnu::fallthrough]];
2947 case TargetOpcode::G_FADD:
2948 case TargetOpcode::G_FSUB:
2949 case TargetOpcode::G_FMUL:
2950 case TargetOpcode::G_FDIV:
2951 case TargetOpcode::G_OR: {
2952 // Reject the various things we don't support yet.
2953 if (unsupportedBinOp(I, RBI, MRI, TRI))
2954 return false;
2955
2956 const unsigned OpSize = Ty.getSizeInBits();
2957
2958 const Register DefReg = I.getOperand(0).getReg();
2959 const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI);
2960
2961 const unsigned NewOpc = selectBinaryOp(I.getOpcode(), RB.getID(), OpSize);
2962 if (NewOpc == I.getOpcode())
2963 return false;
2964
2965 I.setDesc(TII.get(NewOpc));
2966 // FIXME: Should the type be always reset in setDesc?
2967
2968 // Now that we selected an opcode, we need to constrain the register
2969 // operands to use appropriate classes.
2970 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2971 }
2972
2973 case TargetOpcode::G_PTR_ADD: {
2974 emitADD(I.getOperand(0).getReg(), I.getOperand(1), I.getOperand(2), MIB);
2975 I.eraseFromParent();
2976 return true;
2977 }
2978 case TargetOpcode::G_SADDO:
2979 case TargetOpcode::G_UADDO:
2980 case TargetOpcode::G_SSUBO:
2981 case TargetOpcode::G_USUBO: {
2982 // Emit the operation and get the correct condition code.
2983 auto OpAndCC = emitOverflowOp(Opcode, I.getOperand(0).getReg(),
2984 I.getOperand(2), I.getOperand(3), MIB);
2985
2986 // Now, put the overflow result in the register given by the first operand
2987 // to the overflow op. CSINC increments the result when the predicate is
2988 // false, so to get the increment when it's true, we need to use the
2989 // inverse. In this case, we want to increment when carry is set.
2990 Register ZReg = AArch64::WZR;
2991 emitCSINC(/*Dst=*/I.getOperand(1).getReg(), /*Src1=*/ZReg, /*Src2=*/ZReg,
2992 getInvertedCondCode(OpAndCC.second), MIB);
2993 I.eraseFromParent();
2994 return true;
2995 }
2996
2997 case TargetOpcode::G_PTRMASK: {
2998 Register MaskReg = I.getOperand(2).getReg();
2999 Optional<int64_t> MaskVal = getIConstantVRegSExtVal(MaskReg, MRI);
3000 // TODO: Implement arbitrary cases
3001 if (!MaskVal || !isShiftedMask_64(*MaskVal))
3002 return false;
3003
3004 uint64_t Mask = *MaskVal;
3005 I.setDesc(TII.get(AArch64::ANDXri));
3006 I.getOperand(2).ChangeToImmediate(
3007 AArch64_AM::encodeLogicalImmediate(Mask, 64));
3008
3009 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3010 }
3011 case TargetOpcode::G_PTRTOINT:
3012 case TargetOpcode::G_TRUNC: {
3013 const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
3014 const LLT SrcTy = MRI.getType(I.getOperand(1).getReg());
3015
3016 const Register DstReg = I.getOperand(0).getReg();
3017 const Register SrcReg = I.getOperand(1).getReg();
3018
3019 const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
3020 const RegisterBank &SrcRB = *RBI.getRegBank(SrcReg, MRI, TRI);
3021
3022 if (DstRB.getID() != SrcRB.getID()) {
3023 LLVM_DEBUG(do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "G_TRUNC/G_PTRTOINT input/output on different banks\n"
; } } while (false)
3024 dbgs() << "G_TRUNC/G_PTRTOINT input/output on different banks\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "G_TRUNC/G_PTRTOINT input/output on different banks\n"
; } } while (false)
;
3025 return false;
3026 }
3027
3028 if (DstRB.getID() == AArch64::GPRRegBankID) {
3029 const TargetRegisterClass *DstRC =
3030 getRegClassForTypeOnBank(DstTy, DstRB, RBI);
3031 if (!DstRC)
3032 return false;
3033
3034 const TargetRegisterClass *SrcRC =
3035 getRegClassForTypeOnBank(SrcTy, SrcRB, RBI);
3036 if (!SrcRC)
3037 return false;
3038
3039 if (!RBI.constrainGenericRegister(SrcReg, *SrcRC, MRI) ||
3040 !RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) {
3041 LLVM_DEBUG(dbgs() << "Failed to constrain G_TRUNC/G_PTRTOINT\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "Failed to constrain G_TRUNC/G_PTRTOINT\n"
; } } while (false)
;
3042 return false;
3043 }
3044
3045 if (DstRC == SrcRC) {
3046 // Nothing to be done
3047 } else if (Opcode == TargetOpcode::G_TRUNC && DstTy == LLT::scalar(32) &&
3048 SrcTy == LLT::scalar(64)) {
3049 llvm_unreachable("TableGen can import this case")::llvm::llvm_unreachable_internal("TableGen can import this case"
, "llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 3049)
;
3050 return false;
3051 } else if (DstRC == &AArch64::GPR32RegClass &&
3052 SrcRC == &AArch64::GPR64RegClass) {
3053 I.getOperand(1).setSubReg(AArch64::sub_32);
3054 } else {
3055 LLVM_DEBUG(do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "Unhandled mismatched classes in G_TRUNC/G_PTRTOINT\n"
; } } while (false)
3056 dbgs() << "Unhandled mismatched classes in G_TRUNC/G_PTRTOINT\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "Unhandled mismatched classes in G_TRUNC/G_PTRTOINT\n"
; } } while (false)
;
3057 return false;
3058 }
3059
3060 I.setDesc(TII.get(TargetOpcode::COPY));
3061 return true;
3062 } else if (DstRB.getID() == AArch64::FPRRegBankID) {
3063 if (DstTy == LLT::fixed_vector(4, 16) &&
3064 SrcTy == LLT::fixed_vector(4, 32)) {
3065 I.setDesc(TII.get(AArch64::XTNv4i16));
3066 constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3067 return true;
3068 }
3069
3070 if (!SrcTy.isVector() && SrcTy.getSizeInBits() == 128) {
3071 MachineInstr *Extract = emitExtractVectorElt(
3072 DstReg, DstRB, LLT::scalar(DstTy.getSizeInBits()), SrcReg, 0, MIB);
3073 if (!Extract)
3074 return false;
3075 I.eraseFromParent();
3076 return true;
3077 }
3078
3079 // We might have a vector G_PTRTOINT, in which case just emit a COPY.
3080 if (Opcode == TargetOpcode::G_PTRTOINT) {
3081 assert(DstTy.isVector() && "Expected an FPR ptrtoint to be a vector")(static_cast <bool> (DstTy.isVector() && "Expected an FPR ptrtoint to be a vector"
) ? void (0) : __assert_fail ("DstTy.isVector() && \"Expected an FPR ptrtoint to be a vector\""
, "llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 3081, __extension__ __PRETTY_FUNCTION__))
;
3082 I.setDesc(TII.get(TargetOpcode::COPY));
3083 return selectCopy(I, TII, MRI, TRI, RBI);
3084 }
3085 }
3086
3087 return false;
3088 }
3089
3090 case TargetOpcode::G_ANYEXT: {
3091 if (selectUSMovFromExtend(I, MRI))
3092 return true;
3093
3094 const Register DstReg = I.getOperand(0).getReg();
3095 const Register SrcReg = I.getOperand(1).getReg();
3096
3097 const RegisterBank &RBDst = *RBI.getRegBank(DstReg, MRI, TRI);
3098 if (RBDst.getID() != AArch64::GPRRegBankID) {
3099 LLVM_DEBUG(dbgs() << "G_ANYEXT on bank: " << RBDstdo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "G_ANYEXT on bank: " <<
RBDst << ", expected: GPR\n"; } } while (false)
3100 << ", expected: GPR\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "G_ANYEXT on bank: " <<
RBDst << ", expected: GPR\n"; } } while (false)
;
3101 return false;
3102 }
3103
3104 const RegisterBank &RBSrc = *RBI.getRegBank(SrcReg, MRI, TRI);
3105 if (RBSrc.getID() != AArch64::GPRRegBankID) {
3106 LLVM_DEBUG(dbgs() << "G_ANYEXT on bank: " << RBSrcdo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "G_ANYEXT on bank: " <<
RBSrc << ", expected: GPR\n"; } } while (false)
3107 << ", expected: GPR\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "G_ANYEXT on bank: " <<
RBSrc << ", expected: GPR\n"; } } while (false)
;
3108 return false;
3109 }
3110
3111 const unsigned DstSize = MRI.getType(DstReg).getSizeInBits();
3112
3113 if (DstSize == 0) {
3114 LLVM_DEBUG(dbgs() << "G_ANYEXT operand has no size, not a gvreg?\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "G_ANYEXT operand has no size, not a gvreg?\n"
; } } while (false)
;
3115 return false;
3116 }
3117
3118 if (DstSize != 64 && DstSize > 32) {
3119 LLVM_DEBUG(dbgs() << "G_ANYEXT to size: " << DstSizedo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "G_ANYEXT to size: " <<
DstSize << ", expected: 32 or 64\n"; } } while (false)
3120 << ", expected: 32 or 64\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "G_ANYEXT to size: " <<
DstSize << ", expected: 32 or 64\n"; } } while (false)
;
3121 return false;
3122 }
3123 // At this point G_ANYEXT is just like a plain COPY, but we need
3124 // to explicitly form the 64-bit value if any.
3125 if (DstSize > 32) {
3126 Register ExtSrc = MRI.createVirtualRegister(&AArch64::GPR64allRegClass);
3127 BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::SUBREG_TO_REG))
3128 .addDef(ExtSrc)
3129 .addImm(0)
3130 .addUse(SrcReg)
3131 .addImm(AArch64::sub_32);
3132 I.getOperand(1).setReg(ExtSrc);
3133 }
3134 return selectCopy(I, TII, MRI, TRI, RBI);
3135 }
3136
3137 case TargetOpcode::G_ZEXT:
3138 case TargetOpcode::G_SEXT_INREG:
3139 case TargetOpcode::G_SEXT: {
3140 if (selectUSMovFromExtend(I, MRI))
3141 return true;
3142
3143 unsigned Opcode = I.getOpcode();
3144 const bool IsSigned = Opcode != TargetOpcode::G_ZEXT;
3145 const Register DefReg = I.getOperand(0).getReg();
3146 Register SrcReg = I.getOperand(1).getReg();
3147 const LLT DstTy = MRI.getType(DefReg);
3148 const LLT SrcTy = MRI.getType(SrcReg);
3149 unsigned DstSize = DstTy.getSizeInBits();
3150 unsigned SrcSize = SrcTy.getSizeInBits();
3151
3152 // SEXT_INREG has the same src reg size as dst, the size of the value to be
3153 // extended is encoded in the imm.
3154 if (Opcode == TargetOpcode::G_SEXT_INREG)
3155 SrcSize = I.getOperand(2).getImm();
3156
3157 if (DstTy.isVector())
3158 return false; // Should be handled by imported patterns.
3159
3160 assert((*RBI.getRegBank(DefReg, MRI, TRI)).getID() ==(static_cast <bool> ((*RBI.getRegBank(DefReg, MRI, TRI)
).getID() == AArch64::GPRRegBankID && "Unexpected ext regbank"
) ? void (0) : __assert_fail ("(*RBI.getRegBank(DefReg, MRI, TRI)).getID() == AArch64::GPRRegBankID && \"Unexpected ext regbank\""
, "llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 3162, __extension__ __PRETTY_FUNCTION__))
3161 AArch64::GPRRegBankID &&(static_cast <bool> ((*RBI.getRegBank(DefReg, MRI, TRI)
).getID() == AArch64::GPRRegBankID && "Unexpected ext regbank"
) ? void (0) : __assert_fail ("(*RBI.getRegBank(DefReg, MRI, TRI)).getID() == AArch64::GPRRegBankID && \"Unexpected ext regbank\""
, "llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 3162, __extension__ __PRETTY_FUNCTION__))
3162 "Unexpected ext regbank")(static_cast <bool> ((*RBI.getRegBank(DefReg, MRI, TRI)
).getID() == AArch64::GPRRegBankID && "Unexpected ext regbank"
) ? void (0) : __assert_fail ("(*RBI.getRegBank(DefReg, MRI, TRI)).getID() == AArch64::GPRRegBankID && \"Unexpected ext regbank\""
, "llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 3162, __extension__ __PRETTY_FUNCTION__))
;
3163
3164 MachineInstr *ExtI;
3165
3166 // First check if we're extending the result of a load which has a dest type
3167 // smaller than 32 bits, then this zext is redundant. GPR32 is the smallest
3168 // GPR register on AArch64 and all loads which are smaller automatically
3169 // zero-extend the upper bits. E.g.
3170 // %v(s8) = G_LOAD %p, :: (load 1)
3171 // %v2(s32) = G_ZEXT %v(s8)
3172 if (!IsSigned) {
3173 auto *LoadMI = getOpcodeDef(TargetOpcode::G_LOAD, SrcReg, MRI);
3174 bool IsGPR =
3175 RBI.getRegBank(SrcReg, MRI, TRI)->getID() == AArch64::GPRRegBankID;
3176 if (LoadMI && IsGPR) {
3177 const MachineMemOperand *MemOp = *LoadMI->memoperands_begin();
3178 unsigned BytesLoaded = MemOp->getSize();
3179 if (BytesLoaded < 4 && SrcTy.getSizeInBytes() == BytesLoaded)
3180 return selectCopy(I, TII, MRI, TRI, RBI);
3181 }
3182
3183 // For the 32-bit -> 64-bit case, we can emit a mov (ORRWrs)
3184 // + SUBREG_TO_REG.
3185 //
3186 // If we are zero extending from 32 bits to 64 bits, it's possible that
3187 // the instruction implicitly does the zero extend for us. In that case,
3188 // we only need the SUBREG_TO_REG.
3189 if (IsGPR && SrcSize == 32 && DstSize == 64) {
3190 // Unlike with the G_LOAD case, we don't want to look through copies
3191 // here. (See isDef32.)
3192 MachineInstr *Def = MRI.getVRegDef(SrcReg);
3193 Register SubregToRegSrc = SrcReg;
3194
3195 // Does the instruction implicitly zero extend?
3196 if (!Def || !isDef32(*Def)) {
3197 // No. Zero out using an OR.
3198 Register OrDst = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
3199 const Register ZReg = AArch64::WZR;
3200 MIB.buildInstr(AArch64::ORRWrs, {OrDst}, {ZReg, SrcReg}).addImm(0);
3201 SubregToRegSrc = OrDst;
3202 }
3203
3204 MIB.buildInstr(AArch64::SUBREG_TO_REG, {DefReg}, {})
3205 .addImm(0)
3206 .addUse(SubregToRegSrc)
3207 .addImm(AArch64::sub_32);
3208
3209 if (!RBI.constrainGenericRegister(DefReg, AArch64::GPR64RegClass,
3210 MRI)) {
3211 LLVM_DEBUG(dbgs() << "Failed to constrain G_ZEXT destination\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "Failed to constrain G_ZEXT destination\n"
; } } while (false)
;
3212 return false;
3213 }
3214
3215 if (!RBI.constrainGenericRegister(SrcReg, AArch64::GPR32RegClass,
3216 MRI)) {
3217 LLVM_DEBUG(dbgs() << "Failed to constrain G_ZEXT source\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "Failed to constrain G_ZEXT source\n"
; } } while (false)
;
3218 return false;
3219 }
3220
3221 I.eraseFromParent();
3222 return true;
3223 }
3224 }
3225
3226 if (DstSize == 64) {
3227 if (Opcode != TargetOpcode::G_SEXT_INREG) {
3228 // FIXME: Can we avoid manually doing this?
3229 if (!RBI.constrainGenericRegister(SrcReg, AArch64::GPR32RegClass,
3230 MRI)) {
3231 LLVM_DEBUG(dbgs() << "Failed to constrain " << TII.getName(Opcode)do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "Failed to constrain " <<
TII.getName(Opcode) << " operand\n"; } } while (false)
3232 << " operand\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "Failed to constrain " <<
TII.getName(Opcode) << " operand\n"; } } while (false)
;
3233 return false;
3234 }
3235 SrcReg = MIB.buildInstr(AArch64::SUBREG_TO_REG,
3236 {&AArch64::GPR64RegClass}, {})
3237 .addImm(0)
3238 .addUse(SrcReg)
3239 .addImm(AArch64::sub_32)
3240 .getReg(0);
3241 }
3242
3243 ExtI = MIB.buildInstr(IsSigned ? AArch64::SBFMXri : AArch64::UBFMXri,
3244 {DefReg}, {SrcReg})
3245 .addImm(0)
3246 .addImm(SrcSize - 1);
3247 } else if (DstSize <= 32) {
3248 ExtI = MIB.buildInstr(IsSigned ? AArch64::SBFMWri : AArch64::UBFMWri,
3249 {DefReg}, {SrcReg})
3250 .addImm(0)
3251 .addImm(SrcSize - 1);
3252 } else {
3253 return false;
3254 }
3255
3256 constrainSelectedInstRegOperands(*ExtI, TII, TRI, RBI);
3257 I.eraseFromParent();
3258 return true;
3259 }
3260
3261 case TargetOpcode::G_SITOFP:
3262 case TargetOpcode::G_UITOFP:
3263 case TargetOpcode::G_FPTOSI:
3264 case TargetOpcode::G_FPTOUI: {
3265 const LLT DstTy = MRI.getType(I.getOperand(0).getReg()),
3266 SrcTy = MRI.getType(I.getOperand(1).getReg());
3267 const unsigned NewOpc = selectFPConvOpc(Opcode, DstTy, SrcTy);
3268 if (NewOpc == Opcode)
3269 return false;
3270
3271 I.setDesc(TII.get(NewOpc));
3272 constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3273
3274 return true;
3275 }
3276
3277 case TargetOpcode::G_FREEZE:
3278 return selectCopy(I, TII, MRI, TRI, RBI);
3279
3280 case TargetOpcode::G_INTTOPTR:
3281 // The importer is currently unable to import pointer types since they
3282 // didn't exist in SelectionDAG.
3283 return selectCopy(I, TII, MRI, TRI, RBI);
3284
3285 case TargetOpcode::G_BITCAST:
3286 // Imported SelectionDAG rules can handle every bitcast except those that
3287 // bitcast from a type to the same type. Ideally, these shouldn't occur
3288 // but we might not run an optimizer that deletes them. The other exception
3289 // is bitcasts involving pointer types, as SelectionDAG has no knowledge
3290 // of them.
3291 return selectCopy(I, TII, MRI, TRI, RBI);
3292
3293 case TargetOpcode::G_SELECT: {
3294 if (MRI.getType(I.getOperand(1).getReg()) != LLT::scalar(1)) {
3295 LLVM_DEBUG(dbgs() << "G_SELECT cond has type: " << Tydo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "G_SELECT cond has type: "
<< Ty << ", expected: " << LLT::scalar(1) <<
'\n'; } } while (false)
3296 << ", expected: " << LLT::scalar(1) << '\n')do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "G_SELECT cond has type: "
<< Ty << ", expected: " << LLT::scalar(1) <<
'\n'; } } while (false)
;
3297 return false;
3298 }
3299
3300 const Register CondReg = I.getOperand(1).getReg();
3301 const Register TReg = I.getOperand(2).getReg();
3302 const Register FReg = I.getOperand(3).getReg();
3303
3304 if (tryOptSelect(I))
3305 return true;
3306
3307 // Make sure to use an unused vreg instead of wzr, so that the peephole
3308 // optimizations will be able to optimize these.
3309 Register DeadVReg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
3310 auto TstMI = MIB.buildInstr(AArch64::ANDSWri, {DeadVReg}, {CondReg})
3311 .addImm(AArch64_AM::encodeLogicalImmediate(1, 32));
3312 constrainSelectedInstRegOperands(*TstMI, TII, TRI, RBI);
3313 if (!emitSelect(I.getOperand(0).getReg(), TReg, FReg, AArch64CC::NE, MIB))
3314 return false;
3315 I.eraseFromParent();
3316 return true;
3317 }
3318 case TargetOpcode::G_ICMP: {
3319 if (Ty.isVector())
3320 return selectVectorICmp(I, MRI);
3321
3322 if (Ty != LLT::scalar(32)) {
3323 LLVM_DEBUG(dbgs() << "G_ICMP result has type: " << Tydo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "G_ICMP result has type: "
<< Ty << ", expected: " << LLT::scalar(32)
<< '\n'; } } while (false)
3324 << ", expected: " << LLT::scalar(32) << '\n')do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "G_ICMP result has type: "
<< Ty << ", expected: " << LLT::scalar(32)
<< '\n'; } } while (false)
;
3325 return false;
3326 }
3327
3328 auto Pred = static_cast<CmpInst::Predicate>(I.getOperand(1).getPredicate());
3329 const AArch64CC::CondCode InvCC =
3330 changeICMPPredToAArch64CC(CmpInst::getInversePredicate(Pred));
3331 emitIntegerCompare(I.getOperand(2), I.getOperand(3), I.getOperand(1), MIB);
3332 emitCSINC(/*Dst=*/I.getOperand(0).getReg(), /*Src1=*/AArch64::WZR,
3333 /*Src2=*/AArch64::WZR, InvCC, MIB);
3334 I.eraseFromParent();
3335 return true;
3336 }
3337
3338 case TargetOpcode::G_FCMP: {
3339 CmpInst::Predicate Pred =
3340 static_cast<CmpInst::Predicate>(I.getOperand(1).getPredicate());
3341 if (!emitFPCompare(I.getOperand(2).getReg(), I.getOperand(3).getReg(), MIB,
3342 Pred) ||
3343 !emitCSetForFCmp(I.getOperand(0).getReg(), Pred, MIB))
3344 return false;
3345 I.eraseFromParent();
3346 return true;
3347 }
3348 case TargetOpcode::G_VASTART:
3349 return STI.isTargetDarwin() ? selectVaStartDarwin(I, MF, MRI)
3350 : selectVaStartAAPCS(I, MF, MRI);
3351 case TargetOpcode::G_INTRINSIC:
3352 return selectIntrinsic(I, MRI);
3353 case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS:
3354 return selectIntrinsicWithSideEffects(I, MRI);
3355 case TargetOpcode::G_IMPLICIT_DEF: {
3356 I.setDesc(TII.get(TargetOpcode::IMPLICIT_DEF));
3357 const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
3358 const Register DstReg = I.getOperand(0).getReg();
3359 const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
3360 const TargetRegisterClass *DstRC =
3361 getRegClassForTypeOnBank(DstTy, DstRB, RBI);
3362 RBI.constrainGenericRegister(DstReg, *DstRC, MRI);
3363 return true;
3364 }
3365 case TargetOpcode::G_BLOCK_ADDR: {
3366 if (TM.getCodeModel() == CodeModel::Large) {
3367 materializeLargeCMVal(I, I.getOperand(1).getBlockAddress(), 0);
3368 I.eraseFromParent();
3369 return true;
3370 } else {
3371 I.setDesc(TII.get(AArch64::MOVaddrBA));
3372 auto MovMI = BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::MOVaddrBA),
3373 I.getOperand(0).getReg())
3374 .addBlockAddress(I.getOperand(1).getBlockAddress(),
3375 /* Offset */ 0, AArch64II::MO_PAGE)
3376 .addBlockAddress(
3377 I.getOperand(1).getBlockAddress(), /* Offset */ 0,
3378 AArch64II::MO_NC | AArch64II::MO_PAGEOFF);
3379 I.eraseFromParent();
3380 return constrainSelectedInstRegOperands(*MovMI, TII, TRI, RBI);
3381 }
3382 }
3383 case AArch64::G_DUP: {
3384 // When the scalar of G_DUP is an s8/s16 gpr, they can't be selected by
3385 // imported patterns. Do it manually here. Avoiding generating s16 gpr is
3386 // difficult because at RBS we may end up pessimizing the fpr case if we
3387 // decided to add an anyextend to fix this. Manual selection is the most
3388 // robust solution for now.
3389 if (RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI)->getID() !=
3390 AArch64::GPRRegBankID)
3391 return false; // We expect the fpr regbank case to be imported.
3392 LLT VecTy = MRI.getType(I.getOperand(0).getReg());
3393 if (VecTy == LLT::fixed_vector(8, 8))
3394 I.setDesc(TII.get(AArch64::DUPv8i8gpr));
3395 else if (VecTy == LLT::fixed_vector(16, 8))
3396 I.setDesc(TII.get(AArch64::DUPv16i8gpr));
3397 else if (VecTy == LLT::fixed_vector(4, 16))
3398 I.setDesc(TII.get(AArch64::DUPv4i16gpr));
3399 else if (VecTy == LLT::fixed_vector(8, 16))
3400 I.setDesc(TII.get(AArch64::DUPv8i16gpr));
3401 else
3402 return false;
3403 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3404 }
3405 case TargetOpcode::G_INTRINSIC_TRUNC:
3406 return selectIntrinsicTrunc(I, MRI);
3407 case TargetOpcode::G_INTRINSIC_ROUND:
3408 return selectIntrinsicRound(I, MRI);
3409 case TargetOpcode::G_BUILD_VECTOR:
3410 return selectBuildVector(I, MRI);
3411 case TargetOpcode::G_MERGE_VALUES:
3412 return selectMergeValues(I, MRI);
3413 case TargetOpcode::G_UNMERGE_VALUES:
3414 return selectUnmergeValues(I, MRI);
3415 case TargetOpcode::G_SHUFFLE_VECTOR:
3416 return selectShuffleVector(I, MRI);
3417 case TargetOpcode::G_EXTRACT_VECTOR_ELT:
3418 return selectExtractElt(I, MRI);
3419 case TargetOpcode::G_INSERT_VECTOR_ELT:
3420 return selectInsertElt(I, MRI);
3421 case TargetOpcode::G_CONCAT_VECTORS:
3422 return selectConcatVectors(I, MRI);
3423 case TargetOpcode::G_JUMP_TABLE:
3424 return selectJumpTable(I, MRI);
3425 case TargetOpcode::G_VECREDUCE_FADD:
3426 case TargetOpcode::G_VECREDUCE_ADD:
3427 return selectReduction(I, MRI);
3428 }
3429
3430 return false;
3431}
3432
3433bool AArch64InstructionSelector::selectReduction(MachineInstr &I,
3434 MachineRegisterInfo &MRI) {
3435 Register VecReg = I.getOperand(1).getReg();
3436 LLT VecTy = MRI.getType(VecReg);
3437 if (I.getOpcode() == TargetOpcode::G_VECREDUCE_ADD) {
3438 // For <2 x i32> ADDPv2i32 generates an FPR64 value, so we need to emit
3439 // a subregister copy afterwards.
3440 if (VecTy == LLT::fixed_vector(2, 32)) {
3441 Register DstReg = I.getOperand(0).getReg();
3442 auto AddP = MIB.buildInstr(AArch64::ADDPv2i32, {&AArch64::FPR64RegClass},
3443 {VecReg, VecReg});
3444 auto Copy = MIB.buildInstr(TargetOpcode::COPY, {DstReg}, {})
3445 .addReg(AddP.getReg(0), 0, AArch64::ssub)
3446 .getReg(0);
3447 RBI.constrainGenericRegister(Copy, AArch64::FPR32RegClass, MRI);
3448 I.eraseFromParent();
3449 return constrainSelectedInstRegOperands(*AddP, TII, TRI, RBI);
3450 }
3451
3452 unsigned Opc = 0;
3453 if (VecTy == LLT::fixed_vector(16, 8))
3454 Opc = AArch64::ADDVv16i8v;
3455 else if (VecTy == LLT::fixed_vector(8, 16))
3456 Opc = AArch64::ADDVv8i16v;
3457 else if (VecTy == LLT::fixed_vector(4, 32))
3458 Opc = AArch64::ADDVv4i32v;
3459 else if (VecTy == LLT::fixed_vector(2, 64))
3460 Opc = AArch64::ADDPv2i64p;
3461 else {
3462 LLVM_DEBUG(dbgs() << "Unhandled type for add reduction")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "Unhandled type for add reduction"
; } } while (false)
;
3463 return false;
3464 }
3465 I.setDesc(TII.get(Opc));
3466 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3467 }
3468
3469 if (I.getOpcode() == TargetOpcode::G_VECREDUCE_FADD) {
3470 unsigned Opc = 0;
3471 if (VecTy == LLT::fixed_vector(2, 32))
3472 Opc = AArch64::FADDPv2i32p;
3473 else if (VecTy == LLT::fixed_vector(2, 64))
3474 Opc = AArch64::FADDPv2i64p;
3475 else {
3476 LLVM_DEBUG(dbgs() << "Unhandled type for fadd reduction")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "Unhandled type for fadd reduction"
; } } while (false)
;
3477 return false;
3478 }
3479 I.setDesc(TII.get(Opc));
3480 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3481 }
3482 return false;
3483}
3484
3485bool AArch64InstructionSelector::selectBrJT(MachineInstr &I,
3486 MachineRegisterInfo &MRI) {
3487 assert(I.getOpcode() == TargetOpcode::G_BRJT && "Expected G_BRJT")(static_cast <bool> (I.getOpcode() == TargetOpcode::G_BRJT
&& "Expected G_BRJT") ? void (0) : __assert_fail ("I.getOpcode() == TargetOpcode::G_BRJT && \"Expected G_BRJT\""
, "llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 3487, __extension__ __PRETTY_FUNCTION__))
;
3488 Register JTAddr = I.getOperand(0).getReg();
3489 unsigned JTI = I.getOperand(1).getIndex();
3490 Register Index = I.getOperand(2).getReg();
3491
3492 Register TargetReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
3493 Register ScratchReg = MRI.createVirtualRegister(&AArch64::GPR64spRegClass);
3494
3495 MF->getInfo<AArch64FunctionInfo>()->setJumpTableEntryInfo(JTI, 4, nullptr);
3496 auto JumpTableInst = MIB.buildInstr(AArch64::JumpTableDest32,
3497 {TargetReg, ScratchReg}, {JTAddr, Index})
3498 .addJumpTableIndex(JTI);
3499 // Build the indirect branch.
3500 MIB.buildInstr(AArch64::BR, {}, {TargetReg});
3501 I.eraseFromParent();
3502 return constrainSelectedInstRegOperands(*JumpTableInst, TII, TRI, RBI);
3503}
3504
3505bool AArch64InstructionSelector::selectJumpTable(MachineInstr &I,
3506 MachineRegisterInfo &MRI) {
3507 assert(I.getOpcode() == TargetOpcode::G_JUMP_TABLE && "Expected jump table")(static_cast <bool> (I.getOpcode() == TargetOpcode::G_JUMP_TABLE
&& "Expected jump table") ? void (0) : __assert_fail
("I.getOpcode() == TargetOpcode::G_JUMP_TABLE && \"Expected jump table\""
, "llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 3507, __extension__ __PRETTY_FUNCTION__))
;
3508 assert(I.getOperand(1).isJTI() && "Jump table op should have a JTI!")(static_cast <bool> (I.getOperand(1).isJTI() &&
"Jump table op should have a JTI!") ? void (0) : __assert_fail
("I.getOperand(1).isJTI() && \"Jump table op should have a JTI!\""
, "llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 3508, __extension__ __PRETTY_FUNCTION__))
;
3509
3510 Register DstReg = I.getOperand(0).getReg();
3511 unsigned JTI = I.getOperand(1).getIndex();
3512 // We generate a MOVaddrJT which will get expanded to an ADRP + ADD later.
3513 auto MovMI =
3514 MIB.buildInstr(AArch64::MOVaddrJT, {DstReg}, {})
3515 .addJumpTableIndex(JTI, AArch64II::MO_PAGE)
3516 .addJumpTableIndex(JTI, AArch64II::MO_NC | AArch64II::MO_PAGEOFF);
3517 I.eraseFromParent();
3518 return constrainSelectedInstRegOperands(*MovMI, TII, TRI, RBI);
3519}
3520
3521bool AArch64InstructionSelector::selectTLSGlobalValue(
3522 MachineInstr &I, MachineRegisterInfo &MRI) {
3523 if (!STI.isTargetMachO())
3524 return false;
3525 MachineFunction &MF = *I.getParent()->getParent();
3526 MF.getFrameInfo().setAdjustsStack(true);
3527
3528 const auto &GlobalOp = I.getOperand(1);
3529 assert(GlobalOp.getOffset() == 0 &&(static_cast <bool> (GlobalOp.getOffset() == 0 &&
"Shouldn't have an offset on TLS globals!") ? void (0) : __assert_fail
("GlobalOp.getOffset() == 0 && \"Shouldn't have an offset on TLS globals!\""
, "llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 3530, __extension__ __PRETTY_FUNCTION__))
3530 "Shouldn't have an offset on TLS globals!")(static_cast <bool> (GlobalOp.getOffset() == 0 &&
"Shouldn't have an offset on TLS globals!") ? void (0) : __assert_fail
("GlobalOp.getOffset() == 0 && \"Shouldn't have an offset on TLS globals!\""
, "llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 3530, __extension__ __PRETTY_FUNCTION__))
;
3531 const GlobalValue &GV = *GlobalOp.getGlobal();
3532
3533 auto LoadGOT =
3534 MIB.buildInstr(AArch64::LOADgot, {&AArch64::GPR64commonRegClass}, {})
3535 .addGlobalAddress(&GV, 0, AArch64II::MO_TLS);
3536
3537 auto Load = MIB.buildInstr(AArch64::LDRXui, {&AArch64::GPR64commonRegClass},
3538 {LoadGOT.getReg(0)})
3539 .addImm(0);
3540
3541 MIB.buildCopy(Register(AArch64::X0), LoadGOT.getReg(0));
3542 // TLS calls preserve all registers except those that absolutely must be
3543 // trashed: X0 (it takes an argument), LR (it's a call) and NZCV (let's not be
3544 // silly).
3545 MIB.buildInstr(getBLRCallOpcode(MF), {}, {Load})
3546 .addUse(AArch64::X0, RegState::Implicit)
3547 .addDef(AArch64::X0, RegState::Implicit)
3548 .addRegMask(TRI.getTLSCallPreservedMask());
3549
3550 MIB.buildCopy(I.getOperand(0).getReg(), Register(AArch64::X0));
3551 RBI.constrainGenericRegister(I.getOperand(0).getReg(), AArch64::GPR64RegClass,
3552 MRI);
3553 I.eraseFromParent();
3554 return true;
3555}
3556
3557bool AArch64InstructionSelector::selectIntrinsicTrunc(
3558 MachineInstr &I, MachineRegisterInfo &MRI) const {
3559 const LLT SrcTy = MRI.getType(I.getOperand(0).getReg());
3560
3561 // Select the correct opcode.
3562 unsigned Opc = 0;
3563 if (!SrcTy.isVector()) {
3564 switch (SrcTy.getSizeInBits()) {
3565 default:
3566 case 16:
3567 Opc = AArch64::FRINTZHr;
3568 break;
3569 case 32:
3570 Opc = AArch64::FRINTZSr;
3571 break;
3572 case 64:
3573 Opc = AArch64::FRINTZDr;
3574 break;
3575 }
3576 } else {
3577 unsigned NumElts = SrcTy.getNumElements();
3578 switch (SrcTy.getElementType().getSizeInBits()) {
3579 default:
3580 break;
3581 case 16:
3582 if (NumElts == 4)
3583 Opc = AArch64::FRINTZv4f16;
3584 else if (NumElts == 8)
3585 Opc = AArch64::FRINTZv8f16;
3586 break;
3587 case 32:
3588 if (NumElts == 2)
3589 Opc = AArch64::FRINTZv2f32;
3590 else if (NumElts == 4)
3591 Opc = AArch64::FRINTZv4f32;
3592 break;
3593 case 64:
3594 if (NumElts == 2)
3595 Opc = AArch64::FRINTZv2f64;
3596 break;
3597 }
3598 }
3599
3600 if (!Opc) {
3601 // Didn't get an opcode above, bail.
3602 LLVM_DEBUG(dbgs() << "Unsupported type for G_INTRINSIC_TRUNC!\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "Unsupported type for G_INTRINSIC_TRUNC!\n"
; } } while (false)
;
3603 return false;
3604 }
3605
3606 // Legalization would have set us up perfectly for this; we just need to
3607 // set the opcode and move on.
3608 I.setDesc(TII.get(Opc));
3609 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3610}
3611
3612bool AArch64InstructionSelector::selectIntrinsicRound(
3613 MachineInstr &I, MachineRegisterInfo &MRI) const {
3614 const LLT SrcTy = MRI.getType(I.getOperand(0).getReg());
3615
3616 // Select the correct opcode.
3617 unsigned Opc = 0;
3618 if (!SrcTy.isVector()) {
3619 switch (SrcTy.getSizeInBits()) {
3620 default:
3621 case 16:
3622 Opc = AArch64::FRINTAHr;
3623 break;
3624 case 32:
3625 Opc = AArch64::FRINTASr;
3626 break;
3627 case 64:
3628 Opc = AArch64::FRINTADr;
3629 break;
3630 }
3631 } else {
3632 unsigned NumElts = SrcTy.getNumElements();
3633 switch (SrcTy.getElementType().getSizeInBits()) {
3634 default:
3635 break;
3636 case 16:
3637 if (NumElts == 4)
3638 Opc = AArch64::FRINTAv4f16;
3639 else if (NumElts == 8)
3640 Opc = AArch64::FRINTAv8f16;
3641 break;
3642 case 32:
3643 if (NumElts == 2)
3644 Opc = AArch64::FRINTAv2f32;
3645 else if (NumElts == 4)
3646 Opc = AArch64::FRINTAv4f32;
3647 break;
3648 case 64:
3649 if (NumElts == 2)
3650 Opc = AArch64::FRINTAv2f64;
3651 break;
3652 }
3653 }
3654
3655 if (!Opc) {
3656 // Didn't get an opcode above, bail.
3657 LLVM_DEBUG(dbgs() << "Unsupported type for G_INTRINSIC_ROUND!\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "Unsupported type for G_INTRINSIC_ROUND!\n"
; } } while (false)
;
3658 return false;
3659 }
3660
3661 // Legalization would have set us up perfectly for this; we just need to
3662 // set the opcode and move on.
3663 I.setDesc(TII.get(Opc));
3664 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3665}
3666
3667bool AArch64InstructionSelector::selectVectorICmp(
3668 MachineInstr &I, MachineRegisterInfo &MRI) {
3669 Register DstReg = I.getOperand(0).getReg();
3670 LLT DstTy = MRI.getType(DstReg);
3671 Register SrcReg = I.getOperand(2).getReg();
3672 Register Src2Reg = I.getOperand(3).getReg();
3673 LLT SrcTy = MRI.getType(SrcReg);
3674
3675 unsigned SrcEltSize = SrcTy.getElementType().getSizeInBits();
3676 unsigned NumElts = DstTy.getNumElements();
3677
3678 // First index is element size, 0 == 8b, 1 == 16b, 2 == 32b, 3 == 64b
3679 // Second index is num elts, 0 == v2, 1 == v4, 2 == v8, 3 == v16
3680 // Third index is cc opcode:
3681 // 0 == eq
3682 // 1 == ugt
3683 // 2 == uge
3684 // 3 == ult
3685 // 4 == ule
3686 // 5 == sgt
3687 // 6 == sge
3688 // 7 == slt
3689 // 8 == sle
3690 // ne is done by negating 'eq' result.
3691
3692 // This table below assumes that for some comparisons the operands will be
3693 // commuted.
3694 // ult op == commute + ugt op
3695 // ule op == commute + uge op
3696 // slt op == commute + sgt op
3697 // sle op == commute + sge op
3698 unsigned PredIdx = 0;
3699 bool SwapOperands = false;
3700 CmpInst::Predicate Pred = (CmpInst::Predicate)I.getOperand(1).getPredicate();
3701 switch (Pred) {
3702 case CmpInst::ICMP_NE:
3703 case CmpInst::ICMP_EQ:
3704 PredIdx = 0;
3705 break;
3706 case CmpInst::ICMP_UGT:
3707 PredIdx = 1;
3708 break;
3709 case CmpInst::ICMP_UGE:
3710 PredIdx = 2;
3711 break;
3712 case CmpInst::ICMP_ULT:
3713 PredIdx = 3;
3714 SwapOperands = true;
3715 break;
3716 case CmpInst::ICMP_ULE:
3717 PredIdx = 4;
3718 SwapOperands = true;
3719 break;
3720 case CmpInst::ICMP_SGT:
3721 PredIdx = 5;
3722 break;
3723 case CmpInst::ICMP_SGE:
3724 PredIdx = 6;
3725 break;
3726 case CmpInst::ICMP_SLT:
3727 PredIdx = 7;
3728 SwapOperands = true;
3729 break;
3730 case CmpInst::ICMP_SLE:
3731 PredIdx = 8;
3732 SwapOperands = true;
3733 break;
3734 default:
3735 llvm_unreachable("Unhandled icmp predicate")::llvm::llvm_unreachable_internal("Unhandled icmp predicate",
"llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 3735)
;
3736 return false;
3737 }
3738
3739 // This table obviously should be tablegen'd when we have our GISel native
3740 // tablegen selector.
3741
3742 static const unsigned OpcTable[4][4][9] = {
3743 {
3744 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3745 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3746 0 /* invalid */},
3747 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3748 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3749 0 /* invalid */},
3750 {AArch64::CMEQv8i8, AArch64::CMHIv8i8, AArch64::CMHSv8i8,
3751 AArch64::CMHIv8i8, AArch64::CMHSv8i8, AArch64::CMGTv8i8,
3752 AArch64::CMGEv8i8, AArch64::CMGTv8i8, AArch64::CMGEv8i8},
3753 {AArch64::CMEQv16i8, AArch64::CMHIv16i8, AArch64::CMHSv16i8,
3754 AArch64::CMHIv16i8, AArch64::CMHSv16i8, AArch64::CMGTv16i8,
3755 AArch64::CMGEv16i8, AArch64::CMGTv16i8, AArch64::CMGEv16i8}
3756 },
3757 {
3758 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3759 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3760 0 /* invalid */},
3761 {AArch64::CMEQv4i16, AArch64::CMHIv4i16, AArch64::CMHSv4i16,
3762 AArch64::CMHIv4i16, AArch64::CMHSv4i16, AArch64::CMGTv4i16,
3763 AArch64::CMGEv4i16, AArch64::CMGTv4i16, AArch64::CMGEv4i16},
3764 {AArch64::CMEQv8i16, AArch64::CMHIv8i16, AArch64::CMHSv8i16,
3765 AArch64::CMHIv8i16, AArch64::CMHSv8i16, AArch64::CMGTv8i16,
3766 AArch64::CMGEv8i16, AArch64::CMGTv8i16, AArch64::CMGEv8i16},
3767 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3768 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3769 0 /* invalid */}
3770 },
3771 {
3772 {AArch64::CMEQv2i32, AArch64::CMHIv2i32, AArch64::CMHSv2i32,
3773 AArch64::CMHIv2i32, AArch64::CMHSv2i32, AArch64::CMGTv2i32,
3774 AArch64::CMGEv2i32, AArch64::CMGTv2i32, AArch64::CMGEv2i32},
3775 {AArch64::CMEQv4i32, AArch64::CMHIv4i32, AArch64::CMHSv4i32,
3776 AArch64::CMHIv4i32, AArch64::CMHSv4i32, AArch64::CMGTv4i32,
3777 AArch64::CMGEv4i32, AArch64::CMGTv4i32, AArch64::CMGEv4i32},
3778 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3779 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3780 0 /* invalid */},
3781 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3782 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3783 0 /* invalid */}
3784 },
3785 {
3786 {AArch64::CMEQv2i64, AArch64::CMHIv2i64, AArch64::CMHSv2i64,
3787 AArch64::CMHIv2i64, AArch64::CMHSv2i64, AArch64::CMGTv2i64,
3788 AArch64::CMGEv2i64, AArch64::CMGTv2i64, AArch64::CMGEv2i64},
3789 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3790 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3791 0 /* invalid */},
3792 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3793 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3794 0 /* invalid */},
3795 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3796 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3797 0 /* invalid */}
3798 },
3799 };
3800 unsigned EltIdx = Log2_32(SrcEltSize / 8);
3801 unsigned NumEltsIdx = Log2_32(NumElts / 2);
3802 unsigned Opc = OpcTable[EltIdx][NumEltsIdx][PredIdx];
3803 if (!Opc) {
3804 LLVM_DEBUG(dbgs() << "Could not map G_ICMP to cmp opcode")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "Could not map G_ICMP to cmp opcode"
; } } while (false)
;
3805 return false;
3806 }
3807
3808 const RegisterBank &VecRB = *RBI.getRegBank(SrcReg, MRI, TRI);
3809 const TargetRegisterClass *SrcRC =
3810 getRegClassForTypeOnBank(SrcTy, VecRB, RBI, true);
3811 if (!SrcRC) {
3812 LLVM_DEBUG(dbgs() << "Could not determine source register class.\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "Could not determine source register class.\n"
; } } while (false)
;
3813 return false;
3814 }
3815
3816 unsigned NotOpc = Pred == ICmpInst::ICMP_NE ? AArch64::NOTv8i8 : 0;
3817 if (SrcTy.getSizeInBits() == 128)
3818 NotOpc = NotOpc ? AArch64::NOTv16i8 : 0;
3819
3820 if (SwapOperands)
3821 std::swap(SrcReg, Src2Reg);
3822
3823 auto Cmp = MIB.buildInstr(Opc, {SrcRC}, {SrcReg, Src2Reg});
3824 constrainSelectedInstRegOperands(*Cmp, TII, TRI, RBI);
3825
3826 // Invert if we had a 'ne' cc.
3827 if (NotOpc) {
3828 Cmp = MIB.buildInstr(NotOpc, {DstReg}, {Cmp});
3829 constrainSelectedInstRegOperands(*Cmp, TII, TRI, RBI);
3830 } else {
3831 MIB.buildCopy(DstReg, Cmp.getReg(0));
3832 }
3833 RBI.constrainGenericRegister(DstReg, *SrcRC, MRI);
3834 I.eraseFromParent();
3835 return true;
3836}
3837
3838MachineInstr *AArch64InstructionSelector::emitScalarToVector(
3839 unsigned EltSize, const TargetRegisterClass *DstRC, Register Scalar,
3840 MachineIRBuilder &MIRBuilder) const {
3841 auto Undef = MIRBuilder.buildInstr(TargetOpcode::IMPLICIT_DEF, {DstRC}, {});
3842
3843 auto BuildFn = [&](unsigned SubregIndex) {
3844 auto Ins =
3845 MIRBuilder
3846 .buildInstr(TargetOpcode::INSERT_SUBREG, {DstRC}, {Undef, Scalar})
3847 .addImm(SubregIndex);
3848 constrainSelectedInstRegOperands(*Undef, TII, TRI, RBI);
3849 constrainSelectedInstRegOperands(*Ins, TII, TRI, RBI);
3850 return &*Ins;
3851 };
3852
3853 switch (EltSize) {
3854 case 16:
3855 return BuildFn(AArch64::hsub);
3856 case 32:
3857 return BuildFn(AArch64::ssub);
3858 case 64:
3859 return BuildFn(AArch64::dsub);
3860 default:
3861 return nullptr;
3862 }
3863}
3864
3865bool AArch64InstructionSelector::selectMergeValues(
3866 MachineInstr &I, MachineRegisterInfo &MRI) {
3867 assert(I.getOpcode() == TargetOpcode::G_MERGE_VALUES && "unexpected opcode")(static_cast <bool> (I.getOpcode() == TargetOpcode::G_MERGE_VALUES
&& "unexpected opcode") ? void (0) : __assert_fail (
"I.getOpcode() == TargetOpcode::G_MERGE_VALUES && \"unexpected opcode\""
, "llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 3867, __extension__ __PRETTY_FUNCTION__))
;
3868 const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
3869 const LLT SrcTy = MRI.getType(I.getOperand(1).getReg());
3870 assert(!DstTy.isVector() && !SrcTy.isVector() && "invalid merge operation")(static_cast <bool> (!DstTy.isVector() && !SrcTy
.isVector() && "invalid merge operation") ? void (0) :
__assert_fail ("!DstTy.isVector() && !SrcTy.isVector() && \"invalid merge operation\""
, "llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 3870, __extension__ __PRETTY_FUNCTION__))
;
3871 const RegisterBank &RB = *RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI);
3872
3873 if (I.getNumOperands() != 3)
3874 return false;
3875
3876 // Merging 2 s64s into an s128.
3877 if (DstTy == LLT::scalar(128)) {
3878 if (SrcTy.getSizeInBits() != 64)
3879 return false;
3880 Register DstReg = I.getOperand(0).getReg();
3881 Register Src1Reg = I.getOperand(1).getReg();
3882 Register Src2Reg = I.getOperand(2).getReg();
3883 auto Tmp = MIB.buildInstr(TargetOpcode::IMPLICIT_DEF, {DstTy}, {});
3884 MachineInstr *InsMI =
3885 emitLaneInsert(None, Tmp.getReg(0), Src1Reg, /* LaneIdx */ 0, RB, MIB);
3886 if (!InsMI)
3887 return false;
3888 MachineInstr *Ins2MI = emitLaneInsert(DstReg, InsMI->getOperand(0).getReg(),
3889 Src2Reg, /* LaneIdx */ 1, RB, MIB);
3890 if (!Ins2MI)
3891 return false;
3892 constrainSelectedInstRegOperands(*InsMI, TII, TRI, RBI);
3893 constrainSelectedInstRegOperands(*Ins2MI, TII, TRI, RBI);
3894 I.eraseFromParent();
3895 return true;
3896 }
3897
3898 if (RB.getID() != AArch64::GPRRegBankID)
3899 return false;
3900
3901 if (DstTy.getSizeInBits() != 64 || SrcTy.getSizeInBits() != 32)
3902 return false;
3903
3904 auto *DstRC = &AArch64::GPR64RegClass;
3905 Register SubToRegDef = MRI.createVirtualRegister(DstRC);
3906 MachineInstr &SubRegMI = *BuildMI(*I.getParent(), I, I.getDebugLoc(),
3907 TII.get(TargetOpcode::SUBREG_TO_REG))
3908 .addDef(SubToRegDef)
3909 .addImm(0)
3910 .addUse(I.getOperand(1).getReg())
3911 .addImm(AArch64::sub_32);
3912 Register SubToRegDef2 = MRI.createVirtualRegister(DstRC);
3913 // Need to anyext the second scalar before we can use bfm
3914 MachineInstr &SubRegMI2 = *BuildMI(*I.getParent(), I, I.getDebugLoc(),
3915 TII.get(TargetOpcode::SUBREG_TO_REG))
3916 .addDef(SubToRegDef2)
3917 .addImm(0)
3918 .addUse(I.getOperand(2).getReg())
3919 .addImm(AArch64::sub_32);
3920 MachineInstr &BFM =
3921 *BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::BFMXri))
3922 .addDef(I.getOperand(0).getReg())
3923 .addUse(SubToRegDef)
3924 .addUse(SubToRegDef2)
3925 .addImm(32)
3926 .addImm(31);
3927 constrainSelectedInstRegOperands(SubRegMI, TII, TRI, RBI);
3928 constrainSelectedInstRegOperands(SubRegMI2, TII, TRI, RBI);
3929 constrainSelectedInstRegOperands(BFM, TII, TRI, RBI);
3930 I.eraseFromParent();
3931 return true;
3932}
3933
3934static bool getLaneCopyOpcode(unsigned &CopyOpc, unsigned &ExtractSubReg,
3935 const unsigned EltSize) {
3936 // Choose a lane copy opcode and subregister based off of the size of the
3937 // vector's elements.
3938 switch (EltSize) {
3939 case 8:
3940 CopyOpc = AArch64::DUPi8;
3941 ExtractSubReg = AArch64::bsub;
3942 break;
3943 case 16:
3944 CopyOpc = AArch64::DUPi16;
3945 ExtractSubReg = AArch64::hsub;
3946 break;
3947 case 32:
3948 CopyOpc = AArch64::DUPi32;
3949 ExtractSubReg = AArch64::ssub;
3950 break;
3951 case 64:
3952 CopyOpc = AArch64::DUPi64;
3953 ExtractSubReg = AArch64::dsub;
3954 break;
3955 default:
3956 // Unknown size, bail out.
3957 LLVM_DEBUG(dbgs() << "Elt size '" << EltSize << "' unsupported.\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "Elt size '" << EltSize
<< "' unsupported.\n"; } } while (false)
;
3958 return false;
3959 }
3960 return true;
3961}
3962
3963MachineInstr *AArch64InstructionSelector::emitExtractVectorElt(
3964 Optional<Register> DstReg, const RegisterBank &DstRB, LLT ScalarTy,
3965 Register VecReg, unsigned LaneIdx, MachineIRBuilder &MIRBuilder) const {
3966 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
3967 unsigned CopyOpc = 0;
3968 unsigned ExtractSubReg = 0;
3969 if (!getLaneCopyOpcode(CopyOpc, ExtractSubReg, ScalarTy.getSizeInBits())) {
3970 LLVM_DEBUG(do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "Couldn't determine lane copy opcode for instruction.\n"
; } } while (false)
3971 dbgs() << "Couldn't determine lane copy opcode for instruction.\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "Couldn't determine lane copy opcode for instruction.\n"
; } } while (false)
;
3972 return nullptr;
3973 }
3974
3975 const TargetRegisterClass *DstRC =
3976 getRegClassForTypeOnBank(ScalarTy, DstRB, RBI, true);
3977 if (!DstRC) {
3978 LLVM_DEBUG(dbgs() << "Could not determine destination register class.\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "Could not determine destination register class.\n"
; } } while (false)
;
3979 return nullptr;
3980 }
3981
3982 const RegisterBank &VecRB = *RBI.getRegBank(VecReg, MRI, TRI);
3983 const LLT &VecTy = MRI.getType(VecReg);
3984 const TargetRegisterClass *VecRC =
3985 getRegClassForTypeOnBank(VecTy, VecRB, RBI, true);
3986 if (!VecRC) {
3987 LLVM_DEBUG(dbgs() << "Could not determine source register class.\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "Could not determine source register class.\n"
; } } while (false)
;
3988 return nullptr;
3989 }
3990
3991 // The register that we're going to copy into.
3992 Register InsertReg = VecReg;
3993 if (!DstReg)
3994 DstReg = MRI.createVirtualRegister(DstRC);
3995 // If the lane index is 0, we just use a subregister COPY.
3996 if (LaneIdx == 0) {
3997 auto Copy = MIRBuilder.buildInstr(TargetOpcode::COPY, {*DstReg}, {})
3998 .addReg(VecReg, 0, ExtractSubReg);
3999 RBI.constrainGenericRegister(*DstReg, *DstRC, MRI);
4000 return &*Copy;
4001 }
4002
4003 // Lane copies require 128-bit wide registers. If we're dealing with an
4004 // unpacked vector, then we need to move up to that width. Insert an implicit
4005 // def and a subregister insert to get us there.
4006 if (VecTy.getSizeInBits() != 128) {
4007 MachineInstr *ScalarToVector = emitScalarToVector(
4008 VecTy.getSizeInBits(), &AArch64::FPR128RegClass, VecReg, MIRBuilder);
4009 if (!ScalarToVector)
4010 return nullptr;
4011 InsertReg = ScalarToVector->getOperand(0).getReg();
4012 }
4013
4014 MachineInstr *LaneCopyMI =
4015 MIRBuilder.buildInstr(CopyOpc, {*DstReg}, {InsertReg}).addImm(LaneIdx);
4016 constrainSelectedInstRegOperands(*LaneCopyMI, TII, TRI, RBI);
4017
4018 // Make sure that we actually constrain the initial copy.
4019 RBI.constrainGenericRegister(*DstReg, *DstRC, MRI);
4020 return LaneCopyMI;
4021}
4022
4023bool AArch64InstructionSelector::selectExtractElt(
4024 MachineInstr &I, MachineRegisterInfo &MRI) {
4025 assert(I.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT &&(static_cast <bool> (I.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT
&& "unexpected opcode!") ? void (0) : __assert_fail (
"I.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT && \"unexpected opcode!\""
, "llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 4026, __extension__ __PRETTY_FUNCTION__))
4026 "unexpected opcode!")(static_cast <bool> (I.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT
&& "unexpected opcode!") ? void (0) : __assert_fail (
"I.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT && \"unexpected opcode!\""
, "llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 4026, __extension__ __PRETTY_FUNCTION__))
;
4027 Register DstReg = I.getOperand(0).getReg();
4028 const LLT NarrowTy = MRI.getType(DstReg);
4029 const Register SrcReg = I.getOperand(1).getReg();
4030 const LLT WideTy = MRI.getType(SrcReg);
4031 (void)WideTy;
4032 assert(WideTy.getSizeInBits() >= NarrowTy.getSizeInBits() &&(static_cast <bool> (WideTy.getSizeInBits() >= NarrowTy
.getSizeInBits() && "source register size too small!"
) ? void (0) : __assert_fail ("WideTy.getSizeInBits() >= NarrowTy.getSizeInBits() && \"source register size too small!\""
, "llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 4033, __extension__ __PRETTY_FUNCTION__))
4033 "source register size too small!")(static_cast <bool> (WideTy.getSizeInBits() >= NarrowTy
.getSizeInBits() && "source register size too small!"
) ? void (0) : __assert_fail ("WideTy.getSizeInBits() >= NarrowTy.getSizeInBits() && \"source register size too small!\""
, "llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 4033, __extension__ __PRETTY_FUNCTION__))
;
4034 assert(!NarrowTy.isVector() && "cannot extract vector into vector!")(static_cast <bool> (!NarrowTy.isVector() && "cannot extract vector into vector!"
) ? void (0) : __assert_fail ("!NarrowTy.isVector() && \"cannot extract vector into vector!\""
, "llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 4034, __extension__ __PRETTY_FUNCTION__))
;
4035
4036 // Need the lane index to determine the correct copy opcode.
4037 MachineOperand &LaneIdxOp = I.getOperand(2);
4038 assert(LaneIdxOp.isReg() && "Lane index operand was not a register?")(static_cast <bool> (LaneIdxOp.isReg() && "Lane index operand was not a register?"
) ? void (0) : __assert_fail ("LaneIdxOp.isReg() && \"Lane index operand was not a register?\""
, "llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 4038, __extension__ __PRETTY_FUNCTION__))
;
4039
4040 if (RBI.getRegBank(DstReg, MRI, TRI)->getID() != AArch64::FPRRegBankID) {
4041 LLVM_DEBUG(dbgs() << "Cannot extract into GPR.\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "Cannot extract into GPR.\n"
; } } while (false)
;
4042 return false;
4043 }
4044
4045 // Find the index to extract from.
4046 auto VRegAndVal = getIConstantVRegValWithLookThrough(LaneIdxOp.getReg(), MRI);
4047 if (!VRegAndVal)
4048 return false;
4049 unsigned LaneIdx = VRegAndVal->Value.getSExtValue();
4050
4051
4052 const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
4053 MachineInstr *Extract = emitExtractVectorElt(DstReg, DstRB, NarrowTy, SrcReg,
4054 LaneIdx, MIB);
4055 if (!Extract)
4056 return false;
4057
4058 I.eraseFromParent();
4059 return true;
4060}
4061
4062bool AArch64InstructionSelector::selectSplitVectorUnmerge(
4063 MachineInstr &I, MachineRegisterInfo &MRI) {
4064 unsigned NumElts = I.getNumOperands() - 1;
4065 Register SrcReg = I.getOperand(NumElts).getReg();
4066 const LLT NarrowTy = MRI.getType(I.getOperand(0).getReg());
4067 const LLT SrcTy = MRI.getType(SrcReg);
4068
4069 assert(NarrowTy.isVector() && "Expected an unmerge into vectors")(static_cast <bool> (NarrowTy.isVector() && "Expected an unmerge into vectors"
) ? void (0) : __assert_fail ("NarrowTy.isVector() && \"Expected an unmerge into vectors\""
, "llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 4069, __extension__ __PRETTY_FUNCTION__))
;
4070 if (SrcTy.getSizeInBits() > 128) {
4071 LLVM_DEBUG(dbgs() << "Unexpected vector type for vec split unmerge")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "Unexpected vector type for vec split unmerge"
; } } while (false)
;
4072 return false;
4073 }
4074
4075 // We implement a split vector operation by treating the sub-vectors as
4076 // scalars and extracting them.
4077 const RegisterBank &DstRB =
4078 *RBI.getRegBank(I.getOperand(0).getReg(), MRI, TRI);
4079 for (unsigned OpIdx = 0; OpIdx < NumElts; ++OpIdx) {
4080 Register Dst = I.getOperand(OpIdx).getReg();
4081 MachineInstr *Extract =
4082 emitExtractVectorElt(Dst, DstRB, NarrowTy, SrcReg, OpIdx, MIB);
4083 if (!Extract)
4084 return false;
4085 }
4086 I.eraseFromParent();
4087 return true;
4088}
4089
4090bool AArch64InstructionSelector::selectUnmergeValues(MachineInstr &I,
4091 MachineRegisterInfo &MRI) {
4092 assert(I.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&(static_cast <bool> (I.getOpcode() == TargetOpcode::G_UNMERGE_VALUES
&& "unexpected opcode") ? void (0) : __assert_fail (
"I.getOpcode() == TargetOpcode::G_UNMERGE_VALUES && \"unexpected opcode\""
, "llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 4093, __extension__ __PRETTY_FUNCTION__))
4093 "unexpected opcode")(static_cast <bool> (I.getOpcode() == TargetOpcode::G_UNMERGE_VALUES
&& "unexpected opcode") ? void (0) : __assert_fail (
"I.getOpcode() == TargetOpcode::G_UNMERGE_VALUES && \"unexpected opcode\""
, "llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 4093, __extension__ __PRETTY_FUNCTION__))
;
4094
4095 // TODO: Handle unmerging into GPRs and from scalars to scalars.
4096 if (RBI.getRegBank(I.getOperand(0).getReg(), MRI, TRI)->getID() !=
4097 AArch64::FPRRegBankID ||
4098 RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI)->getID() !=
4099 AArch64::FPRRegBankID) {
4100 LLVM_DEBUG(dbgs() << "Unmerging vector-to-gpr and scalar-to-scalar "do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "Unmerging vector-to-gpr and scalar-to-scalar "
"currently unsupported.\n"; } } while (false)
4101 "currently unsupported.\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "Unmerging vector-to-gpr and scalar-to-scalar "
"currently unsupported.\n"; } } while (false)
;
4102 return false;
4103 }
4104
4105 // The last operand is the vector source register, and every other operand is
4106 // a register to unpack into.
4107 unsigned NumElts = I.getNumOperands() - 1;
4108 Register SrcReg = I.getOperand(NumElts).getReg();
4109 const LLT NarrowTy = MRI.getType(I.getOperand(0).getReg());
4110 const LLT WideTy = MRI.getType(SrcReg);
4111 (void)WideTy;
4112 assert((WideTy.isVector() || WideTy.getSizeInBits() == 128) &&(static_cast <bool> ((WideTy.isVector() || WideTy.getSizeInBits
() == 128) && "can only unmerge from vector or s128 types!"
) ? void (0) : __assert_fail ("(WideTy.isVector() || WideTy.getSizeInBits() == 128) && \"can only unmerge from vector or s128 types!\""
, "llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 4113, __extension__ __PRETTY_FUNCTION__))
4113 "can only unmerge from vector or s128 types!")(static_cast <bool> ((WideTy.isVector() || WideTy.getSizeInBits
() == 128) && "can only unmerge from vector or s128 types!"
) ? void (0) : __assert_fail ("(WideTy.isVector() || WideTy.getSizeInBits() == 128) && \"can only unmerge from vector or s128 types!\""
, "llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 4113, __extension__ __PRETTY_FUNCTION__))
;
4114 assert(WideTy.getSizeInBits() > NarrowTy.getSizeInBits() &&(static_cast <bool> (WideTy.getSizeInBits() > NarrowTy
.getSizeInBits() && "source register size too small!"
) ? void (0) : __assert_fail ("WideTy.getSizeInBits() > NarrowTy.getSizeInBits() && \"source register size too small!\""
, "llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 4115, __extension__ __PRETTY_FUNCTION__))
4115 "source register size too small!")(static_cast <bool> (WideTy.getSizeInBits() > NarrowTy
.getSizeInBits() && "source register size too small!"
) ? void (0) : __assert_fail ("WideTy.getSizeInBits() > NarrowTy.getSizeInBits() && \"source register size too small!\""
, "llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 4115, __extension__ __PRETTY_FUNCTION__))
;
4116
4117 if (!NarrowTy.isScalar())
4118 return selectSplitVectorUnmerge(I, MRI);
4119
4120 // Choose a lane copy opcode and subregister based off of the size of the
4121 // vector's elements.
4122 unsigned CopyOpc = 0;
4123 unsigned ExtractSubReg = 0;
4124 if (!getLaneCopyOpcode(CopyOpc, ExtractSubReg, NarrowTy.getSizeInBits()))
4125 return false;
4126
4127 // Set up for the lane copies.
4128 MachineBasicBlock &MBB = *I.getParent();
4129
4130 // Stores the registers we'll be copying from.
4131 SmallVector<Register, 4> InsertRegs;
4132
4133 // We'll use the first register twice, so we only need NumElts-1 registers.
4134 unsigned NumInsertRegs = NumElts - 1;
4135
4136 // If our elements fit into exactly 128 bits, then we can copy from the source
4137 // directly. Otherwise, we need to do a bit of setup with some subregister
4138 // inserts.
4139 if (NarrowTy.getSizeInBits() * NumElts == 128) {
4140 InsertRegs = SmallVector<Register, 4>(NumInsertRegs, SrcReg);
4141 } else {
4142 // No. We have to perform subregister inserts. For each insert, create an
4143 // implicit def and a subregister insert, and save the register we create.
4144 const TargetRegisterClass *RC =
4145 getMinClassForRegBank(*RBI.getRegBank(SrcReg, MRI, TRI),
4146 WideTy.getScalarSizeInBits() * NumElts);
4147 unsigned SubReg = 0;
4148 bool Found = getSubRegForClass(RC, TRI, SubReg);
4149 (void)Found;
4150 assert(Found && "expected to find last operand's subeg idx")(static_cast <bool> (Found && "expected to find last operand's subeg idx"
) ? void (0) : __assert_fail ("Found && \"expected to find last operand's subeg idx\""
, "llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 4150, __extension__ __PRETTY_FUNCTION__))
;
4151 for (unsigned Idx = 0; Idx < NumInsertRegs; ++Idx) {
4152 Register ImpDefReg = MRI.createVirtualRegister(&AArch64::FPR128RegClass);
4153 MachineInstr &ImpDefMI =
4154 *BuildMI(MBB, I, I.getDebugLoc(), TII.get(TargetOpcode::IMPLICIT_DEF),
4155 ImpDefReg);
4156
4157 // Now, create the subregister insert from SrcReg.
4158 Register InsertReg = MRI.createVirtualRegister(&AArch64::FPR128RegClass);
4159 MachineInstr &InsMI =
4160 *BuildMI(MBB, I, I.getDebugLoc(),
4161 TII.get(TargetOpcode::INSERT_SUBREG), InsertReg)
4162 .addUse(ImpDefReg)
4163 .addUse(SrcReg)
4164 .addImm(SubReg);
4165
4166 constrainSelectedInstRegOperands(ImpDefMI, TII, TRI, RBI);
4167 constrainSelectedInstRegOperands(InsMI, TII, TRI, RBI);
4168
4169 // Save the register so that we can copy from it after.
4170 InsertRegs.push_back(InsertReg);
4171 }
4172 }
4173
4174 // Now that we've created any necessary subregister inserts, we can
4175 // create the copies.
4176 //
4177 // Perform the first copy separately as a subregister copy.
4178 Register CopyTo = I.getOperand(0).getReg();
4179 auto FirstCopy = MIB.buildInstr(TargetOpcode::COPY, {CopyTo}, {})
4180 .addReg(InsertRegs[0], 0, ExtractSubReg);
4181 constrainSelectedInstRegOperands(*FirstCopy, TII, TRI, RBI);
4182
4183 // Now, perform the remaining copies as vector lane copies.
4184 unsigned LaneIdx = 1;
4185 for (Register InsReg : InsertRegs) {
4186 Register CopyTo = I.getOperand(LaneIdx).getReg();
4187 MachineInstr &CopyInst =
4188 *BuildMI(MBB, I, I.getDebugLoc(), TII.get(CopyOpc), CopyTo)
4189 .addUse(InsReg)
4190 .addImm(LaneIdx);
4191 constrainSelectedInstRegOperands(CopyInst, TII, TRI, RBI);
4192 ++LaneIdx;
4193 }
4194
4195 // Separately constrain the first copy's destination. Because of the
4196 // limitation in constrainOperandRegClass, we can't guarantee that this will
4197 // actually be constrained. So, do it ourselves using the second operand.
4198 const TargetRegisterClass *RC =
4199 MRI.getRegClassOrNull(I.getOperand(1).getReg());
4200 if (!RC) {
4201 LLVM_DEBUG(dbgs() << "Couldn't constrain copy destination.\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "Couldn't constrain copy destination.\n"
; } } while (false)
;
4202 return false;
4203 }
4204
4205 RBI.constrainGenericRegister(CopyTo, *RC, MRI);
4206 I.eraseFromParent();
4207 return true;
4208}
4209
4210bool AArch64InstructionSelector::selectConcatVectors(
4211 MachineInstr &I, MachineRegisterInfo &MRI) {
4212 assert(I.getOpcode() == TargetOpcode::G_CONCAT_VECTORS &&(static_cast <bool> (I.getOpcode() == TargetOpcode::G_CONCAT_VECTORS
&& "Unexpected opcode") ? void (0) : __assert_fail (
"I.getOpcode() == TargetOpcode::G_CONCAT_VECTORS && \"Unexpected opcode\""
, "llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 4213, __extension__ __PRETTY_FUNCTION__))
4213 "Unexpected opcode")(static_cast <bool> (I.getOpcode() == TargetOpcode::G_CONCAT_VECTORS
&& "Unexpected opcode") ? void (0) : __assert_fail (
"I.getOpcode() == TargetOpcode::G_CONCAT_VECTORS && \"Unexpected opcode\""
, "llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 4213, __extension__ __PRETTY_FUNCTION__))
;
4214 Register Dst = I.getOperand(0).getReg();
4215 Register Op1 = I.getOperand(1).getReg();
4216 Register Op2 = I.getOperand(2).getReg();
4217 MachineInstr *ConcatMI = emitVectorConcat(Dst, Op1, Op2, MIB);
4218 if (!ConcatMI)
4219 return false;
4220 I.eraseFromParent();
4221 return true;
4222}
4223
4224unsigned
4225AArch64InstructionSelector::emitConstantPoolEntry(const Constant *CPVal,
4226 MachineFunction &MF) const {
4227 Type *CPTy = CPVal->getType();
4228 Align Alignment = MF.getDataLayout().getPrefTypeAlign(CPTy);
4229
4230 MachineConstantPool *MCP = MF.getConstantPool();
4231 return MCP->getConstantPoolIndex(CPVal, Alignment);
4232}
4233
4234MachineInstr *AArch64InstructionSelector::emitLoadFromConstantPool(
4235 const Constant *CPVal, MachineIRBuilder &MIRBuilder) const {
4236 auto &MF = MIRBuilder.getMF();
4237 unsigned CPIdx = emitConstantPoolEntry(CPVal, MF);
4238
4239 auto Adrp =
4240 MIRBuilder.buildInstr(AArch64::ADRP, {&AArch64::GPR64RegClass}, {})
4241 .addConstantPoolIndex(CPIdx, 0, AArch64II::MO_PAGE);
4242
4243 MachineInstr *LoadMI = nullptr;
4244 MachinePointerInfo PtrInfo = MachinePointerInfo::getConstantPool(MF);
4245 unsigned Size = MIRBuilder.getDataLayout().getTypeStoreSize(CPVal->getType());
4246 switch (Size) {
4247 case 16:
4248 LoadMI =
4249 &*MIRBuilder
4250 .buildInstr(AArch64::LDRQui, {&AArch64::FPR128RegClass}, {Adrp})
4251 .addConstantPoolIndex(CPIdx, 0,
4252 AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
4253 break;
4254 case 8:
4255 LoadMI =
4256 &*MIRBuilder
4257 .buildInstr(AArch64::LDRDui, {&AArch64::FPR64RegClass}, {Adrp})
4258 .addConstantPoolIndex(CPIdx, 0,
4259 AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
4260 break;
4261 case 4:
4262 LoadMI =
4263 &*MIRBuilder
4264 .buildInstr(AArch64::LDRSui, {&AArch64::FPR32RegClass}, {Adrp})
4265 .addConstantPoolIndex(CPIdx, 0,
4266 AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
4267 break;
4268 case 2:
4269 LoadMI =
4270 &*MIRBuilder
4271 .buildInstr(AArch64::LDRHui, {&AArch64::FPR16RegClass}, {Adrp})
4272 .addConstantPoolIndex(CPIdx, 0,
4273 AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
4274 break;
4275 default:
4276 LLVM_DEBUG(dbgs() << "Could not load from constant pool of type "do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "Could not load from constant pool of type "
<< *CPVal->getType(); } } while (false)
4277 << *CPVal->getType())do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "Could not load from constant pool of type "
<< *CPVal->getType(); } } while (false)
;
4278 return nullptr;
4279 }
4280 LoadMI->addMemOperand(MF, MF.getMachineMemOperand(PtrInfo,
4281 MachineMemOperand::MOLoad,
4282 Size, Align(Size)));
4283 constrainSelectedInstRegOperands(*Adrp, TII, TRI, RBI);
4284 constrainSelectedInstRegOperands(*LoadMI, TII, TRI, RBI);
4285 return LoadMI;
4286}
4287
4288/// Return an <Opcode, SubregIndex> pair to do an vector elt insert of a given
4289/// size and RB.
4290static std::pair<unsigned, unsigned>
4291getInsertVecEltOpInfo(const RegisterBank &RB, unsigned EltSize) {
4292 unsigned Opc, SubregIdx;
4293 if (RB.getID() == AArch64::GPRRegBankID) {
4294 if (EltSize == 16) {
4295 Opc = AArch64::INSvi16gpr;
4296 SubregIdx = AArch64::ssub;
4297 } else if (EltSize == 32) {
4298 Opc = AArch64::INSvi32gpr;
4299 SubregIdx = AArch64::ssub;
4300 } else if (EltSize == 64) {
4301 Opc = AArch64::INSvi64gpr;
4302 SubregIdx = AArch64::dsub;
4303 } else {
4304 llvm_unreachable("invalid elt size!")::llvm::llvm_unreachable_internal("invalid elt size!", "llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 4304)
;
4305 }
4306 } else {
4307 if (EltSize == 8) {
4308 Opc = AArch64::INSvi8lane;
4309 SubregIdx = AArch64::bsub;
4310 } else if (EltSize == 16) {
4311 Opc = AArch64::INSvi16lane;
4312 SubregIdx = AArch64::hsub;
4313 } else if (EltSize == 32) {
4314 Opc = AArch64::INSvi32lane;
4315 SubregIdx = AArch64::ssub;
4316 } else if (EltSize == 64) {
4317 Opc = AArch64::INSvi64lane;
4318 SubregIdx = AArch64::dsub;
4319 } else {
4320 llvm_unreachable("invalid elt size!")::llvm::llvm_unreachable_internal("invalid elt size!", "llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 4320)
;
4321 }
4322 }
4323 return std::make_pair(Opc, SubregIdx);
4324}
4325
4326MachineInstr *AArch64InstructionSelector::emitInstr(
4327 unsigned Opcode, std::initializer_list<llvm::DstOp> DstOps,
4328 std::initializer_list<llvm::SrcOp> SrcOps, MachineIRBuilder &MIRBuilder,
4329 const ComplexRendererFns &RenderFns) const {
4330 assert(Opcode && "Expected an opcode?")(static_cast <bool> (Opcode && "Expected an opcode?"
) ? void (0) : __assert_fail ("Opcode && \"Expected an opcode?\""
, "llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 4330, __extension__ __PRETTY_FUNCTION__))
;
4331 assert(!isPreISelGenericOpcode(Opcode) &&(static_cast <bool> (!isPreISelGenericOpcode(Opcode) &&
"Function should only be used to produce selected instructions!"
) ? void (0) : __assert_fail ("!isPreISelGenericOpcode(Opcode) && \"Function should only be used to produce selected instructions!\""
, "llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 4332, __extension__ __PRETTY_FUNCTION__))
4332 "Function should only be used to produce selected instructions!")(static_cast <bool> (!isPreISelGenericOpcode(Opcode) &&
"Function should only be used to produce selected instructions!"
) ? void (0) : __assert_fail ("!isPreISelGenericOpcode(Opcode) && \"Function should only be used to produce selected instructions!\""
, "llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 4332, __extension__ __PRETTY_FUNCTION__))
;
4333 auto MI = MIRBuilder.buildInstr(Opcode, DstOps, SrcOps);
4334 if (RenderFns)
4335 for (auto &Fn : *RenderFns)
4336 Fn(MI);
4337 constrainSelectedInstRegOperands(*MI, TII, TRI, RBI);
4338 return &*MI;
4339}
4340
4341MachineInstr *AArch64InstructionSelector::emitAddSub(
4342 const std::array<std::array<unsigned, 2>, 5> &AddrModeAndSizeToOpcode,
4343 Register Dst, MachineOperand &LHS, MachineOperand &RHS,
4344 MachineIRBuilder &MIRBuilder) const {
4345 MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
4346 assert(LHS.isReg() && RHS.isReg() && "Expected register operands?")(static_cast <bool> (LHS.isReg() && RHS.isReg()
&& "Expected register operands?") ? void (0) : __assert_fail
("LHS.isReg() && RHS.isReg() && \"Expected register operands?\""
, "llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 4346, __extension__ __PRETTY_FUNCTION__))
;
4347 auto Ty = MRI.getType(LHS.getReg());
4348 assert(!Ty.isVector() && "Expected a scalar or pointer?")(static_cast <bool> (!Ty.isVector() && "Expected a scalar or pointer?"
) ? void (0) : __assert_fail ("!Ty.isVector() && \"Expected a scalar or pointer?\""
, "llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 4348, __extension__ __PRETTY_FUNCTION__))
;
4349 unsigned Size = Ty.getSizeInBits();
4350 assert((Size == 32 || Size == 64) && "Expected a 32-bit or 64-bit type only")(static_cast <bool> ((Size == 32 || Size == 64) &&
"Expected a 32-bit or 64-bit type only") ? void (0) : __assert_fail
("(Size == 32 || Size == 64) && \"Expected a 32-bit or 64-bit type only\""
, "llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 4350, __extension__ __PRETTY_FUNCTION__))
;
4351 bool Is32Bit = Size == 32;
4352
4353 // INSTRri form with positive arithmetic immediate.
4354 if (auto Fns = selectArithImmed(RHS))
4355 return emitInstr(AddrModeAndSizeToOpcode[0][Is32Bit], {Dst}, {LHS},
4356 MIRBuilder, Fns);
4357
4358 // INSTRri form with negative arithmetic immediate.
4359 if (auto Fns = selectNegArithImmed(RHS))
4360 return emitInstr(AddrModeAndSizeToOpcode[3][Is32Bit], {Dst}, {LHS},
4361 MIRBuilder, Fns);
4362
4363 // INSTRrx form.
4364 if (auto Fns = selectArithExtendedRegister(RHS))
4365 return emitInstr(AddrModeAndSizeToOpcode[4][Is32Bit], {Dst}, {LHS},
4366 MIRBuilder, Fns);
4367
4368 // INSTRrs form.
4369 if (auto Fns = selectShiftedRegister(RHS))
4370 return emitInstr(AddrModeAndSizeToOpcode[1][Is32Bit], {Dst}, {LHS},
4371 MIRBuilder, Fns);
4372 return emitInstr(AddrModeAndSizeToOpcode[2][Is32Bit], {Dst}, {LHS, RHS},
4373 MIRBuilder);
4374}
4375
4376MachineInstr *
4377AArch64InstructionSelector::emitADD(Register DefReg, MachineOperand &LHS,
4378 MachineOperand &RHS,
4379 MachineIRBuilder &MIRBuilder) const {
4380 const std::array<std::array<unsigned, 2>, 5> OpcTable{
4381 {{AArch64::ADDXri, AArch64::ADDWri},
4382 {AArch64::ADDXrs, AArch64::ADDWrs},
4383 {AArch64::ADDXrr, AArch64::ADDWrr},
4384 {AArch64::SUBXri, AArch64::SUBWri},
4385 {AArch64::ADDXrx, AArch64::ADDWrx}}};
4386 return emitAddSub(OpcTable, DefReg, LHS, RHS, MIRBuilder);
4387}
4388
4389MachineInstr *
4390AArch64InstructionSelector::emitADDS(Register Dst, MachineOperand &LHS,
4391 MachineOperand &RHS,
4392 MachineIRBuilder &MIRBuilder) const {
4393 const std::array<std::array<unsigned, 2>, 5> OpcTable{
4394 {{AArch64::ADDSXri, AArch64::ADDSWri},
4395 {AArch64::ADDSXrs, AArch64::ADDSWrs},
4396 {AArch64::ADDSXrr, AArch64::ADDSWrr},
4397 {AArch64::SUBSXri, AArch64::SUBSWri},
4398 {AArch64::ADDSXrx, AArch64::ADDSWrx}}};
4399 return emitAddSub(OpcTable, Dst, LHS, RHS, MIRBuilder);
4400}
4401
4402MachineInstr *
4403AArch64InstructionSelector::emitSUBS(Register Dst, MachineOperand &LHS,
4404 MachineOperand &RHS,
4405 MachineIRBuilder &MIRBuilder) const {
4406 const std::array<std::array<unsigned, 2>, 5> OpcTable{
4407 {{AArch64::SUBSXri, AArch64::SUBSWri},
4408 {AArch64::SUBSXrs, AArch64::SUBSWrs},
4409 {AArch64::SUBSXrr, AArch64::SUBSWrr},
4410 {AArch64::ADDSXri, AArch64::ADDSWri},
4411 {AArch64::SUBSXrx, AArch64::SUBSWrx}}};
4412 return emitAddSub(OpcTable, Dst, LHS, RHS, MIRBuilder);
4413}
4414
4415MachineInstr *
4416AArch64InstructionSelector::emitCMN(MachineOperand &LHS, MachineOperand &RHS,
4417 MachineIRBuilder &MIRBuilder) const {
4418 MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
4419 bool Is32Bit = (MRI.getType(LHS.getReg()).getSizeInBits() == 32);
4420 auto RC = Is32Bit ? &AArch64::GPR32RegClass : &AArch64::GPR64RegClass;
4421 return emitADDS(MRI.createVirtualRegister(RC), LHS, RHS, MIRBuilder);
4422}
4423
4424MachineInstr *
4425AArch64InstructionSelector::emitTST(MachineOperand &LHS, MachineOperand &RHS,
4426 MachineIRBuilder &MIRBuilder) const {
4427 assert(LHS.isReg() && RHS.isReg() && "Expected register operands?")(static_cast <bool> (LHS.isReg() && RHS.isReg()
&& "Expected register operands?") ? void (0) : __assert_fail
("LHS.isReg() && RHS.isReg() && \"Expected register operands?\""
, "llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 4427, __extension__ __PRETTY_FUNCTION__))
;
4428 MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
4429 LLT Ty = MRI.getType(LHS.getReg());
4430 unsigned RegSize = Ty.getSizeInBits();
4431 bool Is32Bit = (RegSize == 32);
4432 const unsigned OpcTable[3][2] = {{AArch64::ANDSXri, AArch64::ANDSWri},
4433 {AArch64::ANDSXrs, AArch64::ANDSWrs},
4434 {AArch64::ANDSXrr, AArch64::ANDSWrr}};
4435 // ANDS needs a logical immediate for its immediate form. Check if we can
4436 // fold one in.
4437 if (auto ValAndVReg = getIConstantVRegValWithLookThrough(RHS.getReg(), MRI)) {
4438 int64_t Imm = ValAndVReg->Value.getSExtValue();
4439
4440 if (AArch64_AM::isLogicalImmediate(Imm, RegSize)) {
4441 auto TstMI = MIRBuilder.buildInstr(OpcTable[0][Is32Bit], {Ty}, {LHS});
4442 TstMI.addImm(AArch64_AM::encodeLogicalImmediate(Imm, RegSize));
4443 constrainSelectedInstRegOperands(*TstMI, TII, TRI, RBI);
4444 return &*TstMI;
4445 }
4446 }
4447
4448 if (auto Fns = selectLogicalShiftedRegister(RHS))
4449 return emitInstr(OpcTable[1][Is32Bit], {Ty}, {LHS}, MIRBuilder, Fns);
4450 return emitInstr(OpcTable[2][Is32Bit], {Ty}, {LHS, RHS}, MIRBuilder);
4451}
4452
4453MachineInstr *AArch64InstructionSelector::emitIntegerCompare(
4454 MachineOperand &LHS, MachineOperand &RHS, MachineOperand &Predicate,
4455 MachineIRBuilder &MIRBuilder) const {
4456 assert(LHS.isReg() && RHS.isReg() && "Expected LHS and RHS to be registers!")(static_cast <bool> (LHS.isReg() && RHS.isReg()
&& "Expected LHS and RHS to be registers!") ? void (
0) : __assert_fail ("LHS.isReg() && RHS.isReg() && \"Expected LHS and RHS to be registers!\""
, "llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 4456, __extension__ __PRETTY_FUNCTION__))
;
4457 assert(Predicate.isPredicate() && "Expected predicate?")(static_cast <bool> (Predicate.isPredicate() &&
"Expected predicate?") ? void (0) : __assert_fail ("Predicate.isPredicate() && \"Expected predicate?\""
, "llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 4457, __extension__ __PRETTY_FUNCTION__))
;
4458 MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
4459 LLT CmpTy = MRI.getType(LHS.getReg());
4460 assert(!CmpTy.isVector() && "Expected scalar or pointer")(static_cast <bool> (!CmpTy.isVector() && "Expected scalar or pointer"
) ? void (0) : __assert_fail ("!CmpTy.isVector() && \"Expected scalar or pointer\""
, "llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 4460, __extension__ __PRETTY_FUNCTION__))
;
4461 unsigned Size = CmpTy.getSizeInBits();
4462 (void)Size;
4463 assert((Size == 32 || Size == 64) && "Expected a 32-bit or 64-bit LHS/RHS?")(static_cast <bool> ((Size == 32 || Size == 64) &&
"Expected a 32-bit or 64-bit LHS/RHS?") ? void (0) : __assert_fail
("(Size == 32 || Size == 64) && \"Expected a 32-bit or 64-bit LHS/RHS?\""
, "llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 4463, __extension__ __PRETTY_FUNCTION__))
;
4464 // Fold the compare into a cmn or tst if possible.
4465 if (auto FoldCmp = tryFoldIntegerCompare(LHS, RHS, Predicate, MIRBuilder))
4466 return FoldCmp;
4467 auto Dst = MRI.cloneVirtualRegister(LHS.getReg());
4468 return emitSUBS(Dst, LHS, RHS, MIRBuilder);
4469}
4470
4471MachineInstr *AArch64InstructionSelector::emitCSetForFCmp(
4472 Register Dst, CmpInst::Predicate Pred, MachineIRBuilder &MIRBuilder) const {
4473 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
4474#ifndef NDEBUG
4475 LLT Ty = MRI.getType(Dst);
4476 assert(!Ty.isVector() && Ty.getSizeInBits() == 32 &&(static_cast <bool> (!Ty.isVector() && Ty.getSizeInBits
() == 32 && "Expected a 32-bit scalar register?") ? void
(0) : __assert_fail ("!Ty.isVector() && Ty.getSizeInBits() == 32 && \"Expected a 32-bit scalar register?\""
, "llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 4477, __extension__ __PRETTY_FUNCTION__))
4477 "Expected a 32-bit scalar register?")(static_cast <bool> (!Ty.isVector() && Ty.getSizeInBits
() == 32 && "Expected a 32-bit scalar register?") ? void
(0) : __assert_fail ("!Ty.isVector() && Ty.getSizeInBits() == 32 && \"Expected a 32-bit scalar register?\""
, "llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 4477, __extension__ __PRETTY_FUNCTION__))
;
4478#endif
4479 const Register ZReg = AArch64::WZR;
4480 AArch64CC::CondCode CC1, CC2;
4481 changeFCMPPredToAArch64CC(Pred, CC1, CC2);
4482 auto InvCC1 = AArch64CC::getInvertedCondCode(CC1);
4483 if (CC2 == AArch64CC::AL)
4484 return emitCSINC(/*Dst=*/Dst, /*Src1=*/ZReg, /*Src2=*/ZReg, InvCC1,
4485 MIRBuilder);
4486 const TargetRegisterClass *RC = &AArch64::GPR32RegClass;
4487 Register Def1Reg = MRI.createVirtualRegister(RC);
4488 Register Def2Reg = MRI.createVirtualRegister(RC);
4489 auto InvCC2 = AArch64CC::getInvertedCondCode(CC2);
4490 emitCSINC(/*Dst=*/Def1Reg, /*Src1=*/ZReg, /*Src2=*/ZReg, InvCC1, MIRBuilder);
4491 emitCSINC(/*Dst=*/Def2Reg, /*Src1=*/ZReg, /*Src2=*/ZReg, InvCC2, MIRBuilder);
4492 auto OrMI = MIRBuilder.buildInstr(AArch64::ORRWrr, {Dst}, {Def1Reg, Def2Reg});
4493 constrainSelectedInstRegOperands(*OrMI, TII, TRI, RBI);
4494 return &*OrMI;
4495}
4496
4497MachineInstr *
4498AArch64InstructionSelector::emitFPCompare(Register LHS, Register RHS,
4499 MachineIRBuilder &MIRBuilder,
4500 Optional<CmpInst::Predicate> Pred) const {
4501 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
4502 LLT Ty = MRI.getType(LHS);
4503 if (Ty.isVector())
4504 return nullptr;
4505 unsigned OpSize = Ty.getSizeInBits();
4506 if (OpSize != 32 && OpSize != 64)
4507 return nullptr;
4508
4509 // If this is a compare against +0.0, then we don't have
4510 // to explicitly materialize a constant.
4511 const ConstantFP *FPImm = getConstantFPVRegVal(RHS, MRI);
4512 bool ShouldUseImm = FPImm && (FPImm->isZero() && !FPImm->isNegative());
4513
4514 auto IsEqualityPred = [](CmpInst::Predicate P) {
4515 return P == CmpInst::FCMP_OEQ || P == CmpInst::FCMP_ONE ||
4516 P == CmpInst::FCMP_UEQ || P == CmpInst::FCMP_UNE;
4517 };
4518 if (!ShouldUseImm && Pred && IsEqualityPred(*Pred)) {
4519 // Try commutating the operands.
4520 const ConstantFP *LHSImm = getConstantFPVRegVal(LHS, MRI);
4521 if (LHSImm && (LHSImm->isZero() && !LHSImm->isNegative())) {
4522 ShouldUseImm = true;
4523 std::swap(LHS, RHS);
4524 }
4525 }
4526 unsigned CmpOpcTbl[2][2] = {{AArch64::FCMPSrr, AArch64::FCMPDrr},
4527 {AArch64::FCMPSri, AArch64::FCMPDri}};
4528 unsigned CmpOpc = CmpOpcTbl[ShouldUseImm][OpSize == 64];
4529
4530 // Partially build the compare. Decide if we need to add a use for the
4531 // third operand based off whether or not we're comparing against 0.0.
4532 auto CmpMI = MIRBuilder.buildInstr(CmpOpc).addUse(LHS);
4533 if (!ShouldUseImm)
4534 CmpMI.addUse(RHS);
4535 constrainSelectedInstRegOperands(*CmpMI, TII, TRI, RBI);
4536 return &*CmpMI;
4537}
4538
4539MachineInstr *AArch64InstructionSelector::emitVectorConcat(
4540 Optional<Register> Dst, Register Op1, Register Op2,
4541 MachineIRBuilder &MIRBuilder) const {
4542 // We implement a vector concat by:
4543 // 1. Use scalar_to_vector to insert the lower vector into the larger dest
4544 // 2. Insert the upper vector into the destination's upper element
4545 // TODO: some of this code is common with G_BUILD_VECTOR handling.
4546 MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
4547
4548 const LLT Op1Ty = MRI.getType(Op1);
4549 const LLT Op2Ty = MRI.getType(Op2);
4550
4551 if (Op1Ty != Op2Ty) {
4552 LLVM_DEBUG(dbgs() << "Could not do vector concat of differing vector tys")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "Could not do vector concat of differing vector tys"
; } } while (false)
;
4553 return nullptr;
4554 }
4555 assert(Op1Ty.isVector() && "Expected a vector for vector concat")(static_cast <bool> (Op1Ty.isVector() && "Expected a vector for vector concat"
) ? void (0) : __assert_fail ("Op1Ty.isVector() && \"Expected a vector for vector concat\""
, "llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 4555, __extension__ __PRETTY_FUNCTION__))
;
4556
4557 if (Op1Ty.getSizeInBits() >= 128) {
4558 LLVM_DEBUG(dbgs() << "Vector concat not supported for full size vectors")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "Vector concat not supported for full size vectors"
; } } while (false)
;
4559 return nullptr;
4560 }
4561
4562 // At the moment we just support 64 bit vector concats.
4563 if (Op1Ty.getSizeInBits() != 64) {
4564 LLVM_DEBUG(dbgs() << "Vector concat supported for 64b vectors")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "Vector concat supported for 64b vectors"
; } } while (false)
;
4565 return nullptr;
4566 }
4567
4568 const LLT ScalarTy = LLT::scalar(Op1Ty.getSizeInBits());
4569 const RegisterBank &FPRBank = *RBI.getRegBank(Op1, MRI, TRI);
4570 const TargetRegisterClass *DstRC =
4571 getMinClassForRegBank(FPRBank, Op1Ty.getSizeInBits() * 2);
4572
4573 MachineInstr *WidenedOp1 =
4574 emitScalarToVector(ScalarTy.getSizeInBits(), DstRC, Op1, MIRBuilder);
4575 MachineInstr *WidenedOp2 =
4576 emitScalarToVector(ScalarTy.getSizeInBits(), DstRC, Op2, MIRBuilder);
4577 if (!WidenedOp1 || !WidenedOp2) {
4578 LLVM_DEBUG(dbgs() << "Could not emit a vector from scalar value")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "Could not emit a vector from scalar value"
; } } while (false)
;
4579 return nullptr;
4580 }
4581
4582 // Now do the insert of the upper element.
4583 unsigned InsertOpc, InsSubRegIdx;
4584 std::tie(InsertOpc, InsSubRegIdx) =
4585 getInsertVecEltOpInfo(FPRBank, ScalarTy.getSizeInBits());
4586
4587 if (!Dst)
4588 Dst = MRI.createVirtualRegister(DstRC);
4589 auto InsElt =
4590 MIRBuilder
4591 .buildInstr(InsertOpc, {*Dst}, {WidenedOp1->getOperand(0).getReg()})
4592 .addImm(1) /* Lane index */
4593 .addUse(WidenedOp2->getOperand(0).getReg())
4594 .addImm(0);
4595 constrainSelectedInstRegOperands(*InsElt, TII, TRI, RBI);
4596 return &*InsElt;
4597}
4598
4599MachineInstr *
4600AArch64InstructionSelector::emitCSINC(Register Dst, Register Src1,
4601 Register Src2, AArch64CC::CondCode Pred,
4602 MachineIRBuilder &MIRBuilder) const {
4603 auto &MRI = *MIRBuilder.getMRI();
4604 const RegClassOrRegBank &RegClassOrBank = MRI.getRegClassOrRegBank(Dst);
4605 // If we used a register class, then this won't necessarily have an LLT.
4606 // Compute the size based off whether or not we have a class or bank.
4607 unsigned Size;
4608 if (const auto *RC = RegClassOrBank.dyn_cast<const TargetRegisterClass *>())
4609 Size = TRI.getRegSizeInBits(*RC);
4610 else
4611 Size = MRI.getType(Dst).getSizeInBits();
4612 // Some opcodes use s1.
4613 assert(Size <= 64 && "Expected 64 bits or less only!")(static_cast <bool> (Size <= 64 && "Expected 64 bits or less only!"
) ? void (0) : __assert_fail ("Size <= 64 && \"Expected 64 bits or less only!\""
, "llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 4613, __extension__ __PRETTY_FUNCTION__))
;
4614 static const unsigned OpcTable[2] = {AArch64::CSINCWr, AArch64::CSINCXr};
4615 unsigned Opc = OpcTable[Size == 64];
4616 auto CSINC = MIRBuilder.buildInstr(Opc, {Dst}, {Src1, Src2}).addImm(Pred);
4617 constrainSelectedInstRegOperands(*CSINC, TII, TRI, RBI);
4618 return &*CSINC;
4619}
4620
4621std::pair<MachineInstr *, AArch64CC::CondCode>
4622AArch64InstructionSelector::emitOverflowOp(unsigned Opcode, Register Dst,
4623 MachineOperand &LHS,
4624 MachineOperand &RHS,
4625 MachineIRBuilder &MIRBuilder) const {
4626 switch (Opcode) {
4627 default:
4628 llvm_unreachable("Unexpected opcode!")::llvm::llvm_unreachable_internal("Unexpected opcode!", "llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 4628)
;
4629 case TargetOpcode::G_SADDO:
4630 return std::make_pair(emitADDS(Dst, LHS, RHS, MIRBuilder), AArch64CC::VS);
4631 case TargetOpcode::G_UADDO:
4632 return std::make_pair(emitADDS(Dst, LHS, RHS, MIRBuilder), AArch64CC::HS);
4633 case TargetOpcode::G_SSUBO:
4634 return std::make_pair(emitSUBS(Dst, LHS, RHS, MIRBuilder), AArch64CC::VS);
4635 case TargetOpcode::G_USUBO:
4636 return std::make_pair(emitSUBS(Dst, LHS, RHS, MIRBuilder), AArch64CC::LO);
4637 }
4638}
4639
4640bool AArch64InstructionSelector::tryOptSelect(MachineInstr &I) {
4641 MachineRegisterInfo &MRI = *MIB.getMRI();
4642 // We want to recognize this pattern:
4643 //
4644 // $z = G_FCMP pred, $x, $y
4645 // ...
4646 // $w = G_SELECT $z, $a, $b
4647 //
4648 // Where the value of $z is *only* ever used by the G_SELECT (possibly with
4649 // some copies/truncs in between.)
4650 //
4651 // If we see this, then we can emit something like this:
4652 //
4653 // fcmp $x, $y
4654 // fcsel $w, $a, $b, pred
4655 //
4656 // Rather than emitting both of the rather long sequences in the standard
4657 // G_FCMP/G_SELECT select methods.
4658
4659 // First, check if the condition is defined by a compare.
4660 MachineInstr *CondDef = MRI.getVRegDef(I.getOperand(1).getReg());
4661 while (CondDef) {
4662 // We can only fold if all of the defs have one use.
4663 Register CondDefReg = CondDef->getOperand(0).getReg();
4664 if (!MRI.hasOneNonDBGUse(CondDefReg)) {
4665 // Unless it's another select.
4666 for (const MachineInstr &UI : MRI.use_nodbg_instructions(CondDefReg)) {
4667 if (CondDef == &UI)
4668 continue;
4669 if (UI.getOpcode() != TargetOpcode::G_SELECT)
4670 return false;
4671 }
4672 }
4673
4674 // We can skip over G_TRUNC since the condition is 1-bit.
4675 // Truncating/extending can have no impact on the value.
4676 unsigned Opc = CondDef->getOpcode();
4677 if (Opc != TargetOpcode::COPY && Opc != TargetOpcode::G_TRUNC)
4678 break;
4679
4680 // Can't see past copies from physregs.
4681 if (Opc == TargetOpcode::COPY &&
4682 Register::isPhysicalRegister(CondDef->getOperand(1).getReg()))
4683 return false;
4684
4685 CondDef = MRI.getVRegDef(CondDef->getOperand(1).getReg());
4686 }
4687
4688 // Is the condition defined by a compare?
4689 if (!CondDef)
4690 return false;
4691
4692 unsigned CondOpc = CondDef->getOpcode();
4693 if (CondOpc != TargetOpcode::G_ICMP && CondOpc != TargetOpcode::G_FCMP)
4694 return false;
4695
4696 AArch64CC::CondCode CondCode;
4697 if (CondOpc == TargetOpcode::G_ICMP) {
4698 auto Pred =
4699 static_cast<CmpInst::Predicate>(CondDef->getOperand(1).getPredicate());
4700 CondCode = changeICMPPredToAArch64CC(Pred);
4701 emitIntegerCompare(CondDef->getOperand(2), CondDef->getOperand(3),
4702 CondDef->getOperand(1), MIB);
4703 } else {
4704 // Get the condition code for the select.
4705 auto Pred =
4706 static_cast<CmpInst::Predicate>(CondDef->getOperand(1).getPredicate());
4707 AArch64CC::CondCode CondCode2;
4708 changeFCMPPredToAArch64CC(Pred, CondCode, CondCode2);
4709
4710 // changeFCMPPredToAArch64CC sets CondCode2 to AL when we require two
4711 // instructions to emit the comparison.
4712 // TODO: Handle FCMP_UEQ and FCMP_ONE. After that, this check will be
4713 // unnecessary.
4714 if (CondCode2 != AArch64CC::AL)
4715 return false;
4716
4717 if (!emitFPCompare(CondDef->getOperand(2).getReg(),
4718 CondDef->getOperand(3).getReg(), MIB)) {
4719 LLVM_DEBUG(dbgs() << "Couldn't emit compare for select!\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "Couldn't emit compare for select!\n"
; } } while (false)
;
4720 return false;
4721 }
4722 }
4723
4724 // Emit the select.
4725 emitSelect(I.getOperand(0).getReg(), I.getOperand(2).getReg(),
4726 I.getOperand(3).getReg(), CondCode, MIB);
4727 I.eraseFromParent();
4728 return true;
4729}
4730
4731MachineInstr *AArch64InstructionSelector::tryFoldIntegerCompare(
4732 MachineOperand &LHS, MachineOperand &RHS, MachineOperand &Predicate,
4733 MachineIRBuilder &MIRBuilder) const {
4734 assert(LHS.isReg() && RHS.isReg() && Predicate.isPredicate() &&(static_cast <bool> (LHS.isReg() && RHS.isReg()
&& Predicate.isPredicate() && "Unexpected MachineOperand"
) ? void (0) : __assert_fail ("LHS.isReg() && RHS.isReg() && Predicate.isPredicate() && \"Unexpected MachineOperand\""
, "llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 4735, __extension__ __PRETTY_FUNCTION__))
4735 "Unexpected MachineOperand")(static_cast <bool> (LHS.isReg() && RHS.isReg()
&& Predicate.isPredicate() && "Unexpected MachineOperand"
) ? void (0) : __assert_fail ("LHS.isReg() && RHS.isReg() && Predicate.isPredicate() && \"Unexpected MachineOperand\""
, "llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 4735, __extension__ __PRETTY_FUNCTION__))
;
4736 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
4737 // We want to find this sort of thing:
4738 // x = G_SUB 0, y
4739 // G_ICMP z, x
4740 //
4741 // In this case, we can fold the G_SUB into the G_ICMP using a CMN instead.
4742 // e.g:
4743 //
4744 // cmn z, y
4745
4746 // Check if the RHS or LHS of the G_ICMP is defined by a SUB
4747 MachineInstr *LHSDef = getDefIgnoringCopies(LHS.getReg(), MRI);
4748 MachineInstr *RHSDef = getDefIgnoringCopies(RHS.getReg(), MRI);
4749 auto P = static_cast<CmpInst::Predicate>(Predicate.getPredicate());
4750 // Given this:
4751 //
4752 // x = G_SUB 0, y
4753 // G_ICMP x, z
4754 //
4755 // Produce this:
4756 //
4757 // cmn y, z
4758 if (isCMN(LHSDef, P, MRI))
4759 return emitCMN(LHSDef->getOperand(2), RHS, MIRBuilder);
4760
4761 // Same idea here, but with the RHS of the compare instead:
4762 //
4763 // Given this:
4764 //
4765 // x = G_SUB 0, y
4766 // G_ICMP z, x
4767 //
4768 // Produce this:
4769 //
4770 // cmn z, y
4771 if (isCMN(RHSDef, P, MRI))
4772 return emitCMN(LHS, RHSDef->getOperand(2), MIRBuilder);
4773
4774 // Given this:
4775 //
4776 // z = G_AND x, y
4777 // G_ICMP z, 0
4778 //
4779 // Produce this if the compare is signed:
4780 //
4781 // tst x, y
4782 if (!CmpInst::isUnsigned(P) && LHSDef &&
4783 LHSDef->getOpcode() == TargetOpcode::G_AND) {
4784 // Make sure that the RHS is 0.
4785 auto ValAndVReg = getIConstantVRegValWithLookThrough(RHS.getReg(), MRI);
4786 if (!ValAndVReg || ValAndVReg->Value != 0)
4787 return nullptr;
4788
4789 return emitTST(LHSDef->getOperand(1),
4790 LHSDef->getOperand(2), MIRBuilder);
4791 }
4792
4793 return nullptr;
4794}
4795
4796bool AArch64InstructionSelector::selectShuffleVector(
4797 MachineInstr &I, MachineRegisterInfo &MRI) {
4798 const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
4799 Register Src1Reg = I.getOperand(1).getReg();
4800 const LLT Src1Ty = MRI.getType(Src1Reg);
4801 Register Src2Reg = I.getOperand(2).getReg();
4802 const LLT Src2Ty = MRI.getType(Src2Reg);
4803 ArrayRef<int> Mask = I.getOperand(3).getShuffleMask();
4804
4805 MachineBasicBlock &MBB = *I.getParent();
4806 MachineFunction &MF = *MBB.getParent();
4807 LLVMContext &Ctx = MF.getFunction().getContext();
4808
4809 // G_SHUFFLE_VECTOR is weird in that the source operands can be scalars, if
4810 // it's originated from a <1 x T> type. Those should have been lowered into
4811 // G_BUILD_VECTOR earlier.
4812 if (!Src1Ty.isVector() || !Src2Ty.isVector()) {
4813 LLVM_DEBUG(dbgs() << "Could not select a \"scalar\" G_SHUFFLE_VECTOR\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "Could not select a \"scalar\" G_SHUFFLE_VECTOR\n"
; } } while (false)
;
4814 return false;
4815 }
4816
4817 unsigned BytesPerElt = DstTy.getElementType().getSizeInBits() / 8;
4818
4819 SmallVector<Constant *, 64> CstIdxs;
4820 for (int Val : Mask) {
4821 // For now, any undef indexes we'll just assume to be 0. This should be
4822 // optimized in future, e.g. to select DUP etc.
4823 Val = Val < 0 ? 0 : Val;
4824 for (unsigned Byte = 0; Byte < BytesPerElt; ++Byte) {
4825 unsigned Offset = Byte + Val * BytesPerElt;
4826 CstIdxs.emplace_back(ConstantInt::get(Type::getInt8Ty(Ctx), Offset));
4827 }
4828 }
4829
4830 // Use a constant pool to load the index vector for TBL.
4831 Constant *CPVal = ConstantVector::get(CstIdxs);
4832 MachineInstr *IndexLoad = emitLoadFromConstantPool(CPVal, MIB);
4833 if (!IndexLoad) {
4834 LLVM_DEBUG(dbgs() << "Could not load from a constant pool")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "Could not load from a constant pool"
; } } while (false)
;
4835 return false;
4836 }
4837
4838 if (DstTy.getSizeInBits() != 128) {
4839 assert(DstTy.getSizeInBits() == 64 && "Unexpected shuffle result ty")(static_cast <bool> (DstTy.getSizeInBits() == 64 &&
"Unexpected shuffle result ty") ? void (0) : __assert_fail (
"DstTy.getSizeInBits() == 64 && \"Unexpected shuffle result ty\""
, "llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 4839, __extension__ __PRETTY_FUNCTION__))
;
4840 // This case can be done with TBL1.
4841 MachineInstr *Concat = emitVectorConcat(None, Src1Reg, Src2Reg, MIB);
4842 if (!Concat) {
4843 LLVM_DEBUG(dbgs() << "Could not do vector concat for tbl1")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "Could not do vector concat for tbl1"
; } } while (false)
;
4844 return false;
4845 }
4846
4847 // The constant pool load will be 64 bits, so need to convert to FPR128 reg.
4848 IndexLoad = emitScalarToVector(64, &AArch64::FPR128RegClass,
4849 IndexLoad->getOperand(0).getReg(), MIB);
4850
4851 auto TBL1 = MIB.buildInstr(
4852 AArch64::TBLv16i8One, {&AArch64::FPR128RegClass},
4853 {Concat->getOperand(0).getReg(), IndexLoad->getOperand(0).getReg()});
4854 constrainSelectedInstRegOperands(*TBL1, TII, TRI, RBI);
4855
4856 auto Copy =
4857 MIB.buildInstr(TargetOpcode::COPY, {I.getOperand(0).getReg()}, {})
4858 .addReg(TBL1.getReg(0), 0, AArch64::dsub);
4859 RBI.constrainGenericRegister(Copy.getReg(0), AArch64::FPR64RegClass, MRI);
4860 I.eraseFromParent();
4861 return true;
4862 }
4863
4864 // For TBL2 we need to emit a REG_SEQUENCE to tie together two consecutive
4865 // Q registers for regalloc.
4866 SmallVector<Register, 2> Regs = {Src1Reg, Src2Reg};
4867 auto RegSeq = createQTuple(Regs, MIB);
4868 auto TBL2 = MIB.buildInstr(AArch64::TBLv16i8Two, {I.getOperand(0)},
4869 {RegSeq, IndexLoad->getOperand(0)});
4870 constrainSelectedInstRegOperands(*TBL2, TII, TRI, RBI);
4871 I.eraseFromParent();
4872 return true;
4873}
4874
4875MachineInstr *AArch64InstructionSelector::emitLaneInsert(
4876 Optional<Register> DstReg, Register SrcReg, Register EltReg,
4877 unsigned LaneIdx, const RegisterBank &RB,
4878 MachineIRBuilder &MIRBuilder) const {
4879 MachineInstr *InsElt = nullptr;
4880 const TargetRegisterClass *DstRC = &AArch64::FPR128RegClass;
4881 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
4882
4883 // Create a register to define with the insert if one wasn't passed in.
4884 if (!DstReg)
4885 DstReg = MRI.createVirtualRegister(DstRC);
4886
4887 unsigned EltSize = MRI.getType(EltReg).getSizeInBits();
4888 unsigned Opc = getInsertVecEltOpInfo(RB, EltSize).first;
4889
4890 if (RB.getID() == AArch64::FPRRegBankID) {
4891 auto InsSub = emitScalarToVector(EltSize, DstRC, EltReg, MIRBuilder);
4892 InsElt = MIRBuilder.buildInstr(Opc, {*DstReg}, {SrcReg})
4893 .addImm(LaneIdx)
4894 .addUse(InsSub->getOperand(0).getReg())
4895 .addImm(0);
4896 } else {
4897 InsElt = MIRBuilder.buildInstr(Opc, {*DstReg}, {SrcReg})
4898 .addImm(LaneIdx)
4899 .addUse(EltReg);
4900 }
4901
4902 constrainSelectedInstRegOperands(*InsElt, TII, TRI, RBI);
4903 return InsElt;
4904}
4905
4906bool AArch64InstructionSelector::selectUSMovFromExtend(
4907 MachineInstr &MI, MachineRegisterInfo &MRI) {
4908 if (MI.getOpcode() != TargetOpcode::G_SEXT &&
4909 MI.getOpcode() != TargetOpcode::G_ZEXT &&
4910 MI.getOpcode() != TargetOpcode::G_ANYEXT)
4911 return false;
4912 bool IsSigned = MI.getOpcode() == TargetOpcode::G_SEXT;
4913 const Register DefReg = MI.getOperand(0).getReg();
4914 const LLT DstTy = MRI.getType(DefReg);
4915 unsigned DstSize = DstTy.getSizeInBits();
4916
4917 if (DstSize != 32 && DstSize != 64)
4918 return false;
4919
4920 MachineInstr *Extract = getOpcodeDef(TargetOpcode::G_EXTRACT_VECTOR_ELT,
4921 MI.getOperand(1).getReg(), MRI);
4922 int64_t Lane;
4923 if (!Extract || !mi_match(Extract->getOperand(2).getReg(), MRI, m_ICst(Lane)))
4924 return false;
4925 Register Src0 = Extract->getOperand(1).getReg();
4926
4927 const LLT &VecTy = MRI.getType(Src0);
4928
4929 if (VecTy.getSizeInBits() != 128) {
4930 const MachineInstr *ScalarToVector = emitScalarToVector(
4931 VecTy.getSizeInBits(), &AArch64::FPR128RegClass, Src0, MIB);
4932 assert(ScalarToVector && "Didn't expect emitScalarToVector to fail!")(static_cast <bool> (ScalarToVector && "Didn't expect emitScalarToVector to fail!"
) ? void (0) : __assert_fail ("ScalarToVector && \"Didn't expect emitScalarToVector to fail!\""
, "llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 4932, __extension__ __PRETTY_FUNCTION__))
;
4933 Src0 = ScalarToVector->getOperand(0).getReg();
4934 }
4935
4936 unsigned Opcode;
4937 if (DstSize == 64 && VecTy.getScalarSizeInBits() == 32)
4938 Opcode = IsSigned ? AArch64::SMOVvi32to64 : AArch64::UMOVvi32;
4939 else if (DstSize == 64 && VecTy.getScalarSizeInBits() == 16)
4940 Opcode = IsSigned ? AArch64::SMOVvi16to64 : AArch64::UMOVvi16;
4941 else if (DstSize == 64 && VecTy.getScalarSizeInBits() == 8)
4942 Opcode = IsSigned ? AArch64::SMOVvi8to64 : AArch64::UMOVvi8;
4943 else if (DstSize == 32 && VecTy.getScalarSizeInBits() == 16)
4944 Opcode = IsSigned ? AArch64::SMOVvi16to32 : AArch64::UMOVvi16;
4945 else if (DstSize == 32 && VecTy.getScalarSizeInBits() == 8)
4946 Opcode = IsSigned ? AArch64::SMOVvi8to32 : AArch64::UMOVvi8;
4947 else
4948 llvm_unreachable("Unexpected type combo for S/UMov!")::llvm::llvm_unreachable_internal("Unexpected type combo for S/UMov!"
, "llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 4948)
;
4949
4950 // We may need to generate one of these, depending on the type and sign of the
4951 // input:
4952 // DstReg = SMOV Src0, Lane;
4953 // NewReg = UMOV Src0, Lane; DstReg = SUBREG_TO_REG NewReg, sub_32;
4954 MachineInstr *ExtI = nullptr;
4955 if (DstSize == 64 && !IsSigned) {
4956 Register NewReg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
4957 MIB.buildInstr(Opcode, {NewReg}, {Src0}).addImm(Lane);
4958 ExtI = MIB.buildInstr(AArch64::SUBREG_TO_REG, {DefReg}, {})
4959 .addImm(0)
4960 .addUse(NewReg)
4961 .addImm(AArch64::sub_32);
4962 RBI.constrainGenericRegister(DefReg, AArch64::GPR64RegClass, MRI);
4963 } else
4964 ExtI = MIB.buildInstr(Opcode, {DefReg}, {Src0}).addImm(Lane);
4965
4966 constrainSelectedInstRegOperands(*ExtI, TII, TRI, RBI);
4967 MI.eraseFromParent();
4968 return true;
4969}
4970
4971bool AArch64InstructionSelector::selectInsertElt(MachineInstr &I,
4972 MachineRegisterInfo &MRI) {
4973 assert(I.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT)(static_cast <bool> (I.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT
) ? void (0) : __assert_fail ("I.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT"
, "llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 4973, __extension__ __PRETTY_FUNCTION__))
;
4974
4975 // Get information on the destination.
4976 Register DstReg = I.getOperand(0).getReg();
4977 const LLT DstTy = MRI.getType(DstReg);
4978 unsigned VecSize = DstTy.getSizeInBits();
4979
4980 // Get information on the element we want to insert into the destination.
4981 Register EltReg = I.getOperand(2).getReg();
4982 const LLT EltTy = MRI.getType(EltReg);
4983 unsigned EltSize = EltTy.getSizeInBits();
4984 if (EltSize < 16 || EltSize > 64)
4985 return false; // Don't support all element types yet.
4986
4987 // Find the definition of the index. Bail out if it's not defined by a
4988 // G_CONSTANT.
4989 Register IdxReg = I.getOperand(3).getReg();
4990 auto VRegAndVal = getIConstantVRegValWithLookThrough(IdxReg, MRI);
4991 if (!VRegAndVal)
4992 return false;
4993 unsigned LaneIdx = VRegAndVal->Value.getSExtValue();
4994
4995 // Perform the lane insert.
4996 Register SrcReg = I.getOperand(1).getReg();
4997 const RegisterBank &EltRB = *RBI.getRegBank(EltReg, MRI, TRI);
4998
4999 if (VecSize < 128) {
5000 // If the vector we're inserting into is smaller than 128 bits, widen it
5001 // to 128 to do the insert.
5002 MachineInstr *ScalarToVec =
5003 emitScalarToVector(VecSize, &AArch64::FPR128RegClass, SrcReg, MIB);
5004 if (!ScalarToVec)
5005 return false;
5006 SrcReg = ScalarToVec->getOperand(0).getReg();
5007 }
5008
5009 // Create an insert into a new FPR128 register.
5010 // Note that if our vector is already 128 bits, we end up emitting an extra
5011 // register.
5012 MachineInstr *InsMI =
5013 emitLaneInsert(None, SrcReg, EltReg, LaneIdx, EltRB, MIB);
5014
5015 if (VecSize < 128) {
5016 // If we had to widen to perform the insert, then we have to demote back to
5017 // the original size to get the result we want.
5018 Register DemoteVec = InsMI->getOperand(0).getReg();
5019 const TargetRegisterClass *RC =
5020 getMinClassForRegBank(*RBI.getRegBank(DemoteVec, MRI, TRI), VecSize);
5021 if (RC != &AArch64::FPR32RegClass && RC != &AArch64::FPR64RegClass) {
5022 LLVM_DEBUG(dbgs() << "Unsupported register class!\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "Unsupported register class!\n"
; } } while (false)
;
5023 return false;
5024 }
5025 unsigned SubReg = 0;
5026 if (!getSubRegForClass(RC, TRI, SubReg))
5027 return false;
5028 if (SubReg != AArch64::ssub && SubReg != AArch64::dsub) {
5029 LLVM_DEBUG(dbgs() << "Unsupported destination size! (" << VecSizedo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "Unsupported destination size! ("
<< VecSize << "\n"; } } while (false)
5030 << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "Unsupported destination size! ("
<< VecSize << "\n"; } } while (false)
;
5031 return false;
5032 }
5033 MIB.buildInstr(TargetOpcode::COPY, {DstReg}, {})
5034 .addReg(DemoteVec, 0, SubReg);
5035 RBI.constrainGenericRegister(DstReg, *RC, MRI);
5036 } else {
5037 // No widening needed.
5038 InsMI->getOperand(0).setReg(DstReg);
5039 constrainSelectedInstRegOperands(*InsMI, TII, TRI, RBI);
5040 }
5041
5042 I.eraseFromParent();
5043 return true;
5044}
5045
5046MachineInstr *
5047AArch64InstructionSelector::emitConstantVector(Register Dst, Constant *CV,
5048 MachineIRBuilder &MIRBuilder,
5049 MachineRegisterInfo &MRI) {
5050 LLT DstTy = MRI.getType(Dst);
5051 unsigned DstSize = DstTy.getSizeInBits();
5052 if (CV->isNullValue()) {
5053 if (DstSize == 128) {
5054 auto Mov =
5055 MIRBuilder.buildInstr(AArch64::MOVIv2d_ns, {Dst}, {}).addImm(0);
5056 constrainSelectedInstRegOperands(*Mov, TII, TRI, RBI);
5057 return &*Mov;
5058 }
5059
5060 if (DstSize == 64) {
5061 auto Mov =
5062 MIRBuilder
5063 .buildInstr(AArch64::MOVIv2d_ns, {&AArch64::FPR128RegClass}, {})
5064 .addImm(0);
5065 auto Copy = MIRBuilder.buildInstr(TargetOpcode::COPY, {Dst}, {})
5066 .addReg(Mov.getReg(0), 0, AArch64::dsub);
5067 RBI.constrainGenericRegister(Dst, AArch64::FPR64RegClass, MRI);
5068 return &*Copy;
5069 }
5070 }
5071
5072 auto *CPLoad = emitLoadFromConstantPool(CV, MIRBuilder);
5073 if (!CPLoad) {
5074 LLVM_DEBUG(dbgs() << "Could not generate cp load for constant vector!")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "Could not generate cp load for constant vector!"
; } } while (false)
;
5075 return nullptr;
5076 }
5077
5078 auto Copy = MIRBuilder.buildCopy(Dst, CPLoad->getOperand(0));
5079 RBI.constrainGenericRegister(
5080 Dst, *MRI.getRegClass(CPLoad->getOperand(0).getReg()), MRI);
5081 return &*Copy;
5082}
5083
5084bool AArch64InstructionSelector::tryOptConstantBuildVec(
5085 MachineInstr &I, LLT DstTy, MachineRegisterInfo &MRI) {
5086 assert(I.getOpcode() == TargetOpcode::G_BUILD_VECTOR)(static_cast <bool> (I.getOpcode() == TargetOpcode::G_BUILD_VECTOR
) ? void (0) : __assert_fail ("I.getOpcode() == TargetOpcode::G_BUILD_VECTOR"
, "llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 5086, __extension__ __PRETTY_FUNCTION__))
;
5087 unsigned DstSize = DstTy.getSizeInBits();
5088 assert(DstSize <= 128 && "Unexpected build_vec type!")(static_cast <bool> (DstSize <= 128 && "Unexpected build_vec type!"
) ? void (0) : __assert_fail ("DstSize <= 128 && \"Unexpected build_vec type!\""
, "llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 5088, __extension__ __PRETTY_FUNCTION__))
;
5089 if (DstSize < 32)
5090 return false;
5091 // Check if we're building a constant vector, in which case we want to
5092 // generate a constant pool load instead of a vector insert sequence.
5093 SmallVector<Constant *, 16> Csts;
5094 for (unsigned Idx = 1; Idx < I.getNumOperands(); ++Idx) {
5095 // Try to find G_CONSTANT or G_FCONSTANT
5096 auto *OpMI =
5097 getOpcodeDef(TargetOpcode::G_CONSTANT, I.getOperand(Idx).getReg(), MRI);
5098 if (OpMI)
5099 Csts.emplace_back(
5100 const_cast<ConstantInt *>(OpMI->getOperand(1).getCImm()));
5101 else if ((OpMI = getOpcodeDef(TargetOpcode::G_FCONSTANT,
5102 I.getOperand(Idx).getReg(), MRI)))
5103 Csts.emplace_back(
5104 const_cast<ConstantFP *>(OpMI->getOperand(1).getFPImm()));
5105 else
5106 return false;
5107 }
5108 Constant *CV = ConstantVector::get(Csts);
5109 if (!emitConstantVector(I.getOperand(0).getReg(), CV, MIB, MRI))
5110 return false;
5111 I.eraseFromParent();
5112 return true;
5113}
5114
5115bool AArch64InstructionSelector::tryOptBuildVecToSubregToReg(
5116 MachineInstr &I, MachineRegisterInfo &MRI) {
5117 // Given:
5118 // %vec = G_BUILD_VECTOR %elt, %undef, %undef, ... %undef
5119 //
5120 // Select the G_BUILD_VECTOR as a SUBREG_TO_REG from %elt.
5121 Register Dst = I.getOperand(0).getReg();
5122 Register EltReg = I.getOperand(1).getReg();
5123 LLT EltTy = MRI.getType(EltReg);
5124 // If the index isn't on the same bank as its elements, then this can't be a
5125 // SUBREG_TO_REG.
5126 const RegisterBank &EltRB = *RBI.getRegBank(EltReg, MRI, TRI);
5127 const RegisterBank &DstRB = *RBI.getRegBank(Dst, MRI, TRI);
5128 if (EltRB != DstRB)
5129 return false;
5130 if (any_of(make_range(I.operands_begin() + 2, I.operands_end()),
5131 [&MRI](const MachineOperand &Op) {
5132 return !getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, Op.getReg(),
5133 MRI);
5134 }))
5135 return false;
5136 unsigned SubReg;
5137 const TargetRegisterClass *EltRC =
5138 getMinClassForRegBank(EltRB, EltTy.getSizeInBits());
5139 if (!EltRC)
5140 return false;
5141 const TargetRegisterClass *DstRC =
5142 getMinClassForRegBank(DstRB, MRI.getType(Dst).getSizeInBits());
5143 if (!DstRC)
5144 return false;
5145 if (!getSubRegForClass(EltRC, TRI, SubReg))
5146 return false;
5147 auto SubregToReg = MIB.buildInstr(AArch64::SUBREG_TO_REG, {Dst}, {})
5148 .addImm(0)
5149 .addUse(EltReg)
5150 .addImm(SubReg);
5151 I.eraseFromParent();
5152 constrainSelectedInstRegOperands(*SubregToReg, TII, TRI, RBI);
5153 return RBI.constrainGenericRegister(Dst, *DstRC, MRI);
5154}
5155
5156bool AArch64InstructionSelector::selectBuildVector(MachineInstr &I,
5157 MachineRegisterInfo &MRI) {
5158 assert(I.getOpcode() == TargetOpcode::G_BUILD_VECTOR)(static_cast <bool> (I.getOpcode() == TargetOpcode::G_BUILD_VECTOR
) ? void (0) : __assert_fail ("I.getOpcode() == TargetOpcode::G_BUILD_VECTOR"
, "llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 5158, __extension__ __PRETTY_FUNCTION__))
;
5159 // Until we port more of the optimized selections, for now just use a vector
5160 // insert sequence.
5161 const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
5162 const LLT EltTy = MRI.getType(I.getOperand(1).getReg());
5163 unsigned EltSize = EltTy.getSizeInBits();
5164
5165 if (tryOptConstantBuildVec(I, DstTy, MRI))
5166 return true;
5167 if (tryOptBuildVecToSubregToReg(I, MRI))
5168 return true;
5169
5170 if (EltSize < 16 || EltSize > 64)
5171 return false; // Don't support all element types yet.
5172 const RegisterBank &RB = *RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI);
5173
5174 const TargetRegisterClass *DstRC = &AArch64::FPR128RegClass;
5175 MachineInstr *ScalarToVec =
5176 emitScalarToVector(DstTy.getElementType().getSizeInBits(), DstRC,
5177 I.getOperand(1).getReg(), MIB);
5178 if (!ScalarToVec)
5179 return false;
5180
5181 Register DstVec = ScalarToVec->getOperand(0).getReg();
5182 unsigned DstSize = DstTy.getSizeInBits();
5183
5184 // Keep track of the last MI we inserted. Later on, we might be able to save
5185 // a copy using it.
5186 MachineInstr *PrevMI = nullptr;
5187 for (unsigned i = 2, e = DstSize / EltSize + 1; i < e; ++i) {
5188 // Note that if we don't do a subregister copy, we can end up making an
5189 // extra register.
5190 PrevMI = &*emitLaneInsert(None, DstVec, I.getOperand(i).getReg(), i - 1, RB,
5191 MIB);
5192 DstVec = PrevMI->getOperand(0).getReg();
5193 }
5194
5195 // If DstTy's size in bits is less than 128, then emit a subregister copy
5196 // from DstVec to the last register we've defined.
5197 if (DstSize < 128) {
5198 // Force this to be FPR using the destination vector.
5199 const TargetRegisterClass *RC =
5200 getMinClassForRegBank(*RBI.getRegBank(DstVec, MRI, TRI), DstSize);
5201 if (!RC)
5202 return false;
5203 if (RC != &AArch64::FPR32RegClass && RC != &AArch64::FPR64RegClass) {
5204 LLVM_DEBUG(dbgs() << "Unsupported register class!\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "Unsupported register class!\n"
; } } while (false)
;
5205 return false;
5206 }
5207
5208 unsigned SubReg = 0;
5209 if (!getSubRegForClass(RC, TRI, SubReg))
5210 return false;
5211 if (SubReg != AArch64::ssub && SubReg != AArch64::dsub) {
5212 LLVM_DEBUG(dbgs() << "Unsupported destination size! (" << DstSizedo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "Unsupported destination size! ("
<< DstSize << "\n"; } } while (false)
5213 << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "Unsupported destination size! ("
<< DstSize << "\n"; } } while (false)
;
5214 return false;
5215 }
5216
5217 Register Reg = MRI.createVirtualRegister(RC);
5218 Register DstReg = I.getOperand(0).getReg();
5219
5220 MIB.buildInstr(TargetOpcode::COPY, {DstReg}, {}).addReg(DstVec, 0, SubReg);
5221 MachineOperand &RegOp = I.getOperand(1);
5222 RegOp.setReg(Reg);
5223 RBI.constrainGenericRegister(DstReg, *RC, MRI);
5224 } else {
5225 // We don't need a subregister copy. Save a copy by re-using the
5226 // destination register on the final insert.
5227 assert(PrevMI && "PrevMI was null?")(static_cast <bool> (PrevMI && "PrevMI was null?"
) ? void (0) : __assert_fail ("PrevMI && \"PrevMI was null?\""
, "llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 5227, __extension__ __PRETTY_FUNCTION__))
;
5228 PrevMI->getOperand(0).setReg(I.getOperand(0).getReg());
5229 constrainSelectedInstRegOperands(*PrevMI, TII, TRI, RBI);
5230 }
5231
5232 I.eraseFromParent();
5233 return true;
5234}
5235
5236bool AArch64InstructionSelector::selectVectorLoadIntrinsic(unsigned Opc,
5237 unsigned NumVecs,
5238 MachineInstr &I) {
5239 assert(I.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS)(static_cast <bool> (I.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS
) ? void (0) : __assert_fail ("I.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS"
, "llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 5239, __extension__ __PRETTY_FUNCTION__))
;
5240 assert(Opc && "Expected an opcode?")(static_cast <bool> (Opc && "Expected an opcode?"
) ? void (0) : __assert_fail ("Opc && \"Expected an opcode?\""
, "llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 5240, __extension__ __PRETTY_FUNCTION__))
;
5241 assert(NumVecs > 1 && NumVecs < 5 && "Only support 2, 3, or 4 vectors")(static_cast <bool> (NumVecs > 1 && NumVecs <
5 && "Only support 2, 3, or 4 vectors") ? void (0) :
__assert_fail ("NumVecs > 1 && NumVecs < 5 && \"Only support 2, 3, or 4 vectors\""
, "llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 5241, __extension__ __PRETTY_FUNCTION__))
;
5242 auto &MRI = *MIB.getMRI();
5243 LLT Ty = MRI.getType(I.getOperand(0).getReg());
5244 unsigned Size = Ty.getSizeInBits();
5245 assert((Size == 64 || Size == 128) &&(static_cast <bool> ((Size == 64 || Size == 128) &&
"Destination must be 64 bits or 128 bits?") ? void (0) : __assert_fail
("(Size == 64 || Size == 128) && \"Destination must be 64 bits or 128 bits?\""
, "llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 5246, __extension__ __PRETTY_FUNCTION__))
5246 "Destination must be 64 bits or 128 bits?")(static_cast <bool> ((Size == 64 || Size == 128) &&
"Destination must be 64 bits or 128 bits?") ? void (0) : __assert_fail
("(Size == 64 || Size == 128) && \"Destination must be 64 bits or 128 bits?\""
, "llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 5246, __extension__ __PRETTY_FUNCTION__))
;
5247 unsigned SubReg = Size == 64 ? AArch64::dsub0 : AArch64::qsub0;
5248 auto Ptr = I.getOperand(I.getNumOperands() - 1).getReg();
5249 assert(MRI.getType(Ptr).isPointer() && "Expected a pointer type?")(static_cast <bool> (MRI.getType(Ptr).isPointer() &&
"Expected a pointer type?") ? void (0) : __assert_fail ("MRI.getType(Ptr).isPointer() && \"Expected a pointer type?\""
, "llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 5249, __extension__ __PRETTY_FUNCTION__))
;
5250 auto Load = MIB.buildInstr(Opc, {Ty}, {Ptr});
5251 Load.cloneMemRefs(I);
5252 constrainSelectedInstRegOperands(*Load, TII, TRI, RBI);
5253 Register SelectedLoadDst = Load->getOperand(0).getReg();
5254 for (unsigned Idx = 0; Idx < NumVecs; ++Idx) {
5255 auto Vec = MIB.buildInstr(TargetOpcode::COPY, {I.getOperand(Idx)}, {})
5256 .addReg(SelectedLoadDst, 0, SubReg + Idx);
5257 // Emit the subreg copies and immediately select them.
5258 // FIXME: We should refactor our copy code into an emitCopy helper and
5259 // clean up uses of this pattern elsewhere in the selector.
5260 selectCopy(*Vec, TII, MRI, TRI, RBI);
5261 }
5262 return true;
5263}
5264
5265bool AArch64InstructionSelector::selectIntrinsicWithSideEffects(
5266 MachineInstr &I, MachineRegisterInfo &MRI) {
5267 // Find the intrinsic ID.
5268 unsigned IntrinID = I.getIntrinsicID();
5269
5270 const LLT S8 = LLT::scalar(8);
5271 const LLT S16 = LLT::scalar(16);
5272 const LLT S32 = LLT::scalar(32);
5273 const LLT S64 = LLT::scalar(64);
5274 const LLT P0 = LLT::pointer(0, 64);
5275 // Select the instruction.
5276 switch (IntrinID) {
5277 default:
5278 return false;
5279 case Intrinsic::aarch64_ldxp:
5280 case Intrinsic::aarch64_ldaxp: {
5281 auto NewI = MIB.buildInstr(
5282 IntrinID == Intrinsic::aarch64_ldxp ? AArch64::LDXPX : AArch64::LDAXPX,
5283 {I.getOperand(0).getReg(), I.getOperand(1).getReg()},
5284 {I.getOperand(3)});
5285 NewI.cloneMemRefs(I);
5286 constrainSelectedInstRegOperands(*NewI, TII, TRI, RBI);
5287 break;
5288 }
5289 case Intrinsic::trap:
5290 MIB.buildInstr(AArch64::BRK, {}, {}).addImm(1);
5291 break;
5292 case Intrinsic::debugtrap:
5293 MIB.buildInstr(AArch64::BRK, {}, {}).addImm(0xF000);
5294 break;
5295 case Intrinsic::ubsantrap:
5296 MIB.buildInstr(AArch64::BRK, {}, {})
5297 .addImm(I.getOperand(1).getImm() | ('U' << 8));
5298 break;
5299 case Intrinsic::aarch64_neon_ld2: {
5300 LLT Ty = MRI.getType(I.getOperand(0).getReg());
5301 unsigned Opc = 0;
5302 if (Ty == LLT::fixed_vector(8, S8))
5303 Opc = AArch64::LD2Twov8b;
5304 else if (Ty == LLT::fixed_vector(16, S8))
5305 Opc = AArch64::LD2Twov16b;
5306 else if (Ty == LLT::fixed_vector(4, S16))
5307 Opc = AArch64::LD2Twov4h;
5308 else if (Ty == LLT::fixed_vector(8, S16))
5309 Opc = AArch64::LD2Twov8h;
5310 else if (Ty == LLT::fixed_vector(2, S32))
5311 Opc = AArch64::LD2Twov2s;
5312 else if (Ty == LLT::fixed_vector(4, S32))
5313 Opc = AArch64::LD2Twov4s;
5314 else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
5315 Opc = AArch64::LD2Twov2d;
5316 else if (Ty == S64 || Ty == P0)
5317 Opc = AArch64::LD1Twov1d;
5318 else
5319 llvm_unreachable("Unexpected type for ld2!")::llvm::llvm_unreachable_internal("Unexpected type for ld2!",
"llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 5319)
;
5320 selectVectorLoadIntrinsic(Opc, 2, I);
5321 break;
5322 }
5323 case Intrinsic::aarch64_neon_ld4: {
5324 LLT Ty = MRI.getType(I.getOperand(0).getReg());
5325 unsigned Opc = 0;
5326 if (Ty == LLT::fixed_vector(8, S8))
5327 Opc = AArch64::LD4Fourv8b;
5328 else if (Ty == LLT::fixed_vector(16, S8))
5329 Opc = AArch64::LD4Fourv16b;
5330 else if (Ty == LLT::fixed_vector(4, S16))
5331 Opc = AArch64::LD4Fourv4h;
5332 else if (Ty == LLT::fixed_vector(8, S16))
5333 Opc = AArch64::LD4Fourv8h;
5334 else if (Ty == LLT::fixed_vector(2, S32))
5335 Opc = AArch64::LD4Fourv2s;
5336 else if (Ty == LLT::fixed_vector(4, S32))
5337 Opc = AArch64::LD4Fourv4s;
5338 else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
5339 Opc = AArch64::LD4Fourv2d;
5340 else if (Ty == S64 || Ty == P0)
5341 Opc = AArch64::LD1Fourv1d;
5342 else
5343 llvm_unreachable("Unexpected type for ld4!")::llvm::llvm_unreachable_internal("Unexpected type for ld4!",
"llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 5343)
;
5344 selectVectorLoadIntrinsic(Opc, 4, I);
5345 break;
5346 }
5347 case Intrinsic::aarch64_neon_st2: {
5348 Register Src1 = I.getOperand(1).getReg();
5349 Register Src2 = I.getOperand(2).getReg();
5350 Register Ptr = I.getOperand(3).getReg();
5351 LLT Ty = MRI.getType(Src1);
5352 unsigned Opc;
5353 if (Ty == LLT::fixed_vector(8, S8))
5354 Opc = AArch64::ST2Twov8b;
5355 else if (Ty == LLT::fixed_vector(16, S8))
5356 Opc = AArch64::ST2Twov16b;
5357 else if (Ty == LLT::fixed_vector(4, S16))
5358 Opc = AArch64::ST2Twov4h;
5359 else if (Ty == LLT::fixed_vector(8, S16))
5360 Opc = AArch64::ST2Twov8h;
5361 else if (Ty == LLT::fixed_vector(2, S32))
5362 Opc = AArch64::ST2Twov2s;
5363 else if (Ty == LLT::fixed_vector(4, S32))
5364 Opc = AArch64::ST2Twov4s;
5365 else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
5366 Opc = AArch64::ST2Twov2d;
5367 else if (Ty == S64 || Ty == P0)
5368 Opc = AArch64::ST1Twov1d;
5369 else
5370 llvm_unreachable("Unexpected type for st2!")::llvm::llvm_unreachable_internal("Unexpected type for st2!",
"llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 5370)
;
5371 SmallVector<Register, 2> Regs = {Src1, Src2};
5372 Register Tuple = Ty.getSizeInBits() == 128 ? createQTuple(Regs, MIB)
5373 : createDTuple(Regs, MIB);
5374 auto Store = MIB.buildInstr(Opc, {}, {Tuple, Ptr});
5375 Store.cloneMemRefs(I);
5376 constrainSelectedInstRegOperands(*Store, TII, TRI, RBI);
5377 break;
5378 }
5379 }
5380
5381 I.eraseFromParent();
5382 return true;
5383}
5384
5385bool AArch64InstructionSelector::selectIntrinsic(MachineInstr &I,
5386 MachineRegisterInfo &MRI) {
5387 unsigned IntrinID = I.getIntrinsicID();
5388
5389 switch (IntrinID) {
5390 default:
5391 break;
5392 case Intrinsic::aarch64_crypto_sha1h: {
5393 Register DstReg = I.getOperand(0).getReg();
5394 Register SrcReg = I.getOperand(2).getReg();
5395
5396 // FIXME: Should this be an assert?
5397 if (MRI.getType(DstReg).getSizeInBits() != 32 ||
5398 MRI.getType(SrcReg).getSizeInBits() != 32)
5399 return false;
5400
5401 // The operation has to happen on FPRs. Set up some new FPR registers for
5402 // the source and destination if they are on GPRs.
5403 if (RBI.getRegBank(SrcReg, MRI, TRI)->getID() != AArch64::FPRRegBankID) {
5404 SrcReg = MRI.createVirtualRegister(&AArch64::FPR32RegClass);
5405 MIB.buildCopy({SrcReg}, {I.getOperand(2)});
5406
5407 // Make sure the copy ends up getting constrained properly.
5408 RBI.constrainGenericRegister(I.getOperand(2).getReg(),
5409 AArch64::GPR32RegClass, MRI);
5410 }
5411
5412 if (RBI.getRegBank(DstReg, MRI, TRI)->getID() != AArch64::FPRRegBankID)
5413 DstReg = MRI.createVirtualRegister(&AArch64::FPR32RegClass);
5414
5415 // Actually insert the instruction.
5416 auto SHA1Inst = MIB.buildInstr(AArch64::SHA1Hrr, {DstReg}, {SrcReg});
5417 constrainSelectedInstRegOperands(*SHA1Inst, TII, TRI, RBI);
5418
5419 // Did we create a new register for the destination?
5420 if (DstReg != I.getOperand(0).getReg()) {
5421 // Yep. Copy the result of the instruction back into the original
5422 // destination.
5423 MIB.buildCopy({I.getOperand(0)}, {DstReg});
5424 RBI.constrainGenericRegister(I.getOperand(0).getReg(),
5425 AArch64::GPR32RegClass, MRI);
5426 }
5427
5428 I.eraseFromParent();
5429 return true;
5430 }
5431 case Intrinsic::ptrauth_sign: {
5432 Register DstReg = I.getOperand(0).getReg();
5433 Register ValReg = I.getOperand(2).getReg();
5434 uint64_t Key = I.getOperand(3).getImm();
5435 Register DiscReg = I.getOperand(4).getReg();
5436 auto DiscVal = getIConstantVRegVal(DiscReg, MRI);
5437 bool IsDiscZero = DiscVal.hasValue() && DiscVal->isNullValue();
5438
5439 if (Key > 3)
5440 return false;
5441
5442 unsigned Opcodes[][4] = {
5443 {AArch64::PACIA, AArch64::PACIB, AArch64::PACDA, AArch64::PACDB},
5444 {AArch64::PACIZA, AArch64::PACIZB, AArch64::PACDZA, AArch64::PACDZB}};
5445 unsigned Opcode = Opcodes[IsDiscZero][Key];
5446
5447 auto PAC = MIB.buildInstr(Opcode, {DstReg}, {ValReg});
5448
5449 if (!IsDiscZero) {
5450 PAC.addUse(DiscReg);
5451 RBI.constrainGenericRegister(DiscReg, AArch64::GPR64spRegClass, MRI);
5452 }
5453
5454 RBI.constrainGenericRegister(DstReg, AArch64::GPR64RegClass, MRI);
5455 I.eraseFromParent();
5456 return true;
5457 }
5458 case Intrinsic::frameaddress:
5459 case Intrinsic::returnaddress: {
5460 MachineFunction &MF = *I.getParent()->getParent();
5461 MachineFrameInfo &MFI = MF.getFrameInfo();
5462
5463 unsigned Depth = I.getOperand(2).getImm();
5464 Register DstReg = I.getOperand(0).getReg();
5465 RBI.constrainGenericRegister(DstReg, AArch64::GPR64RegClass, MRI);
5466
5467 if (Depth == 0 && IntrinID == Intrinsic::returnaddress) {
5468 if (!MFReturnAddr) {
5469 // Insert the copy from LR/X30 into the entry block, before it can be
5470 // clobbered by anything.
5471 MFI.setReturnAddressIsTaken(true);
5472 MFReturnAddr = getFunctionLiveInPhysReg(
5473 MF, TII, AArch64::LR, AArch64::GPR64RegClass, I.getDebugLoc());
5474 }
5475
5476 if (STI.hasPAuth()) {
5477 MIB.buildInstr(AArch64::XPACI, {DstReg}, {MFReturnAddr});
5478 } else {
5479 MIB.buildCopy({Register(AArch64::LR)}, {MFReturnAddr});
5480 MIB.buildInstr(AArch64::XPACLRI);
5481 MIB.buildCopy({DstReg}, {Register(AArch64::LR)});
5482 }
5483
5484 I.eraseFromParent();
5485 return true;
5486 }
5487
5488 MFI.setFrameAddressIsTaken(true);
5489 Register FrameAddr(AArch64::FP);
5490 while (Depth--) {
5491 Register NextFrame = MRI.createVirtualRegister(&AArch64::GPR64spRegClass);
5492 auto Ldr =
5493 MIB.buildInstr(AArch64::LDRXui, {NextFrame}, {FrameAddr}).addImm(0);
5494 constrainSelectedInstRegOperands(*Ldr, TII, TRI, RBI);
5495 FrameAddr = NextFrame;
5496 }
5497
5498 if (IntrinID == Intrinsic::frameaddress)
5499 MIB.buildCopy({DstReg}, {FrameAddr});
5500 else {
5501 MFI.setReturnAddressIsTaken(true);
5502
5503 if (STI.hasPAuth()) {
5504 Register TmpReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
5505 MIB.buildInstr(AArch64::LDRXui, {TmpReg}, {FrameAddr}).addImm(1);
5506 MIB.buildInstr(AArch64::XPACI, {DstReg}, {TmpReg});
5507 } else {
5508 MIB.buildInstr(AArch64::LDRXui, {Register(AArch64::LR)}, {FrameAddr})
5509 .addImm(1);
5510 MIB.buildInstr(AArch64::XPACLRI);
5511 MIB.buildCopy({DstReg}, {Register(AArch64::LR)});
5512 }
5513 }
5514
5515 I.eraseFromParent();
5516 return true;
5517 }
5518 case Intrinsic::swift_async_context_addr:
5519 auto Sub = MIB.buildInstr(AArch64::SUBXri, {I.getOperand(0).getReg()},
5520 {Register(AArch64::FP)})
5521 .addImm(8)
5522 .addImm(0);
5523 constrainSelectedInstRegOperands(*Sub, TII, TRI, RBI);
5524
5525 MF->getFrameInfo().setFrameAddressIsTaken(true);
5526 MF->getInfo<AArch64FunctionInfo>()->setHasSwiftAsyncContext(true);
5527 I.eraseFromParent();
5528 return true;
5529 }
5530 return false;
5531}
5532
5533InstructionSelector::ComplexRendererFns
5534AArch64InstructionSelector::selectShiftA_32(const MachineOperand &Root) const {
5535 auto MaybeImmed = getImmedFromMO(Root);
5536 if (MaybeImmed == None || *MaybeImmed > 31)
5537 return None;
5538 uint64_t Enc = (32 - *MaybeImmed) & 0x1f;
5539 return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Enc); }}};
5540}
5541
5542InstructionSelector::ComplexRendererFns
5543AArch64InstructionSelector::selectShiftB_32(const MachineOperand &Root) const {
5544 auto MaybeImmed = getImmedFromMO(Root);
5545 if (MaybeImmed == None || *MaybeImmed > 31)
5546 return None;
5547 uint64_t Enc = 31 - *MaybeImmed;
5548 return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Enc); }}};
5549}
5550
5551InstructionSelector::ComplexRendererFns
5552AArch64InstructionSelector::selectShiftA_64(const MachineOperand &Root) const {
5553 auto MaybeImmed = getImmedFromMO(Root);
5554 if (MaybeImmed == None || *MaybeImmed > 63)
5555 return None;
5556 uint64_t Enc = (64 - *MaybeImmed) & 0x3f;
5557 return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Enc); }}};
5558}
5559
5560InstructionSelector::ComplexRendererFns
5561AArch64InstructionSelector::selectShiftB_64(const MachineOperand &Root) const {
5562 auto MaybeImmed = getImmedFromMO(Root);
5563 if (MaybeImmed == None || *MaybeImmed > 63)
5564 return None;
5565 uint64_t Enc = 63 - *MaybeImmed;
5566 return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Enc); }}};
5567}
5568
5569/// Helper to select an immediate value that can be represented as a 12-bit
5570/// value shifted left by either 0 or 12. If it is possible to do so, return
5571/// the immediate and shift value. If not, return None.
5572///
5573/// Used by selectArithImmed and selectNegArithImmed.
5574InstructionSelector::ComplexRendererFns
5575AArch64InstructionSelector::select12BitValueWithLeftShift(
5576 uint64_t Immed) const {
5577 unsigned ShiftAmt;
5578 if (Immed >> 12 == 0) {
5579 ShiftAmt = 0;
5580 } else if ((Immed & 0xfff) == 0 && Immed >> 24 == 0) {
5581 ShiftAmt = 12;
5582 Immed = Immed >> 12;
5583 } else
5584 return None;
5585
5586 unsigned ShVal = AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftAmt);
5587 return {{
5588 [=](MachineInstrBuilder &MIB) { MIB.addImm(Immed); },
5589 [=](MachineInstrBuilder &MIB) { MIB.addImm(ShVal); },
5590 }};
5591}
5592
5593/// SelectArithImmed - Select an immediate value that can be represented as
5594/// a 12-bit value shifted left by either 0 or 12. If so, return true with
5595/// Val set to the 12-bit value and Shift set to the shifter operand.
5596InstructionSelector::ComplexRendererFns
5597AArch64InstructionSelector::selectArithImmed(MachineOperand &Root) const {
5598 // This function is called from the addsub_shifted_imm ComplexPattern,
5599 // which lists [imm] as the list of opcode it's interested in, however
5600 // we still need to check whether the operand is actually an immediate
5601 // here because the ComplexPattern opcode list is only used in
5602 // root-level opcode matching.
5603 auto MaybeImmed = getImmedFromMO(Root);
5604 if (MaybeImmed == None)
5605 return None;
5606 return select12BitValueWithLeftShift(*MaybeImmed);
5607}
5608
5609/// SelectNegArithImmed - As above, but negates the value before trying to
5610/// select it.
5611InstructionSelector::ComplexRendererFns
5612AArch64InstructionSelector::selectNegArithImmed(MachineOperand &Root) const {
5613 // We need a register here, because we need to know if we have a 64 or 32
5614 // bit immediate.
5615 if (!Root.isReg())
5616 return None;
5617 auto MaybeImmed = getImmedFromMO(Root);
5618 if (MaybeImmed == None)
5619 return None;
5620 uint64_t Immed = *MaybeImmed;
5621
5622