Bug Summary

File: llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
Warning: line 999, column 7
1st function call argument is an uninitialized value
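The defect path below boils down to an out-parameter that is only written on success: getSubRegForClass() returns false from its default case without touching SubReg, and selectCopy() ignores that return value before using SubReg. A minimal, self-contained sketch of the same pattern (hypothetical names, not the LLVM code itself):

#include <cstdio>

// Hypothetical stand-in for getSubRegForClass(): on an unhandled size it
// reports failure and returns without writing to the out-parameter.
static bool pickSubReg(unsigned SizeInBits, unsigned &SubReg) {
  switch (SizeInBits) {
  case 32:
    SubReg = 1;
    return true;
  case 64:
    SubReg = 2;
    return true;
  default:
    return false; // SubReg is left unwritten on this path.
  }
}

int main() {
  unsigned SubReg;             // declared without an initial value
  pickSubReg(128, SubReg);     // return value ignored; 128 falls into 'default'
  std::printf("%u\n", SubReg); // reads an uninitialized value -- what the analyzer flags
}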

Annotated Source Code


clang -cc1 -cc1 -triple x86_64-pc-linux-gnu -analyze -disable-free -clear-ast-before-backend -disable-llvm-verifier -discard-value-names -main-file-name AArch64InstructionSelector.cpp -analyzer-store=region -analyzer-opt-analyze-nested-blocks -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=cplusplus -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -analyzer-config-compatibility-mode=true -mrelocation-model pic -pic-level 2 -mframe-pointer=none -fmath-errno -ffp-contract=on -fno-rounding-math -mconstructor-aliases -funwind-tables=2 -target-cpu x86-64 -tune-cpu generic -debugger-tuning=gdb -ffunction-sections -fdata-sections -fcoverage-compilation-dir=/build/llvm-toolchain-snapshot-14~++20220126111400+9b6c2ea30219/build-llvm/tools/clang/stage2-bins -resource-dir /usr/lib/llvm-14/lib/clang/14.0.0 -D _DEBUG -D _GNU_SOURCE -D __STDC_CONSTANT_MACROS -D __STDC_FORMAT_MACROS -D __STDC_LIMIT_MACROS -I lib/Target/AArch64 -I /build/llvm-toolchain-snapshot-14~++20220126111400+9b6c2ea30219/llvm/lib/Target/AArch64 -I include -I /build/llvm-toolchain-snapshot-14~++20220126111400+9b6c2ea30219/llvm/include -D _FORTIFY_SOURCE=2 -D NDEBUG -U NDEBUG -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/c++/10 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/x86_64-linux-gnu/c++/10 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/c++/10/backward -internal-isystem /usr/lib/llvm-14/lib/clang/14.0.0/include -internal-isystem /usr/local/include -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../x86_64-linux-gnu/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -fmacro-prefix-map=/build/llvm-toolchain-snapshot-14~++20220126111400+9b6c2ea30219/build-llvm/tools/clang/stage2-bins=build-llvm/tools/clang/stage2-bins -fmacro-prefix-map=/build/llvm-toolchain-snapshot-14~++20220126111400+9b6c2ea30219/= -fcoverage-prefix-map=/build/llvm-toolchain-snapshot-14~++20220126111400+9b6c2ea30219/build-llvm/tools/clang/stage2-bins=build-llvm/tools/clang/stage2-bins -fcoverage-prefix-map=/build/llvm-toolchain-snapshot-14~++20220126111400+9b6c2ea30219/= -O3 -Wno-unused-command-line-argument -Wno-unused-parameter -Wwrite-strings -Wno-missing-field-initializers -Wno-long-long -Wno-maybe-uninitialized -Wno-class-memaccess -Wno-redundant-move -Wno-pessimizing-move -Wno-noexcept-type -Wno-comment -std=c++14 -fdeprecated-macro -fdebug-compilation-dir=/build/llvm-toolchain-snapshot-14~++20220126111400+9b6c2ea30219/build-llvm/tools/clang/stage2-bins -fdebug-prefix-map=/build/llvm-toolchain-snapshot-14~++20220126111400+9b6c2ea30219/build-llvm/tools/clang/stage2-bins=build-llvm/tools/clang/stage2-bins -fdebug-prefix-map=/build/llvm-toolchain-snapshot-14~++20220126111400+9b6c2ea30219/= -ferror-limit 19 -fvisibility hidden -fvisibility-inlines-hidden -stack-protector 2 -fgnuc-version=4.2.1 -fcolor-diagnostics -vectorize-loops -vectorize-slp -analyzer-output=html -analyzer-config stable-report-filename=true -faddrsig 
-D__GCC_HAVE_DWARF2_CFI_ASM=1 -o /tmp/scan-build-2022-01-26-130535-15419-1 -x c++ /build/llvm-toolchain-snapshot-14~++20220126111400+9b6c2ea30219/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp

/build/llvm-toolchain-snapshot-14~++20220126111400+9b6c2ea30219/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp

1//===- AArch64InstructionSelector.cpp ----------------------------*- C++ -*-==//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8/// \file
9/// This file implements the targeting of the InstructionSelector class for
10/// AArch64.
11/// \todo This should be generated by TableGen.
12//===----------------------------------------------------------------------===//
13
14#include "AArch64GlobalISelUtils.h"
15#include "AArch64InstrInfo.h"
16#include "AArch64MachineFunctionInfo.h"
17#include "AArch64RegisterBankInfo.h"
18#include "AArch64RegisterInfo.h"
19#include "AArch64Subtarget.h"
20#include "AArch64TargetMachine.h"
21#include "MCTargetDesc/AArch64AddressingModes.h"
22#include "MCTargetDesc/AArch64MCTargetDesc.h"
23#include "llvm/ADT/Optional.h"
24#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
25#include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
26#include "llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h"
27#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
28#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
29#include "llvm/CodeGen/MachineBasicBlock.h"
30#include "llvm/CodeGen/MachineConstantPool.h"
31#include "llvm/CodeGen/MachineFunction.h"
32#include "llvm/CodeGen/MachineInstr.h"
33#include "llvm/CodeGen/MachineInstrBuilder.h"
34#include "llvm/CodeGen/MachineMemOperand.h"
35#include "llvm/CodeGen/MachineOperand.h"
36#include "llvm/CodeGen/MachineRegisterInfo.h"
37#include "llvm/CodeGen/TargetOpcodes.h"
38#include "llvm/IR/Constants.h"
39#include "llvm/IR/DerivedTypes.h"
40#include "llvm/IR/Instructions.h"
41#include "llvm/IR/PatternMatch.h"
42#include "llvm/IR/Type.h"
43#include "llvm/IR/IntrinsicsAArch64.h"
44#include "llvm/Pass.h"
45#include "llvm/Support/Debug.h"
46#include "llvm/Support/raw_ostream.h"
47
48#define DEBUG_TYPE "aarch64-isel"
49
50using namespace llvm;
51using namespace MIPatternMatch;
52using namespace AArch64GISelUtils;
53
54namespace llvm {
55class BlockFrequencyInfo;
56class ProfileSummaryInfo;
57}
58
59namespace {
60
61#define GET_GLOBALISEL_PREDICATE_BITSET
62#include "AArch64GenGlobalISel.inc"
63#undef GET_GLOBALISEL_PREDICATE_BITSET
64
65class AArch64InstructionSelector : public InstructionSelector {
66public:
67 AArch64InstructionSelector(const AArch64TargetMachine &TM,
68 const AArch64Subtarget &STI,
69 const AArch64RegisterBankInfo &RBI);
70
71 bool select(MachineInstr &I) override;
72 static const char *getName() { return DEBUG_TYPE; }
73
74 void setupMF(MachineFunction &MF, GISelKnownBits *KB,
75 CodeGenCoverage &CoverageInfo, ProfileSummaryInfo *PSI,
76 BlockFrequencyInfo *BFI) override {
77 InstructionSelector::setupMF(MF, KB, CoverageInfo, PSI, BFI);
78 MIB.setMF(MF);
79
80 // hasFnAttribute() is expensive to call on every BRCOND selection, so
81 // cache it here for each run of the selector.
82 ProduceNonFlagSettingCondBr =
83 !MF.getFunction().hasFnAttribute(Attribute::SpeculativeLoadHardening);
84 MFReturnAddr = Register();
85
86 processPHIs(MF);
87 }
88
89private:
90 /// tblgen-erated 'select' implementation, used as the initial selector for
91 /// the patterns that don't require complex C++.
92 bool selectImpl(MachineInstr &I, CodeGenCoverage &CoverageInfo) const;
93
94 // A lowering phase that runs before any selection attempts.
95 // Returns true if the instruction was modified.
96 bool preISelLower(MachineInstr &I);
97
98 // An early selection function that runs before the selectImpl() call.
99 bool earlySelect(MachineInstr &I);
100
101 // Do some preprocessing of G_PHIs before we begin selection.
102 void processPHIs(MachineFunction &MF);
103
104 bool earlySelectSHL(MachineInstr &I, MachineRegisterInfo &MRI);
105
106 /// Eliminate same-sized cross-bank copies into stores before selectImpl().
107 bool contractCrossBankCopyIntoStore(MachineInstr &I,
108 MachineRegisterInfo &MRI);
109
110 bool convertPtrAddToAdd(MachineInstr &I, MachineRegisterInfo &MRI);
111
112 bool selectVaStartAAPCS(MachineInstr &I, MachineFunction &MF,
113 MachineRegisterInfo &MRI) const;
114 bool selectVaStartDarwin(MachineInstr &I, MachineFunction &MF,
115 MachineRegisterInfo &MRI) const;
116
117 ///@{
118 /// Helper functions for selectCompareBranch.
119 bool selectCompareBranchFedByFCmp(MachineInstr &I, MachineInstr &FCmp,
120 MachineIRBuilder &MIB) const;
121 bool selectCompareBranchFedByICmp(MachineInstr &I, MachineInstr &ICmp,
122 MachineIRBuilder &MIB) const;
123 bool tryOptCompareBranchFedByICmp(MachineInstr &I, MachineInstr &ICmp,
124 MachineIRBuilder &MIB) const;
125 bool tryOptAndIntoCompareBranch(MachineInstr &AndInst, bool Invert,
126 MachineBasicBlock *DstMBB,
127 MachineIRBuilder &MIB) const;
128 ///@}
129
130 bool selectCompareBranch(MachineInstr &I, MachineFunction &MF,
131 MachineRegisterInfo &MRI);
132
133 bool selectVectorAshrLshr(MachineInstr &I, MachineRegisterInfo &MRI);
134 bool selectVectorSHL(MachineInstr &I, MachineRegisterInfo &MRI);
135
136 // Helper to generate an equivalent of scalar_to_vector into a new register,
137 // returned via 'Dst'.
138 MachineInstr *emitScalarToVector(unsigned EltSize,
139 const TargetRegisterClass *DstRC,
140 Register Scalar,
141 MachineIRBuilder &MIRBuilder) const;
142
143 /// Emit a lane insert into \p DstReg, or a new vector register if None is
144 /// provided.
145 ///
146 /// The lane inserted into is defined by \p LaneIdx. The vector source
147 /// register is given by \p SrcReg. The register containing the element is
148 /// given by \p EltReg.
149 MachineInstr *emitLaneInsert(Optional<Register> DstReg, Register SrcReg,
150 Register EltReg, unsigned LaneIdx,
151 const RegisterBank &RB,
152 MachineIRBuilder &MIRBuilder) const;
153
154 /// Emit a sequence of instructions representing a constant \p CV for a
155 /// vector register \p Dst. (E.g. a MOV, or a load from a constant pool.)
156 ///
157 /// \returns the last instruction in the sequence on success, and nullptr
158 /// otherwise.
159 MachineInstr *emitConstantVector(Register Dst, Constant *CV,
160 MachineIRBuilder &MIRBuilder,
161 MachineRegisterInfo &MRI);
162
163 bool selectInsertElt(MachineInstr &I, MachineRegisterInfo &MRI);
164 bool tryOptConstantBuildVec(MachineInstr &MI, LLT DstTy,
165 MachineRegisterInfo &MRI);
166 /// \returns true if a G_BUILD_VECTOR instruction \p MI can be selected as a
167 /// SUBREG_TO_REG.
168 bool tryOptBuildVecToSubregToReg(MachineInstr &MI, MachineRegisterInfo &MRI);
169 bool selectBuildVector(MachineInstr &I, MachineRegisterInfo &MRI);
170 bool selectMergeValues(MachineInstr &I, MachineRegisterInfo &MRI);
171 bool selectUnmergeValues(MachineInstr &I, MachineRegisterInfo &MRI);
172
173 bool selectShuffleVector(MachineInstr &I, MachineRegisterInfo &MRI);
174 bool selectExtractElt(MachineInstr &I, MachineRegisterInfo &MRI);
175 bool selectConcatVectors(MachineInstr &I, MachineRegisterInfo &MRI);
176 bool selectSplitVectorUnmerge(MachineInstr &I, MachineRegisterInfo &MRI);
177
178 /// Helper function to select vector load intrinsics like
179 /// @llvm.aarch64.neon.ld2.*, @llvm.aarch64.neon.ld4.*, etc.
180 /// \p Opc is the opcode that the selected instruction should use.
181 /// \p NumVecs is the number of vector destinations for the instruction.
182 /// \p I is the original G_INTRINSIC_W_SIDE_EFFECTS instruction.
183 bool selectVectorLoadIntrinsic(unsigned Opc, unsigned NumVecs,
184 MachineInstr &I);
185 bool selectIntrinsicWithSideEffects(MachineInstr &I,
186 MachineRegisterInfo &MRI);
187 bool selectIntrinsic(MachineInstr &I, MachineRegisterInfo &MRI);
188 bool selectVectorICmp(MachineInstr &I, MachineRegisterInfo &MRI);
189 bool selectIntrinsicTrunc(MachineInstr &I, MachineRegisterInfo &MRI) const;
190 bool selectIntrinsicRound(MachineInstr &I, MachineRegisterInfo &MRI) const;
191 bool selectJumpTable(MachineInstr &I, MachineRegisterInfo &MRI);
192 bool selectBrJT(MachineInstr &I, MachineRegisterInfo &MRI);
193 bool selectTLSGlobalValue(MachineInstr &I, MachineRegisterInfo &MRI);
194 bool selectReduction(MachineInstr &I, MachineRegisterInfo &MRI);
195 bool selectUSMovFromExtend(MachineInstr &I, MachineRegisterInfo &MRI);
196
197 unsigned emitConstantPoolEntry(const Constant *CPVal,
198 MachineFunction &MF) const;
199 MachineInstr *emitLoadFromConstantPool(const Constant *CPVal,
200 MachineIRBuilder &MIRBuilder) const;
201
202 // Emit a vector concat operation.
203 MachineInstr *emitVectorConcat(Optional<Register> Dst, Register Op1,
204 Register Op2,
205 MachineIRBuilder &MIRBuilder) const;
206
207 // Emit an integer compare between LHS and RHS, which checks for Predicate.
208 MachineInstr *emitIntegerCompare(MachineOperand &LHS, MachineOperand &RHS,
209 MachineOperand &Predicate,
210 MachineIRBuilder &MIRBuilder) const;
211
212 /// Emit a floating point comparison between \p LHS and \p RHS.
213 /// \p Pred if given is the intended predicate to use.
214 MachineInstr *emitFPCompare(Register LHS, Register RHS,
215 MachineIRBuilder &MIRBuilder,
216 Optional<CmpInst::Predicate> = None) const;
217
218 MachineInstr *emitInstr(unsigned Opcode,
219 std::initializer_list<llvm::DstOp> DstOps,
220 std::initializer_list<llvm::SrcOp> SrcOps,
221 MachineIRBuilder &MIRBuilder,
222 const ComplexRendererFns &RenderFns = None) const;
223 /// Helper function to emit an add or sub instruction.
224 ///
225 /// \p AddrModeAndSizeToOpcode must contain each of the opcode variants above
226 /// in a specific order.
227 ///
228 /// Below is an example of the expected input to \p AddrModeAndSizeToOpcode.
229 ///
230 /// \code
231 /// const std::array<std::array<unsigned, 2>, 4> Table {
232 /// {{AArch64::ADDXri, AArch64::ADDWri},
233 /// {AArch64::ADDXrs, AArch64::ADDWrs},
234 /// {AArch64::ADDXrr, AArch64::ADDWrr},
235 /// {AArch64::SUBXri, AArch64::SUBWri},
236 /// {AArch64::ADDXrx, AArch64::ADDWrx}}};
237 /// \endcode
238 ///
239 /// Each row in the table corresponds to a different addressing mode. Each
240 /// column corresponds to a different register size.
241 ///
242 /// \attention Rows must be structured as follows:
243 /// - Row 0: The ri opcode variants
244 /// - Row 1: The rs opcode variants
245 /// - Row 2: The rr opcode variants
246 /// - Row 3: The ri opcode variants for negative immediates
247 /// - Row 4: The rx opcode variants
248 ///
249 /// \attention Columns must be structured as follows:
250 /// - Column 0: The 64-bit opcode variants
251 /// - Column 1: The 32-bit opcode variants
252 ///
253 /// \p Dst is the destination register of the binop to emit.
254 /// \p LHS is the left-hand operand of the binop to emit.
255 /// \p RHS is the right-hand operand of the binop to emit.
256 MachineInstr *emitAddSub(
257 const std::array<std::array<unsigned, 2>, 5> &AddrModeAndSizeToOpcode,
258 Register Dst, MachineOperand &LHS, MachineOperand &RHS,
259 MachineIRBuilder &MIRBuilder) const;
260 MachineInstr *emitADD(Register DefReg, MachineOperand &LHS,
261 MachineOperand &RHS,
262 MachineIRBuilder &MIRBuilder) const;
263 MachineInstr *emitADDS(Register Dst, MachineOperand &LHS, MachineOperand &RHS,
264 MachineIRBuilder &MIRBuilder) const;
265 MachineInstr *emitSUBS(Register Dst, MachineOperand &LHS, MachineOperand &RHS,
266 MachineIRBuilder &MIRBuilder) const;
267 MachineInstr *emitCMN(MachineOperand &LHS, MachineOperand &RHS,
268 MachineIRBuilder &MIRBuilder) const;
269 MachineInstr *emitTST(MachineOperand &LHS, MachineOperand &RHS,
270 MachineIRBuilder &MIRBuilder) const;
271 MachineInstr *emitSelect(Register Dst, Register LHS, Register RHS,
272 AArch64CC::CondCode CC,
273 MachineIRBuilder &MIRBuilder) const;
274 MachineInstr *emitExtractVectorElt(Optional<Register> DstReg,
275 const RegisterBank &DstRB, LLT ScalarTy,
276 Register VecReg, unsigned LaneIdx,
277 MachineIRBuilder &MIRBuilder) const;
278 MachineInstr *emitCSINC(Register Dst, Register Src1, Register Src2,
279 AArch64CC::CondCode Pred,
280 MachineIRBuilder &MIRBuilder) const;
281 /// Emit a CSet for a FP compare.
282 ///
283 /// \p Dst is expected to be a 32-bit scalar register.
284 MachineInstr *emitCSetForFCmp(Register Dst, CmpInst::Predicate Pred,
285 MachineIRBuilder &MIRBuilder) const;
286
287 /// Emit the overflow op for \p Opcode.
288 ///
289 /// \p Opcode is expected to be an overflow op's opcode, e.g. G_UADDO,
290 /// G_USUBO, etc.
291 std::pair<MachineInstr *, AArch64CC::CondCode>
292 emitOverflowOp(unsigned Opcode, Register Dst, MachineOperand &LHS,
293 MachineOperand &RHS, MachineIRBuilder &MIRBuilder) const;
294
295 /// Emit a TB(N)Z instruction which tests \p Bit in \p TestReg.
296 /// \p IsNegative is true if the test should be "not zero".
297 /// This will also optimize the test bit instruction when possible.
298 MachineInstr *emitTestBit(Register TestReg, uint64_t Bit, bool IsNegative,
299 MachineBasicBlock *DstMBB,
300 MachineIRBuilder &MIB) const;
301
302 /// Emit a CB(N)Z instruction which branches to \p DestMBB.
303 MachineInstr *emitCBZ(Register CompareReg, bool IsNegative,
304 MachineBasicBlock *DestMBB,
305 MachineIRBuilder &MIB) const;
306
307 // Equivalent to the i32shift_a and friends from AArch64InstrInfo.td.
308 // We use these manually instead of using the importer since it doesn't
309 // support SDNodeXForm.
310 ComplexRendererFns selectShiftA_32(const MachineOperand &Root) const;
311 ComplexRendererFns selectShiftB_32(const MachineOperand &Root) const;
312 ComplexRendererFns selectShiftA_64(const MachineOperand &Root) const;
313 ComplexRendererFns selectShiftB_64(const MachineOperand &Root) const;
314
315 ComplexRendererFns select12BitValueWithLeftShift(uint64_t Immed) const;
316 ComplexRendererFns selectArithImmed(MachineOperand &Root) const;
317 ComplexRendererFns selectNegArithImmed(MachineOperand &Root) const;
318
319 ComplexRendererFns selectAddrModeUnscaled(MachineOperand &Root,
320 unsigned Size) const;
321
322 ComplexRendererFns selectAddrModeUnscaled8(MachineOperand &Root) const {
323 return selectAddrModeUnscaled(Root, 1);
324 }
325 ComplexRendererFns selectAddrModeUnscaled16(MachineOperand &Root) const {
326 return selectAddrModeUnscaled(Root, 2);
327 }
328 ComplexRendererFns selectAddrModeUnscaled32(MachineOperand &Root) const {
329 return selectAddrModeUnscaled(Root, 4);
330 }
331 ComplexRendererFns selectAddrModeUnscaled64(MachineOperand &Root) const {
332 return selectAddrModeUnscaled(Root, 8);
333 }
334 ComplexRendererFns selectAddrModeUnscaled128(MachineOperand &Root) const {
335 return selectAddrModeUnscaled(Root, 16);
336 }
337
338 /// Helper to try to fold in a GISEL_ADD_LOW into an immediate, to be used
339 /// from complex pattern matchers like selectAddrModeIndexed().
340 ComplexRendererFns tryFoldAddLowIntoImm(MachineInstr &RootDef, unsigned Size,
341 MachineRegisterInfo &MRI) const;
342
343 ComplexRendererFns selectAddrModeIndexed(MachineOperand &Root,
344 unsigned Size) const;
345 template <int Width>
346 ComplexRendererFns selectAddrModeIndexed(MachineOperand &Root) const {
347 return selectAddrModeIndexed(Root, Width / 8);
348 }
349
350 bool isWorthFoldingIntoExtendedReg(MachineInstr &MI,
351 const MachineRegisterInfo &MRI) const;
352 ComplexRendererFns
353 selectAddrModeShiftedExtendXReg(MachineOperand &Root,
354 unsigned SizeInBytes) const;
355
356 /// Returns a \p ComplexRendererFns which contains a base, offset, and whether
357 /// or not a shift + extend should be folded into an addressing mode. Returns
358 /// None when this is not profitable or possible.
359 ComplexRendererFns
360 selectExtendedSHL(MachineOperand &Root, MachineOperand &Base,
361 MachineOperand &Offset, unsigned SizeInBytes,
362 bool WantsExt) const;
363 ComplexRendererFns selectAddrModeRegisterOffset(MachineOperand &Root) const;
364 ComplexRendererFns selectAddrModeXRO(MachineOperand &Root,
365 unsigned SizeInBytes) const;
366 template <int Width>
367 ComplexRendererFns selectAddrModeXRO(MachineOperand &Root) const {
368 return selectAddrModeXRO(Root, Width / 8);
369 }
370
371 ComplexRendererFns selectAddrModeWRO(MachineOperand &Root,
372 unsigned SizeInBytes) const;
373 template <int Width>
374 ComplexRendererFns selectAddrModeWRO(MachineOperand &Root) const {
375 return selectAddrModeWRO(Root, Width / 8);
376 }
377
378 ComplexRendererFns selectShiftedRegister(MachineOperand &Root,
379 bool AllowROR = false) const;
380
381 ComplexRendererFns selectArithShiftedRegister(MachineOperand &Root) const {
382 return selectShiftedRegister(Root);
383 }
384
385 ComplexRendererFns selectLogicalShiftedRegister(MachineOperand &Root) const {
386 return selectShiftedRegister(Root, true);
387 }
388
389 /// Given an extend instruction, determine the correct shift-extend type for
390 /// that instruction.
391 ///
392 /// If the instruction is going to be used in a load or store, pass
393 /// \p IsLoadStore = true.
394 AArch64_AM::ShiftExtendType
395 getExtendTypeForInst(MachineInstr &MI, MachineRegisterInfo &MRI,
396 bool IsLoadStore = false) const;
397
398 /// Move \p Reg to \p RC if \p Reg is not already on \p RC.
399 ///
400 /// \returns Either \p Reg if no change was necessary, or the new register
401 /// created by moving \p Reg.
402 ///
403 /// Note: This uses emitCopy right now.
404 Register moveScalarRegClass(Register Reg, const TargetRegisterClass &RC,
405 MachineIRBuilder &MIB) const;
406
407 ComplexRendererFns selectArithExtendedRegister(MachineOperand &Root) const;
408
409 void renderTruncImm(MachineInstrBuilder &MIB, const MachineInstr &MI,
410 int OpIdx = -1) const;
411 void renderLogicalImm32(MachineInstrBuilder &MIB, const MachineInstr &I,
412 int OpIdx = -1) const;
413 void renderLogicalImm64(MachineInstrBuilder &MIB, const MachineInstr &I,
414 int OpIdx = -1) const;
415 void renderFPImm16(MachineInstrBuilder &MIB, const MachineInstr &MI,
416 int OpIdx = -1) const;
417 void renderFPImm32(MachineInstrBuilder &MIB, const MachineInstr &MI,
418 int OpIdx = -1) const;
419 void renderFPImm64(MachineInstrBuilder &MIB, const MachineInstr &MI,
420 int OpIdx = -1) const;
421
422 // Materialize a GlobalValue or BlockAddress using a movz+movk sequence.
423 void materializeLargeCMVal(MachineInstr &I, const Value *V, unsigned OpFlags);
424
425 // Optimization methods.
426 bool tryOptSelect(MachineInstr &MI);
427 MachineInstr *tryFoldIntegerCompare(MachineOperand &LHS, MachineOperand &RHS,
428 MachineOperand &Predicate,
429 MachineIRBuilder &MIRBuilder) const;
430
431 /// Return true if \p MI is a load or store of \p NumBytes bytes.
432 bool isLoadStoreOfNumBytes(const MachineInstr &MI, unsigned NumBytes) const;
433
434 /// Returns true if \p MI is guaranteed to have the high-half of a 64-bit
435 /// register zeroed out. In other words, the result of MI has been explicitly
436 /// zero extended.
437 bool isDef32(const MachineInstr &MI) const;
438
439 const AArch64TargetMachine &TM;
440 const AArch64Subtarget &STI;
441 const AArch64InstrInfo &TII;
442 const AArch64RegisterInfo &TRI;
443 const AArch64RegisterBankInfo &RBI;
444
445 bool ProduceNonFlagSettingCondBr = false;
446
447 // Some cached values used during selection.
448 // We use LR as a live-in register, and we keep track of it here as it can be
449 // clobbered by calls.
450 Register MFReturnAddr;
451
452 MachineIRBuilder MIB;
453
454#define GET_GLOBALISEL_PREDICATES_DECL
455#include "AArch64GenGlobalISel.inc"
456#undef GET_GLOBALISEL_PREDICATES_DECL
457
458// We declare the temporaries used by selectImpl() in the class to minimize the
459// cost of constructing placeholder values.
460#define GET_GLOBALISEL_TEMPORARIES_DECL
461#include "AArch64GenGlobalISel.inc"
462#undef GET_GLOBALISEL_TEMPORARIES_DECL
463};
464
465} // end anonymous namespace
466
467#define GET_GLOBALISEL_IMPL
468#include "AArch64GenGlobalISel.inc"
469#undef GET_GLOBALISEL_IMPL
470
471AArch64InstructionSelector::AArch64InstructionSelector(
472 const AArch64TargetMachine &TM, const AArch64Subtarget &STI,
473 const AArch64RegisterBankInfo &RBI)
474 : TM(TM), STI(STI), TII(*STI.getInstrInfo()), TRI(*STI.getRegisterInfo()),
475 RBI(RBI),
476#define GET_GLOBALISEL_PREDICATES_INIT
477#include "AArch64GenGlobalISel.inc"
478#undef GET_GLOBALISEL_PREDICATES_INIT
479#define GET_GLOBALISEL_TEMPORARIES_INIT
480#include "AArch64GenGlobalISel.inc"
481#undef GET_GLOBALISEL_TEMPORARIES_INIT
482{
483}
484
485// FIXME: This should be target-independent, inferred from the types declared
486// for each class in the bank.
487static const TargetRegisterClass *
488getRegClassForTypeOnBank(LLT Ty, const RegisterBank &RB,
489 const RegisterBankInfo &RBI,
490 bool GetAllRegSet = false) {
491 if (RB.getID() == AArch64::GPRRegBankID) {
492 if (Ty.getSizeInBits() <= 32)
493 return GetAllRegSet ? &AArch64::GPR32allRegClass
494 : &AArch64::GPR32RegClass;
495 if (Ty.getSizeInBits() == 64)
496 return GetAllRegSet ? &AArch64::GPR64allRegClass
497 : &AArch64::GPR64RegClass;
498 if (Ty.getSizeInBits() == 128)
499 return &AArch64::XSeqPairsClassRegClass;
500 return nullptr;
501 }
502
503 if (RB.getID() == AArch64::FPRRegBankID) {
504 switch (Ty.getSizeInBits()) {
505 case 8:
506 return &AArch64::FPR8RegClass;
507 case 16:
508 return &AArch64::FPR16RegClass;
509 case 32:
510 return &AArch64::FPR32RegClass;
511 case 64:
512 return &AArch64::FPR64RegClass;
513 case 128:
514 return &AArch64::FPR128RegClass;
515 }
516 return nullptr;
517 }
518
519 return nullptr;
520}
521
522/// Given a register bank, and size in bits, return the smallest register class
523/// that can represent that combination.
524static const TargetRegisterClass *
525getMinClassForRegBank(const RegisterBank &RB, unsigned SizeInBits,
526 bool GetAllRegSet = false) {
527 unsigned RegBankID = RB.getID();
528
529 if (RegBankID == AArch64::GPRRegBankID) {
530 if (SizeInBits <= 32)
531 return GetAllRegSet ? &AArch64::GPR32allRegClass
532 : &AArch64::GPR32RegClass;
533 if (SizeInBits == 64)
534 return GetAllRegSet ? &AArch64::GPR64allRegClass
535 : &AArch64::GPR64RegClass;
536 if (SizeInBits == 128)
537 return &AArch64::XSeqPairsClassRegClass;
538 }
539
540 if (RegBankID == AArch64::FPRRegBankID) {
541 switch (SizeInBits) {
542 default:
543 return nullptr;
544 case 8:
545 return &AArch64::FPR8RegClass;
546 case 16:
547 return &AArch64::FPR16RegClass;
548 case 32:
549 return &AArch64::FPR32RegClass;
550 case 64:
551 return &AArch64::FPR64RegClass;
552 case 128:
553 return &AArch64::FPR128RegClass;
554 }
555 }
556
557 return nullptr;
558}
559
560/// Returns the correct subregister to use for a given register class.
561static bool getSubRegForClass(const TargetRegisterClass *RC,
562 const TargetRegisterInfo &TRI, unsigned &SubReg) {
563 switch (TRI.getRegSizeInBits(*RC)) {
49
Control jumps to the 'default' case at line 579
564 case 8:
565 SubReg = AArch64::bsub;
566 break;
567 case 16:
568 SubReg = AArch64::hsub;
569 break;
570 case 32:
571 if (RC != &AArch64::FPR32RegClass)
572 SubReg = AArch64::sub_32;
573 else
574 SubReg = AArch64::ssub;
575 break;
576 case 64:
577 SubReg = AArch64::dsub;
578 break;
579 default:
580 LLVM_DEBUG(
50
Assuming 'DebugFlag' is false
51
Loop condition is false. Exiting loop
581 dbgs() << "Couldn't find appropriate subregister for register class.");
582 return false;
52
Returning without writing to 'SubReg'
583 }
584
585 return true;
586}
587
588/// Returns the minimum size the given register bank can hold.
589static unsigned getMinSizeForRegBank(const RegisterBank &RB) {
590 switch (RB.getID()) {
591 case AArch64::GPRRegBankID:
592 return 32;
593 case AArch64::FPRRegBankID:
594 return 8;
595 default:
596 llvm_unreachable("Tried to get minimum size for unknown register bank.");
597 }
598}
599
600/// Create a REG_SEQUENCE instruction using the registers in \p Regs.
601/// Helper function for functions like createDTuple and createQTuple.
602///
603/// \p RegClassIDs - The list of register class IDs available for some tuple of
604/// a scalar class. E.g. QQRegClassID, QQQRegClassID, QQQQRegClassID. This is
605/// expected to contain between 2 and 4 tuple classes.
606///
607/// \p SubRegs - The list of subregister classes associated with each register
608/// class ID in \p RegClassIDs. E.g., QQRegClassID should use the qsub0
609/// subregister class. The index of each subregister class is expected to
610/// correspond with the index of each register class.
611///
612/// \returns Either the destination register of REG_SEQUENCE instruction that
613/// was created, or the 0th element of \p Regs if \p Regs contains a single
614/// element.
615static Register createTuple(ArrayRef<Register> Regs,
616 const unsigned RegClassIDs[],
617 const unsigned SubRegs[], MachineIRBuilder &MIB) {
618 unsigned NumRegs = Regs.size();
619 if (NumRegs == 1)
620 return Regs[0];
621 assert(NumRegs >= 2 && NumRegs <= 4 &&
622 "Only support between two and 4 registers in a tuple!");
623 const TargetRegisterInfo *TRI = MIB.getMF().getSubtarget().getRegisterInfo();
624 auto *DesiredClass = TRI->getRegClass(RegClassIDs[NumRegs - 2]);
625 auto RegSequence =
626 MIB.buildInstr(TargetOpcode::REG_SEQUENCE, {DesiredClass}, {});
627 for (unsigned I = 0, E = Regs.size(); I < E; ++I) {
628 RegSequence.addUse(Regs[I]);
629 RegSequence.addImm(SubRegs[I]);
630 }
631 return RegSequence.getReg(0);
632}
633
634/// Create a tuple of D-registers using the registers in \p Regs.
635static Register createDTuple(ArrayRef<Register> Regs, MachineIRBuilder &MIB) {
636 static const unsigned RegClassIDs[] = {
637 AArch64::DDRegClassID, AArch64::DDDRegClassID, AArch64::DDDDRegClassID};
638 static const unsigned SubRegs[] = {AArch64::dsub0, AArch64::dsub1,
639 AArch64::dsub2, AArch64::dsub3};
640 return createTuple(Regs, RegClassIDs, SubRegs, MIB);
641}
642
643/// Create a tuple of Q-registers using the registers in \p Regs.
644static Register createQTuple(ArrayRef<Register> Regs, MachineIRBuilder &MIB) {
645 static const unsigned RegClassIDs[] = {
646 AArch64::QQRegClassID, AArch64::QQQRegClassID, AArch64::QQQQRegClassID};
647 static const unsigned SubRegs[] = {AArch64::qsub0, AArch64::qsub1,
648 AArch64::qsub2, AArch64::qsub3};
649 return createTuple(Regs, RegClassIDs, SubRegs, MIB);
650}
651
652static Optional<uint64_t> getImmedFromMO(const MachineOperand &Root) {
653 auto &MI = *Root.getParent();
654 auto &MBB = *MI.getParent();
655 auto &MF = *MBB.getParent();
656 auto &MRI = MF.getRegInfo();
657 uint64_t Immed;
658 if (Root.isImm())
659 Immed = Root.getImm();
660 else if (Root.isCImm())
661 Immed = Root.getCImm()->getZExtValue();
662 else if (Root.isReg()) {
663 auto ValAndVReg =
664 getIConstantVRegValWithLookThrough(Root.getReg(), MRI, true);
665 if (!ValAndVReg)
666 return None;
667 Immed = ValAndVReg->Value.getSExtValue();
668 } else
669 return None;
670 return Immed;
671}
672
673/// Check whether \p I is a currently unsupported binary operation:
674/// - it has an unsized type
675/// - an operand is not a vreg
676/// - all operands are not in the same bank
677/// These are checks that should someday live in the verifier, but right now,
678/// these are mostly limitations of the aarch64 selector.
679static bool unsupportedBinOp(const MachineInstr &I,
680 const AArch64RegisterBankInfo &RBI,
681 const MachineRegisterInfo &MRI,
682 const AArch64RegisterInfo &TRI) {
683 LLT Ty = MRI.getType(I.getOperand(0).getReg());
684 if (!Ty.isValid()) {
685 LLVM_DEBUG(dbgs() << "Generic binop register should be typed\n");
686 return true;
687 }
688
689 const RegisterBank *PrevOpBank = nullptr;
690 for (auto &MO : I.operands()) {
691 // FIXME: Support non-register operands.
692 if (!MO.isReg()) {
693 LLVM_DEBUG(dbgs() << "Generic inst non-reg operands are unsupported\n");
694 return true;
695 }
696
697 // FIXME: Can generic operations have physical registers operands? If
698 // so, this will need to be taught about that, and we'll need to get the
699 // bank out of the minimal class for the register.
700 // Either way, this needs to be documented (and possibly verified).
701 if (!Register::isVirtualRegister(MO.getReg())) {
702 LLVM_DEBUG(dbgs() << "Generic inst has physical register operand\n");
703 return true;
704 }
705
706 const RegisterBank *OpBank = RBI.getRegBank(MO.getReg(), MRI, TRI);
707 if (!OpBank) {
708 LLVM_DEBUG(dbgs() << "Generic register has no bank or class\n");
709 return true;
710 }
711
712 if (PrevOpBank && OpBank != PrevOpBank) {
713 LLVM_DEBUG(dbgs() << "Generic inst operands have different banks\n");
714 return true;
715 }
716 PrevOpBank = OpBank;
717 }
718 return false;
719}
720
721/// Select the AArch64 opcode for the basic binary operation \p GenericOpc
722/// (such as G_OR or G_SDIV), appropriate for the register bank \p RegBankID
723/// and of size \p OpSize.
724/// \returns \p GenericOpc if the combination is unsupported.
725static unsigned selectBinaryOp(unsigned GenericOpc, unsigned RegBankID,
726 unsigned OpSize) {
727 switch (RegBankID) {
728 case AArch64::GPRRegBankID:
729 if (OpSize == 32) {
730 switch (GenericOpc) {
731 case TargetOpcode::G_SHL:
732 return AArch64::LSLVWr;
733 case TargetOpcode::G_LSHR:
734 return AArch64::LSRVWr;
735 case TargetOpcode::G_ASHR:
736 return AArch64::ASRVWr;
737 default:
738 return GenericOpc;
739 }
740 } else if (OpSize == 64) {
741 switch (GenericOpc) {
742 case TargetOpcode::G_PTR_ADD:
743 return AArch64::ADDXrr;
744 case TargetOpcode::G_SHL:
745 return AArch64::LSLVXr;
746 case TargetOpcode::G_LSHR:
747 return AArch64::LSRVXr;
748 case TargetOpcode::G_ASHR:
749 return AArch64::ASRVXr;
750 default:
751 return GenericOpc;
752 }
753 }
754 break;
755 case AArch64::FPRRegBankID:
756 switch (OpSize) {
757 case 32:
758 switch (GenericOpc) {
759 case TargetOpcode::G_FADD:
760 return AArch64::FADDSrr;
761 case TargetOpcode::G_FSUB:
762 return AArch64::FSUBSrr;
763 case TargetOpcode::G_FMUL:
764 return AArch64::FMULSrr;
765 case TargetOpcode::G_FDIV:
766 return AArch64::FDIVSrr;
767 default:
768 return GenericOpc;
769 }
770 case 64:
771 switch (GenericOpc) {
772 case TargetOpcode::G_FADD:
773 return AArch64::FADDDrr;
774 case TargetOpcode::G_FSUB:
775 return AArch64::FSUBDrr;
776 case TargetOpcode::G_FMUL:
777 return AArch64::FMULDrr;
778 case TargetOpcode::G_FDIV:
779 return AArch64::FDIVDrr;
780 case TargetOpcode::G_OR:
781 return AArch64::ORRv8i8;
782 default:
783 return GenericOpc;
784 }
785 }
786 break;
787 }
788 return GenericOpc;
789}
790
791/// Select the AArch64 opcode for the G_LOAD or G_STORE operation \p GenericOpc,
792/// appropriate for the (value) register bank \p RegBankID and of memory access
793/// size \p OpSize. This returns the variant with the base+unsigned-immediate
794/// addressing mode (e.g., LDRXui).
795/// \returns \p GenericOpc if the combination is unsupported.
796static unsigned selectLoadStoreUIOp(unsigned GenericOpc, unsigned RegBankID,
797 unsigned OpSize) {
798 const bool isStore = GenericOpc == TargetOpcode::G_STORE;
799 switch (RegBankID) {
800 case AArch64::GPRRegBankID:
801 switch (OpSize) {
802 case 8:
803 return isStore ? AArch64::STRBBui : AArch64::LDRBBui;
804 case 16:
805 return isStore ? AArch64::STRHHui : AArch64::LDRHHui;
806 case 32:
807 return isStore ? AArch64::STRWui : AArch64::LDRWui;
808 case 64:
809 return isStore ? AArch64::STRXui : AArch64::LDRXui;
810 }
811 break;
812 case AArch64::FPRRegBankID:
813 switch (OpSize) {
814 case 8:
815 return isStore ? AArch64::STRBui : AArch64::LDRBui;
816 case 16:
817 return isStore ? AArch64::STRHui : AArch64::LDRHui;
818 case 32:
819 return isStore ? AArch64::STRSui : AArch64::LDRSui;
820 case 64:
821 return isStore ? AArch64::STRDui : AArch64::LDRDui;
822 case 128:
823 return isStore ? AArch64::STRQui : AArch64::LDRQui;
824 }
825 break;
826 }
827 return GenericOpc;
828}
829
830#ifndef NDEBUG
831/// Helper function that verifies that we have a valid copy at the end of
832/// selectCopy. Verifies that the source and dest have the expected sizes and
833/// then returns true.
834static bool isValidCopy(const MachineInstr &I, const RegisterBank &DstBank,
835 const MachineRegisterInfo &MRI,
836 const TargetRegisterInfo &TRI,
837 const RegisterBankInfo &RBI) {
838 const Register DstReg = I.getOperand(0).getReg();
839 const Register SrcReg = I.getOperand(1).getReg();
840 const unsigned DstSize = RBI.getSizeInBits(DstReg, MRI, TRI);
841 const unsigned SrcSize = RBI.getSizeInBits(SrcReg, MRI, TRI);
842
843 // Make sure the size of the source and dest line up.
844 assert(
845 (DstSize == SrcSize ||
846 // Copies are a mean to setup initial types, the number of
847 // bits may not exactly match.
848 (Register::isPhysicalRegister(SrcReg) && DstSize <= SrcSize) ||
849 // Copies are a mean to copy bits around, as long as we are
850 // on the same register class, that's fine. Otherwise, that
851 // means we need some SUBREG_TO_REG or AND & co.
852 (((DstSize + 31) / 32 == (SrcSize + 31) / 32) && DstSize > SrcSize)) &&
853 "Copy with different width?!");
854
855 // Check the size of the destination.
856 assert((DstSize <= 64 || DstBank.getID() == AArch64::FPRRegBankID) &&
857 "GPRs cannot get more than 64-bit width values");
858
859 return true;
860}
861#endif
862
863/// Helper function for selectCopy. Inserts a subregister copy from \p SrcReg
864/// to \p *To.
865///
866/// E.g "To = COPY SrcReg:SubReg"
867static bool copySubReg(MachineInstr &I, MachineRegisterInfo &MRI,
868 const RegisterBankInfo &RBI, Register SrcReg,
869 const TargetRegisterClass *To, unsigned SubReg) {
870 assert(SrcReg.isValid() && "Expected a valid source register?");
871 assert(To && "Destination register class cannot be null");
872 assert(SubReg && "Expected a valid subregister");
873
874 MachineIRBuilder MIB(I);
875 auto SubRegCopy =
876 MIB.buildInstr(TargetOpcode::COPY, {To}, {}).addReg(SrcReg, 0, SubReg);
877 MachineOperand &RegOp = I.getOperand(1);
878 RegOp.setReg(SubRegCopy.getReg(0));
879
880 // It's possible that the destination register won't be constrained. Make
881 // sure that happens.
882 if (!Register::isPhysicalRegister(I.getOperand(0).getReg()))
883 RBI.constrainGenericRegister(I.getOperand(0).getReg(), *To, MRI);
884
885 return true;
886}
887
888/// Helper function to get the source and destination register classes for a
889/// copy. Returns a std::pair containing the source register class for the
890/// copy, and the destination register class for the copy. If a register class
891/// cannot be determined, then it will be nullptr.
892static std::pair<const TargetRegisterClass *, const TargetRegisterClass *>
893getRegClassesForCopy(MachineInstr &I, const TargetInstrInfo &TII,
894 MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI,
895 const RegisterBankInfo &RBI) {
896 Register DstReg = I.getOperand(0).getReg();
897 Register SrcReg = I.getOperand(1).getReg();
898 const RegisterBank &DstRegBank = *RBI.getRegBank(DstReg, MRI, TRI);
899 const RegisterBank &SrcRegBank = *RBI.getRegBank(SrcReg, MRI, TRI);
900 unsigned DstSize = RBI.getSizeInBits(DstReg, MRI, TRI);
901 unsigned SrcSize = RBI.getSizeInBits(SrcReg, MRI, TRI);
902
903 // Special casing for cross-bank copies of s1s. We can technically represent
904 // a 1-bit value with any size of register. The minimum size for a GPR is 32
905 // bits. So, we need to put the FPR on 32 bits as well.
906 //
907 // FIXME: I'm not sure if this case holds true outside of copies. If it does,
908 // then we can pull it into the helpers that get the appropriate class for a
909 // register bank. Or make a new helper that carries along some constraint
910 // information.
911 if (SrcRegBank != DstRegBank && (DstSize == 1 && SrcSize == 1))
912 SrcSize = DstSize = 32;
913
914 return {getMinClassForRegBank(SrcRegBank, SrcSize, true),
915 getMinClassForRegBank(DstRegBank, DstSize, true)};
916}
917
918static bool selectCopy(MachineInstr &I, const TargetInstrInfo &TII,
919 MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI,
920 const RegisterBankInfo &RBI) {
921 Register DstReg = I.getOperand(0).getReg();
922 Register SrcReg = I.getOperand(1).getReg();
923 const RegisterBank &DstRegBank = *RBI.getRegBank(DstReg, MRI, TRI);
924 const RegisterBank &SrcRegBank = *RBI.getRegBank(SrcReg, MRI, TRI);
925
926 // Find the correct register classes for the source and destination registers.
927 const TargetRegisterClass *SrcRC;
928 const TargetRegisterClass *DstRC;
929 std::tie(SrcRC, DstRC) = getRegClassesForCopy(I, TII, MRI, TRI, RBI);
16
Calling 'tie<const llvm::TargetRegisterClass *, const llvm::TargetRegisterClass *>'
27
Returning from 'tie<const llvm::TargetRegisterClass *, const llvm::TargetRegisterClass *>'
28
Calling 'tuple::operator='
31
Returning from 'tuple::operator='
930
931 if (!DstRC) {
32
Assuming 'DstRC' is non-null
33
Taking false branch
932 LLVM_DEBUG(dbgs() << "Unexpected dest size "
933 << RBI.getSizeInBits(DstReg, MRI, TRI) << '\n');
934 return false;
935 }
936
937 // A couple helpers below, for making sure that the copy we produce is valid.
938
939 // Set to true if we insert a SUBREG_TO_REG. If we do this, then we don't want
940 // to verify that the src and dst are the same size, since that's handled by
941 // the SUBREG_TO_REG.
942 bool KnownValid = false;
943
944 // Returns true, or asserts if something we don't expect happens. Instead of
945 // returning true, we return isValidCopy() to ensure that we verify the
946 // result.
947 auto CheckCopy = [&]() {
948 // If we have a bitcast or something, we can't have physical registers.
949 assert((I.isCopy() ||
950 (!Register::isPhysicalRegister(I.getOperand(0).getReg()) &&
951 !Register::isPhysicalRegister(I.getOperand(1).getReg()))) &&
952 "No phys reg on generic operator!");
953 bool ValidCopy = true;
954#ifndef NDEBUG
955 ValidCopy = KnownValid || isValidCopy(I, DstRegBank, MRI, TRI, RBI);
956 assert(ValidCopy && "Invalid copy.");
957#endif
958 (void)KnownValid;
959 return ValidCopy;
960 };
961
962 // Is this a copy? If so, then we may need to insert a subregister copy.
963 if (I.isCopy()) {
34
Calling 'MachineInstr::isCopy'
37
Returning from 'MachineInstr::isCopy'
38
Taking true branch
964 // Yes. Check if there's anything to fix up.
965 if (!SrcRC) {
39
Assuming 'SrcRC' is non-null
40
Taking false branch
966 LLVM_DEBUG(dbgs() << "Couldn't determine source register class\n");
967 return false;
968 }
969
970 unsigned SrcSize = TRI.getRegSizeInBits(*SrcRC);
971 unsigned DstSize = TRI.getRegSizeInBits(*DstRC);
972 unsigned SubReg;
41
'SubReg' declared without an initial value
973
974 // If the source bank doesn't support a subregister copy small enough,
975 // then we first need to copy to the destination bank.
976 if (getMinSizeForRegBank(SrcRegBank) > DstSize) {
42
Assuming the condition is false
43
Taking false branch
977 const TargetRegisterClass *DstTempRC =
978 getMinClassForRegBank(DstRegBank, SrcSize, /* GetAllRegSet */ true);
979 getSubRegForClass(DstRC, TRI, SubReg);
980
981 MachineIRBuilder MIB(I);
982 auto Copy = MIB.buildCopy({DstTempRC}, {SrcReg});
983 copySubReg(I, MRI, RBI, Copy.getReg(0), DstRC, SubReg);
984 } else if (SrcSize > DstSize) {
44
Assuming 'SrcSize' is <= 'DstSize'
45
Taking false branch
985 // If the source register is bigger than the destination we need to
986 // perform a subregister copy.
987 const TargetRegisterClass *SubRegRC =
988 getMinClassForRegBank(SrcRegBank, DstSize, /* GetAllRegSet */ true);
989 getSubRegForClass(SubRegRC, TRI, SubReg);
990 copySubReg(I, MRI, RBI, SrcReg, DstRC, SubReg);
991 } else if (DstSize > SrcSize) {
46
Assuming 'DstSize' is > 'SrcSize'
47
Taking true branch
992 // If the destination register is bigger than the source we need to do
993 // a promotion using SUBREG_TO_REG.
994 const TargetRegisterClass *PromotionRC =
995 getMinClassForRegBank(SrcRegBank, DstSize, /* GetAllRegSet */ true);
996 getSubRegForClass(SrcRC, TRI, SubReg);
48
Calling 'getSubRegForClass'
53
Returning from 'getSubRegForClass'
997
998 Register PromoteReg = MRI.createVirtualRegister(PromotionRC);
999 BuildMI(*I.getParent(), I, I.getDebugLoc(),
54
1st function call argument is an uninitialized value
1000 TII.get(AArch64::SUBREG_TO_REG), PromoteReg)
1001 .addImm(0)
1002 .addUse(SrcReg)
1003 .addImm(SubReg);
1004 MachineOperand &RegOp = I.getOperand(1);
1005 RegOp.setReg(PromoteReg);
1006
1007 // Promise that the copy is implicitly validated by the SUBREG_TO_REG.
1008 KnownValid = true;
1009 }
1010
1011 // If the destination is a physical register, then there's nothing to
1012 // change, so we're done.
1013 if (Register::isPhysicalRegister(DstReg))
1014 return CheckCopy();
1015 }
1016
1017 // No need to constrain SrcReg. It will get constrained when we hit another
1018 // of its use or its defs. Copies do not have constraints.
1019 if (!RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) {
1020 LLVM_DEBUG(dbgs() << "Failed to constrain " << TII.getName(I.getOpcode())
1021 << " operand\n");
1022 return false;
1023 }
1024
1025 // If this is a GPR ZEXT that we want to just reduce down into a copy.
1026 // The sizes will be mismatched with the source < 32b but that's ok.
1027 if (I.getOpcode() == TargetOpcode::G_ZEXT) {
1028 I.setDesc(TII.get(AArch64::COPY));
1029 assert(SrcRegBank.getID() == AArch64::GPRRegBankID)(static_cast <bool> (SrcRegBank.getID() == AArch64::GPRRegBankID
) ? void (0) : __assert_fail ("SrcRegBank.getID() == AArch64::GPRRegBankID"
, "llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 1029, __extension__ __PRETTY_FUNCTION__))
;
1030 return selectCopy(I, TII, MRI, TRI, RBI);
1031 }
1032
1033 I.setDesc(TII.get(AArch64::COPY));
1034 return CheckCopy();
1035}
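
The path the analyzer reports above is: SubReg is declared without an initial value at line 972 (event 41), getSubRegForClass is entered and left (events 48 and 53) on a path that, per the analyzer, does not assign it, and the value then flows into the SUBREG_TO_REG build at line 999 (event 54). The following is a standalone sketch of that out-parameter shape and of the two usual ways to make such a path well-defined; pickSubReg and its index values are invented for illustration and are not the LLVM helper.

#include <cstdio>

// Minimal sketch (not taken from the LLVM tree) of the shape the analyzer
// reports: an out-parameter the helper may leave unwritten, later passed on.
static bool pickSubReg(unsigned SizeInBits, unsigned &SubReg) {
  if (SizeInBits == 32) { SubReg = 1; return true; }
  if (SizeInBits == 64) { SubReg = 2; return true; }
  return false; // SubReg deliberately left untouched, like the failure path
                // the analyzer assumes for getSubRegForClass (events 48-53).
}

int main() {
  // Problematic shape (what lines 972-1003 reduce to):
  //   unsigned SubReg;
  //   pickSubReg(Bits, SubReg);   // result ignored
  //   use(SubReg);                // may read an indeterminate value
  //
  // Two conventional ways to make the path well-defined:
  unsigned SubReg = 0;            // (1) give the out-parameter a default, or
  if (!pickSubReg(32, SubReg))    // (2) honour the helper's failure result
    return 1;
  std::printf("subreg index %u\n", SubReg);
  return 0;
}

Whether the real getSubRegForClass can actually fail for the register class passed here is not something the report itself establishes; the sketch only mirrors the shape the checker complains about.
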
1036
1037static unsigned selectFPConvOpc(unsigned GenericOpc, LLT DstTy, LLT SrcTy) {
1038 if (!DstTy.isScalar() || !SrcTy.isScalar())
1039 return GenericOpc;
1040
1041 const unsigned DstSize = DstTy.getSizeInBits();
1042 const unsigned SrcSize = SrcTy.getSizeInBits();
1043
1044 switch (DstSize) {
1045 case 32:
1046 switch (SrcSize) {
1047 case 32:
1048 switch (GenericOpc) {
1049 case TargetOpcode::G_SITOFP:
1050 return AArch64::SCVTFUWSri;
1051 case TargetOpcode::G_UITOFP:
1052 return AArch64::UCVTFUWSri;
1053 case TargetOpcode::G_FPTOSI:
1054 return AArch64::FCVTZSUWSr;
1055 case TargetOpcode::G_FPTOUI:
1056 return AArch64::FCVTZUUWSr;
1057 default:
1058 return GenericOpc;
1059 }
1060 case 64:
1061 switch (GenericOpc) {
1062 case TargetOpcode::G_SITOFP:
1063 return AArch64::SCVTFUXSri;
1064 case TargetOpcode::G_UITOFP:
1065 return AArch64::UCVTFUXSri;
1066 case TargetOpcode::G_FPTOSI:
1067 return AArch64::FCVTZSUWDr;
1068 case TargetOpcode::G_FPTOUI:
1069 return AArch64::FCVTZUUWDr;
1070 default:
1071 return GenericOpc;
1072 }
1073 default:
1074 return GenericOpc;
1075 }
1076 case 64:
1077 switch (SrcSize) {
1078 case 32:
1079 switch (GenericOpc) {
1080 case TargetOpcode::G_SITOFP:
1081 return AArch64::SCVTFUWDri;
1082 case TargetOpcode::G_UITOFP:
1083 return AArch64::UCVTFUWDri;
1084 case TargetOpcode::G_FPTOSI:
1085 return AArch64::FCVTZSUXSr;
1086 case TargetOpcode::G_FPTOUI:
1087 return AArch64::FCVTZUUXSr;
1088 default:
1089 return GenericOpc;
1090 }
1091 case 64:
1092 switch (GenericOpc) {
1093 case TargetOpcode::G_SITOFP:
1094 return AArch64::SCVTFUXDri;
1095 case TargetOpcode::G_UITOFP:
1096 return AArch64::UCVTFUXDri;
1097 case TargetOpcode::G_FPTOSI:
1098 return AArch64::FCVTZSUXDr;
1099 case TargetOpcode::G_FPTOUI:
1100 return AArch64::FCVTZUUXDr;
1101 default:
1102 return GenericOpc;
1103 }
1104 default:
1105 return GenericOpc;
1106 }
1107 default:
1108 return GenericOpc;
1109 };
1110 return GenericOpc;
1111}
1112
1113MachineInstr *
1114AArch64InstructionSelector::emitSelect(Register Dst, Register True,
1115 Register False, AArch64CC::CondCode CC,
1116 MachineIRBuilder &MIB) const {
1117 MachineRegisterInfo &MRI = *MIB.getMRI();
1118 assert(RBI.getRegBank(False, MRI, TRI)->getID() ==(static_cast <bool> (RBI.getRegBank(False, MRI, TRI)->
getID() == RBI.getRegBank(True, MRI, TRI)->getID() &&
"Expected both select operands to have the same regbank?") ?
void (0) : __assert_fail ("RBI.getRegBank(False, MRI, TRI)->getID() == RBI.getRegBank(True, MRI, TRI)->getID() && \"Expected both select operands to have the same regbank?\""
, "llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 1120, __extension__ __PRETTY_FUNCTION__))
1119 RBI.getRegBank(True, MRI, TRI)->getID() &&(static_cast <bool> (RBI.getRegBank(False, MRI, TRI)->
getID() == RBI.getRegBank(True, MRI, TRI)->getID() &&
"Expected both select operands to have the same regbank?") ?
void (0) : __assert_fail ("RBI.getRegBank(False, MRI, TRI)->getID() == RBI.getRegBank(True, MRI, TRI)->getID() && \"Expected both select operands to have the same regbank?\""
, "llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 1120, __extension__ __PRETTY_FUNCTION__))
1120 "Expected both select operands to have the same regbank?")(static_cast <bool> (RBI.getRegBank(False, MRI, TRI)->
getID() == RBI.getRegBank(True, MRI, TRI)->getID() &&
"Expected both select operands to have the same regbank?") ?
void (0) : __assert_fail ("RBI.getRegBank(False, MRI, TRI)->getID() == RBI.getRegBank(True, MRI, TRI)->getID() && \"Expected both select operands to have the same regbank?\""
, "llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 1120, __extension__ __PRETTY_FUNCTION__))
;
1121 LLT Ty = MRI.getType(True);
1122 if (Ty.isVector())
1123 return nullptr;
1124 const unsigned Size = Ty.getSizeInBits();
1125 assert((Size == 32 || Size == 64) &&(static_cast <bool> ((Size == 32 || Size == 64) &&
"Expected 32 bit or 64 bit select only?") ? void (0) : __assert_fail
("(Size == 32 || Size == 64) && \"Expected 32 bit or 64 bit select only?\""
, "llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 1126, __extension__ __PRETTY_FUNCTION__))
1126 "Expected 32 bit or 64 bit select only?")(static_cast <bool> ((Size == 32 || Size == 64) &&
"Expected 32 bit or 64 bit select only?") ? void (0) : __assert_fail
("(Size == 32 || Size == 64) && \"Expected 32 bit or 64 bit select only?\""
, "llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 1126, __extension__ __PRETTY_FUNCTION__))
;
1127 const bool Is32Bit = Size == 32;
1128 if (RBI.getRegBank(True, MRI, TRI)->getID() != AArch64::GPRRegBankID) {
1129 unsigned Opc = Is32Bit ? AArch64::FCSELSrrr : AArch64::FCSELDrrr;
1130 auto FCSel = MIB.buildInstr(Opc, {Dst}, {True, False}).addImm(CC);
1131 constrainSelectedInstRegOperands(*FCSel, TII, TRI, RBI);
1132 return &*FCSel;
1133 }
1134
1135 // By default, we'll try and emit a CSEL.
1136 unsigned Opc = Is32Bit ? AArch64::CSELWr : AArch64::CSELXr;
1137 bool Optimized = false;
1138 auto TryFoldBinOpIntoSelect = [&Opc, Is32Bit, &CC, &MRI,
1139 &Optimized](Register &Reg, Register &OtherReg,
1140 bool Invert) {
1141 if (Optimized)
1142 return false;
1143
1144 // Attempt to fold:
1145 //
1146 // %sub = G_SUB 0, %x
1147 // %select = G_SELECT cc, %reg, %sub
1148 //
1149 // Into:
1150 // %select = CSNEG %reg, %x, cc
1151 Register MatchReg;
1152 if (mi_match(Reg, MRI, m_Neg(m_Reg(MatchReg)))) {
1153 Opc = Is32Bit ? AArch64::CSNEGWr : AArch64::CSNEGXr;
1154 Reg = MatchReg;
1155 if (Invert) {
1156 CC = AArch64CC::getInvertedCondCode(CC);
1157 std::swap(Reg, OtherReg);
1158 }
1159 return true;
1160 }
1161
1162 // Attempt to fold:
1163 //
1164 // %xor = G_XOR %x, -1
1165 // %select = G_SELECT cc, %reg, %xor
1166 //
1167 // Into:
1168 // %select = CSINV %reg, %x, cc
1169 if (mi_match(Reg, MRI, m_Not(m_Reg(MatchReg)))) {
1170 Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;
1171 Reg = MatchReg;
1172 if (Invert) {
1173 CC = AArch64CC::getInvertedCondCode(CC);
1174 std::swap(Reg, OtherReg);
1175 }
1176 return true;
1177 }
1178
1179 // Attempt to fold:
1180 //
1181 // %add = G_ADD %x, 1
1182 // %select = G_SELECT cc, %reg, %add
1183 //
1184 // Into:
1185 // %select = CSINC %reg, %x, cc
1186 if (mi_match(Reg, MRI,
1187 m_any_of(m_GAdd(m_Reg(MatchReg), m_SpecificICst(1)),
1188 m_GPtrAdd(m_Reg(MatchReg), m_SpecificICst(1))))) {
1189 Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;
1190 Reg = MatchReg;
1191 if (Invert) {
1192 CC = AArch64CC::getInvertedCondCode(CC);
1193 std::swap(Reg, OtherReg);
1194 }
1195 return true;
1196 }
1197
1198 return false;
1199 };
1200
1201 // Helper lambda which tries to use CSINC/CSINV for the instruction when its
1202 // true/false values are constants.
1203 // FIXME: All of these patterns already exist in tablegen. We should be
1204 // able to import these.
1205 auto TryOptSelectCst = [&Opc, &True, &False, &CC, Is32Bit, &MRI,
1206 &Optimized]() {
1207 if (Optimized)
1208 return false;
1209 auto TrueCst = getIConstantVRegValWithLookThrough(True, MRI);
1210 auto FalseCst = getIConstantVRegValWithLookThrough(False, MRI);
1211 if (!TrueCst && !FalseCst)
1212 return false;
1213
1214 Register ZReg = Is32Bit ? AArch64::WZR : AArch64::XZR;
1215 if (TrueCst && FalseCst) {
1216 int64_t T = TrueCst->Value.getSExtValue();
1217 int64_t F = FalseCst->Value.getSExtValue();
1218
1219 if (T == 0 && F == 1) {
1220 // G_SELECT cc, 0, 1 -> CSINC zreg, zreg, cc
1221 Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;
1222 True = ZReg;
1223 False = ZReg;
1224 return true;
1225 }
1226
1227 if (T == 0 && F == -1) {
1228 // G_SELECT cc, 0, -1 -> CSINV zreg, zreg, cc
1229 Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;
1230 True = ZReg;
1231 False = ZReg;
1232 return true;
1233 }
1234 }
1235
1236 if (TrueCst) {
1237 int64_t T = TrueCst->Value.getSExtValue();
1238 if (T == 1) {
1239 // G_SELECT cc, 1, f -> CSINC f, zreg, inv_cc
1240 Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;
1241 True = False;
1242 False = ZReg;
1243 CC = AArch64CC::getInvertedCondCode(CC);
1244 return true;
1245 }
1246
1247 if (T == -1) {
1248 // G_SELECT cc, -1, f -> CSINV f, zreg, inv_cc
1249 Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;
1250 True = False;
1251 False = ZReg;
1252 CC = AArch64CC::getInvertedCondCode(CC);
1253 return true;
1254 }
1255 }
1256
1257 if (FalseCst) {
1258 int64_t F = FalseCst->Value.getSExtValue();
1259 if (F == 1) {
1260 // G_SELECT cc, t, 1 -> CSINC t, zreg, cc
1261 Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;
1262 False = ZReg;
1263 return true;
1264 }
1265
1266 if (F == -1) {
1267 // G_SELECT cc, t, -1 -> CSINV t, zreg, cc
1268 Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;
1269 False = ZReg;
1270 return true;
1271 }
1272 }
1273 return false;
1274 };
1275
1276 Optimized |= TryFoldBinOpIntoSelect(False, True, /*Invert = */ false);
1277 Optimized |= TryFoldBinOpIntoSelect(True, False, /*Invert = */ true);
1278 Optimized |= TryOptSelectCst();
1279 auto SelectInst = MIB.buildInstr(Opc, {Dst}, {True, False}).addImm(CC);
1280 constrainSelectedInstRegOperands(*SelectInst, TII, TRI, RBI);
1281 return &*SelectInst;
1282}
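
The CSNEG/CSINV/CSINC folds and the constant special cases in emitSelect above all rely on what the AArch64 conditional-select family does with its second source operand. A plain C++ sketch of those per-instruction semantics, with cond standing for the selected condition code evaluating to true:

#include <cassert>
#include <cstdint>

uint64_t csel (bool cond, uint64_t n, uint64_t m) { return cond ? n : m; }
uint64_t csinc(bool cond, uint64_t n, uint64_t m) { return cond ? n : m + 1; }
uint64_t csinv(bool cond, uint64_t n, uint64_t m) { return cond ? n : ~m; }
uint64_t csneg(bool cond, uint64_t n, uint64_t m) { return cond ? n : 0 - m; }

int main() {
  assert(csel(true, 3, 4) == 3);
  // G_SELECT cc, 0, 1  ->  CSINC zr, zr, cc
  assert(csinc(true, 0, 0) == 0 && csinc(false, 0, 0) == 1);
  // G_SELECT cc, 0, -1 ->  CSINV zr, zr, cc
  assert(csinv(false, 0, 0) == UINT64_MAX);
  // G_SELECT cc, t, (G_SUB 0, x) -> CSNEG t, x, cc
  assert(csneg(false, 7, 5) == uint64_t(0) - 5);
}

The G_ADD + G_ICMP fold in earlySelect further below leans on the same z versus z + 1 identity of CSINC with both sources equal.
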
1283
1284static AArch64CC::CondCode changeICMPPredToAArch64CC(CmpInst::Predicate P) {
1285 switch (P) {
1286 default:
1287 llvm_unreachable("Unknown condition code!")::llvm::llvm_unreachable_internal("Unknown condition code!", "llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 1287)
;
1288 case CmpInst::ICMP_NE:
1289 return AArch64CC::NE;
1290 case CmpInst::ICMP_EQ:
1291 return AArch64CC::EQ;
1292 case CmpInst::ICMP_SGT:
1293 return AArch64CC::GT;
1294 case CmpInst::ICMP_SGE:
1295 return AArch64CC::GE;
1296 case CmpInst::ICMP_SLT:
1297 return AArch64CC::LT;
1298 case CmpInst::ICMP_SLE:
1299 return AArch64CC::LE;
1300 case CmpInst::ICMP_UGT:
1301 return AArch64CC::HI;
1302 case CmpInst::ICMP_UGE:
1303 return AArch64CC::HS;
1304 case CmpInst::ICMP_ULT:
1305 return AArch64CC::LO;
1306 case CmpInst::ICMP_ULE:
1307 return AArch64CC::LS;
1308 }
1309}
1310
1311/// Return a register which can be used as a bit to test in a TB(N)Z.
1312static Register getTestBitReg(Register Reg, uint64_t &Bit, bool &Invert,
1313 MachineRegisterInfo &MRI) {
1314 assert(Reg.isValid() && "Expected valid register!")(static_cast <bool> (Reg.isValid() && "Expected valid register!"
) ? void (0) : __assert_fail ("Reg.isValid() && \"Expected valid register!\""
, "llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 1314, __extension__ __PRETTY_FUNCTION__))
;
1315 bool HasZext = false;
1316 while (MachineInstr *MI = getDefIgnoringCopies(Reg, MRI)) {
1317 unsigned Opc = MI->getOpcode();
1318
1319 if (!MI->getOperand(0).isReg() ||
1320 !MRI.hasOneNonDBGUse(MI->getOperand(0).getReg()))
1321 break;
1322
1323 // (tbz (any_ext x), b) -> (tbz x, b) if we don't use the extended bits.
1324 //
1325 // (tbz (trunc x), b) -> (tbz x, b) is always safe, because the bit number
1326 // on the truncated x is the same as the bit number on x.
1327 if (Opc == TargetOpcode::G_ANYEXT || Opc == TargetOpcode::G_ZEXT ||
1328 Opc == TargetOpcode::G_TRUNC) {
1329 if (Opc == TargetOpcode::G_ZEXT)
1330 HasZext = true;
1331
1332 Register NextReg = MI->getOperand(1).getReg();
1333 // Did we find something worth folding?
1334 if (!NextReg.isValid() || !MRI.hasOneNonDBGUse(NextReg))
1335 break;
1336
1337 // NextReg is worth folding. Keep looking.
1338 Reg = NextReg;
1339 continue;
1340 }
1341
1342 // Attempt to find a suitable operation with a constant on one side.
1343 Optional<uint64_t> C;
1344 Register TestReg;
1345 switch (Opc) {
1346 default:
1347 break;
1348 case TargetOpcode::G_AND:
1349 case TargetOpcode::G_XOR: {
1350 TestReg = MI->getOperand(1).getReg();
1351 Register ConstantReg = MI->getOperand(2).getReg();
1352 auto VRegAndVal = getIConstantVRegValWithLookThrough(ConstantReg, MRI);
1353 if (!VRegAndVal) {
1354 // AND commutes, check the other side for a constant.
1355 // FIXME: Can we canonicalize the constant so that it's always on the
1356 // same side at some point earlier?
1357 std::swap(ConstantReg, TestReg);
1358 VRegAndVal = getIConstantVRegValWithLookThrough(ConstantReg, MRI);
1359 }
1360 if (VRegAndVal) {
1361 if (HasZext)
1362 C = VRegAndVal->Value.getZExtValue();
1363 else
1364 C = VRegAndVal->Value.getSExtValue();
1365 }
1366 break;
1367 }
1368 case TargetOpcode::G_ASHR:
1369 case TargetOpcode::G_LSHR:
1370 case TargetOpcode::G_SHL: {
1371 TestReg = MI->getOperand(1).getReg();
1372 auto VRegAndVal =
1373 getIConstantVRegValWithLookThrough(MI->getOperand(2).getReg(), MRI);
1374 if (VRegAndVal)
1375 C = VRegAndVal->Value.getSExtValue();
1376 break;
1377 }
1378 }
1379
1380 // Didn't find a constant or viable register. Bail out of the loop.
1381 if (!C || !TestReg.isValid())
1382 break;
1383
1384 // We found a suitable instruction with a constant. Check to see if we can
1385 // walk through the instruction.
1386 Register NextReg;
1387 unsigned TestRegSize = MRI.getType(TestReg).getSizeInBits();
1388 switch (Opc) {
1389 default:
1390 break;
1391 case TargetOpcode::G_AND:
1392 // (tbz (and x, m), b) -> (tbz x, b) when the b-th bit of m is set.
1393 if ((*C >> Bit) & 1)
1394 NextReg = TestReg;
1395 break;
1396 case TargetOpcode::G_SHL:
1397 // (tbz (shl x, c), b) -> (tbz x, b-c) when b-c is non-negative and fits in
1398 // the type of the register.
1399 if (*C <= Bit && (Bit - *C) < TestRegSize) {
1400 NextReg = TestReg;
1401 Bit = Bit - *C;
1402 }
1403 break;
1404 case TargetOpcode::G_ASHR:
1405 // (tbz (ashr x, c), b) -> (tbz x, b+c) or (tbz x, msb) if b+c is > # bits
1406 // in x
1407 NextReg = TestReg;
1408 Bit = Bit + *C;
1409 if (Bit >= TestRegSize)
1410 Bit = TestRegSize - 1;
1411 break;
1412 case TargetOpcode::G_LSHR:
1413 // (tbz (lshr x, c), b) -> (tbz x, b+c) when b + c is < # bits in x
1414 if ((Bit + *C) < TestRegSize) {
1415 NextReg = TestReg;
1416 Bit = Bit + *C;
1417 }
1418 break;
1419 case TargetOpcode::G_XOR:
1420 // We can walk through a G_XOR by inverting whether we use tbz/tbnz when
1421 // appropriate.
1422 //
1423 // e.g. If x' = xor x, c, and the b-th bit is set in c then
1424 //
1425 // tbz x', b -> tbnz x, b
1426 //
1427 // Because x' only has the b-th bit set if x does not.
1428 if ((*C >> Bit) & 1)
1429 Invert = !Invert;
1430 NextReg = TestReg;
1431 break;
1432 }
1433
1434 // Check if we found anything worth folding.
1435 if (!NextReg.isValid())
1436 return Reg;
1437 Reg = NextReg;
1438 }
1439
1440 return Reg;
1441}
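
The shift cases in getTestBitReg above are plain bit-index arithmetic: bit b of (x << c) is bit b - c of x, and a right shift moves the tested index up by c, as long as the adjusted index stays inside the register. A small check of the G_SHL case (64-bit, index arithmetic only; the G_ASHR clamp to the sign bit is not modelled here):

#include <cassert>
#include <cstdint>

// (tbz (shl x, c), b) -> (tbz x, b - c), valid when c <= b < 64.
static bool bitOfShifted(uint64_t x, unsigned c, unsigned b) {
  return ((x << c) >> b) & 1;
}
static bool bitOfSource(uint64_t x, unsigned c, unsigned b) {
  return (x >> (b - c)) & 1;
}

int main() {
  uint64_t x = 0x0123456789abcdefULL;
  for (unsigned c = 0; c < 64; ++c)
    for (unsigned b = c; b < 64; ++b)
      assert(bitOfShifted(x, c, b) == bitOfSource(x, c, b));
}
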
1442
1443MachineInstr *AArch64InstructionSelector::emitTestBit(
1444 Register TestReg, uint64_t Bit, bool IsNegative, MachineBasicBlock *DstMBB,
1445 MachineIRBuilder &MIB) const {
1446 assert(TestReg.isValid())(static_cast <bool> (TestReg.isValid()) ? void (0) : __assert_fail
("TestReg.isValid()", "llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 1446, __extension__ __PRETTY_FUNCTION__))
;
1447 assert(ProduceNonFlagSettingCondBr &&(static_cast <bool> (ProduceNonFlagSettingCondBr &&
"Cannot emit TB(N)Z with speculation tracking!") ? void (0) :
__assert_fail ("ProduceNonFlagSettingCondBr && \"Cannot emit TB(N)Z with speculation tracking!\""
, "llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 1448, __extension__ __PRETTY_FUNCTION__))
1448 "Cannot emit TB(N)Z with speculation tracking!")(static_cast <bool> (ProduceNonFlagSettingCondBr &&
"Cannot emit TB(N)Z with speculation tracking!") ? void (0) :
__assert_fail ("ProduceNonFlagSettingCondBr && \"Cannot emit TB(N)Z with speculation tracking!\""
, "llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 1448, __extension__ __PRETTY_FUNCTION__))
;
1449 MachineRegisterInfo &MRI = *MIB.getMRI();
1450
1451 // Attempt to optimize the test bit by walking over instructions.
1452 TestReg = getTestBitReg(TestReg, Bit, IsNegative, MRI);
1453 LLT Ty = MRI.getType(TestReg);
1454 unsigned Size = Ty.getSizeInBits();
1455 assert(!Ty.isVector() && "Expected a scalar!")(static_cast <bool> (!Ty.isVector() && "Expected a scalar!"
) ? void (0) : __assert_fail ("!Ty.isVector() && \"Expected a scalar!\""
, "llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 1455, __extension__ __PRETTY_FUNCTION__))
;
1456 assert(Bit < 64 && "Bit is too large!")(static_cast <bool> (Bit < 64 && "Bit is too large!"
) ? void (0) : __assert_fail ("Bit < 64 && \"Bit is too large!\""
, "llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 1456, __extension__ __PRETTY_FUNCTION__))
;
1457
1458 // When the test register is a 64-bit register, we have to narrow to make
1459 // TBNZW work.
1460 bool UseWReg = Bit < 32;
1461 unsigned NecessarySize = UseWReg ? 32 : 64;
1462 if (Size != NecessarySize)
1463 TestReg = moveScalarRegClass(
1464 TestReg, UseWReg ? AArch64::GPR32RegClass : AArch64::GPR64RegClass,
1465 MIB);
1466
1467 static const unsigned OpcTable[2][2] = {{AArch64::TBZX, AArch64::TBNZX},
1468 {AArch64::TBZW, AArch64::TBNZW}};
1469 unsigned Opc = OpcTable[UseWReg][IsNegative];
1470 auto TestBitMI =
1471 MIB.buildInstr(Opc).addReg(TestReg).addImm(Bit).addMBB(DstMBB);
1472 constrainSelectedInstRegOperands(*TestBitMI, TII, TRI, RBI);
1473 return &*TestBitMI;
1474}
1475
1476bool AArch64InstructionSelector::tryOptAndIntoCompareBranch(
1477 MachineInstr &AndInst, bool Invert, MachineBasicBlock *DstMBB,
1478 MachineIRBuilder &MIB) const {
1479 assert(AndInst.getOpcode() == TargetOpcode::G_AND && "Expected G_AND only?")(static_cast <bool> (AndInst.getOpcode() == TargetOpcode
::G_AND && "Expected G_AND only?") ? void (0) : __assert_fail
("AndInst.getOpcode() == TargetOpcode::G_AND && \"Expected G_AND only?\""
, "llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 1479, __extension__ __PRETTY_FUNCTION__))
;
1480 // Given something like this:
1481 //
1482 // %x = ...Something...
1483 // %one = G_CONSTANT i64 1
1484 // %zero = G_CONSTANT i64 0
1485 // %and = G_AND %x, %one
1486 // %cmp = G_ICMP intpred(ne), %and, %zero
1487 // %cmp_trunc = G_TRUNC %cmp
1488 // G_BRCOND %cmp_trunc, %bb.3
1489 //
1490 // We want to try and fold the AND into the G_BRCOND and produce either a
1491 // TBNZ (when we have intpred(ne)) or a TBZ (when we have intpred(eq)).
1492 //
1493 // In this case, we'd get
1494 //
1495 // TBNZ %x %bb.3
1496 //
1497
1498 // Check if the AND has a constant on its RHS which we can use as a mask.
1499 // If it's a power of 2, then it's the same as checking a specific bit.
1500 // (e.g., ANDing with 8 == ANDing with 000...1000 == testing if bit 3 is set)
1501 auto MaybeBit = getIConstantVRegValWithLookThrough(
1502 AndInst.getOperand(2).getReg(), *MIB.getMRI());
1503 if (!MaybeBit)
1504 return false;
1505
1506 int32_t Bit = MaybeBit->Value.exactLogBase2();
1507 if (Bit < 0)
1508 return false;
1509
1510 Register TestReg = AndInst.getOperand(1).getReg();
1511
1512 // Emit a TB(N)Z.
1513 emitTestBit(TestReg, Bit, Invert, DstMBB, MIB);
1514 return true;
1515}
1516
1517MachineInstr *AArch64InstructionSelector::emitCBZ(Register CompareReg,
1518 bool IsNegative,
1519 MachineBasicBlock *DestMBB,
1520 MachineIRBuilder &MIB) const {
1521 assert(ProduceNonFlagSettingCondBr && "CBZ does not set flags!")(static_cast <bool> (ProduceNonFlagSettingCondBr &&
"CBZ does not set flags!") ? void (0) : __assert_fail ("ProduceNonFlagSettingCondBr && \"CBZ does not set flags!\""
, "llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 1521, __extension__ __PRETTY_FUNCTION__))
;
1522 MachineRegisterInfo &MRI = *MIB.getMRI();
1523 assert(RBI.getRegBank(CompareReg, MRI, TRI)->getID() ==(static_cast <bool> (RBI.getRegBank(CompareReg, MRI, TRI
)->getID() == AArch64::GPRRegBankID && "Expected GPRs only?"
) ? void (0) : __assert_fail ("RBI.getRegBank(CompareReg, MRI, TRI)->getID() == AArch64::GPRRegBankID && \"Expected GPRs only?\""
, "llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 1525, __extension__ __PRETTY_FUNCTION__))
1524 AArch64::GPRRegBankID &&(static_cast <bool> (RBI.getRegBank(CompareReg, MRI, TRI
)->getID() == AArch64::GPRRegBankID && "Expected GPRs only?"
) ? void (0) : __assert_fail ("RBI.getRegBank(CompareReg, MRI, TRI)->getID() == AArch64::GPRRegBankID && \"Expected GPRs only?\""
, "llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 1525, __extension__ __PRETTY_FUNCTION__))
1525 "Expected GPRs only?")(static_cast <bool> (RBI.getRegBank(CompareReg, MRI, TRI
)->getID() == AArch64::GPRRegBankID && "Expected GPRs only?"
) ? void (0) : __assert_fail ("RBI.getRegBank(CompareReg, MRI, TRI)->getID() == AArch64::GPRRegBankID && \"Expected GPRs only?\""
, "llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 1525, __extension__ __PRETTY_FUNCTION__))
;
1526 auto Ty = MRI.getType(CompareReg);
1527 unsigned Width = Ty.getSizeInBits();
1528 assert(!Ty.isVector() && "Expected scalar only?")(static_cast <bool> (!Ty.isVector() && "Expected scalar only?"
) ? void (0) : __assert_fail ("!Ty.isVector() && \"Expected scalar only?\""
, "llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 1528, __extension__ __PRETTY_FUNCTION__))
;
1529 assert(Width <= 64 && "Expected width to be at most 64?")(static_cast <bool> (Width <= 64 && "Expected width to be at most 64?"
) ? void (0) : __assert_fail ("Width <= 64 && \"Expected width to be at most 64?\""
, "llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 1529, __extension__ __PRETTY_FUNCTION__))
;
1530 static const unsigned OpcTable[2][2] = {{AArch64::CBZW, AArch64::CBZX},
1531 {AArch64::CBNZW, AArch64::CBNZX}};
1532 unsigned Opc = OpcTable[IsNegative][Width == 64];
1533 auto BranchMI = MIB.buildInstr(Opc, {}, {CompareReg}).addMBB(DestMBB);
1534 constrainSelectedInstRegOperands(*BranchMI, TII, TRI, RBI);
1535 return &*BranchMI;
1536}
1537
1538bool AArch64InstructionSelector::selectCompareBranchFedByFCmp(
1539 MachineInstr &I, MachineInstr &FCmp, MachineIRBuilder &MIB) const {
1540 assert(FCmp.getOpcode() == TargetOpcode::G_FCMP)(static_cast <bool> (FCmp.getOpcode() == TargetOpcode::
G_FCMP) ? void (0) : __assert_fail ("FCmp.getOpcode() == TargetOpcode::G_FCMP"
, "llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 1540, __extension__ __PRETTY_FUNCTION__))
;
1541 assert(I.getOpcode() == TargetOpcode::G_BRCOND)(static_cast <bool> (I.getOpcode() == TargetOpcode::G_BRCOND
) ? void (0) : __assert_fail ("I.getOpcode() == TargetOpcode::G_BRCOND"
, "llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 1541, __extension__ __PRETTY_FUNCTION__))
;
1542 // Unfortunately, the mapping of LLVM FP CC's onto AArch64 CC's isn't
1543 // totally clean. Some of them require two branches to implement.
1544 auto Pred = (CmpInst::Predicate)FCmp.getOperand(1).getPredicate();
1545 emitFPCompare(FCmp.getOperand(2).getReg(), FCmp.getOperand(3).getReg(), MIB,
1546 Pred);
1547 AArch64CC::CondCode CC1, CC2;
1548 changeFCMPPredToAArch64CC(static_cast<CmpInst::Predicate>(Pred), CC1, CC2);
1549 MachineBasicBlock *DestMBB = I.getOperand(1).getMBB();
1550 MIB.buildInstr(AArch64::Bcc, {}, {}).addImm(CC1).addMBB(DestMBB);
1551 if (CC2 != AArch64CC::AL)
1552 MIB.buildInstr(AArch64::Bcc, {}, {}).addImm(CC2).addMBB(DestMBB);
1553 I.eraseFromParent();
1554 return true;
1555}
1556
1557bool AArch64InstructionSelector::tryOptCompareBranchFedByICmp(
1558 MachineInstr &I, MachineInstr &ICmp, MachineIRBuilder &MIB) const {
1559 assert(ICmp.getOpcode() == TargetOpcode::G_ICMP)(static_cast <bool> (ICmp.getOpcode() == TargetOpcode::
G_ICMP) ? void (0) : __assert_fail ("ICmp.getOpcode() == TargetOpcode::G_ICMP"
, "llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 1559, __extension__ __PRETTY_FUNCTION__))
;
1560 assert(I.getOpcode() == TargetOpcode::G_BRCOND)(static_cast <bool> (I.getOpcode() == TargetOpcode::G_BRCOND
) ? void (0) : __assert_fail ("I.getOpcode() == TargetOpcode::G_BRCOND"
, "llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 1560, __extension__ __PRETTY_FUNCTION__))
;
1561 // Attempt to optimize the G_BRCOND + G_ICMP into a TB(N)Z/CB(N)Z.
1562 //
1563 // Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z
1564 // instructions will not be produced, as they are conditional branch
1565 // instructions that do not set flags.
1566 if (!ProduceNonFlagSettingCondBr)
1567 return false;
1568
1569 MachineRegisterInfo &MRI = *MIB.getMRI();
1570 MachineBasicBlock *DestMBB = I.getOperand(1).getMBB();
1571 auto Pred =
1572 static_cast<CmpInst::Predicate>(ICmp.getOperand(1).getPredicate());
1573 Register LHS = ICmp.getOperand(2).getReg();
1574 Register RHS = ICmp.getOperand(3).getReg();
1575
1576 // We're allowed to emit a TB(N)Z/CB(N)Z. Try to do that.
1577 auto VRegAndVal = getIConstantVRegValWithLookThrough(RHS, MRI);
1578 MachineInstr *AndInst = getOpcodeDef(TargetOpcode::G_AND, LHS, MRI);
1579
1580 // When we can emit a TB(N)Z, prefer that.
1581 //
1582 // Handle non-commutative condition codes first.
1583 // Note that we don't want to do this when we have a G_AND because it can
1584 // become a tst. The tst will make the test bit in the TB(N)Z redundant.
1585 if (VRegAndVal && !AndInst) {
1586 int64_t C = VRegAndVal->Value.getSExtValue();
1587
1588 // When we have a greater-than comparison, we can just test if the msb is
1589 // zero.
1590 if (C == -1 && Pred == CmpInst::ICMP_SGT) {
1591 uint64_t Bit = MRI.getType(LHS).getSizeInBits() - 1;
1592 emitTestBit(LHS, Bit, /*IsNegative = */ false, DestMBB, MIB);
1593 I.eraseFromParent();
1594 return true;
1595 }
1596
1597 // When we have a less than comparison, we can just test if the msb is not
1598 // zero.
1599 if (C == 0 && Pred == CmpInst::ICMP_SLT) {
1600 uint64_t Bit = MRI.getType(LHS).getSizeInBits() - 1;
1601 emitTestBit(LHS, Bit, /*IsNegative = */ true, DestMBB, MIB);
1602 I.eraseFromParent();
1603 return true;
1604 }
1605 }
1606
1607 // Attempt to handle commutative condition codes. Right now, that's only
1608 // eq/ne.
1609 if (ICmpInst::isEquality(Pred)) {
1610 if (!VRegAndVal) {
1611 std::swap(RHS, LHS);
1612 VRegAndVal = getIConstantVRegValWithLookThrough(RHS, MRI);
1613 AndInst = getOpcodeDef(TargetOpcode::G_AND, LHS, MRI);
1614 }
1615
1616 if (VRegAndVal && VRegAndVal->Value == 0) {
1617 // If there's a G_AND feeding into this branch, try to fold it away by
1618 // emitting a TB(N)Z instead.
1619 //
1620 // Note: If we have LT, then it *is* possible to fold, but it wouldn't be
1621 // beneficial. When we have an AND and LT, we need a TST/ANDS, so folding
1622 // would be redundant.
1623 if (AndInst &&
1624 tryOptAndIntoCompareBranch(
1625 *AndInst, /*Invert = */ Pred == CmpInst::ICMP_NE, DestMBB, MIB)) {
1626 I.eraseFromParent();
1627 return true;
1628 }
1629
1630 // Otherwise, try to emit a CB(N)Z instead.
1631 auto LHSTy = MRI.getType(LHS);
1632 if (!LHSTy.isVector() && LHSTy.getSizeInBits() <= 64) {
1633 emitCBZ(LHS, /*IsNegative = */ Pred == CmpInst::ICMP_NE, DestMBB, MIB);
1634 I.eraseFromParent();
1635 return true;
1636 }
1637 }
1638 }
1639
1640 return false;
1641}
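
The two rewrites above for a constant RHS (x > -1 and x < 0) are really just tests of the most significant bit, which is what lets the selector emit a TBZ/TBNZ on the top bit instead of a full compare. In plain C++ terms (64-bit shown; the same holds per width):

#include <cassert>
#include <cstdint>

static bool msbSet(int64_t x) { return (static_cast<uint64_t>(x) >> 63) & 1; }

int main() {
  for (int64_t x : {INT64_MIN, -2LL, -1LL, 0LL, 1LL, INT64_MAX}) {
    assert((x > -1) == !msbSet(x)); // ICMP_SGT x, -1  <=>  TBZ  x, #63
    assert((x < 0)  ==  msbSet(x)); // ICMP_SLT x, 0   <=>  TBNZ x, #63
  }
}
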
1642
1643bool AArch64InstructionSelector::selectCompareBranchFedByICmp(
1644 MachineInstr &I, MachineInstr &ICmp, MachineIRBuilder &MIB) const {
1645 assert(ICmp.getOpcode() == TargetOpcode::G_ICMP)(static_cast <bool> (ICmp.getOpcode() == TargetOpcode::
G_ICMP) ? void (0) : __assert_fail ("ICmp.getOpcode() == TargetOpcode::G_ICMP"
, "llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 1645, __extension__ __PRETTY_FUNCTION__))
;
1646 assert(I.getOpcode() == TargetOpcode::G_BRCOND)(static_cast <bool> (I.getOpcode() == TargetOpcode::G_BRCOND
) ? void (0) : __assert_fail ("I.getOpcode() == TargetOpcode::G_BRCOND"
, "llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 1646, __extension__ __PRETTY_FUNCTION__))
;
1647 if (tryOptCompareBranchFedByICmp(I, ICmp, MIB))
1648 return true;
1649
1650 // Couldn't optimize. Emit a compare + a Bcc.
1651 MachineBasicBlock *DestMBB = I.getOperand(1).getMBB();
1652 auto PredOp = ICmp.getOperand(1);
1653 emitIntegerCompare(ICmp.getOperand(2), ICmp.getOperand(3), PredOp, MIB);
1654 const AArch64CC::CondCode CC = changeICMPPredToAArch64CC(
1655 static_cast<CmpInst::Predicate>(PredOp.getPredicate()));
1656 MIB.buildInstr(AArch64::Bcc, {}, {}).addImm(CC).addMBB(DestMBB);
1657 I.eraseFromParent();
1658 return true;
1659}
1660
1661bool AArch64InstructionSelector::selectCompareBranch(
1662 MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) {
1663 Register CondReg = I.getOperand(0).getReg();
1664 MachineInstr *CCMI = MRI.getVRegDef(CondReg);
1665 if (CCMI->getOpcode() == TargetOpcode::G_TRUNC) {
1666 CondReg = CCMI->getOperand(1).getReg();
1667 CCMI = MRI.getVRegDef(CondReg);
1668 }
1669
1670 // Try to select the G_BRCOND using whatever is feeding the condition if
1671 // possible.
1672 unsigned CCMIOpc = CCMI->getOpcode();
1673 if (CCMIOpc == TargetOpcode::G_FCMP)
1674 return selectCompareBranchFedByFCmp(I, *CCMI, MIB);
1675 if (CCMIOpc == TargetOpcode::G_ICMP)
1676 return selectCompareBranchFedByICmp(I, *CCMI, MIB);
1677
1678 // Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z
1679 // instructions will not be produced, as they are conditional branch
1680 // instructions that do not set flags.
1681 if (ProduceNonFlagSettingCondBr) {
1682 emitTestBit(CondReg, /*Bit = */ 0, /*IsNegative = */ true,
1683 I.getOperand(1).getMBB(), MIB);
1684 I.eraseFromParent();
1685 return true;
1686 }
1687
1688 // Can't emit TB(N)Z/CB(N)Z. Emit a tst + bcc instead.
1689 auto TstMI =
1690 MIB.buildInstr(AArch64::ANDSWri, {LLT::scalar(32)}, {CondReg}).addImm(1);
1691 constrainSelectedInstRegOperands(*TstMI, TII, TRI, RBI);
1692 auto Bcc = MIB.buildInstr(AArch64::Bcc)
1693 .addImm(AArch64CC::EQ)
1694 .addMBB(I.getOperand(1).getMBB());
1695 I.eraseFromParent();
1696 return constrainSelectedInstRegOperands(*Bcc, TII, TRI, RBI);
1697}
1698
1699/// Returns the element immediate value of a vector shift operand if found.
1700/// This needs to detect a splat-like operation, e.g. a G_BUILD_VECTOR.
1701static Optional<int64_t> getVectorShiftImm(Register Reg,
1702 MachineRegisterInfo &MRI) {
1703 assert(MRI.getType(Reg).isVector() && "Expected a *vector* shift operand")(static_cast <bool> (MRI.getType(Reg).isVector() &&
"Expected a *vector* shift operand") ? void (0) : __assert_fail
("MRI.getType(Reg).isVector() && \"Expected a *vector* shift operand\""
, "llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 1703, __extension__ __PRETTY_FUNCTION__))
;
1704 MachineInstr *OpMI = MRI.getVRegDef(Reg);
1705 assert(OpMI && "Expected to find a vreg def for vector shift operand")(static_cast <bool> (OpMI && "Expected to find a vreg def for vector shift operand"
) ? void (0) : __assert_fail ("OpMI && \"Expected to find a vreg def for vector shift operand\""
, "llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 1705, __extension__ __PRETTY_FUNCTION__))
;
1706 return getAArch64VectorSplatScalar(*OpMI, MRI);
1707}
1708
1709/// Matches and returns the shift immediate value for a SHL instruction given
1710/// a shift operand.
1711static Optional<int64_t> getVectorSHLImm(LLT SrcTy, Register Reg, MachineRegisterInfo &MRI) {
1712 Optional<int64_t> ShiftImm = getVectorShiftImm(Reg, MRI);
1713 if (!ShiftImm)
1714 return None;
1715 // Check the immediate is in range for a SHL.
1716 int64_t Imm = *ShiftImm;
1717 if (Imm < 0)
1718 return None;
1719 switch (SrcTy.getElementType().getSizeInBits()) {
1720 default:
1721 LLVM_DEBUG(dbgs() << "Unhandled element type for vector shift")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "Unhandled element type for vector shift"
; } } while (false)
;
1722 return None;
1723 case 8:
1724 if (Imm > 7)
1725 return None;
1726 break;
1727 case 16:
1728 if (Imm > 15)
1729 return None;
1730 break;
1731 case 32:
1732 if (Imm > 31)
1733 return None;
1734 break;
1735 case 64:
1736 if (Imm > 63)
1737 return None;
1738 break;
1739 }
1740 return Imm;
1741}
1742
1743bool AArch64InstructionSelector::selectVectorSHL(MachineInstr &I,
1744 MachineRegisterInfo &MRI) {
1745 assert(I.getOpcode() == TargetOpcode::G_SHL)(static_cast <bool> (I.getOpcode() == TargetOpcode::G_SHL
) ? void (0) : __assert_fail ("I.getOpcode() == TargetOpcode::G_SHL"
, "llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 1745, __extension__ __PRETTY_FUNCTION__))
;
1746 Register DstReg = I.getOperand(0).getReg();
1747 const LLT Ty = MRI.getType(DstReg);
1748 Register Src1Reg = I.getOperand(1).getReg();
1749 Register Src2Reg = I.getOperand(2).getReg();
1750
1751 if (!Ty.isVector())
1752 return false;
1753
1754 // Check if we have a vector of constants on RHS that we can select as the
1755 // immediate form.
1756 Optional<int64_t> ImmVal = getVectorSHLImm(Ty, Src2Reg, MRI);
1757
1758 unsigned Opc = 0;
1759 if (Ty == LLT::fixed_vector(2, 64)) {
1760 Opc = ImmVal ? AArch64::SHLv2i64_shift : AArch64::USHLv2i64;
1761 } else if (Ty == LLT::fixed_vector(4, 32)) {
1762 Opc = ImmVal ? AArch64::SHLv4i32_shift : AArch64::USHLv4i32;
1763 } else if (Ty == LLT::fixed_vector(2, 32)) {
1764 Opc = ImmVal ? AArch64::SHLv2i32_shift : AArch64::USHLv2i32;
1765 } else if (Ty == LLT::fixed_vector(4, 16)) {
1766 Opc = ImmVal ? AArch64::SHLv4i16_shift : AArch64::USHLv4i16;
1767 } else if (Ty == LLT::fixed_vector(8, 16)) {
1768 Opc = ImmVal ? AArch64::SHLv8i16_shift : AArch64::USHLv8i16;
1769 } else if (Ty == LLT::fixed_vector(16, 8)) {
1770 Opc = ImmVal ? AArch64::SHLv16i8_shift : AArch64::USHLv16i8;
1771 } else if (Ty == LLT::fixed_vector(8, 8)) {
1772 Opc = ImmVal ? AArch64::SHLv8i8_shift : AArch64::USHLv8i8;
1773 } else {
1774 LLVM_DEBUG(dbgs() << "Unhandled G_SHL type")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "Unhandled G_SHL type"; }
} while (false)
;
1775 return false;
1776 }
1777
1778 auto Shl = MIB.buildInstr(Opc, {DstReg}, {Src1Reg});
1779 if (ImmVal)
1780 Shl.addImm(*ImmVal);
1781 else
1782 Shl.addUse(Src2Reg);
1783 constrainSelectedInstRegOperands(*Shl, TII, TRI, RBI);
1784 I.eraseFromParent();
1785 return true;
1786}
1787
1788bool AArch64InstructionSelector::selectVectorAshrLshr(
1789 MachineInstr &I, MachineRegisterInfo &MRI) {
1790 assert(I.getOpcode() == TargetOpcode::G_ASHR ||(static_cast <bool> (I.getOpcode() == TargetOpcode::G_ASHR
|| I.getOpcode() == TargetOpcode::G_LSHR) ? void (0) : __assert_fail
("I.getOpcode() == TargetOpcode::G_ASHR || I.getOpcode() == TargetOpcode::G_LSHR"
, "llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 1791, __extension__ __PRETTY_FUNCTION__))
1791 I.getOpcode() == TargetOpcode::G_LSHR)(static_cast <bool> (I.getOpcode() == TargetOpcode::G_ASHR
|| I.getOpcode() == TargetOpcode::G_LSHR) ? void (0) : __assert_fail
("I.getOpcode() == TargetOpcode::G_ASHR || I.getOpcode() == TargetOpcode::G_LSHR"
, "llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 1791, __extension__ __PRETTY_FUNCTION__))
;
1792 Register DstReg = I.getOperand(0).getReg();
1793 const LLT Ty = MRI.getType(DstReg);
1794 Register Src1Reg = I.getOperand(1).getReg();
1795 Register Src2Reg = I.getOperand(2).getReg();
1796
1797 if (!Ty.isVector())
1798 return false;
1799
1800 bool IsASHR = I.getOpcode() == TargetOpcode::G_ASHR;
1801
1802 // We expect the immediate case to be lowered in the PostLegalCombiner to
1803 // AArch64ISD::VASHR or AArch64ISD::VLSHR equivalents.
1804
1805 // There is no shift-right-by-register instruction, but the shift-left
1806 // register instruction takes a signed value, where negative amounts specify
1807 // a right shift.
1808
1809 unsigned Opc = 0;
1810 unsigned NegOpc = 0;
1811 const TargetRegisterClass *RC =
1812 getRegClassForTypeOnBank(Ty, RBI.getRegBank(AArch64::FPRRegBankID), RBI);
1813 if (Ty == LLT::fixed_vector(2, 64)) {
1814 Opc = IsASHR ? AArch64::SSHLv2i64 : AArch64::USHLv2i64;
1815 NegOpc = AArch64::NEGv2i64;
1816 } else if (Ty == LLT::fixed_vector(4, 32)) {
1817 Opc = IsASHR ? AArch64::SSHLv4i32 : AArch64::USHLv4i32;
1818 NegOpc = AArch64::NEGv4i32;
1819 } else if (Ty == LLT::fixed_vector(2, 32)) {
1820 Opc = IsASHR ? AArch64::SSHLv2i32 : AArch64::USHLv2i32;
1821 NegOpc = AArch64::NEGv2i32;
1822 } else if (Ty == LLT::fixed_vector(4, 16)) {
1823 Opc = IsASHR ? AArch64::SSHLv4i16 : AArch64::USHLv4i16;
1824 NegOpc = AArch64::NEGv4i16;
1825 } else if (Ty == LLT::fixed_vector(8, 16)) {
1826 Opc = IsASHR ? AArch64::SSHLv8i16 : AArch64::USHLv8i16;
1827 NegOpc = AArch64::NEGv8i16;
1828 } else if (Ty == LLT::fixed_vector(16, 8)) {
1829 Opc = IsASHR ? AArch64::SSHLv16i8 : AArch64::USHLv16i8;
1830 NegOpc = AArch64::NEGv16i8;
1831 } else if (Ty == LLT::fixed_vector(8, 8)) {
1832 Opc = IsASHR ? AArch64::SSHLv8i8 : AArch64::USHLv8i8;
1833 NegOpc = AArch64::NEGv8i8;
1834 } else {
1835 LLVM_DEBUG(dbgs() << "Unhandled G_ASHR type")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "Unhandled G_ASHR type"; }
} while (false)
;
1836 return false;
1837 }
1838
1839 auto Neg = MIB.buildInstr(NegOpc, {RC}, {Src2Reg});
1840 constrainSelectedInstRegOperands(*Neg, TII, TRI, RBI);
1841 auto SShl = MIB.buildInstr(Opc, {DstReg}, {Src1Reg, Neg});
1842 constrainSelectedInstRegOperands(*SShl, TII, TRI, RBI);
1843 I.eraseFromParent();
1844 return true;
1845}
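
The lowering above works because the register forms of the vector shifts (SSHL/USHL) treat a negative per-lane shift amount as a right shift, so x >> n is emitted as a NEG of the amounts followed by SSHL/USHL. A one-lane C++ model of that contract (32-bit lane, amounts well inside the lane width; signed right shift is assumed to be arithmetic, as it is on the relevant targets):

#include <cassert>
#include <cstdint>

// Per-lane model: positive amounts shift left, negative amounts shift right
// (arithmetic for the signed form, logical for the unsigned form).
static int32_t  sshl_lane(int32_t v, int8_t amt) {
  return amt >= 0 ? int32_t(uint32_t(v) << amt) : int32_t(v >> -amt);
}
static uint32_t ushl_lane(uint32_t v, int8_t amt) {
  return amt >= 0 ? v << amt : v >> -amt;
}

int main() {
  assert(sshl_lane(-64, -3) == -8);      // ashr by 3 via a negated amount
  assert(ushl_lane(0x80u, -3) == 0x10u); // lshr by 3 via a negated amount
  assert(ushl_lane(0x10u, 3) == 0x80u);  // positive amounts still shift left
}
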
1846
1847bool AArch64InstructionSelector::selectVaStartAAPCS(
1848 MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const {
1849 return false;
1850}
1851
1852bool AArch64InstructionSelector::selectVaStartDarwin(
1853 MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const {
1854 AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
1855 Register ListReg = I.getOperand(0).getReg();
1856
1857 Register ArgsAddrReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
1858
1859 auto MIB =
1860 BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::ADDXri))
1861 .addDef(ArgsAddrReg)
1862 .addFrameIndex(FuncInfo->getVarArgsStackIndex())
1863 .addImm(0)
1864 .addImm(0);
1865
1866 constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
1867
1868 MIB = BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::STRXui))
1869 .addUse(ArgsAddrReg)
1870 .addUse(ListReg)
1871 .addImm(0)
1872 .addMemOperand(*I.memoperands_begin());
1873
1874 constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
1875 I.eraseFromParent();
1876 return true;
1877}
1878
1879void AArch64InstructionSelector::materializeLargeCMVal(
1880 MachineInstr &I, const Value *V, unsigned OpFlags) {
1881 MachineBasicBlock &MBB = *I.getParent();
1882 MachineFunction &MF = *MBB.getParent();
1883 MachineRegisterInfo &MRI = MF.getRegInfo();
1884
1885 auto MovZ = MIB.buildInstr(AArch64::MOVZXi, {&AArch64::GPR64RegClass}, {});
1886 MovZ->addOperand(MF, I.getOperand(1));
1887 MovZ->getOperand(1).setTargetFlags(OpFlags | AArch64II::MO_G0 |
1888 AArch64II::MO_NC);
1889 MovZ->addOperand(MF, MachineOperand::CreateImm(0));
1890 constrainSelectedInstRegOperands(*MovZ, TII, TRI, RBI);
1891
1892 auto BuildMovK = [&](Register SrcReg, unsigned char Flags, unsigned Offset,
1893 Register ForceDstReg) {
1894 Register DstReg = ForceDstReg
1895 ? ForceDstReg
1896 : MRI.createVirtualRegister(&AArch64::GPR64RegClass);
1897 auto MovI = MIB.buildInstr(AArch64::MOVKXi).addDef(DstReg).addUse(SrcReg);
1898 if (auto *GV = dyn_cast<GlobalValue>(V)) {
1899 MovI->addOperand(MF, MachineOperand::CreateGA(
1900 GV, MovZ->getOperand(1).getOffset(), Flags));
1901 } else {
1902 MovI->addOperand(
1903 MF, MachineOperand::CreateBA(cast<BlockAddress>(V),
1904 MovZ->getOperand(1).getOffset(), Flags));
1905 }
1906 MovI->addOperand(MF, MachineOperand::CreateImm(Offset));
1907 constrainSelectedInstRegOperands(*MovI, TII, TRI, RBI);
1908 return DstReg;
1909 };
1910 Register DstReg = BuildMovK(MovZ.getReg(0),
1911 AArch64II::MO_G1 | AArch64II::MO_NC, 16, 0);
1912 DstReg = BuildMovK(DstReg, AArch64II::MO_G2 | AArch64II::MO_NC, 32, 0);
1913 BuildMovK(DstReg, AArch64II::MO_G3, 48, I.getOperand(0).getReg());
1914}
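
materializeLargeCMVal above assembles a 64-bit address sixteen bits at a time: the MOVZ writes group 0 and zeroes the rest, and each MOVK overwrites one 16-bit field (at shifts 16, 32 and 48, matching the MO_G1..MO_G3 flags) while preserving the others. As plain arithmetic:

#include <cassert>
#include <cstdint>

// Assemble a 64-bit value from four 16-bit groups the way MOVZ + MOVK do.
static uint64_t movzMovkChain(uint16_t g0, uint16_t g1, uint16_t g2, uint16_t g3) {
  uint64_t v = g0;                                       // MOVZ v, g0
  v = (v & ~(0xffffULL << 16)) | (uint64_t(g1) << 16);   // MOVK v, g1, lsl #16
  v = (v & ~(0xffffULL << 32)) | (uint64_t(g2) << 32);   // MOVK v, g2, lsl #32
  v = (v & ~(0xffffULL << 48)) | (uint64_t(g3) << 48);   // MOVK v, g3, lsl #48
  return v;
}

int main() {
  assert(movzMovkChain(0xcdef, 0x89ab, 0x4567, 0x0123) == 0x0123456789abcdefULL);
}
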
1915
1916bool AArch64InstructionSelector::preISelLower(MachineInstr &I) {
1917 MachineBasicBlock &MBB = *I.getParent();
1918 MachineFunction &MF = *MBB.getParent();
1919 MachineRegisterInfo &MRI = MF.getRegInfo();
1920
1921 switch (I.getOpcode()) {
1922 case TargetOpcode::G_STORE: {
1923 bool Changed = contractCrossBankCopyIntoStore(I, MRI);
1924 MachineOperand &SrcOp = I.getOperand(0);
1925 if (MRI.getType(SrcOp.getReg()).isPointer()) {
1926 // Allow matching with imported patterns for stores of pointers. Unlike
1927 // G_LOAD/G_PTR_ADD, we may not have selected all users. So, emit a copy
1928 // and constrain.
1929 auto Copy = MIB.buildCopy(LLT::scalar(64), SrcOp);
1930 Register NewSrc = Copy.getReg(0);
1931 SrcOp.setReg(NewSrc);
1932 RBI.constrainGenericRegister(NewSrc, AArch64::GPR64RegClass, MRI);
1933 Changed = true;
1934 }
1935 return Changed;
1936 }
1937 case TargetOpcode::G_PTR_ADD:
1938 return convertPtrAddToAdd(I, MRI);
1939 case TargetOpcode::G_LOAD: {
1940 // For scalar loads of pointers, we try to convert the dest type from p0
1941 // to s64 so that our imported patterns can match. Like with the G_PTR_ADD
1942 // conversion, this should be ok because all users should have been
1943 // selected already, so the type doesn't matter for them.
1944 Register DstReg = I.getOperand(0).getReg();
1945 const LLT DstTy = MRI.getType(DstReg);
1946 if (!DstTy.isPointer())
1947 return false;
1948 MRI.setType(DstReg, LLT::scalar(64));
1949 return true;
1950 }
1951 case AArch64::G_DUP: {
1952 // Convert the type from p0 to s64 to help selection.
1953 LLT DstTy = MRI.getType(I.getOperand(0).getReg());
1954 if (!DstTy.getElementType().isPointer())
1955 return false;
1956 auto NewSrc = MIB.buildCopy(LLT::scalar(64), I.getOperand(1).getReg());
1957 MRI.setType(I.getOperand(0).getReg(),
1958 DstTy.changeElementType(LLT::scalar(64)));
1959 MRI.setRegClass(NewSrc.getReg(0), &AArch64::GPR64RegClass);
1960 I.getOperand(1).setReg(NewSrc.getReg(0));
1961 return true;
1962 }
1963 case TargetOpcode::G_UITOFP:
1964 case TargetOpcode::G_SITOFP: {
1965 // If both source and destination regbanks are FPR, then convert the opcode
1966 // to G_SITOF so that the importer can select it to an fpr variant.
1967 // Otherwise, it ends up matching an fpr/gpr variant and adding a cross-bank
1968 // copy.
1969 Register SrcReg = I.getOperand(1).getReg();
1970 LLT SrcTy = MRI.getType(SrcReg);
1971 LLT DstTy = MRI.getType(I.getOperand(0).getReg());
1972 if (SrcTy.isVector() || SrcTy.getSizeInBits() != DstTy.getSizeInBits())
1973 return false;
1974
1975 if (RBI.getRegBank(SrcReg, MRI, TRI)->getID() == AArch64::FPRRegBankID) {
1976 if (I.getOpcode() == TargetOpcode::G_SITOFP)
1977 I.setDesc(TII.get(AArch64::G_SITOF));
1978 else
1979 I.setDesc(TII.get(AArch64::G_UITOF));
1980 return true;
1981 }
1982 return false;
1983 }
1984 default:
1985 return false;
1986 }
1987}
1988
1989/// This lowering tries to look for G_PTR_ADD instructions and then converts
1990/// them to a standard G_ADD with a COPY on the source.
1991///
1992/// The motivation behind this is to expose the add semantics to the imported
1993/// tablegen patterns. We shouldn't need to check for uses being loads/stores,
1994/// because the selector works bottom up, uses before defs. By the time we
1995/// end up trying to select a G_PTR_ADD, we should have already attempted to
1996/// fold this into addressing modes and were therefore unsuccessful.
1997bool AArch64InstructionSelector::convertPtrAddToAdd(
1998 MachineInstr &I, MachineRegisterInfo &MRI) {
1999 assert(I.getOpcode() == TargetOpcode::G_PTR_ADD && "Expected G_PTR_ADD")(static_cast <bool> (I.getOpcode() == TargetOpcode::G_PTR_ADD
&& "Expected G_PTR_ADD") ? void (0) : __assert_fail (
"I.getOpcode() == TargetOpcode::G_PTR_ADD && \"Expected G_PTR_ADD\""
, "llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 1999, __extension__ __PRETTY_FUNCTION__))
;
2000 Register DstReg = I.getOperand(0).getReg();
2001 Register AddOp1Reg = I.getOperand(1).getReg();
2002 const LLT PtrTy = MRI.getType(DstReg);
2003 if (PtrTy.getAddressSpace() != 0)
2004 return false;
2005
2006 const LLT CastPtrTy =
2007 PtrTy.isVector() ? LLT::fixed_vector(2, 64) : LLT::scalar(64);
2008 auto PtrToInt = MIB.buildPtrToInt(CastPtrTy, AddOp1Reg);
2009 // Set regbanks on the registers.
2010 if (PtrTy.isVector())
2011 MRI.setRegBank(PtrToInt.getReg(0), RBI.getRegBank(AArch64::FPRRegBankID));
2012 else
2013 MRI.setRegBank(PtrToInt.getReg(0), RBI.getRegBank(AArch64::GPRRegBankID));
2014
2015 // Now turn the %dst(p0) = G_PTR_ADD %base, off into:
2016 // %dst(intty) = G_ADD %intbase, off
2017 I.setDesc(TII.get(TargetOpcode::G_ADD));
2018 MRI.setType(DstReg, CastPtrTy);
2019 I.getOperand(1).setReg(PtrToInt.getReg(0));
2020 if (!select(*PtrToInt)) {
2021 LLVM_DEBUG(dbgs() << "Failed to select G_PTRTOINT in convertPtrAddToAdd")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "Failed to select G_PTRTOINT in convertPtrAddToAdd"
; } } while (false)
;
2022 return false;
2023 }
2024
2025 // Also take the opportunity here to try to do some optimization.
2026 // Try to convert this into a G_SUB if the offset is a 0-x negate idiom.
2027 Register NegatedReg;
2028 if (!mi_match(I.getOperand(2).getReg(), MRI, m_Neg(m_Reg(NegatedReg))))
2029 return true;
2030 I.getOperand(2).setReg(NegatedReg);
2031 I.setDesc(TII.get(TargetOpcode::G_SUB));
2032 return true;
2033}
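
The negate-idiom fold at the end of convertPtrAddToAdd uses the fact that, once the pointer add has been rewritten as an integer add, adding (0 - x) is exactly subtracting x modulo 2^64, so the G_ADD can become a G_SUB:

#include <cassert>
#include <cstdint>

int main() {
  // G_PTR_ADD %base, (G_SUB 0, %x)  ==  %base - %x  once lowered to integers.
  uint64_t base = 0x1000, x = 0x40;
  assert(base + (0 - x) == base - x); // unsigned wrap-around makes this exact
}
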
2034
2035bool AArch64InstructionSelector::earlySelectSHL(MachineInstr &I,
2036 MachineRegisterInfo &MRI) {
2037 // We try to match the immediate variant of LSL, which is actually an alias
2038 // for a special case of UBFM. Otherwise, we fall back to the imported
2039 // selector which will match the register variant.
2040 assert(I.getOpcode() == TargetOpcode::G_SHL && "unexpected op")(static_cast <bool> (I.getOpcode() == TargetOpcode::G_SHL
&& "unexpected op") ? void (0) : __assert_fail ("I.getOpcode() == TargetOpcode::G_SHL && \"unexpected op\""
, "llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 2040, __extension__ __PRETTY_FUNCTION__))
;
2041 const auto &MO = I.getOperand(2);
2042 auto VRegAndVal = getIConstantVRegVal(MO.getReg(), MRI);
2043 if (!VRegAndVal)
2044 return false;
2045
2046 const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
2047 if (DstTy.isVector())
2048 return false;
2049 bool Is64Bit = DstTy.getSizeInBits() == 64;
2050 auto Imm1Fn = Is64Bit ? selectShiftA_64(MO) : selectShiftA_32(MO);
2051 auto Imm2Fn = Is64Bit ? selectShiftB_64(MO) : selectShiftB_32(MO);
2052
2053 if (!Imm1Fn || !Imm2Fn)
2054 return false;
2055
2056 auto NewI =
2057 MIB.buildInstr(Is64Bit ? AArch64::UBFMXri : AArch64::UBFMWri,
2058 {I.getOperand(0).getReg()}, {I.getOperand(1).getReg()});
2059
2060 for (auto &RenderFn : *Imm1Fn)
2061 RenderFn(NewI);
2062 for (auto &RenderFn : *Imm2Fn)
2063 RenderFn(NewI);
2064
2065 I.eraseFromParent();
2066 return constrainSelectedInstRegOperands(*NewI, TII, TRI, RBI);
2067}
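
earlySelectSHL relies on the architectural aliasing of LSL (immediate) onto UBFM: for a 64-bit shift by s with 1 <= s <= 63, LSL Xd, Xn, #s is UBFM Xd, Xn, #((64 - s) % 64), #(63 - s). The sketch below checks that identity; it is not a copy of the selectShiftA_64/selectShiftB_64 renderers, which supply the two immediates in the code above.

#include <cassert>
#include <cstdint>

// With imms < immr, UBFM takes the low (imms + 1) bits of the source and
// places them at bit (64 - immr), i.e. it shifts left by s.
static uint64_t ubfmAsLsl(uint64_t x, unsigned s) {
  unsigned immr = (64 - s) % 64;
  unsigned imms = 63 - s;
  uint64_t field = x & ((1ULL << (imms + 1)) - 1); // low (imms + 1) bits
  return field << (64 - immr);                     // placed at bit 64 - immr
}

int main() {
  for (unsigned s = 1; s < 64; ++s)
    assert(ubfmAsLsl(0x0123456789abcdefULL, s) == (0x0123456789abcdefULL << s));
}
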
2068
2069bool AArch64InstructionSelector::contractCrossBankCopyIntoStore(
2070 MachineInstr &I, MachineRegisterInfo &MRI) {
2071 assert(I.getOpcode() == TargetOpcode::G_STORE && "Expected G_STORE")(static_cast <bool> (I.getOpcode() == TargetOpcode::G_STORE
&& "Expected G_STORE") ? void (0) : __assert_fail ("I.getOpcode() == TargetOpcode::G_STORE && \"Expected G_STORE\""
, "llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 2071, __extension__ __PRETTY_FUNCTION__))
;
2072 // If we're storing a scalar, it doesn't matter what register bank that
2073 // scalar is on. All that matters is the size.
2074 //
2075 // So, if we see something like this (with a 32-bit scalar as an example):
2076 //
2077 // %x:gpr(s32) = ... something ...
2078 // %y:fpr(s32) = COPY %x:gpr(s32)
2079 // G_STORE %y:fpr(s32)
2080 //
2081 // We can fix this up into something like this:
2082 //
2083 // G_STORE %x:gpr(s32)
2084 //
2085 // And then continue the selection process normally.
2086 Register DefDstReg = getSrcRegIgnoringCopies(I.getOperand(0).getReg(), MRI);
2087 if (!DefDstReg.isValid())
2088 return false;
2089 LLT DefDstTy = MRI.getType(DefDstReg);
2090 Register StoreSrcReg = I.getOperand(0).getReg();
2091 LLT StoreSrcTy = MRI.getType(StoreSrcReg);
2092
2093 // If we get something strange like a physical register, then we shouldn't
2094 // go any further.
2095 if (!DefDstTy.isValid())
2096 return false;
2097
2098 // Are the source and dst types the same size?
2099 if (DefDstTy.getSizeInBits() != StoreSrcTy.getSizeInBits())
2100 return false;
2101
2102 if (RBI.getRegBank(StoreSrcReg, MRI, TRI) ==
2103 RBI.getRegBank(DefDstReg, MRI, TRI))
2104 return false;
2105
2106 // We have a cross-bank copy, which is entering a store. Let's fold it.
2107 I.getOperand(0).setReg(DefDstReg);
2108 return true;
2109}
2110
2111bool AArch64InstructionSelector::earlySelect(MachineInstr &I) {
2112 assert(I.getParent() && "Instruction should be in a basic block!")(static_cast <bool> (I.getParent() && "Instruction should be in a basic block!"
) ? void (0) : __assert_fail ("I.getParent() && \"Instruction should be in a basic block!\""
, "llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 2112, __extension__ __PRETTY_FUNCTION__))
;
2113 assert(I.getParent()->getParent() && "Instruction should be in a function!")(static_cast <bool> (I.getParent()->getParent() &&
"Instruction should be in a function!") ? void (0) : __assert_fail
("I.getParent()->getParent() && \"Instruction should be in a function!\""
, "llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 2113, __extension__ __PRETTY_FUNCTION__))
;
2114
2115 MachineBasicBlock &MBB = *I.getParent();
2116 MachineFunction &MF = *MBB.getParent();
2117 MachineRegisterInfo &MRI = MF.getRegInfo();
2118
2119 switch (I.getOpcode()) {
2120 case AArch64::G_DUP: {
2121 // Before selecting a DUP instruction, check if it is better selected as a
2122 // MOV or load from a constant pool.
2123 Register Src = I.getOperand(1).getReg();
2124 auto ValAndVReg = getIConstantVRegValWithLookThrough(Src, MRI);
2125 if (!ValAndVReg)
2126 return false;
2127 LLVMContext &Ctx = MF.getFunction().getContext();
2128 Register Dst = I.getOperand(0).getReg();
2129 auto *CV = ConstantDataVector::getSplat(
2130 MRI.getType(Dst).getNumElements(),
2131 ConstantInt::get(Type::getIntNTy(Ctx, MRI.getType(Src).getSizeInBits()),
2132 ValAndVReg->Value));
2133 if (!emitConstantVector(Dst, CV, MIB, MRI))
2134 return false;
2135 I.eraseFromParent();
2136 return true;
2137 }
2138 case TargetOpcode::G_SEXT:
2139 // Check for i64 sext(i32 vector_extract) prior to tablegen to select SMOV
2140 // over a normal extend.
2141 if (selectUSMovFromExtend(I, MRI))
2142 return true;
2143 return false;
2144 case TargetOpcode::G_BR:
2145 return false;
2146 case TargetOpcode::G_SHL:
2147 return earlySelectSHL(I, MRI);
2148 case TargetOpcode::G_CONSTANT: {
2149 bool IsZero = false;
2150 if (I.getOperand(1).isCImm())
2151 IsZero = I.getOperand(1).getCImm()->getZExtValue() == 0;
2152 else if (I.getOperand(1).isImm())
2153 IsZero = I.getOperand(1).getImm() == 0;
2154
2155 if (!IsZero)
2156 return false;
2157
2158 Register DefReg = I.getOperand(0).getReg();
2159 LLT Ty = MRI.getType(DefReg);
2160 if (Ty.getSizeInBits() == 64) {
2161 I.getOperand(1).ChangeToRegister(AArch64::XZR, false);
2162 RBI.constrainGenericRegister(DefReg, AArch64::GPR64RegClass, MRI);
2163 } else if (Ty.getSizeInBits() == 32) {
2164 I.getOperand(1).ChangeToRegister(AArch64::WZR, false);
2165 RBI.constrainGenericRegister(DefReg, AArch64::GPR32RegClass, MRI);
2166 } else
2167 return false;
2168
2169 I.setDesc(TII.get(TargetOpcode::COPY));
2170 return true;
2171 }
2172
2173 case TargetOpcode::G_ADD: {
2174 // Check if this is being fed by a G_ICMP on either side.
2175 //
2176 // (cmp pred, x, y) + z
2177 //
2178 // In the above case, when the cmp is true, we increment z by 1. So, we can
2179 // fold the add into the cset for the cmp by using cinc.
2180 //
2181 // FIXME: This would probably be a lot nicer in PostLegalizerLowering.
2182 Register AddDst = I.getOperand(0).getReg();
2183 Register AddLHS = I.getOperand(1).getReg();
2184 Register AddRHS = I.getOperand(2).getReg();
2185 // Only handle scalars.
2186 LLT Ty = MRI.getType(AddLHS);
2187 if (Ty.isVector())
2188 return false;
2189 // Since G_ICMP is modeled as ADDS/SUBS/ANDS, we can handle 32 bits or 64
2190 // bits.
2191 unsigned Size = Ty.getSizeInBits();
2192 if (Size != 32 && Size != 64)
2193 return false;
2194 auto MatchCmp = [&](Register Reg) -> MachineInstr * {
2195 if (!MRI.hasOneNonDBGUse(Reg))
2196 return nullptr;
2197 // If the LHS of the add is 32 bits, then we want to fold a 32-bit
2198 // compare.
2199 if (Size == 32)
2200 return getOpcodeDef(TargetOpcode::G_ICMP, Reg, MRI);
2201 // We model scalar compares using 32-bit destinations right now.
2202 // If it's a 64-bit compare, it'll have 64-bit sources.
2203 Register ZExt;
2204 if (!mi_match(Reg, MRI,
2205 m_OneNonDBGUse(m_GZExt(m_OneNonDBGUse(m_Reg(ZExt))))))
2206 return nullptr;
2207 auto *Cmp = getOpcodeDef(TargetOpcode::G_ICMP, ZExt, MRI);
2208 if (!Cmp ||
2209 MRI.getType(Cmp->getOperand(2).getReg()).getSizeInBits() != 64)
2210 return nullptr;
2211 return Cmp;
2212 };
2213 // Try to match
2214 // z + (cmp pred, x, y)
2215 MachineInstr *Cmp = MatchCmp(AddRHS);
2216 if (!Cmp) {
2217 // (cmp pred, x, y) + z
2218 std::swap(AddLHS, AddRHS);
2219 Cmp = MatchCmp(AddRHS);
2220 if (!Cmp)
2221 return false;
2222 }
2223 auto &PredOp = Cmp->getOperand(1);
2224 auto Pred = static_cast<CmpInst::Predicate>(PredOp.getPredicate());
2225 const AArch64CC::CondCode InvCC =
2226 changeICMPPredToAArch64CC(CmpInst::getInversePredicate(Pred));
2227 MIB.setInstrAndDebugLoc(I);
2228 emitIntegerCompare(/*LHS=*/Cmp->getOperand(2),
2229 /*RHS=*/Cmp->getOperand(3), PredOp, MIB);
2230 emitCSINC(/*Dst=*/AddDst, /*Src =*/AddLHS, /*Src2=*/AddLHS, InvCC, MIB);
2231 I.eraseFromParent();
2232 return true;
2233 }
2234 case TargetOpcode::G_OR: {
2235 // Look for operations that take the lower `Width=Size-ShiftImm` bits of
2236 // `ShiftSrc` and insert them into the upper `Width` bits of `MaskSrc` via
2237 // shifting and masking that we can replace with a BFI (encoded as a BFM).
2238 Register Dst = I.getOperand(0).getReg();
2239 LLT Ty = MRI.getType(Dst);
2240
2241 if (!Ty.isScalar())
2242 return false;
2243
2244 unsigned Size = Ty.getSizeInBits();
2245 if (Size != 32 && Size != 64)
2246 return false;
2247
2248 Register ShiftSrc;
2249 int64_t ShiftImm;
2250 Register MaskSrc;
2251 int64_t MaskImm;
2252 if (!mi_match(
2253 Dst, MRI,
2254 m_GOr(m_OneNonDBGUse(m_GShl(m_Reg(ShiftSrc), m_ICst(ShiftImm))),
2255 m_OneNonDBGUse(m_GAnd(m_Reg(MaskSrc), m_ICst(MaskImm))))))
2256 return false;
2257
2258 if (ShiftImm > Size || ((1ULL << ShiftImm) - 1ULL) != uint64_t(MaskImm))
2259 return false;
2260
2261 int64_t Immr = Size - ShiftImm;
2262 int64_t Imms = Size - ShiftImm - 1;
2263 unsigned Opc = Size == 32 ? AArch64::BFMWri : AArch64::BFMXri;
2264 emitInstr(Opc, {Dst}, {MaskSrc, ShiftSrc, Immr, Imms}, MIB);
2265 I.eraseFromParent();
2266 return true;
2267 }
2268 default:
2269 return false;
2270 }
2271}
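
The G_OR case above matches or(shl(a, s), and(b, (1 << s) - 1)): the low s bits of the result come from b and everything above them is a shifted into place, which is what the emitted BFM with Immr = Size - s and Imms = Size - s - 1 expresses. A 64-bit sketch of the matched shape:

#include <cassert>
#include <cstdint>

// Insert the low (64 - s) bits of a above the low s bits of b (0 < s < 64).
static uint64_t orShlAnd(uint64_t a, uint64_t b, unsigned s) {
  return (a << s) | (b & ((1ULL << s) - 1));
}

int main() {
  // Low 8 bits of b are kept, the rest of the result is a shifted into place.
  assert(orShlAnd(0xabcd, 0xffffffffffffff12ULL, 8) == 0xabcd12ULL);
}
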
2272
2273bool AArch64InstructionSelector::select(MachineInstr &I) {
2274 assert(I.getParent() && "Instruction should be in a basic block!")(static_cast <bool> (I.getParent() && "Instruction should be in a basic block!"
) ? void (0) : __assert_fail ("I.getParent() && \"Instruction should be in a basic block!\""
, "llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 2274, __extension__ __PRETTY_FUNCTION__))
;
2275 assert(I.getParent()->getParent() && "Instruction should be in a function!")(static_cast <bool> (I.getParent()->getParent() &&
"Instruction should be in a function!") ? void (0) : __assert_fail
("I.getParent()->getParent() && \"Instruction should be in a function!\""
, "llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 2275, __extension__ __PRETTY_FUNCTION__))
;
2276
2277 MachineBasicBlock &MBB = *I.getParent();
2278 MachineFunction &MF = *MBB.getParent();
2279 MachineRegisterInfo &MRI = MF.getRegInfo();
2280
2281 const AArch64Subtarget *Subtarget =
2282 &static_cast<const AArch64Subtarget &>(MF.getSubtarget());
2283 if (Subtarget->requiresStrictAlign()) {
2284 // We don't support this feature yet.
2285 LLVM_DEBUG(dbgs() << "AArch64 GISel does not support strict-align yet\n");
2286 return false;
2287 }
2288
2289 MIB.setInstrAndDebugLoc(I);
2290
2291 unsigned Opcode = I.getOpcode();
2292 // G_PHI requires same handling as PHI
2293 if (!I.isPreISelOpcode() || Opcode == TargetOpcode::G_PHI) {
2294 // Certain non-generic instructions also need some special handling.
2295
2296 if (Opcode == TargetOpcode::LOAD_STACK_GUARD)
2297 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2298
2299 if (Opcode == TargetOpcode::PHI || Opcode == TargetOpcode::G_PHI) {
2300 const Register DefReg = I.getOperand(0).getReg();
2301 const LLT DefTy = MRI.getType(DefReg);
2302
2303 const RegClassOrRegBank &RegClassOrBank =
2304 MRI.getRegClassOrRegBank(DefReg);
2305
2306 const TargetRegisterClass *DefRC
2307 = RegClassOrBank.dyn_cast<const TargetRegisterClass *>();
2308 if (!DefRC) {
2309 if (!DefTy.isValid()) {
2310 LLVM_DEBUG(dbgs() << "PHI operand has no type, not a gvreg?\n");
2311 return false;
2312 }
2313 const RegisterBank &RB = *RegClassOrBank.get<const RegisterBank *>();
2314 DefRC = getRegClassForTypeOnBank(DefTy, RB, RBI);
2315 if (!DefRC) {
2316 LLVM_DEBUG(dbgs() << "PHI operand has unexpected size/bank\n");
2317 return false;
2318 }
2319 }
2320
2321 I.setDesc(TII.get(TargetOpcode::PHI));
2322
2323 return RBI.constrainGenericRegister(DefReg, *DefRC, MRI);
2324 }
2325
2326 if (I.isCopy())
2327 return selectCopy(I, TII, MRI, TRI, RBI);
2328
2329 return true;
2330 }
2331
2332
2333 if (I.getNumOperands() != I.getNumExplicitOperands()) {
2334 LLVM_DEBUG(
2335 dbgs() << "Generic instruction has unexpected implicit operands\n");
2336 return false;
2337 }
2338
2339 // Try to do some lowering before we start instruction selecting. These
2340 // lowerings are purely transformations on the input G_MIR and so selection
2341 // must continue after any modification of the instruction.
2342 if (preISelLower(I)) {
2343 Opcode = I.getOpcode(); // The opcode may have been modified, refresh it.
2344 }
2345
2346 // There may be patterns that the importer can't handle optimally but still
2347 // selects into a suboptimal sequence, so our custom C++ selection code later
2348 // never has a chance to work on them. Therefore, we have an early selection
2349 // attempt here to give priority to certain selection routines over the
2350 // imported ones.
2351 if (earlySelect(I))
2352 return true;
2353
2354 if (selectImpl(I, *CoverageInfo))
2355 return true;
2356
2357 LLT Ty =
2358 I.getOperand(0).isReg() ? MRI.getType(I.getOperand(0).getReg()) : LLT{};
2359
2360 switch (Opcode) {
2361 case TargetOpcode::G_SBFX:
2362 case TargetOpcode::G_UBFX: {
2363 static const unsigned OpcTable[2][2] = {
2364 {AArch64::UBFMWri, AArch64::UBFMXri},
2365 {AArch64::SBFMWri, AArch64::SBFMXri}};
2366 bool IsSigned = Opcode == TargetOpcode::G_SBFX;
2367 unsigned Size = Ty.getSizeInBits();
2368 unsigned Opc = OpcTable[IsSigned][Size == 64];
2369 auto Cst1 =
2370 getIConstantVRegValWithLookThrough(I.getOperand(2).getReg(), MRI);
2371 assert(Cst1 && "Should have gotten a constant for src 1?");
2372 auto Cst2 =
2373 getIConstantVRegValWithLookThrough(I.getOperand(3).getReg(), MRI);
2374 assert(Cst2 && "Should have gotten a constant for src 2?");
2375 auto LSB = Cst1->Value.getZExtValue();
2376 auto Width = Cst2->Value.getZExtValue();
2377 auto BitfieldInst =
2378 MIB.buildInstr(Opc, {I.getOperand(0)}, {I.getOperand(1)})
2379 .addImm(LSB)
2380 .addImm(LSB + Width - 1);
2381 I.eraseFromParent();
2382 return constrainSelectedInstRegOperands(*BitfieldInst, TII, TRI, RBI);
2383 }
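// Standalone illustrative sketch (not from this file): the G_UBFX/G_SBFX
// operands map directly onto the two BFM immediates built above, immr = LSB
// and imms = LSB + Width - 1. The helper names are made up for the example.
// E.g. extracting 8 bits starting at bit 4 of a 32-bit value becomes
// UBFMWri dst, src, 4, 11.
#include <cstdint>

constexpr uint64_t bfxImmr(uint64_t LSB) { return LSB; }
constexpr uint64_t bfxImms(uint64_t LSB, uint64_t Width) {
  return LSB + Width - 1;
}

static_assert(bfxImmr(4) == 4, "immr is the extract's LSB");
static_assert(bfxImms(4, 8) == 11, "imms is LSB + Width - 1");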
2384 case TargetOpcode::G_BRCOND:
2385 return selectCompareBranch(I, MF, MRI);
2386
2387 case TargetOpcode::G_BRINDIRECT: {
2388 I.setDesc(TII.get(AArch64::BR));
2389 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2390 }
2391
2392 case TargetOpcode::G_BRJT:
2393 return selectBrJT(I, MRI);
2394
2395 case AArch64::G_ADD_LOW: {
2396 // This op may have been separated from its ADRP companion by the localizer
2397 // or some other code motion pass. Given that many CPUs will try to
2398 // macro fuse these operations anyway, select this into a MOVaddr pseudo
2399 // which will later be expanded into an ADRP+ADD pair after scheduling.
2400 MachineInstr *BaseMI = MRI.getVRegDef(I.getOperand(1).getReg());
2401 if (BaseMI->getOpcode() != AArch64::ADRP) {
2402 I.setDesc(TII.get(AArch64::ADDXri));
2403 I.addOperand(MachineOperand::CreateImm(0));
2404 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2405 }
2406 assert(TM.getCodeModel() == CodeModel::Small &&
2407 "Expected small code model");
2408 auto Op1 = BaseMI->getOperand(1);
2409 auto Op2 = I.getOperand(2);
2410 auto MovAddr = MIB.buildInstr(AArch64::MOVaddr, {I.getOperand(0)}, {})
2411 .addGlobalAddress(Op1.getGlobal(), Op1.getOffset(),
2412 Op1.getTargetFlags())
2413 .addGlobalAddress(Op2.getGlobal(), Op2.getOffset(),
2414 Op2.getTargetFlags());
2415 I.eraseFromParent();
2416 return constrainSelectedInstRegOperands(*MovAddr, TII, TRI, RBI);
2417 }
2418
2419 case TargetOpcode::G_BSWAP: {
2420 // Handle vector types for G_BSWAP directly.
2421 Register DstReg = I.getOperand(0).getReg();
2422 LLT DstTy = MRI.getType(DstReg);
2423
2424 // We should only get vector types here; everything else is handled by the
2425 // importer right now.
2426 if (!DstTy.isVector() || DstTy.getSizeInBits() > 128) {
2427 LLVM_DEBUG(dbgs() << "Dst type for G_BSWAP currently unsupported.\n");
2428 return false;
2429 }
2430
2431 // Only handle 4 and 2 element vectors for now.
2432 // TODO: 16-bit elements.
2433 unsigned NumElts = DstTy.getNumElements();
2434 if (NumElts != 4 && NumElts != 2) {
2435 LLVM_DEBUG(dbgs() << "Unsupported number of elements for G_BSWAP.\n");
2436 return false;
2437 }
2438
2439 // Choose the correct opcode for the supported types. Right now, that's
2440 // v2s32, v4s32, and v2s64.
2441 unsigned Opc = 0;
2442 unsigned EltSize = DstTy.getElementType().getSizeInBits();
2443 if (EltSize == 32)
2444 Opc = (DstTy.getNumElements() == 2) ? AArch64::REV32v8i8
2445 : AArch64::REV32v16i8;
2446 else if (EltSize == 64)
2447 Opc = AArch64::REV64v16i8;
2448
2449 // We should always get something by the time we get here...
2450 assert(Opc != 0 && "Didn't get an opcode for G_BSWAP?");
2451
2452 I.setDesc(TII.get(Opc));
2453 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2454 }
2455
2456 case TargetOpcode::G_FCONSTANT:
2457 case TargetOpcode::G_CONSTANT: {
2458 const bool isFP = Opcode == TargetOpcode::G_FCONSTANT;
2459
2460 const LLT s8 = LLT::scalar(8);
2461 const LLT s16 = LLT::scalar(16);
2462 const LLT s32 = LLT::scalar(32);
2463 const LLT s64 = LLT::scalar(64);
2464 const LLT s128 = LLT::scalar(128);
2465 const LLT p0 = LLT::pointer(0, 64);
2466
2467 const Register DefReg = I.getOperand(0).getReg();
2468 const LLT DefTy = MRI.getType(DefReg);
2469 const unsigned DefSize = DefTy.getSizeInBits();
2470 const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI);
2471
2472 // FIXME: Redundant check, but even less readable when factored out.
2473 if (isFP) {
2474 if (Ty != s16 && Ty != s32 && Ty != s64 && Ty != s128) {
2475 LLVM_DEBUG(dbgs() << "Unable to materialize FP " << Ty
2476 << " constant, expected: " << s16 << " or " << s32
2477 << " or " << s64 << " or " << s128 << '\n');
2478 return false;
2479 }
2480
2481 if (RB.getID() != AArch64::FPRRegBankID) {
2482 LLVM_DEBUG(dbgs() << "Unable to materialize FP " << Ty
2483 << " constant on bank: " << RB
2484 << ", expected: FPR\n");
2485 return false;
2486 }
2487
2488 // The case when we have 0.0 is covered by tablegen. Reject it here so we
2489 // can be sure tablegen works correctly and isn't rescued by this code.
2490 // 0.0 is not covered by tablegen for FP128, so we handle that scenario
2491 // in the code here.
2492 if (DefSize != 128 && I.getOperand(1).getFPImm()->isExactlyValue(0.0))
2493 return false;
2494 } else {
2495 // s32 and s64 are covered by tablegen.
2496 if (Ty != p0 && Ty != s8 && Ty != s16) {
2497 LLVM_DEBUG(dbgs() << "Unable to materialize integer " << Ty
2498 << " constant, expected: " << s32 << ", " << s64
2499 << ", or " << p0 << '\n');
2500 return false;
2501 }
2502
2503 if (RB.getID() != AArch64::GPRRegBankID) {
2504 LLVM_DEBUG(dbgs() << "Unable to materialize integer " << Ty
2505 << " constant on bank: " << RB
2506 << ", expected: GPR\n");
2507 return false;
2508 }
2509 }
2510
2511 if (isFP) {
2512 const TargetRegisterClass &FPRRC = *getMinClassForRegBank(RB, DefSize);
2513 // For 16, 64, and 128b values, emit a constant pool load.
2514 switch (DefSize) {
2515 default:
2516 llvm_unreachable("Unexpected destination size for G_FCONSTANT?");
2517 case 32:
2518 // For s32, use a cp load if we have optsize/minsize.
2519 if (!shouldOptForSize(&MF))
2520 break;
2521 LLVM_FALLTHROUGH;
2522 case 16:
2523 case 64:
2524 case 128: {
2525 auto *FPImm = I.getOperand(1).getFPImm();
2526 auto *LoadMI = emitLoadFromConstantPool(FPImm, MIB);
2527 if (!LoadMI) {
2528 LLVM_DEBUG(dbgs() << "Failed to load double constant pool entry\n");
2529 return false;
2530 }
2531 MIB.buildCopy({DefReg}, {LoadMI->getOperand(0).getReg()});
2532 I.eraseFromParent();
2533 return RBI.constrainGenericRegister(DefReg, FPRRC, MRI);
2534 }
2535 }
2536
2537 // Either emit a FMOV, or emit a copy to emit a normal mov.
2538 assert(DefSize == 32 &&
2539 "Expected constant pool loads for all sizes other than 32!");
2540 const Register DefGPRReg =
2541 MRI.createVirtualRegister(&AArch64::GPR32RegClass);
2542 MachineOperand &RegOp = I.getOperand(0);
2543 RegOp.setReg(DefGPRReg);
2544 MIB.setInsertPt(MIB.getMBB(), std::next(I.getIterator()));
2545 MIB.buildCopy({DefReg}, {DefGPRReg});
2546
2547 if (!RBI.constrainGenericRegister(DefReg, FPRRC, MRI)) {
2548 LLVM_DEBUG(dbgs() << "Failed to constrain G_FCONSTANT def operand\n");
2549 return false;
2550 }
2551
2552 MachineOperand &ImmOp = I.getOperand(1);
2553 // FIXME: Is going through int64_t always correct?
2554 ImmOp.ChangeToImmediate(
2555 ImmOp.getFPImm()->getValueAPF().bitcastToAPInt().getZExtValue());
2556 } else if (I.getOperand(1).isCImm()) {
2557 uint64_t Val = I.getOperand(1).getCImm()->getZExtValue();
2558 I.getOperand(1).ChangeToImmediate(Val);
2559 } else if (I.getOperand(1).isImm()) {
2560 uint64_t Val = I.getOperand(1).getImm();
2561 I.getOperand(1).ChangeToImmediate(Val);
2562 }
2563
2564 const unsigned MovOpc =
2565 DefSize == 64 ? AArch64::MOVi64imm : AArch64::MOVi32imm;
2566 I.setDesc(TII.get(MovOpc));
2567 constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2568 return true;
2569 }
2570 case TargetOpcode::G_EXTRACT: {
2571 Register DstReg = I.getOperand(0).getReg();
2572 Register SrcReg = I.getOperand(1).getReg();
2573 LLT SrcTy = MRI.getType(SrcReg);
2574 LLT DstTy = MRI.getType(DstReg);
2575 (void)DstTy;
2576 unsigned SrcSize = SrcTy.getSizeInBits();
2577
2578 if (SrcTy.getSizeInBits() > 64) {
2579 // This should be an extract of an s128, which is like a vector extract.
2580 if (SrcTy.getSizeInBits() != 128)
2581 return false;
2582 // Only support extracting 64 bits from an s128 at the moment.
2583 if (DstTy.getSizeInBits() != 64)
2584 return false;
2585
2586 unsigned Offset = I.getOperand(2).getImm();
2587 if (Offset % 64 != 0)
2588 return false;
2589
2590 // Check we have the right regbank always.
2591 const RegisterBank &SrcRB = *RBI.getRegBank(SrcReg, MRI, TRI);
2592 const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
2593 assert(SrcRB.getID() == DstRB.getID() && "Wrong extract regbank!");
2594
2595 if (SrcRB.getID() == AArch64::GPRRegBankID) {
2596 MIB.buildInstr(TargetOpcode::COPY, {DstReg}, {})
2597 .addUse(SrcReg, 0, Offset == 0 ? AArch64::sube64 : AArch64::subo64);
2598 I.eraseFromParent();
2599 return true;
2600 }
2601
2602 // Emit the same code as a vector extract.
2603 // Offset must be a multiple of 64.
2604 unsigned LaneIdx = Offset / 64;
2605 MachineInstr *Extract = emitExtractVectorElt(
2606 DstReg, DstRB, LLT::scalar(64), SrcReg, LaneIdx, MIB);
2607 if (!Extract)
2608 return false;
2609 I.eraseFromParent();
2610 return true;
2611 }
2612
2613 I.setDesc(TII.get(SrcSize == 64 ? AArch64::UBFMXri : AArch64::UBFMWri));
2614 MachineInstrBuilder(MF, I).addImm(I.getOperand(2).getImm() +
2615 Ty.getSizeInBits() - 1);
2616
2617 if (SrcSize < 64) {
2618 assert(SrcSize == 32 && DstTy.getSizeInBits() == 16 &&
2619 "unexpected G_EXTRACT types");
2620 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2621 }
2622
2623 DstReg = MRI.createGenericVirtualRegister(LLT::scalar(64));
2624 MIB.setInsertPt(MIB.getMBB(), std::next(I.getIterator()));
2625 MIB.buildInstr(TargetOpcode::COPY, {I.getOperand(0).getReg()}, {})
2626 .addReg(DstReg, 0, AArch64::sub_32);
2627 RBI.constrainGenericRegister(I.getOperand(0).getReg(),
2628 AArch64::GPR32RegClass, MRI);
2629 I.getOperand(0).setReg(DstReg);
2630
2631 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2632 }
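// Standalone illustrative sketch (not from this file): for the sub-64-bit
// G_EXTRACT path above, the bit offset becomes the first UBFM immediate and
// the second is offset + extracted-width - 1, while for the s128 path the bit
// offset (a multiple of 64) simply selects a 64-bit lane. The helper names
// are made up for the example. E.g. extracting an s16 at bit offset 8 from an
// s32 becomes UBFMWri dst, src, 8, 23.
#include <cstdint>

constexpr unsigned ubfmImms(unsigned Offset, unsigned Width) {
  return Offset + Width - 1;
}
constexpr unsigned s128Lane(unsigned BitOffset) { return BitOffset / 64; }

static_assert(ubfmImms(8, 16) == 23, "extract s16 at bit 8 -> imms 23");
static_assert(s128Lane(0) == 0, "lower half of an s128 is lane 0");
static_assert(s128Lane(64) == 1, "upper half of an s128 is lane 1");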
2633
2634 case TargetOpcode::G_INSERT: {
2635 LLT SrcTy = MRI.getType(I.getOperand(2).getReg());
2636 LLT DstTy = MRI.getType(I.getOperand(0).getReg());
2637 unsigned DstSize = DstTy.getSizeInBits();
2638 // Larger inserts are vectors, same-size ones should be something else by
2639 // now (split up or turned into COPYs).
2640 if (Ty.getSizeInBits() > 64 || SrcTy.getSizeInBits() > 32)
2641 return false;
2642
2643 I.setDesc(TII.get(DstSize == 64 ? AArch64::BFMXri : AArch64::BFMWri));
2644 unsigned LSB = I.getOperand(3).getImm();
2645 unsigned Width = MRI.getType(I.getOperand(2).getReg()).getSizeInBits();
2646 I.getOperand(3).setImm((DstSize - LSB) % DstSize);
2647 MachineInstrBuilder(MF, I).addImm(Width - 1);
2648
2649 if (DstSize < 64) {
2650 assert(DstSize == 32 && SrcTy.getSizeInBits() == 16 &&
2651 "unexpected G_INSERT types");
2652 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2653 }
2654
2655 Register SrcReg = MRI.createGenericVirtualRegister(LLT::scalar(64));
2656 BuildMI(MBB, I.getIterator(), I.getDebugLoc(),
2657 TII.get(AArch64::SUBREG_TO_REG))
2658 .addDef(SrcReg)
2659 .addImm(0)
2660 .addUse(I.getOperand(2).getReg())
2661 .addImm(AArch64::sub_32);
2662 RBI.constrainGenericRegister(I.getOperand(2).getReg(),
2663 AArch64::GPR32RegClass, MRI);
2664 I.getOperand(2).setReg(SrcReg);
2665
2666 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2667 }
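// Standalone illustrative sketch (not from this file): the BFM built above
// encodes the insert position as immr = (DstSize - LSB) % DstSize and the
// field width as imms = Width - 1. The helper names are made up for the
// example. E.g. inserting an s16 at bit 16 of an s32 becomes BFMWri with
// immr = 16 and imms = 15.
#include <cstdint>

constexpr unsigned bfmInsertImmr(unsigned DstSize, unsigned LSB) {
  return (DstSize - LSB) % DstSize;
}
constexpr unsigned bfmInsertImms(unsigned Width) { return Width - 1; }

static_assert(bfmInsertImmr(32, 16) == 16, "insert at bit 16 of an s32");
static_assert(bfmInsertImmr(32, 0) == 0, "insert at bit 0 keeps immr at 0");
static_assert(bfmInsertImms(16) == 15, "a 16-bit field encodes imms = 15");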
2668 case TargetOpcode::G_FRAME_INDEX: {
2669 // allocas and G_FRAME_INDEX are only supported in addrspace(0).
2670 if (Ty != LLT::pointer(0, 64)) {
2671 LLVM_DEBUG(dbgs() << "G_FRAME_INDEX pointer has type: " << Ty
2672 << ", expected: " << LLT::pointer(0, 64) << '\n');
2673 return false;
2674 }
2675 I.setDesc(TII.get(AArch64::ADDXri));
2676
2677 // MOs for a #0 shifted immediate.
2678 I.addOperand(MachineOperand::CreateImm(0));
2679 I.addOperand(MachineOperand::CreateImm(0));
2680
2681 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2682 }
2683
2684 case TargetOpcode::G_GLOBAL_VALUE: {
2685 auto GV = I.getOperand(1).getGlobal();
2686 if (GV->isThreadLocal())
2687 return selectTLSGlobalValue(I, MRI);
2688
2689 unsigned OpFlags = STI.ClassifyGlobalReference(GV, TM);
2690 if (OpFlags & AArch64II::MO_GOT) {
2691 I.setDesc(TII.get(AArch64::LOADgot));
2692 I.getOperand(1).setTargetFlags(OpFlags);
2693 } else if (TM.getCodeModel() == CodeModel::Large) {
2694 // Materialize the global using movz/movk instructions.
2695 materializeLargeCMVal(I, GV, OpFlags);
2696 I.eraseFromParent();
2697 return true;
2698 } else if (TM.getCodeModel() == CodeModel::Tiny) {
2699 I.setDesc(TII.get(AArch64::ADR));
2700 I.getOperand(1).setTargetFlags(OpFlags);
2701 } else {
2702 I.setDesc(TII.get(AArch64::MOVaddr));
2703 I.getOperand(1).setTargetFlags(OpFlags | AArch64II::MO_PAGE);
2704 MachineInstrBuilder MIB(MF, I);
2705 MIB.addGlobalAddress(GV, I.getOperand(1).getOffset(),
2706 OpFlags | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
2707 }
2708 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2709 }
2710
2711 case TargetOpcode::G_ZEXTLOAD:
2712 case TargetOpcode::G_LOAD:
2713 case TargetOpcode::G_STORE: {
2714 GLoadStore &LdSt = cast<GLoadStore>(I);
2715 bool IsZExtLoad = I.getOpcode() == TargetOpcode::G_ZEXTLOAD;
2716 LLT PtrTy = MRI.getType(LdSt.getPointerReg());
2717
2718 if (PtrTy != LLT::pointer(0, 64)) {
2719 LLVM_DEBUG(dbgs() << "Load/Store pointer has type: " << PtrTy
2720 << ", expected: " << LLT::pointer(0, 64) << '\n');
2721 return false;
2722 }
2723
2724 uint64_t MemSizeInBytes = LdSt.getMemSize();
2725 unsigned MemSizeInBits = LdSt.getMemSizeInBits();
2726 AtomicOrdering Order = LdSt.getMMO().getSuccessOrdering();
2727
2728 // Need special instructions for atomics that affect ordering.
2729 if (Order != AtomicOrdering::NotAtomic &&
2730 Order != AtomicOrdering::Unordered &&
2731 Order != AtomicOrdering::Monotonic) {
2732 assert(!isa<GZExtLoad>(LdSt));
2733 if (MemSizeInBytes > 64)
2734 return false;
2735
2736 if (isa<GLoad>(LdSt)) {
2737 static unsigned Opcodes[] = {AArch64::LDARB, AArch64::LDARH,
2738 AArch64::LDARW, AArch64::LDARX};
2739 I.setDesc(TII.get(Opcodes[Log2_32(MemSizeInBytes)]));
2740 } else {
2741 static unsigned Opcodes[] = {AArch64::STLRB, AArch64::STLRH,
2742 AArch64::STLRW, AArch64::STLRX};
2743 Register ValReg = LdSt.getReg(0);
2744 if (MRI.getType(ValReg).getSizeInBits() == 64 && MemSizeInBits != 64) {
2745 // Emit a subreg copy of 32 bits.
2746 Register NewVal = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
2747 MIB.buildInstr(TargetOpcode::COPY, {NewVal}, {})
2748 .addReg(I.getOperand(0).getReg(), 0, AArch64::sub_32);
2749 I.getOperand(0).setReg(NewVal);
2750 }
2751 I.setDesc(TII.get(Opcodes[Log2_32(MemSizeInBytes)]));
2752 }
2753 constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2754 return true;
2755 }
2756
2757#ifndef NDEBUG
2758 const Register PtrReg = LdSt.getPointerReg();
2759 const RegisterBank &PtrRB = *RBI.getRegBank(PtrReg, MRI, TRI);
2760 // Check that the pointer register is valid.
2761 assert(PtrRB.getID() == AArch64::GPRRegBankID &&
2762 "Load/Store pointer operand isn't a GPR");
2763 assert(MRI.getType(PtrReg).isPointer() &&
2764 "Load/Store pointer operand isn't a pointer");
2765#endif
2766
2767 const Register ValReg = LdSt.getReg(0);
2768 const LLT ValTy = MRI.getType(ValReg);
2769 const RegisterBank &RB = *RBI.getRegBank(ValReg, MRI, TRI);
2770
2771 // The code below doesn't support truncating stores, so we need to split it
2772 // again.
2773 if (isa<GStore>(LdSt) && ValTy.getSizeInBits() > MemSizeInBits) {
2774 unsigned SubReg;
2775 LLT MemTy = LdSt.getMMO().getMemoryType();
2776 auto *RC = getRegClassForTypeOnBank(MemTy, RB, RBI);
2777 if (!getSubRegForClass(RC, TRI, SubReg))
2778 return false;
2779
2780 // Generate a subreg copy.
2781 auto Copy = MIB.buildInstr(TargetOpcode::COPY, {MemTy}, {})
2782 .addReg(ValReg, 0, SubReg)
2783 .getReg(0);
2784 RBI.constrainGenericRegister(Copy, *RC, MRI);
2785 LdSt.getOperand(0).setReg(Copy);
2786 } else if (isa<GLoad>(LdSt) && ValTy.getSizeInBits() > MemSizeInBits) {
2787 // If this is an any-extending load from the FPR bank, split it into a regular
2788 // load + extend.
2789 if (RB.getID() == AArch64::FPRRegBankID) {
2790 unsigned SubReg;
2791 LLT MemTy = LdSt.getMMO().getMemoryType();
2792 auto *RC = getRegClassForTypeOnBank(MemTy, RB, RBI);
2793 if (!getSubRegForClass(RC, TRI, SubReg))
2794 return false;
2795 Register OldDst = LdSt.getReg(0);
2796 Register NewDst =
2797 MRI.createGenericVirtualRegister(LdSt.getMMO().getMemoryType());
2798 LdSt.getOperand(0).setReg(NewDst);
2799 MRI.setRegBank(NewDst, RB);
2800 // Generate a SUBREG_TO_REG to extend it.
2801 MIB.setInsertPt(MIB.getMBB(), std::next(LdSt.getIterator()));
2802 MIB.buildInstr(AArch64::SUBREG_TO_REG, {OldDst}, {})
2803 .addImm(0)
2804 .addUse(NewDst)
2805 .addImm(SubReg);
2806 auto SubRegRC = getRegClassForTypeOnBank(MRI.getType(OldDst), RB, RBI);
2807 RBI.constrainGenericRegister(OldDst, *SubRegRC, MRI);
2808 MIB.setInstr(LdSt);
2809 }
2810 }
2811
2812 // Helper lambda for partially selecting I. Either returns the original
2813 // instruction with an updated opcode, or a new instruction.
2814 auto SelectLoadStoreAddressingMode = [&]() -> MachineInstr * {
2815 bool IsStore = isa<GStore>(I);
2816 const unsigned NewOpc =
2817 selectLoadStoreUIOp(I.getOpcode(), RB.getID(), MemSizeInBits);
2818 if (NewOpc == I.getOpcode())
2819 return nullptr;
2820 // Check if we can fold anything into the addressing mode.
2821 auto AddrModeFns =
2822 selectAddrModeIndexed(I.getOperand(1), MemSizeInBytes);
2823 if (!AddrModeFns) {
2824 // Can't fold anything. Use the original instruction.
2825 I.setDesc(TII.get(NewOpc));
2826 I.addOperand(MachineOperand::CreateImm(0));
2827 return &I;
2828 }
2829
2830 // Folded something. Create a new instruction and return it.
2831 auto NewInst = MIB.buildInstr(NewOpc, {}, {}, I.getFlags());
2832 Register CurValReg = I.getOperand(0).getReg();
2833 IsStore ? NewInst.addUse(CurValReg) : NewInst.addDef(CurValReg);
2834 NewInst.cloneMemRefs(I);
2835 for (auto &Fn : *AddrModeFns)
2836 Fn(NewInst);
2837 I.eraseFromParent();
2838 return &*NewInst;
2839 };
2840
2841 MachineInstr *LoadStore = SelectLoadStoreAddressingMode();
2842 if (!LoadStore)
2843 return false;
2844
2845 // If we're storing a 0, use WZR/XZR.
2846 if (Opcode == TargetOpcode::G_STORE) {
2847 auto CVal = getIConstantVRegValWithLookThrough(
2848 LoadStore->getOperand(0).getReg(), MRI);
2849 if (CVal && CVal->Value == 0) {
2850 switch (LoadStore->getOpcode()) {
2851 case AArch64::STRWui:
2852 case AArch64::STRHHui:
2853 case AArch64::STRBBui:
2854 LoadStore->getOperand(0).setReg(AArch64::WZR);
2855 break;
2856 case AArch64::STRXui:
2857 LoadStore->getOperand(0).setReg(AArch64::XZR);
2858 break;
2859 }
2860 }
2861 }
2862
2863 if (IsZExtLoad) {
2864 // The zextload from a smaller type to i32 should be handled by the
2865 // importer.
2866 if (MRI.getType(LoadStore->getOperand(0).getReg()).getSizeInBits() != 64)
2867 return false;
2868 // If we have a ZEXTLOAD then change the load's type to be a narrower reg
2869 // and zero_extend with SUBREG_TO_REG.
2870 Register LdReg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
2871 Register DstReg = LoadStore->getOperand(0).getReg();
2872 LoadStore->getOperand(0).setReg(LdReg);
2873
2874 MIB.setInsertPt(MIB.getMBB(), std::next(LoadStore->getIterator()));
2875 MIB.buildInstr(AArch64::SUBREG_TO_REG, {DstReg}, {})
2876 .addImm(0)
2877 .addUse(LdReg)
2878 .addImm(AArch64::sub_32);
2879 constrainSelectedInstRegOperands(*LoadStore, TII, TRI, RBI);
2880 return RBI.constrainGenericRegister(DstReg, AArch64::GPR64allRegClass,
2881 MRI);
2882 }
2883 return constrainSelectedInstRegOperands(*LoadStore, TII, TRI, RBI);
2884 }
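// Standalone illustrative sketch (not from this file): the acquire/release
// opcode tables in the atomic-ordering handling above are indexed by the base-2
// log of the access size in bytes, so a 4-byte atomic load picks entry 2
// (LDARW) and an 8-byte atomic store picks entry 3 (STLRX). log2_32 below is a
// local stand-in for llvm::Log2_32, written only for this example.
#include <cstdint>

constexpr unsigned log2_32(uint32_t V) {
  return V <= 1 ? 0 : 1 + log2_32(V >> 1);
}

static_assert(log2_32(1) == 0, "1-byte access -> LDARB/STLRB slot");
static_assert(log2_32(2) == 1, "2-byte access -> LDARH/STLRH slot");
static_assert(log2_32(4) == 2, "4-byte access -> LDARW/STLRW slot");
static_assert(log2_32(8) == 3, "8-byte access -> LDARX/STLRX slot");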
2885
2886 case TargetOpcode::G_SMULH:
2887 case TargetOpcode::G_UMULH: {
2888 // Reject the various things we don't support yet.
2889 if (unsupportedBinOp(I, RBI, MRI, TRI))
2890 return false;
2891
2892 const Register DefReg = I.getOperand(0).getReg();
2893 const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI);
2894
2895 if (RB.getID() != AArch64::GPRRegBankID) {
2896 LLVM_DEBUG(dbgs() << "G_[SU]MULH on bank: " << RB << ", expected: GPR\n");
2897 return false;
2898 }
2899
2900 if (Ty != LLT::scalar(64)) {
2901 LLVM_DEBUG(dbgs() << "G_[SU]MULH has type: " << Ty
2902 << ", expected: " << LLT::scalar(64) << '\n');
2903 return false;
2904 }
2905
2906 unsigned NewOpc = I.getOpcode() == TargetOpcode::G_SMULH ? AArch64::SMULHrr
2907 : AArch64::UMULHrr;
2908 I.setDesc(TII.get(NewOpc));
2909
2910 // Now that we selected an opcode, we need to constrain the register
2911 // operands to use appropriate classes.
2912 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2913 }
2914 case TargetOpcode::G_LSHR:
2915 case TargetOpcode::G_ASHR:
2916 if (MRI.getType(I.getOperand(0).getReg()).isVector())
2917 return selectVectorAshrLshr(I, MRI);
2918 LLVM_FALLTHROUGH;
2919 case TargetOpcode::G_SHL:
2920 if (Opcode == TargetOpcode::G_SHL &&
2921 MRI.getType(I.getOperand(0).getReg()).isVector())
2922 return selectVectorSHL(I, MRI);
2923
2924 // These shifts were legalized to have 64 bit shift amounts because we
2925 // want to take advantage of the selection patterns that assume the
2926 // immediates are s64s; however, selectBinaryOp will assume both operands
2927 // have the same bit size.
2928 {
2929 Register SrcReg = I.getOperand(1).getReg();
2930 Register ShiftReg = I.getOperand(2).getReg();
2931 const LLT ShiftTy = MRI.getType(ShiftReg);
2932 const LLT SrcTy = MRI.getType(SrcReg);
2933 if (!SrcTy.isVector() && SrcTy.getSizeInBits() == 32 &&
2934 ShiftTy.getSizeInBits() == 64) {
2935 assert(!ShiftTy.isVector() && "unexpected vector shift ty");
2936 assert(MRI.getVRegDef(ShiftReg) &&
2937 "could not find a vreg definition for shift amount");
2938 // Insert a subregister copy to implement a 64->32 trunc
2939 auto Trunc = MIB.buildInstr(TargetOpcode::COPY, {SrcTy}, {})
2940 .addReg(ShiftReg, 0, AArch64::sub_32);
2941 MRI.setRegBank(Trunc.getReg(0), RBI.getRegBank(AArch64::GPRRegBankID));
2942 I.getOperand(2).setReg(Trunc.getReg(0));
2943 }
2944 }
2945 LLVM_FALLTHROUGH;
2946 case TargetOpcode::G_FADD:
2947 case TargetOpcode::G_FSUB:
2948 case TargetOpcode::G_FMUL:
2949 case TargetOpcode::G_FDIV:
2950 case TargetOpcode::G_OR: {
2951 // Reject the various things we don't support yet.
2952 if (unsupportedBinOp(I, RBI, MRI, TRI))
2953 return false;
2954
2955 const unsigned OpSize = Ty.getSizeInBits();
2956
2957 const Register DefReg = I.getOperand(0).getReg();
2958 const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI);
2959
2960 const unsigned NewOpc = selectBinaryOp(I.getOpcode(), RB.getID(), OpSize);
2961 if (NewOpc == I.getOpcode())
2962 return false;
2963
2964 I.setDesc(TII.get(NewOpc));
2965 // FIXME: Should the type be always reset in setDesc?
2966
2967 // Now that we selected an opcode, we need to constrain the register
2968 // operands to use appropriate classes.
2969 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2970 }
2971
2972 case TargetOpcode::G_PTR_ADD: {
2973 emitADD(I.getOperand(0).getReg(), I.getOperand(1), I.getOperand(2), MIB);
2974 I.eraseFromParent();
2975 return true;
2976 }
2977 case TargetOpcode::G_SADDO:
2978 case TargetOpcode::G_UADDO:
2979 case TargetOpcode::G_SSUBO:
2980 case TargetOpcode::G_USUBO: {
2981 // Emit the operation and get the correct condition code.
2982 auto OpAndCC = emitOverflowOp(Opcode, I.getOperand(0).getReg(),
2983 I.getOperand(2), I.getOperand(3), MIB);
2984
2985 // Now, put the overflow result in the register given by the first operand
2986 // to the overflow op. CSINC increments the result when the predicate is
2987 // false, so to get the increment when it's true, we need to use the
2988 // inverse. In this case, we want to increment when carry is set.
2989 Register ZReg = AArch64::WZR;
2990 emitCSINC(/*Dst=*/I.getOperand(1).getReg(), /*Src1=*/ZReg, /*Src2=*/ZReg,
2991 getInvertedCondCode(OpAndCC.second), MIB);
2992 I.eraseFromParent();
2993 return true;
2994 }
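// Standalone illustrative sketch (not from this file): CSINC writes Src1 when
// its condition holds and Src2 + 1 when it does not, so materializing the
// overflow bit above with both sources tied to the zero register and the
// *inverted* overflow condition yields 1 exactly when the operation
// overflowed. csinc below models only that semantic; it is not an LLVM API.
#include <cstdint>

constexpr uint32_t csinc(bool CondHolds, uint32_t Src1, uint32_t Src2) {
  return CondHolds ? Src1 : Src2 + 1;
}

// Overflow -> the inverted condition is false -> result is WZR + 1 == 1.
static_assert(csinc(/*InvertedCondHolds=*/false, 0, 0) == 1, "overflow case");
static_assert(csinc(/*InvertedCondHolds=*/true, 0, 0) == 0, "no-overflow case");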
2995
2996 case TargetOpcode::G_PTRMASK: {
2997 Register MaskReg = I.getOperand(2).getReg();
2998 Optional<int64_t> MaskVal = getIConstantVRegSExtVal(MaskReg, MRI);
2999 // TODO: Implement arbitrary cases
3000 if (!MaskVal || !isShiftedMask_64(*MaskVal))
3001 return false;
3002
3003 uint64_t Mask = *MaskVal;
3004 I.setDesc(TII.get(AArch64::ANDXri));
3005 I.getOperand(2).ChangeToImmediate(
3006 AArch64_AM::encodeLogicalImmediate(Mask, 64));
3007
3008 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3009 }
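// Standalone illustrative sketch (not from this file): per the TODO above,
// only masks that form one contiguous run of set bits are handled, since those
// can be fed straight to ANDXri as a logical immediate. The helpers below are
// local stand-ins for llvm::isMask_64 / llvm::isShiftedMask_64, written only
// for this example. E.g. clearing the low four bits of a pointer (16-byte
// alignment) uses 0xFFFFFFFFFFFFFFF0, which qualifies; 0xFF00FF00 does not.
#include <cstdint>

constexpr bool isMask64(uint64_t V) { return V && ((V + 1) & V) == 0; }
constexpr bool isShiftedMask64(uint64_t V) {
  return V && isMask64((V - 1) | V);
}

static_assert(isShiftedMask64(0xFFFFFFFFFFFFFFF0ULL), "align-to-16 mask is handled");
static_assert(!isShiftedMask64(0xFF00FF00ULL), "split masks are rejected");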
3010 case TargetOpcode::G_PTRTOINT:
3011 case TargetOpcode::G_TRUNC: {
3012 const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
3013 const LLT SrcTy = MRI.getType(I.getOperand(1).getReg());
3014
3015 const Register DstReg = I.getOperand(0).getReg();
3016 const Register SrcReg = I.getOperand(1).getReg();
3017
3018 const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
3019 const RegisterBank &SrcRB = *RBI.getRegBank(SrcReg, MRI, TRI);
3020
3021 if (DstRB.getID() != SrcRB.getID()) {
3022 LLVM_DEBUG(
3023 dbgs() << "G_TRUNC/G_PTRTOINT input/output on different banks\n");
3024 return false;
3025 }
3026
3027 if (DstRB.getID() == AArch64::GPRRegBankID) {
3028 const TargetRegisterClass *DstRC =
3029 getRegClassForTypeOnBank(DstTy, DstRB, RBI);
3030 if (!DstRC)
3031 return false;
3032
3033 const TargetRegisterClass *SrcRC =
3034 getRegClassForTypeOnBank(SrcTy, SrcRB, RBI);
3035 if (!SrcRC)
3036 return false;
3037
3038 if (!RBI.constrainGenericRegister(SrcReg, *SrcRC, MRI) ||
3039 !RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) {
3040 LLVM_DEBUG(dbgs() << "Failed to constrain G_TRUNC/G_PTRTOINT\n");
3041 return false;
3042 }
3043
3044 if (DstRC == SrcRC) {
3045 // Nothing to be done
3046 } else if (Opcode == TargetOpcode::G_TRUNC && DstTy == LLT::scalar(32) &&
3047 SrcTy == LLT::scalar(64)) {
3048 llvm_unreachable("TableGen can import this case");
3049 return false;
3050 } else if (DstRC == &AArch64::GPR32RegClass &&
3051 SrcRC == &AArch64::GPR64RegClass) {
3052 I.getOperand(1).setSubReg(AArch64::sub_32);
3053 } else {
3054 LLVM_DEBUG(
3055 dbgs() << "Unhandled mismatched classes in G_TRUNC/G_PTRTOINT\n");
3056 return false;
3057 }
3058
3059 I.setDesc(TII.get(TargetOpcode::COPY));
3060 return true;
3061 } else if (DstRB.getID() == AArch64::FPRRegBankID) {
3062 if (DstTy == LLT::fixed_vector(4, 16) &&
3063 SrcTy == LLT::fixed_vector(4, 32)) {
3064 I.setDesc(TII.get(AArch64::XTNv4i16));
3065 constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3066 return true;
3067 }
3068
3069 if (!SrcTy.isVector() && SrcTy.getSizeInBits() == 128) {
3070 MachineInstr *Extract = emitExtractVectorElt(
3071 DstReg, DstRB, LLT::scalar(DstTy.getSizeInBits()), SrcReg, 0, MIB);
3072 if (!Extract)
3073 return false;
3074 I.eraseFromParent();
3075 return true;
3076 }
3077
3078 // We might have a vector G_PTRTOINT, in which case just emit a COPY.
3079 if (Opcode == TargetOpcode::G_PTRTOINT) {
3080 assert(DstTy.isVector() && "Expected an FPR ptrtoint to be a vector");
3081 I.setDesc(TII.get(TargetOpcode::COPY));
3082 return selectCopy(I, TII, MRI, TRI, RBI);
3083 }
3084 }
3085
3086 return false;
3087 }
3088
3089 case TargetOpcode::G_ANYEXT: {
3090 if (selectUSMovFromExtend(I, MRI))
3091 return true;
3092
3093 const Register DstReg = I.getOperand(0).getReg();
3094 const Register SrcReg = I.getOperand(1).getReg();
3095
3096 const RegisterBank &RBDst = *RBI.getRegBank(DstReg, MRI, TRI);
3097 if (RBDst.getID() != AArch64::GPRRegBankID) {
3098 LLVM_DEBUG(dbgs() << "G_ANYEXT on bank: " << RBDst
3099 << ", expected: GPR\n");
3100 return false;
3101 }
3102
3103 const RegisterBank &RBSrc = *RBI.getRegBank(SrcReg, MRI, TRI);
3104 if (RBSrc.getID() != AArch64::GPRRegBankID) {
3105 LLVM_DEBUG(dbgs() << "G_ANYEXT on bank: " << RBSrc
3106 << ", expected: GPR\n");
3107 return false;
3108 }
3109
3110 const unsigned DstSize = MRI.getType(DstReg).getSizeInBits();
3111
3112 if (DstSize == 0) {
3113 LLVM_DEBUG(dbgs() << "G_ANYEXT operand has no size, not a gvreg?\n");
3114 return false;
3115 }
3116
3117 if (DstSize != 64 && DstSize > 32) {
3118 LLVM_DEBUG(dbgs() << "G_ANYEXT to size: " << DstSize
3119 << ", expected: 32 or 64\n");
3120 return false;
3121 }
3122 // At this point G_ANYEXT is just like a plain COPY, but we need
3123 // to explicitly form the 64-bit value if any.
3124 if (DstSize > 32) {
3125 Register ExtSrc = MRI.createVirtualRegister(&AArch64::GPR64allRegClass);
3126 BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::SUBREG_TO_REG))
3127 .addDef(ExtSrc)
3128 .addImm(0)
3129 .addUse(SrcReg)
3130 .addImm(AArch64::sub_32);
3131 I.getOperand(1).setReg(ExtSrc);
3132 }
3133 return selectCopy(I, TII, MRI, TRI, RBI);
3134 }
3135
3136 case TargetOpcode::G_ZEXT:
3137 case TargetOpcode::G_SEXT_INREG:
3138 case TargetOpcode::G_SEXT: {
3139 if (selectUSMovFromExtend(I, MRI))
3140 return true;
3141
3142 unsigned Opcode = I.getOpcode();
3143 const bool IsSigned = Opcode != TargetOpcode::G_ZEXT;
3144 const Register DefReg = I.getOperand(0).getReg();
3145 Register SrcReg = I.getOperand(1).getReg();
3146 const LLT DstTy = MRI.getType(DefReg);
3147 const LLT SrcTy = MRI.getType(SrcReg);
3148 unsigned DstSize = DstTy.getSizeInBits();
3149 unsigned SrcSize = SrcTy.getSizeInBits();
3150
3151 // SEXT_INREG has the same src reg size as dst, the size of the value to be
3152 // extended is encoded in the imm.
3153 if (Opcode == TargetOpcode::G_SEXT_INREG)
3154 SrcSize = I.getOperand(2).getImm();
3155
3156 if (DstTy.isVector())
3157 return false; // Should be handled by imported patterns.
3158
3159 assert((*RBI.getRegBank(DefReg, MRI, TRI)).getID() ==
3160 AArch64::GPRRegBankID &&
3161 "Unexpected ext regbank");
3162
3163 MachineInstr *ExtI;
3164
3165 // First, check whether we're extending the result of a load with a dest type
3166 // smaller than 32 bits; if so, this zext is redundant. GPR32 is the smallest
3167 // GPR register on AArch64 and all loads which are smaller automatically
3168 // zero-extend the upper bits. E.g.
3169 // %v(s8) = G_LOAD %p, :: (load 1)
3170 // %v2(s32) = G_ZEXT %v(s8)
3171 if (!IsSigned) {
3172 auto *LoadMI = getOpcodeDef(TargetOpcode::G_LOAD, SrcReg, MRI);
3173 bool IsGPR =
3174 RBI.getRegBank(SrcReg, MRI, TRI)->getID() == AArch64::GPRRegBankID;
3175 if (LoadMI && IsGPR) {
3176 const MachineMemOperand *MemOp = *LoadMI->memoperands_begin();
3177 unsigned BytesLoaded = MemOp->getSize();
3178 if (BytesLoaded < 4 && SrcTy.getSizeInBytes() == BytesLoaded)
3179 return selectCopy(I, TII, MRI, TRI, RBI);
3180 }
3181
3182 // For the 32-bit -> 64-bit case, we can emit a mov (ORRWrs)
3183 // + SUBREG_TO_REG.
3184 //
3185 // If we are zero extending from 32 bits to 64 bits, it's possible that
3186 // the instruction implicitly does the zero extend for us. In that case,
3187 // we only need the SUBREG_TO_REG.
3188 if (IsGPR && SrcSize == 32 && DstSize == 64) {
3189 // Unlike with the G_LOAD case, we don't want to look through copies
3190 // here. (See isDef32.)
3191 MachineInstr *Def = MRI.getVRegDef(SrcReg);
3192 Register SubregToRegSrc = SrcReg;
3193
3194 // Does the instruction implicitly zero extend?
3195 if (!Def || !isDef32(*Def)) {
3196 // No. Zero out using an OR.
3197 Register OrDst = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
3198 const Register ZReg = AArch64::WZR;
3199 MIB.buildInstr(AArch64::ORRWrs, {OrDst}, {ZReg, SrcReg}).addImm(0);
3200 SubregToRegSrc = OrDst;
3201 }
3202
3203 MIB.buildInstr(AArch64::SUBREG_TO_REG, {DefReg}, {})
3204 .addImm(0)
3205 .addUse(SubregToRegSrc)
3206 .addImm(AArch64::sub_32);
3207
3208 if (!RBI.constrainGenericRegister(DefReg, AArch64::GPR64RegClass,
3209 MRI)) {
3210 LLVM_DEBUG(dbgs() << "Failed to constrain G_ZEXT destination\n");
3211 return false;
3212 }
3213
3214 if (!RBI.constrainGenericRegister(SrcReg, AArch64::GPR32RegClass,
3215 MRI)) {
3216 LLVM_DEBUG(dbgs() << "Failed to constrain G_ZEXT source\n");
3217 return false;
3218 }
3219
3220 I.eraseFromParent();
3221 return true;
3222 }
3223 }
3224
3225 if (DstSize == 64) {
3226 if (Opcode != TargetOpcode::G_SEXT_INREG) {
3227 // FIXME: Can we avoid manually doing this?
3228 if (!RBI.constrainGenericRegister(SrcReg, AArch64::GPR32RegClass,
3229 MRI)) {
3230 LLVM_DEBUG(dbgs() << "Failed to constrain " << TII.getName(Opcode)
3231 << " operand\n");
3232 return false;
3233 }
3234 SrcReg = MIB.buildInstr(AArch64::SUBREG_TO_REG,
3235 {&AArch64::GPR64RegClass}, {})
3236 .addImm(0)
3237 .addUse(SrcReg)
3238 .addImm(AArch64::sub_32)
3239 .getReg(0);
3240 }
3241
3242 ExtI = MIB.buildInstr(IsSigned ? AArch64::SBFMXri : AArch64::UBFMXri,
3243 {DefReg}, {SrcReg})
3244 .addImm(0)
3245 .addImm(SrcSize - 1);
3246 } else if (DstSize <= 32) {
3247 ExtI = MIB.buildInstr(IsSigned ? AArch64::SBFMWri : AArch64::UBFMWri,
3248 {DefReg}, {SrcReg})
3249 .addImm(0)
3250 .addImm(SrcSize - 1);
3251 } else {
3252 return false;
3253 }
3254
3255 constrainSelectedInstRegOperands(*ExtI, TII, TRI, RBI);
3256 I.eraseFromParent();
3257 return true;
3258 }
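// Standalone illustrative sketch (not from this file): outside the special
// 32 -> 64 zero-extend path, the extends above become SBFM/UBFM with immr = 0
// and imms = SrcSize - 1, i.e. a sign/zero extension of the low SrcSize bits.
// The helper name is made up for the example. E.g. G_SEXT from s8 to s32
// becomes SBFMWri dst, src, 0, 7.
#include <cstdint>

constexpr unsigned extImms(unsigned SrcSize) { return SrcSize - 1; }

static_assert(extImms(8) == 7, "s8 source -> imms 7");
static_assert(extImms(16) == 15, "s16 source -> imms 15");
static_assert(extImms(32) == 31, "s32 source extended to s64 -> imms 31");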
3259
3260 case TargetOpcode::G_SITOFP:
3261 case TargetOpcode::G_UITOFP:
3262 case TargetOpcode::G_FPTOSI:
3263 case TargetOpcode::G_FPTOUI: {
3264 const LLT DstTy = MRI.getType(I.getOperand(0).getReg()),
3265 SrcTy = MRI.getType(I.getOperand(1).getReg());
3266 const unsigned NewOpc = selectFPConvOpc(Opcode, DstTy, SrcTy);
3267 if (NewOpc == Opcode)
3268 return false;
3269
3270 I.setDesc(TII.get(NewOpc));
3271 constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3272
3273 return true;
3274 }
3275
3276 case TargetOpcode::G_FREEZE:
3277 return selectCopy(I, TII, MRI, TRI, RBI);
3278
3279 case TargetOpcode::G_INTTOPTR:
3280 // The importer is currently unable to import pointer types since they
3281 // didn't exist in SelectionDAG.
3282 return selectCopy(I, TII, MRI, TRI, RBI);
3283
3284 case TargetOpcode::G_BITCAST:
3285 // Imported SelectionDAG rules can handle every bitcast except those that
3286 // bitcast from a type to the same type. Ideally, these shouldn't occur
3287 // but we might not run an optimizer that deletes them. The other exception
3288 // is bitcasts involving pointer types, as SelectionDAG has no knowledge
3289 // of them.
3290 return selectCopy(I, TII, MRI, TRI, RBI);
3291
3292 case TargetOpcode::G_SELECT: {
3293 if (MRI.getType(I.getOperand(1).getReg()) != LLT::scalar(1)) {
3294 LLVM_DEBUG(dbgs() << "G_SELECT cond has type: " << Ty
3295 << ", expected: " << LLT::scalar(1) << '\n');
3296 return false;
3297 }
3298
3299 const Register CondReg = I.getOperand(1).getReg();
3300 const Register TReg = I.getOperand(2).getReg();
3301 const Register FReg = I.getOperand(3).getReg();
3302
3303 if (tryOptSelect(I))
3304 return true;
3305
3306 // Make sure to use an unused vreg instead of wzr, so that the peephole
3307 // optimizations will be able to optimize these.
3308 Register DeadVReg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
3309 auto TstMI = MIB.buildInstr(AArch64::ANDSWri, {DeadVReg}, {CondReg})
3310 .addImm(AArch64_AM::encodeLogicalImmediate(1, 32));
3311 constrainSelectedInstRegOperands(*TstMI, TII, TRI, RBI);
3312 if (!emitSelect(I.getOperand(0).getReg(), TReg, FReg, AArch64CC::NE, MIB))
3313 return false;
3314 I.eraseFromParent();
3315 return true;
3316 }
3317 case TargetOpcode::G_ICMP: {
3318 if (Ty.isVector())
3319 return selectVectorICmp(I, MRI);
3320
3321 if (Ty != LLT::scalar(32)) {
3322 LLVM_DEBUG(dbgs() << "G_ICMP result has type: " << Ty
3323 << ", expected: " << LLT::scalar(32) << '\n');
3324 return false;
3325 }
3326
3327 auto Pred = static_cast<CmpInst::Predicate>(I.getOperand(1).getPredicate());
3328 const AArch64CC::CondCode InvCC =
3329 changeICMPPredToAArch64CC(CmpInst::getInversePredicate(Pred));
3330 emitIntegerCompare(I.getOperand(2), I.getOperand(3), I.getOperand(1), MIB);
3331 emitCSINC(/*Dst=*/I.getOperand(0).getReg(), /*Src1=*/AArch64::WZR,
3332 /*Src2=*/AArch64::WZR, InvCC, MIB);
3333 I.eraseFromParent();
3334 return true;
3335 }
3336
3337 case TargetOpcode::G_FCMP: {
3338 CmpInst::Predicate Pred =
3339 static_cast<CmpInst::Predicate>(I.getOperand(1).getPredicate());
3340 if (!emitFPCompare(I.getOperand(2).getReg(), I.getOperand(3).getReg(), MIB,
3341 Pred) ||
3342 !emitCSetForFCmp(I.getOperand(0).getReg(), Pred, MIB))
3343 return false;
3344 I.eraseFromParent();
3345 return true;
3346 }
3347 case TargetOpcode::G_VASTART:
3348 return STI.isTargetDarwin() ? selectVaStartDarwin(I, MF, MRI)
3349 : selectVaStartAAPCS(I, MF, MRI);
3350 case TargetOpcode::G_INTRINSIC:
3351 return selectIntrinsic(I, MRI);
3352 case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS:
3353 return selectIntrinsicWithSideEffects(I, MRI);
3354 case TargetOpcode::G_IMPLICIT_DEF: {
3355 I.setDesc(TII.get(TargetOpcode::IMPLICIT_DEF));
3356 const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
3357 const Register DstReg = I.getOperand(0).getReg();
3358 const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
3359 const TargetRegisterClass *DstRC =
3360 getRegClassForTypeOnBank(DstTy, DstRB, RBI);
3361 RBI.constrainGenericRegister(DstReg, *DstRC, MRI);
3362 return true;
3363 }
3364 case TargetOpcode::G_BLOCK_ADDR: {
3365 if (TM.getCodeModel() == CodeModel::Large) {
3366 materializeLargeCMVal(I, I.getOperand(1).getBlockAddress(), 0);
3367 I.eraseFromParent();
3368 return true;
3369 } else {
3370 I.setDesc(TII.get(AArch64::MOVaddrBA));
3371 auto MovMI = BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::MOVaddrBA),
3372 I.getOperand(0).getReg())
3373 .addBlockAddress(I.getOperand(1).getBlockAddress(),
3374 /* Offset */ 0, AArch64II::MO_PAGE)
3375 .addBlockAddress(
3376 I.getOperand(1).getBlockAddress(), /* Offset */ 0,
3377 AArch64II::MO_NC | AArch64II::MO_PAGEOFF);
3378 I.eraseFromParent();
3379 return constrainSelectedInstRegOperands(*MovMI, TII, TRI, RBI);
3380 }
3381 }
3382 case AArch64::G_DUP: {
3383 // When the scalar of G_DUP is an s8/s16 gpr, it can't be selected by
3384 // imported patterns. Do it manually here. Avoiding generating s16 gpr is
3385 // difficult because at RBS we may end up pessimizing the fpr case if we
3386 // decided to add an anyextend to fix this. Manual selection is the most
3387 // robust solution for now.
3388 if (RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI)->getID() !=
3389 AArch64::GPRRegBankID)
3390 return false; // We expect the fpr regbank case to be imported.
3391 LLT VecTy = MRI.getType(I.getOperand(0).getReg());
3392 if (VecTy == LLT::fixed_vector(8, 8))
3393 I.setDesc(TII.get(AArch64::DUPv8i8gpr));
3394 else if (VecTy == LLT::fixed_vector(16, 8))
3395 I.setDesc(TII.get(AArch64::DUPv16i8gpr));
3396 else if (VecTy == LLT::fixed_vector(4, 16))
3397 I.setDesc(TII.get(AArch64::DUPv4i16gpr));
3398 else if (VecTy == LLT::fixed_vector(8, 16))
3399 I.setDesc(TII.get(AArch64::DUPv8i16gpr));
3400 else
3401 return false;
3402 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3403 }
3404 case TargetOpcode::G_INTRINSIC_TRUNC:
3405 return selectIntrinsicTrunc(I, MRI);
3406 case TargetOpcode::G_INTRINSIC_ROUND:
3407 return selectIntrinsicRound(I, MRI);
3408 case TargetOpcode::G_BUILD_VECTOR:
3409 return selectBuildVector(I, MRI);
3410 case TargetOpcode::G_MERGE_VALUES:
3411 return selectMergeValues(I, MRI);
3412 case TargetOpcode::G_UNMERGE_VALUES:
3413 return selectUnmergeValues(I, MRI);
3414 case TargetOpcode::G_SHUFFLE_VECTOR:
3415 return selectShuffleVector(I, MRI);
3416 case TargetOpcode::G_EXTRACT_VECTOR_ELT:
3417 return selectExtractElt(I, MRI);
3418 case TargetOpcode::G_INSERT_VECTOR_ELT:
3419 return selectInsertElt(I, MRI);
3420 case TargetOpcode::G_CONCAT_VECTORS:
3421 return selectConcatVectors(I, MRI);
3422 case TargetOpcode::G_JUMP_TABLE:
3423 return selectJumpTable(I, MRI);
3424 case TargetOpcode::G_VECREDUCE_FADD:
3425 case TargetOpcode::G_VECREDUCE_ADD:
3426 return selectReduction(I, MRI);
3427 }
3428
3429 return false;
3430}
3431
3432bool AArch64InstructionSelector::selectReduction(MachineInstr &I,
3433 MachineRegisterInfo &MRI) {
3434 Register VecReg = I.getOperand(1).getReg();
3435 LLT VecTy = MRI.getType(VecReg);
3436 if (I.getOpcode() == TargetOpcode::G_VECREDUCE_ADD) {
3437 // For <2 x i32> ADDPv2i32 generates an FPR64 value, so we need to emit
3438 // a subregister copy afterwards.
3439 if (VecTy == LLT::fixed_vector(2, 32)) {
3440 Register DstReg = I.getOperand(0).getReg();
3441 auto AddP = MIB.buildInstr(AArch64::ADDPv2i32, {&AArch64::FPR64RegClass},
3442 {VecReg, VecReg});
3443 auto Copy = MIB.buildInstr(TargetOpcode::COPY, {DstReg}, {})
3444 .addReg(AddP.getReg(0), 0, AArch64::ssub)
3445 .getReg(0);
3446 RBI.constrainGenericRegister(Copy, AArch64::FPR32RegClass, MRI);
3447 I.eraseFromParent();
3448 return constrainSelectedInstRegOperands(*AddP, TII, TRI, RBI);
3449 }
3450
3451 unsigned Opc = 0;
3452 if (VecTy == LLT::fixed_vector(16, 8))
3453 Opc = AArch64::ADDVv16i8v;
3454 else if (VecTy == LLT::fixed_vector(8, 16))
3455 Opc = AArch64::ADDVv8i16v;
3456 else if (VecTy == LLT::fixed_vector(4, 32))
3457 Opc = AArch64::ADDVv4i32v;
3458 else if (VecTy == LLT::fixed_vector(2, 64))
3459 Opc = AArch64::ADDPv2i64p;
3460 else {
3461 LLVM_DEBUG(dbgs() << "Unhandled type for add reduction")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "Unhandled type for add reduction"
; } } while (false)
;
3462 return false;
3463 }
3464 I.setDesc(TII.get(Opc));
3465 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3466 }
3467
3468 if (I.getOpcode() == TargetOpcode::G_VECREDUCE_FADD) {
3469 unsigned Opc = 0;
3470 if (VecTy == LLT::fixed_vector(2, 32))
3471 Opc = AArch64::FADDPv2i32p;
3472 else if (VecTy == LLT::fixed_vector(2, 64))
3473 Opc = AArch64::FADDPv2i64p;
3474 else {
3475 LLVM_DEBUG(dbgs() << "Unhandled type for fadd reduction")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "Unhandled type for fadd reduction"
; } } while (false)
;
3476 return false;
3477 }
3478 I.setDesc(TII.get(Opc));
3479 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3480 }
3481 return false;
3482}
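
A rough before/after sketch of the add-reduction path above (hypothetical MIR, virtual register names invented here): a <4 x s32> reduction only needs its opcode rewritten, while the <2 x s32> case goes through ADDP plus the ssub subregister copy because ADDPv2i32 defines a 64-bit FPR.

// Hypothetical, for illustration only:
//   %sum:_(s32) = G_VECREDUCE_ADD %vec:_(<4 x s32>)
// becomes, after setting the descriptor and constraining operands:
//   %sum:fpr32 = ADDVv4i32v %vec:fpr128
//
// For <2 x s32>:
//   %pair:fpr64 = ADDPv2i32 %vec:fpr64, %vec:fpr64
//   %sum:fpr32  = COPY %pair.ssub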
3483
3484bool AArch64InstructionSelector::selectBrJT(MachineInstr &I,
3485 MachineRegisterInfo &MRI) {
3486 assert(I.getOpcode() == TargetOpcode::G_BRJT && "Expected G_BRJT");
3487 Register JTAddr = I.getOperand(0).getReg();
3488 unsigned JTI = I.getOperand(1).getIndex();
3489 Register Index = I.getOperand(2).getReg();
3490
3491 Register TargetReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
3492 Register ScratchReg = MRI.createVirtualRegister(&AArch64::GPR64spRegClass);
3493
3494 MF->getInfo<AArch64FunctionInfo>()->setJumpTableEntryInfo(JTI, 4, nullptr);
3495 auto JumpTableInst = MIB.buildInstr(AArch64::JumpTableDest32,
3496 {TargetReg, ScratchReg}, {JTAddr, Index})
3497 .addJumpTableIndex(JTI);
3498 // Build the indirect branch.
3499 MIB.buildInstr(AArch64::BR, {}, {TargetReg});
3500 I.eraseFromParent();
3501 return constrainSelectedInstRegOperands(*JumpTableInst, TII, TRI, RBI);
3502}
3503
3504bool AArch64InstructionSelector::selectJumpTable(MachineInstr &I,
3505 MachineRegisterInfo &MRI) {
3506 assert(I.getOpcode() == TargetOpcode::G_JUMP_TABLE && "Expected jump table");
3507 assert(I.getOperand(1).isJTI() && "Jump table op should have a JTI!");
3508
3509 Register DstReg = I.getOperand(0).getReg();
3510 unsigned JTI = I.getOperand(1).getIndex();
3511 // We generate a MOVaddrJT which will get expanded to an ADRP + ADD later.
3512 auto MovMI =
3513 MIB.buildInstr(AArch64::MOVaddrJT, {DstReg}, {})
3514 .addJumpTableIndex(JTI, AArch64II::MO_PAGE)
3515 .addJumpTableIndex(JTI, AArch64II::MO_NC | AArch64II::MO_PAGEOFF);
3516 I.eraseFromParent();
3517 return constrainSelectedInstRegOperands(*MovMI, TII, TRI, RBI);
3518}
3519
3520bool AArch64InstructionSelector::selectTLSGlobalValue(
3521 MachineInstr &I, MachineRegisterInfo &MRI) {
3522 if (!STI.isTargetMachO())
3523 return false;
3524 MachineFunction &MF = *I.getParent()->getParent();
3525 MF.getFrameInfo().setAdjustsStack(true);
3526
3527 const auto &GlobalOp = I.getOperand(1);
3528 assert(GlobalOp.getOffset() == 0 &&
3529 "Shouldn't have an offset on TLS globals!");
3530 const GlobalValue &GV = *GlobalOp.getGlobal();
3531
3532 auto LoadGOT =
3533 MIB.buildInstr(AArch64::LOADgot, {&AArch64::GPR64commonRegClass}, {})
3534 .addGlobalAddress(&GV, 0, AArch64II::MO_TLS);
3535
3536 auto Load = MIB.buildInstr(AArch64::LDRXui, {&AArch64::GPR64commonRegClass},
3537 {LoadGOT.getReg(0)})
3538 .addImm(0);
3539
3540 MIB.buildCopy(Register(AArch64::X0), LoadGOT.getReg(0));
3541 // TLS calls preserve all registers except those that absolutely must be
3542 // trashed: X0 (it takes an argument), LR (it's a call) and NZCV (let's not be
3543 // silly).
3544 MIB.buildInstr(getBLRCallOpcode(MF), {}, {Load})
3545 .addUse(AArch64::X0, RegState::Implicit)
3546 .addDef(AArch64::X0, RegState::Implicit)
3547 .addRegMask(TRI.getTLSCallPreservedMask());
3548
3549 MIB.buildCopy(I.getOperand(0).getReg(), Register(AArch64::X0));
3550 RBI.constrainGenericRegister(I.getOperand(0).getReg(), AArch64::GPR64RegClass,
3551 MRI);
3552 I.eraseFromParent();
3553 return true;
3554}
3555
3556bool AArch64InstructionSelector::selectIntrinsicTrunc(
3557 MachineInstr &I, MachineRegisterInfo &MRI) const {
3558 const LLT SrcTy = MRI.getType(I.getOperand(0).getReg());
3559
3560 // Select the correct opcode.
3561 unsigned Opc = 0;
3562 if (!SrcTy.isVector()) {
3563 switch (SrcTy.getSizeInBits()) {
3564 default:
3565 case 16:
3566 Opc = AArch64::FRINTZHr;
3567 break;
3568 case 32:
3569 Opc = AArch64::FRINTZSr;
3570 break;
3571 case 64:
3572 Opc = AArch64::FRINTZDr;
3573 break;
3574 }
3575 } else {
3576 unsigned NumElts = SrcTy.getNumElements();
3577 switch (SrcTy.getElementType().getSizeInBits()) {
3578 default:
3579 break;
3580 case 16:
3581 if (NumElts == 4)
3582 Opc = AArch64::FRINTZv4f16;
3583 else if (NumElts == 8)
3584 Opc = AArch64::FRINTZv8f16;
3585 break;
3586 case 32:
3587 if (NumElts == 2)
3588 Opc = AArch64::FRINTZv2f32;
3589 else if (NumElts == 4)
3590 Opc = AArch64::FRINTZv4f32;
3591 break;
3592 case 64:
3593 if (NumElts == 2)
3594 Opc = AArch64::FRINTZv2f64;
3595 break;
3596 }
3597 }
3598
3599 if (!Opc) {
3600 // Didn't get an opcode above, bail.
3601 LLVM_DEBUG(dbgs() << "Unsupported type for G_INTRINSIC_TRUNC!\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "Unsupported type for G_INTRINSIC_TRUNC!\n"
; } } while (false)
;
3602 return false;
3603 }
3604
3605 // Legalization would have set us up perfectly for this; we just need to
3606 // set the opcode and move on.
3607 I.setDesc(TII.get(Opc));
3608 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3609}
3610
3611bool AArch64InstructionSelector::selectIntrinsicRound(
3612 MachineInstr &I, MachineRegisterInfo &MRI) const {
3613 const LLT SrcTy = MRI.getType(I.getOperand(0).getReg());
3614
3615 // Select the correct opcode.
3616 unsigned Opc = 0;
3617 if (!SrcTy.isVector()) {
3618 switch (SrcTy.getSizeInBits()) {
3619 default:
3620 case 16:
3621 Opc = AArch64::FRINTAHr;
3622 break;
3623 case 32:
3624 Opc = AArch64::FRINTASr;
3625 break;
3626 case 64:
3627 Opc = AArch64::FRINTADr;
3628 break;
3629 }
3630 } else {
3631 unsigned NumElts = SrcTy.getNumElements();
3632 switch (SrcTy.getElementType().getSizeInBits()) {
3633 default:
3634 break;
3635 case 16:
3636 if (NumElts == 4)
3637 Opc = AArch64::FRINTAv4f16;
3638 else if (NumElts == 8)
3639 Opc = AArch64::FRINTAv8f16;
3640 break;
3641 case 32:
3642 if (NumElts == 2)
3643 Opc = AArch64::FRINTAv2f32;
3644 else if (NumElts == 4)
3645 Opc = AArch64::FRINTAv4f32;
3646 break;
3647 case 64:
3648 if (NumElts == 2)
3649 Opc = AArch64::FRINTAv2f64;
3650 break;
3651 }
3652 }
3653
3654 if (!Opc) {
3655 // Didn't get an opcode above, bail.
3656 LLVM_DEBUG(dbgs() << "Unsupported type for G_INTRINSIC_ROUND!\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "Unsupported type for G_INTRINSIC_ROUND!\n"
; } } while (false)
;
3657 return false;
3658 }
3659
3660 // Legalization would have set us up perfectly for this; we just need to
3661 // set the opcode and move on.
3662 I.setDesc(TII.get(Opc));
3663 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3664}
3665
3666bool AArch64InstructionSelector::selectVectorICmp(
3667 MachineInstr &I, MachineRegisterInfo &MRI) {
3668 Register DstReg = I.getOperand(0).getReg();
3669 LLT DstTy = MRI.getType(DstReg);
3670 Register SrcReg = I.getOperand(2).getReg();
3671 Register Src2Reg = I.getOperand(3).getReg();
3672 LLT SrcTy = MRI.getType(SrcReg);
3673
3674 unsigned SrcEltSize = SrcTy.getElementType().getSizeInBits();
3675 unsigned NumElts = DstTy.getNumElements();
3676
3677 // First index is element size, 0 == 8b, 1 == 16b, 2 == 32b, 3 == 64b
3678 // Second index is num elts, 0 == v2, 1 == v4, 2 == v8, 3 == v16
3679 // Third index is cc opcode:
3680 // 0 == eq
3681 // 1 == ugt
3682 // 2 == uge
3683 // 3 == ult
3684 // 4 == ule
3685 // 5 == sgt
3686 // 6 == sge
3687 // 7 == slt
3688 // 8 == sle
3689 // ne is done by negating 'eq' result.
3690
3691 // This table below assumes that for some comparisons the operands will be
3692 // commuted.
3693 // ult op == commute + ugt op
3694 // ule op == commute + uge op
3695 // slt op == commute + sgt op
3696 // sle op == commute + sge op
3697 unsigned PredIdx = 0;
3698 bool SwapOperands = false;
3699 CmpInst::Predicate Pred = (CmpInst::Predicate)I.getOperand(1).getPredicate();
3700 switch (Pred) {
3701 case CmpInst::ICMP_NE:
3702 case CmpInst::ICMP_EQ:
3703 PredIdx = 0;
3704 break;
3705 case CmpInst::ICMP_UGT:
3706 PredIdx = 1;
3707 break;
3708 case CmpInst::ICMP_UGE:
3709 PredIdx = 2;
3710 break;
3711 case CmpInst::ICMP_ULT:
3712 PredIdx = 3;
3713 SwapOperands = true;
3714 break;
3715 case CmpInst::ICMP_ULE:
3716 PredIdx = 4;
3717 SwapOperands = true;
3718 break;
3719 case CmpInst::ICMP_SGT:
3720 PredIdx = 5;
3721 break;
3722 case CmpInst::ICMP_SGE:
3723 PredIdx = 6;
3724 break;
3725 case CmpInst::ICMP_SLT:
3726 PredIdx = 7;
3727 SwapOperands = true;
3728 break;
3729 case CmpInst::ICMP_SLE:
3730 PredIdx = 8;
3731 SwapOperands = true;
3732 break;
3733 default:
3734 llvm_unreachable("Unhandled icmp predicate")::llvm::llvm_unreachable_internal("Unhandled icmp predicate",
"llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 3734)
;
3735 return false;
3736 }
3737
3738 // This table obviously should be tablegen'd when we have our GISel native
3739 // tablegen selector.
3740
3741 static const unsigned OpcTable[4][4][9] = {
3742 {
3743 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3744 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3745 0 /* invalid */},
3746 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3747 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3748 0 /* invalid */},
3749 {AArch64::CMEQv8i8, AArch64::CMHIv8i8, AArch64::CMHSv8i8,
3750 AArch64::CMHIv8i8, AArch64::CMHSv8i8, AArch64::CMGTv8i8,
3751 AArch64::CMGEv8i8, AArch64::CMGTv8i8, AArch64::CMGEv8i8},
3752 {AArch64::CMEQv16i8, AArch64::CMHIv16i8, AArch64::CMHSv16i8,
3753 AArch64::CMHIv16i8, AArch64::CMHSv16i8, AArch64::CMGTv16i8,
3754 AArch64::CMGEv16i8, AArch64::CMGTv16i8, AArch64::CMGEv16i8}
3755 },
3756 {
3757 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3758 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3759 0 /* invalid */},
3760 {AArch64::CMEQv4i16, AArch64::CMHIv4i16, AArch64::CMHSv4i16,
3761 AArch64::CMHIv4i16, AArch64::CMHSv4i16, AArch64::CMGTv4i16,
3762 AArch64::CMGEv4i16, AArch64::CMGTv4i16, AArch64::CMGEv4i16},
3763 {AArch64::CMEQv8i16, AArch64::CMHIv8i16, AArch64::CMHSv8i16,
3764 AArch64::CMHIv8i16, AArch64::CMHSv8i16, AArch64::CMGTv8i16,
3765 AArch64::CMGEv8i16, AArch64::CMGTv8i16, AArch64::CMGEv8i16},
3766 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3767 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3768 0 /* invalid */}
3769 },
3770 {
3771 {AArch64::CMEQv2i32, AArch64::CMHIv2i32, AArch64::CMHSv2i32,
3772 AArch64::CMHIv2i32, AArch64::CMHSv2i32, AArch64::CMGTv2i32,
3773 AArch64::CMGEv2i32, AArch64::CMGTv2i32, AArch64::CMGEv2i32},
3774 {AArch64::CMEQv4i32, AArch64::CMHIv4i32, AArch64::CMHSv4i32,
3775 AArch64::CMHIv4i32, AArch64::CMHSv4i32, AArch64::CMGTv4i32,
3776 AArch64::CMGEv4i32, AArch64::CMGTv4i32, AArch64::CMGEv4i32},
3777 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3778 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3779 0 /* invalid */},
3780 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3781 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3782 0 /* invalid */}
3783 },
3784 {
3785 {AArch64::CMEQv2i64, AArch64::CMHIv2i64, AArch64::CMHSv2i64,
3786 AArch64::CMHIv2i64, AArch64::CMHSv2i64, AArch64::CMGTv2i64,
3787 AArch64::CMGEv2i64, AArch64::CMGTv2i64, AArch64::CMGEv2i64},
3788 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3789 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3790 0 /* invalid */},
3791 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3792 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3793 0 /* invalid */},
3794 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3795 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3796 0 /* invalid */}
3797 },
3798 };
3799 unsigned EltIdx = Log2_32(SrcEltSize / 8);
3800 unsigned NumEltsIdx = Log2_32(NumElts / 2);
3801 unsigned Opc = OpcTable[EltIdx][NumEltsIdx][PredIdx];
3802 if (!Opc) {
3803 LLVM_DEBUG(dbgs() << "Could not map G_ICMP to cmp opcode")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "Could not map G_ICMP to cmp opcode"
; } } while (false)
;
3804 return false;
3805 }
3806
3807 const RegisterBank &VecRB = *RBI.getRegBank(SrcReg, MRI, TRI);
3808 const TargetRegisterClass *SrcRC =
3809 getRegClassForTypeOnBank(SrcTy, VecRB, RBI, true);
3810 if (!SrcRC) {
3811 LLVM_DEBUG(dbgs() << "Could not determine source register class.\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "Could not determine source register class.\n"
; } } while (false)
;
3812 return false;
3813 }
3814
3815 unsigned NotOpc = Pred == ICmpInst::ICMP_NE ? AArch64::NOTv8i8 : 0;
3816 if (SrcTy.getSizeInBits() == 128)
3817 NotOpc = NotOpc ? AArch64::NOTv16i8 : 0;
3818
3819 if (SwapOperands)
3820 std::swap(SrcReg, Src2Reg);
3821
3822 auto Cmp = MIB.buildInstr(Opc, {SrcRC}, {SrcReg, Src2Reg});
3823 constrainSelectedInstRegOperands(*Cmp, TII, TRI, RBI);
3824
3825 // Invert if we had a 'ne' cc.
3826 if (NotOpc) {
3827 Cmp = MIB.buildInstr(NotOpc, {DstReg}, {Cmp});
3828 constrainSelectedInstRegOperands(*Cmp, TII, TRI, RBI);
3829 } else {
3830 MIB.buildCopy(DstReg, Cmp.getReg(0));
3831 }
3832 RBI.constrainGenericRegister(DstReg, *SrcRC, MRI);
3833 I.eraseFromParent();
3834 return true;
3835}
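
A worked example of the predicate mapping above may help (illustrative only, not taken from the report). For an unsigned-less-than compare of <4 x s32> operands, PredIdx is 3 and SwapOperands is set, so the table yields CMHIv4i32: ult is implemented as a commuted ugt. A not-equal compare reuses the eq slot and then inverts the 128-bit result with NOT.

// icmp ult <4 x i32> %a, %b   (hypothetical)
//   EltIdx = Log2_32(32 / 8) = 2, NumEltsIdx = Log2_32(4 / 2) = 1
//   Opc = OpcTable[2][1][3] == AArch64::CMHIv4i32, operands swapped:
//     CMHIv4i32 %b, %a
// icmp ne <4 x i32> %a, %b
//   PredIdx = 0 (the eq slot), NotOpc == AArch64::NOTv16i8:
//     CMEQv4i32 %a, %b  followed by  NOTv16i8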
3836
3837MachineInstr *AArch64InstructionSelector::emitScalarToVector(
3838 unsigned EltSize, const TargetRegisterClass *DstRC, Register Scalar,
3839 MachineIRBuilder &MIRBuilder) const {
3840 auto Undef = MIRBuilder.buildInstr(TargetOpcode::IMPLICIT_DEF, {DstRC}, {});
3841
3842 auto BuildFn = [&](unsigned SubregIndex) {
3843 auto Ins =
3844 MIRBuilder
3845 .buildInstr(TargetOpcode::INSERT_SUBREG, {DstRC}, {Undef, Scalar})
3846 .addImm(SubregIndex);
3847 constrainSelectedInstRegOperands(*Undef, TII, TRI, RBI);
3848 constrainSelectedInstRegOperands(*Ins, TII, TRI, RBI);
3849 return &*Ins;
3850 };
3851
3852 switch (EltSize) {
3853 case 16:
3854 return BuildFn(AArch64::hsub);
3855 case 32:
3856 return BuildFn(AArch64::ssub);
3857 case 64:
3858 return BuildFn(AArch64::dsub);
3859 default:
3860 return nullptr;
3861 }
3862}
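
A minimal usage sketch of this helper, assuming the surrounding selector context (MIB available) and a placeholder VecReg; this is how emitExtractVectorElt below widens an unpacked 64-bit vector so a lane copy can operate on a 128-bit register.

// Widen a 64-bit value into the low half of an FPR128 via
// IMPLICIT_DEF + INSERT_SUBREG on the dsub subregister index.
MachineInstr *Widened =
    emitScalarToVector(/*EltSize=*/64, &AArch64::FPR128RegClass, VecReg, MIB);
if (!Widened)
  return false; // element sizes other than 16/32/64 are rejected
Register WideReg = Widened->getOperand(0).getReg();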
3863
3864bool AArch64InstructionSelector::selectMergeValues(
3865 MachineInstr &I, MachineRegisterInfo &MRI) {
3866 assert(I.getOpcode() == TargetOpcode::G_MERGE_VALUES && "unexpected opcode");
3867 const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
3868 const LLT SrcTy = MRI.getType(I.getOperand(1).getReg());
3869 assert(!DstTy.isVector() && !SrcTy.isVector() && "invalid merge operation");
3870 const RegisterBank &RB = *RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI);
3871
3872 if (I.getNumOperands() != 3)
3873 return false;
3874
3875 // Merging 2 s64s into an s128.
3876 if (DstTy == LLT::scalar(128)) {
3877 if (SrcTy.getSizeInBits() != 64)
3878 return false;
3879 Register DstReg = I.getOperand(0).getReg();
3880 Register Src1Reg = I.getOperand(1).getReg();
3881 Register Src2Reg = I.getOperand(2).getReg();
3882 auto Tmp = MIB.buildInstr(TargetOpcode::IMPLICIT_DEF, {DstTy}, {});
3883 MachineInstr *InsMI =
3884 emitLaneInsert(None, Tmp.getReg(0), Src1Reg, /* LaneIdx */ 0, RB, MIB);
3885 if (!InsMI)
3886 return false;
3887 MachineInstr *Ins2MI = emitLaneInsert(DstReg, InsMI->getOperand(0).getReg(),
3888 Src2Reg, /* LaneIdx */ 1, RB, MIB);
3889 if (!Ins2MI)
3890 return false;
3891 constrainSelectedInstRegOperands(*InsMI, TII, TRI, RBI);
3892 constrainSelectedInstRegOperands(*Ins2MI, TII, TRI, RBI);
3893 I.eraseFromParent();
3894 return true;
3895 }
3896
3897 if (RB.getID() != AArch64::GPRRegBankID)
3898 return false;
3899
3900 if (DstTy.getSizeInBits() != 64 || SrcTy.getSizeInBits() != 32)
3901 return false;
3902
3903 auto *DstRC = &AArch64::GPR64RegClass;
3904 Register SubToRegDef = MRI.createVirtualRegister(DstRC);
3905 MachineInstr &SubRegMI = *BuildMI(*I.getParent(), I, I.getDebugLoc(),
3906 TII.get(TargetOpcode::SUBREG_TO_REG))
3907 .addDef(SubToRegDef)
3908 .addImm(0)
3909 .addUse(I.getOperand(1).getReg())
3910 .addImm(AArch64::sub_32);
3911 Register SubToRegDef2 = MRI.createVirtualRegister(DstRC);
3912 // Need to anyext the second scalar before we can use bfm
3913 MachineInstr &SubRegMI2 = *BuildMI(*I.getParent(), I, I.getDebugLoc(),
3914 TII.get(TargetOpcode::SUBREG_TO_REG))
3915 .addDef(SubToRegDef2)
3916 .addImm(0)
3917 .addUse(I.getOperand(2).getReg())
3918 .addImm(AArch64::sub_32);
3919 MachineInstr &BFM =
3920 *BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::BFMXri))
3921 .addDef(I.getOperand(0).getReg())
3922 .addUse(SubToRegDef)
3923 .addUse(SubToRegDef2)
3924 .addImm(32)
3925 .addImm(31);
3926 constrainSelectedInstRegOperands(SubRegMI, TII, TRI, RBI);
3927 constrainSelectedInstRegOperands(SubRegMI2, TII, TRI, RBI);
3928 constrainSelectedInstRegOperands(BFM, TII, TRI, RBI);
3929 I.eraseFromParent();
3930 return true;
3931}
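
A hedged sketch of the GPR path above (register names invented): merging two s32 values into an s64 first widens each source with SUBREG_TO_REG, then BFMXri with immr=32 and imms=31 (i.e. a bitfield insert of width 32 at bit 32) places the second value into the top half.

//   %d:_(s64) = G_MERGE_VALUES %lo:_(s32), %hi:_(s32)
// becomes, roughly:
//   %lo64 = SUBREG_TO_REG 0, %lo, %subreg.sub_32
//   %hi64 = SUBREG_TO_REG 0, %hi, %subreg.sub_32
//   %d    = BFMXri %lo64, %hi64, 32, 31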
3932
3933static bool getLaneCopyOpcode(unsigned &CopyOpc, unsigned &ExtractSubReg,
3934 const unsigned EltSize) {
3935 // Choose a lane copy opcode and subregister based off of the size of the
3936 // vector's elements.
3937 switch (EltSize) {
3938 case 8:
3939 CopyOpc = AArch64::DUPi8;
3940 ExtractSubReg = AArch64::bsub;
3941 break;
3942 case 16:
3943 CopyOpc = AArch64::DUPi16;
3944 ExtractSubReg = AArch64::hsub;
3945 break;
3946 case 32:
3947 CopyOpc = AArch64::DUPi32;
3948 ExtractSubReg = AArch64::ssub;
3949 break;
3950 case 64:
3951 CopyOpc = AArch64::DUPi64;
3952 ExtractSubReg = AArch64::dsub;
3953 break;
3954 default:
3955 // Unknown size, bail out.
3956 LLVM_DEBUG(dbgs() << "Elt size '" << EltSize << "' unsupported.\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "Elt size '" << EltSize
<< "' unsupported.\n"; } } while (false)
;
3957 return false;
3958 }
3959 return true;
3960}
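
The helper reports its results through the two out-parameters; a short usage sketch mirroring how selectUnmergeValues calls it later in this file (the element size here is chosen arbitrarily for illustration):

unsigned CopyOpc = 0;
unsigned ExtractSubReg = 0;
// For 32-bit elements this selects DUPi32 and the ssub subregister index.
if (!getLaneCopyOpcode(CopyOpc, ExtractSubReg, /*EltSize=*/32))
  return false; // element size is not 8/16/32/64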
3961
3962MachineInstr *AArch64InstructionSelector::emitExtractVectorElt(
3963 Optional<Register> DstReg, const RegisterBank &DstRB, LLT ScalarTy,
3964 Register VecReg, unsigned LaneIdx, MachineIRBuilder &MIRBuilder) const {
3965 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
3966 unsigned CopyOpc = 0;
3967 unsigned ExtractSubReg = 0;
3968 if (!getLaneCopyOpcode(CopyOpc, ExtractSubReg, ScalarTy.getSizeInBits())) {
3969 LLVM_DEBUG(
3970 dbgs() << "Couldn't determine lane copy opcode for instruction.\n");
3971 return nullptr;
3972 }
3973
3974 const TargetRegisterClass *DstRC =
3975 getRegClassForTypeOnBank(ScalarTy, DstRB, RBI, true);
3976 if (!DstRC) {
3977 LLVM_DEBUG(dbgs() << "Could not determine destination register class.\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "Could not determine destination register class.\n"
; } } while (false)
;
3978 return nullptr;
3979 }
3980
3981 const RegisterBank &VecRB = *RBI.getRegBank(VecReg, MRI, TRI);
3982 const LLT &VecTy = MRI.getType(VecReg);
3983 const TargetRegisterClass *VecRC =
3984 getRegClassForTypeOnBank(VecTy, VecRB, RBI, true);
3985 if (!VecRC) {
3986 LLVM_DEBUG(dbgs() << "Could not determine source register class.\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "Could not determine source register class.\n"
; } } while (false)
;
3987 return nullptr;
3988 }
3989
3990 // The register that we're going to copy into.
3991 Register InsertReg = VecReg;
3992 if (!DstReg)
3993 DstReg = MRI.createVirtualRegister(DstRC);
3994 // If the lane index is 0, we just use a subregister COPY.
3995 if (LaneIdx == 0) {
3996 auto Copy = MIRBuilder.buildInstr(TargetOpcode::COPY, {*DstReg}, {})
3997 .addReg(VecReg, 0, ExtractSubReg);
3998 RBI.constrainGenericRegister(*DstReg, *DstRC, MRI);
3999 return &*Copy;
4000 }
4001
4002 // Lane copies require 128-bit wide registers. If we're dealing with an
4003 // unpacked vector, then we need to move up to that width. Insert an implicit
4004 // def and a subregister insert to get us there.
4005 if (VecTy.getSizeInBits() != 128) {
4006 MachineInstr *ScalarToVector = emitScalarToVector(
4007 VecTy.getSizeInBits(), &AArch64::FPR128RegClass, VecReg, MIRBuilder);
4008 if (!ScalarToVector)
4009 return nullptr;
4010 InsertReg = ScalarToVector->getOperand(0).getReg();
4011 }
4012
4013 MachineInstr *LaneCopyMI =
4014 MIRBuilder.buildInstr(CopyOpc, {*DstReg}, {InsertReg}).addImm(LaneIdx);
4015 constrainSelectedInstRegOperands(*LaneCopyMI, TII, TRI, RBI);
4016
4017 // Make sure that we actually constrain the initial copy.
4018 RBI.constrainGenericRegister(*DstReg, *DstRC, MRI);
4019 return LaneCopyMI;
4020}
4021
4022bool AArch64InstructionSelector::selectExtractElt(
4023 MachineInstr &I, MachineRegisterInfo &MRI) {
4024 assert(I.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT &&
4025 "unexpected opcode!");
4026 Register DstReg = I.getOperand(0).getReg();
4027 const LLT NarrowTy = MRI.getType(DstReg);
4028 const Register SrcReg = I.getOperand(1).getReg();
4029 const LLT WideTy = MRI.getType(SrcReg);
4030 (void)WideTy;
4031 assert(WideTy.getSizeInBits() >= NarrowTy.getSizeInBits() &&
4032 "source register size too small!");
4033 assert(!NarrowTy.isVector() && "cannot extract vector into vector!");
4034
4035 // Need the lane index to determine the correct copy opcode.
4036 MachineOperand &LaneIdxOp = I.getOperand(2);
4037 assert(LaneIdxOp.isReg() && "Lane index operand was not a register?");
4038
4039 if (RBI.getRegBank(DstReg, MRI, TRI)->getID() != AArch64::FPRRegBankID) {
4040 LLVM_DEBUG(dbgs() << "Cannot extract into GPR.\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "Cannot extract into GPR.\n"
; } } while (false)
;
4041 return false;
4042 }
4043
4044 // Find the index to extract from.
4045 auto VRegAndVal = getIConstantVRegValWithLookThrough(LaneIdxOp.getReg(), MRI);
4046 if (!VRegAndVal)
4047 return false;
4048 unsigned LaneIdx = VRegAndVal->Value.getSExtValue();
4049
4050
4051 const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
4052 MachineInstr *Extract = emitExtractVectorElt(DstReg, DstRB, NarrowTy, SrcReg,
4053 LaneIdx, MIB);
4054 if (!Extract)
4055 return false;
4056
4057 I.eraseFromParent();
4058 return true;
4059}
4060
4061bool AArch64InstructionSelector::selectSplitVectorUnmerge(
4062 MachineInstr &I, MachineRegisterInfo &MRI) {
4063 unsigned NumElts = I.getNumOperands() - 1;
4064 Register SrcReg = I.getOperand(NumElts).getReg();
4065 const LLT NarrowTy = MRI.getType(I.getOperand(0).getReg());
4066 const LLT SrcTy = MRI.getType(SrcReg);
4067
4068 assert(NarrowTy.isVector() && "Expected an unmerge into vectors");
4069 if (SrcTy.getSizeInBits() > 128) {
4070 LLVM_DEBUG(dbgs() << "Unexpected vector type for vec split unmerge")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "Unexpected vector type for vec split unmerge"
; } } while (false)
;
4071 return false;
4072 }
4073
4074 // We implement a split vector operation by treating the sub-vectors as
4075 // scalars and extracting them.
4076 const RegisterBank &DstRB =
4077 *RBI.getRegBank(I.getOperand(0).getReg(), MRI, TRI);
4078 for (unsigned OpIdx = 0; OpIdx < NumElts; ++OpIdx) {
4079 Register Dst = I.getOperand(OpIdx).getReg();
4080 MachineInstr *Extract =
4081 emitExtractVectorElt(Dst, DstRB, NarrowTy, SrcReg, OpIdx, MIB);
4082 if (!Extract)
4083 return false;
4084 }
4085 I.eraseFromParent();
4086 return true;
4087}
4088
4089bool AArch64InstructionSelector::selectUnmergeValues(MachineInstr &I,
4090 MachineRegisterInfo &MRI) {
4091 assert(I.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
4092 "unexpected opcode");
4093
4094 // TODO: Handle unmerging into GPRs and from scalars to scalars.
4095 if (RBI.getRegBank(I.getOperand(0).getReg(), MRI, TRI)->getID() !=
4096 AArch64::FPRRegBankID ||
4097 RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI)->getID() !=
4098 AArch64::FPRRegBankID) {
4099 LLVM_DEBUG(dbgs() << "Unmerging vector-to-gpr and scalar-to-scalar "do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "Unmerging vector-to-gpr and scalar-to-scalar "
"currently unsupported.\n"; } } while (false)
4100 "currently unsupported.\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "Unmerging vector-to-gpr and scalar-to-scalar "
"currently unsupported.\n"; } } while (false)
;
4101 return false;
4102 }
4103
4104 // The last operand is the vector source register, and every other operand is
4105 // a register to unpack into.
4106 unsigned NumElts = I.getNumOperands() - 1;
4107 Register SrcReg = I.getOperand(NumElts).getReg();
4108 const LLT NarrowTy = MRI.getType(I.getOperand(0).getReg());
4109 const LLT WideTy = MRI.getType(SrcReg);
4110 (void)WideTy;
4111 assert((WideTy.isVector() || WideTy.getSizeInBits() == 128) &&
4112 "can only unmerge from vector or s128 types!");
4113 assert(WideTy.getSizeInBits() > NarrowTy.getSizeInBits() &&
4114 "source register size too small!");
4115
4116 if (!NarrowTy.isScalar())
4117 return selectSplitVectorUnmerge(I, MRI);
4118
4119 // Choose a lane copy opcode and subregister based off of the size of the
4120 // vector's elements.
4121 unsigned CopyOpc = 0;
4122 unsigned ExtractSubReg = 0;
4123 if (!getLaneCopyOpcode(CopyOpc, ExtractSubReg, NarrowTy.getSizeInBits()))
4124 return false;
4125
4126 // Set up for the lane copies.
4127 MachineBasicBlock &MBB = *I.getParent();
4128
4129 // Stores the registers we'll be copying from.
4130 SmallVector<Register, 4> InsertRegs;
4131
4132 // We'll use the first register twice, so we only need NumElts-1 registers.
4133 unsigned NumInsertRegs = NumElts - 1;
4134
4135 // If our elements fit into exactly 128 bits, then we can copy from the source
4136 // directly. Otherwise, we need to do a bit of setup with some subregister
4137 // inserts.
4138 if (NarrowTy.getSizeInBits() * NumElts == 128) {
4139 InsertRegs = SmallVector<Register, 4>(NumInsertRegs, SrcReg);
4140 } else {
4141 // No. We have to perform subregister inserts. For each insert, create an
4142 // implicit def and a subregister insert, and save the register we create.
4143 const TargetRegisterClass *RC =
4144 getMinClassForRegBank(*RBI.getRegBank(SrcReg, MRI, TRI),
4145 WideTy.getScalarSizeInBits() * NumElts);
4146 unsigned SubReg = 0;
4147 bool Found = getSubRegForClass(RC, TRI, SubReg);
4148 (void)Found;
4149 assert(Found && "expected to find last operand's subreg idx");
4150 for (unsigned Idx = 0; Idx < NumInsertRegs; ++Idx) {
4151 Register ImpDefReg = MRI.createVirtualRegister(&AArch64::FPR128RegClass);
4152 MachineInstr &ImpDefMI =
4153 *BuildMI(MBB, I, I.getDebugLoc(), TII.get(TargetOpcode::IMPLICIT_DEF),
4154 ImpDefReg);
4155
4156 // Now, create the subregister insert from SrcReg.
4157 Register InsertReg = MRI.createVirtualRegister(&AArch64::FPR128RegClass);
4158 MachineInstr &InsMI =
4159 *BuildMI(MBB, I, I.getDebugLoc(),
4160 TII.get(TargetOpcode::INSERT_SUBREG), InsertReg)
4161 .addUse(ImpDefReg)
4162 .addUse(SrcReg)
4163 .addImm(SubReg);
4164
4165 constrainSelectedInstRegOperands(ImpDefMI, TII, TRI, RBI);
4166 constrainSelectedInstRegOperands(InsMI, TII, TRI, RBI);
4167
4168 // Save the register so that we can copy from it after.
4169 InsertRegs.push_back(InsertReg);
4170 }
4171 }
4172
4173 // Now that we've created any necessary subregister inserts, we can
4174 // create the copies.
4175 //
4176 // Perform the first copy separately as a subregister copy.
4177 Register CopyTo = I.getOperand(0).getReg();
4178 auto FirstCopy = MIB.buildInstr(TargetOpcode::COPY, {CopyTo}, {})
4179 .addReg(InsertRegs[0], 0, ExtractSubReg);
4180 constrainSelectedInstRegOperands(*FirstCopy, TII, TRI, RBI);
4181
4182 // Now, perform the remaining copies as vector lane copies.
4183 unsigned LaneIdx = 1;
4184 for (Register InsReg : InsertRegs) {
4185 Register CopyTo = I.getOperand(LaneIdx).getReg();
4186 MachineInstr &CopyInst =
4187 *BuildMI(MBB, I, I.getDebugLoc(), TII.get(CopyOpc), CopyTo)
4188 .addUse(InsReg)
4189 .addImm(LaneIdx);
4190 constrainSelectedInstRegOperands(CopyInst, TII, TRI, RBI);
4191 ++LaneIdx;
4192 }
4193
4194 // Separately constrain the first copy's destination. Because of the
4195 // limitation in constrainOperandRegClass, we can't guarantee that this will
4196 // actually be constrained. So, do it ourselves using the second operand.
4197 const TargetRegisterClass *RC =
4198 MRI.getRegClassOrNull(I.getOperand(1).getReg());
4199 if (!RC) {
4200 LLVM_DEBUG(dbgs() << "Couldn't constrain copy destination.\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "Couldn't constrain copy destination.\n"
; } } while (false)
;
4201 return false;
4202 }
4203
4204 RBI.constrainGenericRegister(CopyTo, *RC, MRI);
4205 I.eraseFromParent();
4206 return true;
4207}
4208
4209bool AArch64InstructionSelector::selectConcatVectors(
4210 MachineInstr &I, MachineRegisterInfo &MRI) {
4211 assert(I.getOpcode() == TargetOpcode::G_CONCAT_VECTORS &&
4212 "Unexpected opcode");
4213 Register Dst = I.getOperand(0).getReg();
4214 Register Op1 = I.getOperand(1).getReg();
4215 Register Op2 = I.getOperand(2).getReg();
4216 MachineInstr *ConcatMI = emitVectorConcat(Dst, Op1, Op2, MIB);
4217 if (!ConcatMI)
4218 return false;
4219 I.eraseFromParent();
4220 return true;
4221}
4222
4223unsigned
4224AArch64InstructionSelector::emitConstantPoolEntry(const Constant *CPVal,
4225 MachineFunction &MF) const {
4226 Type *CPTy = CPVal->getType();
4227 Align Alignment = MF.getDataLayout().getPrefTypeAlign(CPTy);
4228
4229 MachineConstantPool *MCP = MF.getConstantPool();
4230 return MCP->getConstantPoolIndex(CPVal, Alignment);
4231}
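
A small usage sketch (CPVal is a placeholder Constant here): the returned index is what the ADRP in emitLoadFromConstantPool below references with the page/pageoff operand flags, and the entry's alignment comes from the DataLayout's preferred alignment as computed above.

unsigned CPIdx = emitConstantPoolEntry(CPVal, MF);
auto Adrp = MIRBuilder.buildInstr(AArch64::ADRP, {&AArch64::GPR64RegClass}, {})
                .addConstantPoolIndex(CPIdx, 0, AArch64II::MO_PAGE);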
4232
4233MachineInstr *AArch64InstructionSelector::emitLoadFromConstantPool(
4234 const Constant *CPVal, MachineIRBuilder &MIRBuilder) const {
4235 auto &MF = MIRBuilder.getMF();
4236 unsigned CPIdx = emitConstantPoolEntry(CPVal, MF);
4237
4238 auto Adrp =
4239 MIRBuilder.buildInstr(AArch64::ADRP, {&AArch64::GPR64RegClass}, {})
4240 .addConstantPoolIndex(CPIdx, 0, AArch64II::MO_PAGE);
4241
4242 MachineInstr *LoadMI = nullptr;
4243 MachinePointerInfo PtrInfo = MachinePointerInfo::getConstantPool(MF);
4244 unsigned Size = MIRBuilder.getDataLayout().getTypeStoreSize(CPVal->getType());
4245 switch (Size) {
4246 case 16:
4247 LoadMI =
4248 &*MIRBuilder
4249 .buildInstr(AArch64::LDRQui, {&AArch64::FPR128RegClass}, {Adrp})
4250 .addConstantPoolIndex(CPIdx, 0,
4251 AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
4252 break;
4253 case 8:
4254 LoadMI =
4255 &*MIRBuilder
4256 .buildInstr(AArch64::LDRDui, {&AArch64::FPR64RegClass}, {Adrp})
4257 .addConstantPoolIndex(CPIdx, 0,
4258 AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
4259 break;
4260 case 4:
4261 LoadMI =
4262 &*MIRBuilder
4263 .buildInstr(AArch64::LDRSui, {&AArch64::FPR32RegClass}, {Adrp})
4264 .addConstantPoolIndex(CPIdx, 0,
4265 AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
4266 break;
4267 case 2:
4268 LoadMI =
4269 &*MIRBuilder
4270 .buildInstr(AArch64::LDRHui, {&AArch64::FPR16RegClass}, {Adrp})
4271 .addConstantPoolIndex(CPIdx, 0,
4272 AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
4273 break;
4274 default:
4275 LLVM_DEBUG(dbgs() << "Could not load from constant pool of type "do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "Could not load from constant pool of type "
<< *CPVal->getType(); } } while (false)
4276 << *CPVal->getType())do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "Could not load from constant pool of type "
<< *CPVal->getType(); } } while (false)
;
4277 return nullptr;
4278 }
4279 LoadMI->addMemOperand(MF, MF.getMachineMemOperand(PtrInfo,
4280 MachineMemOperand::MOLoad,
4281 Size, Align(Size)));
4282 constrainSelectedInstRegOperands(*Adrp, TII, TRI, RBI);
4283 constrainSelectedInstRegOperands(*LoadMI, TII, TRI, RBI);
4284 return LoadMI;
4285}
4286
4287/// Return an <Opcode, SubregIndex> pair to do a vector elt insert of a given
4288/// size and RB.
4289static std::pair<unsigned, unsigned>
4290getInsertVecEltOpInfo(const RegisterBank &RB, unsigned EltSize) {
4291 unsigned Opc, SubregIdx;
4292 if (RB.getID() == AArch64::GPRRegBankID) {
4293 if (EltSize == 16) {
4294 Opc = AArch64::INSvi16gpr;
4295 SubregIdx = AArch64::ssub;
4296 } else if (EltSize == 32) {
4297 Opc = AArch64::INSvi32gpr;
4298 SubregIdx = AArch64::ssub;
4299 } else if (EltSize == 64) {
4300 Opc = AArch64::INSvi64gpr;
4301 SubregIdx = AArch64::dsub;
4302 } else {
4303 llvm_unreachable("invalid elt size!")::llvm::llvm_unreachable_internal("invalid elt size!", "llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 4303)
;
4304 }
4305 } else {
4306 if (EltSize == 8) {
4307 Opc = AArch64::INSvi8lane;
4308 SubregIdx = AArch64::bsub;
4309 } else if (EltSize == 16) {
4310 Opc = AArch64::INSvi16lane;
4311 SubregIdx = AArch64::hsub;
4312 } else if (EltSize == 32) {
4313 Opc = AArch64::INSvi32lane;
4314 SubregIdx = AArch64::ssub;
4315 } else if (EltSize == 64) {
4316 Opc = AArch64::INSvi64lane;
4317 SubregIdx = AArch64::dsub;
4318 } else {
4319 llvm_unreachable("invalid elt size!")::llvm::llvm_unreachable_internal("invalid elt size!", "llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 4319)
;
4320 }
4321 }
4322 return std::make_pair(Opc, SubregIdx);
4323}
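
Callers unpack the returned pair with std::tie, as emitVectorConcat does further down; a sketch for a 32-bit element on the FPR bank (bank and element size assumed for illustration):

unsigned InsertOpc, InsSubRegIdx;
std::tie(InsertOpc, InsSubRegIdx) =
    getInsertVecEltOpInfo(FPRBank, /*EltSize=*/32);
// FPR bank + 32-bit element -> AArch64::INSvi32lane and AArch64::ssub.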
4324
4325MachineInstr *AArch64InstructionSelector::emitInstr(
4326 unsigned Opcode, std::initializer_list<llvm::DstOp> DstOps,
4327 std::initializer_list<llvm::SrcOp> SrcOps, MachineIRBuilder &MIRBuilder,
4328 const ComplexRendererFns &RenderFns) const {
4329 assert(Opcode && "Expected an opcode?");
4330 assert(!isPreISelGenericOpcode(Opcode) &&
4331 "Function should only be used to produce selected instructions!");
4332 auto MI = MIRBuilder.buildInstr(Opcode, DstOps, SrcOps);
4333 if (RenderFns)
4334 for (auto &Fn : *RenderFns)
4335 Fn(MI);
4336 constrainSelectedInstRegOperands(*MI, TII, TRI, RBI);
4337 return &*MI;
4338}
4339
4340MachineInstr *AArch64InstructionSelector::emitAddSub(
4341 const std::array<std::array<unsigned, 2>, 5> &AddrModeAndSizeToOpcode,
4342 Register Dst, MachineOperand &LHS, MachineOperand &RHS,
4343 MachineIRBuilder &MIRBuilder) const {
4344 MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
4345 assert(LHS.isReg() && RHS.isReg() && "Expected register operands?");
4346 auto Ty = MRI.getType(LHS.getReg());
4347 assert(!Ty.isVector() && "Expected a scalar or pointer?");
4348 unsigned Size = Ty.getSizeInBits();
4349 assert((Size == 32 || Size == 64) && "Expected a 32-bit or 64-bit type only");
4350 bool Is32Bit = Size == 32;
4351
4352 // INSTRri form with positive arithmetic immediate.
4353 if (auto Fns = selectArithImmed(RHS))
4354 return emitInstr(AddrModeAndSizeToOpcode[0][Is32Bit], {Dst}, {LHS},
4355 MIRBuilder, Fns);
4356
4357 // INSTRri form with negative arithmetic immediate.
4358 if (auto Fns = selectNegArithImmed(RHS))
4359 return emitInstr(AddrModeAndSizeToOpcode[3][Is32Bit], {Dst}, {LHS},
4360 MIRBuilder, Fns);
4361
4362 // INSTRrx form.
4363 if (auto Fns = selectArithExtendedRegister(RHS))
4364 return emitInstr(AddrModeAndSizeToOpcode[4][Is32Bit], {Dst}, {LHS},
4365 MIRBuilder, Fns);
4366
4367 // INSTRrs form.
4368 if (auto Fns = selectShiftedRegister(RHS))
4369 return emitInstr(AddrModeAndSizeToOpcode[1][Is32Bit], {Dst}, {LHS},
4370 MIRBuilder, Fns);
4371 return emitInstr(AddrModeAndSizeToOpcode[2][Is32Bit], {Dst}, {LHS, RHS},
4372 MIRBuilder);
4373}
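
A rough worked example of the priority order above (positive immediate, negative immediate, extended register, shifted register, plain register); operand and register names are placeholders. Adding a small constant takes the first branch, and the table row together with Is32Bit picks the W-form immediate opcode.

// For a 32-bit G_ADD whose RHS is a constant such as 42,
// selectArithImmed(RHS) succeeds, so emitADD resolves to
// OpcTable[0][/*Is32Bit=*/1] == AArch64::ADDWri and the renderer
// functions append the encoded immediate operands.
MachineInstr *Add = emitADD(DstReg, LHS, RHS, MIB);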
4374
4375MachineInstr *
4376AArch64InstructionSelector::emitADD(Register DefReg, MachineOperand &LHS,
4377 MachineOperand &RHS,
4378 MachineIRBuilder &MIRBuilder) const {
4379 const std::array<std::array<unsigned, 2>, 5> OpcTable{
4380 {{AArch64::ADDXri, AArch64::ADDWri},
4381 {AArch64::ADDXrs, AArch64::ADDWrs},
4382 {AArch64::ADDXrr, AArch64::ADDWrr},
4383 {AArch64::SUBXri, AArch64::SUBWri},
4384 {AArch64::ADDXrx, AArch64::ADDWrx}}};
4385 return emitAddSub(OpcTable, DefReg, LHS, RHS, MIRBuilder);
4386}
4387
4388MachineInstr *
4389AArch64InstructionSelector::emitADDS(Register Dst, MachineOperand &LHS,
4390 MachineOperand &RHS,
4391 MachineIRBuilder &MIRBuilder) const {
4392 const std::array<std::array<unsigned, 2>, 5> OpcTable{
4393 {{AArch64::ADDSXri, AArch64::ADDSWri},
4394 {AArch64::ADDSXrs, AArch64::ADDSWrs},
4395 {AArch64::ADDSXrr, AArch64::ADDSWrr},
4396 {AArch64::SUBSXri, AArch64::SUBSWri},
4397 {AArch64::ADDSXrx, AArch64::ADDSWrx}}};
4398 return emitAddSub(OpcTable, Dst, LHS, RHS, MIRBuilder);
4399}
4400
4401MachineInstr *
4402AArch64InstructionSelector::emitSUBS(Register Dst, MachineOperand &LHS,
4403 MachineOperand &RHS,
4404 MachineIRBuilder &MIRBuilder) const {
4405 const std::array<std::array<unsigned, 2>, 5> OpcTable{
4406 {{AArch64::SUBSXri, AArch64::SUBSWri},
4407 {AArch64::SUBSXrs, AArch64::SUBSWrs},
4408 {AArch64::SUBSXrr, AArch64::SUBSWrr},
4409 {AArch64::ADDSXri, AArch64::ADDSWri},
4410 {AArch64::SUBSXrx, AArch64::SUBSWrx}}};
4411 return emitAddSub(OpcTable, Dst, LHS, RHS, MIRBuilder);
4412}
4413
4414MachineInstr *
4415AArch64InstructionSelector::emitCMN(MachineOperand &LHS, MachineOperand &RHS,
4416 MachineIRBuilder &MIRBuilder) const {
4417 MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
4418 bool Is32Bit = (MRI.getType(LHS.getReg()).getSizeInBits() == 32);
4419 auto RC = Is32Bit ? &AArch64::GPR32RegClass : &AArch64::GPR64RegClass;
4420 return emitADDS(MRI.createVirtualRegister(RC), LHS, RHS, MIRBuilder);
4421}
4422
4423MachineInstr *
4424AArch64InstructionSelector::emitTST(MachineOperand &LHS, MachineOperand &RHS,
4425 MachineIRBuilder &MIRBuilder) const {
4426 assert(LHS.isReg() && RHS.isReg() && "Expected register operands?");
4427 MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
4428 LLT Ty = MRI.getType(LHS.getReg());
4429 unsigned RegSize = Ty.getSizeInBits();
4430 bool Is32Bit = (RegSize == 32);
4431 const unsigned OpcTable[3][2] = {{AArch64::ANDSXri, AArch64::ANDSWri},
4432 {AArch64::ANDSXrs, AArch64::ANDSWrs},
4433 {AArch64::ANDSXrr, AArch64::ANDSWrr}};
4434 // ANDS needs a logical immediate for its immediate form. Check if we can
4435 // fold one in.
4436 if (auto ValAndVReg = getIConstantVRegValWithLookThrough(RHS.getReg(), MRI)) {
4437 int64_t Imm = ValAndVReg->Value.getSExtValue();
4438
4439 if (AArch64_AM::isLogicalImmediate(Imm, RegSize)) {
4440 auto TstMI = MIRBuilder.buildInstr(OpcTable[0][Is32Bit], {Ty}, {LHS});
4441 TstMI.addImm(AArch64_AM::encodeLogicalImmediate(Imm, RegSize));
4442 constrainSelectedInstRegOperands(*TstMI, TII, TRI, RBI);
4443 return &*TstMI;
4444 }
4445 }
4446
4447 if (auto Fns = selectLogicalShiftedRegister(RHS))
4448 return emitInstr(OpcTable[1][Is32Bit], {Ty}, {LHS}, MIRBuilder, Fns);
4449 return emitInstr(OpcTable[2][Is32Bit], {Ty}, {LHS, RHS}, MIRBuilder);
4450}
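
The logical-immediate fold here is the same pattern the G_SELECT path near the top of this section uses to test bit 0 of a condition register; a sketch assuming a 32-bit LHS and a constant RHS of 1:

// With RHS == 1 and RegSize == 32, isLogicalImmediate(1, 32) holds, so
// this emits ANDSWri LHS, encodeLogicalImmediate(1, 32) and returns the
// flag-setting instruction instead of materializing the constant.
MachineInstr *Tst = emitTST(LHS, RHS, MIB);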
4451
4452MachineInstr *AArch64InstructionSelector::emitIntegerCompare(
4453 MachineOperand &LHS, MachineOperand &RHS, MachineOperand &Predicate,
4454 MachineIRBuilder &MIRBuilder) const {
4455 assert(LHS.isReg() && RHS.isReg() && "Expected LHS and RHS to be registers!");
4456 assert(Predicate.isPredicate() && "Expected predicate?");
4457 MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
4458 LLT CmpTy = MRI.getType(LHS.getReg());
4459 assert(!CmpTy.isVector() && "Expected scalar or pointer");
4460 unsigned Size = CmpTy.getSizeInBits();
4461 (void)Size;
4462 assert((Size == 32 || Size == 64) && "Expected a 32-bit or 64-bit LHS/RHS?");
4463 // Fold the compare into a cmn or tst if possible.
4464 if (auto FoldCmp = tryFoldIntegerCompare(LHS, RHS, Predicate, MIRBuilder))
4465 return FoldCmp;
4466 auto Dst = MRI.cloneVirtualRegister(LHS.getReg());
4467 return emitSUBS(Dst, LHS, RHS, MIRBuilder);
4468}
4469
4470MachineInstr *AArch64InstructionSelector::emitCSetForFCmp(
4471 Register Dst, CmpInst::Predicate Pred, MachineIRBuilder &MIRBuilder) const {
4472 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
4473#ifndef NDEBUG
4474 LLT Ty = MRI.getType(Dst);
4475 assert(!Ty.isVector() && Ty.getSizeInBits() == 32 &&
4476 "Expected a 32-bit scalar register?");
4477#endif
4478 const Register ZReg = AArch64::WZR;
4479 AArch64CC::CondCode CC1, CC2;
4480 changeFCMPPredToAArch64CC(Pred, CC1, CC2);
4481 auto InvCC1 = AArch64CC::getInvertedCondCode(CC1);
4482 if (CC2 == AArch64CC::AL)
4483 return emitCSINC(/*Dst=*/Dst, /*Src1=*/ZReg, /*Src2=*/ZReg, InvCC1,
4484 MIRBuilder);
4485 const TargetRegisterClass *RC = &AArch64::GPR32RegClass;
4486 Register Def1Reg = MRI.createVirtualRegister(RC);
4487 Register Def2Reg = MRI.createVirtualRegister(RC);
4488 auto InvCC2 = AArch64CC::getInvertedCondCode(CC2);
4489 emitCSINC(/*Dst=*/Def1Reg, /*Src1=*/ZReg, /*Src2=*/ZReg, InvCC1, MIRBuilder);
4490 emitCSINC(/*Dst=*/Def2Reg, /*Src1=*/ZReg, /*Src2=*/ZReg, InvCC2, MIRBuilder);
4491 auto OrMI = MIRBuilder.buildInstr(AArch64::ORRWrr, {Dst}, {Def1Reg, Def2Reg});
4492 constrainSelectedInstRegOperands(*OrMI, TII, TRI, RBI);
4493 return &*OrMI;
4494}
4495
4496MachineInstr *
4497AArch64InstructionSelector::emitFPCompare(Register LHS, Register RHS,
4498 MachineIRBuilder &MIRBuilder,
4499 Optional<CmpInst::Predicate> Pred) const {
4500 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
4501 LLT Ty = MRI.getType(LHS);
4502 if (Ty.isVector())
4503 return nullptr;
4504 unsigned OpSize = Ty.getSizeInBits();
4505 if (OpSize != 32 && OpSize != 64)
4506 return nullptr;
4507
4508 // If this is a compare against +0.0, then we don't have
4509 // to explicitly materialize a constant.
4510 const ConstantFP *FPImm = getConstantFPVRegVal(RHS, MRI);
4511 bool ShouldUseImm = FPImm && (FPImm->isZero() && !FPImm->isNegative());
4512
4513 auto IsEqualityPred = [](CmpInst::Predicate P) {
4514 return P == CmpInst::FCMP_OEQ || P == CmpInst::FCMP_ONE ||
4515 P == CmpInst::FCMP_UEQ || P == CmpInst::FCMP_UNE;
4516 };
4517 if (!ShouldUseImm && Pred && IsEqualityPred(*Pred)) {
4518 // Try commutating the operands.
4519 const ConstantFP *LHSImm = getConstantFPVRegVal(LHS, MRI);
4520 if (LHSImm && (LHSImm->isZero() && !LHSImm->isNegative())) {
4521 ShouldUseImm = true;
4522 std::swap(LHS, RHS);
4523 }
4524 }
4525 unsigned CmpOpcTbl[2][2] = {{AArch64::FCMPSrr, AArch64::FCMPDrr},
4526 {AArch64::FCMPSri, AArch64::FCMPDri}};
4527 unsigned CmpOpc = CmpOpcTbl[ShouldUseImm][OpSize == 64];
4528
4529 // Partially build the compare. Decide if we need to add a use for the
4530 // third operand based off whether or not we're comparing against 0.0.
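// [Editor's illustration, not part of the analyzed source] For a 64-bit
// compare whose RHS is the constant +0.0, ShouldUseImm is true and
// CmpOpcTbl[1][1] picks FCMPDri, so only the LHS use is added; a plain 64-bit
// register/register compare uses CmpOpcTbl[0][1] (FCMPDrr) and gets both uses.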
4531 auto CmpMI = MIRBuilder.buildInstr(CmpOpc).addUse(LHS);
4532 if (!ShouldUseImm)
4533 CmpMI.addUse(RHS);
4534 constrainSelectedInstRegOperands(*CmpMI, TII, TRI, RBI);
4535 return &*CmpMI;
4536}
4537
4538MachineInstr *AArch64InstructionSelector::emitVectorConcat(
4539 Optional<Register> Dst, Register Op1, Register Op2,
4540 MachineIRBuilder &MIRBuilder) const {
4541 // We implement a vector concat by:
4542 // 1. Use scalar_to_vector to insert the lower vector into the larger dest
4543 // 2. Insert the upper vector into the destination's upper element
4544 // TODO: some of this code is common with G_BUILD_VECTOR handling.
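// [Editor's illustration, not part of the analyzed source] Concatenating two
// 64-bit vectors, e.g. two <2 x s32> values, proceeds as: widen each operand
// into a 128-bit register with emitScalarToVector (treating the 64-bit vector
// as a single s64 element), then use the lane-insert opcode returned by
// getInsertVecEltOpInfo to place the second operand into lane 1 of the result.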
4545 MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
4546
4547 const LLT Op1Ty = MRI.getType(Op1);
4548 const LLT Op2Ty = MRI.getType(Op2);
4549
4550 if (Op1Ty != Op2Ty) {
4551 LLVM_DEBUG(dbgs() << "Could not do vector concat of differing vector tys");
4552 return nullptr;
4553 }
4554 assert(Op1Ty.isVector() && "Expected a vector for vector concat");
4555
4556 if (Op1Ty.getSizeInBits() >= 128) {
4557 LLVM_DEBUG(dbgs() << "Vector concat not supported for full size vectors");
4558 return nullptr;
4559 }
4560
4561 // At the moment we just support 64 bit vector concats.
4562 if (Op1Ty.getSizeInBits() != 64) {
4563 LLVM_DEBUG(dbgs() << "Vector concat supported for 64b vectors");
4564 return nullptr;
4565 }
4566
4567 const LLT ScalarTy = LLT::scalar(Op1Ty.getSizeInBits());
4568 const RegisterBank &FPRBank = *RBI.getRegBank(Op1, MRI, TRI);
4569 const TargetRegisterClass *DstRC =
4570 getMinClassForRegBank(FPRBank, Op1Ty.getSizeInBits() * 2);
4571
4572 MachineInstr *WidenedOp1 =
4573 emitScalarToVector(ScalarTy.getSizeInBits(), DstRC, Op1, MIRBuilder);
4574 MachineInstr *WidenedOp2 =
4575 emitScalarToVector(ScalarTy.getSizeInBits(), DstRC, Op2, MIRBuilder);
4576 if (!WidenedOp1 || !WidenedOp2) {
4577 LLVM_DEBUG(dbgs() << "Could not emit a vector from scalar value");
4578 return nullptr;
4579 }
4580
4581 // Now do the insert of the upper element.
4582 unsigned InsertOpc, InsSubRegIdx;
4583 std::tie(InsertOpc, InsSubRegIdx) =
4584 getInsertVecEltOpInfo(FPRBank, ScalarTy.getSizeInBits());
4585
4586 if (!Dst)
4587 Dst = MRI.createVirtualRegister(DstRC);
4588 auto InsElt =
4589 MIRBuilder
4590 .buildInstr(InsertOpc, {*Dst}, {WidenedOp1->getOperand(0).getReg()})
4591 .addImm(1) /* Lane index */
4592 .addUse(WidenedOp2->getOperand(0).getReg())
4593 .addImm(0);
4594 constrainSelectedInstRegOperands(*InsElt, TII, TRI, RBI);
4595 return &*InsElt;
4596}
4597
4598MachineInstr *
4599AArch64InstructionSelector::emitCSINC(Register Dst, Register Src1,
4600 Register Src2, AArch64CC::CondCode Pred,
4601 MachineIRBuilder &MIRBuilder) const {
4602 auto &MRI = *MIRBuilder.getMRI();
4603 const RegClassOrRegBank &RegClassOrBank = MRI.getRegClassOrRegBank(Dst);
4604 // If we used a register class, then this won't necessarily have an LLT.
4605 // Compute the size based off whether or not we have a class or bank.
4606 unsigned Size;
4607 if (const auto *RC = RegClassOrBank.dyn_cast<const TargetRegisterClass *>())
4608 Size = TRI.getRegSizeInBits(*RC);
4609 else
4610 Size = MRI.getType(Dst).getSizeInBits();
4611 // Some opcodes use s1.
4612 assert(Size <= 64 && "Expected 64 bits or less only!");
4613 static const unsigned OpcTable[2] = {AArch64::CSINCWr, AArch64::CSINCXr};
4614 unsigned Opc = OpcTable[Size == 64];
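// [Editor's illustration, not part of the analyzed source] Any destination
// size below 64 bits (including the s1 results mentioned above) indexes
// OpcTable[0] and gets CSINCWr; only a 64-bit destination selects CSINCXr.
// With Src1 == Src2 == WZR and an inverted condition this is the "cset"-style
// sequence used by emitCSetForFCmp earlier in this file.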
4615 auto CSINC = MIRBuilder.buildInstr(Opc, {Dst}, {Src1, Src2}).addImm(Pred);
4616 constrainSelectedInstRegOperands(*CSINC, TII, TRI, RBI);
4617 return &*CSINC;
4618}
4619
4620std::pair<MachineInstr *, AArch64CC::CondCode>
4621AArch64InstructionSelector::emitOverflowOp(unsigned Opcode, Register Dst,
4622 MachineOperand &LHS,
4623 MachineOperand &RHS,
4624 MachineIRBuilder &MIRBuilder) const {
4625 switch (Opcode) {
4626 default:
4627 llvm_unreachable("Unexpected opcode!");
4628 case TargetOpcode::G_SADDO:
4629 return std::make_pair(emitADDS(Dst, LHS, RHS, MIRBuilder), AArch64CC::VS);
4630 case TargetOpcode::G_UADDO:
4631 return std::make_pair(emitADDS(Dst, LHS, RHS, MIRBuilder), AArch64CC::HS);
4632 case TargetOpcode::G_SSUBO:
4633 return std::make_pair(emitSUBS(Dst, LHS, RHS, MIRBuilder), AArch64CC::VS);
4634 case TargetOpcode::G_USUBO:
4635 return std::make_pair(emitSUBS(Dst, LHS, RHS, MIRBuilder), AArch64CC::LO);
4636 }
4637}
4638
4639bool AArch64InstructionSelector::tryOptSelect(MachineInstr &I) {
4640 MachineRegisterInfo &MRI = *MIB.getMRI();
4641 // We want to recognize this pattern:
4642 //
4643 // $z = G_FCMP pred, $x, $y
4644 // ...
4645 // $w = G_SELECT $z, $a, $b
4646 //
4647 // Where the value of $z is *only* ever used by the G_SELECT (possibly with
4648 // some copies/truncs in between.)
4649 //
4650 // If we see this, then we can emit something like this:
4651 //
4652 // fcmp $x, $y
4653 // fcsel $w, $a, $b, pred
4654 //
4655 // Rather than emitting both of the rather long sequences in the standard
4656 // G_FCMP/G_SELECT select methods.
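// [Editor's illustration, not part of the analyzed source] The walk below also
// looks through intermediate copies/truncs, e.g.:
//
// $c = G_FCMP pred, $x, $y
// $t = G_TRUNC $c
// $w = G_SELECT $t, $a, $b
//
// as long as $c and $t are only used by the select (or by other selects).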
4657
4658 // First, check if the condition is defined by a compare.
4659 MachineInstr *CondDef = MRI.getVRegDef(I.getOperand(1).getReg());
4660 while (CondDef) {
4661 // We can only fold if all of the defs have one use.
4662 Register CondDefReg = CondDef->getOperand(0).getReg();
4663 if (!MRI.hasOneNonDBGUse(CondDefReg)) {
4664 // Unless it's another select.
4665 for (const MachineInstr &UI : MRI.use_nodbg_instructions(CondDefReg)) {
4666 if (CondDef == &UI)
4667 continue;
4668 if (UI.getOpcode() != TargetOpcode::G_SELECT)
4669 return false;
4670 }
4671 }
4672
4673 // We can skip over G_TRUNC since the condition is 1-bit.
4674 // Truncating/extending can have no impact on the value.
4675 unsigned Opc = CondDef->getOpcode();
4676 if (Opc != TargetOpcode::COPY && Opc != TargetOpcode::G_TRUNC)
4677 break;
4678
4679 // Can't see past copies from physregs.
4680 if (Opc == TargetOpcode::COPY &&
4681 Register::isPhysicalRegister(CondDef->getOperand(1).getReg()))
4682 return false;
4683
4684 CondDef = MRI.getVRegDef(CondDef->getOperand(1).getReg());
4685 }
4686
4687 // Is the condition defined by a compare?
4688 if (!CondDef)
4689 return false;
4690
4691 unsigned CondOpc = CondDef->getOpcode();
4692 if (CondOpc != TargetOpcode::G_ICMP && CondOpc != TargetOpcode::G_FCMP)
4693 return false;
4694
4695 AArch64CC::CondCode CondCode;
4696 if (CondOpc == TargetOpcode::G_ICMP) {
4697 auto Pred =
4698 static_cast<CmpInst::Predicate>(CondDef->getOperand(1).getPredicate());
4699 CondCode = changeICMPPredToAArch64CC(Pred);
4700 emitIntegerCompare(CondDef->getOperand(2), CondDef->getOperand(3),
4701 CondDef->getOperand(1), MIB);
4702 } else {
4703 // Get the condition code for the select.
4704 auto Pred =
4705 static_cast<CmpInst::Predicate>(CondDef->getOperand(1).getPredicate());
4706 AArch64CC::CondCode CondCode2;
4707 changeFCMPPredToAArch64CC(Pred, CondCode, CondCode2);
4708
4709 // changeFCMPPredToAArch64CC sets CondCode2 to AL when we require two
4710 // instructions to emit the comparison.
4711 // TODO: Handle FCMP_UEQ and FCMP_ONE. After that, this check will be
4712 // unnecessary.
4713 if (CondCode2 != AArch64CC::AL)
4714 return false;
4715
4716 if (!emitFPCompare(CondDef->getOperand(2).getReg(),
4717 CondDef->getOperand(3).getReg(), MIB)) {
4718 LLVM_DEBUG(dbgs() << "Couldn't emit compare for select!\n");
4719 return false;
4720 }
4721 }
4722
4723 // Emit the select.
4724 emitSelect(I.getOperand(0).getReg(), I.getOperand(2).getReg(),
4725 I.getOperand(3).getReg(), CondCode, MIB);
4726 I.eraseFromParent();
4727 return true;
4728}
4729
4730MachineInstr *AArch64InstructionSelector::tryFoldIntegerCompare(
4731 MachineOperand &LHS, MachineOperand &RHS, MachineOperand &Predicate,
4732 MachineIRBuilder &MIRBuilder) const {
4733 assert(LHS.isReg() && RHS.isReg() && Predicate.isPredicate() &&
4734 "Unexpected MachineOperand");
4735 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
4736 // We want to find this sort of thing:
4737 // x = G_SUB 0, y
4738 // G_ICMP z, x
4739 //
4740 // In this case, we can fold the G_SUB into the G_ICMP using a CMN instead.
4741 // e.g:
4742 //
4743 // cmn z, y
4744
4745 // Check if the RHS or LHS of the G_ICMP is defined by a SUB
4746 MachineInstr *LHSDef = getDefIgnoringCopies(LHS.getReg(), MRI);
4747 MachineInstr *RHSDef = getDefIgnoringCopies(RHS.getReg(), MRI);
4748 auto P = static_cast<CmpInst::Predicate>(Predicate.getPredicate());
4749 // Given this:
4750 //
4751 // x = G_SUB 0, y
4752 // G_ICMP x, z
4753 //
4754 // Produce this:
4755 //
4756 // cmn y, z
4757 if (isCMN(LHSDef, P, MRI))
4758 return emitCMN(LHSDef->getOperand(2), RHS, MIRBuilder);
4759
4760 // Same idea here, but with the RHS of the compare instead:
4761 //
4762 // Given this:
4763 //
4764 // x = G_SUB 0, y
4765 // G_ICMP z, x
4766 //
4767 // Produce this:
4768 //
4769 // cmn z, y
4770 if (isCMN(RHSDef, P, MRI))
4771 return emitCMN(LHS, RHSDef->getOperand(2), MIRBuilder);
4772
4773 // Given this:
4774 //
4775 // z = G_AND x, y
4776 // G_ICMP z, 0
4777 //
4778 // Produce this if the compare is signed:
4779 //
4780 // tst x, y
4781 if (!CmpInst::isUnsigned(P) && LHSDef &&
4782 LHSDef->getOpcode() == TargetOpcode::G_AND) {
4783 // Make sure that the RHS is 0.
4784 auto ValAndVReg = getIConstantVRegValWithLookThrough(RHS.getReg(), MRI);
4785 if (!ValAndVReg || ValAndVReg->Value != 0)
4786 return nullptr;
4787
4788 return emitTST(LHSDef->getOperand(1),
4789 LHSDef->getOperand(2), MIRBuilder);
4790 }
4791
4792 return nullptr;
4793}
4794
4795bool AArch64InstructionSelector::selectShuffleVector(
4796 MachineInstr &I, MachineRegisterInfo &MRI) {
4797 const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
4798 Register Src1Reg = I.getOperand(1).getReg();
4799 const LLT Src1Ty = MRI.getType(Src1Reg);
4800 Register Src2Reg = I.getOperand(2).getReg();
4801 const LLT Src2Ty = MRI.getType(Src2Reg);
4802 ArrayRef<int> Mask = I.getOperand(3).getShuffleMask();
4803
4804 MachineBasicBlock &MBB = *I.getParent();
4805 MachineFunction &MF = *MBB.getParent();
4806 LLVMContext &Ctx = MF.getFunction().getContext();
4807
4808 // G_SHUFFLE_VECTOR is weird in that the source operands can be scalars, if
4809 // it's originated from a <1 x T> type. Those should have been lowered into
4810 // G_BUILD_VECTOR earlier.
4811 if (!Src1Ty.isVector() || !Src2Ty.isVector()) {
4812 LLVM_DEBUG(dbgs() << "Could not select a \"scalar\" G_SHUFFLE_VECTOR\n");
4813 return false;
4814 }
4815
4816 unsigned BytesPerElt = DstTy.getElementType().getSizeInBits() / 8;
4817
4818 SmallVector<Constant *, 64> CstIdxs;
4819 for (int Val : Mask) {
4820 // For now, any undef indexes we'll just assume to be 0. This should be
4821 // optimized in future, e.g. to select DUP etc.
4822 Val = Val < 0 ? 0 : Val;
4823 for (unsigned Byte = 0; Byte < BytesPerElt; ++Byte) {
4824 unsigned Offset = Byte + Val * BytesPerElt;
4825 CstIdxs.emplace_back(ConstantInt::get(Type::getInt8Ty(Ctx), Offset));
4826 }
4827 }
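// [Editor's example, not part of the analyzed source] For a <2 x s32> shuffle
// with mask <1, 0>, BytesPerElt is 4, so CstIdxs ends up as the byte indices
// 4,5,6,7, 0,1,2,3: exactly the TBL index vector that swaps the two lanes.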
4828
4829 // Use a constant pool to load the index vector for TBL.
4830 Constant *CPVal = ConstantVector::get(CstIdxs);
4831 MachineInstr *IndexLoad = emitLoadFromConstantPool(CPVal, MIB);
4832 if (!IndexLoad) {
4833 LLVM_DEBUG(dbgs() << "Could not load from a constant pool");
4834 return false;
4835 }
4836
4837 if (DstTy.getSizeInBits() != 128) {
4838 assert(DstTy.getSizeInBits() == 64 && "Unexpected shuffle result ty");
4839 // This case can be done with TBL1.
4840 MachineInstr *Concat = emitVectorConcat(None, Src1Reg, Src2Reg, MIB);
4841 if (!Concat) {
4842 LLVM_DEBUG(dbgs() << "Could not do vector concat for tbl1");
4843 return false;
4844 }
4845
4846 // The constant pool load will be 64 bits, so need to convert to FPR128 reg.
4847 IndexLoad = emitScalarToVector(64, &AArch64::FPR128RegClass,
4848 IndexLoad->getOperand(0).getReg(), MIB);
4849
4850 auto TBL1 = MIB.buildInstr(
4851 AArch64::TBLv16i8One, {&AArch64::FPR128RegClass},
4852 {Concat->getOperand(0).getReg(), IndexLoad->getOperand(0).getReg()});
4853 constrainSelectedInstRegOperands(*TBL1, TII, TRI, RBI);
4854
4855 auto Copy =
4856 MIB.buildInstr(TargetOpcode::COPY, {I.getOperand(0).getReg()}, {})
4857 .addReg(TBL1.getReg(0), 0, AArch64::dsub);
4858 RBI.constrainGenericRegister(Copy.getReg(0), AArch64::FPR64RegClass, MRI);
4859 I.eraseFromParent();
4860 return true;
4861 }
4862
4863 // For TBL2 we need to emit a REG_SEQUENCE to tie together two consecutive
4864 // Q registers for regalloc.
4865 SmallVector<Register, 2> Regs = {Src1Reg, Src2Reg};
4866 auto RegSeq = createQTuple(Regs, MIB);
4867 auto TBL2 = MIB.buildInstr(AArch64::TBLv16i8Two, {I.getOperand(0)},
4868 {RegSeq, IndexLoad->getOperand(0)});
4869 constrainSelectedInstRegOperands(*TBL2, TII, TRI, RBI);
4870 I.eraseFromParent();
4871 return true;
4872}
4873
4874MachineInstr *AArch64InstructionSelector::emitLaneInsert(
4875 Optional<Register> DstReg, Register SrcReg, Register EltReg,
4876 unsigned LaneIdx, const RegisterBank &RB,
4877 MachineIRBuilder &MIRBuilder) const {
4878 MachineInstr *InsElt = nullptr;
4879 const TargetRegisterClass *DstRC = &AArch64::FPR128RegClass;
4880 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
4881
4882 // Create a register to define with the insert if one wasn't passed in.
4883 if (!DstReg)
4884 DstReg = MRI.createVirtualRegister(DstRC);
4885
4886 unsigned EltSize = MRI.getType(EltReg).getSizeInBits();
4887 unsigned Opc = getInsertVecEltOpInfo(RB, EltSize).first;
4888
4889 if (RB.getID() == AArch64::FPRRegBankID) {
4890 auto InsSub = emitScalarToVector(EltSize, DstRC, EltReg, MIRBuilder);
4891 InsElt = MIRBuilder.buildInstr(Opc, {*DstReg}, {SrcReg})
4892 .addImm(LaneIdx)
4893 .addUse(InsSub->getOperand(0).getReg())
4894 .addImm(0);
4895 } else {
4896 InsElt = MIRBuilder.buildInstr(Opc, {*DstReg}, {SrcReg})
4897 .addImm(LaneIdx)
4898 .addUse(EltReg);
4899 }
4900
4901 constrainSelectedInstRegOperands(*InsElt, TII, TRI, RBI);
4902 return InsElt;
4903}
4904
4905bool AArch64InstructionSelector::selectUSMovFromExtend(
4906 MachineInstr &MI, MachineRegisterInfo &MRI) {
4907 if (MI.getOpcode() != TargetOpcode::G_SEXT &&
4908 MI.getOpcode() != TargetOpcode::G_ZEXT &&
4909 MI.getOpcode() != TargetOpcode::G_ANYEXT)
4910 return false;
4911 bool IsSigned = MI.getOpcode() == TargetOpcode::G_SEXT;
4912 const Register DefReg = MI.getOperand(0).getReg();
4913 const LLT DstTy = MRI.getType(DefReg);
4914 unsigned DstSize = DstTy.getSizeInBits();
4915
4916 if (DstSize != 32 && DstSize != 64)
4917 return false;
4918
4919 MachineInstr *Extract = getOpcodeDef(TargetOpcode::G_EXTRACT_VECTOR_ELT,
4920 MI.getOperand(1).getReg(), MRI);
4921 int64_t Lane;
4922 if (!Extract || !mi_match(Extract->getOperand(2).getReg(), MRI, m_ICst(Lane)))
4923 return false;
4924 Register Src0 = Extract->getOperand(1).getReg();
4925
4926 const LLT &VecTy = MRI.getType(Src0);
4927
4928 if (VecTy.getSizeInBits() != 128) {
4929 const MachineInstr *ScalarToVector = emitScalarToVector(
4930 VecTy.getSizeInBits(), &AArch64::FPR128RegClass, Src0, MIB);
4931 assert(ScalarToVector && "Didn't expect emitScalarToVector to fail!");
4932 Src0 = ScalarToVector->getOperand(0).getReg();
4933 }
4934
4935 unsigned Opcode;
4936 if (DstSize == 64 && VecTy.getScalarSizeInBits() == 32)
4937 Opcode = IsSigned ? AArch64::SMOVvi32to64 : AArch64::UMOVvi32;
4938 else if (DstSize == 64 && VecTy.getScalarSizeInBits() == 16)
4939 Opcode = IsSigned ? AArch64::SMOVvi16to64 : AArch64::UMOVvi16;
4940 else if (DstSize == 64 && VecTy.getScalarSizeInBits() == 8)
4941 Opcode = IsSigned ? AArch64::SMOVvi8to64 : AArch64::UMOVvi8;
4942 else if (DstSize == 32 && VecTy.getScalarSizeInBits() == 16)
4943 Opcode = IsSigned ? AArch64::SMOVvi16to32 : AArch64::UMOVvi16;
4944 else if (DstSize == 32 && VecTy.getScalarSizeInBits() == 8)
4945 Opcode = IsSigned ? AArch64::SMOVvi8to32 : AArch64::UMOVvi8;
4946 else
4947 llvm_unreachable("Unexpected type combo for S/UMov!");
4948
4949 // We may need to generate one of these, depending on the type and sign of the
4950 // input:
4951 // DstReg = SMOV Src0, Lane;
4952 // NewReg = UMOV Src0, Lane; DstReg = SUBREG_TO_REG NewReg, sub_32;
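// [Editor's example, not part of the analyzed source] For a G_ZEXT to s64 of
// lane 3 of an <8 x s16> vector, the code below emits
// NewReg = UMOVvi16 Src0, 3; DefReg = SUBREG_TO_REG 0, NewReg, sub_32
// whereas the signed variant uses SMOVvi16to64 to write DefReg directly.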
4953 MachineInstr *ExtI = nullptr;
4954 if (DstSize == 64 && !IsSigned) {
4955 Register NewReg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
4956 MIB.buildInstr(Opcode, {NewReg}, {Src0}).addImm(Lane);
4957 ExtI = MIB.buildInstr(AArch64::SUBREG_TO_REG, {DefReg}, {})
4958 .addImm(0)
4959 .addUse(NewReg)
4960 .addImm(AArch64::sub_32);
4961 RBI.constrainGenericRegister(DefReg, AArch64::GPR64RegClass, MRI);
4962 } else
4963 ExtI = MIB.buildInstr(Opcode, {DefReg}, {Src0}).addImm(Lane);
4964
4965 constrainSelectedInstRegOperands(*ExtI, TII, TRI, RBI);
4966 MI.eraseFromParent();
4967 return true;
4968}
4969
4970bool AArch64InstructionSelector::selectInsertElt(MachineInstr &I,
4971 MachineRegisterInfo &MRI) {
4972 assert(I.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT);
4973
4974 // Get information on the destination.
4975 Register DstReg = I.getOperand(0).getReg();
4976 const LLT DstTy = MRI.getType(DstReg);
4977 unsigned VecSize = DstTy.getSizeInBits();
4978
4979 // Get information on the element we want to insert into the destination.
4980 Register EltReg = I.getOperand(2).getReg();
4981 const LLT EltTy = MRI.getType(EltReg);
4982 unsigned EltSize = EltTy.getSizeInBits();
4983 if (EltSize < 16 || EltSize > 64)
4984 return false; // Don't support all element types yet.
4985
4986 // Find the definition of the index. Bail out if it's not defined by a
4987 // G_CONSTANT.
4988 Register IdxReg = I.getOperand(3).getReg();
4989 auto VRegAndVal = getIConstantVRegValWithLookThrough(IdxReg, MRI);
4990 if (!VRegAndVal)
4991 return false;
4992 unsigned LaneIdx = VRegAndVal->Value.getSExtValue();
4993
4994 // Perform the lane insert.
4995 Register SrcReg = I.getOperand(1).getReg();
4996 const RegisterBank &EltRB = *RBI.getRegBank(EltReg, MRI, TRI);
4997
4998 if (VecSize < 128) {
4999 // If the vector we're inserting into is smaller than 128 bits, widen it
5000 // to 128 to do the insert.
5001 MachineInstr *ScalarToVec =
5002 emitScalarToVector(VecSize, &AArch64::FPR128RegClass, SrcReg, MIB);
5003 if (!ScalarToVec)
5004 return false;
5005 SrcReg = ScalarToVec->getOperand(0).getReg();
5006 }
5007
5008 // Create an insert into a new FPR128 register.
5009 // Note that if our vector is already 128 bits, we end up emitting an extra
5010 // register.
5011 MachineInstr *InsMI =
5012 emitLaneInsert(None, SrcReg, EltReg, LaneIdx, EltRB, MIB);
5013
5014 if (VecSize < 128) {
5015 // If we had to widen to perform the insert, then we have to demote back to
5016 // the original size to get the result we want.
5017 Register DemoteVec = InsMI->getOperand(0).getReg();
5018 const TargetRegisterClass *RC =
5019 getMinClassForRegBank(*RBI.getRegBank(DemoteVec, MRI, TRI), VecSize);
5020 if (RC != &AArch64::FPR32RegClass && RC != &AArch64::FPR64RegClass) {
5021 LLVM_DEBUG(dbgs() << "Unsupported register class!\n");
5022 return false;
5023 }
5024 unsigned SubReg = 0;
5025 if (!getSubRegForClass(RC, TRI, SubReg))
5026 return false;
5027 if (SubReg != AArch64::ssub && SubReg != AArch64::dsub) {
5028 LLVM_DEBUG(dbgs() << "Unsupported destination size! (" << VecSize
5029 << "\n");
5030 return false;
5031 }
5032 MIB.buildInstr(TargetOpcode::COPY, {DstReg}, {})
5033 .addReg(DemoteVec, 0, SubReg);
5034 RBI.constrainGenericRegister(DstReg, *RC, MRI);
5035 } else {
5036 // No widening needed.
5037 InsMI->getOperand(0).setReg(DstReg);
5038 constrainSelectedInstRegOperands(*InsMI, TII, TRI, RBI);
5039 }
5040
5041 I.eraseFromParent();
5042 return true;
5043}
5044
5045MachineInstr *
5046AArch64InstructionSelector::emitConstantVector(Register Dst, Constant *CV,
5047 MachineIRBuilder &MIRBuilder,
5048 MachineRegisterInfo &MRI) {
5049 LLT DstTy = MRI.getType(Dst);
5050 unsigned DstSize = DstTy.getSizeInBits();
5051 if (CV->isNullValue()) {
5052 if (DstSize == 128) {
5053 auto Mov =
5054 MIRBuilder.buildInstr(AArch64::MOVIv2d_ns, {Dst}, {}).addImm(0);
5055 constrainSelectedInstRegOperands(*Mov, TII, TRI, RBI);
5056 return &*Mov;
5057 }
5058
5059 if (DstSize == 64) {
5060 auto Mov =
5061 MIRBuilder
5062 .buildInstr(AArch64::MOVIv2d_ns, {&AArch64::FPR128RegClass}, {})
5063 .addImm(0);
5064 auto Copy = MIRBuilder.buildInstr(TargetOpcode::COPY, {Dst}, {})
5065 .addReg(Mov.getReg(0), 0, AArch64::dsub);
5066 RBI.constrainGenericRegister(Dst, AArch64::FPR64RegClass, MRI);
5067 return &*Copy;
5068 }
5069 }
5070
5071 auto *CPLoad = emitLoadFromConstantPool(CV, MIRBuilder);
5072 if (!CPLoad) {
5073 LLVM_DEBUG(dbgs() << "Could not generate cp load for constant vector!");
5074 return nullptr;
5075 }
5076
5077 auto Copy = MIRBuilder.buildCopy(Dst, CPLoad->getOperand(0));
5078 RBI.constrainGenericRegister(
5079 Dst, *MRI.getRegClass(CPLoad->getOperand(0).getReg()), MRI);
5080 return &*Copy;
5081}
5082
5083bool AArch64InstructionSelector::tryOptConstantBuildVec(
5084 MachineInstr &I, LLT DstTy, MachineRegisterInfo &MRI) {
5085 assert(I.getOpcode() == TargetOpcode::G_BUILD_VECTOR);
5086 unsigned DstSize = DstTy.getSizeInBits();
5087 assert(DstSize <= 128 && "Unexpected build_vec type!");
5088 if (DstSize < 32)
5089 return false;
5090 // Check if we're building a constant vector, in which case we want to
5091 // generate a constant pool load instead of a vector insert sequence.
5092 SmallVector<Constant *, 16> Csts;
5093 for (unsigned Idx = 1; Idx < I.getNumOperands(); ++Idx) {
5094 // Try to find G_CONSTANT or G_FCONSTANT
5095 auto *OpMI =
5096 getOpcodeDef(TargetOpcode::G_CONSTANT, I.getOperand(Idx).getReg(), MRI);
5097 if (OpMI)
5098 Csts.emplace_back(
5099 const_cast<ConstantInt *>(OpMI->getOperand(1).getCImm()));
5100 else if ((OpMI = getOpcodeDef(TargetOpcode::G_FCONSTANT,
5101 I.getOperand(Idx).getReg(), MRI)))
5102 Csts.emplace_back(
5103 const_cast<ConstantFP *>(OpMI->getOperand(1).getFPImm()));
5104 else
5105 return false;
5106 }
5107 Constant *CV = ConstantVector::get(Csts);
5108 if (!emitConstantVector(I.getOperand(0).getReg(), CV, MIB, MRI))
5109 return false;
5110 I.eraseFromParent();
5111 return true;
5112}
5113
5114bool AArch64InstructionSelector::tryOptBuildVecToSubregToReg(
5115 MachineInstr &I, MachineRegisterInfo &MRI) {
5116 // Given:
5117 // %vec = G_BUILD_VECTOR %elt, %undef, %undef, ... %undef
5118 //
5119 // Select the G_BUILD_VECTOR as a SUBREG_TO_REG from %elt.
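// [Editor's example, not part of the analyzed source] With an FPR s32 %elt
// feeding a <4 x s32> build_vector whose remaining operands are all
// G_IMPLICIT_DEF, this becomes roughly:
//
// %vec:fpr128 = SUBREG_TO_REG 0, %elt, ssub
//
// leaving the upper lanes undefined, which the original G_BUILD_VECTOR allowed.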
5120 Register Dst = I.getOperand(0).getReg();
5121 Register EltReg = I.getOperand(1).getReg();
5122 LLT EltTy = MRI.getType(EltReg);
5123 // If the index isn't on the same bank as its elements, then this can't be a
5124 // SUBREG_TO_REG.
5125 const RegisterBank &EltRB = *RBI.getRegBank(EltReg, MRI, TRI);
5126 const RegisterBank &DstRB = *RBI.getRegBank(Dst, MRI, TRI);
5127 if (EltRB != DstRB)
5128 return false;
5129 if (any_of(make_range(I.operands_begin() + 2, I.operands_end()),
5130 [&MRI](const MachineOperand &Op) {
5131 return !getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, Op.getReg(),
5132 MRI);
5133 }))
5134 return false;
5135 unsigned SubReg;
5136 const TargetRegisterClass *EltRC =
5137 getMinClassForRegBank(EltRB, EltTy.getSizeInBits());
5138 if (!EltRC)
5139 return false;
5140 const TargetRegisterClass *DstRC =
5141 getMinClassForRegBank(DstRB, MRI.getType(Dst).getSizeInBits());
5142 if (!DstRC)
5143 return false;
5144 if (!getSubRegForClass(EltRC, TRI, SubReg))
5145 return false;
5146 auto SubregToReg = MIB.buildInstr(AArch64::SUBREG_TO_REG, {Dst}, {})
5147 .addImm(0)
5148 .addUse(EltReg)
5149 .addImm(SubReg);
5150 I.eraseFromParent();
5151 constrainSelectedInstRegOperands(*SubregToReg, TII, TRI, RBI);
5152 return RBI.constrainGenericRegister(Dst, *DstRC, MRI);
5153}
5154
5155bool AArch64InstructionSelector::selectBuildVector(MachineInstr &I,
5156 MachineRegisterInfo &MRI) {
5157 assert(I.getOpcode() == TargetOpcode::G_BUILD_VECTOR);
5158 // Until we port more of the optimized selections, for now just use a vector
5159 // insert sequence.
5160 const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
5161 const LLT EltTy = MRI.getType(I.getOperand(1).getReg());
5162 unsigned EltSize = EltTy.getSizeInBits();
5163
5164 if (tryOptConstantBuildVec(I, DstTy, MRI))
5165 return true;
5166 if (tryOptBuildVecToSubregToReg(I, MRI))
5167 return true;
5168
5169 if (EltSize < 16 || EltSize > 64)
5170 return false; // Don't support all element types yet.
5171 const RegisterBank &RB = *RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI);
5172
5173 const TargetRegisterClass *DstRC = &AArch64::FPR128RegClass;
5174 MachineInstr *ScalarToVec =
5175 emitScalarToVector(DstTy.getElementType().getSizeInBits(), DstRC,
5176 I.getOperand(1).getReg(), MIB);
5177 if (!ScalarToVec)
5178 return false;
5179
5180 Register DstVec = ScalarToVec->getOperand(0).getReg();
5181 unsigned DstSize = DstTy.getSizeInBits();
5182
5183 // Keep track of the last MI we inserted. Later on, we might be able to save
5184 // a copy using it.
5185 MachineInstr *PrevMI = nullptr;
5186 for (unsigned i = 2, e = DstSize / EltSize + 1; i < e; ++i) {
5187 // Note that if we don't do a subregister copy, we can end up making an
5188 // extra register.
5189 PrevMI = &*emitLaneInsert(None, DstVec, I.getOperand(i).getReg(), i - 1, RB,
5190 MIB);
5191 DstVec = PrevMI->getOperand(0).getReg();
5192 }
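// [Editor's example, not part of the analyzed source] For a <4 x s32>
// G_BUILD_VECTOR the loop above runs for i = 2..4, inserting operands 2-4 into
// lanes 1-3 of DstVec. Since DstSize is 128 in that case, the else branch
// below reuses the final insert's def as the destination instead of emitting a
// subregister copy.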
5193
5194 // If DstTy's size in bits is less than 128, then emit a subregister copy
5195 // from DstVec to the last register we've defined.
5196 if (DstSize < 128) {
5197 // Force this to be FPR using the destination vector.
5198 const TargetRegisterClass *RC =
5199 getMinClassForRegBank(*RBI.getRegBank(DstVec, MRI, TRI), DstSize);
5200 if (!RC)
5201 return false;
5202 if (RC != &AArch64::FPR32RegClass && RC != &AArch64::FPR64RegClass) {
5203 LLVM_DEBUG(dbgs() << "Unsupported register class!\n");
5204 return false;
5205 }
5206
5207 unsigned SubReg = 0;
5208 if (!getSubRegForClass(RC, TRI, SubReg))
5209 return false;
5210 if (SubReg != AArch64::ssub && SubReg != AArch64::dsub) {
5211 LLVM_DEBUG(dbgs() << "Unsupported destination size! (" << DstSize
5212 << "\n");
5213 return false;
5214 }
5215
5216 Register Reg = MRI.createVirtualRegister(RC);
5217 Register DstReg = I.getOperand(0).getReg();
5218
5219 MIB.buildInstr(TargetOpcode::COPY, {DstReg}, {}).addReg(DstVec, 0, SubReg);
5220 MachineOperand &RegOp = I.getOperand(1);
5221 RegOp.setReg(Reg);
5222 RBI.constrainGenericRegister(DstReg, *RC, MRI);
5223 } else {
5224 // We don't need a subregister copy. Save a copy by re-using the
5225 // destination register on the final insert.
5226 assert(PrevMI && "PrevMI was null?");
5227 PrevMI->getOperand(0).setReg(I.getOperand(0).getReg());
5228 constrainSelectedInstRegOperands(*PrevMI, TII, TRI, RBI);
5229 }
5230
5231 I.eraseFromParent();
5232 return true;
5233}
5234
5235bool AArch64InstructionSelector::selectVectorLoadIntrinsic(unsigned Opc,
5236 unsigned NumVecs,
5237 MachineInstr &I) {
5238 assert(I.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS);
5. Assuming the condition is true
6. '?' condition is true
5239 assert(Opc && "Expected an opcode?");
7. '?' condition is true
5240 assert(NumVecs > 1 && NumVecs < 5 && "Only support 2, 3, or 4 vectors");
8. '?' condition is true
5241 auto &MRI = *MIB.getMRI();
5242 LLT Ty = MRI.getType(I.getOperand(0).getReg());
5243 unsigned Size = Ty.getSizeInBits();
5244 assert((Size == 64 || Size == 128) &&
9. Assuming 'Size' is not equal to 64
10. Assuming 'Size' is equal to 128
11. '?' condition is true
5245 "Destination must be 64 bits or 128 bits?");
5246 unsigned SubReg = Size == 64 ? AArch64::dsub0 : AArch64::qsub0;
11.1. 'Size' is not equal to 64
12. '?' condition is false
5247 auto Ptr = I.getOperand(I.getNumOperands() - 1).getReg();
5248 assert(MRI.getType(Ptr).isPointer() && "Expected a pointer type?");
13. '?' condition is true
5249 auto Load = MIB.buildInstr(Opc, {Ty}, {Ptr});
5250 Load.cloneMemRefs(I);
5251 constrainSelectedInstRegOperands(*Load, TII, TRI, RBI);
5252 Register SelectedLoadDst = Load->getOperand(0).getReg();
5253 for (unsigned Idx = 0; Idx < NumVecs; ++Idx) {
14. Loop condition is true. Entering loop body
5254 auto Vec = MIB.buildInstr(TargetOpcode::COPY, {I.getOperand(Idx)}, {})
5255 .addReg(SelectedLoadDst, 0, SubReg + Idx);
5256 // Emit the subreg copies and immediately select them.
5257 // FIXME: We should refactor our copy code into an emitCopy helper and
5258 // clean up uses of this pattern elsewhere in the selector.
5259 selectCopy(*Vec, TII, MRI, TRI, RBI);
15. Calling 'selectCopy'
5260 }
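// [Editor's example, not part of the analyzed source] On the ld4 path that
// reaches this code (a <16 x s8> result, per the analyzer trace above), Size
// is 128, so SubReg starts at qsub0 and the loop above copies qsub0..qsub3 of
// the LD4Fourv16b result tuple into the intrinsic's four destination vregs.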
5261 return true;
5262}
5263
5264bool AArch64InstructionSelector::selectIntrinsicWithSideEffects(
5265 MachineInstr &I, MachineRegisterInfo &MRI) {
5266 // Find the intrinsic ID.
5267 unsigned IntrinID = I.getIntrinsicID();
5268
5269 const LLT S8 = LLT::scalar(8);
5270 const LLT S16 = LLT::scalar(16);
5271 const LLT S32 = LLT::scalar(32);
5272 const LLT S64 = LLT::scalar(64);
5273 const LLT P0 = LLT::pointer(0, 64);
5274 // Select the instruction.
5275 switch (IntrinID) {
1. Control jumps to 'case aarch64_neon_ld4:' at line 5322
5276 default:
5277 return false;
5278 case Intrinsic::aarch64_ldxp:
5279 case Intrinsic::aarch64_ldaxp: {
5280 auto NewI = MIB.buildInstr(
5281 IntrinID == Intrinsic::aarch64_ldxp ? AArch64::LDXPX : AArch64::LDAXPX,
5282 {I.getOperand(0).getReg(), I.getOperand(1).getReg()},
5283 {I.getOperand(3)});
5284 NewI.cloneMemRefs(I);
5285 constrainSelectedInstRegOperands(*NewI, TII, TRI, RBI);
5286 break;
5287 }
5288 case Intrinsic::trap:
5289 MIB.buildInstr(AArch64::BRK, {}, {}).addImm(1);
5290 break;
5291 case Intrinsic::debugtrap:
5292 MIB.buildInstr(AArch64::BRK, {}, {}).addImm(0xF000);
5293 break;
5294 case Intrinsic::ubsantrap:
5295 MIB.buildInstr(AArch64::BRK, {}, {})
5296 .addImm(I.getOperand(1).getImm() | ('U' << 8));
5297 break;
5298 case Intrinsic::aarch64_neon_ld2: {
5299 LLT Ty = MRI.getType(I.getOperand(0).getReg());
5300 unsigned Opc = 0;
5301 if (Ty == LLT::fixed_vector(8, S8))
5302 Opc = AArch64::LD2Twov8b;
5303 else if (Ty == LLT::fixed_vector(16, S8))
5304 Opc = AArch64::LD2Twov16b;
5305 else if (Ty == LLT::fixed_vector(4, S16))
5306 Opc = AArch64::LD2Twov4h;
5307 else if (Ty == LLT::fixed_vector(8, S16))
5308 Opc = AArch64::LD2Twov8h;
5309 else if (Ty == LLT::fixed_vector(2, S32))
5310 Opc = AArch64::LD2Twov2s;
5311 else if (Ty == LLT::fixed_vector(4, S32))
5312 Opc = AArch64::LD2Twov4s;
5313 else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
5314 Opc = AArch64::LD2Twov2d;
5315 else if (Ty == S64 || Ty == P0)
5316 Opc = AArch64::LD1Twov1d;
5317 else
5318 llvm_unreachable("Unexpected type for ld2!");
5319 selectVectorLoadIntrinsic(Opc, 2, I);
5320 break;
5321 }
5322 case Intrinsic::aarch64_neon_ld4: {
5323 LLT Ty = MRI.getType(I.getOperand(0).getReg());
5324 unsigned Opc = 0;
5325 if (Ty == LLT::fixed_vector(8, S8))
2. Taking false branch
5326 Opc = AArch64::LD4Fourv8b;
5327 else if (Ty == LLT::fixed_vector(16, S8))
3. Taking true branch
5328 Opc = AArch64::LD4Fourv16b;
5329 else if (Ty == LLT::fixed_vector(4, S16))
5330 Opc = AArch64::LD4Fourv4h;
5331 else if (Ty == LLT::fixed_vector(8, S16))
5332 Opc = AArch64::LD4Fourv8h;
5333 else if (Ty == LLT::fixed_vector(2, S32))
5334 Opc = AArch64::LD4Fourv2s;
5335 else if (Ty == LLT::fixed_vector(4, S32))
5336 Opc = AArch64::LD4Fourv4s;
5337 else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
5338 Opc = AArch64::LD4Fourv2d;
5339 else if (Ty == S64 || Ty == P0)
5340 Opc = AArch64::LD1Fourv1d;
5341 else
5342 llvm_unreachable("Unexpected type for ld4!");
5343 selectVectorLoadIntrinsic(Opc, 4, I);
4. Calling 'AArch64InstructionSelector::selectVectorLoadIntrinsic'
5344 break;
5345 }
5346 case Intrinsic::aarch64_neon_st2: {
5347 Register Src1 = I.getOperand(1).getReg();
5348 Register Src2 = I.getOperand(2).getReg();
5349 Register Ptr = I.getOperand(3).getReg();
5350 LLT Ty = MRI.getType(Src1);
5351 unsigned Opc;
5352 if (Ty == LLT::fixed_vector(8, S8))
5353 Opc = AArch64::ST2Twov8b;
5354 else if (Ty == LLT::fixed_vector(16, S8))
5355 Opc = AArch64::ST2Twov16b;
5356 else if (Ty == LLT::fixed_vector(4, S16))
5357 Opc = AArch64::ST2Twov4h;
5358 else if (Ty == LLT::fixed_vector(8, S16))
5359 Opc = AArch64::ST2Twov8h;
5360 else if (Ty == LLT::fixed_vector(2, S32))
5361 Opc = AArch64::ST2Twov2s;
5362 else if (Ty == LLT::fixed_vector(4, S32))
5363 Opc = AArch64::ST2Twov4s;
5364 else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
5365 Opc = AArch64::ST2Twov2d;
5366 else if (Ty == S64 || Ty == P0)
5367 Opc = AArch64::ST1Twov1d;
5368 else
5369 llvm_unreachable("Unexpected type for st2!");
5370 SmallVector<Register, 2> Regs = {Src1, Src2};
5371 Register Tuple = Ty.getSizeInBits() == 128 ? createQTuple(Regs, MIB)
5372 : createDTuple(Regs, MIB);
5373 auto Store = MIB.buildInstr(Opc, {}, {Tuple, Ptr});
5374 Store.cloneMemRefs(I);
5375 constrainSelectedInstRegOperands(*Store, TII, TRI, RBI);
5376 break;
5377 }
5378 }
5379
5380 I.eraseFromParent();
5381 return true;
5382}
5383
5384bool AArch64InstructionSelector::selectIntrinsic(MachineInstr &I,
5385 MachineRegisterInfo &MRI) {
5386 unsigned IntrinID = I.getIntrinsicID();
5387
5388 switch (IntrinID) {
5389 default:
5390 break;
5391 case Intrinsic::aarch64_crypto_sha1h: {
5392 Register DstReg = I.getOperand(0).getReg();
5393 Register SrcReg = I.getOperand(2).getReg();
5394
5395 // FIXME: Should this be an assert?
5396 if (MRI.getType(DstReg).getSizeInBits() != 32 ||
5397 MRI.getType(SrcReg).getSizeInBits() != 32)
5398 return false;
5399
5400 // The operation has to happen on FPRs. Set up some new FPR registers for
5401 // the source and destination if they are on GPRs.
5402 if (RBI.getRegBank(SrcReg, MRI, TRI)->getID() != AArch64::FPRRegBankID) {
5403 SrcReg = MRI.createVirtualRegister(&AArch64::FPR32RegClass);
5404 MIB.buildCopy({SrcReg}, {I.getOperand(2)});
5405
5406 // Make sure the copy ends up getting constrained properly.
5407 RBI.constrainGenericRegister(I.getOperand(2).getReg(),
5408 AArch64::GPR32RegClass, MRI);
5409 }
5410
5411 if (RBI.getRegBank(DstReg, MRI, TRI)->getID() != AArch64::FPRRegBankID)
5412 DstReg = MRI.createVirtualRegister(&AArch64::FPR32RegClass);
5413
5414 // Actually insert the instruction.
5415 auto SHA1Inst = MIB.buildInstr(AArch64::SHA1Hrr, {DstReg}, {SrcReg});
5416 constrainSelectedInstRegOperands(*SHA1Inst, TII, TRI, RBI);
5417
5418 // Did we create a new register for the destination?
5419 if (DstReg != I.getOperand(0).getReg()) {
5420 // Yep. Copy the result of the instruction back into the original
5421 // destination.
5422 MIB.buildCopy({I.getOperand(0)}, {DstReg});
5423 RBI.constrainGenericRegister(I.getOperand(0).getReg(),
5424 AArch64::GPR32RegClass, MRI);
5425 }
5426
5427 I.eraseFromParent();
5428 return true;
5429 }
5430 case Intrinsic::ptrauth_sign: {
5431 Register DstReg = I.getOperand(0).getReg();
5432 Register ValReg = I.getOperand(2).getReg();
5433 uint64_t Key = I.getOperand(3).getImm();
5434 Register DiscReg = I.getOperand(4).getReg();
5435 auto DiscVal = getIConstantVRegVal(DiscReg, MRI);
5436 bool IsDiscZero = DiscVal.hasValue() && DiscVal->isNullValue();
5437
5438 if (Key > 3)
5439 return false;
5440
5441 unsigned Opcodes[][4] = {
5442 {AArch64::PACIA, AArch64::PACIB, AArch64::PACDA, AArch64::PACDB},
5443 {AArch64::PACIZA, AArch64::PACIZB, AArch64::PACDZA, AArch64::PACDZB}};
5444 unsigned Opcode = Opcodes[IsDiscZero][Key];
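// [Editor's example, not part of the analyzed source] Key 0 with a
// constant-zero discriminator selects Opcodes[1][0], i.e. PACIZA, so no
// discriminator operand is added; with a non-zero discriminator the same key
// selects PACIA and DiscReg is appended and constrained to GPR64sp below.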
5445
5446 auto PAC = MIB.buildInstr(Opcode, {DstReg}, {ValReg});
5447
5448 if (!IsDiscZero) {
5449 PAC.addUse(DiscReg);
5450 RBI.constrainGenericRegister(DiscReg, AArch64::GPR64spRegClass, MRI);
5451 }
5452
5453 RBI.constrainGenericRegister(DstReg, AArch64::GPR64RegClass, MRI);
5454 I.eraseFromParent();
5455 return true;
5456 }
5457 case Intrinsic::frameaddress:
5458 case Intrinsic::returnaddress: {
5459 MachineFunction &MF = *I.getParent()->getParent();
5460 MachineFrameInfo &MFI = MF.getFrameInfo();
5461
5462 unsigned Depth = I.getOperand(2).getImm();
5463 Register DstReg = I.getOperand(0).getReg();