Bug Summary

File: build/llvm-toolchain-snapshot-16~++20221003111214+1fa2019828ca/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
Warning: line 3599, column 30
The left operand of '==' is a garbage value
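
Note on the diagnostic: this class of warning fires when an uninitialized local reaches a comparison. In code written in the style of this file, that typically happens when a helper writes an out-parameter only on some paths and reports failure through its bool return value (compare getSubRegForClass in the listing below, whose default case returns false without touching SubReg), and a caller ignores that return value before comparing the out-parameter. The following is a minimal, self-contained C++ sketch of that pattern with hypothetical names (getSubRegForWidth, SubReg); it illustrates the warning class only and is not the actual code at line 3599, column 30.

// Minimal sketch of the "garbage value" pattern; names are illustrative only.
#include <cstdio>

// Writes SubReg only for widths it recognizes; returns false otherwise and
// leaves SubReg untouched (the out-parameter style used in this file).
static bool getSubRegForWidth(unsigned Width, unsigned &SubReg) {
  switch (Width) {
  case 32:
    SubReg = 1; // e.g. ssub
    return true;
  case 64:
    SubReg = 2; // e.g. dsub
    return true;
  default:
    return false; // SubReg is not written on this path
  }
}

int main() {
  unsigned SubReg;                // uninitialized
  getSubRegForWidth(128, SubReg); // bool result ignored; SubReg never written
  if (SubReg == 1)                // analyzer: left operand of '==' is garbage
    std::puts("ssub");
  return 0;
}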

Annotated Source Code

clang -cc1 -cc1 -triple x86_64-pc-linux-gnu -analyze -disable-free -clear-ast-before-backend -disable-llvm-verifier -discard-value-names -main-file-name AArch64InstructionSelector.cpp -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=cplusplus -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -analyzer-config-compatibility-mode=true -mrelocation-model pic -pic-level 2 -mframe-pointer=none -fmath-errno -ffp-contract=on -fno-rounding-math -mconstructor-aliases -funwind-tables=2 -target-cpu x86-64 -tune-cpu generic -debugger-tuning=gdb -ffunction-sections -fdata-sections -fcoverage-compilation-dir=/build/llvm-toolchain-snapshot-16~++20221003111214+1fa2019828ca/build-llvm/tools/clang/stage2-bins -resource-dir /usr/lib/llvm-16/lib/clang/16.0.0 -D _DEBUG -D _GNU_SOURCE -D __STDC_CONSTANT_MACROS -D __STDC_FORMAT_MACROS -D __STDC_LIMIT_MACROS -I lib/Target/AArch64 -I /build/llvm-toolchain-snapshot-16~++20221003111214+1fa2019828ca/llvm/lib/Target/AArch64 -I include -I /build/llvm-toolchain-snapshot-16~++20221003111214+1fa2019828ca/llvm/include -D _FORTIFY_SOURCE=2 -D NDEBUG -U NDEBUG -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/c++/10 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/x86_64-linux-gnu/c++/10 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/c++/10/backward -internal-isystem /usr/lib/llvm-16/lib/clang/16.0.0/include -internal-isystem /usr/local/include -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../x86_64-linux-gnu/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -fmacro-prefix-map=/build/llvm-toolchain-snapshot-16~++20221003111214+1fa2019828ca/build-llvm/tools/clang/stage2-bins=build-llvm/tools/clang/stage2-bins -fmacro-prefix-map=/build/llvm-toolchain-snapshot-16~++20221003111214+1fa2019828ca/= -fcoverage-prefix-map=/build/llvm-toolchain-snapshot-16~++20221003111214+1fa2019828ca/build-llvm/tools/clang/stage2-bins=build-llvm/tools/clang/stage2-bins -fcoverage-prefix-map=/build/llvm-toolchain-snapshot-16~++20221003111214+1fa2019828ca/= -O2 -Wno-unused-command-line-argument -Wno-unused-parameter -Wwrite-strings -Wno-missing-field-initializers -Wno-long-long -Wno-maybe-uninitialized -Wno-class-memaccess -Wno-redundant-move -Wno-pessimizing-move -Wno-noexcept-type -Wno-comment -Wno-misleading-indentation -std=c++17 -fdeprecated-macro -fdebug-compilation-dir=/build/llvm-toolchain-snapshot-16~++20221003111214+1fa2019828ca/build-llvm/tools/clang/stage2-bins -fdebug-prefix-map=/build/llvm-toolchain-snapshot-16~++20221003111214+1fa2019828ca/build-llvm/tools/clang/stage2-bins=build-llvm/tools/clang/stage2-bins -fdebug-prefix-map=/build/llvm-toolchain-snapshot-16~++20221003111214+1fa2019828ca/= -ferror-limit 19 -fvisibility=hidden -fvisibility-inlines-hidden -stack-protector 2 -fgnuc-version=4.2.1 -fcolor-diagnostics -vectorize-loops -vectorize-slp -analyzer-output=html -analyzer-config stable-report-filename=true -faddrsig -D__GCC_HAVE_DWARF2_CFI_ASM=1 -o 
/tmp/scan-build-2022-10-03-140002-15933-1 -x c++ /build/llvm-toolchain-snapshot-16~++20221003111214+1fa2019828ca/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
1//===- AArch64InstructionSelector.cpp ----------------------------*- C++ -*-==//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8/// \file
9/// This file implements the targeting of the InstructionSelector class for
10/// AArch64.
11/// \todo This should be generated by TableGen.
12//===----------------------------------------------------------------------===//
13
14#include "AArch64GlobalISelUtils.h"
15#include "AArch64InstrInfo.h"
16#include "AArch64MachineFunctionInfo.h"
17#include "AArch64RegisterBankInfo.h"
18#include "AArch64RegisterInfo.h"
19#include "AArch64Subtarget.h"
20#include "AArch64TargetMachine.h"
21#include "MCTargetDesc/AArch64AddressingModes.h"
22#include "MCTargetDesc/AArch64MCTargetDesc.h"
23#include "llvm/ADT/Optional.h"
24#include "llvm/BinaryFormat/Dwarf.h"
25#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
26#include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
27#include "llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h"
28#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
29#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
30#include "llvm/CodeGen/GlobalISel/Utils.h"
31#include "llvm/CodeGen/MachineBasicBlock.h"
32#include "llvm/CodeGen/MachineConstantPool.h"
33#include "llvm/CodeGen/MachineFrameInfo.h"
34#include "llvm/CodeGen/MachineFunction.h"
35#include "llvm/CodeGen/MachineInstr.h"
36#include "llvm/CodeGen/MachineInstrBuilder.h"
37#include "llvm/CodeGen/MachineMemOperand.h"
38#include "llvm/CodeGen/MachineOperand.h"
39#include "llvm/CodeGen/MachineRegisterInfo.h"
40#include "llvm/CodeGen/TargetOpcodes.h"
41#include "llvm/IR/Constants.h"
42#include "llvm/IR/DerivedTypes.h"
43#include "llvm/IR/Instructions.h"
44#include "llvm/IR/IntrinsicsAArch64.h"
45#include "llvm/IR/PatternMatch.h"
46#include "llvm/IR/Type.h"
47#include "llvm/Pass.h"
48#include "llvm/Support/Debug.h"
49#include "llvm/Support/raw_ostream.h"
50
51#define DEBUG_TYPE "aarch64-isel"
52
53using namespace llvm;
54using namespace MIPatternMatch;
55using namespace AArch64GISelUtils;
56
57namespace llvm {
58class BlockFrequencyInfo;
59class ProfileSummaryInfo;
60}
61
62namespace {
63
64#define GET_GLOBALISEL_PREDICATE_BITSET
65#include "AArch64GenGlobalISel.inc"
66#undef GET_GLOBALISEL_PREDICATE_BITSET
67
68
69class AArch64InstructionSelector : public InstructionSelector {
70public:
71 AArch64InstructionSelector(const AArch64TargetMachine &TM,
72 const AArch64Subtarget &STI,
73 const AArch64RegisterBankInfo &RBI);
74
75 bool select(MachineInstr &I) override;
76 static const char *getName() { return DEBUG_TYPE; }
77
78 void setupMF(MachineFunction &MF, GISelKnownBits *KB,
79 CodeGenCoverage &CoverageInfo, ProfileSummaryInfo *PSI,
80 BlockFrequencyInfo *BFI) override {
81 InstructionSelector::setupMF(MF, KB, CoverageInfo, PSI, BFI);
82 MIB.setMF(MF);
83
84 // hasFnAttribute() is expensive to call on every BRCOND selection, so
85 // cache it here for each run of the selector.
86 ProduceNonFlagSettingCondBr =
87 !MF.getFunction().hasFnAttribute(Attribute::SpeculativeLoadHardening);
88 MFReturnAddr = Register();
89
90 processPHIs(MF);
91 }
92
93private:
94 /// tblgen-erated 'select' implementation, used as the initial selector for
95 /// the patterns that don't require complex C++.
96 bool selectImpl(MachineInstr &I, CodeGenCoverage &CoverageInfo) const;
97
98 // A lowering phase that runs before any selection attempts.
99 // Returns true if the instruction was modified.
100 bool preISelLower(MachineInstr &I);
101
102 // An early selection function that runs before the selectImpl() call.
103 bool earlySelect(MachineInstr &I);
104
105 // Do some preprocessing of G_PHIs before we begin selection.
106 void processPHIs(MachineFunction &MF);
107
108 bool earlySelectSHL(MachineInstr &I, MachineRegisterInfo &MRI);
109
110 /// Eliminate same-sized cross-bank copies into stores before selectImpl().
111 bool contractCrossBankCopyIntoStore(MachineInstr &I,
112 MachineRegisterInfo &MRI);
113
114 bool convertPtrAddToAdd(MachineInstr &I, MachineRegisterInfo &MRI);
115
116 bool selectVaStartAAPCS(MachineInstr &I, MachineFunction &MF,
117 MachineRegisterInfo &MRI) const;
118 bool selectVaStartDarwin(MachineInstr &I, MachineFunction &MF,
119 MachineRegisterInfo &MRI) const;
120
121 ///@{
122 /// Helper functions for selectCompareBranch.
123 bool selectCompareBranchFedByFCmp(MachineInstr &I, MachineInstr &FCmp,
124 MachineIRBuilder &MIB) const;
125 bool selectCompareBranchFedByICmp(MachineInstr &I, MachineInstr &ICmp,
126 MachineIRBuilder &MIB) const;
127 bool tryOptCompareBranchFedByICmp(MachineInstr &I, MachineInstr &ICmp,
128 MachineIRBuilder &MIB) const;
129 bool tryOptAndIntoCompareBranch(MachineInstr &AndInst, bool Invert,
130 MachineBasicBlock *DstMBB,
131 MachineIRBuilder &MIB) const;
132 ///@}
133
134 bool selectCompareBranch(MachineInstr &I, MachineFunction &MF,
135 MachineRegisterInfo &MRI);
136
137 bool selectVectorAshrLshr(MachineInstr &I, MachineRegisterInfo &MRI);
138 bool selectVectorSHL(MachineInstr &I, MachineRegisterInfo &MRI);
139
140 // Helper to generate an equivalent of scalar_to_vector into a new register,
141 // returned via 'Dst'.
142 MachineInstr *emitScalarToVector(unsigned EltSize,
143 const TargetRegisterClass *DstRC,
144 Register Scalar,
145 MachineIRBuilder &MIRBuilder) const;
146
147 /// Emit a lane insert into \p DstReg, or a new vector register if None is
148 /// provided.
149 ///
150 /// The lane inserted into is defined by \p LaneIdx. The vector source
151 /// register is given by \p SrcReg. The register containing the element is
152 /// given by \p EltReg.
153 MachineInstr *emitLaneInsert(Optional<Register> DstReg, Register SrcReg,
154 Register EltReg, unsigned LaneIdx,
155 const RegisterBank &RB,
156 MachineIRBuilder &MIRBuilder) const;
157
158 /// Emit a sequence of instructions representing a constant \p CV for a
159 /// vector register \p Dst. (E.g. a MOV, or a load from a constant pool.)
160 ///
161 /// \returns the last instruction in the sequence on success, and nullptr
162 /// otherwise.
163 MachineInstr *emitConstantVector(Register Dst, Constant *CV,
164 MachineIRBuilder &MIRBuilder,
165 MachineRegisterInfo &MRI);
166
167 bool selectInsertElt(MachineInstr &I, MachineRegisterInfo &MRI);
168 bool tryOptConstantBuildVec(MachineInstr &MI, LLT DstTy,
169 MachineRegisterInfo &MRI);
170 /// \returns true if a G_BUILD_VECTOR instruction \p MI can be selected as a
171 /// SUBREG_TO_REG.
172 bool tryOptBuildVecToSubregToReg(MachineInstr &MI, MachineRegisterInfo &MRI);
173 bool selectBuildVector(MachineInstr &I, MachineRegisterInfo &MRI);
174 bool selectMergeValues(MachineInstr &I, MachineRegisterInfo &MRI);
175 bool selectUnmergeValues(MachineInstr &I, MachineRegisterInfo &MRI);
176
177 bool selectShuffleVector(MachineInstr &I, MachineRegisterInfo &MRI);
178 bool selectExtractElt(MachineInstr &I, MachineRegisterInfo &MRI);
179 bool selectConcatVectors(MachineInstr &I, MachineRegisterInfo &MRI);
180 bool selectSplitVectorUnmerge(MachineInstr &I, MachineRegisterInfo &MRI);
181
182 /// Helper function to select vector load intrinsics like
183 /// @llvm.aarch64.neon.ld2.*, @llvm.aarch64.neon.ld4.*, etc.
184 /// \p Opc is the opcode that the selected instruction should use.
185 /// \p NumVecs is the number of vector destinations for the instruction.
186 /// \p I is the original G_INTRINSIC_W_SIDE_EFFECTS instruction.
187 bool selectVectorLoadIntrinsic(unsigned Opc, unsigned NumVecs,
188 MachineInstr &I);
189 bool selectIntrinsicWithSideEffects(MachineInstr &I,
190 MachineRegisterInfo &MRI);
191 bool selectIntrinsic(MachineInstr &I, MachineRegisterInfo &MRI);
192 bool selectVectorICmp(MachineInstr &I, MachineRegisterInfo &MRI);
193 bool selectIntrinsicTrunc(MachineInstr &I, MachineRegisterInfo &MRI) const;
194 bool selectIntrinsicRound(MachineInstr &I, MachineRegisterInfo &MRI) const;
195 bool selectJumpTable(MachineInstr &I, MachineRegisterInfo &MRI);
196 bool selectBrJT(MachineInstr &I, MachineRegisterInfo &MRI);
197 bool selectTLSGlobalValue(MachineInstr &I, MachineRegisterInfo &MRI);
198 bool selectReduction(MachineInstr &I, MachineRegisterInfo &MRI);
199 bool selectMOPS(MachineInstr &I, MachineRegisterInfo &MRI);
200 bool selectUSMovFromExtend(MachineInstr &I, MachineRegisterInfo &MRI);
201
202 unsigned emitConstantPoolEntry(const Constant *CPVal,
203 MachineFunction &MF) const;
204 MachineInstr *emitLoadFromConstantPool(const Constant *CPVal,
205 MachineIRBuilder &MIRBuilder) const;
206
207 // Emit a vector concat operation.
208 MachineInstr *emitVectorConcat(Optional<Register> Dst, Register Op1,
209 Register Op2,
210 MachineIRBuilder &MIRBuilder) const;
211
212 // Emit an integer compare between LHS and RHS, which checks for Predicate.
213 MachineInstr *emitIntegerCompare(MachineOperand &LHS, MachineOperand &RHS,
214 MachineOperand &Predicate,
215 MachineIRBuilder &MIRBuilder) const;
216
217 /// Emit a floating point comparison between \p LHS and \p RHS.
218 /// \p Pred if given is the intended predicate to use.
219 MachineInstr *emitFPCompare(Register LHS, Register RHS,
220 MachineIRBuilder &MIRBuilder,
221 Optional<CmpInst::Predicate> = None) const;
222
223 MachineInstr *emitInstr(unsigned Opcode,
224 std::initializer_list<llvm::DstOp> DstOps,
225 std::initializer_list<llvm::SrcOp> SrcOps,
226 MachineIRBuilder &MIRBuilder,
227 const ComplexRendererFns &RenderFns = None) const;
228 /// Helper function to emit an add or sub instruction.
229 ///
230 /// \p AddrModeAndSizeToOpcode must contain each of the opcode variants above
231 /// in a specific order.
232 ///
233 /// Below is an example of the expected input to \p AddrModeAndSizeToOpcode.
234 ///
235 /// \code
236 /// const std::array<std::array<unsigned, 2>, 4> Table {
237 /// {{AArch64::ADDXri, AArch64::ADDWri},
238 /// {AArch64::ADDXrs, AArch64::ADDWrs},
239 /// {AArch64::ADDXrr, AArch64::ADDWrr},
240 /// {AArch64::SUBXri, AArch64::SUBWri},
241 /// {AArch64::ADDXrx, AArch64::ADDWrx}}};
242 /// \endcode
243 ///
244 /// Each row in the table corresponds to a different addressing mode. Each
245 /// column corresponds to a different register size.
246 ///
247 /// \attention Rows must be structured as follows:
248 /// - Row 0: The ri opcode variants
249 /// - Row 1: The rs opcode variants
250 /// - Row 2: The rr opcode variants
251 /// - Row 3: The ri opcode variants for negative immediates
252 /// - Row 4: The rx opcode variants
253 ///
254 /// \attention Columns must be structured as follows:
255 /// - Column 0: The 64-bit opcode variants
256 /// - Column 1: The 32-bit opcode variants
257 ///
258 /// \p Dst is the destination register of the binop to emit.
259 /// \p LHS is the left-hand operand of the binop to emit.
260 /// \p RHS is the right-hand operand of the binop to emit.
261 MachineInstr *emitAddSub(
262 const std::array<std::array<unsigned, 2>, 5> &AddrModeAndSizeToOpcode,
263 Register Dst, MachineOperand &LHS, MachineOperand &RHS,
264 MachineIRBuilder &MIRBuilder) const;
265 MachineInstr *emitADD(Register DefReg, MachineOperand &LHS,
266 MachineOperand &RHS,
267 MachineIRBuilder &MIRBuilder) const;
268 MachineInstr *emitADDS(Register Dst, MachineOperand &LHS, MachineOperand &RHS,
269 MachineIRBuilder &MIRBuilder) const;
270 MachineInstr *emitSUBS(Register Dst, MachineOperand &LHS, MachineOperand &RHS,
271 MachineIRBuilder &MIRBuilder) const;
272 MachineInstr *emitCMN(MachineOperand &LHS, MachineOperand &RHS,
273 MachineIRBuilder &MIRBuilder) const;
274 MachineInstr *emitTST(MachineOperand &LHS, MachineOperand &RHS,
275 MachineIRBuilder &MIRBuilder) const;
276 MachineInstr *emitSelect(Register Dst, Register LHS, Register RHS,
277 AArch64CC::CondCode CC,
278 MachineIRBuilder &MIRBuilder) const;
279 MachineInstr *emitExtractVectorElt(Optional<Register> DstReg,
280 const RegisterBank &DstRB, LLT ScalarTy,
281 Register VecReg, unsigned LaneIdx,
282 MachineIRBuilder &MIRBuilder) const;
283 MachineInstr *emitCSINC(Register Dst, Register Src1, Register Src2,
284 AArch64CC::CondCode Pred,
285 MachineIRBuilder &MIRBuilder) const;
286 /// Emit a CSet for a FP compare.
287 ///
288 /// \p Dst is expected to be a 32-bit scalar register.
289 MachineInstr *emitCSetForFCmp(Register Dst, CmpInst::Predicate Pred,
290 MachineIRBuilder &MIRBuilder) const;
291
292 /// Emit the overflow op for \p Opcode.
293 ///
294 /// \p Opcode is expected to be an overflow op's opcode, e.g. G_UADDO,
295 /// G_USUBO, etc.
296 std::pair<MachineInstr *, AArch64CC::CondCode>
297 emitOverflowOp(unsigned Opcode, Register Dst, MachineOperand &LHS,
298 MachineOperand &RHS, MachineIRBuilder &MIRBuilder) const;
299
300 /// Emit expression as a conjunction (a series of CCMP/CFCMP ops).
301 /// In some cases this is even possible with OR operations in the expression.
302 MachineInstr *emitConjunction(Register Val, AArch64CC::CondCode &OutCC,
303 MachineIRBuilder &MIB) const;
304 MachineInstr *emitConditionalComparison(Register LHS, Register RHS,
305 CmpInst::Predicate CC,
306 AArch64CC::CondCode Predicate,
307 AArch64CC::CondCode OutCC,
308 MachineIRBuilder &MIB) const;
309 MachineInstr *emitConjunctionRec(Register Val, AArch64CC::CondCode &OutCC,
310 bool Negate, Register CCOp,
311 AArch64CC::CondCode Predicate,
312 MachineIRBuilder &MIB) const;
313
314 /// Emit a TB(N)Z instruction which tests \p Bit in \p TestReg.
315 /// \p IsNegative is true if the test should be "not zero".
316 /// This will also optimize the test bit instruction when possible.
317 MachineInstr *emitTestBit(Register TestReg, uint64_t Bit, bool IsNegative,
318 MachineBasicBlock *DstMBB,
319 MachineIRBuilder &MIB) const;
320
321 /// Emit a CB(N)Z instruction which branches to \p DestMBB.
322 MachineInstr *emitCBZ(Register CompareReg, bool IsNegative,
323 MachineBasicBlock *DestMBB,
324 MachineIRBuilder &MIB) const;
325
326 // Equivalent to the i32shift_a and friends from AArch64InstrInfo.td.
327 // We use these manually instead of using the importer since it doesn't
328 // support SDNodeXForm.
329 ComplexRendererFns selectShiftA_32(const MachineOperand &Root) const;
330 ComplexRendererFns selectShiftB_32(const MachineOperand &Root) const;
331 ComplexRendererFns selectShiftA_64(const MachineOperand &Root) const;
332 ComplexRendererFns selectShiftB_64(const MachineOperand &Root) const;
333
334 ComplexRendererFns select12BitValueWithLeftShift(uint64_t Immed) const;
335 ComplexRendererFns selectArithImmed(MachineOperand &Root) const;
336 ComplexRendererFns selectNegArithImmed(MachineOperand &Root) const;
337
338 ComplexRendererFns selectAddrModeUnscaled(MachineOperand &Root,
339 unsigned Size) const;
340
341 ComplexRendererFns selectAddrModeUnscaled8(MachineOperand &Root) const {
342 return selectAddrModeUnscaled(Root, 1);
343 }
344 ComplexRendererFns selectAddrModeUnscaled16(MachineOperand &Root) const {
345 return selectAddrModeUnscaled(Root, 2);
346 }
347 ComplexRendererFns selectAddrModeUnscaled32(MachineOperand &Root) const {
348 return selectAddrModeUnscaled(Root, 4);
349 }
350 ComplexRendererFns selectAddrModeUnscaled64(MachineOperand &Root) const {
351 return selectAddrModeUnscaled(Root, 8);
352 }
353 ComplexRendererFns selectAddrModeUnscaled128(MachineOperand &Root) const {
354 return selectAddrModeUnscaled(Root, 16);
355 }
356
357 /// Helper to try to fold in a GISEL_ADD_LOW into an immediate, to be used
358 /// from complex pattern matchers like selectAddrModeIndexed().
359 ComplexRendererFns tryFoldAddLowIntoImm(MachineInstr &RootDef, unsigned Size,
360 MachineRegisterInfo &MRI) const;
361
362 ComplexRendererFns selectAddrModeIndexed(MachineOperand &Root,
363 unsigned Size) const;
364 template <int Width>
365 ComplexRendererFns selectAddrModeIndexed(MachineOperand &Root) const {
366 return selectAddrModeIndexed(Root, Width / 8);
367 }
368
369 bool isWorthFoldingIntoExtendedReg(MachineInstr &MI,
370 const MachineRegisterInfo &MRI) const;
371 ComplexRendererFns
372 selectAddrModeShiftedExtendXReg(MachineOperand &Root,
373 unsigned SizeInBytes) const;
374
375 /// Returns a \p ComplexRendererFns which contains a base, offset, and whether
376 /// or not a shift + extend should be folded into an addressing mode. Returns
377 /// None when this is not profitable or possible.
378 ComplexRendererFns
379 selectExtendedSHL(MachineOperand &Root, MachineOperand &Base,
380 MachineOperand &Offset, unsigned SizeInBytes,
381 bool WantsExt) const;
382 ComplexRendererFns selectAddrModeRegisterOffset(MachineOperand &Root) const;
383 ComplexRendererFns selectAddrModeXRO(MachineOperand &Root,
384 unsigned SizeInBytes) const;
385 template <int Width>
386 ComplexRendererFns selectAddrModeXRO(MachineOperand &Root) const {
387 return selectAddrModeXRO(Root, Width / 8);
388 }
389
390 ComplexRendererFns selectAddrModeWRO(MachineOperand &Root,
391 unsigned SizeInBytes) const;
392 template <int Width>
393 ComplexRendererFns selectAddrModeWRO(MachineOperand &Root) const {
394 return selectAddrModeWRO(Root, Width / 8);
395 }
396
397 ComplexRendererFns selectShiftedRegister(MachineOperand &Root,
398 bool AllowROR = false) const;
399
400 ComplexRendererFns selectArithShiftedRegister(MachineOperand &Root) const {
401 return selectShiftedRegister(Root);
402 }
403
404 ComplexRendererFns selectLogicalShiftedRegister(MachineOperand &Root) const {
405 return selectShiftedRegister(Root, true);
406 }
407
408 /// Given an extend instruction, determine the correct shift-extend type for
409 /// that instruction.
410 ///
411 /// If the instruction is going to be used in a load or store, pass
412 /// \p IsLoadStore = true.
413 AArch64_AM::ShiftExtendType
414 getExtendTypeForInst(MachineInstr &MI, MachineRegisterInfo &MRI,
415 bool IsLoadStore = false) const;
416
417 /// Move \p Reg to \p RC if \p Reg is not already on \p RC.
418 ///
419 /// \returns Either \p Reg if no change was necessary, or the new register
420 /// created by moving \p Reg.
421 ///
422 /// Note: This uses emitCopy right now.
423 Register moveScalarRegClass(Register Reg, const TargetRegisterClass &RC,
424 MachineIRBuilder &MIB) const;
425
426 ComplexRendererFns selectArithExtendedRegister(MachineOperand &Root) const;
427
428 void renderTruncImm(MachineInstrBuilder &MIB, const MachineInstr &MI,
429 int OpIdx = -1) const;
430 void renderLogicalImm32(MachineInstrBuilder &MIB, const MachineInstr &I,
431 int OpIdx = -1) const;
432 void renderLogicalImm64(MachineInstrBuilder &MIB, const MachineInstr &I,
433 int OpIdx = -1) const;
434 void renderFPImm16(MachineInstrBuilder &MIB, const MachineInstr &MI,
435 int OpIdx = -1) const;
436 void renderFPImm32(MachineInstrBuilder &MIB, const MachineInstr &MI,
437 int OpIdx = -1) const;
438 void renderFPImm64(MachineInstrBuilder &MIB, const MachineInstr &MI,
439 int OpIdx = -1) const;
440 void renderFPImm32SIMDModImmType4(MachineInstrBuilder &MIB,
441 const MachineInstr &MI,
442 int OpIdx = -1) const;
443
444 // Materialize a GlobalValue or BlockAddress using a movz+movk sequence.
445 void materializeLargeCMVal(MachineInstr &I, const Value *V, unsigned OpFlags);
446
447 // Optimization methods.
448 bool tryOptSelect(GSelect &Sel);
449 bool tryOptSelectConjunction(GSelect &Sel, MachineInstr &CondMI);
450 MachineInstr *tryFoldIntegerCompare(MachineOperand &LHS, MachineOperand &RHS,
451 MachineOperand &Predicate,
452 MachineIRBuilder &MIRBuilder) const;
453
454 /// Return true if \p MI is a load or store of \p NumBytes bytes.
455 bool isLoadStoreOfNumBytes(const MachineInstr &MI, unsigned NumBytes) const;
456
457 /// Returns true if \p MI is guaranteed to have the high-half of a 64-bit
458 /// register zeroed out. In other words, the result of MI has been explicitly
459 /// zero extended.
460 bool isDef32(const MachineInstr &MI) const;
461
462 const AArch64TargetMachine &TM;
463 const AArch64Subtarget &STI;
464 const AArch64InstrInfo &TII;
465 const AArch64RegisterInfo &TRI;
466 const AArch64RegisterBankInfo &RBI;
467
468 bool ProduceNonFlagSettingCondBr = false;
469
470 // Some cached values used during selection.
471 // We use LR as a live-in register, and we keep track of it here as it can be
472 // clobbered by calls.
473 Register MFReturnAddr;
474
475 MachineIRBuilder MIB;
476
477#define GET_GLOBALISEL_PREDICATES_DECL
478#include "AArch64GenGlobalISel.inc"
479#undef GET_GLOBALISEL_PREDICATES_DECL
480
481// We declare the temporaries used by selectImpl() in the class to minimize the
482// cost of constructing placeholder values.
483#define GET_GLOBALISEL_TEMPORARIES_DECL
484#include "AArch64GenGlobalISel.inc"
485#undef GET_GLOBALISEL_TEMPORARIES_DECL
486};
487
488} // end anonymous namespace
489
490#define GET_GLOBALISEL_IMPL
491#include "AArch64GenGlobalISel.inc"
492#undef GET_GLOBALISEL_IMPL
493
494AArch64InstructionSelector::AArch64InstructionSelector(
495 const AArch64TargetMachine &TM, const AArch64Subtarget &STI,
496 const AArch64RegisterBankInfo &RBI)
497 : TM(TM), STI(STI), TII(*STI.getInstrInfo()), TRI(*STI.getRegisterInfo()),
498 RBI(RBI),
499#define GET_GLOBALISEL_PREDICATES_INIT
500#include "AArch64GenGlobalISel.inc"
501#undef GET_GLOBALISEL_PREDICATES_INIT
502#define GET_GLOBALISEL_TEMPORARIES_INIT
503#include "AArch64GenGlobalISel.inc"
504#undef GET_GLOBALISEL_TEMPORARIES_INIT
505{
506}
507
508// FIXME: This should be target-independent, inferred from the types declared
509// for each class in the bank.
510//
511/// Given a register bank, and a type, return the smallest register class that
512/// can represent that combination.
513static const TargetRegisterClass *
514getRegClassForTypeOnBank(LLT Ty, const RegisterBank &RB,
515 bool GetAllRegSet = false) {
516 if (RB.getID() == AArch64::GPRRegBankID) {
517 if (Ty.getSizeInBits() <= 32)
518 return GetAllRegSet ? &AArch64::GPR32allRegClass
519 : &AArch64::GPR32RegClass;
520 if (Ty.getSizeInBits() == 64)
521 return GetAllRegSet ? &AArch64::GPR64allRegClass
522 : &AArch64::GPR64RegClass;
523 if (Ty.getSizeInBits() == 128)
524 return &AArch64::XSeqPairsClassRegClass;
525 return nullptr;
526 }
527
528 if (RB.getID() == AArch64::FPRRegBankID) {
529 switch (Ty.getSizeInBits()) {
530 case 8:
531 return &AArch64::FPR8RegClass;
532 case 16:
533 return &AArch64::FPR16RegClass;
534 case 32:
535 return &AArch64::FPR32RegClass;
536 case 64:
537 return &AArch64::FPR64RegClass;
538 case 128:
539 return &AArch64::FPR128RegClass;
540 }
541 return nullptr;
542 }
543
544 return nullptr;
545}
546
547/// Given a register bank, and size in bits, return the smallest register class
548/// that can represent that combination.
549static const TargetRegisterClass *
550getMinClassForRegBank(const RegisterBank &RB, unsigned SizeInBits,
551 bool GetAllRegSet = false) {
552 unsigned RegBankID = RB.getID();
553
554 if (RegBankID == AArch64::GPRRegBankID) {
555 if (SizeInBits <= 32)
556 return GetAllRegSet ? &AArch64::GPR32allRegClass
557 : &AArch64::GPR32RegClass;
558 if (SizeInBits == 64)
559 return GetAllRegSet ? &AArch64::GPR64allRegClass
560 : &AArch64::GPR64RegClass;
561 if (SizeInBits == 128)
562 return &AArch64::XSeqPairsClassRegClass;
563 }
564
565 if (RegBankID == AArch64::FPRRegBankID) {
566 switch (SizeInBits) {
567 default:
568 return nullptr;
569 case 8:
570 return &AArch64::FPR8RegClass;
571 case 16:
572 return &AArch64::FPR16RegClass;
573 case 32:
574 return &AArch64::FPR32RegClass;
575 case 64:
576 return &AArch64::FPR64RegClass;
577 case 128:
578 return &AArch64::FPR128RegClass;
579 }
580 }
581
582 return nullptr;
583}
584
585/// Returns the correct subregister to use for a given register class.
586static bool getSubRegForClass(const TargetRegisterClass *RC,
587 const TargetRegisterInfo &TRI, unsigned &SubReg) {
588 switch (TRI.getRegSizeInBits(*RC)) {
589 case 8:
590 SubReg = AArch64::bsub;
591 break;
592 case 16:
593 SubReg = AArch64::hsub;
594 break;
595 case 32:
596 if (RC != &AArch64::FPR32RegClass)
597 SubReg = AArch64::sub_32;
598 else
599 SubReg = AArch64::ssub;
600 break;
601 case 64:
602 SubReg = AArch64::dsub;
603 break;
604 default:
605 LLVM_DEBUG(
606 dbgs() << "Couldn't find appropriate subregister for register class.");
607 return false;
608 }
609
610 return true;
611}
612
613/// Returns the minimum size the given register bank can hold.
614static unsigned getMinSizeForRegBank(const RegisterBank &RB) {
615 switch (RB.getID()) {
616 case AArch64::GPRRegBankID:
617 return 32;
618 case AArch64::FPRRegBankID:
619 return 8;
620 default:
621 llvm_unreachable("Tried to get minimum size for unknown register bank.");
622 }
623}
624
625/// Create a REG_SEQUENCE instruction using the registers in \p Regs.
626/// Helper function for functions like createDTuple and createQTuple.
627///
628/// \p RegClassIDs - The list of register class IDs available for some tuple of
629/// a scalar class. E.g. QQRegClassID, QQQRegClassID, QQQQRegClassID. This is
630/// expected to contain between 2 and 4 tuple classes.
631///
632/// \p SubRegs - The list of subregister classes associated with each register
633/// class ID in \p RegClassIDs. E.g., QQRegClassID should use the qsub0
634/// subregister class. The index of each subregister class is expected to
635/// correspond with the index of each register class.
636///
637/// \returns Either the destination register of REG_SEQUENCE instruction that
638/// was created, or the 0th element of \p Regs if \p Regs contains a single
639/// element.
640static Register createTuple(ArrayRef<Register> Regs,
641 const unsigned RegClassIDs[],
642 const unsigned SubRegs[], MachineIRBuilder &MIB) {
643 unsigned NumRegs = Regs.size();
644 if (NumRegs == 1)
645 return Regs[0];
646 assert(NumRegs >= 2 && NumRegs <= 4 &&
647 "Only support between two and 4 registers in a tuple!");
648 const TargetRegisterInfo *TRI = MIB.getMF().getSubtarget().getRegisterInfo();
649 auto *DesiredClass = TRI->getRegClass(RegClassIDs[NumRegs - 2]);
650 auto RegSequence =
651 MIB.buildInstr(TargetOpcode::REG_SEQUENCE, {DesiredClass}, {});
652 for (unsigned I = 0, E = Regs.size(); I < E; ++I) {
653 RegSequence.addUse(Regs[I]);
654 RegSequence.addImm(SubRegs[I]);
655 }
656 return RegSequence.getReg(0);
657}
658
659/// Create a tuple of D-registers using the registers in \p Regs.
660static Register createDTuple(ArrayRef<Register> Regs, MachineIRBuilder &MIB) {
661 static const unsigned RegClassIDs[] = {
662 AArch64::DDRegClassID, AArch64::DDDRegClassID, AArch64::DDDDRegClassID};
663 static const unsigned SubRegs[] = {AArch64::dsub0, AArch64::dsub1,
664 AArch64::dsub2, AArch64::dsub3};
665 return createTuple(Regs, RegClassIDs, SubRegs, MIB);
666}
667
668/// Create a tuple of Q-registers using the registers in \p Regs.
669static Register createQTuple(ArrayRef<Register> Regs, MachineIRBuilder &MIB) {
670 static const unsigned RegClassIDs[] = {
671 AArch64::QQRegClassID, AArch64::QQQRegClassID, AArch64::QQQQRegClassID};
672 static const unsigned SubRegs[] = {AArch64::qsub0, AArch64::qsub1,
673 AArch64::qsub2, AArch64::qsub3};
674 return createTuple(Regs, RegClassIDs, SubRegs, MIB);
675}
676
677static Optional<uint64_t> getImmedFromMO(const MachineOperand &Root) {
678 auto &MI = *Root.getParent();
679 auto &MBB = *MI.getParent();
680 auto &MF = *MBB.getParent();
681 auto &MRI = MF.getRegInfo();
682 uint64_t Immed;
683 if (Root.isImm())
684 Immed = Root.getImm();
685 else if (Root.isCImm())
686 Immed = Root.getCImm()->getZExtValue();
687 else if (Root.isReg()) {
688 auto ValAndVReg =
689 getIConstantVRegValWithLookThrough(Root.getReg(), MRI, true);
690 if (!ValAndVReg)
691 return None;
692 Immed = ValAndVReg->Value.getSExtValue();
693 } else
694 return None;
695 return Immed;
696}
697
698/// Check whether \p I is a currently unsupported binary operation:
699/// - it has an unsized type
700/// - an operand is not a vreg
701/// - its operands are not all in the same bank
702/// These are checks that should someday live in the verifier, but right now,
703/// these are mostly limitations of the aarch64 selector.
704static bool unsupportedBinOp(const MachineInstr &I,
705 const AArch64RegisterBankInfo &RBI,
706 const MachineRegisterInfo &MRI,
707 const AArch64RegisterInfo &TRI) {
708 LLT Ty = MRI.getType(I.getOperand(0).getReg());
709 if (!Ty.isValid()) {
710 LLVM_DEBUG(dbgs() << "Generic binop register should be typed\n");
711 return true;
712 }
713
714 const RegisterBank *PrevOpBank = nullptr;
715 for (auto &MO : I.operands()) {
716 // FIXME: Support non-register operands.
717 if (!MO.isReg()) {
718 LLVM_DEBUG(dbgs() << "Generic inst non-reg operands are unsupported\n");
719 return true;
720 }
721
722 // FIXME: Can generic operations have physical registers operands? If
723 // so, this will need to be taught about that, and we'll need to get the
724 // bank out of the minimal class for the register.
725 // Either way, this needs to be documented (and possibly verified).
726 if (!Register::isVirtualRegister(MO.getReg())) {
727 LLVM_DEBUG(dbgs() << "Generic inst has physical register operand\n");
728 return true;
729 }
730
731 const RegisterBank *OpBank = RBI.getRegBank(MO.getReg(), MRI, TRI);
732 if (!OpBank) {
733 LLVM_DEBUG(dbgs() << "Generic register has no bank or class\n");
734 return true;
735 }
736
737 if (PrevOpBank && OpBank != PrevOpBank) {
738 LLVM_DEBUG(dbgs() << "Generic inst operands have different banks\n");
739 return true;
740 }
741 PrevOpBank = OpBank;
742 }
743 return false;
744}
745
746/// Select the AArch64 opcode for the basic binary operation \p GenericOpc
747/// (such as G_OR or G_SDIV), appropriate for the register bank \p RegBankID
748/// and of size \p OpSize.
749/// \returns \p GenericOpc if the combination is unsupported.
750static unsigned selectBinaryOp(unsigned GenericOpc, unsigned RegBankID,
751 unsigned OpSize) {
752 switch (RegBankID) {
753 case AArch64::GPRRegBankID:
754 if (OpSize == 32) {
755 switch (GenericOpc) {
756 case TargetOpcode::G_SHL:
757 return AArch64::LSLVWr;
758 case TargetOpcode::G_LSHR:
759 return AArch64::LSRVWr;
760 case TargetOpcode::G_ASHR:
761 return AArch64::ASRVWr;
762 default:
763 return GenericOpc;
764 }
765 } else if (OpSize == 64) {
766 switch (GenericOpc) {
767 case TargetOpcode::G_PTR_ADD:
768 return AArch64::ADDXrr;
769 case TargetOpcode::G_SHL:
770 return AArch64::LSLVXr;
771 case TargetOpcode::G_LSHR:
772 return AArch64::LSRVXr;
773 case TargetOpcode::G_ASHR:
774 return AArch64::ASRVXr;
775 default:
776 return GenericOpc;
777 }
778 }
779 break;
780 case AArch64::FPRRegBankID:
781 switch (OpSize) {
782 case 32:
783 switch (GenericOpc) {
784 case TargetOpcode::G_FADD:
785 return AArch64::FADDSrr;
786 case TargetOpcode::G_FSUB:
787 return AArch64::FSUBSrr;
788 case TargetOpcode::G_FMUL:
789 return AArch64::FMULSrr;
790 case TargetOpcode::G_FDIV:
791 return AArch64::FDIVSrr;
792 default:
793 return GenericOpc;
794 }
795 case 64:
796 switch (GenericOpc) {
797 case TargetOpcode::G_FADD:
798 return AArch64::FADDDrr;
799 case TargetOpcode::G_FSUB:
800 return AArch64::FSUBDrr;
801 case TargetOpcode::G_FMUL:
802 return AArch64::FMULDrr;
803 case TargetOpcode::G_FDIV:
804 return AArch64::FDIVDrr;
805 case TargetOpcode::G_OR:
806 return AArch64::ORRv8i8;
807 default:
808 return GenericOpc;
809 }
810 }
811 break;
812 }
813 return GenericOpc;
814}
815
816/// Select the AArch64 opcode for the G_LOAD or G_STORE operation \p GenericOpc,
817/// appropriate for the (value) register bank \p RegBankID and of memory access
818/// size \p OpSize. This returns the variant with the base+unsigned-immediate
819/// addressing mode (e.g., LDRXui).
820/// \returns \p GenericOpc if the combination is unsupported.
821static unsigned selectLoadStoreUIOp(unsigned GenericOpc, unsigned RegBankID,
822 unsigned OpSize) {
823 const bool isStore = GenericOpc == TargetOpcode::G_STORE;
824 switch (RegBankID) {
825 case AArch64::GPRRegBankID:
826 switch (OpSize) {
827 case 8:
828 return isStore ? AArch64::STRBBui : AArch64::LDRBBui;
829 case 16:
830 return isStore ? AArch64::STRHHui : AArch64::LDRHHui;
831 case 32:
832 return isStore ? AArch64::STRWui : AArch64::LDRWui;
833 case 64:
834 return isStore ? AArch64::STRXui : AArch64::LDRXui;
835 }
836 break;
837 case AArch64::FPRRegBankID:
838 switch (OpSize) {
839 case 8:
840 return isStore ? AArch64::STRBui : AArch64::LDRBui;
841 case 16:
842 return isStore ? AArch64::STRHui : AArch64::LDRHui;
843 case 32:
844 return isStore ? AArch64::STRSui : AArch64::LDRSui;
845 case 64:
846 return isStore ? AArch64::STRDui : AArch64::LDRDui;
847 case 128:
848 return isStore ? AArch64::STRQui : AArch64::LDRQui;
849 }
850 break;
851 }
852 return GenericOpc;
853}
854
855/// Helper function for selectCopy. Inserts a subregister copy from \p SrcReg
856/// to \p *To.
857///
858/// E.g "To = COPY SrcReg:SubReg"
859static bool copySubReg(MachineInstr &I, MachineRegisterInfo &MRI,
860 const RegisterBankInfo &RBI, Register SrcReg,
861 const TargetRegisterClass *To, unsigned SubReg) {
862 assert(SrcReg.isValid() && "Expected a valid source register?");
863 assert(To && "Destination register class cannot be null");
864 assert(SubReg && "Expected a valid subregister");
865
866 MachineIRBuilder MIB(I);
867 auto SubRegCopy =
868 MIB.buildInstr(TargetOpcode::COPY, {To}, {}).addReg(SrcReg, 0, SubReg);
869 MachineOperand &RegOp = I.getOperand(1);
870 RegOp.setReg(SubRegCopy.getReg(0));
871
872 // It's possible that the destination register won't be constrained. Make
873 // sure that happens.
874 if (!Register::isPhysicalRegister(I.getOperand(0).getReg()))
875 RBI.constrainGenericRegister(I.getOperand(0).getReg(), *To, MRI);
876
877 return true;
878}
879
880/// Helper function to get the source and destination register classes for a
881/// copy. Returns a std::pair containing the source register class for the
882/// copy, and the destination register class for the copy. If a register class
883/// cannot be determined, then it will be nullptr.
884static std::pair<const TargetRegisterClass *, const TargetRegisterClass *>
885getRegClassesForCopy(MachineInstr &I, const TargetInstrInfo &TII,
886 MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI,
887 const RegisterBankInfo &RBI) {
888 Register DstReg = I.getOperand(0).getReg();
889 Register SrcReg = I.getOperand(1).getReg();
890 const RegisterBank &DstRegBank = *RBI.getRegBank(DstReg, MRI, TRI);
891 const RegisterBank &SrcRegBank = *RBI.getRegBank(SrcReg, MRI, TRI);
892 unsigned DstSize = RBI.getSizeInBits(DstReg, MRI, TRI);
893 unsigned SrcSize = RBI.getSizeInBits(SrcReg, MRI, TRI);
894
895 // Special casing for cross-bank copies of s1s. We can technically represent
896 // a 1-bit value with any size of register. The minimum size for a GPR is 32
897 // bits. So, we need to put the FPR on 32 bits as well.
898 //
899 // FIXME: I'm not sure if this case holds true outside of copies. If it does,
900 // then we can pull it into the helpers that get the appropriate class for a
901 // register bank. Or make a new helper that carries along some constraint
902 // information.
903 if (SrcRegBank != DstRegBank && (DstSize == 1 && SrcSize == 1))
904 SrcSize = DstSize = 32;
905
906 return {getMinClassForRegBank(SrcRegBank, SrcSize, true),
907 getMinClassForRegBank(DstRegBank, DstSize, true)};
908}
909
910// FIXME: We need some sort of API in RBI/TRI to allow generic code to
911// constrain operands of simple instructions given a TargetRegisterClass
912// and LLT
913static bool selectDebugInstr(MachineInstr &I, MachineRegisterInfo &MRI,
914 const RegisterBankInfo &RBI) {
915 for (MachineOperand &MO : I.operands()) {
916 if (!MO.isReg())
917 continue;
918 Register Reg = MO.getReg();
919 if (!Reg)
920 continue;
921 if (Reg.isPhysical())
922 continue;
923 LLT Ty = MRI.getType(Reg);
924 const RegClassOrRegBank &RegClassOrBank = MRI.getRegClassOrRegBank(Reg);
925 const TargetRegisterClass *RC =
926 RegClassOrBank.dyn_cast<const TargetRegisterClass *>();
927 if (!RC) {
928 const RegisterBank &RB = *RegClassOrBank.get<const RegisterBank *>();
929 RC = getRegClassForTypeOnBank(Ty, RB);
930 if (!RC) {
931 LLVM_DEBUG(
932 dbgs() << "Warning: DBG_VALUE operand has unexpected size/bank\n");
933 break;
934 }
935 }
936 RBI.constrainGenericRegister(Reg, *RC, MRI);
937 }
938
939 return true;
940}
941
942static bool selectCopy(MachineInstr &I, const TargetInstrInfo &TII,
943 MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI,
944 const RegisterBankInfo &RBI) {
945 Register DstReg = I.getOperand(0).getReg();
946 Register SrcReg = I.getOperand(1).getReg();
947 const RegisterBank &DstRegBank = *RBI.getRegBank(DstReg, MRI, TRI);
948 const RegisterBank &SrcRegBank = *RBI.getRegBank(SrcReg, MRI, TRI);
949
950 // Find the correct register classes for the source and destination registers.
951 const TargetRegisterClass *SrcRC;
952 const TargetRegisterClass *DstRC;
953 std::tie(SrcRC, DstRC) = getRegClassesForCopy(I, TII, MRI, TRI, RBI);
954
955 if (!DstRC) {
956 LLVM_DEBUG(dbgs() << "Unexpected dest size "
957 << RBI.getSizeInBits(DstReg, MRI, TRI) << '\n');
958 return false;
959 }
960
961 // Is this a copy? If so, then we may need to insert a subregister copy.
962 if (I.isCopy()) {
963 // Yes. Check if there's anything to fix up.
964 if (!SrcRC) {
965 LLVM_DEBUG(dbgs() << "Couldn't determine source register class\n");
966 return false;
967 }
968
969 unsigned SrcSize = TRI.getRegSizeInBits(*SrcRC);
970 unsigned DstSize = TRI.getRegSizeInBits(*DstRC);
971 unsigned SubReg;
972
973 // If the source bank doesn't support a subregister copy small enough,
974 // then we first need to copy to the destination bank.
975 if (getMinSizeForRegBank(SrcRegBank) > DstSize) {
976 const TargetRegisterClass *DstTempRC =
977 getMinClassForRegBank(DstRegBank, SrcSize, /* GetAllRegSet */ true);
978 getSubRegForClass(DstRC, TRI, SubReg);
979
980 MachineIRBuilder MIB(I);
981 auto Copy = MIB.buildCopy({DstTempRC}, {SrcReg});
982 copySubReg(I, MRI, RBI, Copy.getReg(0), DstRC, SubReg);
983 } else if (SrcSize > DstSize) {
984 // If the source register is bigger than the destination we need to
985 // perform a subregister copy.
986 const TargetRegisterClass *SubRegRC =
987 getMinClassForRegBank(SrcRegBank, DstSize, /* GetAllRegSet */ true);
988 getSubRegForClass(SubRegRC, TRI, SubReg);
989 copySubReg(I, MRI, RBI, SrcReg, DstRC, SubReg);
990 } else if (DstSize > SrcSize) {
991 // If the destination register is bigger than the source we need to do
992 // a promotion using SUBREG_TO_REG.
993 const TargetRegisterClass *PromotionRC =
994 getMinClassForRegBank(SrcRegBank, DstSize, /* GetAllRegSet */ true);
995 getSubRegForClass(SrcRC, TRI, SubReg);
996
997 Register PromoteReg = MRI.createVirtualRegister(PromotionRC);
998 BuildMI(*I.getParent(), I, I.getDebugLoc(),
999 TII.get(AArch64::SUBREG_TO_REG), PromoteReg)
1000 .addImm(0)
1001 .addUse(SrcReg)
1002 .addImm(SubReg);
1003 MachineOperand &RegOp = I.getOperand(1);
1004 RegOp.setReg(PromoteReg);
1005 }
1006
1007 // If the destination is a physical register, then there's nothing to
1008 // change, so we're done.
1009 if (Register::isPhysicalRegister(DstReg))
1010 return true;
1011 }
1012
1013 // No need to constrain SrcReg. It will get constrained when we hit another
1014 // of its use or its defs. Copies do not have constraints.
1015 if (!RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) {
1016 LLVM_DEBUG(dbgs() << "Failed to constrain " << TII.getName(I.getOpcode())
1017 << " operand\n");
1018 return false;
1019 }
1020
1021 // If this is a GPR ZEXT that we want to just reduce down into a copy.
1022 // The sizes will be mismatched with the source < 32b but that's ok.
1023 if (I.getOpcode() == TargetOpcode::G_ZEXT) {
1024 I.setDesc(TII.get(AArch64::COPY));
1025 assert(SrcRegBank.getID() == AArch64::GPRRegBankID);
1026 return selectCopy(I, TII, MRI, TRI, RBI);
1027 }
1028
1029 I.setDesc(TII.get(AArch64::COPY));
1030 return true;
1031}
1032
1033static unsigned selectFPConvOpc(unsigned GenericOpc, LLT DstTy, LLT SrcTy) {
1034 if (!DstTy.isScalar() || !SrcTy.isScalar())
1035 return GenericOpc;
1036
1037 const unsigned DstSize = DstTy.getSizeInBits();
1038 const unsigned SrcSize = SrcTy.getSizeInBits();
1039
1040 switch (DstSize) {
1041 case 32:
1042 switch (SrcSize) {
1043 case 32:
1044 switch (GenericOpc) {
1045 case TargetOpcode::G_SITOFP:
1046 return AArch64::SCVTFUWSri;
1047 case TargetOpcode::G_UITOFP:
1048 return AArch64::UCVTFUWSri;
1049 case TargetOpcode::G_FPTOSI:
1050 return AArch64::FCVTZSUWSr;
1051 case TargetOpcode::G_FPTOUI:
1052 return AArch64::FCVTZUUWSr;
1053 default:
1054 return GenericOpc;
1055 }
1056 case 64:
1057 switch (GenericOpc) {
1058 case TargetOpcode::G_SITOFP:
1059 return AArch64::SCVTFUXSri;
1060 case TargetOpcode::G_UITOFP:
1061 return AArch64::UCVTFUXSri;
1062 case TargetOpcode::G_FPTOSI:
1063 return AArch64::FCVTZSUWDr;
1064 case TargetOpcode::G_FPTOUI:
1065 return AArch64::FCVTZUUWDr;
1066 default:
1067 return GenericOpc;
1068 }
1069 default:
1070 return GenericOpc;
1071 }
1072 case 64:
1073 switch (SrcSize) {
1074 case 32:
1075 switch (GenericOpc) {
1076 case TargetOpcode::G_SITOFP:
1077 return AArch64::SCVTFUWDri;
1078 case TargetOpcode::G_UITOFP:
1079 return AArch64::UCVTFUWDri;
1080 case TargetOpcode::G_FPTOSI:
1081 return AArch64::FCVTZSUXSr;
1082 case TargetOpcode::G_FPTOUI:
1083 return AArch64::FCVTZUUXSr;
1084 default:
1085 return GenericOpc;
1086 }
1087 case 64:
1088 switch (GenericOpc) {
1089 case TargetOpcode::G_SITOFP:
1090 return AArch64::SCVTFUXDri;
1091 case TargetOpcode::G_UITOFP:
1092 return AArch64::UCVTFUXDri;
1093 case TargetOpcode::G_FPTOSI:
1094 return AArch64::FCVTZSUXDr;
1095 case TargetOpcode::G_FPTOUI:
1096 return AArch64::FCVTZUUXDr;
1097 default:
1098 return GenericOpc;
1099 }
1100 default:
1101 return GenericOpc;
1102 }
1103 default:
1104 return GenericOpc;
1105 };
1106 return GenericOpc;
1107}
1108
1109MachineInstr *
1110AArch64InstructionSelector::emitSelect(Register Dst, Register True,
1111 Register False, AArch64CC::CondCode CC,
1112 MachineIRBuilder &MIB) const {
1113 MachineRegisterInfo &MRI = *MIB.getMRI();
1114 assert(RBI.getRegBank(False, MRI, TRI)->getID() ==
1115 RBI.getRegBank(True, MRI, TRI)->getID() &&
1116 "Expected both select operands to have the same regbank?");
1117 LLT Ty = MRI.getType(True);
1118 if (Ty.isVector())
1119 return nullptr;
1120 const unsigned Size = Ty.getSizeInBits();
1121 assert((Size == 32 || Size == 64) &&
1122 "Expected 32 bit or 64 bit select only?");
1123 const bool Is32Bit = Size == 32;
1124 if (RBI.getRegBank(True, MRI, TRI)->getID() != AArch64::GPRRegBankID) {
1125 unsigned Opc = Is32Bit ? AArch64::FCSELSrrr : AArch64::FCSELDrrr;
1126 auto FCSel = MIB.buildInstr(Opc, {Dst}, {True, False}).addImm(CC);
1127 constrainSelectedInstRegOperands(*FCSel, TII, TRI, RBI);
1128 return &*FCSel;
1129 }
1130
1131 // By default, we'll try and emit a CSEL.
1132 unsigned Opc = Is32Bit ? AArch64::CSELWr : AArch64::CSELXr;
1133 bool Optimized = false;
1134 auto TryFoldBinOpIntoSelect = [&Opc, Is32Bit, &CC, &MRI,
1135 &Optimized](Register &Reg, Register &OtherReg,
1136 bool Invert) {
1137 if (Optimized)
1138 return false;
1139
1140 // Attempt to fold:
1141 //
1142 // %sub = G_SUB 0, %x
1143 // %select = G_SELECT cc, %reg, %sub
1144 //
1145 // Into:
1146 // %select = CSNEG %reg, %x, cc
1147 Register MatchReg;
1148 if (mi_match(Reg, MRI, m_Neg(m_Reg(MatchReg)))) {
1149 Opc = Is32Bit ? AArch64::CSNEGWr : AArch64::CSNEGXr;
1150 Reg = MatchReg;
1151 if (Invert) {
1152 CC = AArch64CC::getInvertedCondCode(CC);
1153 std::swap(Reg, OtherReg);
1154 }
1155 return true;
1156 }
1157
1158 // Attempt to fold:
1159 //
1160 // %xor = G_XOR %x, -1
1161 // %select = G_SELECT cc, %reg, %xor
1162 //
1163 // Into:
1164 // %select = CSINV %reg, %x, cc
1165 if (mi_match(Reg, MRI, m_Not(m_Reg(MatchReg)))) {
1166 Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;
1167 Reg = MatchReg;
1168 if (Invert) {
1169 CC = AArch64CC::getInvertedCondCode(CC);
1170 std::swap(Reg, OtherReg);
1171 }
1172 return true;
1173 }
1174
1175 // Attempt to fold:
1176 //
1177 // %add = G_ADD %x, 1
1178 // %select = G_SELECT cc, %reg, %add
1179 //
1180 // Into:
1181 // %select = CSINC %reg, %x, cc
1182 if (mi_match(Reg, MRI,
1183 m_any_of(m_GAdd(m_Reg(MatchReg), m_SpecificICst(1)),
1184 m_GPtrAdd(m_Reg(MatchReg), m_SpecificICst(1))))) {
1185 Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;
1186 Reg = MatchReg;
1187 if (Invert) {
1188 CC = AArch64CC::getInvertedCondCode(CC);
1189 std::swap(Reg, OtherReg);
1190 }
1191 return true;
1192 }
1193
1194 return false;
1195 };
1196
1197 // Helper lambda which tries to use CSINC/CSINV for the instruction when its
1198 // true/false values are constants.
1199 // FIXME: All of these patterns already exist in tablegen. We should be
1200 // able to import these.
1201 auto TryOptSelectCst = [&Opc, &True, &False, &CC, Is32Bit, &MRI,
1202 &Optimized]() {
1203 if (Optimized)
1204 return false;
1205 auto TrueCst = getIConstantVRegValWithLookThrough(True, MRI);
1206 auto FalseCst = getIConstantVRegValWithLookThrough(False, MRI);
1207 if (!TrueCst && !FalseCst)
1208 return false;
1209
1210 Register ZReg = Is32Bit ? AArch64::WZR : AArch64::XZR;
1211 if (TrueCst && FalseCst) {
1212 int64_t T = TrueCst->Value.getSExtValue();
1213 int64_t F = FalseCst->Value.getSExtValue();
1214
1215 if (T == 0 && F == 1) {
1216 // G_SELECT cc, 0, 1 -> CSINC zreg, zreg, cc
1217 Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;
1218 True = ZReg;
1219 False = ZReg;
1220 return true;
1221 }
1222
1223 if (T == 0 && F == -1) {
1224 // G_SELECT cc 0, -1 -> CSINV zreg, zreg cc
1225 Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;
1226 True = ZReg;
1227 False = ZReg;
1228 return true;
1229 }
1230 }
1231
1232 if (TrueCst) {
1233 int64_t T = TrueCst->Value.getSExtValue();
1234 if (T == 1) {
1235 // G_SELECT cc, 1, f -> CSINC f, zreg, inv_cc
1236 Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;
1237 True = False;
1238 False = ZReg;
1239 CC = AArch64CC::getInvertedCondCode(CC);
1240 return true;
1241 }
1242
1243 if (T == -1) {
1244 // G_SELECT cc, -1, f -> CSINV f, zreg, inv_cc
1245 Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;
1246 True = False;
1247 False = ZReg;
1248 CC = AArch64CC::getInvertedCondCode(CC);
1249 return true;
1250 }
1251 }
1252
1253 if (FalseCst) {
1254 int64_t F = FalseCst->Value.getSExtValue();
1255 if (F == 1) {
1256 // G_SELECT cc, t, 1 -> CSINC t, zreg, cc
1257 Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;
1258 False = ZReg;
1259 return true;
1260 }
1261
1262 if (F == -1) {
1263 // G_SELECT cc, t, -1 -> CSINC t, zreg, cc
1264 Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;
1265 False = ZReg;
1266 return true;
1267 }
1268 }
1269 return false;
1270 };
1271
1272 Optimized |= TryFoldBinOpIntoSelect(False, True, /*Invert = */ false);
1273 Optimized |= TryFoldBinOpIntoSelect(True, False, /*Invert = */ true);
1274 Optimized |= TryOptSelectCst();
1275 auto SelectInst = MIB.buildInstr(Opc, {Dst}, {True, False}).addImm(CC);
1276 constrainSelectedInstRegOperands(*SelectInst, TII, TRI, RBI);
1277 return &*SelectInst;
1278}
1279
1280static AArch64CC::CondCode changeICMPPredToAArch64CC(CmpInst::Predicate P) {
1281 switch (P) {
1282 default:
1283 llvm_unreachable("Unknown condition code!");
1284 case CmpInst::ICMP_NE:
1285 return AArch64CC::NE;
1286 case CmpInst::ICMP_EQ:
1287 return AArch64CC::EQ;
1288 case CmpInst::ICMP_SGT:
1289 return AArch64CC::GT;
1290 case CmpInst::ICMP_SGE:
1291 return AArch64CC::GE;
1292 case CmpInst::ICMP_SLT:
1293 return AArch64CC::LT;
1294 case CmpInst::ICMP_SLE:
1295 return AArch64CC::LE;
1296 case CmpInst::ICMP_UGT:
1297 return AArch64CC::HI;
1298 case CmpInst::ICMP_UGE:
1299 return AArch64CC::HS;
1300 case CmpInst::ICMP_ULT:
1301 return AArch64CC::LO;
1302 case CmpInst::ICMP_ULE:
1303 return AArch64CC::LS;
1304 }
1305}
1306
1307/// changeFPCCToORAArch64CC - Convert an IR fp condition code to an AArch64 CC.
1308static void changeFPCCToORAArch64CC(CmpInst::Predicate CC,
1309 AArch64CC::CondCode &CondCode,
1310 AArch64CC::CondCode &CondCode2) {
1311 CondCode2 = AArch64CC::AL;
1312 switch (CC) {
1313 default:
1314 llvm_unreachable("Unknown FP condition!");
1315 case CmpInst::FCMP_OEQ:
1316 CondCode = AArch64CC::EQ;
1317 break;
1318 case CmpInst::FCMP_OGT:
1319 CondCode = AArch64CC::GT;
1320 break;
1321 case CmpInst::FCMP_OGE:
1322 CondCode = AArch64CC::GE;
1323 break;
1324 case CmpInst::FCMP_OLT:
1325 CondCode = AArch64CC::MI;
1326 break;
1327 case CmpInst::FCMP_OLE:
1328 CondCode = AArch64CC::LS;
1329 break;
1330 case CmpInst::FCMP_ONE:
1331 CondCode = AArch64CC::MI;
1332 CondCode2 = AArch64CC::GT;
1333 break;
1334 case CmpInst::FCMP_ORD:
1335 CondCode = AArch64CC::VC;
1336 break;
1337 case CmpInst::FCMP_UNO:
1338 CondCode = AArch64CC::VS;
1339 break;
1340 case CmpInst::FCMP_UEQ:
1341 CondCode = AArch64CC::EQ;
1342 CondCode2 = AArch64CC::VS;
1343 break;
1344 case CmpInst::FCMP_UGT:
1345 CondCode = AArch64CC::HI;
1346 break;
1347 case CmpInst::FCMP_UGE:
1348 CondCode = AArch64CC::PL;
1349 break;
1350 case CmpInst::FCMP_ULT:
1351 CondCode = AArch64CC::LT;
1352 break;
1353 case CmpInst::FCMP_ULE:
1354 CondCode = AArch64CC::LE;
1355 break;
1356 case CmpInst::FCMP_UNE:
1357 CondCode = AArch64CC::NE;
1358 break;
1359 }
1360}
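// As a reminder of the flag meanings relied on above: after an AArch64 FCMP,
// MI (N set) holds exactly for ordered-less-than, GT for ordered-greater-than,
// VS for unordered and VC for ordered, which is why e.g. FCMP_ONE is expressed
// as "MI or GT" and FCMP_UEQ as "EQ or VS".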
1361
1362/// Convert an IR fp condition code to an AArch64 CC.
1363 /// This differs from changeFPCCToORAArch64CC in that it returns cond codes that
1364/// should be AND'ed instead of OR'ed.
1365static void changeFPCCToANDAArch64CC(CmpInst::Predicate CC,
1366 AArch64CC::CondCode &CondCode,
1367 AArch64CC::CondCode &CondCode2) {
1368 CondCode2 = AArch64CC::AL;
1369 switch (CC) {
1370 default:
1371 changeFPCCToORAArch64CC(CC, CondCode, CondCode2);
1372 assert(CondCode2 == AArch64CC::AL);
1373 break;
1374 case CmpInst::FCMP_ONE:
1375 // (a one b)
1376 // == ((a olt b) || (a ogt b))
1377 // == ((a ord b) && (a une b))
1378 CondCode = AArch64CC::VC;
1379 CondCode2 = AArch64CC::NE;
1380 break;
1381 case CmpInst::FCMP_UEQ:
1382 // (a ueq b)
1383 // == ((a uno b) || (a oeq b))
1384 // == ((a ule b) && (a uge b))
1385 CondCode = AArch64CC::PL;
1386 CondCode2 = AArch64CC::LE;
1387 break;
1388 }
1389}
1390
1391/// Return a register which can be used as a bit to test in a TB(N)Z.
1392static Register getTestBitReg(Register Reg, uint64_t &Bit, bool &Invert,
1393 MachineRegisterInfo &MRI) {
1394 assert(Reg.isValid() && "Expected valid register!");
1395 bool HasZext = false;
1396 while (MachineInstr *MI = getDefIgnoringCopies(Reg, MRI)) {
1397 unsigned Opc = MI->getOpcode();
1398
1399 if (!MI->getOperand(0).isReg() ||
1400 !MRI.hasOneNonDBGUse(MI->getOperand(0).getReg()))
1401 break;
1402
1403 // (tbz (any_ext x), b) -> (tbz x, b) if we don't use the extended bits.
1404 //
1405 // (tbz (trunc x), b) -> (tbz x, b) is always safe, because the bit number
1406 // on the truncated x is the same as the bit number on x.
1407 if (Opc == TargetOpcode::G_ANYEXT || Opc == TargetOpcode::G_ZEXT ||
1408 Opc == TargetOpcode::G_TRUNC) {
1409 if (Opc == TargetOpcode::G_ZEXT)
1410 HasZext = true;
1411
1412 Register NextReg = MI->getOperand(1).getReg();
1413 // Did we find something worth folding?
1414 if (!NextReg.isValid() || !MRI.hasOneNonDBGUse(NextReg))
1415 break;
1416
1417 // NextReg is worth folding. Keep looking.
1418 Reg = NextReg;
1419 continue;
1420 }
1421
1422 // Attempt to find a suitable operation with a constant on one side.
1423 Optional<uint64_t> C;
1424 Register TestReg;
1425 switch (Opc) {
1426 default:
1427 break;
1428 case TargetOpcode::G_AND:
1429 case TargetOpcode::G_XOR: {
1430 TestReg = MI->getOperand(1).getReg();
1431 Register ConstantReg = MI->getOperand(2).getReg();
1432 auto VRegAndVal = getIConstantVRegValWithLookThrough(ConstantReg, MRI);
1433 if (!VRegAndVal) {
1434 // AND commutes, check the other side for a constant.
1435 // FIXME: Can we canonicalize the constant so that it's always on the
1436 // same side at some point earlier?
1437 std::swap(ConstantReg, TestReg);
1438 VRegAndVal = getIConstantVRegValWithLookThrough(ConstantReg, MRI);
1439 }
1440 if (VRegAndVal) {
1441 if (HasZext)
1442 C = VRegAndVal->Value.getZExtValue();
1443 else
1444 C = VRegAndVal->Value.getSExtValue();
1445 }
1446 break;
1447 }
1448 case TargetOpcode::G_ASHR:
1449 case TargetOpcode::G_LSHR:
1450 case TargetOpcode::G_SHL: {
1451 TestReg = MI->getOperand(1).getReg();
1452 auto VRegAndVal =
1453 getIConstantVRegValWithLookThrough(MI->getOperand(2).getReg(), MRI);
1454 if (VRegAndVal)
1455 C = VRegAndVal->Value.getSExtValue();
1456 break;
1457 }
1458 }
1459
1460 // Didn't find a constant or viable register. Bail out of the loop.
1461 if (!C || !TestReg.isValid())
1462 break;
1463
1464 // We found a suitable instruction with a constant. Check to see if we can
1465 // walk through the instruction.
1466 Register NextReg;
1467 unsigned TestRegSize = MRI.getType(TestReg).getSizeInBits();
1468 switch (Opc) {
1469 default:
1470 break;
1471 case TargetOpcode::G_AND:
1472 // (tbz (and x, m), b) -> (tbz x, b) when the b-th bit of m is set.
1473 if ((*C >> Bit) & 1)
1474 NextReg = TestReg;
1475 break;
1476 case TargetOpcode::G_SHL:
1477 // (tbz (shl x, c), b) -> (tbz x, b-c) when b-c is positive and fits in
1478 // the type of the register.
1479 if (*C <= Bit && (Bit - *C) < TestRegSize) {
1480 NextReg = TestReg;
1481 Bit = Bit - *C;
1482 }
1483 break;
1484 case TargetOpcode::G_ASHR:
1485 // (tbz (ashr x, c), b) -> (tbz x, b+c) or (tbz x, msb) if b+c is >= # bits
1486 // in x
1487 NextReg = TestReg;
1488 Bit = Bit + *C;
1489 if (Bit >= TestRegSize)
1490 Bit = TestRegSize - 1;
1491 break;
1492 case TargetOpcode::G_LSHR:
1493 // (tbz (lshr x, c), b) -> (tbz x, b+c) when b + c is < # bits in x
1494 if ((Bit + *C) < TestRegSize) {
1495 NextReg = TestReg;
1496 Bit = Bit + *C;
1497 }
1498 break;
1499 case TargetOpcode::G_XOR:
1500 // We can walk through a G_XOR by inverting whether we use tbz/tbnz when
1501 // appropriate.
1502 //
1503 // e.g. If x' = xor x, c, and the b-th bit is set in c then
1504 //
1505 // tbz x', b -> tbnz x, b
1506 //
1507 // Because x' only has the b-th bit set if x does not.
1508 if ((*C >> Bit) & 1)
1509 Invert = !Invert;
1510 NextReg = TestReg;
1511 break;
1512 }
1513
1514 // Check if we found anything worth folding.
1515 if (!NextReg.isValid())
1516 return Reg;
1517 Reg = NextReg;
1518 }
1519
1520 return Reg;
1521}
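// Illustrative walk (hypothetical vregs): testing bit 5 of
//   %y = G_SHL %x, 2
// is equivalent to testing bit 3 of %x, so the loop above rewrites
// (TB(N)Z %y, 5) into (TB(N)Z %x, 3); similarly, walking through a G_XOR
// whose constant has bit 3 set flips Invert so TBZ becomes TBNZ (and vice
// versa) while the search continues through %x.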
1522
1523MachineInstr *AArch64InstructionSelector::emitTestBit(
1524 Register TestReg, uint64_t Bit, bool IsNegative, MachineBasicBlock *DstMBB,
1525 MachineIRBuilder &MIB) const {
1526 assert(TestReg.isValid());
1527 assert(ProduceNonFlagSettingCondBr &&
1528        "Cannot emit TB(N)Z with speculation tracking!");
1529 MachineRegisterInfo &MRI = *MIB.getMRI();
1530
1531 // Attempt to optimize the test bit by walking over instructions.
1532 TestReg = getTestBitReg(TestReg, Bit, IsNegative, MRI);
1533 LLT Ty = MRI.getType(TestReg);
1534 unsigned Size = Ty.getSizeInBits();
1535 assert(!Ty.isVector() && "Expected a scalar!");
1536 assert(Bit < 64 && "Bit is too large!");
1537
1538 // When the test register is a 64-bit register, we have to narrow to make
1539 // TBNZW work.
1540 bool UseWReg = Bit < 32;
1541 unsigned NecessarySize = UseWReg ? 32 : 64;
1542 if (Size != NecessarySize)
1543 TestReg = moveScalarRegClass(
1544 TestReg, UseWReg ? AArch64::GPR32RegClass : AArch64::GPR64RegClass,
1545 MIB);
1546
1547 static const unsigned OpcTable[2][2] = {{AArch64::TBZX, AArch64::TBNZX},
1548 {AArch64::TBZW, AArch64::TBNZW}};
1549 unsigned Opc = OpcTable[UseWReg][IsNegative];
1550 auto TestBitMI =
1551 MIB.buildInstr(Opc).addReg(TestReg).addImm(Bit).addMBB(DstMBB);
1552 constrainSelectedInstRegOperands(*TestBitMI, TII, TRI, RBI);
1553 return &*TestBitMI;
1554}
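// Note on the W/X split above: TB(N)Z encodes the bit number in a 6-bit
// immediate whose top bit doubles as the register-width selector, so bit
// positions 0-31 are emitted with the W-form opcodes and positions 32-63
// require the X form; the copy to GPR32/GPR64 just makes the register
// operand match that choice.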
1555
1556bool AArch64InstructionSelector::tryOptAndIntoCompareBranch(
1557 MachineInstr &AndInst, bool Invert, MachineBasicBlock *DstMBB,
1558 MachineIRBuilder &MIB) const {
1559 assert(AndInst.getOpcode() == TargetOpcode::G_AND && "Expected G_AND only?");
1560 // Given something like this:
1561 //
1562 // %x = ...Something...
1563 // %one = G_CONSTANT i64 1
1564 // %zero = G_CONSTANT i64 0
1565 // %and = G_AND %x, %one
1566 // %cmp = G_ICMP intpred(ne), %and, %zero
1567 // %cmp_trunc = G_TRUNC %cmp
1568 // G_BRCOND %cmp_trunc, %bb.3
1569 //
1570 // We want to try and fold the AND into the G_BRCOND and produce either a
1571 // TBNZ (when we have intpred(ne)) or a TBZ (when we have intpred(eq)).
1572 //
1573 // In this case, we'd get
1574 //
1575 // TBNZ %x %bb.3
1576 //
1577
1578 // Check if the AND has a constant on its RHS which we can use as a mask.
1579 // If it's a power of 2, then it's the same as checking a specific bit.
1580 // (e.g, ANDing with 8 == ANDing with 000...100 == testing if bit 3 is set)
1581 auto MaybeBit = getIConstantVRegValWithLookThrough(
1582 AndInst.getOperand(2).getReg(), *MIB.getMRI());
1583 if (!MaybeBit)
1584 return false;
1585
1586 int32_t Bit = MaybeBit->Value.exactLogBase2();
1587 if (Bit < 0)
1588 return false;
1589
1590 Register TestReg = AndInst.getOperand(1).getReg();
1591
1592 // Emit a TB(N)Z.
1593 emitTestBit(TestReg, Bit, Invert, DstMBB, MIB);
1594 return true;
1595}
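// Example of the power-of-two check (hypothetical values): a mask of 8
// (0b1000) gives exactLogBase2() == 3, so the branch becomes TB(N)Z %x, 3;
// a mask such as 6 (0b0110) is not a power of two, exactLogBase2() returns
// a negative value, and we fall back to the generic selection path.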
1596
1597MachineInstr *AArch64InstructionSelector::emitCBZ(Register CompareReg,
1598 bool IsNegative,
1599 MachineBasicBlock *DestMBB,
1600 MachineIRBuilder &MIB) const {
1601 assert(ProduceNonFlagSettingCondBr && "CBZ does not set flags!");
1602 MachineRegisterInfo &MRI = *MIB.getMRI();
1603 assert(RBI.getRegBank(CompareReg, MRI, TRI)->getID() ==
1604            AArch64::GPRRegBankID &&
1605        "Expected GPRs only?");
1606 auto Ty = MRI.getType(CompareReg);
1607 unsigned Width = Ty.getSizeInBits();
1608 assert(!Ty.isVector() && "Expected scalar only?");
1609 assert(Width <= 64 && "Expected width to be at most 64?");
1610 static const unsigned OpcTable[2][2] = {{AArch64::CBZW, AArch64::CBZX},
1611 {AArch64::CBNZW, AArch64::CBNZX}};
1612 unsigned Opc = OpcTable[IsNegative][Width == 64];
1613 auto BranchMI = MIB.buildInstr(Opc, {}, {CompareReg}).addMBB(DestMBB);
1614 constrainSelectedInstRegOperands(*BranchMI, TII, TRI, RBI);
1615 return &*BranchMI;
1616}
1617
1618bool AArch64InstructionSelector::selectCompareBranchFedByFCmp(
1619 MachineInstr &I, MachineInstr &FCmp, MachineIRBuilder &MIB) const {
1620 assert(FCmp.getOpcode() == TargetOpcode::G_FCMP);
1621 assert(I.getOpcode() == TargetOpcode::G_BRCOND);
1622 // Unfortunately, the mapping of LLVM FP CC's onto AArch64 CC's isn't
1623 // totally clean. Some of them require two branches to implement.
1624 auto Pred = (CmpInst::Predicate)FCmp.getOperand(1).getPredicate();
1625 emitFPCompare(FCmp.getOperand(2).getReg(), FCmp.getOperand(3).getReg(), MIB,
1626 Pred);
1627 AArch64CC::CondCode CC1, CC2;
1628 changeFCMPPredToAArch64CC(static_cast<CmpInst::Predicate>(Pred), CC1, CC2);
1629 MachineBasicBlock *DestMBB = I.getOperand(1).getMBB();
1630 MIB.buildInstr(AArch64::Bcc, {}, {}).addImm(CC1).addMBB(DestMBB);
1631 if (CC2 != AArch64CC::AL)
1632 MIB.buildInstr(AArch64::Bcc, {}, {}).addImm(CC2).addMBB(DestMBB);
1633 I.eraseFromParent();
1634 return true;
1635}
1636
1637bool AArch64InstructionSelector::tryOptCompareBranchFedByICmp(
1638 MachineInstr &I, MachineInstr &ICmp, MachineIRBuilder &MIB) const {
1639 assert(ICmp.getOpcode() == TargetOpcode::G_ICMP);
1640 assert(I.getOpcode() == TargetOpcode::G_BRCOND);
1641 // Attempt to optimize the G_BRCOND + G_ICMP into a TB(N)Z/CB(N)Z.
1642 //
1643 // Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z
1644 // instructions will not be produced, as they are conditional branch
1645 // instructions that do not set flags.
1646 if (!ProduceNonFlagSettingCondBr)
1647 return false;
1648
1649 MachineRegisterInfo &MRI = *MIB.getMRI();
1650 MachineBasicBlock *DestMBB = I.getOperand(1).getMBB();
1651 auto Pred =
1652 static_cast<CmpInst::Predicate>(ICmp.getOperand(1).getPredicate());
1653 Register LHS = ICmp.getOperand(2).getReg();
1654 Register RHS = ICmp.getOperand(3).getReg();
1655
1656 // We're allowed to emit a TB(N)Z/CB(N)Z. Try to do that.
1657 auto VRegAndVal = getIConstantVRegValWithLookThrough(RHS, MRI);
1658 MachineInstr *AndInst = getOpcodeDef(TargetOpcode::G_AND, LHS, MRI);
1659
1660 // When we can emit a TB(N)Z, prefer that.
1661 //
1662 // Handle non-commutative condition codes first.
1663 // Note that we don't want to do this when we have a G_AND because it can
1664 // become a tst. The tst will make the test bit in the TB(N)Z redundant.
1665 if (VRegAndVal && !AndInst) {
1666 int64_t C = VRegAndVal->Value.getSExtValue();
1667
1668 // When we have a greater-than comparison, we can just test if the msb is
1669 // zero.
1670 if (C == -1 && Pred == CmpInst::ICMP_SGT) {
1671 uint64_t Bit = MRI.getType(LHS).getSizeInBits() - 1;
1672 emitTestBit(LHS, Bit, /*IsNegative = */ false, DestMBB, MIB);
1673 I.eraseFromParent();
1674 return true;
1675 }
1676
1677 // When we have a less than comparison, we can just test if the msb is not
1678 // zero.
1679 if (C == 0 && Pred == CmpInst::ICMP_SLT) {
1680 uint64_t Bit = MRI.getType(LHS).getSizeInBits() - 1;
1681 emitTestBit(LHS, Bit, /*IsNegative = */ true, DestMBB, MIB);
1682 I.eraseFromParent();
1683 return true;
1684 }
1685 }
1686
1687 // Attempt to handle commutative condition codes. Right now, that's only
1688 // eq/ne.
1689 if (ICmpInst::isEquality(Pred)) {
1690 if (!VRegAndVal) {
1691 std::swap(RHS, LHS);
1692 VRegAndVal = getIConstantVRegValWithLookThrough(RHS, MRI);
1693 AndInst = getOpcodeDef(TargetOpcode::G_AND, LHS, MRI);
1694 }
1695
1696 if (VRegAndVal && VRegAndVal->Value == 0) {
1697 // If there's a G_AND feeding into this branch, try to fold it away by
1698 // emitting a TB(N)Z instead.
1699 //
1700 // Note: If we have LT, then it *is* possible to fold, but it wouldn't be
1701 // beneficial. When we have an AND and LT, we need a TST/ANDS, so folding
1702 // would be redundant.
1703 if (AndInst &&
1704 tryOptAndIntoCompareBranch(
1705 *AndInst, /*Invert = */ Pred == CmpInst::ICMP_NE, DestMBB, MIB)) {
1706 I.eraseFromParent();
1707 return true;
1708 }
1709
1710 // Otherwise, try to emit a CB(N)Z instead.
1711 auto LHSTy = MRI.getType(LHS);
1712 if (!LHSTy.isVector() && LHSTy.getSizeInBits() <= 64) {
1713 emitCBZ(LHS, /*IsNegative = */ Pred == CmpInst::ICMP_NE, DestMBB, MIB);
1714 I.eraseFromParent();
1715 return true;
1716 }
1717 }
1718 }
1719
1720 return false;
1721}
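// Two concrete rewrites performed above (hypothetical 64-bit vregs):
//   G_ICMP slt, %x, 0 feeding the G_BRCOND becomes TBNZ %x, 63, %bb
//   G_ICMP eq,  %x, 0 (with no G_AND feeding it)  becomes CBZ %x, %bb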
1722
1723bool AArch64InstructionSelector::selectCompareBranchFedByICmp(
1724 MachineInstr &I, MachineInstr &ICmp, MachineIRBuilder &MIB) const {
1725 assert(ICmp.getOpcode() == TargetOpcode::G_ICMP);
1726 assert(I.getOpcode() == TargetOpcode::G_BRCOND);
1727 if (tryOptCompareBranchFedByICmp(I, ICmp, MIB))
1728 return true;
1729
1730 // Couldn't optimize. Emit a compare + a Bcc.
1731 MachineBasicBlock *DestMBB = I.getOperand(1).getMBB();
1732 auto PredOp = ICmp.getOperand(1);
1733 emitIntegerCompare(ICmp.getOperand(2), ICmp.getOperand(3), PredOp, MIB);
1734 const AArch64CC::CondCode CC = changeICMPPredToAArch64CC(
1735 static_cast<CmpInst::Predicate>(PredOp.getPredicate()));
1736 MIB.buildInstr(AArch64::Bcc, {}, {}).addImm(CC).addMBB(DestMBB);
1737 I.eraseFromParent();
1738 return true;
1739}
1740
1741bool AArch64InstructionSelector::selectCompareBranch(
1742 MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) {
1743 Register CondReg = I.getOperand(0).getReg();
1744 MachineInstr *CCMI = MRI.getVRegDef(CondReg);
1745 // Try to select the G_BRCOND using whatever is feeding the condition if
1746 // possible.
1747 unsigned CCMIOpc = CCMI->getOpcode();
1748 if (CCMIOpc == TargetOpcode::G_FCMP)
1749 return selectCompareBranchFedByFCmp(I, *CCMI, MIB);
1750 if (CCMIOpc == TargetOpcode::G_ICMP)
1751 return selectCompareBranchFedByICmp(I, *CCMI, MIB);
1752
1753 // Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z
1754 // instructions will not be produced, as they are conditional branch
1755 // instructions that do not set flags.
1756 if (ProduceNonFlagSettingCondBr) {
1757 emitTestBit(CondReg, /*Bit = */ 0, /*IsNegative = */ true,
1758 I.getOperand(1).getMBB(), MIB);
1759 I.eraseFromParent();
1760 return true;
1761 }
1762
1763 // Can't emit TB(N)Z/CB(N)Z. Emit a tst + bcc instead.
1764 auto TstMI =
1765 MIB.buildInstr(AArch64::ANDSWri, {LLT::scalar(32)}, {CondReg}).addImm(1);
1766 constrainSelectedInstRegOperands(*TstMI, TII, TRI, RBI);
1767 auto Bcc = MIB.buildInstr(AArch64::Bcc)
1768 .addImm(AArch64CC::EQ)
1769 .addMBB(I.getOperand(1).getMBB());
1770 I.eraseFromParent();
1771 return constrainSelectedInstRegOperands(*Bcc, TII, TRI, RBI);
1772}
1773
1774/// Returns the element immediate value of a vector shift operand if found.
1775/// This needs to detect a splat-like operation, e.g. a G_BUILD_VECTOR.
1776static Optional<int64_t> getVectorShiftImm(Register Reg,
1777 MachineRegisterInfo &MRI) {
1778 assert(MRI.getType(Reg).isVector() && "Expected a *vector* shift operand");
1779 MachineInstr *OpMI = MRI.getVRegDef(Reg);
1780 return getAArch64VectorSplatScalar(*OpMI, MRI);
1781}
1782
1783/// Matches and returns the shift immediate value for a SHL instruction given
1784/// a shift operand.
1785static Optional<int64_t> getVectorSHLImm(LLT SrcTy, Register Reg, MachineRegisterInfo &MRI) {
1786 Optional<int64_t> ShiftImm = getVectorShiftImm(Reg, MRI);
1787 if (!ShiftImm)
1788 return None;
1789 // Check the immediate is in range for a SHL.
1790 int64_t Imm = *ShiftImm;
1791 if (Imm < 0)
1792 return None;
1793 switch (SrcTy.getElementType().getSizeInBits()) {
1794 default:
1795 LLVM_DEBUG(dbgs() << "Unhandled element type for vector shift");
1796 return None;
1797 case 8:
1798 if (Imm > 7)
1799 return None;
1800 break;
1801 case 16:
1802 if (Imm > 15)
1803 return None;
1804 break;
1805 case 32:
1806 if (Imm > 31)
1807 return None;
1808 break;
1809 case 64:
1810 if (Imm > 63)
1811 return None;
1812 break;
1813 }
1814 return Imm;
1815}
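// The per-size caps above reflect that the immediate SHL encodings only
// accept shift amounts in [0, element-size - 1]; a splat of anything larger
// returns None here and the caller falls back to the register (USHL) form.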
1816
1817bool AArch64InstructionSelector::selectVectorSHL(MachineInstr &I,
1818 MachineRegisterInfo &MRI) {
1819 assert(I.getOpcode() == TargetOpcode::G_SHL);
1820 Register DstReg = I.getOperand(0).getReg();
1821 const LLT Ty = MRI.getType(DstReg);
1822 Register Src1Reg = I.getOperand(1).getReg();
1823 Register Src2Reg = I.getOperand(2).getReg();
1824
1825 if (!Ty.isVector())
1826 return false;
1827
1828 // Check if we have a vector of constants on RHS that we can select as the
1829 // immediate form.
1830 Optional<int64_t> ImmVal = getVectorSHLImm(Ty, Src2Reg, MRI);
1831
1832 unsigned Opc = 0;
1833 if (Ty == LLT::fixed_vector(2, 64)) {
1834 Opc = ImmVal ? AArch64::SHLv2i64_shift : AArch64::USHLv2i64;
1835 } else if (Ty == LLT::fixed_vector(4, 32)) {
1836 Opc = ImmVal ? AArch64::SHLv4i32_shift : AArch64::USHLv4i32;
1837 } else if (Ty == LLT::fixed_vector(2, 32)) {
1838 Opc = ImmVal ? AArch64::SHLv2i32_shift : AArch64::USHLv2i32;
1839 } else if (Ty == LLT::fixed_vector(4, 16)) {
1840 Opc = ImmVal ? AArch64::SHLv4i16_shift : AArch64::USHLv4i16;
1841 } else if (Ty == LLT::fixed_vector(8, 16)) {
1842 Opc = ImmVal ? AArch64::SHLv8i16_shift : AArch64::USHLv8i16;
1843 } else if (Ty == LLT::fixed_vector(16, 8)) {
1844 Opc = ImmVal ? AArch64::SHLv16i8_shift : AArch64::USHLv16i8;
1845 } else if (Ty == LLT::fixed_vector(8, 8)) {
1846 Opc = ImmVal ? AArch64::SHLv8i8_shift : AArch64::USHLv8i8;
1847 } else {
1848 LLVM_DEBUG(dbgs() << "Unhandled G_SHL type");
1849 return false;
1850 }
1851
1852 auto Shl = MIB.buildInstr(Opc, {DstReg}, {Src1Reg});
1853 if (ImmVal)
1854 Shl.addImm(*ImmVal);
1855 else
1856 Shl.addUse(Src2Reg);
1857 constrainSelectedInstRegOperands(*Shl, TII, TRI, RBI);
1858 I.eraseFromParent();
1859 return true;
1860}
1861
1862bool AArch64InstructionSelector::selectVectorAshrLshr(
1863 MachineInstr &I, MachineRegisterInfo &MRI) {
1864 assert(I.getOpcode() == TargetOpcode::G_ASHR ||
1865        I.getOpcode() == TargetOpcode::G_LSHR);
1866 Register DstReg = I.getOperand(0).getReg();
1867 const LLT Ty = MRI.getType(DstReg);
1868 Register Src1Reg = I.getOperand(1).getReg();
1869 Register Src2Reg = I.getOperand(2).getReg();
1870
1871 if (!Ty.isVector())
1872 return false;
1873
1874 bool IsASHR = I.getOpcode() == TargetOpcode::G_ASHR;
1875
1876 // We expect the immediate case to be lowered in the PostLegalizerCombiner to
1877 // AArch64ISD::VASHR or AArch64ISD::VLSHR equivalents.
1878
1879 // There is no shift-right-by-register instruction, but the shift-left-by-
1880 // register instruction takes a signed value, where negative amounts specify
1881 // a right shift.
1882
1883 unsigned Opc = 0;
1884 unsigned NegOpc = 0;
1885 const TargetRegisterClass *RC =
1886 getRegClassForTypeOnBank(Ty, RBI.getRegBank(AArch64::FPRRegBankID));
1887 if (Ty == LLT::fixed_vector(2, 64)) {
1888 Opc = IsASHR ? AArch64::SSHLv2i64 : AArch64::USHLv2i64;
1889 NegOpc = AArch64::NEGv2i64;
1890 } else if (Ty == LLT::fixed_vector(4, 32)) {
1891 Opc = IsASHR ? AArch64::SSHLv4i32 : AArch64::USHLv4i32;
1892 NegOpc = AArch64::NEGv4i32;
1893 } else if (Ty == LLT::fixed_vector(2, 32)) {
1894 Opc = IsASHR ? AArch64::SSHLv2i32 : AArch64::USHLv2i32;
1895 NegOpc = AArch64::NEGv2i32;
1896 } else if (Ty == LLT::fixed_vector(4, 16)) {
1897 Opc = IsASHR ? AArch64::SSHLv4i16 : AArch64::USHLv4i16;
1898 NegOpc = AArch64::NEGv4i16;
1899 } else if (Ty == LLT::fixed_vector(8, 16)) {
1900 Opc = IsASHR ? AArch64::SSHLv8i16 : AArch64::USHLv8i16;
1901 NegOpc = AArch64::NEGv8i16;
1902 } else if (Ty == LLT::fixed_vector(16, 8)) {
1903 Opc = IsASHR ? AArch64::SSHLv16i8 : AArch64::USHLv16i8;
1904 NegOpc = AArch64::NEGv16i8;
1905 } else if (Ty == LLT::fixed_vector(8, 8)) {
1906 Opc = IsASHR ? AArch64::SSHLv8i8 : AArch64::USHLv8i8;
1907 NegOpc = AArch64::NEGv8i8;
1908 } else {
1909 LLVM_DEBUG(dbgs() << "Unhandled G_ASHR type");
1910 return false;
1911 }
1912
1913 auto Neg = MIB.buildInstr(NegOpc, {RC}, {Src2Reg});
1914 constrainSelectedInstRegOperands(*Neg, TII, TRI, RBI);
1915 auto SShl = MIB.buildInstr(Opc, {DstReg}, {Src1Reg, Neg});
1916 constrainSelectedInstRegOperands(*SShl, TII, TRI, RBI);
1917 I.eraseFromParent();
1918 return true;
1919}
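// Sketch of the emitted sequence (hypothetical vregs, v4s32 G_ASHR):
//   %neg:fpr(<4 x s32>) = NEGv4i32 %shift
//   %dst:fpr(<4 x s32>) = SSHLv4i32 %src, %neg
// SSHL/USHL treat negative per-lane shift amounts as right shifts, which is
// why the amounts are negated first.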
1920
1921bool AArch64InstructionSelector::selectVaStartAAPCS(
1922 MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const {
1923 return false;
1924}
1925
1926bool AArch64InstructionSelector::selectVaStartDarwin(
1927 MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const {
1928 AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
1929 Register ListReg = I.getOperand(0).getReg();
1930
1931 Register ArgsAddrReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
1932
1933 auto MIB =
1934 BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::ADDXri))
1935 .addDef(ArgsAddrReg)
1936 .addFrameIndex(FuncInfo->getVarArgsStackIndex())
1937 .addImm(0)
1938 .addImm(0);
1939
1940 constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
1941
1942 MIB = BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::STRXui))
1943 .addUse(ArgsAddrReg)
1944 .addUse(ListReg)
1945 .addImm(0)
1946 .addMemOperand(*I.memoperands_begin());
1947
1948 constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
1949 I.eraseFromParent();
1950 return true;
1951}
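// On Darwin the AArch64 va_list is a single pointer, so va_start reduces to
// computing the address of the variadic save area off its frame index
// (ADDXri) and storing that pointer into the va_list slot (STRXui).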
1952
1953void AArch64InstructionSelector::materializeLargeCMVal(
1954 MachineInstr &I, const Value *V, unsigned OpFlags) {
1955 MachineBasicBlock &MBB = *I.getParent();
1956 MachineFunction &MF = *MBB.getParent();
1957 MachineRegisterInfo &MRI = MF.getRegInfo();
1958
1959 auto MovZ = MIB.buildInstr(AArch64::MOVZXi, {&AArch64::GPR64RegClass}, {});
1960 MovZ->addOperand(MF, I.getOperand(1));
1961 MovZ->getOperand(1).setTargetFlags(OpFlags | AArch64II::MO_G0 |
1962 AArch64II::MO_NC);
1963 MovZ->addOperand(MF, MachineOperand::CreateImm(0));
1964 constrainSelectedInstRegOperands(*MovZ, TII, TRI, RBI);
1965
1966 auto BuildMovK = [&](Register SrcReg, unsigned char Flags, unsigned Offset,
1967 Register ForceDstReg) {
1968 Register DstReg = ForceDstReg
1969 ? ForceDstReg
1970 : MRI.createVirtualRegister(&AArch64::GPR64RegClass);
1971 auto MovI = MIB.buildInstr(AArch64::MOVKXi).addDef(DstReg).addUse(SrcReg);
1972 if (auto *GV = dyn_cast<GlobalValue>(V)) {
1973 MovI->addOperand(MF, MachineOperand::CreateGA(
1974 GV, MovZ->getOperand(1).getOffset(), Flags));
1975 } else {
1976 MovI->addOperand(
1977 MF, MachineOperand::CreateBA(cast<BlockAddress>(V),
1978 MovZ->getOperand(1).getOffset(), Flags));
1979 }
1980 MovI->addOperand(MF, MachineOperand::CreateImm(Offset));
1981 constrainSelectedInstRegOperands(*MovI, TII, TRI, RBI);
1982 return DstReg;
1983 };
1984 Register DstReg = BuildMovK(MovZ.getReg(0),
1985 AArch64II::MO_G1 | AArch64II::MO_NC, 16, 0);
1986 DstReg = BuildMovK(DstReg, AArch64II::MO_G2 | AArch64II::MO_NC, 32, 0);
1987 BuildMovK(DstReg, AArch64II::MO_G3, 48, I.getOperand(0).getReg());
1988}
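// The emitted sequence materializes the 64-bit address in 16-bit chunks,
// roughly:
//   MOVZ Xd, sym[15:0]
//   MOVK Xd, sym[31:16], lsl #16
//   MOVK Xd, sym[47:32], lsl #32
//   MOVK Xd, sym[63:48], lsl #48
// with the MO_Gn / MO_NC target flags selecting the matching relocations.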
1989
1990bool AArch64InstructionSelector::preISelLower(MachineInstr &I) {
1991 MachineBasicBlock &MBB = *I.getParent();
1992 MachineFunction &MF = *MBB.getParent();
1993 MachineRegisterInfo &MRI = MF.getRegInfo();
1994
1995 switch (I.getOpcode()) {
1996 case TargetOpcode::G_STORE: {
1997 bool Changed = contractCrossBankCopyIntoStore(I, MRI);
1998 MachineOperand &SrcOp = I.getOperand(0);
1999 if (MRI.getType(SrcOp.getReg()).isPointer()) {
2000 // Allow matching with imported patterns for stores of pointers. Unlike
2001 // G_LOAD/G_PTR_ADD, we may not have selected all users. So, emit a copy
2002 // and constrain.
2003 auto Copy = MIB.buildCopy(LLT::scalar(64), SrcOp);
2004 Register NewSrc = Copy.getReg(0);
2005 SrcOp.setReg(NewSrc);
2006 RBI.constrainGenericRegister(NewSrc, AArch64::GPR64RegClass, MRI);
2007 Changed = true;
2008 }
2009 return Changed;
2010 }
2011 case TargetOpcode::G_PTR_ADD:
2012 return convertPtrAddToAdd(I, MRI);
2013 case TargetOpcode::G_LOAD: {
2014 // For scalar loads of pointers, we try to convert the dest type from p0
2015 // to s64 so that our imported patterns can match. Like with the G_PTR_ADD
2016 // conversion, this should be ok because all users should have been
2017 // selected already, so the type doesn't matter for them.
2018 Register DstReg = I.getOperand(0).getReg();
2019 const LLT DstTy = MRI.getType(DstReg);
2020 if (!DstTy.isPointer())
2021 return false;
2022 MRI.setType(DstReg, LLT::scalar(64));
2023 return true;
2024 }
2025 case AArch64::G_DUP: {
2026 // Convert the type from p0 to s64 to help selection.
2027 LLT DstTy = MRI.getType(I.getOperand(0).getReg());
2028 if (!DstTy.getElementType().isPointer())
2029 return false;
2030 auto NewSrc = MIB.buildCopy(LLT::scalar(64), I.getOperand(1).getReg());
2031 MRI.setType(I.getOperand(0).getReg(),
2032 DstTy.changeElementType(LLT::scalar(64)));
2033 MRI.setRegClass(NewSrc.getReg(0), &AArch64::GPR64RegClass);
2034 I.getOperand(1).setReg(NewSrc.getReg(0));
2035 return true;
2036 }
2037 case TargetOpcode::G_UITOFP:
2038 case TargetOpcode::G_SITOFP: {
2039 // If both source and destination regbanks are FPR, then convert the opcode
2040 // to G_SITOF so that the importer can select it to an fpr variant.
2041 // Otherwise, it ends up matching an fpr/gpr variant and adding a cross-bank
2042 // copy.
2043 Register SrcReg = I.getOperand(1).getReg();
2044 LLT SrcTy = MRI.getType(SrcReg);
2045 LLT DstTy = MRI.getType(I.getOperand(0).getReg());
2046 if (SrcTy.isVector() || SrcTy.getSizeInBits() != DstTy.getSizeInBits())
2047 return false;
2048
2049 if (RBI.getRegBank(SrcReg, MRI, TRI)->getID() == AArch64::FPRRegBankID) {
2050 if (I.getOpcode() == TargetOpcode::G_SITOFP)
2051 I.setDesc(TII.get(AArch64::G_SITOF));
2052 else
2053 I.setDesc(TII.get(AArch64::G_UITOF));
2054 return true;
2055 }
2056 return false;
2057 }
2058 default:
2059 return false;
2060 }
2061}
2062
2063/// This lowering tries to look for G_PTR_ADD instructions and then converts
2064/// them to a standard G_ADD with a COPY on the source.
2065///
2066/// The motivation behind this is to expose the add semantics to the imported
2067/// tablegen patterns. We shouldn't need to check for uses being loads/stores,
2068/// because the selector works bottom up, uses before defs. By the time we
2069/// end up trying to select a G_PTR_ADD, we should have already attempted to
2070/// fold this into addressing modes and were therefore unsuccessful.
2071bool AArch64InstructionSelector::convertPtrAddToAdd(
2072 MachineInstr &I, MachineRegisterInfo &MRI) {
2073 assert(I.getOpcode() == TargetOpcode::G_PTR_ADD && "Expected G_PTR_ADD");
2074 Register DstReg = I.getOperand(0).getReg();
2075 Register AddOp1Reg = I.getOperand(1).getReg();
2076 const LLT PtrTy = MRI.getType(DstReg);
2077 if (PtrTy.getAddressSpace() != 0)
2078 return false;
2079
2080 const LLT CastPtrTy =
2081 PtrTy.isVector() ? LLT::fixed_vector(2, 64) : LLT::scalar(64);
2082 auto PtrToInt = MIB.buildPtrToInt(CastPtrTy, AddOp1Reg);
2083 // Set regbanks on the registers.
2084 if (PtrTy.isVector())
2085 MRI.setRegBank(PtrToInt.getReg(0), RBI.getRegBank(AArch64::FPRRegBankID));
2086 else
2087 MRI.setRegBank(PtrToInt.getReg(0), RBI.getRegBank(AArch64::GPRRegBankID));
2088
2089 // Now turn the %dst(p0) = G_PTR_ADD %base, off into:
2090 // %dst(intty) = G_ADD %intbase, off
2091 I.setDesc(TII.get(TargetOpcode::G_ADD));
2092 MRI.setType(DstReg, CastPtrTy);
2093 I.getOperand(1).setReg(PtrToInt.getReg(0));
2094 if (!select(*PtrToInt)) {
2095 LLVM_DEBUG(dbgs() << "Failed to select G_PTRTOINT in convertPtrAddToAdd");
2096 return false;
2097 }
2098
2099 // Also take the opportunity here to try to do some optimization.
2100 // Try to convert this into a G_SUB if the offset is a 0-x negate idiom.
2101 Register NegatedReg;
2102 if (!mi_match(I.getOperand(2).getReg(), MRI, m_Neg(m_Reg(NegatedReg))))
2103 return true;
2104 I.getOperand(2).setReg(NegatedReg);
2105 I.setDesc(TII.get(TargetOpcode::G_SUB));
2106 return true;
2107}
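// Sketch of the rewrite (hypothetical vregs):
//   %dst:gpr(p0) = G_PTR_ADD %base, %off
// becomes
//   %ib:gpr(s64) = G_PTRTOINT %base
//   %dst:gpr(s64) = G_ADD %ib, %off
// and, if %off was itself a (G_SUB 0, %n) negate idiom, the G_ADD is further
// turned into G_SUB %ib, %n.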
2108
2109bool AArch64InstructionSelector::earlySelectSHL(MachineInstr &I,
2110 MachineRegisterInfo &MRI) {
2111 // We try to match the immediate variant of LSL, which is actually an alias
2112 // for a special case of UBFM. Otherwise, we fall back to the imported
2113 // selector which will match the register variant.
2114 assert(I.getOpcode() == TargetOpcode::G_SHL && "unexpected op");
2115 const auto &MO = I.getOperand(2);
2116 auto VRegAndVal = getIConstantVRegVal(MO.getReg(), MRI);
2117 if (!VRegAndVal)
2118 return false;
2119
2120 const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
2121 if (DstTy.isVector())
2122 return false;
2123 bool Is64Bit = DstTy.getSizeInBits() == 64;
2124 auto Imm1Fn = Is64Bit ? selectShiftA_64(MO) : selectShiftA_32(MO);
2125 auto Imm2Fn = Is64Bit ? selectShiftB_64(MO) : selectShiftB_32(MO);
2126
2127 if (!Imm1Fn || !Imm2Fn)
2128 return false;
2129
2130 auto NewI =
2131 MIB.buildInstr(Is64Bit ? AArch64::UBFMXri : AArch64::UBFMWri,
2132 {I.getOperand(0).getReg()}, {I.getOperand(1).getReg()});
2133
2134 for (auto &RenderFn : *Imm1Fn)
2135 RenderFn(NewI);
2136 for (auto &RenderFn : *Imm2Fn)
2137 RenderFn(NewI);
2138
2139 I.eraseFromParent();
2140 return constrainSelectedInstRegOperands(*NewI, TII, TRI, RBI);
2141}
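// For reference, the immediate LSL alias used here expands as
//   lsl xd, xn, #sh  ==  ubfm xd, xn, #((64 - sh) % 64), #(63 - sh)
// (the 32-bit form is analogous with 32/31); the selectShiftA_*/selectShiftB_*
// helpers render those two immediates from the constant shift amount.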
2142
2143bool AArch64InstructionSelector::contractCrossBankCopyIntoStore(
2144 MachineInstr &I, MachineRegisterInfo &MRI) {
2145 assert(I.getOpcode() == TargetOpcode::G_STORE && "Expected G_STORE");
2146 // If we're storing a scalar, it doesn't matter what register bank that
2147 // scalar is on. All that matters is the size.
2148 //
2149 // So, if we see something like this (with a 32-bit scalar as an example):
2150 //
2151 // %x:gpr(s32) = ... something ...
2152 // %y:fpr(s32) = COPY %x:gpr(s32)
2153 // G_STORE %y:fpr(s32)
2154 //
2155 // We can fix this up into something like this:
2156 //
2157 // G_STORE %x:gpr(s32)
2158 //
2159 // And then continue the selection process normally.
2160 Register DefDstReg = getSrcRegIgnoringCopies(I.getOperand(0).getReg(), MRI);
2161 if (!DefDstReg.isValid())
2162 return false;
2163 LLT DefDstTy = MRI.getType(DefDstReg);
2164 Register StoreSrcReg = I.getOperand(0).getReg();
2165 LLT StoreSrcTy = MRI.getType(StoreSrcReg);
2166
2167 // If we get something strange like a physical register, then we shouldn't
2168 // go any further.
2169 if (!DefDstTy.isValid())
2170 return false;
2171
2172 // Are the source and dst types the same size?
2173 if (DefDstTy.getSizeInBits() != StoreSrcTy.getSizeInBits())
2174 return false;
2175
2176 if (RBI.getRegBank(StoreSrcReg, MRI, TRI) ==
2177 RBI.getRegBank(DefDstReg, MRI, TRI))
2178 return false;
2179
2180 // We have a cross-bank copy, which is entering a store. Let's fold it.
2181 I.getOperand(0).setReg(DefDstReg);
2182 return true;
2183}
2184
2185bool AArch64InstructionSelector::earlySelect(MachineInstr &I) {
2186 assert(I.getParent() && "Instruction should be in a basic block!");
2187 assert(I.getParent()->getParent() && "Instruction should be in a function!");
2188
2189 MachineBasicBlock &MBB = *I.getParent();
2190 MachineFunction &MF = *MBB.getParent();
2191 MachineRegisterInfo &MRI = MF.getRegInfo();
2192
2193 switch (I.getOpcode()) {
2194 case AArch64::G_DUP: {
2195 // Before selecting a DUP instruction, check if it is better selected as a
2196 // MOV or load from a constant pool.
2197 Register Src = I.getOperand(1).getReg();
2198 auto ValAndVReg = getIConstantVRegValWithLookThrough(Src, MRI);
2199 if (!ValAndVReg)
2200 return false;
2201 LLVMContext &Ctx = MF.getFunction().getContext();
2202 Register Dst = I.getOperand(0).getReg();
2203 auto *CV = ConstantDataVector::getSplat(
2204 MRI.getType(Dst).getNumElements(),
2205 ConstantInt::get(Type::getIntNTy(Ctx, MRI.getType(Src).getSizeInBits()),
2206 ValAndVReg->Value));
2207 if (!emitConstantVector(Dst, CV, MIB, MRI))
2208 return false;
2209 I.eraseFromParent();
2210 return true;
2211 }
2212 case TargetOpcode::G_SEXT:
2213 // Check for i64 sext(i32 vector_extract) prior to tablegen to select SMOV
2214 // over a normal extend.
2215 if (selectUSMovFromExtend(I, MRI))
2216 return true;
2217 return false;
2218 case TargetOpcode::G_BR:
2219 return false;
2220 case TargetOpcode::G_SHL:
2221 return earlySelectSHL(I, MRI);
2222 case TargetOpcode::G_CONSTANT: {
2223 bool IsZero = false;
2224 if (I.getOperand(1).isCImm())
2225 IsZero = I.getOperand(1).getCImm()->getZExtValue() == 0;
2226 else if (I.getOperand(1).isImm())
2227 IsZero = I.getOperand(1).getImm() == 0;
2228
2229 if (!IsZero)
2230 return false;
2231
2232 Register DefReg = I.getOperand(0).getReg();
2233 LLT Ty = MRI.getType(DefReg);
2234 if (Ty.getSizeInBits() == 64) {
2235 I.getOperand(1).ChangeToRegister(AArch64::XZR, false);
2236 RBI.constrainGenericRegister(DefReg, AArch64::GPR64RegClass, MRI);
2237 } else if (Ty.getSizeInBits() == 32) {
2238 I.getOperand(1).ChangeToRegister(AArch64::WZR, false);
2239 RBI.constrainGenericRegister(DefReg, AArch64::GPR32RegClass, MRI);
2240 } else
2241 return false;
2242
2243 I.setDesc(TII.get(TargetOpcode::COPY));
2244 return true;
2245 }
2246
2247 case TargetOpcode::G_ADD: {
2248 // Check if this is being fed by a G_ICMP on either side.
2249 //
2250 // (cmp pred, x, y) + z
2251 //
2252 // In the above case, when the cmp is true, we increment z by 1. So, we can
2253 // fold the add into the cset for the cmp by using cinc.
2254 //
2255 // FIXME: This would probably be a lot nicer in PostLegalizerLowering.
2256 Register AddDst = I.getOperand(0).getReg();
2257 Register AddLHS = I.getOperand(1).getReg();
2258 Register AddRHS = I.getOperand(2).getReg();
2259 // Only handle scalars.
2260 LLT Ty = MRI.getType(AddLHS);
2261 if (Ty.isVector())
2262 return false;
2263 // Since G_ICMP is modeled as ADDS/SUBS/ANDS, we can handle 32 bits or 64
2264 // bits.
2265 unsigned Size = Ty.getSizeInBits();
2266 if (Size != 32 && Size != 64)
2267 return false;
2268 auto MatchCmp = [&](Register Reg) -> MachineInstr * {
2269 if (!MRI.hasOneNonDBGUse(Reg))
2270 return nullptr;
2271 // If the LHS of the add is 32 bits, then we want to fold a 32-bit
2272 // compare.
2273 if (Size == 32)
2274 return getOpcodeDef(TargetOpcode::G_ICMP, Reg, MRI);
2275 // We model scalar compares using 32-bit destinations right now.
2276 // If it's a 64-bit compare, it'll have 64-bit sources.
2277 Register ZExt;
2278 if (!mi_match(Reg, MRI,
2279 m_OneNonDBGUse(m_GZExt(m_OneNonDBGUse(m_Reg(ZExt))))))
2280 return nullptr;
2281 auto *Cmp = getOpcodeDef(TargetOpcode::G_ICMP, ZExt, MRI);
2282 if (!Cmp ||
2283 MRI.getType(Cmp->getOperand(2).getReg()).getSizeInBits() != 64)
2284 return nullptr;
2285 return Cmp;
2286 };
2287 // Try to match
2288 // z + (cmp pred, x, y)
2289 MachineInstr *Cmp = MatchCmp(AddRHS);
2290 if (!Cmp) {
2291 // (cmp pred, x, y) + z
2292 std::swap(AddLHS, AddRHS);
2293 Cmp = MatchCmp(AddRHS);
2294 if (!Cmp)
2295 return false;
2296 }
2297 auto &PredOp = Cmp->getOperand(1);
2298 auto Pred = static_cast<CmpInst::Predicate>(PredOp.getPredicate());
2299 const AArch64CC::CondCode InvCC =
2300 changeICMPPredToAArch64CC(CmpInst::getInversePredicate(Pred));
2301 MIB.setInstrAndDebugLoc(I);
2302 emitIntegerCompare(/*LHS=*/Cmp->getOperand(2),
2303 /*RHS=*/Cmp->getOperand(3), PredOp, MIB);
2304 emitCSINC(/*Dst=*/AddDst, /*Src =*/AddLHS, /*Src2=*/AddLHS, InvCC, MIB);
2305 I.eraseFromParent();
2306 return true;
2307 }
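// Sketch of the fold above (hypothetical vregs, 32-bit):
//   %c:gpr(s32) = G_ICMP intpred(eq), %x, %y
//   %a:gpr(s32) = G_ADD %z, %c
// selects to the compare followed by CSINC %a, %z, %z, ne, i.e.
// "cinc %a, %z, eq": %z + 1 when the compare holds, %z otherwise.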
2308 case TargetOpcode::G_OR: {
2309 // Look for operations that take the lower `Width=Size-ShiftImm` bits of
2310 // `ShiftSrc` and insert them into the upper `Width` bits of `MaskSrc` via
2311 // shifting and masking that we can replace with a BFI (encoded as a BFM).
2312 Register Dst = I.getOperand(0).getReg();
2313 LLT Ty = MRI.getType(Dst);
2314
2315 if (!Ty.isScalar())
2316 return false;
2317
2318 unsigned Size = Ty.getSizeInBits();
2319 if (Size != 32 && Size != 64)
2320 return false;
2321
2322 Register ShiftSrc;
2323 int64_t ShiftImm;
2324 Register MaskSrc;
2325 int64_t MaskImm;
2326 if (!mi_match(
2327 Dst, MRI,
2328 m_GOr(m_OneNonDBGUse(m_GShl(m_Reg(ShiftSrc), m_ICst(ShiftImm))),
2329 m_OneNonDBGUse(m_GAnd(m_Reg(MaskSrc), m_ICst(MaskImm))))))
2330 return false;
2331
2332 if (ShiftImm > Size || ((1ULL << ShiftImm) - 1ULL) != uint64_t(MaskImm))
2333 return false;
2334
2335 int64_t Immr = Size - ShiftImm;
2336 int64_t Imms = Size - ShiftImm - 1;
2337 unsigned Opc = Size == 32 ? AArch64::BFMWri : AArch64::BFMXri;
2338 emitInstr(Opc, {Dst}, {MaskSrc, ShiftSrc, Immr, Imms}, MIB);
2339 I.eraseFromParent();
2340 return true;
2341 }
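// Worked example (hypothetical 32-bit values): for
//   %dst = G_OR (G_SHL %hi, 16), (G_AND %lo, 0xffff)
// ShiftImm = 16 and MaskImm = 0xffff pass the check, Immr = 16 and Imms = 15,
// and we emit BFMWri %dst, %lo, %hi, 16, 15 -- the BFI form that inserts the
// low 16 bits of %hi above the low 16 bits kept from %lo.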
2342 case TargetOpcode::G_FENCE: {
2343 if (I.getOperand(1).getImm() == 0)
2344 BuildMI(MBB, I, MIMetadata(I), TII.get(AArch64::CompilerBarrier))
2345 .addImm(I.getOperand(0).getImm());
2346 else
2347 BuildMI(MBB, I, MIMetadata(I), TII.get(AArch64::DMB))
2348 .addImm(I.getOperand(0).getImm() == 4 ? 0x9 : 0xb);
2349 I.eraseFromParent();
2350 return true;
2351 }
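// The DMB immediates are the CRm barrier options: 0x9 is ISHLD (used for an
// acquire-only fence, ordering operand 4) and 0xb is ISH for the stronger
// orderings; a fence whose scope operand is 0 (single-thread scope) only
// needs the CompilerBarrier pseudo.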
2352 default:
2353 return false;
2354 }
2355}
2356
2357bool AArch64InstructionSelector::select(MachineInstr &I) {
2358 assert(I.getParent() && "Instruction should be in a basic block!");
2359 assert(I.getParent()->getParent() && "Instruction should be in a function!");
2360
2361 MachineBasicBlock &MBB = *I.getParent();
2362 MachineFunction &MF = *MBB.getParent();
2363 MachineRegisterInfo &MRI = MF.getRegInfo();
2364
2365 const AArch64Subtarget *Subtarget = &MF.getSubtarget<AArch64Subtarget>();
2366 if (Subtarget->requiresStrictAlign()) {
2367 // We don't support this feature yet.
2368 LLVM_DEBUG(dbgs() << "AArch64 GISel does not support strict-align yet\n");
2369 return false;
2370 }
2371
2372 MIB.setInstrAndDebugLoc(I);
2373
2374 unsigned Opcode = I.getOpcode();
2375 // G_PHI requires same handling as PHI
2376 if (!I.isPreISelOpcode() || Opcode == TargetOpcode::G_PHI) {
2377 // Certain non-generic instructions also need some special handling.
2378
2379 if (Opcode == TargetOpcode::LOAD_STACK_GUARD)
2380 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2381
2382 if (Opcode == TargetOpcode::PHI || Opcode == TargetOpcode::G_PHI) {
2383 const Register DefReg = I.getOperand(0).getReg();
2384 const LLT DefTy = MRI.getType(DefReg);
2385
2386 const RegClassOrRegBank &RegClassOrBank =
2387 MRI.getRegClassOrRegBank(DefReg);
2388
2389 const TargetRegisterClass *DefRC
2390 = RegClassOrBank.dyn_cast<const TargetRegisterClass *>();
2391 if (!DefRC) {
2392 if (!DefTy.isValid()) {
2393 LLVM_DEBUG(dbgs() << "PHI operand has no type, not a gvreg?\n");
2394 return false;
2395 }
2396 const RegisterBank &RB = *RegClassOrBank.get<const RegisterBank *>();
2397 DefRC = getRegClassForTypeOnBank(DefTy, RB);
2398 if (!DefRC) {
2399 LLVM_DEBUG(dbgs() << "PHI operand has unexpected size/bank\n");
2400 return false;
2401 }
2402 }
2403
2404 I.setDesc(TII.get(TargetOpcode::PHI));
2405
2406 return RBI.constrainGenericRegister(DefReg, *DefRC, MRI);
2407 }
2408
2409 if (I.isCopy())
2410 return selectCopy(I, TII, MRI, TRI, RBI);
2411
2412 if (I.isDebugInstr())
2413 return selectDebugInstr(I, MRI, RBI);
2414
2415 return true;
2416 }
2417
2418
2419 if (I.getNumOperands() != I.getNumExplicitOperands()) {
2420 LLVM_DEBUG(
2421     dbgs() << "Generic instruction has unexpected implicit operands\n");
2422 return false;
2423 }
2424
2425 // Try to do some lowering before we start instruction selecting. These
2426 // lowerings are purely transformations on the input G_MIR and so selection
2427 // must continue after any modification of the instruction.
2428 if (preISelLower(I)) {
2429 Opcode = I.getOpcode(); // The opcode may have been modified, refresh it.
2430 }
2431
2432 // There may be patterns that the importer can't deal with optimally but
2433 // still selects to a suboptimal sequence, so our custom C++ selection code
2434 // later never gets a chance to work on them. Therefore, we have an early
2435 // selection attempt here to give priority to certain selection routines
2436 // over the imported ones.
2437 if (earlySelect(I))
2438 return true;
2439
2440 if (selectImpl(I, *CoverageInfo))
2441 return true;
2442
2443 LLT Ty =
2444 I.getOperand(0).isReg() ? MRI.getType(I.getOperand(0).getReg()) : LLT{};
2445
2446 switch (Opcode) {
2447 case TargetOpcode::G_SBFX:
2448 case TargetOpcode::G_UBFX: {
2449 static const unsigned OpcTable[2][2] = {
2450 {AArch64::UBFMWri, AArch64::UBFMXri},
2451 {AArch64::SBFMWri, AArch64::SBFMXri}};
2452 bool IsSigned = Opcode == TargetOpcode::G_SBFX;
2453 unsigned Size = Ty.getSizeInBits();
2454 unsigned Opc = OpcTable[IsSigned][Size == 64];
2455 auto Cst1 =
2456 getIConstantVRegValWithLookThrough(I.getOperand(2).getReg(), MRI);
2457 assert(Cst1 && "Should have gotten a constant for src 1?");
2458 auto Cst2 =
2459 getIConstantVRegValWithLookThrough(I.getOperand(3).getReg(), MRI);
2460 assert(Cst2 && "Should have gotten a constant for src 2?");
2461 auto LSB = Cst1->Value.getZExtValue();
2462 auto Width = Cst2->Value.getZExtValue();
2463 auto BitfieldInst =
2464 MIB.buildInstr(Opc, {I.getOperand(0)}, {I.getOperand(1)})
2465 .addImm(LSB)
2466 .addImm(LSB + Width - 1);
2467 I.eraseFromParent();
2468 return constrainSelectedInstRegOperands(*BitfieldInst, TII, TRI, RBI);
2469 }
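// e.g. a 64-bit G_UBFX with LSB 8 and Width 4 becomes
// UBFMXri %dst, %src, 8, 11 (immr = LSB, imms = LSB + Width - 1), which is
// the UBFX alias extracting bits <11:8>.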
2470 case TargetOpcode::G_BRCOND:
2471 return selectCompareBranch(I, MF, MRI);
2472
2473 case TargetOpcode::G_BRINDIRECT: {
2474 I.setDesc(TII.get(AArch64::BR));
2475 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2476 }
2477
2478 case TargetOpcode::G_BRJT:
2479 return selectBrJT(I, MRI);
2480
2481 case AArch64::G_ADD_LOW: {
2482 // This op may have been separated from its ADRP companion by the localizer
2483 // or some other code motion pass. Given that many CPUs will try to
2484 // macro fuse these operations anyway, select this into a MOVaddr pseudo
2485 // which will later be expanded into an ADRP+ADD pair after scheduling.
2486 MachineInstr *BaseMI = MRI.getVRegDef(I.getOperand(1).getReg());
2487 if (BaseMI->getOpcode() != AArch64::ADRP) {
2488 I.setDesc(TII.get(AArch64::ADDXri));
2489 I.addOperand(MachineOperand::CreateImm(0));
2490 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2491 }
2492 assert(TM.getCodeModel() == CodeModel::Small &&
2493        "Expected small code model");
2494 auto Op1 = BaseMI->getOperand(1);
2495 auto Op2 = I.getOperand(2);
2496 auto MovAddr = MIB.buildInstr(AArch64::MOVaddr, {I.getOperand(0)}, {})
2497 .addGlobalAddress(Op1.getGlobal(), Op1.getOffset(),
2498 Op1.getTargetFlags())
2499 .addGlobalAddress(Op2.getGlobal(), Op2.getOffset(),
2500 Op2.getTargetFlags());
2501 I.eraseFromParent();
2502 return constrainSelectedInstRegOperands(*MovAddr, TII, TRI, RBI);
2503 }
2504
2505 case TargetOpcode::G_BSWAP: {
2506 // Handle vector types for G_BSWAP directly.
2507 Register DstReg = I.getOperand(0).getReg();
2508 LLT DstTy = MRI.getType(DstReg);
2509
2510 // We should only get vector types here; everything else is handled by the
2511 // importer right now.
2512 if (!DstTy.isVector() || DstTy.getSizeInBits() > 128) {
2513 LLVM_DEBUG(dbgs() << "Dst type for G_BSWAP currently unsupported.\n");
2514 return false;
2515 }
2516
2517 // Only handle 4 and 2 element vectors for now.
2518 // TODO: 16-bit elements.
2519 unsigned NumElts = DstTy.getNumElements();
2520 if (NumElts != 4 && NumElts != 2) {
2521 LLVM_DEBUG(dbgs() << "Unsupported number of elements for G_BSWAP.\n");
2522 return false;
2523 }
2524
2525 // Choose the correct opcode for the supported types. Right now, that's
2526 // v2s32, v4s32, and v2s64.
2527 unsigned Opc = 0;
2528 unsigned EltSize = DstTy.getElementType().getSizeInBits();
2529 if (EltSize == 32)
2530 Opc = (DstTy.getNumElements() == 2) ? AArch64::REV32v8i8
2531 : AArch64::REV32v16i8;
2532 else if (EltSize == 64)
2533 Opc = AArch64::REV64v16i8;
2534
2535 // We should always get something by the time we get here...
2536 assert(Opc != 0 && "Didn't get an opcode for G_BSWAP?");
2537
2538 I.setDesc(TII.get(Opc));
2539 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2540 }
2541
2542 case TargetOpcode::G_FCONSTANT:
2543 case TargetOpcode::G_CONSTANT: {
2544 const bool isFP = Opcode == TargetOpcode::G_FCONSTANT;
2545
2546 const LLT s8 = LLT::scalar(8);
2547 const LLT s16 = LLT::scalar(16);
2548 const LLT s32 = LLT::scalar(32);
2549 const LLT s64 = LLT::scalar(64);
2550 const LLT s128 = LLT::scalar(128);
2551 const LLT p0 = LLT::pointer(0, 64);
2552
2553 const Register DefReg = I.getOperand(0).getReg();
2554 const LLT DefTy = MRI.getType(DefReg);
2555 const unsigned DefSize = DefTy.getSizeInBits();
2556 const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI);
2557
2558 // FIXME: Redundant check, but even less readable when factored out.
2559 if (isFP) {
2560 if (Ty != s16 && Ty != s32 && Ty != s64 && Ty != s128) {
2561 LLVM_DEBUG(dbgs() << "Unable to materialize FP " << Ty
2562 << " constant, expected: " << s16 << " or " << s32
2563 << " or " << s64 << " or " << s128 << '\n');
2564 return false;
2565 }
2566
2567 if (RB.getID() != AArch64::FPRRegBankID) {
2568 LLVM_DEBUG(dbgs() << "Unable to materialize FP " << Ty
2569 << " constant on bank: " << RB
2570 << ", expected: FPR\n");
2571 return false;
2572 }
2573
2574 // The case when we have 0.0 is covered by tablegen. Reject it here so we
2575 // can be sure tablegen works correctly and isn't rescued by this code.
2576 // 0.0 is not covered by tablegen for FP128. So we will handle this
2577 // scenario in the code here.
2578 if (DefSize != 128 && I.getOperand(1).getFPImm()->isExactlyValue(0.0))
2579 return false;
2580 } else {
2581 // s32 and s64 are covered by tablegen.
2582 if (Ty != p0 && Ty != s8 && Ty != s16) {
2583 LLVM_DEBUG(dbgs() << "Unable to materialize integer " << Ty
2584 << " constant, expected: " << s32 << ", " << s64
2585 << ", or " << p0 << '\n');
2586 return false;
2587 }
2588
2589 if (RB.getID() != AArch64::GPRRegBankID) {
2590 LLVM_DEBUG(dbgs() << "Unable to materialize integer " << Ty
2591 << " constant on bank: " << RB
2592 << ", expected: GPR\n");
2593 return false;
2594 }
2595 }
2596
2597 if (isFP) {
2598 const TargetRegisterClass &FPRRC = *getRegClassForTypeOnBank(DefTy, RB);
2599 // For 16, 64, and 128b values, emit a constant pool load.
2600 switch (DefSize) {
2601 default:
2602 llvm_unreachable("Unexpected destination size for G_FCONSTANT?");
2603 case 32:
2604 // For s32, use a cp load if we have optsize/minsize.
2605 if (!shouldOptForSize(&MF))
2606 break;
2607 [[fallthrough]];
2608 case 16:
2609 case 64:
2610 case 128: {
2611 auto *FPImm = I.getOperand(1).getFPImm();
2612 auto *LoadMI = emitLoadFromConstantPool(FPImm, MIB);
2613 if (!LoadMI) {
2614 LLVM_DEBUG(dbgs() << "Failed to load double constant pool entry\n");
2615 return false;
2616 }
2617 MIB.buildCopy({DefReg}, {LoadMI->getOperand(0).getReg()});
2618 I.eraseFromParent();
2619 return RBI.constrainGenericRegister(DefReg, FPRRC, MRI);
2620 }
2621 }
2622
2623 // Either emit a FMOV, or emit a copy to emit a normal mov.
2624 assert(DefSize == 32 &&
2625 "Expected constant pool loads for all sizes other than 32!");
2626 const Register DefGPRReg =
2627 MRI.createVirtualRegister(&AArch64::GPR32RegClass);
2628 MachineOperand &RegOp = I.getOperand(0);
2629 RegOp.setReg(DefGPRReg);
2630 MIB.setInsertPt(MIB.getMBB(), std::next(I.getIterator()));
2631 MIB.buildCopy({DefReg}, {DefGPRReg});
2632
2633 if (!RBI.constrainGenericRegister(DefReg, FPRRC, MRI)) {
2634 LLVM_DEBUG(dbgs() << "Failed to constrain G_FCONSTANT def operand\n");
2635 return false;
2636 }
2637
2638 MachineOperand &ImmOp = I.getOperand(1);
2639 // FIXME: Is going through int64_t always correct?
2640 ImmOp.ChangeToImmediate(
2641 ImmOp.getFPImm()->getValueAPF().bitcastToAPInt().getZExtValue());
2642 } else if (I.getOperand(1).isCImm()) {
2643 uint64_t Val = I.getOperand(1).getCImm()->getZExtValue();
2644 I.getOperand(1).ChangeToImmediate(Val);
2645 } else if (I.getOperand(1).isImm()) {
2646 uint64_t Val = I.getOperand(1).getImm();
2647 I.getOperand(1).ChangeToImmediate(Val);
2648 }
2649
2650 const unsigned MovOpc =
2651 DefSize == 64 ? AArch64::MOVi64imm : AArch64::MOVi32imm;
2652 I.setDesc(TII.get(MovOpc));
2653 constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2654 return true;
2655 }
2656 case TargetOpcode::G_EXTRACT: {
2657 Register DstReg = I.getOperand(0).getReg();
2658 Register SrcReg = I.getOperand(1).getReg();
2659 LLT SrcTy = MRI.getType(SrcReg);
2660 LLT DstTy = MRI.getType(DstReg);
2661 (void)DstTy;
2662 unsigned SrcSize = SrcTy.getSizeInBits();
2663
2664 if (SrcTy.getSizeInBits() > 64) {
2665 // This should be an extract of an s128, which is like a vector extract.
2666 if (SrcTy.getSizeInBits() != 128)
2667 return false;
2668 // Only support extracting 64 bits from an s128 at the moment.
2669 if (DstTy.getSizeInBits() != 64)
2670 return false;
2671
2672 unsigned Offset = I.getOperand(2).getImm();
2673 if (Offset % 64 != 0)
2674 return false;
2675
2676 // Check we have the right regbank always.
2677 const RegisterBank &SrcRB = *RBI.getRegBank(SrcReg, MRI, TRI);
2678 const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
2679 assert(SrcRB.getID() == DstRB.getID() && "Wrong extract regbank!");
2680
2681 if (SrcRB.getID() == AArch64::GPRRegBankID) {
2682 auto NewI =
2683 MIB.buildInstr(TargetOpcode::COPY, {DstReg}, {})
2684 .addUse(SrcReg, 0,
2685 Offset == 0 ? AArch64::sube64 : AArch64::subo64);
2686 constrainOperandRegClass(MF, TRI, MRI, TII, RBI, *NewI,
2687 AArch64::GPR64RegClass, NewI->getOperand(0));
2688 I.eraseFromParent();
2689 return true;
2690 }
2691
2692 // Emit the same code as a vector extract.
2693 // Offset must be a multiple of 64.
2694 unsigned LaneIdx = Offset / 64;
2695 MachineInstr *Extract = emitExtractVectorElt(
2696 DstReg, DstRB, LLT::scalar(64), SrcReg, LaneIdx, MIB);
2697 if (!Extract)
2698 return false;
2699 I.eraseFromParent();
2700 return true;
2701 }
2702
2703 I.setDesc(TII.get(SrcSize == 64 ? AArch64::UBFMXri : AArch64::UBFMWri));
2704 MachineInstrBuilder(MF, I).addImm(I.getOperand(2).getImm() +
2705 Ty.getSizeInBits() - 1);
2706
2707 if (SrcSize < 64) {
2708 assert(SrcSize == 32 && DstTy.getSizeInBits() == 16 &&
2709 "unexpected G_EXTRACT types");
2710 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2711 }
2712
2713 DstReg = MRI.createGenericVirtualRegister(LLT::scalar(64));
2714 MIB.setInsertPt(MIB.getMBB(), std::next(I.getIterator()));
2715 MIB.buildInstr(TargetOpcode::COPY, {I.getOperand(0).getReg()}, {})
2716 .addReg(DstReg, 0, AArch64::sub_32);
2717 RBI.constrainGenericRegister(I.getOperand(0).getReg(),
2718 AArch64::GPR32RegClass, MRI);
2719 I.getOperand(0).setReg(DstReg);
2720
2721 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2722 }
2723
2724 case TargetOpcode::G_INSERT: {
2725 LLT SrcTy = MRI.getType(I.getOperand(2).getReg());
2726 LLT DstTy = MRI.getType(I.getOperand(0).getReg());
2727 unsigned DstSize = DstTy.getSizeInBits();
2728 // Larger inserts are vectors, same-size ones should be something else by
2729 // now (split up or turned into COPYs).
2730 if (Ty.getSizeInBits() > 64 || SrcTy.getSizeInBits() > 32)
2731 return false;
2732
2733 I.setDesc(TII.get(DstSize == 64 ? AArch64::BFMXri : AArch64::BFMWri));
2734 unsigned LSB = I.getOperand(3).getImm();
2735 unsigned Width = MRI.getType(I.getOperand(2).getReg()).getSizeInBits();
2736 I.getOperand(3).setImm((DstSize - LSB) % DstSize);
2737 MachineInstrBuilder(MF, I).addImm(Width - 1);
2738
2739 if (DstSize < 64) {
2740 assert(DstSize == 32 && SrcTy.getSizeInBits() == 16 &&
2741 "unexpected G_INSERT types");
2742 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2743 }
2744
2745 Register SrcReg = MRI.createGenericVirtualRegister(LLT::scalar(64));
2746 BuildMI(MBB, I.getIterator(), I.getDebugLoc(),
2747 TII.get(AArch64::SUBREG_TO_REG))
2748 .addDef(SrcReg)
2749 .addImm(0)
2750 .addUse(I.getOperand(2).getReg())
2751 .addImm(AArch64::sub_32);
2752 RBI.constrainGenericRegister(I.getOperand(2).getReg(),
2753 AArch64::GPR32RegClass, MRI);
2754 I.getOperand(2).setReg(SrcReg);
2755
2756 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2757 }
2758 case TargetOpcode::G_FRAME_INDEX: {
2759 // allocas and G_FRAME_INDEX are only supported in addrspace(0).
2760 if (Ty != LLT::pointer(0, 64)) {
2761 LLVM_DEBUG(dbgs() << "G_FRAME_INDEX pointer has type: " << Ty
2762 << ", expected: " << LLT::pointer(0, 64) << '\n');
2763 return false;
2764 }
2765 I.setDesc(TII.get(AArch64::ADDXri));
2766
2767 // MOs for a #0 shifted immediate.
2768 I.addOperand(MachineOperand::CreateImm(0));
2769 I.addOperand(MachineOperand::CreateImm(0));
2770
2771 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2772 }
2773
2774 case TargetOpcode::G_GLOBAL_VALUE: {
2775 auto GV = I.getOperand(1).getGlobal();
2776 if (GV->isThreadLocal())
2777 return selectTLSGlobalValue(I, MRI);
2778
2779 unsigned OpFlags = STI.ClassifyGlobalReference(GV, TM);
2780 if (OpFlags & AArch64II::MO_GOT) {
2781 I.setDesc(TII.get(AArch64::LOADgot));
2782 I.getOperand(1).setTargetFlags(OpFlags);
2783 } else if (TM.getCodeModel() == CodeModel::Large) {
2784 // Materialize the global using movz/movk instructions.
2785 materializeLargeCMVal(I, GV, OpFlags);
2786 I.eraseFromParent();
2787 return true;
2788 } else if (TM.getCodeModel() == CodeModel::Tiny) {
2789 I.setDesc(TII.get(AArch64::ADR));
2790 I.getOperand(1).setTargetFlags(OpFlags);
2791 } else {
2792 I.setDesc(TII.get(AArch64::MOVaddr));
2793 I.getOperand(1).setTargetFlags(OpFlags | AArch64II::MO_PAGE);
2794 MachineInstrBuilder MIB(MF, I);
2795 MIB.addGlobalAddress(GV, I.getOperand(1).getOffset(),
2796 OpFlags | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
2797 }
2798 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2799 }
2800
2801 case TargetOpcode::G_ZEXTLOAD:
2802 case TargetOpcode::G_LOAD:
2803 case TargetOpcode::G_STORE: {
2804 GLoadStore &LdSt = cast<GLoadStore>(I);
2805 bool IsZExtLoad = I.getOpcode() == TargetOpcode::G_ZEXTLOAD;
2806 LLT PtrTy = MRI.getType(LdSt.getPointerReg());
2807
2808 if (PtrTy != LLT::pointer(0, 64)) {
2809 LLVM_DEBUG(dbgs() << "Load/Store pointer has type: " << PtrTy
2810 << ", expected: " << LLT::pointer(0, 64) << '\n');
2811 return false;
2812 }
2813
2814 uint64_t MemSizeInBytes = LdSt.getMemSize();
2815 unsigned MemSizeInBits = LdSt.getMemSizeInBits();
2816 AtomicOrdering Order = LdSt.getMMO().getSuccessOrdering();
2817
2818 // Need special instructions for atomics that affect ordering.
2819 if (Order != AtomicOrdering::NotAtomic &&
2820 Order != AtomicOrdering::Unordered &&
2821 Order != AtomicOrdering::Monotonic) {
2822 assert(!isa<GZExtLoad>(LdSt));
2823 if (MemSizeInBytes > 64)
2824 return false;
2825
2826 if (isa<GLoad>(LdSt)) {
2827 static constexpr unsigned LDAPROpcodes[] = {
2828 AArch64::LDAPRB, AArch64::LDAPRH, AArch64::LDAPRW, AArch64::LDAPRX};
2829 static constexpr unsigned LDAROpcodes[] = {
2830 AArch64::LDARB, AArch64::LDARH, AArch64::LDARW, AArch64::LDARX};
2831 ArrayRef<unsigned> Opcodes =
2832 STI.hasLDAPR() && Order != AtomicOrdering::SequentiallyConsistent
2833 ? LDAPROpcodes
2834 : LDAROpcodes;
2835 I.setDesc(TII.get(Opcodes[Log2_32(MemSizeInBytes)]));
2836 } else {
2837 static constexpr unsigned Opcodes[] = {AArch64::STLRB, AArch64::STLRH,
2838 AArch64::STLRW, AArch64::STLRX};
2839 Register ValReg = LdSt.getReg(0);
2840 if (MRI.getType(ValReg).getSizeInBits() == 64 && MemSizeInBits != 64) {
2841 // Emit a subreg copy of 32 bits.
2842 Register NewVal = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
2843 MIB.buildInstr(TargetOpcode::COPY, {NewVal}, {})
2844 .addReg(I.getOperand(0).getReg(), 0, AArch64::sub_32);
2845 I.getOperand(0).setReg(NewVal);
2846 }
2847 I.setDesc(TII.get(Opcodes[Log2_32(MemSizeInBytes)]));
2848 }
2849 constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2850 return true;
2851 }
2852
2853#ifndef NDEBUG
2854 const Register PtrReg = LdSt.getPointerReg();
2855 const RegisterBank &PtrRB = *RBI.getRegBank(PtrReg, MRI, TRI);
2856 // Check that the pointer register is valid.
2857 assert(PtrRB.getID() == AArch64::GPRRegBankID &&
2858 "Load/Store pointer operand isn't a GPR");
2859 assert(MRI.getType(PtrReg).isPointer() &&
2860 "Load/Store pointer operand isn't a pointer");
2861#endif
2862
2863 const Register ValReg = LdSt.getReg(0);
2864 const LLT ValTy = MRI.getType(ValReg);
2865 const RegisterBank &RB = *RBI.getRegBank(ValReg, MRI, TRI);
2866
2867 // The code below doesn't support truncating stores, so we need to split it
2868 // again.
2869 if (isa<GStore>(LdSt) && ValTy.getSizeInBits() > MemSizeInBits) {
2870 unsigned SubReg;
2871 LLT MemTy = LdSt.getMMO().getMemoryType();
2872 auto *RC = getRegClassForTypeOnBank(MemTy, RB);
2873 if (!getSubRegForClass(RC, TRI, SubReg))
2874 return false;
2875
2876 // Generate a subreg copy.
2877 auto Copy = MIB.buildInstr(TargetOpcode::COPY, {MemTy}, {})
2878 .addReg(ValReg, 0, SubReg)
2879 .getReg(0);
2880 RBI.constrainGenericRegister(Copy, *RC, MRI);
2881 LdSt.getOperand(0).setReg(Copy);
2882 } else if (isa<GLoad>(LdSt) && ValTy.getSizeInBits() > MemSizeInBits) {
2883 // If this is an any-extending load from the FPR bank, split it into a regular
2884 // load + extend.
2885 if (RB.getID() == AArch64::FPRRegBankID) {
2886 unsigned SubReg;
2887 LLT MemTy = LdSt.getMMO().getMemoryType();
2888 auto *RC = getRegClassForTypeOnBank(MemTy, RB);
2889 if (!getSubRegForClass(RC, TRI, SubReg))
2890 return false;
2891 Register OldDst = LdSt.getReg(0);
2892 Register NewDst =
2893 MRI.createGenericVirtualRegister(LdSt.getMMO().getMemoryType());
2894 LdSt.getOperand(0).setReg(NewDst);
2895 MRI.setRegBank(NewDst, RB);
2896 // Generate a SUBREG_TO_REG to extend it.
2897 MIB.setInsertPt(MIB.getMBB(), std::next(LdSt.getIterator()));
2898 MIB.buildInstr(AArch64::SUBREG_TO_REG, {OldDst}, {})
2899 .addImm(0)
2900 .addUse(NewDst)
2901 .addImm(SubReg);
2902 auto SubRegRC = getRegClassForTypeOnBank(MRI.getType(OldDst), RB);
2903 RBI.constrainGenericRegister(OldDst, *SubRegRC, MRI);
2904 MIB.setInstr(LdSt);
2905 }
2906 }
2907
2908 // Helper lambda for partially selecting I. Either returns the original
2909 // instruction with an updated opcode, or a new instruction.
2910 auto SelectLoadStoreAddressingMode = [&]() -> MachineInstr * {
2911 bool IsStore = isa<GStore>(I);
2912 const unsigned NewOpc =
2913 selectLoadStoreUIOp(I.getOpcode(), RB.getID(), MemSizeInBits);
2914 if (NewOpc == I.getOpcode())
2915 return nullptr;
2916 // Check if we can fold anything into the addressing mode.
2917 auto AddrModeFns =
2918 selectAddrModeIndexed(I.getOperand(1), MemSizeInBytes);
2919 if (!AddrModeFns) {
2920 // Can't fold anything. Use the original instruction.
2921 I.setDesc(TII.get(NewOpc));
2922 I.addOperand(MachineOperand::CreateImm(0));
2923 return &I;
2924 }
2925
2926 // Folded something. Create a new instruction and return it.
2927 auto NewInst = MIB.buildInstr(NewOpc, {}, {}, I.getFlags());
2928 Register CurValReg = I.getOperand(0).getReg();
2929 IsStore ? NewInst.addUse(CurValReg) : NewInst.addDef(CurValReg);
2930 NewInst.cloneMemRefs(I);
2931 for (auto &Fn : *AddrModeFns)
2932 Fn(NewInst);
2933 I.eraseFromParent();
2934 return &*NewInst;
2935 };
2936
2937 MachineInstr *LoadStore = SelectLoadStoreAddressingMode();
2938 if (!LoadStore)
2939 return false;
2940
2941 // If we're storing a 0, use WZR/XZR.
2942 if (Opcode == TargetOpcode::G_STORE) {
2943 auto CVal = getIConstantVRegValWithLookThrough(
2944 LoadStore->getOperand(0).getReg(), MRI);
2945 if (CVal && CVal->Value == 0) {
2946 switch (LoadStore->getOpcode()) {
2947 case AArch64::STRWui:
2948 case AArch64::STRHHui:
2949 case AArch64::STRBBui:
2950 LoadStore->getOperand(0).setReg(AArch64::WZR);
2951 break;
2952 case AArch64::STRXui:
2953 LoadStore->getOperand(0).setReg(AArch64::XZR);
2954 break;
2955 }
2956 }
2957 }
2958
2959 if (IsZExtLoad) {
2960 // The zextload from a smaller type to i32 should be handled by the
2961 // importer.
2962 if (MRI.getType(LoadStore->getOperand(0).getReg()).getSizeInBits() != 64)
2963 return false;
2964 // If we have a ZEXTLOAD then change the load's type to be a narrower reg
2965 // and zero_extend with SUBREG_TO_REG.
2966 Register LdReg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
2967 Register DstReg = LoadStore->getOperand(0).getReg();
2968 LoadStore->getOperand(0).setReg(LdReg);
2969
2970 MIB.setInsertPt(MIB.getMBB(), std::next(LoadStore->getIterator()));
2971 MIB.buildInstr(AArch64::SUBREG_TO_REG, {DstReg}, {})
2972 .addImm(0)
2973 .addUse(LdReg)
2974 .addImm(AArch64::sub_32);
2975 constrainSelectedInstRegOperands(*LoadStore, TII, TRI, RBI);
2976 return RBI.constrainGenericRegister(DstReg, AArch64::GPR64allRegClass,
2977 MRI);
2978 }
2979 return constrainSelectedInstRegOperands(*LoadStore, TII, TRI, RBI);
2980 }
2981
2982 case TargetOpcode::G_SMULH:
2983 case TargetOpcode::G_UMULH: {
2984 // Reject the various things we don't support yet.
2985 if (unsupportedBinOp(I, RBI, MRI, TRI))
2986 return false;
2987
2988 const Register DefReg = I.getOperand(0).getReg();
2989 const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI);
2990
2991 if (RB.getID() != AArch64::GPRRegBankID) {
2992 LLVM_DEBUG(dbgs() << "G_[SU]MULH on bank: " << RB << ", expected: GPR\n");
2993 return false;
2994 }
2995
2996 if (Ty != LLT::scalar(64)) {
2997 LLVM_DEBUG(dbgs() << "G_[SU]MULH has type: " << Ty
2998 << ", expected: " << LLT::scalar(64) << '\n');
2999 return false;
3000 }
3001
3002 unsigned NewOpc = I.getOpcode() == TargetOpcode::G_SMULH ? AArch64::SMULHrr
3003 : AArch64::UMULHrr;
3004 I.setDesc(TII.get(NewOpc));
3005
3006 // Now that we selected an opcode, we need to constrain the register
3007 // operands to use appropriate classes.
3008 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3009 }
3010 case TargetOpcode::G_LSHR:
3011 case TargetOpcode::G_ASHR:
3012 if (MRI.getType(I.getOperand(0).getReg()).isVector())
3013 return selectVectorAshrLshr(I, MRI);
3014 [[fallthrough]];
3015 case TargetOpcode::G_SHL:
3016 if (Opcode == TargetOpcode::G_SHL &&
3017 MRI.getType(I.getOperand(0).getReg()).isVector())
3018 return selectVectorSHL(I, MRI);
3019
3020 // These shifts were legalized to have 64 bit shift amounts because we
3021 // want to take advantage of the selection patterns that assume the
3022 // immediates are s64s, however, selectBinaryOp will assume both operands
3023 // will have the same bit size.
3024 {
3025 Register SrcReg = I.getOperand(1).getReg();
3026 Register ShiftReg = I.getOperand(2).getReg();
3027 const LLT ShiftTy = MRI.getType(ShiftReg);
3028 const LLT SrcTy = MRI.getType(SrcReg);
3029 if (!SrcTy.isVector() && SrcTy.getSizeInBits() == 32 &&
3030 ShiftTy.getSizeInBits() == 64) {
3031 assert(!ShiftTy.isVector() && "unexpected vector shift ty");
3032 // Insert a subregister copy to implement a 64->32 trunc
3033 auto Trunc = MIB.buildInstr(TargetOpcode::COPY, {SrcTy}, {})
3034 .addReg(ShiftReg, 0, AArch64::sub_32);
3035 MRI.setRegBank(Trunc.getReg(0), RBI.getRegBank(AArch64::GPRRegBankID));
3036 I.getOperand(2).setReg(Trunc.getReg(0));
3037 }
3038 }
3039 [[fallthrough]];
3040 case TargetOpcode::G_OR: {
3041 // Reject the various things we don't support yet.
3042 if (unsupportedBinOp(I, RBI, MRI, TRI))
3043 return false;
3044
3045 const unsigned OpSize = Ty.getSizeInBits();
3046
3047 const Register DefReg = I.getOperand(0).getReg();
3048 const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI);
3049
3050 const unsigned NewOpc = selectBinaryOp(I.getOpcode(), RB.getID(), OpSize);
3051 if (NewOpc == I.getOpcode())
3052 return false;
3053
3054 I.setDesc(TII.get(NewOpc));
3055 // FIXME: Should the type be always reset in setDesc?
3056
3057 // Now that we selected an opcode, we need to constrain the register
3058 // operands to use appropriate classes.
3059 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3060 }
3061
3062 case TargetOpcode::G_PTR_ADD: {
3063 emitADD(I.getOperand(0).getReg(), I.getOperand(1), I.getOperand(2), MIB);
3064 I.eraseFromParent();
3065 return true;
3066 }
3067 case TargetOpcode::G_SADDO:
3068 case TargetOpcode::G_UADDO:
3069 case TargetOpcode::G_SSUBO:
3070 case TargetOpcode::G_USUBO: {
3071 // Emit the operation and get the correct condition code.
3072 auto OpAndCC = emitOverflowOp(Opcode, I.getOperand(0).getReg(),
3073 I.getOperand(2), I.getOperand(3), MIB);
3074
3075 // Now, put the overflow result in the register given by the first operand
3076 // to the overflow op. CSINC increments the result when the predicate is
3077 // false, so to get the increment when it's true, we need to use the
3078 // inverse. In this case, we want to increment when carry is set.
3079 Register ZReg = AArch64::WZR;
3080 emitCSINC(/*Dst=*/I.getOperand(1).getReg(), /*Src1=*/ZReg, /*Src2=*/ZReg,
3081 getInvertedCondCode(OpAndCC.second), MIB);
3082 I.eraseFromParent();
3083 return true;
3084 }
3085
3086 case TargetOpcode::G_PTRMASK: {
3087 Register MaskReg = I.getOperand(2).getReg();
3088 Optional<int64_t> MaskVal = getIConstantVRegSExtVal(MaskReg, MRI);
3089 // TODO: Implement arbitrary cases
3090 if (!MaskVal || !isShiftedMask_64(*MaskVal))
3091 return false;
3092
3093 uint64_t Mask = *MaskVal;
3094 I.setDesc(TII.get(AArch64::ANDXri));
3095 I.getOperand(2).ChangeToImmediate(
3096 AArch64_AM::encodeLogicalImmediate(Mask, 64));
3097
3098 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3099 }
3100 case TargetOpcode::G_PTRTOINT:
3101 case TargetOpcode::G_TRUNC: {
3102 const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
3103 const LLT SrcTy = MRI.getType(I.getOperand(1).getReg());
3104
3105 const Register DstReg = I.getOperand(0).getReg();
3106 const Register SrcReg = I.getOperand(1).getReg();
3107
3108 const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
3109 const RegisterBank &SrcRB = *RBI.getRegBank(SrcReg, MRI, TRI);
3110
3111 if (DstRB.getID() != SrcRB.getID()) {
3112 LLVM_DEBUG(
3113 dbgs() << "G_TRUNC/G_PTRTOINT input/output on different banks\n");
3114 return false;
3115 }
3116
3117 if (DstRB.getID() == AArch64::GPRRegBankID) {
3118 const TargetRegisterClass *DstRC = getRegClassForTypeOnBank(DstTy, DstRB);
3119 if (!DstRC)
3120 return false;
3121
3122 const TargetRegisterClass *SrcRC = getRegClassForTypeOnBank(SrcTy, SrcRB);
3123 if (!SrcRC)
3124 return false;
3125
3126 if (!RBI.constrainGenericRegister(SrcReg, *SrcRC, MRI) ||
3127 !RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) {
3128 LLVM_DEBUG(dbgs() << "Failed to constrain G_TRUNC/G_PTRTOINT\n");
3129 return false;
3130 }
3131
3132 if (DstRC == SrcRC) {
3133 // Nothing to be done
3134 } else if (Opcode == TargetOpcode::G_TRUNC && DstTy == LLT::scalar(32) &&
3135 SrcTy == LLT::scalar(64)) {
3136 llvm_unreachable("TableGen can import this case");
3137 return false;
3138 } else if (DstRC == &AArch64::GPR32RegClass &&
3139 SrcRC == &AArch64::GPR64RegClass) {
3140 I.getOperand(1).setSubReg(AArch64::sub_32);
3141 } else {
3142 LLVM_DEBUG(
3143 dbgs() << "Unhandled mismatched classes in G_TRUNC/G_PTRTOINT\n");
3144 return false;
3145 }
3146
3147 I.setDesc(TII.get(TargetOpcode::COPY));
3148 return true;
3149 } else if (DstRB.getID() == AArch64::FPRRegBankID) {
3150 if (DstTy == LLT::fixed_vector(4, 16) &&
3151 SrcTy == LLT::fixed_vector(4, 32)) {
3152 I.setDesc(TII.get(AArch64::XTNv4i16));
3153 constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3154 return true;
3155 }
3156
3157 if (!SrcTy.isVector() && SrcTy.getSizeInBits() == 128) {
3158 MachineInstr *Extract = emitExtractVectorElt(
3159 DstReg, DstRB, LLT::scalar(DstTy.getSizeInBits()), SrcReg, 0, MIB);
3160 if (!Extract)
3161 return false;
3162 I.eraseFromParent();
3163 return true;
3164 }
3165
3166 // We might have a vector G_PTRTOINT, in which case just emit a COPY.
3167 if (Opcode == TargetOpcode::G_PTRTOINT) {
3168 assert(DstTy.isVector() && "Expected an FPR ptrtoint to be a vector");
3169 I.setDesc(TII.get(TargetOpcode::COPY));
3170 return selectCopy(I, TII, MRI, TRI, RBI);
3171 }
3172 }
3173
3174 return false;
3175 }
3176
3177 case TargetOpcode::G_ANYEXT: {
3178 if (selectUSMovFromExtend(I, MRI))
3179 return true;
3180
3181 const Register DstReg = I.getOperand(0).getReg();
3182 const Register SrcReg = I.getOperand(1).getReg();
3183
3184 const RegisterBank &RBDst = *RBI.getRegBank(DstReg, MRI, TRI);
3185 if (RBDst.getID() != AArch64::GPRRegBankID) {
3186 LLVM_DEBUG(dbgs() << "G_ANYEXT on bank: " << RBDst
3187 << ", expected: GPR\n");
3188 return false;
3189 }
3190
3191 const RegisterBank &RBSrc = *RBI.getRegBank(SrcReg, MRI, TRI);
3192 if (RBSrc.getID() != AArch64::GPRRegBankID) {
3193 LLVM_DEBUG(dbgs() << "G_ANYEXT on bank: " << RBSrc
3194 << ", expected: GPR\n");
3195 return false;
3196 }
3197
3198 const unsigned DstSize = MRI.getType(DstReg).getSizeInBits();
3199
3200 if (DstSize == 0) {
3201 LLVM_DEBUG(dbgs() << "G_ANYEXT operand has no size, not a gvreg?\n");
3202 return false;
3203 }
3204
3205 if (DstSize != 64 && DstSize > 32) {
3206 LLVM_DEBUG(dbgs() << "G_ANYEXT to size: " << DstSize
3207 << ", expected: 32 or 64\n");
3208 return false;
3209 }
3210 // At this point G_ANYEXT is just like a plain COPY, but we need
3211 // to explicitly form the 64-bit value if any.
3212 if (DstSize > 32) {
3213 Register ExtSrc = MRI.createVirtualRegister(&AArch64::GPR64allRegClass);
3214 BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::SUBREG_TO_REG))
3215 .addDef(ExtSrc)
3216 .addImm(0)
3217 .addUse(SrcReg)
3218 .addImm(AArch64::sub_32);
3219 I.getOperand(1).setReg(ExtSrc);
3220 }
3221 return selectCopy(I, TII, MRI, TRI, RBI);
3222 }
3223
3224 case TargetOpcode::G_ZEXT:
3225 case TargetOpcode::G_SEXT_INREG:
3226 case TargetOpcode::G_SEXT: {
3227 if (selectUSMovFromExtend(I, MRI))
3228 return true;
3229
3230 unsigned Opcode = I.getOpcode();
3231 const bool IsSigned = Opcode != TargetOpcode::G_ZEXT;
3232 const Register DefReg = I.getOperand(0).getReg();
3233 Register SrcReg = I.getOperand(1).getReg();
3234 const LLT DstTy = MRI.getType(DefReg);
3235 const LLT SrcTy = MRI.getType(SrcReg);
3236 unsigned DstSize = DstTy.getSizeInBits();
3237 unsigned SrcSize = SrcTy.getSizeInBits();
3238
3239 // SEXT_INREG has the same src reg size as dst, the size of the value to be
3240 // extended is encoded in the imm.
3241 if (Opcode == TargetOpcode::G_SEXT_INREG)
3242 SrcSize = I.getOperand(2).getImm();
3243
3244 if (DstTy.isVector())
3245 return false; // Should be handled by imported patterns.
3246
3247 assert((*RBI.getRegBank(DefReg, MRI, TRI)).getID() ==
3248 AArch64::GPRRegBankID &&
3249 "Unexpected ext regbank");
3250
3251 MachineInstr *ExtI;
3252
3253 // First check if we're extending the result of a load which has a dest type
3254 // smaller than 32 bits, then this zext is redundant. GPR32 is the smallest
3255 // GPR register on AArch64 and all loads which are smaller automatically
3256 // zero-extend the upper bits. E.g.
3257 // %v(s8) = G_LOAD %p, :: (load 1)
3258 // %v2(s32) = G_ZEXT %v(s8)
3259 if (!IsSigned) {
3260 auto *LoadMI = getOpcodeDef(TargetOpcode::G_LOAD, SrcReg, MRI);
3261 bool IsGPR =
3262 RBI.getRegBank(SrcReg, MRI, TRI)->getID() == AArch64::GPRRegBankID;
3263 if (LoadMI && IsGPR) {
3264 const MachineMemOperand *MemOp = *LoadMI->memoperands_begin();
3265 unsigned BytesLoaded = MemOp->getSize();
3266 if (BytesLoaded < 4 && SrcTy.getSizeInBytes() == BytesLoaded)
3267 return selectCopy(I, TII, MRI, TRI, RBI);
3268 }
3269
3270 // For the 32-bit -> 64-bit case, we can emit a mov (ORRWrs)
3271 // + SUBREG_TO_REG.
3272 //
3273 // If we are zero extending from 32 bits to 64 bits, it's possible that
3274 // the instruction implicitly does the zero extend for us. In that case,
3275 // we only need the SUBREG_TO_REG.
3276 if (IsGPR && SrcSize == 32 && DstSize == 64) {
3277 // Unlike with the G_LOAD case, we don't want to look through copies
3278 // here. (See isDef32.)
3279 MachineInstr *Def = MRI.getVRegDef(SrcReg);
3280 Register SubregToRegSrc = SrcReg;
3281
3282 // Does the instruction implicitly zero extend?
3283 if (!Def || !isDef32(*Def)) {
3284 // No. Zero out using an OR.
3285 Register OrDst = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
3286 const Register ZReg = AArch64::WZR;
3287 MIB.buildInstr(AArch64::ORRWrs, {OrDst}, {ZReg, SrcReg}).addImm(0);
3288 SubregToRegSrc = OrDst;
3289 }
3290
3291 MIB.buildInstr(AArch64::SUBREG_TO_REG, {DefReg}, {})
3292 .addImm(0)
3293 .addUse(SubregToRegSrc)
3294 .addImm(AArch64::sub_32);
3295
3296 if (!RBI.constrainGenericRegister(DefReg, AArch64::GPR64RegClass,
3297 MRI)) {
3298 LLVM_DEBUG(dbgs() << "Failed to constrain G_ZEXT destination\n");
3299 return false;
3300 }
3301
3302 if (!RBI.constrainGenericRegister(SrcReg, AArch64::GPR32RegClass,
3303 MRI)) {
3304 LLVM_DEBUG(dbgs() << "Failed to constrain G_ZEXT source\n");
3305 return false;
3306 }
3307
3308 I.eraseFromParent();
3309 return true;
3310 }
3311 }
3312
3313 if (DstSize == 64) {
3314 if (Opcode != TargetOpcode::G_SEXT_INREG) {
3315 // FIXME: Can we avoid manually doing this?
3316 if (!RBI.constrainGenericRegister(SrcReg, AArch64::GPR32RegClass,
3317 MRI)) {
3318 LLVM_DEBUG(dbgs() << "Failed to constrain " << TII.getName(Opcode)
3319 << " operand\n");
3320 return false;
3321 }
3322 SrcReg = MIB.buildInstr(AArch64::SUBREG_TO_REG,
3323 {&AArch64::GPR64RegClass}, {})
3324 .addImm(0)
3325 .addUse(SrcReg)
3326 .addImm(AArch64::sub_32)
3327 .getReg(0);
3328 }
3329
3330 ExtI = MIB.buildInstr(IsSigned ? AArch64::SBFMXri : AArch64::UBFMXri,
3331 {DefReg}, {SrcReg})
3332 .addImm(0)
3333 .addImm(SrcSize - 1);
3334 } else if (DstSize <= 32) {
3335 ExtI = MIB.buildInstr(IsSigned ? AArch64::SBFMWri : AArch64::UBFMWri,
3336 {DefReg}, {SrcReg})
3337 .addImm(0)
3338 .addImm(SrcSize - 1);
3339 } else {
3340 return false;
3341 }
3342
3343 constrainSelectedInstRegOperands(*ExtI, TII, TRI, RBI);
3344 I.eraseFromParent();
3345 return true;
3346 }
3347
3348 case TargetOpcode::G_SITOFP:
3349 case TargetOpcode::G_UITOFP:
3350 case TargetOpcode::G_FPTOSI:
3351 case TargetOpcode::G_FPTOUI: {
3352 const LLT DstTy = MRI.getType(I.getOperand(0).getReg()),
3353 SrcTy = MRI.getType(I.getOperand(1).getReg());
3354 const unsigned NewOpc = selectFPConvOpc(Opcode, DstTy, SrcTy);
3355 if (NewOpc == Opcode)
3356 return false;
3357
3358 I.setDesc(TII.get(NewOpc));
3359 constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3360 I.setFlags(MachineInstr::NoFPExcept);
3361
3362 return true;
3363 }
3364
3365 case TargetOpcode::G_FREEZE:
3366 return selectCopy(I, TII, MRI, TRI, RBI);
3367
3368 case TargetOpcode::G_INTTOPTR:
3369 // The importer is currently unable to import pointer types since they
3370 // didn't exist in SelectionDAG.
3371 return selectCopy(I, TII, MRI, TRI, RBI);
3372
3373 case TargetOpcode::G_BITCAST:
3374 // Imported SelectionDAG rules can handle every bitcast except those that
3375 // bitcast from a type to the same type. Ideally, these shouldn't occur
3376 // but we might not run an optimizer that deletes them. The other exception
3377 // is bitcasts involving pointer types, as SelectionDAG has no knowledge
3378 // of them.
3379 return selectCopy(I, TII, MRI, TRI, RBI);
3380
3381 case TargetOpcode::G_SELECT: {
3382 auto &Sel = cast<GSelect>(I);
3383 const Register CondReg = Sel.getCondReg();
3384 const Register TReg = Sel.getTrueReg();
3385 const Register FReg = Sel.getFalseReg();
3386
3387 if (tryOptSelect(Sel))
3388 return true;
3389
3390 // Make sure to use an unused vreg instead of wzr, so that the peephole
3391 // optimizations will be able to optimize these.
3392 Register DeadVReg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
3393 auto TstMI = MIB.buildInstr(AArch64::ANDSWri, {DeadVReg}, {CondReg})
3394 .addImm(AArch64_AM::encodeLogicalImmediate(1, 32));
3395 constrainSelectedInstRegOperands(*TstMI, TII, TRI, RBI);
3396 if (!emitSelect(Sel.getReg(0), TReg, FReg, AArch64CC::NE, MIB))
3397 return false;
3398 Sel.eraseFromParent();
3399 return true;
3400 }
3401 case TargetOpcode::G_ICMP: {
3402 if (Ty.isVector())
3403 return selectVectorICmp(I, MRI);
3404
3405 if (Ty != LLT::scalar(32)) {
3406 LLVM_DEBUG(dbgs() << "G_ICMP result has type: " << Ty
3407 << ", expected: " << LLT::scalar(32) << '\n');
3408 return false;
3409 }
3410
3411 auto Pred = static_cast<CmpInst::Predicate>(I.getOperand(1).getPredicate());
3412 const AArch64CC::CondCode InvCC =
3413 changeICMPPredToAArch64CC(CmpInst::getInversePredicate(Pred));
3414 emitIntegerCompare(I.getOperand(2), I.getOperand(3), I.getOperand(1), MIB);
3415 emitCSINC(/*Dst=*/I.getOperand(0).getReg(), /*Src1=*/AArch64::WZR,
3416 /*Src2=*/AArch64::WZR, InvCC, MIB);
3417 I.eraseFromParent();
3418 return true;
3419 }
3420
3421 case TargetOpcode::G_FCMP: {
3422 CmpInst::Predicate Pred =
3423 static_cast<CmpInst::Predicate>(I.getOperand(1).getPredicate());
3424 if (!emitFPCompare(I.getOperand(2).getReg(), I.getOperand(3).getReg(), MIB,
3425 Pred) ||
3426 !emitCSetForFCmp(I.getOperand(0).getReg(), Pred, MIB))
3427 return false;
3428 I.eraseFromParent();
3429 return true;
3430 }
3431 case TargetOpcode::G_VASTART:
3432 return STI.isTargetDarwin() ? selectVaStartDarwin(I, MF, MRI)
3433 : selectVaStartAAPCS(I, MF, MRI);
3434 case TargetOpcode::G_INTRINSIC:
3435 return selectIntrinsic(I, MRI);
3436 case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS:
3437 return selectIntrinsicWithSideEffects(I, MRI);
3438 case TargetOpcode::G_IMPLICIT_DEF: {
3439 I.setDesc(TII.get(TargetOpcode::IMPLICIT_DEF));
3440 const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
3441 const Register DstReg = I.getOperand(0).getReg();
3442 const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
3443 const TargetRegisterClass *DstRC = getRegClassForTypeOnBank(DstTy, DstRB);
3444 RBI.constrainGenericRegister(DstReg, *DstRC, MRI);
3445 return true;
3446 }
3447 case TargetOpcode::G_BLOCK_ADDR: {
3448 if (TM.getCodeModel() == CodeModel::Large) {
3449 materializeLargeCMVal(I, I.getOperand(1).getBlockAddress(), 0);
3450 I.eraseFromParent();
3451 return true;
3452 } else {
3453 I.setDesc(TII.get(AArch64::MOVaddrBA));
3454 auto MovMI = BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::MOVaddrBA),
3455 I.getOperand(0).getReg())
3456 .addBlockAddress(I.getOperand(1).getBlockAddress(),
3457 /* Offset */ 0, AArch64II::MO_PAGE)
3458 .addBlockAddress(
3459 I.getOperand(1).getBlockAddress(), /* Offset */ 0,
3460 AArch64II::MO_NC | AArch64II::MO_PAGEOFF);
3461 I.eraseFromParent();
3462 return constrainSelectedInstRegOperands(*MovMI, TII, TRI, RBI);
3463 }
3464 }
3465 case AArch64::G_DUP: {
3466 // When the scalar of G_DUP is an s8/s16 gpr, they can't be selected by
3467 // imported patterns. Do it manually here. Avoiding generating s16 gpr is
3468 // difficult because at RBS we may end up pessimizing the fpr case if we
3469 // decided to add an anyextend to fix this. Manual selection is the most
3470 // robust solution for now.
3471 if (RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI)->getID() !=
3472 AArch64::GPRRegBankID)
3473 return false; // We expect the fpr regbank case to be imported.
3474 LLT VecTy = MRI.getType(I.getOperand(0).getReg());
3475 if (VecTy == LLT::fixed_vector(8, 8))
3476 I.setDesc(TII.get(AArch64::DUPv8i8gpr));
3477 else if (VecTy == LLT::fixed_vector(16, 8))
3478 I.setDesc(TII.get(AArch64::DUPv16i8gpr));
3479 else if (VecTy == LLT::fixed_vector(4, 16))
3480 I.setDesc(TII.get(AArch64::DUPv4i16gpr));
3481 else if (VecTy == LLT::fixed_vector(8, 16))
3482 I.setDesc(TII.get(AArch64::DUPv8i16gpr));
3483 else
3484 return false;
3485 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3486 }
3487 case TargetOpcode::G_INTRINSIC_TRUNC:
3488 return selectIntrinsicTrunc(I, MRI);
3489 case TargetOpcode::G_INTRINSIC_ROUND:
3490 return selectIntrinsicRound(I, MRI);
3491 case TargetOpcode::G_BUILD_VECTOR:
3492 return selectBuildVector(I, MRI);
3493 case TargetOpcode::G_MERGE_VALUES:
3494 return selectMergeValues(I, MRI);
3495 case TargetOpcode::G_UNMERGE_VALUES:
3496 return selectUnmergeValues(I, MRI);
3497 case TargetOpcode::G_SHUFFLE_VECTOR:
3498 return selectShuffleVector(I, MRI);
3499 case TargetOpcode::G_EXTRACT_VECTOR_ELT:
3500 return selectExtractElt(I, MRI);
3501 case TargetOpcode::G_INSERT_VECTOR_ELT:
3502 return selectInsertElt(I, MRI);
3503 case TargetOpcode::G_CONCAT_VECTORS:
3504 return selectConcatVectors(I, MRI);
3505 case TargetOpcode::G_JUMP_TABLE:
3506 return selectJumpTable(I, MRI);
3507 case TargetOpcode::G_VECREDUCE_FADD:
3508 case TargetOpcode::G_VECREDUCE_ADD:
3509 return selectReduction(I, MRI);
3510 case TargetOpcode::G_MEMCPY:
3511 case TargetOpcode::G_MEMCPY_INLINE:
3512 case TargetOpcode::G_MEMMOVE:
3513 case TargetOpcode::G_MEMSET:
3514 assert(STI.hasMOPS() && "Shouldn't get here without +mops feature");
3515 return selectMOPS(I, MRI);
3516 }
3517
3518 return false;
3519}
3520
3521bool AArch64InstructionSelector::selectReduction(MachineInstr &I,
3522 MachineRegisterInfo &MRI) {
3523 Register VecReg = I.getOperand(1).getReg();
3524 LLT VecTy = MRI.getType(VecReg);
3525 if (I.getOpcode() == TargetOpcode::G_VECREDUCE_ADD) {
3526 // For <2 x i32> ADDPv2i32 generates an FPR64 value, so we need to emit
3527 // a subregister copy afterwards.
3528 if (VecTy == LLT::fixed_vector(2, 32)) {
3529 Register DstReg = I.getOperand(0).getReg();
3530 auto AddP = MIB.buildInstr(AArch64::ADDPv2i32, {&AArch64::FPR64RegClass},
3531 {VecReg, VecReg});
3532 auto Copy = MIB.buildInstr(TargetOpcode::COPY, {DstReg}, {})
3533 .addReg(AddP.getReg(0), 0, AArch64::ssub)
3534 .getReg(0);
3535 RBI.constrainGenericRegister(Copy, AArch64::FPR32RegClass, MRI);
3536 I.eraseFromParent();
3537 return constrainSelectedInstRegOperands(*AddP, TII, TRI, RBI);
3538 }
3539
3540 unsigned Opc = 0;
3541 if (VecTy == LLT::fixed_vector(16, 8))
3542 Opc = AArch64::ADDVv16i8v;
3543 else if (VecTy == LLT::fixed_vector(8, 16))
3544 Opc = AArch64::ADDVv8i16v;
3545 else if (VecTy == LLT::fixed_vector(4, 32))
3546 Opc = AArch64::ADDVv4i32v;
3547 else if (VecTy == LLT::fixed_vector(2, 64))
3548 Opc = AArch64::ADDPv2i64p;
3549 else {
3550 LLVM_DEBUG(dbgs() << "Unhandled type for add reduction");
3551 return false;
3552 }
3553 I.setDesc(TII.get(Opc));
3554 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3555 }
3556
3557 if (I.getOpcode() == TargetOpcode::G_VECREDUCE_FADD) {
3558 unsigned Opc = 0;
3559 if (VecTy == LLT::fixed_vector(2, 32))
3560 Opc = AArch64::FADDPv2i32p;
3561 else if (VecTy == LLT::fixed_vector(2, 64))
3562 Opc = AArch64::FADDPv2i64p;
3563 else {
3564 LLVM_DEBUG(dbgs() << "Unhandled type for fadd reduction");
3565 return false;
3566 }
3567 I.setDesc(TII.get(Opc));
3568 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3569 }
3570 return false;
3571}
3572
3573bool AArch64InstructionSelector::selectMOPS(MachineInstr &GI,
3574 MachineRegisterInfo &MRI) {
3575 unsigned Mopcode;
Step 1: 'Mopcode' declared without an initial value
3576 switch (GI.getOpcode()) {
Step 2: 'Default' branch taken. Execution continues on line 3590
3577 case TargetOpcode::G_MEMCPY:
3578 case TargetOpcode::G_MEMCPY_INLINE:
3579 Mopcode = AArch64::MOPSMemoryCopyPseudo;
3580 break;
3581 case TargetOpcode::G_MEMMOVE:
3582 Mopcode = AArch64::MOPSMemoryMovePseudo;
3583 break;
3584 case TargetOpcode::G_MEMSET:
3585 // For tagged memset see llvm.aarch64.mops.memset.tag
3586 Mopcode = AArch64::MOPSMemorySetPseudo;
3587 break;
3588 }
3589
3590 auto &DstPtr = GI.getOperand(0);
3591 auto &SrcOrVal = GI.getOperand(1);
3592 auto &Size = GI.getOperand(2);
3593
3594 // Create copies of the registers that can be clobbered.
3595 const Register DstPtrCopy = MRI.cloneVirtualRegister(DstPtr.getReg());
3596 const Register SrcValCopy = MRI.cloneVirtualRegister(SrcOrVal.getReg());
3597 const Register SizeCopy = MRI.cloneVirtualRegister(Size.getReg());
3598
3599 const bool IsSet = Mopcode == AArch64::MOPSMemorySetPseudo;
Step 3: The left operand of '==' is a garbage value
3600 const auto &SrcValRegClass =
3601 IsSet ? AArch64::GPR64RegClass : AArch64::GPR64commonRegClass;
3602
3603 // Constrain to specific registers
3604 RBI.constrainGenericRegister(DstPtrCopy, AArch64::GPR64commonRegClass, MRI);
3605 RBI.constrainGenericRegister(SrcValCopy, SrcValRegClass, MRI);
3606 RBI.constrainGenericRegister(SizeCopy, AArch64::GPR64RegClass, MRI);
3607
3608 MIB.buildCopy(DstPtrCopy, DstPtr);
3609 MIB.buildCopy(SrcValCopy, SrcOrVal);
3610 MIB.buildCopy(SizeCopy, Size);
3611
3612 // New instruction uses the copied registers because it must update them.
3613 // The defs are not used since they don't exist in G_MEM*. They are still
3614 // tied.
3615 // Note: order of operands is different from G_MEMSET, G_MEMCPY, G_MEMMOVE
3616 Register DefDstPtr = MRI.createVirtualRegister(&AArch64::GPR64commonRegClass);
3617 Register DefSize = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
3618 if (IsSet) {
3619 MIB.buildInstr(Mopcode, {DefDstPtr, DefSize},
3620 {DstPtrCopy, SizeCopy, SrcValCopy});
3621 } else {
3622 Register DefSrcPtr = MRI.createVirtualRegister(&SrcValRegClass);
3623 MIB.buildInstr(Mopcode, {DefDstPtr, DefSrcPtr, DefSize},
3624 {DstPtrCopy, SrcValCopy, SizeCopy});
3625 }
3626
3627 GI.eraseFromParent();
3628 return true;
3629}
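
The three analyzer steps above are the reported defect: 'Mopcode' is assigned only in the G_MEMCPY/G_MEMCPY_INLINE, G_MEMMOVE, and G_MEMSET cases, so on the modeled path where the switch takes its implicit default branch, the comparison at line 3599 reads an uninitialized value. The assert before the call (line 3514) suggests that path is not expected in practice, but nothing inside selectMOPS itself rules it out. A minimal sketch of one way to make that explicit to the analyzer (an illustration of the idea, not necessarily the upstream fix) is to give the switch an unreachable default, as other switches in this file already do:

  // Sketch only: makes the switch exhaustive so every path defines Mopcode.
  unsigned Mopcode;
  switch (GI.getOpcode()) {
  default:
    // Assumed unreachable: selectMOPS is only entered for the G_MEM* opcodes.
    llvm_unreachable("Unexpected opcode for selectMOPS");
  case TargetOpcode::G_MEMCPY:
  case TargetOpcode::G_MEMCPY_INLINE:
    Mopcode = AArch64::MOPSMemoryCopyPseudo;
    break;
  case TargetOpcode::G_MEMMOVE:
    Mopcode = AArch64::MOPSMemoryMovePseudo;
    break;
  case TargetOpcode::G_MEMSET:
    // For tagged memset see llvm.aarch64.mops.memset.tag
    Mopcode = AArch64::MOPSMemorySetPseudo;
    break;
  }

With a default that calls llvm_unreachable (already used elsewhere in this file, e.g. at lines 2602 and 3136), every feasible path through the switch initializes Mopcode before it is compared at line 3599.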
3630
3631bool AArch64InstructionSelector::selectBrJT(MachineInstr &I,
3632 MachineRegisterInfo &MRI) {
3633 assert(I.getOpcode() == TargetOpcode::G_BRJT && "Expected G_BRJT");
3634 Register JTAddr = I.getOperand(0).getReg();
3635 unsigned JTI = I.getOperand(1).getIndex();
3636 Register Index = I.getOperand(2).getReg();
3637
3638 Register TargetReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
3639 Register ScratchReg = MRI.createVirtualRegister(&AArch64::GPR64spRegClass);
3640
3641 MF->getInfo<AArch64FunctionInfo>()->setJumpTableEntryInfo(JTI, 4, nullptr);
3642 auto JumpTableInst = MIB.buildInstr(AArch64::JumpTableDest32,
3643 {TargetReg, ScratchReg}, {JTAddr, Index})
3644 .addJumpTableIndex(JTI);
3645 // Build the indirect branch.
3646 MIB.buildInstr(AArch64::BR, {}, {TargetReg});
3647 I.eraseFromParent();
3648 return constrainSelectedInstRegOperands(*JumpTableInst, TII, TRI, RBI);
3649}
3650
3651bool AArch64InstructionSelector::selectJumpTable(MachineInstr &I,
3652 MachineRegisterInfo &MRI) {
3653  assert(I.getOpcode() == TargetOpcode::G_JUMP_TABLE && "Expected jump table");
3654  assert(I.getOperand(1).isJTI() && "Jump table op should have a JTI!");
3655
3656 Register DstReg = I.getOperand(0).getReg();
3657 unsigned JTI = I.getOperand(1).getIndex();
3658 // We generate a MOVaddrJT which will get expanded to an ADRP + ADD later.
3659 auto MovMI =
3660 MIB.buildInstr(AArch64::MOVaddrJT, {DstReg}, {})
3661 .addJumpTableIndex(JTI, AArch64II::MO_PAGE)
3662 .addJumpTableIndex(JTI, AArch64II::MO_NC | AArch64II::MO_PAGEOFF);
3663 I.eraseFromParent();
3664 return constrainSelectedInstRegOperands(*MovMI, TII, TRI, RBI);
3665}
3666
3667bool AArch64InstructionSelector::selectTLSGlobalValue(
3668 MachineInstr &I, MachineRegisterInfo &MRI) {
3669 if (!STI.isTargetMachO())
3670 return false;
3671 MachineFunction &MF = *I.getParent()->getParent();
3672 MF.getFrameInfo().setAdjustsStack(true);
3673
3674 const auto &GlobalOp = I.getOperand(1);
3675  assert(GlobalOp.getOffset() == 0 &&
3676         "Shouldn't have an offset on TLS globals!");
3677 const GlobalValue &GV = *GlobalOp.getGlobal();
3678
3679 auto LoadGOT =
3680 MIB.buildInstr(AArch64::LOADgot, {&AArch64::GPR64commonRegClass}, {})
3681 .addGlobalAddress(&GV, 0, AArch64II::MO_TLS);
3682
3683 auto Load = MIB.buildInstr(AArch64::LDRXui, {&AArch64::GPR64commonRegClass},
3684 {LoadGOT.getReg(0)})
3685 .addImm(0);
3686
3687 MIB.buildCopy(Register(AArch64::X0), LoadGOT.getReg(0));
3688 // TLS calls preserve all registers except those that absolutely must be
3689 // trashed: X0 (it takes an argument), LR (it's a call) and NZCV (let's not be
3690 // silly).
3691 MIB.buildInstr(getBLRCallOpcode(MF), {}, {Load})
3692 .addUse(AArch64::X0, RegState::Implicit)
3693 .addDef(AArch64::X0, RegState::Implicit)
3694 .addRegMask(TRI.getTLSCallPreservedMask());
3695
3696 MIB.buildCopy(I.getOperand(0).getReg(), Register(AArch64::X0));
3697 RBI.constrainGenericRegister(I.getOperand(0).getReg(), AArch64::GPR64RegClass,
3698 MRI);
3699 I.eraseFromParent();
3700 return true;
3701}
3702
3703bool AArch64InstructionSelector::selectIntrinsicTrunc(
3704 MachineInstr &I, MachineRegisterInfo &MRI) const {
3705 const LLT SrcTy = MRI.getType(I.getOperand(0).getReg());
3706
3707 // Select the correct opcode.
3708 unsigned Opc = 0;
3709 if (!SrcTy.isVector()) {
3710 switch (SrcTy.getSizeInBits()) {
3711 default:
3712 case 16:
3713 Opc = AArch64::FRINTZHr;
3714 break;
3715 case 32:
3716 Opc = AArch64::FRINTZSr;
3717 break;
3718 case 64:
3719 Opc = AArch64::FRINTZDr;
3720 break;
3721 }
3722 } else {
3723 unsigned NumElts = SrcTy.getNumElements();
3724 switch (SrcTy.getElementType().getSizeInBits()) {
3725 default:
3726 break;
3727 case 16:
3728 if (NumElts == 4)
3729 Opc = AArch64::FRINTZv4f16;
3730 else if (NumElts == 8)
3731 Opc = AArch64::FRINTZv8f16;
3732 break;
3733 case 32:
3734 if (NumElts == 2)
3735 Opc = AArch64::FRINTZv2f32;
3736 else if (NumElts == 4)
3737 Opc = AArch64::FRINTZv4f32;
3738 break;
3739 case 64:
3740 if (NumElts == 2)
3741 Opc = AArch64::FRINTZv2f64;
3742 break;
3743 }
3744 }
3745
3746 if (!Opc) {
3747 // Didn't get an opcode above, bail.
3748    LLVM_DEBUG(dbgs() << "Unsupported type for G_INTRINSIC_TRUNC!\n");
3749 return false;
3750 }
3751
3752 // Legalization would have set us up perfectly for this; we just need to
3753 // set the opcode and move on.
3754 I.setDesc(TII.get(Opc));
3755 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3756}
3757
3758bool AArch64InstructionSelector::selectIntrinsicRound(
3759 MachineInstr &I, MachineRegisterInfo &MRI) const {
3760 const LLT SrcTy = MRI.getType(I.getOperand(0).getReg());
3761
3762 // Select the correct opcode.
3763 unsigned Opc = 0;
3764 if (!SrcTy.isVector()) {
3765 switch (SrcTy.getSizeInBits()) {
3766 default:
3767 case 16:
3768 Opc = AArch64::FRINTAHr;
3769 break;
3770 case 32:
3771 Opc = AArch64::FRINTASr;
3772 break;
3773 case 64:
3774 Opc = AArch64::FRINTADr;
3775 break;
3776 }
3777 } else {
3778 unsigned NumElts = SrcTy.getNumElements();
3779 switch (SrcTy.getElementType().getSizeInBits()) {
3780 default:
3781 break;
3782 case 16:
3783 if (NumElts == 4)
3784 Opc = AArch64::FRINTAv4f16;
3785 else if (NumElts == 8)
3786 Opc = AArch64::FRINTAv8f16;
3787 break;
3788 case 32:
3789 if (NumElts == 2)
3790 Opc = AArch64::FRINTAv2f32;
3791 else if (NumElts == 4)
3792 Opc = AArch64::FRINTAv4f32;
3793 break;
3794 case 64:
3795 if (NumElts == 2)
3796 Opc = AArch64::FRINTAv2f64;
3797 break;
3798 }
3799 }
3800
3801 if (!Opc) {
3802 // Didn't get an opcode above, bail.
3803    LLVM_DEBUG(dbgs() << "Unsupported type for G_INTRINSIC_ROUND!\n");
3804 return false;
3805 }
3806
3807 // Legalization would have set us up perfectly for this; we just need to
3808 // set the opcode and move on.
3809 I.setDesc(TII.get(Opc));
3810 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3811}
3812
3813bool AArch64InstructionSelector::selectVectorICmp(
3814 MachineInstr &I, MachineRegisterInfo &MRI) {
3815 Register DstReg = I.getOperand(0).getReg();
3816 LLT DstTy = MRI.getType(DstReg);
3817 Register SrcReg = I.getOperand(2).getReg();
3818 Register Src2Reg = I.getOperand(3).getReg();
3819 LLT SrcTy = MRI.getType(SrcReg);
3820
3821 unsigned SrcEltSize = SrcTy.getElementType().getSizeInBits();
3822 unsigned NumElts = DstTy.getNumElements();
3823
3824 // First index is element size, 0 == 8b, 1 == 16b, 2 == 32b, 3 == 64b
3825 // Second index is num elts, 0 == v2, 1 == v4, 2 == v8, 3 == v16
3826 // Third index is cc opcode:
3827 // 0 == eq
3828 // 1 == ugt
3829 // 2 == uge
3830 // 3 == ult
3831 // 4 == ule
3832 // 5 == sgt
3833 // 6 == sge
3834 // 7 == slt
3835 // 8 == sle
3836 // ne is done by negating 'eq' result.
3837
3838 // This table below assumes that for some comparisons the operands will be
3839 // commuted.
3840 // ult op == commute + ugt op
3841 // ule op == commute + uge op
3842 // slt op == commute + sgt op
3843 // sle op == commute + sge op
3844 unsigned PredIdx = 0;
3845 bool SwapOperands = false;
3846 CmpInst::Predicate Pred = (CmpInst::Predicate)I.getOperand(1).getPredicate();
3847 switch (Pred) {
3848 case CmpInst::ICMP_NE:
3849 case CmpInst::ICMP_EQ:
3850 PredIdx = 0;
3851 break;
3852 case CmpInst::ICMP_UGT:
3853 PredIdx = 1;
3854 break;
3855 case CmpInst::ICMP_UGE:
3856 PredIdx = 2;
3857 break;
3858 case CmpInst::ICMP_ULT:
3859 PredIdx = 3;
3860 SwapOperands = true;
3861 break;
3862 case CmpInst::ICMP_ULE:
3863 PredIdx = 4;
3864 SwapOperands = true;
3865 break;
3866 case CmpInst::ICMP_SGT:
3867 PredIdx = 5;
3868 break;
3869 case CmpInst::ICMP_SGE:
3870 PredIdx = 6;
3871 break;
3872 case CmpInst::ICMP_SLT:
3873 PredIdx = 7;
3874 SwapOperands = true;
3875 break;
3876 case CmpInst::ICMP_SLE:
3877 PredIdx = 8;
3878 SwapOperands = true;
3879 break;
3880 default:
3881    llvm_unreachable("Unhandled icmp predicate");
3882 return false;
3883 }
3884
3885 // This table obviously should be tablegen'd when we have our GISel native
3886 // tablegen selector.
3887
3888 static const unsigned OpcTable[4][4][9] = {
3889 {
3890 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3891 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3892 0 /* invalid */},
3893 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3894 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3895 0 /* invalid */},
3896 {AArch64::CMEQv8i8, AArch64::CMHIv8i8, AArch64::CMHSv8i8,
3897 AArch64::CMHIv8i8, AArch64::CMHSv8i8, AArch64::CMGTv8i8,
3898 AArch64::CMGEv8i8, AArch64::CMGTv8i8, AArch64::CMGEv8i8},
3899 {AArch64::CMEQv16i8, AArch64::CMHIv16i8, AArch64::CMHSv16i8,
3900 AArch64::CMHIv16i8, AArch64::CMHSv16i8, AArch64::CMGTv16i8,
3901 AArch64::CMGEv16i8, AArch64::CMGTv16i8, AArch64::CMGEv16i8}
3902 },
3903 {
3904 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3905 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3906 0 /* invalid */},
3907 {AArch64::CMEQv4i16, AArch64::CMHIv4i16, AArch64::CMHSv4i16,
3908 AArch64::CMHIv4i16, AArch64::CMHSv4i16, AArch64::CMGTv4i16,
3909 AArch64::CMGEv4i16, AArch64::CMGTv4i16, AArch64::CMGEv4i16},
3910 {AArch64::CMEQv8i16, AArch64::CMHIv8i16, AArch64::CMHSv8i16,
3911 AArch64::CMHIv8i16, AArch64::CMHSv8i16, AArch64::CMGTv8i16,
3912 AArch64::CMGEv8i16, AArch64::CMGTv8i16, AArch64::CMGEv8i16},
3913 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3914 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3915 0 /* invalid */}
3916 },
3917 {
3918 {AArch64::CMEQv2i32, AArch64::CMHIv2i32, AArch64::CMHSv2i32,
3919 AArch64::CMHIv2i32, AArch64::CMHSv2i32, AArch64::CMGTv2i32,
3920 AArch64::CMGEv2i32, AArch64::CMGTv2i32, AArch64::CMGEv2i32},
3921 {AArch64::CMEQv4i32, AArch64::CMHIv4i32, AArch64::CMHSv4i32,
3922 AArch64::CMHIv4i32, AArch64::CMHSv4i32, AArch64::CMGTv4i32,
3923 AArch64::CMGEv4i32, AArch64::CMGTv4i32, AArch64::CMGEv4i32},
3924 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3925 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3926 0 /* invalid */},
3927 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3928 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3929 0 /* invalid */}
3930 },
3931 {
3932 {AArch64::CMEQv2i64, AArch64::CMHIv2i64, AArch64::CMHSv2i64,
3933 AArch64::CMHIv2i64, AArch64::CMHSv2i64, AArch64::CMGTv2i64,
3934 AArch64::CMGEv2i64, AArch64::CMGTv2i64, AArch64::CMGEv2i64},
3935 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3936 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3937 0 /* invalid */},
3938 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3939 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3940 0 /* invalid */},
3941 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3942 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3943 0 /* invalid */}
3944 },
3945 };
3946 unsigned EltIdx = Log2_32(SrcEltSize / 8);
3947 unsigned NumEltsIdx = Log2_32(NumElts / 2);
3948 unsigned Opc = OpcTable[EltIdx][NumEltsIdx][PredIdx];
3949 if (!Opc) {
3950    LLVM_DEBUG(dbgs() << "Could not map G_ICMP to cmp opcode");
3951 return false;
3952 }
3953
3954 const RegisterBank &VecRB = *RBI.getRegBank(SrcReg, MRI, TRI);
3955 const TargetRegisterClass *SrcRC =
3956 getRegClassForTypeOnBank(SrcTy, VecRB, true);
3957 if (!SrcRC) {
3958    LLVM_DEBUG(dbgs() << "Could not determine source register class.\n");
3959 return false;
3960 }
3961
3962 unsigned NotOpc = Pred == ICmpInst::ICMP_NE ? AArch64::NOTv8i8 : 0;
3963 if (SrcTy.getSizeInBits() == 128)
3964 NotOpc = NotOpc ? AArch64::NOTv16i8 : 0;
3965
3966 if (SwapOperands)
3967 std::swap(SrcReg, Src2Reg);
3968
3969 auto Cmp = MIB.buildInstr(Opc, {SrcRC}, {SrcReg, Src2Reg});
3970 constrainSelectedInstRegOperands(*Cmp, TII, TRI, RBI);
3971
3972 // Invert if we had a 'ne' cc.
3973 if (NotOpc) {
3974 Cmp = MIB.buildInstr(NotOpc, {DstReg}, {Cmp});
3975 constrainSelectedInstRegOperands(*Cmp, TII, TRI, RBI);
3976 } else {
3977 MIB.buildCopy(DstReg, Cmp.getReg(0));
3978 }
3979 RBI.constrainGenericRegister(DstReg, *SrcRC, MRI);
3980 I.eraseFromParent();
3981 return true;
3982}
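
As a worked reading of the OpcTable lookup above (the <4 x s32> operands and the sgt predicate are assumed for illustration, they are not taken from the report):

  // G_ICMP sgt on <4 x s32>:
  //   SrcEltSize = 32 -> EltIdx     = Log2_32(32 / 8) = 2
  //   NumElts    = 4  -> NumEltsIdx = Log2_32(4 / 2)  = 1
  //   sgt             -> PredIdx    = 5, no operand swap
  unsigned Opc = OpcTable[2][1][5]; // AArch64::CMGTv4i32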
3983
3984MachineInstr *AArch64InstructionSelector::emitScalarToVector(
3985 unsigned EltSize, const TargetRegisterClass *DstRC, Register Scalar,
3986 MachineIRBuilder &MIRBuilder) const {
3987 auto Undef = MIRBuilder.buildInstr(TargetOpcode::IMPLICIT_DEF, {DstRC}, {});
3988
3989 auto BuildFn = [&](unsigned SubregIndex) {
3990 auto Ins =
3991 MIRBuilder
3992 .buildInstr(TargetOpcode::INSERT_SUBREG, {DstRC}, {Undef, Scalar})
3993 .addImm(SubregIndex);
3994 constrainSelectedInstRegOperands(*Undef, TII, TRI, RBI);
3995 constrainSelectedInstRegOperands(*Ins, TII, TRI, RBI);
3996 return &*Ins;
3997 };
3998
3999 switch (EltSize) {
4000 case 16:
4001 return BuildFn(AArch64::hsub);
4002 case 32:
4003 return BuildFn(AArch64::ssub);
4004 case 64:
4005 return BuildFn(AArch64::dsub);
4006 default:
4007 return nullptr;
4008 }
4009}
4010
4011bool AArch64InstructionSelector::selectMergeValues(
4012 MachineInstr &I, MachineRegisterInfo &MRI) {
4013  assert(I.getOpcode() == TargetOpcode::G_MERGE_VALUES && "unexpected opcode");
4014 const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
4015 const LLT SrcTy = MRI.getType(I.getOperand(1).getReg());
4016  assert(!DstTy.isVector() && !SrcTy.isVector() && "invalid merge operation");
4017 const RegisterBank &RB = *RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI);
4018
4019 if (I.getNumOperands() != 3)
4020 return false;
4021
4022 // Merging 2 s64s into an s128.
4023 if (DstTy == LLT::scalar(128)) {
4024 if (SrcTy.getSizeInBits() != 64)
4025 return false;
4026 Register DstReg = I.getOperand(0).getReg();
4027 Register Src1Reg = I.getOperand(1).getReg();
4028 Register Src2Reg = I.getOperand(2).getReg();
4029 auto Tmp = MIB.buildInstr(TargetOpcode::IMPLICIT_DEF, {DstTy}, {});
4030 MachineInstr *InsMI =
4031 emitLaneInsert(None, Tmp.getReg(0), Src1Reg, /* LaneIdx */ 0, RB, MIB);
4032 if (!InsMI)
4033 return false;
4034 MachineInstr *Ins2MI = emitLaneInsert(DstReg, InsMI->getOperand(0).getReg(),
4035 Src2Reg, /* LaneIdx */ 1, RB, MIB);
4036 if (!Ins2MI)
4037 return false;
4038 constrainSelectedInstRegOperands(*InsMI, TII, TRI, RBI);
4039 constrainSelectedInstRegOperands(*Ins2MI, TII, TRI, RBI);
4040 I.eraseFromParent();
4041 return true;
4042 }
4043
4044 if (RB.getID() != AArch64::GPRRegBankID)
4045 return false;
4046
4047 if (DstTy.getSizeInBits() != 64 || SrcTy.getSizeInBits() != 32)
4048 return false;
4049
4050 auto *DstRC = &AArch64::GPR64RegClass;
4051 Register SubToRegDef = MRI.createVirtualRegister(DstRC);
4052 MachineInstr &SubRegMI = *BuildMI(*I.getParent(), I, I.getDebugLoc(),
4053 TII.get(TargetOpcode::SUBREG_TO_REG))
4054 .addDef(SubToRegDef)
4055 .addImm(0)
4056 .addUse(I.getOperand(1).getReg())
4057 .addImm(AArch64::sub_32);
4058 Register SubToRegDef2 = MRI.createVirtualRegister(DstRC);
4059 // Need to anyext the second scalar before we can use bfm
4060 MachineInstr &SubRegMI2 = *BuildMI(*I.getParent(), I, I.getDebugLoc(),
4061 TII.get(TargetOpcode::SUBREG_TO_REG))
4062 .addDef(SubToRegDef2)
4063 .addImm(0)
4064 .addUse(I.getOperand(2).getReg())
4065 .addImm(AArch64::sub_32);
4066 MachineInstr &BFM =
4067 *BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::BFMXri))
4068 .addDef(I.getOperand(0).getReg())
4069 .addUse(SubToRegDef)
4070 .addUse(SubToRegDef2)
4071 .addImm(32)
4072 .addImm(31);
4073 constrainSelectedInstRegOperands(SubRegMI, TII, TRI, RBI);
4074 constrainSelectedInstRegOperands(SubRegMI2, TII, TRI, RBI);
4075 constrainSelectedInstRegOperands(BFM, TII, TRI, RBI);
4076 I.eraseFromParent();
4077 return true;
4078}
4079
4080static bool getLaneCopyOpcode(unsigned &CopyOpc, unsigned &ExtractSubReg,
4081 const unsigned EltSize) {
4082 // Choose a lane copy opcode and subregister based off of the size of the
4083 // vector's elements.
4084 switch (EltSize) {
4085 case 8:
4086 CopyOpc = AArch64::DUPi8;
4087 ExtractSubReg = AArch64::bsub;
4088 break;
4089 case 16:
4090 CopyOpc = AArch64::DUPi16;
4091 ExtractSubReg = AArch64::hsub;
4092 break;
4093 case 32:
4094 CopyOpc = AArch64::DUPi32;
4095 ExtractSubReg = AArch64::ssub;
4096 break;
4097 case 64:
4098 CopyOpc = AArch64::DUPi64;
4099 ExtractSubReg = AArch64::dsub;
4100 break;
4101 default:
4102 // Unknown size, bail out.
4103    LLVM_DEBUG(dbgs() << "Elt size '" << EltSize << "' unsupported.\n");
4104 return false;
4105 }
4106 return true;
4107}
4108
4109MachineInstr *AArch64InstructionSelector::emitExtractVectorElt(
4110 Optional<Register> DstReg, const RegisterBank &DstRB, LLT ScalarTy,
4111 Register VecReg, unsigned LaneIdx, MachineIRBuilder &MIRBuilder) const {
4112 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
4113 unsigned CopyOpc = 0;
4114 unsigned ExtractSubReg = 0;
4115 if (!getLaneCopyOpcode(CopyOpc, ExtractSubReg, ScalarTy.getSizeInBits())) {
4116    LLVM_DEBUG(
4117        dbgs() << "Couldn't determine lane copy opcode for instruction.\n");
4118 return nullptr;
4119 }
4120
4121 const TargetRegisterClass *DstRC =
4122 getRegClassForTypeOnBank(ScalarTy, DstRB, true);
4123 if (!DstRC) {
4124    LLVM_DEBUG(dbgs() << "Could not determine destination register class.\n");
4125 return nullptr;
4126 }
4127
4128 const RegisterBank &VecRB = *RBI.getRegBank(VecReg, MRI, TRI);
4129 const LLT &VecTy = MRI.getType(VecReg);
4130 const TargetRegisterClass *VecRC =
4131 getRegClassForTypeOnBank(VecTy, VecRB, true);
4132 if (!VecRC) {
4133    LLVM_DEBUG(dbgs() << "Could not determine source register class.\n");
4134 return nullptr;
4135 }
4136
4137 // The register that we're going to copy into.
4138 Register InsertReg = VecReg;
4139 if (!DstReg)
4140 DstReg = MRI.createVirtualRegister(DstRC);
4141 // If the lane index is 0, we just use a subregister COPY.
4142 if (LaneIdx == 0) {
4143 auto Copy = MIRBuilder.buildInstr(TargetOpcode::COPY, {*DstReg}, {})
4144 .addReg(VecReg, 0, ExtractSubReg);
4145 RBI.constrainGenericRegister(*DstReg, *DstRC, MRI);
4146 return &*Copy;
4147 }
4148
4149 // Lane copies require 128-bit wide registers. If we're dealing with an
4150 // unpacked vector, then we need to move up to that width. Insert an implicit
4151 // def and a subregister insert to get us there.
4152 if (VecTy.getSizeInBits() != 128) {
4153 MachineInstr *ScalarToVector = emitScalarToVector(
4154 VecTy.getSizeInBits(), &AArch64::FPR128RegClass, VecReg, MIRBuilder);
4155 if (!ScalarToVector)
4156 return nullptr;
4157 InsertReg = ScalarToVector->getOperand(0).getReg();
4158 }
4159
4160 MachineInstr *LaneCopyMI =
4161 MIRBuilder.buildInstr(CopyOpc, {*DstReg}, {InsertReg}).addImm(LaneIdx);
4162 constrainSelectedInstRegOperands(*LaneCopyMI, TII, TRI, RBI);
4163
4164 // Make sure that we actually constrain the initial copy.
4165 RBI.constrainGenericRegister(*DstReg, *DstRC, MRI);
4166 return LaneCopyMI;
4167}
4168
4169bool AArch64InstructionSelector::selectExtractElt(
4170 MachineInstr &I, MachineRegisterInfo &MRI) {
4171  assert(I.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT &&
4172         "unexpected opcode!");
4173 Register DstReg = I.getOperand(0).getReg();
4174 const LLT NarrowTy = MRI.getType(DstReg);
4175 const Register SrcReg = I.getOperand(1).getReg();
4176 const LLT WideTy = MRI.getType(SrcReg);
4177 (void)WideTy;
4178  assert(WideTy.getSizeInBits() >= NarrowTy.getSizeInBits() &&
4179         "source register size too small!");
4180  assert(!NarrowTy.isVector() && "cannot extract vector into vector!");
4181
4182 // Need the lane index to determine the correct copy opcode.
4183 MachineOperand &LaneIdxOp = I.getOperand(2);
4184  assert(LaneIdxOp.isReg() && "Lane index operand was not a register?");
4185
4186 if (RBI.getRegBank(DstReg, MRI, TRI)->getID() != AArch64::FPRRegBankID) {
4187    LLVM_DEBUG(dbgs() << "Cannot extract into GPR.\n");
4188 return false;
4189 }
4190
4191 // Find the index to extract from.
4192 auto VRegAndVal = getIConstantVRegValWithLookThrough(LaneIdxOp.getReg(), MRI);
4193 if (!VRegAndVal)
4194 return false;
4195 unsigned LaneIdx = VRegAndVal->Value.getSExtValue();
4196
4197
4198 const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
4199 MachineInstr *Extract = emitExtractVectorElt(DstReg, DstRB, NarrowTy, SrcReg,
4200 LaneIdx, MIB);
4201 if (!Extract)
4202 return false;
4203
4204 I.eraseFromParent();
4205 return true;
4206}
4207
4208bool AArch64InstructionSelector::selectSplitVectorUnmerge(
4209 MachineInstr &I, MachineRegisterInfo &MRI) {
4210 unsigned NumElts = I.getNumOperands() - 1;
4211 Register SrcReg = I.getOperand(NumElts).getReg();
4212 const LLT NarrowTy = MRI.getType(I.getOperand(0).getReg());
4213 const LLT SrcTy = MRI.getType(SrcReg);
4214
4215  assert(NarrowTy.isVector() && "Expected an unmerge into vectors");
4216 if (SrcTy.getSizeInBits() > 128) {
4217    LLVM_DEBUG(dbgs() << "Unexpected vector type for vec split unmerge");
4218 return false;
4219 }
4220
4221 // We implement a split vector operation by treating the sub-vectors as
4222 // scalars and extracting them.
4223 const RegisterBank &DstRB =
4224 *RBI.getRegBank(I.getOperand(0).getReg(), MRI, TRI);
4225 for (unsigned OpIdx = 0; OpIdx < NumElts; ++OpIdx) {
4226 Register Dst = I.getOperand(OpIdx).getReg();
4227 MachineInstr *Extract =
4228 emitExtractVectorElt(Dst, DstRB, NarrowTy, SrcReg, OpIdx, MIB);
4229 if (!Extract)
4230 return false;
4231 }
4232 I.eraseFromParent();
4233 return true;
4234}
4235
4236bool AArch64InstructionSelector::selectUnmergeValues(MachineInstr &I,
4237 MachineRegisterInfo &MRI) {
4238  assert(I.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
4239         "unexpected opcode");
4240
4241 // TODO: Handle unmerging into GPRs and from scalars to scalars.
4242 if (RBI.getRegBank(I.getOperand(0).getReg(), MRI, TRI)->getID() !=
4243 AArch64::FPRRegBankID ||
4244 RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI)->getID() !=
4245 AArch64::FPRRegBankID) {
4246    LLVM_DEBUG(dbgs() << "Unmerging vector-to-gpr and scalar-to-scalar "
4247                         "currently unsupported.\n");
4248 return false;
4249 }
4250
4251 // The last operand is the vector source register, and every other operand is
4252 // a register to unpack into.
4253 unsigned NumElts = I.getNumOperands() - 1;
4254 Register SrcReg = I.getOperand(NumElts).getReg();
4255 const LLT NarrowTy = MRI.getType(I.getOperand(0).getReg());
4256 const LLT WideTy = MRI.getType(SrcReg);
4257 (void)WideTy;
4258  assert((WideTy.isVector() || WideTy.getSizeInBits() == 128) &&
4259         "can only unmerge from vector or s128 types!");
4260  assert(WideTy.getSizeInBits() > NarrowTy.getSizeInBits() &&
4261         "source register size too small!");
4262
4263 if (!NarrowTy.isScalar())
4264 return selectSplitVectorUnmerge(I, MRI);
4265
4266 // Choose a lane copy opcode and subregister based off of the size of the
4267 // vector's elements.
4268 unsigned CopyOpc = 0;
4269 unsigned ExtractSubReg = 0;
4270 if (!getLaneCopyOpcode(CopyOpc, ExtractSubReg, NarrowTy.getSizeInBits()))
4271 return false;
4272
4273 // Set up for the lane copies.
4274 MachineBasicBlock &MBB = *I.getParent();
4275
4276 // Stores the registers we'll be copying from.
4277 SmallVector<Register, 4> InsertRegs;
4278
4279 // We'll use the first register twice, so we only need NumElts-1 registers.
4280 unsigned NumInsertRegs = NumElts - 1;
4281
4282 // If our elements fit into exactly 128 bits, then we can copy from the source
4283 // directly. Otherwise, we need to do a bit of setup with some subregister
4284 // inserts.
4285 if (NarrowTy.getSizeInBits() * NumElts == 128) {
4286 InsertRegs = SmallVector<Register, 4>(NumInsertRegs, SrcReg);
4287 } else {
4288 // No. We have to perform subregister inserts. For each insert, create an
4289 // implicit def and a subregister insert, and save the register we create.
4290 const TargetRegisterClass *RC = getRegClassForTypeOnBank(
4291 LLT::fixed_vector(NumElts, WideTy.getScalarSizeInBits()),
4292 *RBI.getRegBank(SrcReg, MRI, TRI));
4293 unsigned SubReg = 0;
4294 bool Found = getSubRegForClass(RC, TRI, SubReg);
4295 (void)Found;
4296    assert(Found && "expected to find last operand's subeg idx");
4297 for (unsigned Idx = 0; Idx < NumInsertRegs; ++Idx) {
4298 Register ImpDefReg = MRI.createVirtualRegister(&AArch64::FPR128RegClass);
4299 MachineInstr &ImpDefMI =
4300 *BuildMI(MBB, I, I.getDebugLoc(), TII.get(TargetOpcode::IMPLICIT_DEF),
4301 ImpDefReg);
4302
4303 // Now, create the subregister insert from SrcReg.
4304 Register InsertReg = MRI.createVirtualRegister(&AArch64::FPR128RegClass);
4305 MachineInstr &InsMI =
4306 *BuildMI(MBB, I, I.getDebugLoc(),
4307 TII.get(TargetOpcode::INSERT_SUBREG), InsertReg)
4308 .addUse(ImpDefReg)
4309 .addUse(SrcReg)
4310 .addImm(SubReg);
4311
4312 constrainSelectedInstRegOperands(ImpDefMI, TII, TRI, RBI);
4313 constrainSelectedInstRegOperands(InsMI, TII, TRI, RBI);
4314
4315 // Save the register so that we can copy from it after.
4316 InsertRegs.push_back(InsertReg);
4317 }
4318 }
4319
4320 // Now that we've created any necessary subregister inserts, we can
4321 // create the copies.
4322 //
4323 // Perform the first copy separately as a subregister copy.
4324 Register CopyTo = I.getOperand(0).getReg();
4325 auto FirstCopy = MIB.buildInstr(TargetOpcode::COPY, {CopyTo}, {})
4326 .addReg(InsertRegs[0], 0, ExtractSubReg);
4327 constrainSelectedInstRegOperands(*FirstCopy, TII, TRI, RBI);
4328
4329 // Now, perform the remaining copies as vector lane copies.
4330 unsigned LaneIdx = 1;
4331 for (Register InsReg : InsertRegs) {
4332 Register CopyTo = I.getOperand(LaneIdx).getReg();
4333 MachineInstr &CopyInst =
4334 *BuildMI(MBB, I, I.getDebugLoc(), TII.get(CopyOpc), CopyTo)
4335 .addUse(InsReg)
4336 .addImm(LaneIdx);
4337 constrainSelectedInstRegOperands(CopyInst, TII, TRI, RBI);
4338 ++LaneIdx;
4339 }
4340
4341 // Separately constrain the first copy's destination. Because of the
4342 // limitation in constrainOperandRegClass, we can't guarantee that this will
4343 // actually be constrained. So, do it ourselves using the second operand.
4344 const TargetRegisterClass *RC =
4345 MRI.getRegClassOrNull(I.getOperand(1).getReg());
4346 if (!RC) {
4347    LLVM_DEBUG(dbgs() << "Couldn't constrain copy destination.\n");
4348 return false;
4349 }
4350
4351 RBI.constrainGenericRegister(CopyTo, *RC, MRI);
4352 I.eraseFromParent();
4353 return true;
4354}
4355
4356bool AArch64InstructionSelector::selectConcatVectors(
4357 MachineInstr &I, MachineRegisterInfo &MRI) {
4358  assert(I.getOpcode() == TargetOpcode::G_CONCAT_VECTORS &&
4359         "Unexpected opcode");
4360 Register Dst = I.getOperand(0).getReg();
4361 Register Op1 = I.getOperand(1).getReg();
4362 Register Op2 = I.getOperand(2).getReg();
4363 MachineInstr *ConcatMI = emitVectorConcat(Dst, Op1, Op2, MIB);
4364 if (!ConcatMI)
4365 return false;
4366 I.eraseFromParent();
4367 return true;
4368}
4369
4370unsigned
4371AArch64InstructionSelector::emitConstantPoolEntry(const Constant *CPVal,
4372 MachineFunction &MF) const {
4373 Type *CPTy = CPVal->getType();
4374 Align Alignment = MF.getDataLayout().getPrefTypeAlign(CPTy);
4375
4376 MachineConstantPool *MCP = MF.getConstantPool();
4377 return MCP->getConstantPoolIndex(CPVal, Alignment);
4378}
4379
4380MachineInstr *AArch64InstructionSelector::emitLoadFromConstantPool(
4381 const Constant *CPVal, MachineIRBuilder &MIRBuilder) const {
4382 auto &MF = MIRBuilder.getMF();
4383 unsigned CPIdx = emitConstantPoolEntry(CPVal, MF);
4384
4385 auto Adrp =
4386 MIRBuilder.buildInstr(AArch64::ADRP, {&AArch64::GPR64RegClass}, {})
4387 .addConstantPoolIndex(CPIdx, 0, AArch64II::MO_PAGE);
4388
4389 MachineInstr *LoadMI = nullptr;
4390 MachinePointerInfo PtrInfo = MachinePointerInfo::getConstantPool(MF);
4391 unsigned Size = MIRBuilder.getDataLayout().getTypeStoreSize(CPVal->getType());
4392 switch (Size) {
4393 case 16:
4394 LoadMI =
4395 &*MIRBuilder
4396 .buildInstr(AArch64::LDRQui, {&AArch64::FPR128RegClass}, {Adrp})
4397 .addConstantPoolIndex(CPIdx, 0,
4398 AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
4399 break;
4400 case 8:
4401 LoadMI =
4402 &*MIRBuilder
4403 .buildInstr(AArch64::LDRDui, {&AArch64::FPR64RegClass}, {Adrp})
4404 .addConstantPoolIndex(CPIdx, 0,
4405 AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
4406 break;
4407 case 4:
4408 LoadMI =
4409 &*MIRBuilder
4410 .buildInstr(AArch64::LDRSui, {&AArch64::FPR32RegClass}, {Adrp})
4411 .addConstantPoolIndex(CPIdx, 0,
4412 AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
4413 break;
4414 case 2:
4415 LoadMI =
4416 &*MIRBuilder
4417 .buildInstr(AArch64::LDRHui, {&AArch64::FPR16RegClass}, {Adrp})
4418 .addConstantPoolIndex(CPIdx, 0,
4419 AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
4420 break;
4421 default:
4422    LLVM_DEBUG(dbgs() << "Could not load from constant pool of type "
4423                      << *CPVal->getType());
4424 return nullptr;
4425 }
4426 LoadMI->addMemOperand(MF, MF.getMachineMemOperand(PtrInfo,
4427 MachineMemOperand::MOLoad,
4428 Size, Align(Size)));
4429 constrainSelectedInstRegOperands(*Adrp, TII, TRI, RBI);
4430 constrainSelectedInstRegOperands(*LoadMI, TII, TRI, RBI);
4431 return LoadMI;
4432}
4433
4434/// Return an <Opcode, SubregIndex> pair to do a vector elt insert of a given
4435/// size and RB.
4436static std::pair<unsigned, unsigned>
4437getInsertVecEltOpInfo(const RegisterBank &RB, unsigned EltSize) {
4438 unsigned Opc, SubregIdx;
4439 if (RB.getID() == AArch64::GPRRegBankID) {
4440 if (EltSize == 16) {
4441 Opc = AArch64::INSvi16gpr;
4442 SubregIdx = AArch64::ssub;
4443 } else if (EltSize == 32) {
4444 Opc = AArch64::INSvi32gpr;
4445 SubregIdx = AArch64::ssub;
4446 } else if (EltSize == 64) {
4447 Opc = AArch64::INSvi64gpr;
4448 SubregIdx = AArch64::dsub;
4449 } else {
4450      llvm_unreachable("invalid elt size!");
4451 }
4452 } else {
4453 if (EltSize == 8) {
4454 Opc = AArch64::INSvi8lane;
4455 SubregIdx = AArch64::bsub;
4456 } else if (EltSize == 16) {
4457 Opc = AArch64::INSvi16lane;
4458 SubregIdx = AArch64::hsub;
4459 } else if (EltSize == 32) {
4460 Opc = AArch64::INSvi32lane;
4461 SubregIdx = AArch64::ssub;
4462 } else if (EltSize == 64) {
4463 Opc = AArch64::INSvi64lane;
4464 SubregIdx = AArch64::dsub;
4465 } else {
4466      llvm_unreachable("invalid elt size!");
4467 }
4468 }
4469 return std::make_pair(Opc, SubregIdx);
4470}
4471
4472MachineInstr *AArch64InstructionSelector::emitInstr(
4473 unsigned Opcode, std::initializer_list<llvm::DstOp> DstOps,
4474 std::initializer_list<llvm::SrcOp> SrcOps, MachineIRBuilder &MIRBuilder,
4475 const ComplexRendererFns &RenderFns) const {
4476  assert(Opcode && "Expected an opcode?");
4477  assert(!isPreISelGenericOpcode(Opcode) &&
4478         "Function should only be used to produce selected instructions!");
4479 auto MI = MIRBuilder.buildInstr(Opcode, DstOps, SrcOps);
4480 if (RenderFns)
4481 for (auto &Fn : *RenderFns)
4482 Fn(MI);
4483 constrainSelectedInstRegOperands(*MI, TII, TRI, RBI);
4484 return &*MI;
4485}
4486
4487MachineInstr *AArch64InstructionSelector::emitAddSub(
4488 const std::array<std::array<unsigned, 2>, 5> &AddrModeAndSizeToOpcode,
4489 Register Dst, MachineOperand &LHS, MachineOperand &RHS,
4490 MachineIRBuilder &MIRBuilder) const {
4491 MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
4492  assert(LHS.isReg() && RHS.isReg() && "Expected register operands?");
4493 auto Ty = MRI.getType(LHS.getReg());
4494  assert(!Ty.isVector() && "Expected a scalar or pointer?");
4495 unsigned Size = Ty.getSizeInBits();
4496  assert((Size == 32 || Size == 64) && "Expected a 32-bit or 64-bit type only");
4497 bool Is32Bit = Size == 32;
4498
4499 // INSTRri form with positive arithmetic immediate.
4500 if (auto Fns = selectArithImmed(RHS))
4501 return emitInstr(AddrModeAndSizeToOpcode[0][Is32Bit], {Dst}, {LHS},
4502 MIRBuilder, Fns);
4503
4504 // INSTRri form with negative arithmetic immediate.
4505 if (auto Fns = selectNegArithImmed(RHS))
4506 return emitInstr(AddrModeAndSizeToOpcode[3][Is32Bit], {Dst}, {LHS},
4507 MIRBuilder, Fns);
4508
4509 // INSTRrx form.
4510 if (auto Fns = selectArithExtendedRegister(RHS))
4511 return emitInstr(AddrModeAndSizeToOpcode[4][Is32Bit], {Dst}, {LHS},
4512 MIRBuilder, Fns);
4513
4514 // INSTRrs form.
4515 if (auto Fns = selectShiftedRegister(RHS))
4516 return emitInstr(AddrModeAndSizeToOpcode[1][Is32Bit], {Dst}, {LHS},
4517 MIRBuilder, Fns);
4518 return emitInstr(AddrModeAndSizeToOpcode[2][Is32Bit], {Dst}, {LHS, RHS},
4519 MIRBuilder);
4520}
4521
4522MachineInstr *
4523AArch64InstructionSelector::emitADD(Register DefReg, MachineOperand &LHS,
4524 MachineOperand &RHS,
4525 MachineIRBuilder &MIRBuilder) const {
4526 const std::array<std::array<unsigned, 2>, 5> OpcTable{
4527 {{AArch64::ADDXri, AArch64::ADDWri},
4528 {AArch64::ADDXrs, AArch64::ADDWrs},
4529 {AArch64::ADDXrr, AArch64::ADDWrr},
4530 {AArch64::SUBXri, AArch64::SUBWri},
4531 {AArch64::ADDXrx, AArch64::ADDWrx}}};
4532 return emitAddSub(OpcTable, DefReg, LHS, RHS, MIRBuilder);
4533}
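
To see how emitAddSub walks this table, consider a sketch under assumed inputs: DefReg, LHS and RHS are hypothetical operands of a 64-bit G_ADD whose right-hand side is the constant -16.

  // selectArithImmed(RHS) fails because the immediate is negative, but
  // selectNegArithImmed(RHS) matches, so emitAddSub takes row 3 of the
  // table above and emits AArch64::SUBXri with an immediate of 16.
  MachineInstr *Add = emitADD(DefReg, LHS, RHS, MIB);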
4534
4535MachineInstr *
4536AArch64InstructionSelector::emitADDS(Register Dst, MachineOperand &LHS,
4537 MachineOperand &RHS,
4538 MachineIRBuilder &MIRBuilder) const {
4539 const std::array<std::array<unsigned, 2>, 5> OpcTable{
4540 {{AArch64::ADDSXri, AArch64::ADDSWri},
4541 {AArch64::ADDSXrs, AArch64::ADDSWrs},
4542 {AArch64::ADDSXrr, AArch64::ADDSWrr},
4543 {AArch64::SUBSXri, AArch64::SUBSWri},
4544 {AArch64::ADDSXrx, AArch64::ADDSWrx}}};
4545 return emitAddSub(OpcTable, Dst, LHS, RHS, MIRBuilder);
4546}
4547
4548MachineInstr *
4549AArch64InstructionSelector::emitSUBS(Register Dst, MachineOperand &LHS,
4550 MachineOperand &RHS,
4551 MachineIRBuilder &MIRBuilder) const {
4552 const std::array<std::array<unsigned, 2>, 5> OpcTable{
4553 {{AArch64::SUBSXri, AArch64::SUBSWri},
4554 {AArch64::SUBSXrs, AArch64::SUBSWrs},
4555 {AArch64::SUBSXrr, AArch64::SUBSWrr},
4556 {AArch64::ADDSXri, AArch64::ADDSWri},
4557 {AArch64::SUBSXrx, AArch64::SUBSWrx}}};
4558 return emitAddSub(OpcTable, Dst, LHS, RHS, MIRBuilder);
4559}
4560
4561MachineInstr *
4562AArch64InstructionSelector::emitCMN(MachineOperand &LHS, MachineOperand &RHS,
4563 MachineIRBuilder &MIRBuilder) const {
4564 MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
4565 bool Is32Bit = (MRI.getType(LHS.getReg()).getSizeInBits() == 32);
4566 auto RC = Is32Bit ? &AArch64::GPR32RegClass : &AArch64::GPR64RegClass;
4567 return emitADDS(MRI.createVirtualRegister(RC), LHS, RHS, MIRBuilder);
4568}
4569
4570MachineInstr *
4571AArch64InstructionSelector::emitTST(MachineOperand &LHS, MachineOperand &RHS,
4572 MachineIRBuilder &MIRBuilder) const {
4573  assert(LHS.isReg() && RHS.isReg() && "Expected register operands?");
4574 MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
4575 LLT Ty = MRI.getType(LHS.getReg());
4576 unsigned RegSize = Ty.getSizeInBits();
4577 bool Is32Bit = (RegSize == 32);
4578 const unsigned OpcTable[3][2] = {{AArch64::ANDSXri, AArch64::ANDSWri},
4579 {AArch64::ANDSXrs, AArch64::ANDSWrs},
4580 {AArch64::ANDSXrr, AArch64::ANDSWrr}};
4581 // ANDS needs a logical immediate for its immediate form. Check if we can
4582 // fold one in.
4583 if (auto ValAndVReg = getIConstantVRegValWithLookThrough(RHS.getReg(), MRI)) {
4584 int64_t Imm = ValAndVReg->Value.getSExtValue();
4585
4586 if (AArch64_AM::isLogicalImmediate(Imm, RegSize)) {
4587 auto TstMI = MIRBuilder.buildInstr(OpcTable[0][Is32Bit], {Ty}, {LHS});
4588 TstMI.addImm(AArch64_AM::encodeLogicalImmediate(Imm, RegSize));
4589 constrainSelectedInstRegOperands(*TstMI, TII, TRI, RBI);
4590 return &*TstMI;
4591 }
4592 }
4593
4594 if (auto Fns = selectLogicalShiftedRegister(RHS))
4595 return emitInstr(OpcTable[1][Is32Bit], {Ty}, {LHS}, MIRBuilder, Fns);
4596 return emitInstr(OpcTable[2][Is32Bit], {Ty}, {LHS, RHS}, MIRBuilder);
4597}
4598
4599MachineInstr *AArch64InstructionSelector::emitIntegerCompare(
4600 MachineOperand &LHS, MachineOperand &RHS, MachineOperand &Predicate,
4601 MachineIRBuilder &MIRBuilder) const {
4602  assert(LHS.isReg() && RHS.isReg() && "Expected LHS and RHS to be registers!");
4603  assert(Predicate.isPredicate() && "Expected predicate?");
4604 MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
4605 LLT CmpTy = MRI.getType(LHS.getReg());
4606  assert(!CmpTy.isVector() && "Expected scalar or pointer");
4607 unsigned Size = CmpTy.getSizeInBits();
4608 (void)Size;
4609  assert((Size == 32 || Size == 64) && "Expected a 32-bit or 64-bit LHS/RHS?");
4610 // Fold the compare into a cmn or tst if possible.
4611 if (auto FoldCmp = tryFoldIntegerCompare(LHS, RHS, Predicate, MIRBuilder))
4612 return FoldCmp;
4613 auto Dst = MRI.cloneVirtualRegister(LHS.getReg());
4614 return emitSUBS(Dst, LHS, RHS, MIRBuilder);
4615}
4616
4617MachineInstr *AArch64InstructionSelector::emitCSetForFCmp(
4618 Register Dst, CmpInst::Predicate Pred, MachineIRBuilder &MIRBuilder) const {
4619 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
4620#ifndef NDEBUG
4621 LLT Ty = MRI.getType(Dst);
4622  assert(!Ty.isVector() && Ty.getSizeInBits() == 32 &&
4623         "Expected a 32-bit scalar register?");
4624#endif
4625 const Register ZReg = AArch64::WZR;
4626 AArch64CC::CondCode CC1, CC2;
4627 changeFCMPPredToAArch64CC(Pred, CC1, CC2);
4628 auto InvCC1 = AArch64CC::getInvertedCondCode(CC1);
4629 if (CC2 == AArch64CC::AL)
4630 return emitCSINC(/*Dst=*/Dst, /*Src1=*/ZReg, /*Src2=*/ZReg, InvCC1,
4631 MIRBuilder);
4632 const TargetRegisterClass *RC = &AArch64::GPR32RegClass;
4633 Register Def1Reg = MRI.createVirtualRegister(RC);
4634 Register Def2Reg = MRI.createVirtualRegister(RC);
4635 auto InvCC2 = AArch64CC::getInvertedCondCode(CC2);
4636 emitCSINC(/*Dst=*/Def1Reg, /*Src1=*/ZReg, /*Src2=*/ZReg, InvCC1, MIRBuilder);
4637 emitCSINC(/*Dst=*/Def2Reg, /*Src1=*/ZReg, /*Src2=*/ZReg, InvCC2, MIRBuilder);
4638 auto OrMI = MIRBuilder.buildInstr(AArch64::ORRWrr, {Dst}, {Def1Reg, Def2Reg});
4639 constrainSelectedInstRegOperands(*OrMI, TII, TRI, RBI);
4640 return &*OrMI;
4641}
4642
4643MachineInstr *
4644AArch64InstructionSelector::emitFPCompare(Register LHS, Register RHS,
4645 MachineIRBuilder &MIRBuilder,
4646 Optional<CmpInst::Predicate> Pred) const {
4647 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
4648 LLT Ty = MRI.getType(LHS);
4649 if (Ty.isVector())
4650 return nullptr;
4651 unsigned OpSize = Ty.getSizeInBits();
4652 if (OpSize != 32 && OpSize != 64)
4653 return nullptr;
4654
4655 // If this is a compare against +0.0, then we don't have
4656 // to explicitly materialize a constant.
4657 const ConstantFP *FPImm = getConstantFPVRegVal(RHS, MRI);
4658 bool ShouldUseImm = FPImm && (FPImm->isZero() && !FPImm->isNegative());
4659
4660 auto IsEqualityPred = [](CmpInst::Predicate P) {
4661 return P == CmpInst::FCMP_OEQ || P == CmpInst::FCMP_ONE ||
4662 P == CmpInst::FCMP_UEQ || P == CmpInst::FCMP_UNE;
4663 };
4664 if (!ShouldUseImm && Pred && IsEqualityPred(*Pred)) {
4665 // Try commutating the operands.
4666 const ConstantFP *LHSImm = getConstantFPVRegVal(LHS, MRI);
4667 if (LHSImm && (LHSImm->isZero() && !LHSImm->isNegative())) {
4668 ShouldUseImm = true;
4669 std::swap(LHS, RHS);
4670 }
4671 }
4672 unsigned CmpOpcTbl[2][2] = {{AArch64::FCMPSrr, AArch64::FCMPDrr},
4673 {AArch64::FCMPSri, AArch64::FCMPDri}};
4674 unsigned CmpOpc = CmpOpcTbl[ShouldUseImm][OpSize == 64];
4675
4676 // Partially build the compare. Decide if we need to add a use for the
4677 // third operand based off whether or not we're comparing against 0.0.
4678 auto CmpMI = MIRBuilder.buildInstr(CmpOpc).addUse(LHS);
4679 CmpMI.setMIFlags(MachineInstr::NoFPExcept);
4680 if (!ShouldUseImm)
4681 CmpMI.addUse(RHS);
4682 constrainSelectedInstRegOperands(*CmpMI, TII, TRI, RBI);
4683 return &*CmpMI;
4684}
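
The +0.0 shortcut above can be read as follows (a sketch; the registers and the OEQ predicate are assumed for illustration):

  // For a 32-bit G_FCMP oeq %x, 0.0, getConstantFPVRegVal(RHS, MRI) finds
  // +0.0, so ShouldUseImm is true and FCMPSri is selected: the compare is
  // emitted with a single register use and no materialized 0.0 constant.
  auto *Cmp = emitFPCompare(LHSReg, RHSReg, MIB, CmpInst::FCMP_OEQ);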
4685
4686MachineInstr *AArch64InstructionSelector::emitVectorConcat(
4687 Optional<Register> Dst, Register Op1, Register Op2,
4688 MachineIRBuilder &MIRBuilder) const {
4689 // We implement a vector concat by:
4690 // 1. Use scalar_to_vector to insert the lower vector into the larger dest
4691 // 2. Insert the upper vector into the destination's upper element
4692 // TODO: some of this code is common with G_BUILD_VECTOR handling.
4693 MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
4694
4695 const LLT Op1Ty = MRI.getType(Op1);
4696 const LLT Op2Ty = MRI.getType(Op2);
4697
4698 if (Op1Ty != Op2Ty) {
4699    LLVM_DEBUG(dbgs() << "Could not do vector concat of differing vector tys");
4700 return nullptr;
4701 }
4702  assert(Op1Ty.isVector() && "Expected a vector for vector concat");
4703
4704 if (Op1Ty.getSizeInBits() >= 128) {
4705    LLVM_DEBUG(dbgs() << "Vector concat not supported for full size vectors");
4706 return nullptr;
4707 }
4708
4709 // At the moment we just support 64 bit vector concats.
4710 if (Op1Ty.getSizeInBits() != 64) {
4711    LLVM_DEBUG(dbgs() << "Vector concat supported for 64b vectors");
4712 return nullptr;
4713 }
4714
4715 const LLT ScalarTy = LLT::scalar(Op1Ty.getSizeInBits());
4716 const RegisterBank &FPRBank = *RBI.getRegBank(Op1, MRI, TRI);
4717 const TargetRegisterClass *DstRC =
4718 getRegClassForTypeOnBank(Op1Ty.multiplyElements(2), FPRBank);
4719
4720 MachineInstr *WidenedOp1 =
4721 emitScalarToVector(ScalarTy.getSizeInBits(), DstRC, Op1, MIRBuilder);
4722 MachineInstr *WidenedOp2 =
4723 emitScalarToVector(ScalarTy.getSizeInBits(), DstRC, Op2, MIRBuilder);
4724 if (!WidenedOp1 || !WidenedOp2) {
4725    LLVM_DEBUG(dbgs() << "Could not emit a vector from scalar value");
4726 return nullptr;
4727 }
4728
4729 // Now do the insert of the upper element.
4730 unsigned InsertOpc, InsSubRegIdx;
4731 std::tie(InsertOpc, InsSubRegIdx) =
4732 getInsertVecEltOpInfo(FPRBank, ScalarTy.getSizeInBits());
4733
4734 if (!Dst)
4735 Dst = MRI.createVirtualRegister(DstRC);
4736 auto InsElt =
4737 MIRBuilder
4738 .buildInstr(InsertOpc, {*Dst}, {WidenedOp1->getOperand(0).getReg()})
4739 .addImm(1) /* Lane index */
4740 .addUse(WidenedOp2->getOperand(0).getReg())
4741 .addImm(0);
4742 constrainSelectedInstRegOperands(*InsElt, TII, TRI, RBI);
4743 return &*InsElt;
4744}
4745
4746MachineInstr *
4747AArch64InstructionSelector::emitCSINC(Register Dst, Register Src1,
4748 Register Src2, AArch64CC::CondCode Pred,
4749 MachineIRBuilder &MIRBuilder) const {
4750 auto &MRI = *MIRBuilder.getMRI();
4751 const RegClassOrRegBank &RegClassOrBank = MRI.getRegClassOrRegBank(Dst);
4752 // If we used a register class, then this won't necessarily have an LLT.
4753 // Compute the size based off whether or not we have a class or bank.
4754 unsigned Size;
4755 if (const auto *RC = RegClassOrBank.dyn_cast<const TargetRegisterClass *>())
4756 Size = TRI.getRegSizeInBits(*RC);
4757 else
4758 Size = MRI.getType(Dst).getSizeInBits();
4759 // Some opcodes use s1.
4760  assert(Size <= 64 && "Expected 64 bits or less only!");
4761 static const unsigned OpcTable[2] = {AArch64::CSINCWr, AArch64::CSINCXr};
4762 unsigned Opc = OpcTable[Size == 64];
4763 auto CSINC = MIRBuilder.buildInstr(Opc, {Dst}, {Src1, Src2}).addImm(Pred);
4764 constrainSelectedInstRegOperands(*CSINC, TII, TRI, RBI);
4765 return &*CSINC;
4766}
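
// A brief reminder of the CSINC semantics this helper relies on (illustrative,
// not part of the original source): CSINC Rd, Rn, Rm, cond yields
// Rd = cond ? Rn : Rm + 1. A common use is materialising a boolean from NZCV:
//
//   CSINCWr %dst, WZR, WZR, <inverted cc>   ; equivalent to CSET %dst, cc
//
// so callers typically pass the zero register for both sources and the inverse
// of the condition they actually want to test.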
4767
4768std::pair<MachineInstr *, AArch64CC::CondCode>
4769AArch64InstructionSelector::emitOverflowOp(unsigned Opcode, Register Dst,
4770 MachineOperand &LHS,
4771 MachineOperand &RHS,
4772 MachineIRBuilder &MIRBuilder) const {
4773 switch (Opcode) {
4774 default:
4775 llvm_unreachable("Unexpected opcode!");
4776 case TargetOpcode::G_SADDO:
4777 return std::make_pair(emitADDS(Dst, LHS, RHS, MIRBuilder), AArch64CC::VS);
4778 case TargetOpcode::G_UADDO:
4779 return std::make_pair(emitADDS(Dst, LHS, RHS, MIRBuilder), AArch64CC::HS);
4780 case TargetOpcode::G_SSUBO:
4781 return std::make_pair(emitSUBS(Dst, LHS, RHS, MIRBuilder), AArch64CC::VS);
4782 case TargetOpcode::G_USUBO:
4783 return std::make_pair(emitSUBS(Dst, LHS, RHS, MIRBuilder), AArch64CC::LO);
4784 }
4785}
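
// Example mapping (a sketch, not from the original source): for
//
//   %sum:_(s32), %ovf:_(s1) = G_UADDO %a, %b
//
// this returns the ADDS emitted for %sum together with AArch64CC::HS, because
// an unsigned add overflows exactly when the carry flag is set; the caller can
// then materialise %ovf with a CSINC/CSET on that condition.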
4786
4787/// Returns true if @p Val is a tree of AND/OR/CMP operations that can be
4788/// expressed as a conjunction.
4789/// \param CanNegate Set to true if we can negate the whole sub-tree just by
4790/// changing the conditions on the CMP tests.
4791/// (this means we can call emitConjunctionRec() with
4792/// Negate==true on this sub-tree)
4793/// \param MustBeFirst Set to true if this subtree needs to be negated and we
4794/// cannot do the negation naturally. We are required to
4795/// emit the subtree first in this case.
4796/// \param WillNegate Is true if we are called when the result of this
4797/// subexpression must be negated. This happens when the
4798/// outer expression is an OR. We can use this fact to know
4799/// that we have a double negation (or (or ...) ...) that
4800/// can be implemented for free.
4801static bool canEmitConjunction(Register Val, bool &CanNegate, bool &MustBeFirst,
4802 bool WillNegate, MachineRegisterInfo &MRI,
4803 unsigned Depth = 0) {
4804 if (!MRI.hasOneNonDBGUse(Val))
4805 return false;
4806 MachineInstr *ValDef = MRI.getVRegDef(Val);
4807 unsigned Opcode = ValDef->getOpcode();
4808 if (isa<GAnyCmp>(ValDef)) {
4809 CanNegate = true;
4810 MustBeFirst = false;
4811 return true;
4812 }
4813 // Protect against exponential runtime and stack overflow.
4814 if (Depth > 6)
4815 return false;
4816 if (Opcode == TargetOpcode::G_AND || Opcode == TargetOpcode::G_OR) {
4817 bool IsOR = Opcode == TargetOpcode::G_OR;
4818 Register O0 = ValDef->getOperand(1).getReg();
4819 Register O1 = ValDef->getOperand(2).getReg();
4820 bool CanNegateL;
4821 bool MustBeFirstL;
4822 if (!canEmitConjunction(O0, CanNegateL, MustBeFirstL, IsOR, MRI, Depth + 1))
4823 return false;
4824 bool CanNegateR;
4825 bool MustBeFirstR;
4826 if (!canEmitConjunction(O1, CanNegateR, MustBeFirstR, IsOR, MRI, Depth + 1))
4827 return false;
4828
4829 if (MustBeFirstL && MustBeFirstR)
4830 return false;
4831
4832 if (IsOR) {
4833 // For an OR expression we need to be able to naturally negate at least
4834 // one side or we cannot do the transformation at all.
4835 if (!CanNegateL && !CanNegateR)
4836 return false;
4837 // If the result of the OR will be negated and we can naturally negate
4838 // the leaves, then this sub-tree as a whole negates naturally.
4839 CanNegate = WillNegate && CanNegateL && CanNegateR;
4840 // If we cannot naturally negate the whole sub-tree, then this must be
4841 // emitted first.
4842 MustBeFirst = !CanNegate;
4843 } else {
4844 assert(Opcode == TargetOpcode::G_AND && "Must be G_AND");
4845 // We cannot naturally negate an AND operation.
4846 CanNegate = false;
4847 MustBeFirst = MustBeFirstL || MustBeFirstR;
4848 }
4849 return true;
4850 }
4851 return false;
4852}
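
// Illustrative example (not from the original source): for
//
//   %c = G_AND (G_ICMP eq, %a, %b), (G_ICMP slt, %x, %y)
//
// both leaves are compares, so each reports CanNegate = true; the G_AND itself
// reports CanNegate = false (an AND cannot be negated for free) and
// MustBeFirst = false, so the tree qualifies for the ccmp lowering below. An OR
// of two compares only negates naturally when WillNegate is set, i.e. when it
// sits under another OR (a double negation that comes for free).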
4853
4854MachineInstr *AArch64InstructionSelector::emitConditionalComparison(
4855 Register LHS, Register RHS, CmpInst::Predicate CC,
4856 AArch64CC::CondCode Predicate, AArch64CC::CondCode OutCC,
4857 MachineIRBuilder &MIB) const {
4858 // TODO: emit CMN as an optimization.
4859 auto &MRI = *MIB.getMRI();
4860 LLT OpTy = MRI.getType(LHS);
4861 assert(OpTy.getSizeInBits() == 32 || OpTy.getSizeInBits() == 64);
4862 unsigned CCmpOpc;
4863 Optional<ValueAndVReg> C;
4864 if (CmpInst::isIntPredicate(CC)) {
4865 C = getIConstantVRegValWithLookThrough(RHS, MRI);
4866 if (C && C->Value.ult(32))
4867 CCmpOpc = OpTy.getSizeInBits() == 32 ? AArch64::CCMPWi : AArch64::CCMPXi;
4868 else
4869 CCmpOpc = OpTy.getSizeInBits() == 32 ? AArch64::CCMPWr : AArch64::CCMPXr;
4870 } else {
4871 switch (OpTy.getSizeInBits()) {
4872 case 16:
4873 CCmpOpc = AArch64::FCCMPHrr;
4874 break;
4875 case 32:
4876 CCmpOpc = AArch64::FCCMPSrr;
4877 break;
4878 case 64:
4879 CCmpOpc = AArch64::FCCMPDrr;
4880 break;
4881 default:
4882 return nullptr;
4883 }
4884 }
4885 AArch64CC::CondCode InvOutCC = AArch64CC::getInvertedCondCode(OutCC);
4886 unsigned NZCV = AArch64CC::getNZCVToSatisfyCondCode(InvOutCC);
4887 auto CCmp =
4888 MIB.buildInstr(CCmpOpc, {}, {LHS});
4889 if (CCmpOpc == AArch64::CCMPWi || CCmpOpc == AArch64::CCMPXi)
4890 CCmp.addImm(C->Value.getZExtValue());
4891 else
4892 CCmp.addReg(RHS);
4893 CCmp.addImm(NZCV).addImm(Predicate);
4894 constrainSelectedInstRegOperands(*CCmp, TII, TRI, RBI);
4895 return &*CCmp;
4896}
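
// Rough sketch of a resulting chain for (%a == 0) && (%b == 1) with 32-bit GPR
// operands (instruction names are representative assumptions):
//
//   SUBSWri %a, 0             ; first compare in the chain, sets NZCV
//   CCMPWi  %b, 1, #nzcv, eq  ; if "eq" held, compare %b against 1; otherwise
//                             ; load #nzcv, chosen to satisfy the inverse of
//                             ; OutCC so the overall test fails
//
// which matches the NZCV = getNZCVToSatisfyCondCode(getInvertedCondCode(OutCC))
// computation above.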
4897
4898MachineInstr *AArch64InstructionSelector::emitConjunctionRec(
4899 Register Val, AArch64CC::CondCode &OutCC, bool Negate, Register CCOp,
4900 AArch64CC::CondCode Predicate, MachineIRBuilder &MIB) const {
4901 // We're at a tree leaf, produce a conditional comparison operation.
4902 auto &MRI = *MIB.getMRI();
4903 MachineInstr *ValDef = MRI.getVRegDef(Val);
4904 unsigned Opcode = ValDef->getOpcode();
4905 if (auto *Cmp = dyn_cast<GAnyCmp>(ValDef)) {
4906 Register LHS = Cmp->getLHSReg();
4907 Register RHS = Cmp->getRHSReg();
4908 CmpInst::Predicate CC = Cmp->getCond();
4909 if (Negate)
4910 CC = CmpInst::getInversePredicate(CC);
4911 if (isa<GICmp>(Cmp)) {
4912 OutCC = changeICMPPredToAArch64CC(CC);
4913 } else {
4914 // Handle special FP cases.
4915 AArch64CC::CondCode ExtraCC;
4916 changeFPCCToANDAArch64CC(CC, OutCC, ExtraCC);
4917 // Some floating point conditions can't be tested with a single condition
4918 // code. Construct an additional comparison in this case.
4919 if (ExtraCC != AArch64CC::AL) {
4920 MachineInstr *ExtraCmp;
4921 if (!CCOp)
4922 ExtraCmp = emitFPCompare(LHS, RHS, MIB, CC);
4923 else
4924 ExtraCmp =
4925 emitConditionalComparison(LHS, RHS, CC, Predicate, ExtraCC, MIB);
4926 CCOp = ExtraCmp->getOperand(0).getReg();
4927 Predicate = ExtraCC;
4928 }
4929 }
4930
4931 // Produce a normal comparison if we are first in the chain
4932 if (!CCOp) {
4933 auto Dst = MRI.cloneVirtualRegister(LHS);
4934 if (isa<GICmp>(Cmp))
4935 return emitSUBS(Dst, Cmp->getOperand(2), Cmp->getOperand(3), MIB);
4936 return emitFPCompare(Cmp->getOperand(2).getReg(),
4937 Cmp->getOperand(3).getReg(), MIB);
4938 }
4939 // Otherwise produce a ccmp.
4940 return emitConditionalComparison(LHS, RHS, CC, Predicate, OutCC, MIB);
4941 }
4942 assert(MRI.hasOneNonDBGUse(Val) && "Valid conjunction/disjunction tree");
4943
4944 bool IsOR = Opcode == TargetOpcode::G_OR;
4945
4946 Register LHS = ValDef->getOperand(1).getReg();
4947 bool CanNegateL;
4948 bool MustBeFirstL;
4949 bool ValidL = canEmitConjunction(LHS, CanNegateL, MustBeFirstL, IsOR, MRI);
4950 assert(ValidL && "Valid conjunction/disjunction tree");
4951 (void)ValidL;
4952
4953 Register RHS = ValDef->getOperand(2).getReg();
4954 bool CanNegateR;
4955 bool MustBeFirstR;
4956 bool ValidR = canEmitConjunction(RHS, CanNegateR, MustBeFirstR, IsOR, MRI);
4957 assert(ValidR && "Valid conjunction/disjunction tree");
4958 (void)ValidR;
4959
4960 // Swap sub-tree that must come first to the right side.
4961 if (MustBeFirstL) {
4962 assert(!MustBeFirstR && "Valid conjunction/disjunction tree");
4963 std::swap(LHS, RHS);
4964 std::swap(CanNegateL, CanNegateR);
4965 std::swap(MustBeFirstL, MustBeFirstR);
4966 }
4967
4968 bool NegateR;
4969 bool NegateAfterR;
4970 bool NegateL;
4971 bool NegateAfterAll;
4972 if (Opcode == TargetOpcode::G_OR) {
4973 // Swap the sub-tree that we can negate naturally to the left.
4974 if (!CanNegateL) {
4975 assert(CanNegateR && "at least one side must be negatable");
4976 assert(!MustBeFirstR && "invalid conjunction/disjunction tree");
4977 assert(!Negate);
4978 std::swap(LHS, RHS);
4979 NegateR = false;
4980 NegateAfterR = true;
4981 } else {
4982 // Negate the left sub-tree if possible, otherwise negate the result.
4983 NegateR = CanNegateR;
4984 NegateAfterR = !CanNegateR;
4985 }
4986 NegateL = true;
4987 NegateAfterAll = !Negate;
4988 } else {
4989 assert(Opcode == TargetOpcode::G_AND &&
4990 "Valid conjunction/disjunction tree");
4991 assert(!Negate && "Valid conjunction/disjunction tree");
4992
4993 NegateL = false;
4994 NegateR = false;
4995 NegateAfterR = false;
4996 NegateAfterAll = false;
4997 }
4998
4999 // Emit sub-trees.
5000 AArch64CC::CondCode RHSCC;
5001 MachineInstr *CmpR =
5002 emitConjunctionRec(RHS, RHSCC, NegateR, CCOp, Predicate, MIB);
5003 if (NegateAfterR)
5004 RHSCC = AArch64CC::getInvertedCondCode(RHSCC);
5005 MachineInstr *CmpL = emitConjunctionRec(
5006 LHS, OutCC, NegateL, CmpR->getOperand(0).getReg(), RHSCC, MIB);
5007 if (NegateAfterAll)
5008 OutCC = AArch64CC::getInvertedCondCode(OutCC);
5009 return CmpL;
5010}
5011
5012MachineInstr *AArch64InstructionSelector::emitConjunction(
5013 Register Val, AArch64CC::CondCode &OutCC, MachineIRBuilder &MIB) const {
5014 bool DummyCanNegate;
5015 bool DummyMustBeFirst;
5016 if (!canEmitConjunction(Val, DummyCanNegate, DummyMustBeFirst, false,
5017 *MIB.getMRI()))
5018 return nullptr;
5019 return emitConjunctionRec(Val, OutCC, false, Register(), AArch64CC::AL, MIB);
5020}
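
// End-to-end sketch (illustrative, not from the original source): given a
// single-use condition such as
//
//   %c = G_AND (G_FCMP olt, %f0, %f1), (G_ICMP eq, %x, %y)
//
// emitConjunction returns the final compare of a cmp/ccmp (or fcmp/fccmp)
// chain plus the AArch64 condition code OutCC; the caller (e.g. the select
// lowering below) then consumes NZCV directly with a CSEL/FCSEL on OutCC
// instead of materialising %c into a register.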
5021
5022bool AArch64InstructionSelector::tryOptSelectConjunction(GSelect &SelI,
5023 MachineInstr &CondMI) {
5024 AArch64CC::CondCode AArch64CC;
5025 MachineInstr *ConjMI = emitConjunction(SelI.getCondReg(), AArch64CC, MIB);
5026 if (!ConjMI)
5027 return false;
5028
5029 emitSelect(SelI.getReg(0), SelI.getTrueReg(), SelI.getFalseReg(), AArch64CC, MIB);
5030 SelI.eraseFromParent();
5031 return true;
5032}
5033
5034bool AArch64InstructionSelector::tryOptSelect(GSelect &I) {
5035 MachineRegisterInfo &MRI = *MIB.getMRI();
5036 // We want to recognize this pattern:
5037 //
5038 // $z = G_FCMP pred, $x, $y
5039 // ...
5040 // $w = G_SELECT $z, $a, $b
5041 //
5042 // Where the value of $z is *only* ever used by the G_SELECT (possibly with
5043 // some copies/truncs in between).
5044 //
5045 // If we see this, then we can emit something like this:
5046 //
5047 // fcmp $x, $y
5048 // fcsel $w, $a, $b, pred
5049 //
5050 // Rather than emitting both of the rather long sequences in the standard
5051 // G_FCMP/G_SELECT select methods.
5052
5053 // First, check if the condition is defined by a compare.
5054 MachineInstr *CondDef = MRI.getVRegDef(I.getOperand(1).getReg());
5055
5056 // We can only fold if all of the defs have one use.
5057 Register CondDefReg = CondDef->getOperand(0).getReg();
5058 if (!MRI.hasOneNonDBGUse(CondDefReg)) {
5059 // Unless it's another select.
5060 for (const MachineInstr &UI : MRI.use_nodbg_instructions(CondDefReg)) {
5061 if (CondDef == &UI)
5062 continue;
5063 if (UI.getOpcode() != TargetOpcode::G_SELECT)
5064 return false;
5065 }
5066 }
5067
5068 // Is the condition defined by a compare?
5069 unsigned CondOpc = CondDef->getOpcode();
5070 if (CondOpc != TargetOpcode::G_ICMP && CondOpc != TargetOpcode::G_FCMP) {
5071 if (tryOptSelectConjunction(I, *CondDef))
5072 return true;
5073 return false;
5074 }
5075
5076 AArch64CC::CondCode CondCode;
5077 if (CondOpc == TargetOpcode::G_ICMP) {
5078 auto Pred =
5079 static_cast<CmpInst::Predicate>(CondDef->getOperand(1).getPredicate());
5080 CondCode = changeICMPPredToAArch64CC(Pred);
5081 emitIntegerCompare(CondDef->getOperand(2), CondDef->getOperand(3),
5082 CondDef->getOperand(1), MIB);
5083 } else {
5084 // Get the condition code for the select.
5085 auto Pred =
5086 static_cast<CmpInst::Predicate>(CondDef->getOperand(1).getPredicate());
5087 AArch64CC::CondCode CondCode2;
5088 changeFCMPPredToAArch64CC(Pred, CondCode, CondCode2);
5089
5090 // changeFCMPPredToAArch64CC sets CondCode2 to AL when we require two
5091 // instructions to emit the comparison.
5092 // TODO: Handle FCMP_UEQ and FCMP_ONE. After that, this check will be
5093 // unnecessary.
5094 if (CondCode2 != AArch64CC::AL)
5095 return false;
5096
5097 if (!emitFPCompare(CondDef->getOperand(2).getReg(),
5098 CondDef->getOperand(3).getReg(), MIB)) {
5099 LLVM_DEBUG(dbgs() << "Couldn't emit compare for select!\n");
5100 return false;
5101 }
5102 }
5103
5104 // Emit the select.
5105 emitSelect(I.getOperand(0).getReg(), I.getOperand(2).getReg(),
5106 I.getOperand(3).getReg(), CondCode, MIB);
5107 I.eraseFromParent();
5108 return true;
5109}
5110
5111MachineInstr *AArch64InstructionSelector::tryFoldIntegerCompare(
5112 MachineOperand &LHS, MachineOperand &RHS, MachineOperand &Predicate,
5113 MachineIRBuilder &MIRBuilder) const {
5114 assert(LHS.isReg() && RHS.isReg() && Predicate.isPredicate() &&
5115 "Unexpected MachineOperand");
5116 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
5117 // We want to find this sort of thing:
5118 // x = G_SUB 0, y
5119 // G_ICMP z, x
5120 //
5121 // In this case, we can fold the G_SUB into the G_ICMP using a CMN instead.
5122 // e.g:
5123 //
5124 // cmn z, y
5125
5126 // Check if the RHS or LHS of the G_ICMP is defined by a SUB
5127 MachineInstr *LHSDef = getDefIgnoringCopies(LHS.getReg(), MRI);
5128 MachineInstr *RHSDef = getDefIgnoringCopies(RHS.getReg(), MRI);
5129 auto P = static_cast<CmpInst::Predicate>(Predicate.getPredicate());
5130 // Given this:
5131 //
5132 // x = G_SUB 0, y
5133 // G_ICMP x, z
5134 //
5135 // Produce this:
5136 //
5137 // cmn y, z
5138 if (isCMN(LHSDef, P, MRI))
5139 return emitCMN(LHSDef->getOperand(2), RHS, MIRBuilder);
5140
5141 // Same idea here, but with the RHS of the compare instead:
5142 //
5143 // Given this:
5144 //
5145 // x = G_SUB 0, y
5146 // G_ICMP z, x
5147 //
5148 // Produce this:
5149 //
5150 // cmn z, y
5151 if (isCMN(RHSDef, P, MRI))
5152 return emitCMN(LHS, RHSDef->getOperand(2), MIRBuilder);
5153
5154 // Given this:
5155 //
5156 // z = G_AND x, y
5157 // G_ICMP z, 0
5158 //
5159 // Produce this if the compare is not unsigned (signed or equality):
5160 //
5161 // tst x, y
5162 if (!CmpInst::isUnsigned(P) && LHSDef &&
5163 LHSDef->getOpcode() == TargetOpcode::G_AND) {
5164 // Make sure that the RHS is 0.
5165 auto ValAndVReg = getIConstantVRegValWithLookThrough(RHS.getReg(), MRI);
5166 if (!ValAndVReg || ValAndVReg->Value != 0)
5167 return nullptr;
5168
5169 return emitTST(LHSDef->getOperand(1),
5170 LHSDef->getOperand(2), MIRBuilder);
5171 }
5172
5173 return nullptr;
5174}
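
// Concrete instances of the folds above (a sketch, not from the original
// source):
//
//   %n:_(s32) = G_SUB 0, %y
//   %c:_(s1)  = G_ICMP eq, %x, %n   -->  CMN %x, %y  (ADDS, result discarded)
//
//   %a:_(s32) = G_AND %x, %y
//   %c:_(s1)  = G_ICMP ne, %a, 0    -->  TST %x, %y  (ANDS, result discarded)
//
// in both cases only the NZCV flags produced by the folded instruction are
// consumed.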
5175
5176bool AArch64InstructionSelector::selectShuffleVector(
5177 MachineInstr &I, MachineRegisterInfo &MRI) {
5178 const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
5179 Register Src1Reg = I.getOperand(1).getReg();
5180 const LLT Src1Ty = MRI.getType(Src1Reg);
5181 Register Src2Reg = I.getOperand(2).getReg();
5182 const LLT Src2Ty = MRI.getType(Src2Reg);
5183 ArrayRef<int> Mask = I.getOperand(3).getShuffleMask();
5184
5185 MachineBasicBlock &MBB = *I.getParent();
5186 MachineFunction &MF = *MBB.getParent();
5187 LLVMContext &Ctx = MF.getFunction().getContext();
5188
5189 // G_SHUFFLE_VECTOR is weird in that the source operands can be scalars, if
5190 // it originated from a <1 x T> type. Those should have been lowered into
5191 // G_BUILD_VECTOR earlier.
5192 if (!Src1Ty.isVector() || !Src2Ty.isVector()) {
5193 LLVM_DEBUG(dbgs() << "Could not select a \"scalar\" G_SHUFFLE_VECTOR\n");
5194 return false;
5195 }
5196
5197 unsigned BytesPerElt = DstTy.getElementType().getSizeInBits() / 8;
5198
5199 SmallVector<Constant *, 64> CstIdxs;
5200 for (int Val : Mask) {
5201 // For now, we just treat any undef indexes as 0. This should be
5202 // optimized in the future, e.g. to select DUP etc.
5203 Val = Val < 0 ? 0 : Val;
5204 for (unsigned Byte = 0; Byte < BytesPerElt; ++Byte) {
5205 unsigned Offset = Byte + Val * BytesPerElt;
5206 CstIdxs.emplace_back(ConstantInt::get(Type::getInt8Ty(Ctx), Offset));
5207 }
5208 }
5209
5210 // Use a constant pool to load the index vector for TBL.
5211 Constant *CPVal = ConstantVector::get(CstIdxs);
5212 MachineInstr *IndexLoad = emitLoadFromConstantPool(CPVal, MIB);
5213 if (!IndexLoad) {
5214 LLVM_DEBUG(dbgs() << "Could not load from a constant pool");
5215 return false;
5216 }
5217
5218 if (DstTy.getSizeInBits() != 128) {
5219 assert(DstTy.getSizeInBits() == 64 && "Unexpected shuffle result ty");
5220 // This case can be done with TBL1.
5221 MachineInstr *Concat = emitVectorConcat(None, Src1Reg, Src2Reg, MIB);
5222 if (!Concat) {
5223 LLVM_DEBUG(dbgs() << "Could not do vector concat for tbl1");
5224 return false;
5225 }
5226
5227 // The constant pool load will be 64 bits, so we need to convert it to an FPR128 reg.
5228 IndexLoad = emitScalarToVector(64, &AArch64::FPR128RegClass,
5229 IndexLoad->getOperand(0).getReg(), MIB);
5230
5231 auto TBL1 = MIB.buildInstr(
5232 AArch64::TBLv16i8One, {&AArch64::FPR128RegClass},
5233 {Concat->getOperand(0).getReg(), IndexLoad->getOperand(0).getReg()});
5234 constrainSelectedInstRegOperands(*TBL1, TII, TRI, RBI);
5235
5236 auto Copy =
5237 MIB.buildInstr(TargetOpcode::COPY, {I.getOperand(0).getReg()}, {})
5238 .addReg(TBL1.getReg(0), 0, AArch64::dsub);
5239 RBI.constrainGenericRegister(Copy.getReg(0), AArch64::FPR64RegClass, MRI);
5240 I.eraseFromParent();
5241 return true;
5242 }
5243
5244 // For TBL2 we need to emit a REG_SEQUENCE to tie together two consecutive
5245 // Q registers for regalloc.
5246 SmallVector<Register, 2> Regs = {Src1Reg, Src2Reg};
5247 auto RegSeq = createQTuple(Regs, MIB);
5248 auto TBL2 = MIB.buildInstr(AArch64::TBLv16i8Two, {I.getOperand(0)},
5249 {RegSeq, IndexLoad->getOperand(0)});
5250 constrainSelectedInstRegOperands(*TBL2, TII, TRI, RBI);
5251 I.eraseFromParent();
5252 return true;
5253}
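
// Worked example of the TBL index construction above (illustrative): shuffling
// two <2 x s32> sources with mask [1, 2] gives BytesPerElt = 4 and byte indices
// {4,5,6,7, 8,9,10,11}: bytes 4..7 pick element 1 of the first source and bytes
// 8..11 pick element 0 of the second source once the two are concatenated into
// a single 128-bit register. The resulting <8 x i8> constant is loaded from the
// constant pool and fed to TBLv16i8One on the 64-bit-result path.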
5254
5255MachineInstr *AArch64InstructionSelector::emitLaneInsert(
5256 Optional<Register> DstReg, Register SrcReg, Register EltReg,
5257 unsigned LaneIdx, const RegisterBank &RB,
5258 MachineIRBuilder &MIRBuilder) const {
5259 MachineInstr *InsElt = nullptr;
5260 const TargetRegisterClass *DstRC = &AArch64::FPR128RegClass;
5261 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
5262
5263 // Create a register to define with the insert if one wasn't passed in.
5264 if (!DstReg)
5265 DstReg = MRI.createVirtualRegister(DstRC);
5266
5267 unsigned EltSize = MRI.getType(EltReg).getSizeInBits();
5268 unsigned Opc = getInsertVecEltOpInfo(RB, EltSize).first;
5269
5270 if (RB.getID() == AArch64::FPRRegBankID) {
5271 auto InsSub = emitScalarToVector(EltSize, DstRC, EltReg, MIRBuilder);
5272 InsElt = MIRBuilder.buildInstr(Opc, {*DstReg}, {SrcReg})
5273 .addImm(LaneIdx)
5274 .addUse(InsSub->getOperand(0).getReg())
5275 .addImm(0);
5276 } else {
5277 InsElt = MIRBuilder.buildInstr(Opc, {*DstReg}, {SrcReg})
5278 .addImm(LaneIdx)
5279 .addUse(EltReg);
5280 }
5281
5282 constrainSelectedInstRegOperands(*InsElt, TII, TRI, RBI);
5283 return InsElt;
5284}
5285
5286bool AArch64InstructionSelector::selectUSMovFromExtend(
5287 MachineInstr &MI, MachineRegisterInfo &MRI) {
5288 if (MI.getOpcode() != TargetOpcode::G_SEXT &&
5289 MI.getOpcode() != TargetOpcode::G_ZEXT &&
5290 MI.getOpcode() != TargetOpcode::G_ANYEXT)
5291 return false;
5292 bool IsSigned = MI.getOpcode() == TargetOpcode::G_SEXT;
5293 const Register DefReg = MI.getOperand(0).getReg();
5294 const LLT DstTy = MRI.getType(DefReg);
5295 unsigned DstSize = DstTy.getSizeInBits();
5296
5297 if (DstSize != 32 && DstSize != 64)
5298 return false;
5299
5300 MachineInstr *Extract = getOpcodeDef(TargetOpcode::G_EXTRACT_VECTOR_ELT,
5301 MI.getOperand(1).getReg(), MRI);
5302 int64_t Lane;
5303 if (!Extract || !mi_match(Extract->getOperand(2).getReg(), MRI, m_ICst(Lane)))
5304 return false;
5305 Register Src0 = Extract->getOperand(1).getReg();
5306
5307 const LLT &VecTy = MRI.getType(Src0);
5308
5309 if (VecTy.getSizeInBits() != 128) {
5310 const MachineInstr *ScalarToVector = emitScalarToVector(
5311 VecTy.getSizeInBits(), &AArch64::FPR128RegClass, Src0, MIB);
5312 assert(ScalarToVector && "Didn't expect emitScalarToVector to fail!");
5313 Src0 = ScalarToVector->getOperand(0).getReg();
5314 }
5315
5316 unsigned Opcode;
5317 if (DstSize == 64 && VecTy.getScalarSizeInBits() == 32)
5318 Opcode = IsSigned ? AArch64::SMOVvi32to64 : AArch64::UMOVvi32;
5319 else if (DstSize == 64 && VecTy.getScalarSizeInBits() == 16)
5320 Opcode = IsSigned ? AArch64::SMOVvi16to64 : AArch64::UMOVvi16;
5321 else if (DstSize == 64 && VecTy.getScalarSizeInBits() == 8)
5322 Opcode = IsSigned ? AArch64::SMOVvi8to64 : AArch64::UMOVvi8;
5323 else if (DstSize == 32 && VecTy.getScalarSizeInBits() == 16)
5324 Opcode = IsSigned ? AArch64::SMOVvi16to32 : AArch64::UMOVvi16;
5325 else if (DstSize == 32 && VecTy.getScalarSizeInBits() == 8)
5326 Opcode = IsSigned ? AArch64::SMOVvi8to32 : AArch64::UMOVvi8;
5327 else
5328 llvm_unreachable("Unexpected type combo for S/UMov!");
5329
5330 // We may need to generate one of these, depending on the type and sign of the
5331 // input:
5332 // DstReg = SMOV Src0, Lane;
5333 // NewReg = UMOV Src0, Lane; DstReg = SUBREG_TO_REG NewReg, sub_32;
5334 MachineInstr *ExtI = nullptr;
5335 if (DstSize == 64 && !IsSigned) {
5336 Register NewReg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
5337 MIB.buildInstr(Opcode, {NewReg}, {Src0}).addImm(Lane);
5338 ExtI = MIB.buildInstr(AArch64::SUBREG_TO_REG, {DefReg}, {})
5339 .addImm(0)
5340 .addUse(NewReg)
5341 .addImm(AArch64::sub_32);
5342 RBI.constrainGenericRegister(DefReg, AArch64::GPR64RegClass, MRI);
5343 } else
5344 ExtI = MIB.buildInstr(Opcode, {DefReg}, {Src0}).addImm(Lane);
5345
5346 constrainSelectedInstRegOperands(*ExtI, TII, TRI, RBI);
5347 MI.eraseFromParent();
5348 return true;
5349}
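
// Example selections for the extend-of-extract pattern above (illustrative):
//
//   %el:_(s16) = G_EXTRACT_VECTOR_ELT %v:<8 x s16>, 1
//   %d:_(s32)  = G_SEXT %el   -->  SMOVvi16to32 %d, %v, 1
//   %d:_(s64)  = G_ZEXT %el   -->  UMOVvi16 %tmp, %v, 1
//                                  %d = SUBREG_TO_REG 0, %tmp, sub_32
//
// the unsigned 64-bit case goes through a 32-bit UMOV because the upper bits
// are already known to be zero.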
5350
5351bool AArch64InstructionSelector::selectInsertElt(MachineInstr &I,
5352 MachineRegisterInfo &MRI) {
5353 assert(I.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT);
5354
5355 // Get information on the destination.
5356 Register DstReg = I.getOperand(0).getReg();
5357 const LLT DstTy = MRI.getType(DstReg);
5358 unsigned VecSize = DstTy.getSizeInBits();
5359
5360 // Get information on the element we want to insert into the destination.
5361 Register EltReg = I.getOperand(2).getReg();
5362 const LLT EltTy = MRI.getType(EltReg);
5363 unsigned EltSize = EltTy.getSizeInBits();
5364 if (EltSize < 16 || EltSize > 64)
5365 return false; // Don't support all element types yet.
5366
5367 // Find the definition of the index. Bail out if it's not defined by a
5368 // G_CONSTANT.
5369 Register IdxReg = I.getOperand(3).getReg();
5370 auto VRegAndVal = getIConstantVRegValWithLookThrough(IdxReg, MRI);
5371 if (!VRegAndVal)
5372 return false;
5373 unsigned LaneIdx = VRegAndVal->Value.getSExtValue();
5374
5375 // Perform the lane insert.
5376 Register SrcReg = I.getOperand(1).getReg();
5377 const RegisterBank &EltRB = *RBI.getRegBank(EltReg, MRI, TRI);
5378
5379 if (VecSize < 128) {
5380 // If the vector we're inserting into is smaller than 128 bits, widen it
5381 // to 128 to do the insert.
5382 MachineInstr *ScalarToVec =
5383 emitScalarToVector(VecSize, &AArch64::FPR128RegClass, SrcReg, MIB);
5384 if (!ScalarToVec)
5385 return false;
5386 SrcReg = ScalarToVec->getOperand(0).getReg();
5387 }
5388
5389 // Create an insert into a new FPR128 register.
5390 // Note that if our vector is already 128 bits, we end up emitting an extra
5391 // register.
5392 MachineInstr *InsMI =
5393 emitLaneInsert(None, SrcReg, EltReg, LaneIdx, EltRB, MIB);
5394
5395 if (VecSize < 128) {
5396 // If we had to widen to perform the insert, then we have to demote back to
5397 // the original size to get the result we want.
5398 Register DemoteVec = InsMI->getOperand(0).getReg();
5399 const TargetRegisterClass *RC =
5400 getRegClassForTypeOnBank(DstTy, *RBI.getRegBank(DemoteVec, MRI, TRI));
5401 if (RC != &AArch64::FPR32RegClass && RC != &AArch64::FPR64RegClass) {
5402 LLVM_DEBUG(dbgs() << "Unsupported register class!\n");
5403 return false;
5404 }
5405 unsigned SubReg = 0;
5406 if (!getSubRegForClass(RC, TRI, SubReg))
5407 return false;
5408 if (SubReg != AArch64::ssub && SubReg != AArch64::dsub) {
5409 LLVM_DEBUG(dbgs() << "Unsupported destination size! (" << VecSize
5410 << ")\n");
5411 return false;
5412 }
5413 MIB.buildInstr(TargetOpcode::COPY, {DstReg}, {})
5414 .addReg(DemoteVec, 0, SubReg);
5415 RBI.constrainGenericRegister(DstReg, *RC, MRI);
5416 } else {
5417 // No widening needed.
5418 InsMI->getOperand(0).setReg(DstReg);
5419 constrainSelectedInstRegOperands(*InsMI, TII, TRI, RBI);
5420 }
5421
5422 I.eraseFromParent();
5423 return true;
5424}
5425
5426MachineInstr *
5427AArch64InstructionSelector::emitConstantVector(Register Dst, Constant *CV,
5428 MachineIRBuilder &MIRBuilder,
5429 MachineRegisterInfo &MRI) {
5430 LLT DstTy = MRI.getType(Dst);
5431 unsigned DstSize = DstTy.getSizeInBits();
5432 if (CV->isNullValue()) {
5433 if (DstSize == 128) {
5434 auto Mov =
5435 MIRBuilder.buildInstr(AArch64::MOVIv2d_ns, {Dst}, {}).addImm(0);
5436 constrainSelectedInstRegOperands(*Mov, TII, TRI, RBI);
5437 return &*Mov;
5438 }
5439
5440 if (DstSize == 64) {
5441 auto Mov =
5442 MIRBuilder
5443 .buildInstr(AArch64::MOVIv2d_ns, {&AArch64::FPR128RegClass}, {})
5444 .addImm(0);
5445 auto Copy = MIRBuilder.buildInstr(TargetOpcode::COPY, {Dst}, {})
5446 .addReg(Mov.getReg(0), 0, AArch64::dsub);
5447 RBI.constrainGenericRegister(Dst, AArch64::FPR64RegClass, MRI);
5448 return &*Copy;
5449 }
5450 }
5451
5452 auto *CPLoad = emitLoadFromConstantPool(CV, MIRBuilder);
5453 if (!CPLoad) {
5454 LLVM_DEBUG(dbgs() << "Could not generate cp load for constant vector!");
5455 return nullptr;
5456 }
5457
5458 auto Copy = MIRBuilder.buildCopy(Dst, CPLoad->getOperand(0));
5459 RBI.constrainGenericRegister(
5460 Dst, *MRI.getRegClass(CPLoad->getOperand(0).getReg()), MRI);
5461 return &*Copy;
5462}
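
// Example outputs (illustrative): a 128-bit zero vector such as
// <4 x s32> zeroinitializer becomes "MOVIv2d_ns %dst, 0"; a 64-bit zero vector
// uses the same MOVI into an FPR128 followed by a dsub subregister copy; any
// other constant vector falls back to a constant-pool load plus a copy into the
// destination's register class.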
5463
5464bool AArch64InstructionSelector::tryOptConstantBuildVec(
5465 MachineInstr &I, LLT DstTy, MachineRegisterInfo &MRI) {
5466 assert(I.getOpcode() == TargetOpcode::G_BUILD_VECTOR);
5467 unsigned DstSize = DstTy.getSizeInBits();
5468 assert(DstSize <= 128 && "Unexpected build_vec type!");
5469 if (DstSize < 32)
5470 return false;
5471 // Check if we're building a constant vector, in which case we want to
5472 // generate a constant pool load instead of a vector insert sequence.
5473 SmallVector<Constant *, 16> Csts;
5474 for (unsigned Idx = 1; Idx < I.getNumOperands(); ++Idx) {
5475 // Try to find G_CONSTANT or G_FCONSTANT
5476 auto *OpMI =
5477 getOpcodeDef(TargetOpcode::G_CONSTANT, I.getOperand(Idx).getReg(), MRI);
5478 if (OpMI)
5479 Csts.emplace_back(
5480 const_cast<ConstantInt *>(OpMI->getOperand(1).getCImm()));
5481 else if ((OpMI = getOpcodeDef(TargetOpcode::G_FCONSTANT,
5482 I.getOperand(Idx).getReg(), MRI)))
5483 Csts.emplace_back(
5484 const_cast<ConstantFP *>(OpMI->getOperand(1).getFPImm()));
5485 else
5486 return false;
5487 }
5488 Constant *CV = ConstantVector::get(Csts);
5489 if (!emitConstantVector(I.getOperand(0).getReg(), CV, MIB, MRI))
5490 return false;
5491 I.eraseFromParent();
5492 return true;
5493}
5494
5495bool AArch64InstructionSelector::tryOptBuildVecToSubregToReg(
5496 MachineInstr &I, MachineRegisterInfo &MRI) {
5497 // Given:
5498 // %vec = G_BUILD_VECTOR %elt, %undef, %undef, ... %undef
5499 //
5500 // Select the G_BUILD_VECTOR as a SUBREG_TO_REG from %elt.
5501 Register Dst = I.getOperand(0).getReg();
5502 Register EltReg = I.getOperand(1).getReg();
5503 LLT EltTy = MRI.getType(EltReg);
5504 // If the index isn't on the same bank as its elements, then this can't be a
5505 // SUBREG_TO_REG.
5506 const RegisterBank &EltRB = *RBI.getRegBank(EltReg, MRI, TRI);
5507 const RegisterBank &DstRB = *RBI.getRegBank(Dst, MRI, TRI);
5508 if (EltRB != DstRB)
5509 return false;
5510 if (any_of(make_range(I.operands_begin() + 2, I.operands_end()),
5511 [&MRI](const MachineOperand &Op) {
5512 return !getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, Op.getReg(),
5513 MRI);
5514 }))
5515 return false;
5516 unsigned SubReg;
5517 const TargetRegisterClass *EltRC = getRegClassForTypeOnBank(EltTy, EltRB);
5518 if (!EltRC)
5519 return false;
5520 const TargetRegisterClass *DstRC =
5521 getRegClassForTypeOnBank(MRI.getType(Dst), DstRB);
5522 if (!DstRC)
5523 return false;
5524 if (!getSubRegForClass(EltRC, TRI, SubReg))
5525 return false;
5526 auto SubregToReg = MIB.buildInstr(AArch64::SUBREG_TO_REG, {Dst}, {})
5527 .addImm(0)
5528 .addUse(EltReg)
5529 .addImm(SubReg);
5530 I.eraseFromParent();
5531 constrainSelectedInstRegOperands(*SubregToReg, TII, TRI, RBI);
5532 return RBI.constrainGenericRegister(Dst, *DstRC, MRI);
5533}
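
// Example of the pattern above (illustrative; ssub is the expected subregister
// for a 32-bit FPR element):
//
//   %v:fpr(<2 x s32>) = G_BUILD_VECTOR %x:fpr(s32), %undef
//     -->  %v = SUBREG_TO_REG 0, %x, ssub
//
// only the first element is defined, so the build_vector collapses to placing
// %x in the low subregister of the wider vector register; the element and
// destination must live on the same register bank for this to apply.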
5534
5535bool AArch64InstructionSelector::selectBuildVector(MachineInstr &I,
5536 MachineRegisterInfo &MRI) {
5537 assert(I.getOpcode() == TargetOpcode::G_BUILD_VECTOR);
5538 // Until we port more of the optimized selections, for now just use a vector
5539 // insert sequence.
5540 const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
5541 const LLT EltTy = MRI.getType(I.getOperand(1).getReg());
5542 unsigned EltSize = EltTy.getSizeInBits();
5543
5544 if (tryOptConstantBuildVec(I, DstTy, MRI))
5545 return true;
5546 if (tryOptBuildVecToSubregToReg(I, MRI))
5547 return true;
5548
5549 if (EltSize < 16 || EltSize > 64)
5550 return false; // Don't support all element types yet.
5551 const RegisterBank &RB = *RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI);
5552
5553 const TargetRegisterClass *DstRC = &AArch64::FPR128RegClass;
5554 MachineInstr *ScalarToVec =
5555 emitScalarToVector(DstTy.getElementType().getSizeInBits(), DstRC,
5556 I.getOperand(1).getReg(), MIB);
5557 if (!ScalarToVec)
5558 return false;
5559
5560 Register DstVec = ScalarToVec->getOperand(0).getReg();
5561 unsigned DstSize = DstTy.getSizeInBits();
5562
5563 // Keep track of the last MI we inserted. Later on, we might be able to save
5564 // a copy using it.
5565 MachineInstr *PrevMI = nullptr;
5566 for (unsigned i = 2, e = DstSize / EltSize + 1; i < e; ++i) {
5567 // Note that if we don't do a subregister copy, we can end up making an
5568 // extra register.
5569 PrevMI = &*emitLaneInsert(None, DstVec, I.getOperand(i).getReg(), i - 1, RB,
5570 MIB);
5571 DstVec = PrevMI->getOperand(0).getReg();
5572 }
5573
5574 // If DstTy's size in bits is less than 128, then emit a subregister copy
5575 // from DstVec to the last register we've defined.
5576 if (DstSize < 128) {
5577 // Force this to be FPR using the destination vector.
5578 const TargetRegisterClass *RC =
5579 getRegClassForTypeOnBank(DstTy, *RBI.getRegBank(DstVec, MRI, TRI));
5580 if (!RC)
5581 return false;
5582 if (RC != &AArch64::FPR32RegClass && RC != &AArch64::FPR64RegClass) {
5583 LLVM_DEBUG(dbgs() << "Unsupported register class!\n");
5584 return false;
5585 }
5586
5587 unsigned SubReg = 0;
5588 if (!getSubRegForClass(RC, TRI, SubReg))
5589 return false;
5590 if (SubReg != AArch64::ssub && SubReg != AArch64::dsub) {
5591 LLVM_DEBUG(dbgs() << "Unsupported destination size! (" << DstSize
5592 << ")\n");
5593 return false;
5594 }
5595
5596 Register Reg = MRI.createVirtualRegister(RC);
5597 Register DstReg = I.getOperand(0).getReg();
5598
5599 MIB.buildInstr(TargetOpcode::COPY, {DstReg}, {}).addReg(DstVec, 0, SubReg);
5600 MachineOperand &RegOp = I.getOperand(1);
5601 RegOp.setReg(Reg);
5602 RBI.constrainGenericRegister(DstReg, *RC, MRI);
5603 } else {
5604 // We don't need a subregister copy. Save a copy by re-using the
5605 // destination register on the final insert.
5606 assert(PrevMI && "PrevMI was null?");
5607 PrevMI->getOperand(0).setReg(I.getOperand(0).getReg());
5608 constrainSelectedInstRegOperands(*PrevMI, TII, TRI, RBI);
5609 }
5610
5611 I.eraseFromParent();
5612 return true;
5613}
5614
5615bool AArch64InstructionSelector::selectVectorLoadIntrinsic(unsigned Opc,
5616 unsigned NumVecs,
5617 MachineInstr &I) {
5618 assert(I.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS);
5619 assert(Opc && "Expected an opcode?");
5620 assert(NumVecs > 1 && NumVecs < 5 && "Only support 2, 3, or 4 vectors");
5621 auto &MRI = *MIB.getMRI();
5622 LLT Ty = MRI.getType(I.getOperand(0).getReg());
5623 unsigned Size = Ty.getSizeInBits();
5624 assert((Size == 64 || Size == 128) &&
5625 "Destination must be 64 bits or 128 bits?");
5626 unsigned SubReg = Size == 64 ? AArch64::dsub0 : AArch64::qsub0;
5627 auto Ptr = I.getOperand(I.getNumOperands() - 1).getReg();
5628 assert(MRI.getType(Ptr).isPointer() && "Expected a pointer type?");
5629 auto Load = MIB.buildInstr(Opc, {Ty}, {Ptr});
5630 Load.cloneMemRefs(I);
5631 constrainSelectedInstRegOperands(*Load, TII, TRI, RBI);
5632 Register SelectedLoadDst = Load->getOperand(0).getReg();
5633 for (unsigned Idx = 0; Idx < NumVecs; ++Idx) {
5634 auto Vec = MIB.buildInstr(TargetOpcode::COPY, {I.getOperand(Idx)}, {})
5635 .addReg(SelectedLoadDst, 0, SubReg + Idx);
5636 // Emit the subreg copies and immediately select them.
5637 // FIXME: We should refactor our copy code into an emitCopy helper and
5638 // clean up uses of this pattern elsewhere in the selector.
5639 selectCopy(*Vec, TII, MRI, TRI, RBI);
5640 }
5641 return true;
5642}
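
// Example (illustrative): an aarch64.neon.ld2 of two <4 x s32> values selects
// an opcode such as LD2Twov4s, whose single def is a QQ register tuple; the
// loop above then peels the individual vectors back out with subregister
// copies through qsub0 and qsub1 (dsub0/dsub1 for 64-bit results).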
5643
5644bool AArch64InstructionSelector::selectIntrinsicWithSideEffects(
5645 MachineInstr &I, MachineRegisterInfo &MRI) {
5646 // Find the intrinsic ID.
5647 unsigned IntrinID = I.getIntrinsicID();
5648
5649 const LLT S8 = LLT::scalar(8);
5650 const LLT S16 = LLT::scalar(16);
5651 const LLT S32 = LLT::scalar(32);
5652 const LLT S64 = LLT::scalar(64);
5653 const LLT P0 = LLT::pointer(0, 64);
5654 // Select the instruction.
5655 switch (IntrinID) {
5656 default:
5657 return false;
5658 case Intrinsic::aarch64_ldxp:
5659 case Intrinsic::aarch64_ldaxp: {
5660 auto NewI = MIB.buildInstr(
5661 IntrinID == Intrinsic::aarch64_ldxp ? AArch64::LDXPX : AArch64::LDAXPX,
5662 {I.getOperand(0).getReg(), I.getOperand(1).getReg()},
5663 {I.getOperand(3)});
5664 NewI.cloneMemRefs(I);
5665 constrainSelectedInstRegOperands(*NewI, TII, TRI, RBI);
5666 break;
5667 }
5668 case Intrinsic::trap:
5669 MIB.buildInstr(AArch64::BRK, {}, {}).addImm(1);
5670 break;
5671 case Intrinsic::debugtrap:
5672 MIB.buildInstr(AArch64::BRK, {}, {}).addImm(0xF000);
5673 break;
5674 case Intrinsic::ubsantrap:
5675 MIB.buildInstr(AArch64::BRK, {}, {})
5676 .addImm(I.getOperand(1).getImm() | ('U' << 8));
5677 break;
5678 case Intrinsic::aarch64_neon_ld2: {
5679 LLT Ty = MRI.getType(I.getOperand(0).getReg());
5680 unsigned Opc = 0;
5681 if (Ty == LLT::fixed_vector(8, S8))