Bug Summary

File: build/llvm-toolchain-snapshot-15~++20220419111428+a65f2730d291/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
Warning: line 1013, column 7: 6th function call argument is an uninitialized value
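
The warning reduces to the pattern below. The standalone sketch (pickSubReg and useSubReg are hypothetical stand-ins for getSubRegForClass and copySubReg; it is not the analyzed code itself) has the same shape the analyzer flags: a helper that can return false without writing its out-parameter, a caller that ignores that result, and the still-uninitialized value passed on as a call argument.

// Standalone reduction of the flagged pattern (illustrative only).
#include <cstdio>

// Stand-in for getSubRegForClass(): may return false without writing SubReg.
static bool pickSubReg(unsigned SizeInBits, unsigned &SubReg) {
  switch (SizeInBits) {
  case 32: SubReg = 1; return true;
  case 64: SubReg = 2; return true;
  default: return false;   // returns without writing 'SubReg'
  }
}

// Stand-in for copySubReg(): reads its argument.
static void useSubReg(unsigned SubReg) { std::printf("subreg = %u\n", SubReg); }

int main() {
  unsigned SubReg;          // declared without an initial value (step 37)
  pickSubReg(128, SubReg);  // bool result ignored; 'default' case taken (step 46)
  useSubReg(SubReg);        // argument is an uninitialized value (step 48)
}

In the real code the same three steps happen at lines 995, 1012 and 1013 of selectCopy(), as traced in the annotated source below.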

Annotated Source Code


clang -cc1 -cc1 -triple x86_64-pc-linux-gnu -analyze -disable-free -clear-ast-before-backend -disable-llvm-verifier -discard-value-names -main-file-name AArch64InstructionSelector.cpp -analyzer-store=region -analyzer-opt-analyze-nested-blocks -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=cplusplus -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -analyzer-config-compatibility-mode=true -mrelocation-model pic -pic-level 2 -mframe-pointer=none -fmath-errno -ffp-contract=on -fno-rounding-math -mconstructor-aliases -funwind-tables=2 -target-cpu x86-64 -tune-cpu generic -debugger-tuning=gdb -ffunction-sections -fdata-sections -fcoverage-compilation-dir=/build/llvm-toolchain-snapshot-15~++20220419111428+a65f2730d291/build-llvm/tools/clang/stage2-bins -resource-dir /usr/lib/llvm-15/lib/clang/15.0.0 -D _DEBUG -D _GNU_SOURCE -D __STDC_CONSTANT_MACROS -D __STDC_FORMAT_MACROS -D __STDC_LIMIT_MACROS -I lib/Target/AArch64 -I /build/llvm-toolchain-snapshot-15~++20220419111428+a65f2730d291/llvm/lib/Target/AArch64 -I include -I /build/llvm-toolchain-snapshot-15~++20220419111428+a65f2730d291/llvm/include -D _FORTIFY_SOURCE=2 -D NDEBUG -U NDEBUG -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/c++/10 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/x86_64-linux-gnu/c++/10 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/c++/10/backward -internal-isystem /usr/lib/llvm-15/lib/clang/15.0.0/include -internal-isystem /usr/local/include -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../x86_64-linux-gnu/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -fmacro-prefix-map=/build/llvm-toolchain-snapshot-15~++20220419111428+a65f2730d291/build-llvm/tools/clang/stage2-bins=build-llvm/tools/clang/stage2-bins -fmacro-prefix-map=/build/llvm-toolchain-snapshot-15~++20220419111428+a65f2730d291/= -fcoverage-prefix-map=/build/llvm-toolchain-snapshot-15~++20220419111428+a65f2730d291/build-llvm/tools/clang/stage2-bins=build-llvm/tools/clang/stage2-bins -fcoverage-prefix-map=/build/llvm-toolchain-snapshot-15~++20220419111428+a65f2730d291/= -O3 -Wno-unused-command-line-argument -Wno-unused-parameter -Wwrite-strings -Wno-missing-field-initializers -Wno-long-long -Wno-maybe-uninitialized -Wno-class-memaccess -Wno-redundant-move -Wno-pessimizing-move -Wno-noexcept-type -Wno-comment -std=c++14 -fdeprecated-macro -fdebug-compilation-dir=/build/llvm-toolchain-snapshot-15~++20220419111428+a65f2730d291/build-llvm/tools/clang/stage2-bins -fdebug-prefix-map=/build/llvm-toolchain-snapshot-15~++20220419111428+a65f2730d291/build-llvm/tools/clang/stage2-bins=build-llvm/tools/clang/stage2-bins -fdebug-prefix-map=/build/llvm-toolchain-snapshot-15~++20220419111428+a65f2730d291/= -ferror-limit 19 -fvisibility hidden -fvisibility-inlines-hidden -stack-protector 2 -fgnuc-version=4.2.1 -fcolor-diagnostics -vectorize-loops -vectorize-slp -analyzer-output=html -analyzer-config stable-report-filename=true -faddrsig 
-D__GCC_HAVE_DWARF2_CFI_ASM=1 -o /tmp/scan-build-2022-04-19-125528-33783-1 -x c++ /build/llvm-toolchain-snapshot-15~++20220419111428+a65f2730d291/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp

/build/llvm-toolchain-snapshot-15~++20220419111428+a65f2730d291/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp

1//===- AArch64InstructionSelector.cpp ----------------------------*- C++ -*-==//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8/// \file
9/// This file implements the targeting of the InstructionSelector class for
10/// AArch64.
11/// \todo This should be generated by TableGen.
12//===----------------------------------------------------------------------===//
13
14#include "AArch64GlobalISelUtils.h"
15#include "AArch64InstrInfo.h"
16#include "AArch64MachineFunctionInfo.h"
17#include "AArch64RegisterBankInfo.h"
18#include "AArch64RegisterInfo.h"
19#include "AArch64Subtarget.h"
20#include "AArch64TargetMachine.h"
21#include "MCTargetDesc/AArch64AddressingModes.h"
22#include "MCTargetDesc/AArch64MCTargetDesc.h"
23#include "llvm/ADT/Optional.h"
24#include "llvm/BinaryFormat/Dwarf.h"
25#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
26#include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
27#include "llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h"
28#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
29#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
30#include "llvm/CodeGen/GlobalISel/Utils.h"
31#include "llvm/CodeGen/MachineBasicBlock.h"
32#include "llvm/CodeGen/MachineConstantPool.h"
33#include "llvm/CodeGen/MachineFrameInfo.h"
34#include "llvm/CodeGen/MachineFunction.h"
35#include "llvm/CodeGen/MachineInstr.h"
36#include "llvm/CodeGen/MachineInstrBuilder.h"
37#include "llvm/CodeGen/MachineMemOperand.h"
38#include "llvm/CodeGen/MachineOperand.h"
39#include "llvm/CodeGen/MachineRegisterInfo.h"
40#include "llvm/CodeGen/TargetOpcodes.h"
41#include "llvm/IR/Constants.h"
42#include "llvm/IR/DerivedTypes.h"
43#include "llvm/IR/Instructions.h"
44#include "llvm/IR/IntrinsicsAArch64.h"
45#include "llvm/IR/PatternMatch.h"
46#include "llvm/IR/Type.h"
47#include "llvm/Pass.h"
48#include "llvm/Support/Debug.h"
49#include "llvm/Support/raw_ostream.h"
50
51#define DEBUG_TYPE "aarch64-isel"
52
53using namespace llvm;
54using namespace MIPatternMatch;
55using namespace AArch64GISelUtils;
56
57namespace llvm {
58class BlockFrequencyInfo;
59class ProfileSummaryInfo;
60}
61
62namespace {
63
64#define GET_GLOBALISEL_PREDICATE_BITSET
65#include "AArch64GenGlobalISel.inc"
66#undef GET_GLOBALISEL_PREDICATE_BITSET
67
68
69class AArch64InstructionSelector : public InstructionSelector {
70public:
71 AArch64InstructionSelector(const AArch64TargetMachine &TM,
72 const AArch64Subtarget &STI,
73 const AArch64RegisterBankInfo &RBI);
74
75 bool select(MachineInstr &I) override;
76 static const char *getName() { return DEBUG_TYPE; }
77
78 void setupMF(MachineFunction &MF, GISelKnownBits *KB,
79 CodeGenCoverage &CoverageInfo, ProfileSummaryInfo *PSI,
80 BlockFrequencyInfo *BFI) override {
81 InstructionSelector::setupMF(MF, KB, CoverageInfo, PSI, BFI);
82 MIB.setMF(MF);
83
84 // hasFnAttribute() is expensive to call on every BRCOND selection, so
85 // cache it here for each run of the selector.
86 ProduceNonFlagSettingCondBr =
87 !MF.getFunction().hasFnAttribute(Attribute::SpeculativeLoadHardening);
88 MFReturnAddr = Register();
89
90 processPHIs(MF);
91 }
92
93private:
94 /// tblgen-erated 'select' implementation, used as the initial selector for
95 /// the patterns that don't require complex C++.
96 bool selectImpl(MachineInstr &I, CodeGenCoverage &CoverageInfo) const;
97
98 // A lowering phase that runs before any selection attempts.
99 // Returns true if the instruction was modified.
100 bool preISelLower(MachineInstr &I);
101
102 // An early selection function that runs before the selectImpl() call.
103 bool earlySelect(MachineInstr &I);
104
105 // Do some preprocessing of G_PHIs before we begin selection.
106 void processPHIs(MachineFunction &MF);
107
108 bool earlySelectSHL(MachineInstr &I, MachineRegisterInfo &MRI);
109
110 /// Eliminate same-sized cross-bank copies into stores before selectImpl().
111 bool contractCrossBankCopyIntoStore(MachineInstr &I,
112 MachineRegisterInfo &MRI);
113
114 bool convertPtrAddToAdd(MachineInstr &I, MachineRegisterInfo &MRI);
115
116 bool selectVaStartAAPCS(MachineInstr &I, MachineFunction &MF,
117 MachineRegisterInfo &MRI) const;
118 bool selectVaStartDarwin(MachineInstr &I, MachineFunction &MF,
119 MachineRegisterInfo &MRI) const;
120
121 ///@{
122 /// Helper functions for selectCompareBranch.
123 bool selectCompareBranchFedByFCmp(MachineInstr &I, MachineInstr &FCmp,
124 MachineIRBuilder &MIB) const;
125 bool selectCompareBranchFedByICmp(MachineInstr &I, MachineInstr &ICmp,
126 MachineIRBuilder &MIB) const;
127 bool tryOptCompareBranchFedByICmp(MachineInstr &I, MachineInstr &ICmp,
128 MachineIRBuilder &MIB) const;
129 bool tryOptAndIntoCompareBranch(MachineInstr &AndInst, bool Invert,
130 MachineBasicBlock *DstMBB,
131 MachineIRBuilder &MIB) const;
132 ///@}
133
134 bool selectCompareBranch(MachineInstr &I, MachineFunction &MF,
135 MachineRegisterInfo &MRI);
136
137 bool selectVectorAshrLshr(MachineInstr &I, MachineRegisterInfo &MRI);
138 bool selectVectorSHL(MachineInstr &I, MachineRegisterInfo &MRI);
139
140 // Helper to generate an equivalent of scalar_to_vector into a new register,
141 // returned via 'Dst'.
142 MachineInstr *emitScalarToVector(unsigned EltSize,
143 const TargetRegisterClass *DstRC,
144 Register Scalar,
145 MachineIRBuilder &MIRBuilder) const;
146
147 /// Emit a lane insert into \p DstReg, or a new vector register if None is
148 /// provided.
149 ///
150 /// The lane inserted into is defined by \p LaneIdx. The vector source
151 /// register is given by \p SrcReg. The register containing the element is
152 /// given by \p EltReg.
153 MachineInstr *emitLaneInsert(Optional<Register> DstReg, Register SrcReg,
154 Register EltReg, unsigned LaneIdx,
155 const RegisterBank &RB,
156 MachineIRBuilder &MIRBuilder) const;
157
158 /// Emit a sequence of instructions representing a constant \p CV for a
159 /// vector register \p Dst. (E.g. a MOV, or a load from a constant pool.)
160 ///
161 /// \returns the last instruction in the sequence on success, and nullptr
162 /// otherwise.
163 MachineInstr *emitConstantVector(Register Dst, Constant *CV,
164 MachineIRBuilder &MIRBuilder,
165 MachineRegisterInfo &MRI);
166
167 bool selectInsertElt(MachineInstr &I, MachineRegisterInfo &MRI);
168 bool tryOptConstantBuildVec(MachineInstr &MI, LLT DstTy,
169 MachineRegisterInfo &MRI);
170 /// \returns true if a G_BUILD_VECTOR instruction \p MI can be selected as a
171 /// SUBREG_TO_REG.
172 bool tryOptBuildVecToSubregToReg(MachineInstr &MI, MachineRegisterInfo &MRI);
173 bool selectBuildVector(MachineInstr &I, MachineRegisterInfo &MRI);
174 bool selectMergeValues(MachineInstr &I, MachineRegisterInfo &MRI);
175 bool selectUnmergeValues(MachineInstr &I, MachineRegisterInfo &MRI);
176
177 bool selectShuffleVector(MachineInstr &I, MachineRegisterInfo &MRI);
178 bool selectExtractElt(MachineInstr &I, MachineRegisterInfo &MRI);
179 bool selectConcatVectors(MachineInstr &I, MachineRegisterInfo &MRI);
180 bool selectSplitVectorUnmerge(MachineInstr &I, MachineRegisterInfo &MRI);
181
182 /// Helper function to select vector load intrinsics like
183 /// @llvm.aarch64.neon.ld2.*, @llvm.aarch64.neon.ld4.*, etc.
184 /// \p Opc is the opcode that the selected instruction should use.
185 /// \p NumVecs is the number of vector destinations for the instruction.
186 /// \p I is the original G_INTRINSIC_W_SIDE_EFFECTS instruction.
187 bool selectVectorLoadIntrinsic(unsigned Opc, unsigned NumVecs,
188 MachineInstr &I);
189 bool selectIntrinsicWithSideEffects(MachineInstr &I,
190 MachineRegisterInfo &MRI);
191 bool selectIntrinsic(MachineInstr &I, MachineRegisterInfo &MRI);
192 bool selectVectorICmp(MachineInstr &I, MachineRegisterInfo &MRI);
193 bool selectIntrinsicTrunc(MachineInstr &I, MachineRegisterInfo &MRI) const;
194 bool selectIntrinsicRound(MachineInstr &I, MachineRegisterInfo &MRI) const;
195 bool selectJumpTable(MachineInstr &I, MachineRegisterInfo &MRI);
196 bool selectBrJT(MachineInstr &I, MachineRegisterInfo &MRI);
197 bool selectTLSGlobalValue(MachineInstr &I, MachineRegisterInfo &MRI);
198 bool selectReduction(MachineInstr &I, MachineRegisterInfo &MRI);
199 bool selectMOPS(MachineInstr &I, MachineRegisterInfo &MRI);
200 bool selectUSMovFromExtend(MachineInstr &I, MachineRegisterInfo &MRI);
201
202 unsigned emitConstantPoolEntry(const Constant *CPVal,
203 MachineFunction &MF) const;
204 MachineInstr *emitLoadFromConstantPool(const Constant *CPVal,
205 MachineIRBuilder &MIRBuilder) const;
206
207 // Emit a vector concat operation.
208 MachineInstr *emitVectorConcat(Optional<Register> Dst, Register Op1,
209 Register Op2,
210 MachineIRBuilder &MIRBuilder) const;
211
212 // Emit an integer compare between LHS and RHS, which checks for Predicate.
213 MachineInstr *emitIntegerCompare(MachineOperand &LHS, MachineOperand &RHS,
214 MachineOperand &Predicate,
215 MachineIRBuilder &MIRBuilder) const;
216
217 /// Emit a floating point comparison between \p LHS and \p RHS.
218 /// \p Pred if given is the intended predicate to use.
219 MachineInstr *emitFPCompare(Register LHS, Register RHS,
220 MachineIRBuilder &MIRBuilder,
221 Optional<CmpInst::Predicate> = None) const;
222
223 MachineInstr *emitInstr(unsigned Opcode,
224 std::initializer_list<llvm::DstOp> DstOps,
225 std::initializer_list<llvm::SrcOp> SrcOps,
226 MachineIRBuilder &MIRBuilder,
227 const ComplexRendererFns &RenderFns = None) const;
228 /// Helper function to emit an add or sub instruction.
229 ///
230 /// \p AddrModeAndSizeToOpcode must contain each of the opcode variants above
231 /// in a specific order.
232 ///
233 /// Below is an example of the expected input to \p AddrModeAndSizeToOpcode.
234 ///
235 /// \code
236 /// const std::array<std::array<unsigned, 2>, 4> Table {
237 /// {{AArch64::ADDXri, AArch64::ADDWri},
238 /// {AArch64::ADDXrs, AArch64::ADDWrs},
239 /// {AArch64::ADDXrr, AArch64::ADDWrr},
240 /// {AArch64::SUBXri, AArch64::SUBWri},
241 /// {AArch64::ADDXrx, AArch64::ADDWrx}}};
242 /// \endcode
243 ///
244 /// Each row in the table corresponds to a different addressing mode. Each
245 /// column corresponds to a different register size.
246 ///
247 /// \attention Rows must be structured as follows:
248 /// - Row 0: The ri opcode variants
249 /// - Row 1: The rs opcode variants
250 /// - Row 2: The rr opcode variants
251 /// - Row 3: The ri opcode variants for negative immediates
252 /// - Row 4: The rx opcode variants
253 ///
254 /// \attention Columns must be structured as follows:
255 /// - Column 0: The 64-bit opcode variants
256 /// - Column 1: The 32-bit opcode variants
257 ///
258 /// \p Dst is the destination register of the binop to emit.
259 /// \p LHS is the left-hand operand of the binop to emit.
260 /// \p RHS is the right-hand operand of the binop to emit.
261 MachineInstr *emitAddSub(
262 const std::array<std::array<unsigned, 2>, 5> &AddrModeAndSizeToOpcode,
263 Register Dst, MachineOperand &LHS, MachineOperand &RHS,
264 MachineIRBuilder &MIRBuilder) const;
265 MachineInstr *emitADD(Register DefReg, MachineOperand &LHS,
266 MachineOperand &RHS,
267 MachineIRBuilder &MIRBuilder) const;
268 MachineInstr *emitADDS(Register Dst, MachineOperand &LHS, MachineOperand &RHS,
269 MachineIRBuilder &MIRBuilder) const;
270 MachineInstr *emitSUBS(Register Dst, MachineOperand &LHS, MachineOperand &RHS,
271 MachineIRBuilder &MIRBuilder) const;
272 MachineInstr *emitCMN(MachineOperand &LHS, MachineOperand &RHS,
273 MachineIRBuilder &MIRBuilder) const;
274 MachineInstr *emitTST(MachineOperand &LHS, MachineOperand &RHS,
275 MachineIRBuilder &MIRBuilder) const;
276 MachineInstr *emitSelect(Register Dst, Register LHS, Register RHS,
277 AArch64CC::CondCode CC,
278 MachineIRBuilder &MIRBuilder) const;
279 MachineInstr *emitExtractVectorElt(Optional<Register> DstReg,
280 const RegisterBank &DstRB, LLT ScalarTy,
281 Register VecReg, unsigned LaneIdx,
282 MachineIRBuilder &MIRBuilder) const;
283 MachineInstr *emitCSINC(Register Dst, Register Src1, Register Src2,
284 AArch64CC::CondCode Pred,
285 MachineIRBuilder &MIRBuilder) const;
286 /// Emit a CSet for a FP compare.
287 ///
288 /// \p Dst is expected to be a 32-bit scalar register.
289 MachineInstr *emitCSetForFCmp(Register Dst, CmpInst::Predicate Pred,
290 MachineIRBuilder &MIRBuilder) const;
291
292 /// Emit the overflow op for \p Opcode.
293 ///
294 /// \p Opcode is expected to be an overflow op's opcode, e.g. G_UADDO,
295 /// G_USUBO, etc.
296 std::pair<MachineInstr *, AArch64CC::CondCode>
297 emitOverflowOp(unsigned Opcode, Register Dst, MachineOperand &LHS,
298 MachineOperand &RHS, MachineIRBuilder &MIRBuilder) const;
299
300 /// Emit expression as a conjunction (a series of CCMP/CFCMP ops).
301 /// In some cases this is even possible with OR operations in the expression.
302 MachineInstr *emitConjunction(Register Val, AArch64CC::CondCode &OutCC,
303 MachineIRBuilder &MIB) const;
304 MachineInstr *emitConditionalComparison(Register LHS, Register RHS,
305 CmpInst::Predicate CC,
306 AArch64CC::CondCode Predicate,
307 AArch64CC::CondCode OutCC,
308 MachineIRBuilder &MIB) const;
309 MachineInstr *emitConjunctionRec(Register Val, AArch64CC::CondCode &OutCC,
310 bool Negate, Register CCOp,
311 AArch64CC::CondCode Predicate,
312 MachineIRBuilder &MIB) const;
313
314 /// Emit a TB(N)Z instruction which tests \p Bit in \p TestReg.
315 /// \p IsNegative is true if the test should be "not zero".
316 /// This will also optimize the test bit instruction when possible.
317 MachineInstr *emitTestBit(Register TestReg, uint64_t Bit, bool IsNegative,
318 MachineBasicBlock *DstMBB,
319 MachineIRBuilder &MIB) const;
320
321 /// Emit a CB(N)Z instruction which branches to \p DestMBB.
322 MachineInstr *emitCBZ(Register CompareReg, bool IsNegative,
323 MachineBasicBlock *DestMBB,
324 MachineIRBuilder &MIB) const;
325
326 // Equivalent to the i32shift_a and friends from AArch64InstrInfo.td.
327 // We use these manually instead of using the importer since it doesn't
328 // support SDNodeXForm.
329 ComplexRendererFns selectShiftA_32(const MachineOperand &Root) const;
330 ComplexRendererFns selectShiftB_32(const MachineOperand &Root) const;
331 ComplexRendererFns selectShiftA_64(const MachineOperand &Root) const;
332 ComplexRendererFns selectShiftB_64(const MachineOperand &Root) const;
333
334 ComplexRendererFns select12BitValueWithLeftShift(uint64_t Immed) const;
335 ComplexRendererFns selectArithImmed(MachineOperand &Root) const;
336 ComplexRendererFns selectNegArithImmed(MachineOperand &Root) const;
337
338 ComplexRendererFns selectAddrModeUnscaled(MachineOperand &Root,
339 unsigned Size) const;
340
341 ComplexRendererFns selectAddrModeUnscaled8(MachineOperand &Root) const {
342 return selectAddrModeUnscaled(Root, 1);
343 }
344 ComplexRendererFns selectAddrModeUnscaled16(MachineOperand &Root) const {
345 return selectAddrModeUnscaled(Root, 2);
346 }
347 ComplexRendererFns selectAddrModeUnscaled32(MachineOperand &Root) const {
348 return selectAddrModeUnscaled(Root, 4);
349 }
350 ComplexRendererFns selectAddrModeUnscaled64(MachineOperand &Root) const {
351 return selectAddrModeUnscaled(Root, 8);
352 }
353 ComplexRendererFns selectAddrModeUnscaled128(MachineOperand &Root) const {
354 return selectAddrModeUnscaled(Root, 16);
355 }
356
357 /// Helper to try to fold in a GISEL_ADD_LOW into an immediate, to be used
358 /// from complex pattern matchers like selectAddrModeIndexed().
359 ComplexRendererFns tryFoldAddLowIntoImm(MachineInstr &RootDef, unsigned Size,
360 MachineRegisterInfo &MRI) const;
361
362 ComplexRendererFns selectAddrModeIndexed(MachineOperand &Root,
363 unsigned Size) const;
364 template <int Width>
365 ComplexRendererFns selectAddrModeIndexed(MachineOperand &Root) const {
366 return selectAddrModeIndexed(Root, Width / 8);
367 }
368
369 bool isWorthFoldingIntoExtendedReg(MachineInstr &MI,
370 const MachineRegisterInfo &MRI) const;
371 ComplexRendererFns
372 selectAddrModeShiftedExtendXReg(MachineOperand &Root,
373 unsigned SizeInBytes) const;
374
375 /// Returns a \p ComplexRendererFns which contains a base, offset, and whether
376 /// or not a shift + extend should be folded into an addressing mode. Returns
377 /// None when this is not profitable or possible.
378 ComplexRendererFns
379 selectExtendedSHL(MachineOperand &Root, MachineOperand &Base,
380 MachineOperand &Offset, unsigned SizeInBytes,
381 bool WantsExt) const;
382 ComplexRendererFns selectAddrModeRegisterOffset(MachineOperand &Root) const;
383 ComplexRendererFns selectAddrModeXRO(MachineOperand &Root,
384 unsigned SizeInBytes) const;
385 template <int Width>
386 ComplexRendererFns selectAddrModeXRO(MachineOperand &Root) const {
387 return selectAddrModeXRO(Root, Width / 8);
388 }
389
390 ComplexRendererFns selectAddrModeWRO(MachineOperand &Root,
391 unsigned SizeInBytes) const;
392 template <int Width>
393 ComplexRendererFns selectAddrModeWRO(MachineOperand &Root) const {
394 return selectAddrModeWRO(Root, Width / 8);
395 }
396
397 ComplexRendererFns selectShiftedRegister(MachineOperand &Root,
398 bool AllowROR = false) const;
399
400 ComplexRendererFns selectArithShiftedRegister(MachineOperand &Root) const {
401 return selectShiftedRegister(Root);
402 }
403
404 ComplexRendererFns selectLogicalShiftedRegister(MachineOperand &Root) const {
405 return selectShiftedRegister(Root, true);
406 }
407
408 /// Given an extend instruction, determine the correct shift-extend type for
409 /// that instruction.
410 ///
411 /// If the instruction is going to be used in a load or store, pass
412 /// \p IsLoadStore = true.
413 AArch64_AM::ShiftExtendType
414 getExtendTypeForInst(MachineInstr &MI, MachineRegisterInfo &MRI,
415 bool IsLoadStore = false) const;
416
417 /// Move \p Reg to \p RC if \p Reg is not already on \p RC.
418 ///
419 /// \returns Either \p Reg if no change was necessary, or the new register
420 /// created by moving \p Reg.
421 ///
422 /// Note: This uses emitCopy right now.
423 Register moveScalarRegClass(Register Reg, const TargetRegisterClass &RC,
424 MachineIRBuilder &MIB) const;
425
426 ComplexRendererFns selectArithExtendedRegister(MachineOperand &Root) const;
427
428 void renderTruncImm(MachineInstrBuilder &MIB, const MachineInstr &MI,
429 int OpIdx = -1) const;
430 void renderLogicalImm32(MachineInstrBuilder &MIB, const MachineInstr &I,
431 int OpIdx = -1) const;
432 void renderLogicalImm64(MachineInstrBuilder &MIB, const MachineInstr &I,
433 int OpIdx = -1) const;
434 void renderFPImm16(MachineInstrBuilder &MIB, const MachineInstr &MI,
435 int OpIdx = -1) const;
436 void renderFPImm32(MachineInstrBuilder &MIB, const MachineInstr &MI,
437 int OpIdx = -1) const;
438 void renderFPImm64(MachineInstrBuilder &MIB, const MachineInstr &MI,
439 int OpIdx = -1) const;
440 void renderFPImm32SIMDModImmType4(MachineInstrBuilder &MIB,
441 const MachineInstr &MI,
442 int OpIdx = -1) const;
443
444 // Materialize a GlobalValue or BlockAddress using a movz+movk sequence.
445 void materializeLargeCMVal(MachineInstr &I, const Value *V, unsigned OpFlags);
446
447 // Optimization methods.
448 bool tryOptSelect(GSelect &Sel);
449 bool tryOptSelectConjunction(GSelect &Sel, MachineInstr &CondMI);
450 MachineInstr *tryFoldIntegerCompare(MachineOperand &LHS, MachineOperand &RHS,
451 MachineOperand &Predicate,
452 MachineIRBuilder &MIRBuilder) const;
453
454 /// Return true if \p MI is a load or store of \p NumBytes bytes.
455 bool isLoadStoreOfNumBytes(const MachineInstr &MI, unsigned NumBytes) const;
456
457 /// Returns true if \p MI is guaranteed to have the high-half of a 64-bit
458 /// register zeroed out. In other words, the result of MI has been explicitly
459 /// zero extended.
460 bool isDef32(const MachineInstr &MI) const;
461
462 const AArch64TargetMachine &TM;
463 const AArch64Subtarget &STI;
464 const AArch64InstrInfo &TII;
465 const AArch64RegisterInfo &TRI;
466 const AArch64RegisterBankInfo &RBI;
467
468 bool ProduceNonFlagSettingCondBr = false;
469
470 // Some cached values used during selection.
471 // We use LR as a live-in register, and we keep track of it here as it can be
472 // clobbered by calls.
473 Register MFReturnAddr;
474
475 MachineIRBuilder MIB;
476
477#define GET_GLOBALISEL_PREDICATES_DECL
478#include "AArch64GenGlobalISel.inc"
479#undef GET_GLOBALISEL_PREDICATES_DECL
480
481// We declare the temporaries used by selectImpl() in the class to minimize the
482// cost of constructing placeholder values.
483#define GET_GLOBALISEL_TEMPORARIES_DECL
484#include "AArch64GenGlobalISel.inc"
485#undef GET_GLOBALISEL_TEMPORARIES_DECL
486};
487
488} // end anonymous namespace
489
490#define GET_GLOBALISEL_IMPL
491#include "AArch64GenGlobalISel.inc"
492#undef GET_GLOBALISEL_IMPL
493
494AArch64InstructionSelector::AArch64InstructionSelector(
495 const AArch64TargetMachine &TM, const AArch64Subtarget &STI,
496 const AArch64RegisterBankInfo &RBI)
497 : TM(TM), STI(STI), TII(*STI.getInstrInfo()), TRI(*STI.getRegisterInfo()),
498 RBI(RBI),
499#define GET_GLOBALISEL_PREDICATES_INIT
500#include "AArch64GenGlobalISel.inc"
501#undef GET_GLOBALISEL_PREDICATES_INIT
502#define GET_GLOBALISEL_TEMPORARIES_INIT
503#include "AArch64GenGlobalISel.inc"
504#undef GET_GLOBALISEL_TEMPORARIES_INIT
505{
506}
507
508// FIXME: This should be target-independent, inferred from the types declared
509// for each class in the bank.
510static const TargetRegisterClass *
511getRegClassForTypeOnBank(LLT Ty, const RegisterBank &RB,
512 const RegisterBankInfo &RBI,
513 bool GetAllRegSet = false) {
514 if (RB.getID() == AArch64::GPRRegBankID) {
515 if (Ty.getSizeInBits() <= 32)
516 return GetAllRegSet ? &AArch64::GPR32allRegClass
517 : &AArch64::GPR32RegClass;
518 if (Ty.getSizeInBits() == 64)
519 return GetAllRegSet ? &AArch64::GPR64allRegClass
520 : &AArch64::GPR64RegClass;
521 if (Ty.getSizeInBits() == 128)
522 return &AArch64::XSeqPairsClassRegClass;
523 return nullptr;
524 }
525
526 if (RB.getID() == AArch64::FPRRegBankID) {
527 switch (Ty.getSizeInBits()) {
528 case 8:
529 return &AArch64::FPR8RegClass;
530 case 16:
531 return &AArch64::FPR16RegClass;
532 case 32:
533 return &AArch64::FPR32RegClass;
534 case 64:
535 return &AArch64::FPR64RegClass;
536 case 128:
537 return &AArch64::FPR128RegClass;
538 }
539 return nullptr;
540 }
541
542 return nullptr;
543}
544
545/// Given a register bank, and size in bits, return the smallest register class
546/// that can represent that combination.
547static const TargetRegisterClass *
548getMinClassForRegBank(const RegisterBank &RB, unsigned SizeInBits,
549 bool GetAllRegSet = false) {
550 unsigned RegBankID = RB.getID();
551
552 if (RegBankID == AArch64::GPRRegBankID) {
553 if (SizeInBits <= 32)
554 return GetAllRegSet ? &AArch64::GPR32allRegClass
555 : &AArch64::GPR32RegClass;
556 if (SizeInBits == 64)
557 return GetAllRegSet ? &AArch64::GPR64allRegClass
558 : &AArch64::GPR64RegClass;
559 if (SizeInBits == 128)
560 return &AArch64::XSeqPairsClassRegClass;
561 }
562
563 if (RegBankID == AArch64::FPRRegBankID) {
564 switch (SizeInBits) {
565 default:
566 return nullptr;
567 case 8:
568 return &AArch64::FPR8RegClass;
569 case 16:
570 return &AArch64::FPR16RegClass;
571 case 32:
572 return &AArch64::FPR32RegClass;
573 case 64:
574 return &AArch64::FPR64RegClass;
575 case 128:
576 return &AArch64::FPR128RegClass;
577 }
578 }
579
580 return nullptr;
581}
582
583/// Returns the correct subregister to use for a given register class.
584static bool getSubRegForClass(const TargetRegisterClass *RC,
585 const TargetRegisterInfo &TRI, unsigned &SubReg) {
586 switch (TRI.getRegSizeInBits(*RC)) {
43
Control jumps to the 'default' case at line 602
587 case 8:
588 SubReg = AArch64::bsub;
589 break;
590 case 16:
591 SubReg = AArch64::hsub;
592 break;
593 case 32:
594 if (RC != &AArch64::FPR32RegClass)
595 SubReg = AArch64::sub_32;
596 else
597 SubReg = AArch64::ssub;
598 break;
599 case 64:
600 SubReg = AArch64::dsub;
601 break;
602 default:
603 LLVM_DEBUG(
44
Assuming 'DebugFlag' is false
45
Loop condition is false. Exiting loop
604 dbgs() << "Couldn't find appropriate subregister for register class.");
605 return false;
46
Returning without writing to 'SubReg'
606 }
607
608 return true;
609}
610
611/// Returns the minimum size the given register bank can hold.
612static unsigned getMinSizeForRegBank(const RegisterBank &RB) {
613 switch (RB.getID()) {
614 case AArch64::GPRRegBankID:
615 return 32;
616 case AArch64::FPRRegBankID:
617 return 8;
618 default:
619 llvm_unreachable("Tried to get minimum size for unknown register bank.");
620 }
621}
622
623/// Create a REG_SEQUENCE instruction using the registers in \p Regs.
624/// Helper function for functions like createDTuple and createQTuple.
625///
626/// \p RegClassIDs - The list of register class IDs available for some tuple of
627/// a scalar class. E.g. QQRegClassID, QQQRegClassID, QQQQRegClassID. This is
628/// expected to contain between 2 and 4 tuple classes.
629///
630/// \p SubRegs - The list of subregister classes associated with each register
631/// class ID in \p RegClassIDs. E.g., QQRegClassID should use the qsub0
632/// subregister class. The index of each subregister class is expected to
633/// correspond with the index of each register class.
634///
635/// \returns Either the destination register of REG_SEQUENCE instruction that
636/// was created, or the 0th element of \p Regs if \p Regs contains a single
637/// element.
638static Register createTuple(ArrayRef<Register> Regs,
639 const unsigned RegClassIDs[],
640 const unsigned SubRegs[], MachineIRBuilder &MIB) {
641 unsigned NumRegs = Regs.size();
642 if (NumRegs == 1)
643 return Regs[0];
644 assert(NumRegs >= 2 && NumRegs <= 4 &&
645 "Only support between two and 4 registers in a tuple!");
646 const TargetRegisterInfo *TRI = MIB.getMF().getSubtarget().getRegisterInfo();
647 auto *DesiredClass = TRI->getRegClass(RegClassIDs[NumRegs - 2]);
648 auto RegSequence =
649 MIB.buildInstr(TargetOpcode::REG_SEQUENCE, {DesiredClass}, {});
650 for (unsigned I = 0, E = Regs.size(); I < E; ++I) {
651 RegSequence.addUse(Regs[I]);
652 RegSequence.addImm(SubRegs[I]);
653 }
654 return RegSequence.getReg(0);
655}
656
657/// Create a tuple of D-registers using the registers in \p Regs.
658static Register createDTuple(ArrayRef<Register> Regs, MachineIRBuilder &MIB) {
659 static const unsigned RegClassIDs[] = {
660 AArch64::DDRegClassID, AArch64::DDDRegClassID, AArch64::DDDDRegClassID};
661 static const unsigned SubRegs[] = {AArch64::dsub0, AArch64::dsub1,
662 AArch64::dsub2, AArch64::dsub3};
663 return createTuple(Regs, RegClassIDs, SubRegs, MIB);
664}
665
666/// Create a tuple of Q-registers using the registers in \p Regs.
667static Register createQTuple(ArrayRef<Register> Regs, MachineIRBuilder &MIB) {
668 static const unsigned RegClassIDs[] = {
669 AArch64::QQRegClassID, AArch64::QQQRegClassID, AArch64::QQQQRegClassID};
670 static const unsigned SubRegs[] = {AArch64::qsub0, AArch64::qsub1,
671 AArch64::qsub2, AArch64::qsub3};
672 return createTuple(Regs, RegClassIDs, SubRegs, MIB);
673}
674
675static Optional<uint64_t> getImmedFromMO(const MachineOperand &Root) {
676 auto &MI = *Root.getParent();
677 auto &MBB = *MI.getParent();
678 auto &MF = *MBB.getParent();
679 auto &MRI = MF.getRegInfo();
680 uint64_t Immed;
681 if (Root.isImm())
682 Immed = Root.getImm();
683 else if (Root.isCImm())
684 Immed = Root.getCImm()->getZExtValue();
685 else if (Root.isReg()) {
686 auto ValAndVReg =
687 getIConstantVRegValWithLookThrough(Root.getReg(), MRI, true);
688 if (!ValAndVReg)
689 return None;
690 Immed = ValAndVReg->Value.getSExtValue();
691 } else
692 return None;
693 return Immed;
694}
695
696/// Check whether \p I is a currently unsupported binary operation:
697/// - it has an unsized type
698/// - an operand is not a vreg
699/// - all operands are not in the same bank
700/// These are checks that should someday live in the verifier, but right now,
701/// these are mostly limitations of the aarch64 selector.
702static bool unsupportedBinOp(const MachineInstr &I,
703 const AArch64RegisterBankInfo &RBI,
704 const MachineRegisterInfo &MRI,
705 const AArch64RegisterInfo &TRI) {
706 LLT Ty = MRI.getType(I.getOperand(0).getReg());
707 if (!Ty.isValid()) {
708 LLVM_DEBUG(dbgs() << "Generic binop register should be typed\n");
709 return true;
710 }
711
712 const RegisterBank *PrevOpBank = nullptr;
713 for (auto &MO : I.operands()) {
714 // FIXME: Support non-register operands.
715 if (!MO.isReg()) {
716 LLVM_DEBUG(dbgs() << "Generic inst non-reg operands are unsupported\n");
717 return true;
718 }
719
720 // FIXME: Can generic operations have physical registers operands? If
721 // so, this will need to be taught about that, and we'll need to get the
722 // bank out of the minimal class for the register.
723 // Either way, this needs to be documented (and possibly verified).
724 if (!Register::isVirtualRegister(MO.getReg())) {
725 LLVM_DEBUG(dbgs() << "Generic inst has physical register operand\n");
726 return true;
727 }
728
729 const RegisterBank *OpBank = RBI.getRegBank(MO.getReg(), MRI, TRI);
730 if (!OpBank) {
731 LLVM_DEBUG(dbgs() << "Generic register has no bank or class\n");
732 return true;
733 }
734
735 if (PrevOpBank && OpBank != PrevOpBank) {
736 LLVM_DEBUG(dbgs() << "Generic inst operands have different banks\n");
737 return true;
738 }
739 PrevOpBank = OpBank;
740 }
741 return false;
742}
743
744/// Select the AArch64 opcode for the basic binary operation \p GenericOpc
745/// (such as G_OR or G_SDIV), appropriate for the register bank \p RegBankID
746/// and of size \p OpSize.
747/// \returns \p GenericOpc if the combination is unsupported.
748static unsigned selectBinaryOp(unsigned GenericOpc, unsigned RegBankID,
749 unsigned OpSize) {
750 switch (RegBankID) {
751 case AArch64::GPRRegBankID:
752 if (OpSize == 32) {
753 switch (GenericOpc) {
754 case TargetOpcode::G_SHL:
755 return AArch64::LSLVWr;
756 case TargetOpcode::G_LSHR:
757 return AArch64::LSRVWr;
758 case TargetOpcode::G_ASHR:
759 return AArch64::ASRVWr;
760 default:
761 return GenericOpc;
762 }
763 } else if (OpSize == 64) {
764 switch (GenericOpc) {
765 case TargetOpcode::G_PTR_ADD:
766 return AArch64::ADDXrr;
767 case TargetOpcode::G_SHL:
768 return AArch64::LSLVXr;
769 case TargetOpcode::G_LSHR:
770 return AArch64::LSRVXr;
771 case TargetOpcode::G_ASHR:
772 return AArch64::ASRVXr;
773 default:
774 return GenericOpc;
775 }
776 }
777 break;
778 case AArch64::FPRRegBankID:
779 switch (OpSize) {
780 case 32:
781 switch (GenericOpc) {
782 case TargetOpcode::G_FADD:
783 return AArch64::FADDSrr;
784 case TargetOpcode::G_FSUB:
785 return AArch64::FSUBSrr;
786 case TargetOpcode::G_FMUL:
787 return AArch64::FMULSrr;
788 case TargetOpcode::G_FDIV:
789 return AArch64::FDIVSrr;
790 default:
791 return GenericOpc;
792 }
793 case 64:
794 switch (GenericOpc) {
795 case TargetOpcode::G_FADD:
796 return AArch64::FADDDrr;
797 case TargetOpcode::G_FSUB:
798 return AArch64::FSUBDrr;
799 case TargetOpcode::G_FMUL:
800 return AArch64::FMULDrr;
801 case TargetOpcode::G_FDIV:
802 return AArch64::FDIVDrr;
803 case TargetOpcode::G_OR:
804 return AArch64::ORRv8i8;
805 default:
806 return GenericOpc;
807 }
808 }
809 break;
810 }
811 return GenericOpc;
812}
813
814/// Select the AArch64 opcode for the G_LOAD or G_STORE operation \p GenericOpc,
815/// appropriate for the (value) register bank \p RegBankID and of memory access
816/// size \p OpSize. This returns the variant with the base+unsigned-immediate
817/// addressing mode (e.g., LDRXui).
818/// \returns \p GenericOpc if the combination is unsupported.
819static unsigned selectLoadStoreUIOp(unsigned GenericOpc, unsigned RegBankID,
820 unsigned OpSize) {
821 const bool isStore = GenericOpc == TargetOpcode::G_STORE;
822 switch (RegBankID) {
823 case AArch64::GPRRegBankID:
824 switch (OpSize) {
825 case 8:
826 return isStore ? AArch64::STRBBui : AArch64::LDRBBui;
827 case 16:
828 return isStore ? AArch64::STRHHui : AArch64::LDRHHui;
829 case 32:
830 return isStore ? AArch64::STRWui : AArch64::LDRWui;
831 case 64:
832 return isStore ? AArch64::STRXui : AArch64::LDRXui;
833 }
834 break;
835 case AArch64::FPRRegBankID:
836 switch (OpSize) {
837 case 8:
838 return isStore ? AArch64::STRBui : AArch64::LDRBui;
839 case 16:
840 return isStore ? AArch64::STRHui : AArch64::LDRHui;
841 case 32:
842 return isStore ? AArch64::STRSui : AArch64::LDRSui;
843 case 64:
844 return isStore ? AArch64::STRDui : AArch64::LDRDui;
845 case 128:
846 return isStore ? AArch64::STRQui : AArch64::LDRQui;
847 }
848 break;
849 }
850 return GenericOpc;
851}
852
853#ifndef NDEBUG
854/// Helper function that verifies that we have a valid copy at the end of
855/// selectCopy. Verifies that the source and dest have the expected sizes and
856/// then returns true.
857static bool isValidCopy(const MachineInstr &I, const RegisterBank &DstBank,
858 const MachineRegisterInfo &MRI,
859 const TargetRegisterInfo &TRI,
860 const RegisterBankInfo &RBI) {
861 const Register DstReg = I.getOperand(0).getReg();
862 const Register SrcReg = I.getOperand(1).getReg();
863 const unsigned DstSize = RBI.getSizeInBits(DstReg, MRI, TRI);
864 const unsigned SrcSize = RBI.getSizeInBits(SrcReg, MRI, TRI);
865
866 // Make sure the size of the source and dest line up.
867 assert(
868 (DstSize == SrcSize ||
869 // Copies are a mean to setup initial types, the number of
870 // bits may not exactly match.
871 (Register::isPhysicalRegister(SrcReg) && DstSize <= SrcSize) ||
872 // Copies are a mean to copy bits around, as long as we are
873 // on the same register class, that's fine. Otherwise, that
874 // means we need some SUBREG_TO_REG or AND & co.
875 (((DstSize + 31) / 32 == (SrcSize + 31) / 32) && DstSize > SrcSize)) &&
876 "Copy with different width?!");
877
878 // Check the size of the destination.
879 assert((DstSize <= 64 || DstBank.getID() == AArch64::FPRRegBankID) &&
880 "GPRs cannot get more than 64-bit width values");
881
882 return true;
883}
884#endif
885
886/// Helper function for selectCopy. Inserts a subregister copy from \p SrcReg
887/// to \p *To.
888///
889/// E.g "To = COPY SrcReg:SubReg"
890static bool copySubReg(MachineInstr &I, MachineRegisterInfo &MRI,
891 const RegisterBankInfo &RBI, Register SrcReg,
892 const TargetRegisterClass *To, unsigned SubReg) {
893 assert(SrcReg.isValid() && "Expected a valid source register?");
894 assert(To && "Destination register class cannot be null");
895 assert(SubReg && "Expected a valid subregister");
896
897 MachineIRBuilder MIB(I);
898 auto SubRegCopy =
899 MIB.buildInstr(TargetOpcode::COPY, {To}, {}).addReg(SrcReg, 0, SubReg);
900 MachineOperand &RegOp = I.getOperand(1);
901 RegOp.setReg(SubRegCopy.getReg(0));
902
903 // It's possible that the destination register won't be constrained. Make
904 // sure that happens.
905 if (!Register::isPhysicalRegister(I.getOperand(0).getReg()))
906 RBI.constrainGenericRegister(I.getOperand(0).getReg(), *To, MRI);
907
908 return true;
909}
910
911/// Helper function to get the source and destination register classes for a
912/// copy. Returns a std::pair containing the source register class for the
913/// copy, and the destination register class for the copy. If a register class
914/// cannot be determined, then it will be nullptr.
915static std::pair<const TargetRegisterClass *, const TargetRegisterClass *>
916getRegClassesForCopy(MachineInstr &I, const TargetInstrInfo &TII,
917 MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI,
918 const RegisterBankInfo &RBI) {
919 Register DstReg = I.getOperand(0).getReg();
920 Register SrcReg = I.getOperand(1).getReg();
921 const RegisterBank &DstRegBank = *RBI.getRegBank(DstReg, MRI, TRI);
922 const RegisterBank &SrcRegBank = *RBI.getRegBank(SrcReg, MRI, TRI);
923 unsigned DstSize = RBI.getSizeInBits(DstReg, MRI, TRI);
924 unsigned SrcSize = RBI.getSizeInBits(SrcReg, MRI, TRI);
925
926 // Special casing for cross-bank copies of s1s. We can technically represent
927 // a 1-bit value with any size of register. The minimum size for a GPR is 32
928 // bits. So, we need to put the FPR on 32 bits as well.
929 //
930 // FIXME: I'm not sure if this case holds true outside of copies. If it does,
931 // then we can pull it into the helpers that get the appropriate class for a
932 // register bank. Or make a new helper that carries along some constraint
933 // information.
934 if (SrcRegBank != DstRegBank && (DstSize == 1 && SrcSize == 1))
935 SrcSize = DstSize = 32;
936
937 return {getMinClassForRegBank(SrcRegBank, SrcSize, true),
938 getMinClassForRegBank(DstRegBank, DstSize, true)};
939}
940
941static bool selectCopy(MachineInstr &I, const TargetInstrInfo &TII,
942 MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI,
943 const RegisterBankInfo &RBI) {
944 Register DstReg = I.getOperand(0).getReg();
945 Register SrcReg = I.getOperand(1).getReg();
946 const RegisterBank &DstRegBank = *RBI.getRegBank(DstReg, MRI, TRI);
947 const RegisterBank &SrcRegBank = *RBI.getRegBank(SrcReg, MRI, TRI);
948
949 // Find the correct register classes for the source and destination registers.
950 const TargetRegisterClass *SrcRC;
951 const TargetRegisterClass *DstRC;
952 std::tie(SrcRC, DstRC) = getRegClassesForCopy(I, TII, MRI, TRI, RBI);
16
Calling 'tie<const llvm::TargetRegisterClass *, const llvm::TargetRegisterClass *>'
27
Returning from 'tie<const llvm::TargetRegisterClass *, const llvm::TargetRegisterClass *>'
28
Calling 'tuple::operator='
31
Returning from 'tuple::operator='
953
954 if (!DstRC) {
32
Assuming 'DstRC' is non-null
33
Taking false branch
955 LLVM_DEBUG(dbgs() << "Unexpected dest size "
956 << RBI.getSizeInBits(DstReg, MRI, TRI) << '\n');
957 return false;
958 }
959
960 // A couple helpers below, for making sure that the copy we produce is valid.
961
962 // Set to true if we insert a SUBREG_TO_REG. If we do this, then we don't want
963 // to verify that the src and dst are the same size, since that's handled by
964 // the SUBREG_TO_REG.
965 bool KnownValid = false;
966
967 // Returns true, or asserts if something we don't expect happens. Instead of
968 // returning true, we return isValidCopy() to ensure that we verify the
969 // result.
970 auto CheckCopy = [&]() {
971 // If we have a bitcast or something, we can't have physical registers.
972 assert((I.isCopy() ||
973 (!Register::isPhysicalRegister(I.getOperand(0).getReg()) &&
974 !Register::isPhysicalRegister(I.getOperand(1).getReg()))) &&
975 "No phys reg on generic operator!");
976 bool ValidCopy = true;
977#ifndef NDEBUG
978 ValidCopy = KnownValid || isValidCopy(I, DstRegBank, MRI, TRI, RBI);
979 assert(ValidCopy && "Invalid copy.");
980#endif
981 (void)KnownValid;
982 return ValidCopy;
983 };
984
985 // Is this a copy? If so, then we may need to insert a subregister copy.
986 if (I.isCopy()) {
34
Taking true branch
987 // Yes. Check if there's anything to fix up.
988 if (!SrcRC) {
35
Assuming 'SrcRC' is non-null
36
Taking false branch
989 LLVM_DEBUG(dbgs() << "Couldn't determine source register class\n");
990 return false;
991 }
992
993 unsigned SrcSize = TRI.getRegSizeInBits(*SrcRC);
994 unsigned DstSize = TRI.getRegSizeInBits(*DstRC);
995 unsigned SubReg;
37
'SubReg' declared without an initial value
996
997 // If the source bank doesn't support a subregister copy small enough,
998 // then we first need to copy to the destination bank.
999 if (getMinSizeForRegBank(SrcRegBank) > DstSize) {
38
Assuming the condition is false
39
Taking false branch
1000 const TargetRegisterClass *DstTempRC =
1001 getMinClassForRegBank(DstRegBank, SrcSize, /* GetAllRegSet */ true);
1002 getSubRegForClass(DstRC, TRI, SubReg);
1003
1004 MachineIRBuilder MIB(I);
1005 auto Copy = MIB.buildCopy({DstTempRC}, {SrcReg});
1006 copySubReg(I, MRI, RBI, Copy.getReg(0), DstRC, SubReg);
1007 } else if (SrcSize > DstSize) {
40
Assuming 'SrcSize' is > 'DstSize'
41
Taking true branch
1008 // If the source register is bigger than the destination we need to
1009 // perform a subregister copy.
1010 const TargetRegisterClass *SubRegRC =
1011 getMinClassForRegBank(SrcRegBank, DstSize, /* GetAllRegSet */ true);
1012 getSubRegForClass(SubRegRC, TRI, SubReg);
42
Calling 'getSubRegForClass'
47
Returning from 'getSubRegForClass'
1013 copySubReg(I, MRI, RBI, SrcReg, DstRC, SubReg);
48
6th function call argument is an uninitialized value
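One way to break this path (an illustrative sketch only, not a claim about the upstream fix) is to honor the bool that getSubRegForClass() already returns, so 'SubReg' is never read when the helper takes its 'default' case:

  // Hypothetical guard, not present in the analyzed file:
  if (!getSubRegForClass(SubRegRC, TRI, SubReg))
    return false; // bail out instead of passing an uninitialized SubReg
  copySubReg(I, MRI, RBI, SrcReg, DstRC, SubReg);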
1014 } else if (DstSize > SrcSize) {
1015 // If the destination register is bigger than the source we need to do
1016 // a promotion using SUBREG_TO_REG.
1017 const TargetRegisterClass *PromotionRC =
1018 getMinClassForRegBank(SrcRegBank, DstSize, /* GetAllRegSet */ true);
1019 getSubRegForClass(SrcRC, TRI, SubReg);
1020
1021 Register PromoteReg = MRI.createVirtualRegister(PromotionRC);
1022 BuildMI(*I.getParent(), I, I.getDebugLoc(),
1023 TII.get(AArch64::SUBREG_TO_REG), PromoteReg)
1024 .addImm(0)
1025 .addUse(SrcReg)
1026 .addImm(SubReg);
1027 MachineOperand &RegOp = I.getOperand(1);
1028 RegOp.setReg(PromoteReg);
1029
1030 // Promise that the copy is implicitly validated by the SUBREG_TO_REG.
1031 KnownValid = true;
1032 }
1033
1034 // If the destination is a physical register, then there's nothing to
1035 // change, so we're done.
1036 if (Register::isPhysicalRegister(DstReg))
1037 return CheckCopy();
1038 }
1039
1040 // No need to constrain SrcReg. It will get constrained when we hit another
1041  // of its uses or defs. Copies do not have constraints.
1042 if (!RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) {
1043 LLVM_DEBUG(dbgs() << "Failed to constrain " << TII.getName(I.getOpcode())do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "Failed to constrain " <<
TII.getName(I.getOpcode()) << " operand\n"; } } while (
false)
1044 << " operand\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "Failed to constrain " <<
TII.getName(I.getOpcode()) << " operand\n"; } } while (
false)
;
1045 return false;
1046 }
1047
1048  // If this is a GPR ZEXT that we want to just reduce down into a copy.
1049 // The sizes will be mismatched with the source < 32b but that's ok.
1050 if (I.getOpcode() == TargetOpcode::G_ZEXT) {
1051 I.setDesc(TII.get(AArch64::COPY));
1052 assert(SrcRegBank.getID() == AArch64::GPRRegBankID)(static_cast <bool> (SrcRegBank.getID() == AArch64::GPRRegBankID
) ? void (0) : __assert_fail ("SrcRegBank.getID() == AArch64::GPRRegBankID"
, "llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 1052, __extension__ __PRETTY_FUNCTION__))
;
1053 return selectCopy(I, TII, MRI, TRI, RBI);
1054 }
1055
1056 I.setDesc(TII.get(AArch64::COPY));
1057 return CheckCopy();
1058}
1059
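The path reported above is the whole story of the warning at line 1013: SubReg is declared at line 995 with no initializer (event 37), the SrcSize > DstSize branch is taken (events 40-41), getSubRegForClass is entered and left (events 42-47) on a path that, per the analyzer, can leave SubReg unwritten, and copySubReg then reads it as its 6th argument (event 48). The standalone sketch below reproduces that out-parameter hazard and one defensive pattern (initialize the out-parameter and honor the callee's success result). The helper names are illustrative only, do not mirror the real getSubRegForClass/copySubReg signatures, and this is not the actual upstream fix.

// Minimal standalone sketch of the out-parameter hazard reported above.
#include <cstdio>

// Callee that only writes its out-parameter for sizes it recognizes,
// mirroring the analyzer path where SubReg can be left untouched.
static bool pickSubRegIndex(unsigned SizeInBits, unsigned &SubReg) {
  switch (SizeInBits) {
  case 32: SubReg = 1; return true;  // stands in for e.g. sub_32
  case 16: SubReg = 2; return true;  // stands in for e.g. hsub
  default: return false;             // SubReg deliberately not written
  }
}

int main() {
  // Hazardous pattern, as on the reported path: the out-parameter is read
  // even though the callee signalled failure and never wrote it.
  unsigned SubReg;                      // uninitialized, like line 995
  (void)pickSubRegIndex(128, SubReg);   // returns false, SubReg untouched
  // std::printf("%u\n", SubReg);       // would read an uninitialized value

  // Defensive pattern: initialize the out-parameter and honor the result.
  unsigned SafeSubReg = 0;
  if (!pickSubRegIndex(128, SafeSubReg)) {
    std::printf("no subregister index for this size; bail out\n");
    return 1;
  }
  std::printf("subregister index: %u\n", SafeSubReg);
  return 0;
}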
1060static unsigned selectFPConvOpc(unsigned GenericOpc, LLT DstTy, LLT SrcTy) {
1061 if (!DstTy.isScalar() || !SrcTy.isScalar())
1062 return GenericOpc;
1063
1064 const unsigned DstSize = DstTy.getSizeInBits();
1065 const unsigned SrcSize = SrcTy.getSizeInBits();
1066
1067 switch (DstSize) {
1068 case 32:
1069 switch (SrcSize) {
1070 case 32:
1071 switch (GenericOpc) {
1072 case TargetOpcode::G_SITOFP:
1073 return AArch64::SCVTFUWSri;
1074 case TargetOpcode::G_UITOFP:
1075 return AArch64::UCVTFUWSri;
1076 case TargetOpcode::G_FPTOSI:
1077 return AArch64::FCVTZSUWSr;
1078 case TargetOpcode::G_FPTOUI:
1079 return AArch64::FCVTZUUWSr;
1080 default:
1081 return GenericOpc;
1082 }
1083 case 64:
1084 switch (GenericOpc) {
1085 case TargetOpcode::G_SITOFP:
1086 return AArch64::SCVTFUXSri;
1087 case TargetOpcode::G_UITOFP:
1088 return AArch64::UCVTFUXSri;
1089 case TargetOpcode::G_FPTOSI:
1090 return AArch64::FCVTZSUWDr;
1091 case TargetOpcode::G_FPTOUI:
1092 return AArch64::FCVTZUUWDr;
1093 default:
1094 return GenericOpc;
1095 }
1096 default:
1097 return GenericOpc;
1098 }
1099 case 64:
1100 switch (SrcSize) {
1101 case 32:
1102 switch (GenericOpc) {
1103 case TargetOpcode::G_SITOFP:
1104 return AArch64::SCVTFUWDri;
1105 case TargetOpcode::G_UITOFP:
1106 return AArch64::UCVTFUWDri;
1107 case TargetOpcode::G_FPTOSI:
1108 return AArch64::FCVTZSUXSr;
1109 case TargetOpcode::G_FPTOUI:
1110 return AArch64::FCVTZUUXSr;
1111 default:
1112 return GenericOpc;
1113 }
1114 case 64:
1115 switch (GenericOpc) {
1116 case TargetOpcode::G_SITOFP:
1117 return AArch64::SCVTFUXDri;
1118 case TargetOpcode::G_UITOFP:
1119 return AArch64::UCVTFUXDri;
1120 case TargetOpcode::G_FPTOSI:
1121 return AArch64::FCVTZSUXDr;
1122 case TargetOpcode::G_FPTOUI:
1123 return AArch64::FCVTZUUXDr;
1124 default:
1125 return GenericOpc;
1126 }
1127 default:
1128 return GenericOpc;
1129 }
1130 default:
1131 return GenericOpc;
1132 };
1133 return GenericOpc;
1134}
1135
1136MachineInstr *
1137AArch64InstructionSelector::emitSelect(Register Dst, Register True,
1138 Register False, AArch64CC::CondCode CC,
1139 MachineIRBuilder &MIB) const {
1140 MachineRegisterInfo &MRI = *MIB.getMRI();
1141 assert(RBI.getRegBank(False, MRI, TRI)->getID() ==(static_cast <bool> (RBI.getRegBank(False, MRI, TRI)->
getID() == RBI.getRegBank(True, MRI, TRI)->getID() &&
"Expected both select operands to have the same regbank?") ?
void (0) : __assert_fail ("RBI.getRegBank(False, MRI, TRI)->getID() == RBI.getRegBank(True, MRI, TRI)->getID() && \"Expected both select operands to have the same regbank?\""
, "llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 1143, __extension__ __PRETTY_FUNCTION__))
1142 RBI.getRegBank(True, MRI, TRI)->getID() &&(static_cast <bool> (RBI.getRegBank(False, MRI, TRI)->
getID() == RBI.getRegBank(True, MRI, TRI)->getID() &&
"Expected both select operands to have the same regbank?") ?
void (0) : __assert_fail ("RBI.getRegBank(False, MRI, TRI)->getID() == RBI.getRegBank(True, MRI, TRI)->getID() && \"Expected both select operands to have the same regbank?\""
, "llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 1143, __extension__ __PRETTY_FUNCTION__))
1143 "Expected both select operands to have the same regbank?")(static_cast <bool> (RBI.getRegBank(False, MRI, TRI)->
getID() == RBI.getRegBank(True, MRI, TRI)->getID() &&
"Expected both select operands to have the same regbank?") ?
void (0) : __assert_fail ("RBI.getRegBank(False, MRI, TRI)->getID() == RBI.getRegBank(True, MRI, TRI)->getID() && \"Expected both select operands to have the same regbank?\""
, "llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 1143, __extension__ __PRETTY_FUNCTION__))
;
1144 LLT Ty = MRI.getType(True);
1145 if (Ty.isVector())
1146 return nullptr;
1147 const unsigned Size = Ty.getSizeInBits();
1148 assert((Size == 32 || Size == 64) &&(static_cast <bool> ((Size == 32 || Size == 64) &&
"Expected 32 bit or 64 bit select only?") ? void (0) : __assert_fail
("(Size == 32 || Size == 64) && \"Expected 32 bit or 64 bit select only?\""
, "llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 1149, __extension__ __PRETTY_FUNCTION__))
1149 "Expected 32 bit or 64 bit select only?")(static_cast <bool> ((Size == 32 || Size == 64) &&
"Expected 32 bit or 64 bit select only?") ? void (0) : __assert_fail
("(Size == 32 || Size == 64) && \"Expected 32 bit or 64 bit select only?\""
, "llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 1149, __extension__ __PRETTY_FUNCTION__))
;
1150 const bool Is32Bit = Size == 32;
1151 if (RBI.getRegBank(True, MRI, TRI)->getID() != AArch64::GPRRegBankID) {
1152 unsigned Opc = Is32Bit ? AArch64::FCSELSrrr : AArch64::FCSELDrrr;
1153 auto FCSel = MIB.buildInstr(Opc, {Dst}, {True, False}).addImm(CC);
1154 constrainSelectedInstRegOperands(*FCSel, TII, TRI, RBI);
1155 return &*FCSel;
1156 }
1157
1158 // By default, we'll try and emit a CSEL.
1159 unsigned Opc = Is32Bit ? AArch64::CSELWr : AArch64::CSELXr;
1160 bool Optimized = false;
1161 auto TryFoldBinOpIntoSelect = [&Opc, Is32Bit, &CC, &MRI,
1162 &Optimized](Register &Reg, Register &OtherReg,
1163 bool Invert) {
1164 if (Optimized)
1165 return false;
1166
1167 // Attempt to fold:
1168 //
1169 // %sub = G_SUB 0, %x
1170 // %select = G_SELECT cc, %reg, %sub
1171 //
1172 // Into:
1173 // %select = CSNEG %reg, %x, cc
1174 Register MatchReg;
1175 if (mi_match(Reg, MRI, m_Neg(m_Reg(MatchReg)))) {
1176 Opc = Is32Bit ? AArch64::CSNEGWr : AArch64::CSNEGXr;
1177 Reg = MatchReg;
1178 if (Invert) {
1179 CC = AArch64CC::getInvertedCondCode(CC);
1180 std::swap(Reg, OtherReg);
1181 }
1182 return true;
1183 }
1184
1185 // Attempt to fold:
1186 //
1187 // %xor = G_XOR %x, -1
1188 // %select = G_SELECT cc, %reg, %xor
1189 //
1190 // Into:
1191 // %select = CSINV %reg, %x, cc
1192 if (mi_match(Reg, MRI, m_Not(m_Reg(MatchReg)))) {
1193 Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;
1194 Reg = MatchReg;
1195 if (Invert) {
1196 CC = AArch64CC::getInvertedCondCode(CC);
1197 std::swap(Reg, OtherReg);
1198 }
1199 return true;
1200 }
1201
1202 // Attempt to fold:
1203 //
1204 // %add = G_ADD %x, 1
1205 // %select = G_SELECT cc, %reg, %add
1206 //
1207 // Into:
1208 // %select = CSINC %reg, %x, cc
1209 if (mi_match(Reg, MRI,
1210 m_any_of(m_GAdd(m_Reg(MatchReg), m_SpecificICst(1)),
1211 m_GPtrAdd(m_Reg(MatchReg), m_SpecificICst(1))))) {
1212 Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;
1213 Reg = MatchReg;
1214 if (Invert) {
1215 CC = AArch64CC::getInvertedCondCode(CC);
1216 std::swap(Reg, OtherReg);
1217 }
1218 return true;
1219 }
1220
1221 return false;
1222 };
1223
1224 // Helper lambda which tries to use CSINC/CSINV for the instruction when its
1225 // true/false values are constants.
1226 // FIXME: All of these patterns already exist in tablegen. We should be
1227 // able to import these.
1228 auto TryOptSelectCst = [&Opc, &True, &False, &CC, Is32Bit, &MRI,
1229 &Optimized]() {
1230 if (Optimized)
1231 return false;
1232 auto TrueCst = getIConstantVRegValWithLookThrough(True, MRI);
1233 auto FalseCst = getIConstantVRegValWithLookThrough(False, MRI);
1234 if (!TrueCst && !FalseCst)
1235 return false;
1236
1237 Register ZReg = Is32Bit ? AArch64::WZR : AArch64::XZR;
1238 if (TrueCst && FalseCst) {
1239 int64_t T = TrueCst->Value.getSExtValue();
1240 int64_t F = FalseCst->Value.getSExtValue();
1241
1242 if (T == 0 && F == 1) {
1243 // G_SELECT cc, 0, 1 -> CSINC zreg, zreg, cc
1244 Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;
1245 True = ZReg;
1246 False = ZReg;
1247 return true;
1248 }
1249
1250 if (T == 0 && F == -1) {
1251        // G_SELECT cc, 0, -1 -> CSINV zreg, zreg, cc
1252 Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;
1253 True = ZReg;
1254 False = ZReg;
1255 return true;
1256 }
1257 }
1258
1259 if (TrueCst) {
1260 int64_t T = TrueCst->Value.getSExtValue();
1261 if (T == 1) {
1262 // G_SELECT cc, 1, f -> CSINC f, zreg, inv_cc
1263 Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;
1264 True = False;
1265 False = ZReg;
1266 CC = AArch64CC::getInvertedCondCode(CC);
1267 return true;
1268 }
1269
1270 if (T == -1) {
1271 // G_SELECT cc, -1, f -> CSINV f, zreg, inv_cc
1272 Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;
1273 True = False;
1274 False = ZReg;
1275 CC = AArch64CC::getInvertedCondCode(CC);
1276 return true;
1277 }
1278 }
1279
1280 if (FalseCst) {
1281 int64_t F = FalseCst->Value.getSExtValue();
1282 if (F == 1) {
1283 // G_SELECT cc, t, 1 -> CSINC t, zreg, cc
1284 Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;
1285 False = ZReg;
1286 return true;
1287 }
1288
1289 if (F == -1) {
1290        // G_SELECT cc, t, -1 -> CSINV t, zreg, cc
1291 Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;
1292 False = ZReg;
1293 return true;
1294 }
1295 }
1296 return false;
1297 };
1298
1299 Optimized |= TryFoldBinOpIntoSelect(False, True, /*Invert = */ false);
1300 Optimized |= TryFoldBinOpIntoSelect(True, False, /*Invert = */ true);
1301 Optimized |= TryOptSelectCst();
1302 auto SelectInst = MIB.buildInstr(Opc, {Dst}, {True, False}).addImm(CC);
1303 constrainSelectedInstRegOperands(*SelectInst, TII, TRI, RBI);
1304 return &*SelectInst;
1305}
1306
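The three folds attempted by TryFoldBinOpIntoSelect rewrite a select whose untaken operand is a negate, a bitwise not, or an increment into CSNEG, CSINV, or CSINC; the Invert path additionally swaps the operands and inverts the condition. Below is a minimal scalar model of why the non-inverted rewrites preserve the selected value, assuming the usual conditional-select semantics csel(c,a,b)=c?a:b, csneg(c,a,b)=c?a:-b, csinv(c,a,b)=c?a:~b, csinc(c,a,b)=c?a:b+1. The helpers model the instruction semantics and are not selector APIs.

// Scalar model of the CSEL-family folds performed by emitSelect above.
#include <cassert>
#include <cstdint>

static int64_t csel (bool c, int64_t a, int64_t b) { return c ? a : b; }
static int64_t csneg(bool c, int64_t a, int64_t b) { return c ? a : -b; }
static int64_t csinv(bool c, int64_t a, int64_t b) { return c ? a : ~b; }
static int64_t csinc(bool c, int64_t a, int64_t b) { return c ? a : b + 1; }

int main() {
  for (bool c : {false, true}) {
    int64_t reg = 7, x = 42;
    // select cc, reg, (0 - x)   ==  CSNEG reg, x, cc
    assert(csel(c, reg, 0 - x) == csneg(c, reg, x));
    // select cc, reg, (x ^ -1)  ==  CSINV reg, x, cc
    assert(csel(c, reg, x ^ -1) == csinv(c, reg, x));
    // select cc, reg, (x + 1)   ==  CSINC reg, x, cc
    assert(csel(c, reg, x + 1) == csinc(c, reg, x));
  }
  return 0;
}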
1307static AArch64CC::CondCode changeICMPPredToAArch64CC(CmpInst::Predicate P) {
1308 switch (P) {
1309 default:
1310 llvm_unreachable("Unknown condition code!")::llvm::llvm_unreachable_internal("Unknown condition code!", "llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 1310)
;
1311 case CmpInst::ICMP_NE:
1312 return AArch64CC::NE;
1313 case CmpInst::ICMP_EQ:
1314 return AArch64CC::EQ;
1315 case CmpInst::ICMP_SGT:
1316 return AArch64CC::GT;
1317 case CmpInst::ICMP_SGE:
1318 return AArch64CC::GE;
1319 case CmpInst::ICMP_SLT:
1320 return AArch64CC::LT;
1321 case CmpInst::ICMP_SLE:
1322 return AArch64CC::LE;
1323 case CmpInst::ICMP_UGT:
1324 return AArch64CC::HI;
1325 case CmpInst::ICMP_UGE:
1326 return AArch64CC::HS;
1327 case CmpInst::ICMP_ULT:
1328 return AArch64CC::LO;
1329 case CmpInst::ICMP_ULE:
1330 return AArch64CC::LS;
1331 }
1332}
1333
1334/// changeFPCCToORAArch64CC - Convert an IR fp condition code to an AArch64 CC.
1335static void changeFPCCToORAArch64CC(CmpInst::Predicate CC,
1336 AArch64CC::CondCode &CondCode,
1337 AArch64CC::CondCode &CondCode2) {
1338 CondCode2 = AArch64CC::AL;
1339 switch (CC) {
1340 default:
1341 llvm_unreachable("Unknown FP condition!")::llvm::llvm_unreachable_internal("Unknown FP condition!", "llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 1341)
;
1342 case CmpInst::FCMP_OEQ:
1343 CondCode = AArch64CC::EQ;
1344 break;
1345 case CmpInst::FCMP_OGT:
1346 CondCode = AArch64CC::GT;
1347 break;
1348 case CmpInst::FCMP_OGE:
1349 CondCode = AArch64CC::GE;
1350 break;
1351 case CmpInst::FCMP_OLT:
1352 CondCode = AArch64CC::MI;
1353 break;
1354 case CmpInst::FCMP_OLE:
1355 CondCode = AArch64CC::LS;
1356 break;
1357 case CmpInst::FCMP_ONE:
1358 CondCode = AArch64CC::MI;
1359 CondCode2 = AArch64CC::GT;
1360 break;
1361 case CmpInst::FCMP_ORD:
1362 CondCode = AArch64CC::VC;
1363 break;
1364 case CmpInst::FCMP_UNO:
1365 CondCode = AArch64CC::VS;
1366 break;
1367 case CmpInst::FCMP_UEQ:
1368 CondCode = AArch64CC::EQ;
1369 CondCode2 = AArch64CC::VS;
1370 break;
1371 case CmpInst::FCMP_UGT:
1372 CondCode = AArch64CC::HI;
1373 break;
1374 case CmpInst::FCMP_UGE:
1375 CondCode = AArch64CC::PL;
1376 break;
1377 case CmpInst::FCMP_ULT:
1378 CondCode = AArch64CC::LT;
1379 break;
1380 case CmpInst::FCMP_ULE:
1381 CondCode = AArch64CC::LE;
1382 break;
1383 case CmpInst::FCMP_UNE:
1384 CondCode = AArch64CC::NE;
1385 break;
1386 }
1387}
1388
1389/// Convert an IR fp condition code to an AArch64 CC.
1390/// This differs from changeFPCCToORAArch64CC in that it returns cond codes that
1391/// should be AND'ed instead of OR'ed.
1392static void changeFPCCToANDAArch64CC(CmpInst::Predicate CC,
1393 AArch64CC::CondCode &CondCode,
1394 AArch64CC::CondCode &CondCode2) {
1395 CondCode2 = AArch64CC::AL;
1396 switch (CC) {
1397 default:
1398 changeFPCCToORAArch64CC(CC, CondCode, CondCode2);
1399 assert(CondCode2 == AArch64CC::AL)(static_cast <bool> (CondCode2 == AArch64CC::AL) ? void
(0) : __assert_fail ("CondCode2 == AArch64CC::AL", "llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 1399, __extension__ __PRETTY_FUNCTION__))
;
1400 break;
1401 case CmpInst::FCMP_ONE:
1402 // (a one b)
1403 // == ((a olt b) || (a ogt b))
1404 // == ((a ord b) && (a une b))
1405 CondCode = AArch64CC::VC;
1406 CondCode2 = AArch64CC::NE;
1407 break;
1408 case CmpInst::FCMP_UEQ:
1409 // (a ueq b)
1410 // == ((a uno b) || (a oeq b))
1411 // == ((a ule b) && (a uge b))
1412 CondCode = AArch64CC::PL;
1413 CondCode2 = AArch64CC::LE;
1414 break;
1415 }
1416}
1417
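The two special cases above rest on the identities (a one b) == ((a ord b) && (a une b)) and (a ueq b) == ((a ule b) && (a uge b)), which is why FCMP_ONE and FCMP_UEQ are the only predicates that need a second, AND'ed condition code. A small standalone check of those identities over a handful of values including NaN, with plain double comparisons standing in for the IR predicates:

// Standalone check of the FP predicate identities used above.
#include <cassert>
#include <cmath>
#include <limits>

static bool ord(double a, double b) { return !std::isnan(a) && !std::isnan(b); }
static bool uno(double a, double b) { return !ord(a, b); }
static bool one(double a, double b) { return ord(a, b) && a != b; }
static bool une(double a, double b) { return uno(a, b) || a != b; }
static bool ueq(double a, double b) { return uno(a, b) || a == b; }
static bool ule(double a, double b) { return uno(a, b) || a <= b; }
static bool uge(double a, double b) { return uno(a, b) || a >= b; }

int main() {
  const double NaN = std::numeric_limits<double>::quiet_NaN();
  const double Vals[] = {0.0, 1.0, -2.5, NaN};
  for (double a : Vals)
    for (double b : Vals) {
      // FCMP_ONE needs two AND'ed checks: ordered AND unordered-not-equal.
      assert(one(a, b) == (ord(a, b) && une(a, b)));
      // FCMP_UEQ needs two AND'ed checks: unordered-or-<= AND unordered-or->=.
      assert(ueq(a, b) == (ule(a, b) && uge(a, b)));
    }
  return 0;
}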
1418/// Return a register which can be used as a bit to test in a TB(N)Z.
1419static Register getTestBitReg(Register Reg, uint64_t &Bit, bool &Invert,
1420 MachineRegisterInfo &MRI) {
1421 assert(Reg.isValid() && "Expected valid register!")(static_cast <bool> (Reg.isValid() && "Expected valid register!"
) ? void (0) : __assert_fail ("Reg.isValid() && \"Expected valid register!\""
, "llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 1421, __extension__ __PRETTY_FUNCTION__))
;
1422 bool HasZext = false;
1423 while (MachineInstr *MI = getDefIgnoringCopies(Reg, MRI)) {
1424 unsigned Opc = MI->getOpcode();
1425
1426 if (!MI->getOperand(0).isReg() ||
1427 !MRI.hasOneNonDBGUse(MI->getOperand(0).getReg()))
1428 break;
1429
1430 // (tbz (any_ext x), b) -> (tbz x, b) if we don't use the extended bits.
1431 //
1432 // (tbz (trunc x), b) -> (tbz x, b) is always safe, because the bit number
1433 // on the truncated x is the same as the bit number on x.
1434 if (Opc == TargetOpcode::G_ANYEXT || Opc == TargetOpcode::G_ZEXT ||
1435 Opc == TargetOpcode::G_TRUNC) {
1436 if (Opc == TargetOpcode::G_ZEXT)
1437 HasZext = true;
1438
1439 Register NextReg = MI->getOperand(1).getReg();
1440 // Did we find something worth folding?
1441 if (!NextReg.isValid() || !MRI.hasOneNonDBGUse(NextReg))
1442 break;
1443
1444 // NextReg is worth folding. Keep looking.
1445 Reg = NextReg;
1446 continue;
1447 }
1448
1449 // Attempt to find a suitable operation with a constant on one side.
1450 Optional<uint64_t> C;
1451 Register TestReg;
1452 switch (Opc) {
1453 default:
1454 break;
1455 case TargetOpcode::G_AND:
1456 case TargetOpcode::G_XOR: {
1457 TestReg = MI->getOperand(1).getReg();
1458 Register ConstantReg = MI->getOperand(2).getReg();
1459 auto VRegAndVal = getIConstantVRegValWithLookThrough(ConstantReg, MRI);
1460 if (!VRegAndVal) {
1461 // AND commutes, check the other side for a constant.
1462 // FIXME: Can we canonicalize the constant so that it's always on the
1463 // same side at some point earlier?
1464 std::swap(ConstantReg, TestReg);
1465 VRegAndVal = getIConstantVRegValWithLookThrough(ConstantReg, MRI);
1466 }
1467 if (VRegAndVal) {
1468 if (HasZext)
1469 C = VRegAndVal->Value.getZExtValue();
1470 else
1471 C = VRegAndVal->Value.getSExtValue();
1472 }
1473 break;
1474 }
1475 case TargetOpcode::G_ASHR:
1476 case TargetOpcode::G_LSHR:
1477 case TargetOpcode::G_SHL: {
1478 TestReg = MI->getOperand(1).getReg();
1479 auto VRegAndVal =
1480 getIConstantVRegValWithLookThrough(MI->getOperand(2).getReg(), MRI);
1481 if (VRegAndVal)
1482 C = VRegAndVal->Value.getSExtValue();
1483 break;
1484 }
1485 }
1486
1487 // Didn't find a constant or viable register. Bail out of the loop.
1488 if (!C || !TestReg.isValid())
1489 break;
1490
1491 // We found a suitable instruction with a constant. Check to see if we can
1492 // walk through the instruction.
1493 Register NextReg;
1494 unsigned TestRegSize = MRI.getType(TestReg).getSizeInBits();
1495 switch (Opc) {
1496 default:
1497 break;
1498 case TargetOpcode::G_AND:
1499 // (tbz (and x, m), b) -> (tbz x, b) when the b-th bit of m is set.
1500 if ((*C >> Bit) & 1)
1501 NextReg = TestReg;
1502 break;
1503 case TargetOpcode::G_SHL:
1504 // (tbz (shl x, c), b) -> (tbz x, b-c) when b-c is positive and fits in
1505 // the type of the register.
1506 if (*C <= Bit && (Bit - *C) < TestRegSize) {
1507 NextReg = TestReg;
1508 Bit = Bit - *C;
1509 }
1510 break;
1511 case TargetOpcode::G_ASHR:
1512      // (tbz (ashr x, c), b) -> (tbz x, b+c) or (tbz x, msb) if b+c is >= # bits
1513 // in x
1514 NextReg = TestReg;
1515 Bit = Bit + *C;
1516 if (Bit >= TestRegSize)
1517 Bit = TestRegSize - 1;
1518 break;
1519 case TargetOpcode::G_LSHR:
1520 // (tbz (lshr x, c), b) -> (tbz x, b+c) when b + c is < # bits in x
1521 if ((Bit + *C) < TestRegSize) {
1522 NextReg = TestReg;
1523 Bit = Bit + *C;
1524 }
1525 break;
1526 case TargetOpcode::G_XOR:
1527 // We can walk through a G_XOR by inverting whether we use tbz/tbnz when
1528 // appropriate.
1529 //
1530 // e.g. If x' = xor x, c, and the b-th bit is set in c then
1531 //
1532 // tbz x', b -> tbnz x, b
1533 //
1534 // Because x' only has the b-th bit set if x does not.
1535 if ((*C >> Bit) & 1)
1536 Invert = !Invert;
1537 NextReg = TestReg;
1538 break;
1539 }
1540
1541 // Check if we found anything worth folding.
1542 if (!NextReg.isValid())
1543 return Reg;
1544 Reg = NextReg;
1545 }
1546
1547 return Reg;
1548}
1549
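getTestBitReg folds shifts into the tested bit index, e.g. (tbz (shl x, c), b) -> (tbz x, b-c) and (tbz (lshr x, c), b) -> (tbz x, b+c), under the range checks seen in the switch above. A standalone check of those two bit identities on 64-bit values, under the same side conditions (c <= b for the left shift, b + c < 64 for the right shift):

// Standalone check of two of the bit-index folds used by getTestBitReg above.
#include <cassert>
#include <cstdint>

static bool testBit(uint64_t V, unsigned B) { return (V >> B) & 1; }

int main() {
  const uint64_t X = 0xDEADBEEFCAFEF00DULL;
  for (unsigned C = 0; C < 16; ++C)
    for (unsigned B = 0; B < 48; ++B) {
      // (tbz (shl x, c), b) -> (tbz x, b - c), valid when c <= b < 64.
      if (C <= B)
        assert(testBit(X << C, B) == testBit(X, B - C));
      // (tbz (lshr x, c), b) -> (tbz x, b + c), valid when b + c < 64.
      if (B + C < 64)
        assert(testBit(X >> C, B) == testBit(X, B + C));
    }
  return 0;
}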
1550MachineInstr *AArch64InstructionSelector::emitTestBit(
1551 Register TestReg, uint64_t Bit, bool IsNegative, MachineBasicBlock *DstMBB,
1552 MachineIRBuilder &MIB) const {
1553 assert(TestReg.isValid())(static_cast <bool> (TestReg.isValid()) ? void (0) : __assert_fail
("TestReg.isValid()", "llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 1553, __extension__ __PRETTY_FUNCTION__))
;
1554 assert(ProduceNonFlagSettingCondBr &&(static_cast <bool> (ProduceNonFlagSettingCondBr &&
"Cannot emit TB(N)Z with speculation tracking!") ? void (0) :
__assert_fail ("ProduceNonFlagSettingCondBr && \"Cannot emit TB(N)Z with speculation tracking!\""
, "llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 1555, __extension__ __PRETTY_FUNCTION__))
1555 "Cannot emit TB(N)Z with speculation tracking!")(static_cast <bool> (ProduceNonFlagSettingCondBr &&
"Cannot emit TB(N)Z with speculation tracking!") ? void (0) :
__assert_fail ("ProduceNonFlagSettingCondBr && \"Cannot emit TB(N)Z with speculation tracking!\""
, "llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 1555, __extension__ __PRETTY_FUNCTION__))
;
1556 MachineRegisterInfo &MRI = *MIB.getMRI();
1557
1558 // Attempt to optimize the test bit by walking over instructions.
1559 TestReg = getTestBitReg(TestReg, Bit, IsNegative, MRI);
1560 LLT Ty = MRI.getType(TestReg);
1561 unsigned Size = Ty.getSizeInBits();
1562 assert(!Ty.isVector() && "Expected a scalar!")(static_cast <bool> (!Ty.isVector() && "Expected a scalar!"
) ? void (0) : __assert_fail ("!Ty.isVector() && \"Expected a scalar!\""
, "llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 1562, __extension__ __PRETTY_FUNCTION__))
;
1563 assert(Bit < 64 && "Bit is too large!")(static_cast <bool> (Bit < 64 && "Bit is too large!"
) ? void (0) : __assert_fail ("Bit < 64 && \"Bit is too large!\""
, "llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 1563, __extension__ __PRETTY_FUNCTION__))
;
1564
1565 // When the test register is a 64-bit register, we have to narrow to make
1566 // TBNZW work.
1567 bool UseWReg = Bit < 32;
1568 unsigned NecessarySize = UseWReg ? 32 : 64;
1569 if (Size != NecessarySize)
1570 TestReg = moveScalarRegClass(
1571 TestReg, UseWReg ? AArch64::GPR32RegClass : AArch64::GPR64RegClass,
1572 MIB);
1573
1574 static const unsigned OpcTable[2][2] = {{AArch64::TBZX, AArch64::TBNZX},
1575 {AArch64::TBZW, AArch64::TBNZW}};
1576 unsigned Opc = OpcTable[UseWReg][IsNegative];
1577 auto TestBitMI =
1578 MIB.buildInstr(Opc).addReg(TestReg).addImm(Bit).addMBB(DstMBB);
1579 constrainSelectedInstRegOperands(*TestBitMI, TII, TRI, RBI);
1580 return &*TestBitMI;
1581}
1582
1583bool AArch64InstructionSelector::tryOptAndIntoCompareBranch(
1584 MachineInstr &AndInst, bool Invert, MachineBasicBlock *DstMBB,
1585 MachineIRBuilder &MIB) const {
1586 assert(AndInst.getOpcode() == TargetOpcode::G_AND && "Expected G_AND only?")(static_cast <bool> (AndInst.getOpcode() == TargetOpcode
::G_AND && "Expected G_AND only?") ? void (0) : __assert_fail
("AndInst.getOpcode() == TargetOpcode::G_AND && \"Expected G_AND only?\""
, "llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 1586, __extension__ __PRETTY_FUNCTION__))
;
1587 // Given something like this:
1588 //
1589 // %x = ...Something...
1590 // %one = G_CONSTANT i64 1
1591 // %zero = G_CONSTANT i64 0
1592 // %and = G_AND %x, %one
1593 // %cmp = G_ICMP intpred(ne), %and, %zero
1594 // %cmp_trunc = G_TRUNC %cmp
1595 // G_BRCOND %cmp_trunc, %bb.3
1596 //
1597 // We want to try and fold the AND into the G_BRCOND and produce either a
1598 // TBNZ (when we have intpred(ne)) or a TBZ (when we have intpred(eq)).
1599 //
1600 // In this case, we'd get
1601 //
1602 // TBNZ %x %bb.3
1603 //
1604
1605 // Check if the AND has a constant on its RHS which we can use as a mask.
1606 // If it's a power of 2, then it's the same as checking a specific bit.
1607  // (e.g., ANDing with 8 == ANDing with 0b1000 == testing if bit 3 is set)
1608 auto MaybeBit = getIConstantVRegValWithLookThrough(
1609 AndInst.getOperand(2).getReg(), *MIB.getMRI());
1610 if (!MaybeBit)
1611 return false;
1612
1613 int32_t Bit = MaybeBit->Value.exactLogBase2();
1614 if (Bit < 0)
1615 return false;
1616
1617 Register TestReg = AndInst.getOperand(1).getReg();
1618
1619 // Emit a TB(N)Z.
1620 emitTestBit(TestReg, Bit, Invert, DstMBB, MIB);
1621 return true;
1622}
1623
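The fold above fires only when the AND mask is a power of two, because comparing and x, (1 << k) against zero is exactly a test of bit k; exactLogBase2 returning a non-negative value is what establishes that. A minimal model of the check on plain integers (the helper mirrors the spirit of APInt::exactLogBase2, not its implementation):

// Illustrative model of the mask check used above: a power-of-two mask m
// means "icmp ne (and x, m), 0" is a single-bit test of bit log2(m).
#include <cassert>
#include <cstdint>

// Returns the bit index if M has exactly one bit set, otherwise -1.
static int exactLogBase2(uint64_t M) {
  if (M == 0 || (M & (M - 1)) != 0)
    return -1;
  int Bit = 0;
  while ((M >>= 1) != 0)
    ++Bit;
  return Bit;
}

int main() {
  assert(exactLogBase2(8) == 3);    // and x, 8  ==> test bit 3 (TBZ/TBNZ)
  assert(exactLogBase2(1) == 0);    // and x, 1  ==> test bit 0
  assert(exactLogBase2(6) == -1);   // 0b110 is not a single bit: no fold
  assert(exactLogBase2(0) == -1);
  // The single-bit test itself:
  uint64_t X = 0b1010;
  assert(((X & 8) != 0) == (((X >> 3) & 1) != 0));
  return 0;
}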
1624MachineInstr *AArch64InstructionSelector::emitCBZ(Register CompareReg,
1625 bool IsNegative,
1626 MachineBasicBlock *DestMBB,
1627 MachineIRBuilder &MIB) const {
1628 assert(ProduceNonFlagSettingCondBr && "CBZ does not set flags!")(static_cast <bool> (ProduceNonFlagSettingCondBr &&
"CBZ does not set flags!") ? void (0) : __assert_fail ("ProduceNonFlagSettingCondBr && \"CBZ does not set flags!\""
, "llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 1628, __extension__ __PRETTY_FUNCTION__))
;
1629 MachineRegisterInfo &MRI = *MIB.getMRI();
1630 assert(RBI.getRegBank(CompareReg, MRI, TRI)->getID() ==(static_cast <bool> (RBI.getRegBank(CompareReg, MRI, TRI
)->getID() == AArch64::GPRRegBankID && "Expected GPRs only?"
) ? void (0) : __assert_fail ("RBI.getRegBank(CompareReg, MRI, TRI)->getID() == AArch64::GPRRegBankID && \"Expected GPRs only?\""
, "llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 1632, __extension__ __PRETTY_FUNCTION__))
1631 AArch64::GPRRegBankID &&(static_cast <bool> (RBI.getRegBank(CompareReg, MRI, TRI
)->getID() == AArch64::GPRRegBankID && "Expected GPRs only?"
) ? void (0) : __assert_fail ("RBI.getRegBank(CompareReg, MRI, TRI)->getID() == AArch64::GPRRegBankID && \"Expected GPRs only?\""
, "llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 1632, __extension__ __PRETTY_FUNCTION__))
1632 "Expected GPRs only?")(static_cast <bool> (RBI.getRegBank(CompareReg, MRI, TRI
)->getID() == AArch64::GPRRegBankID && "Expected GPRs only?"
) ? void (0) : __assert_fail ("RBI.getRegBank(CompareReg, MRI, TRI)->getID() == AArch64::GPRRegBankID && \"Expected GPRs only?\""
, "llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 1632, __extension__ __PRETTY_FUNCTION__))
;
1633 auto Ty = MRI.getType(CompareReg);
1634 unsigned Width = Ty.getSizeInBits();
1635 assert(!Ty.isVector() && "Expected scalar only?")(static_cast <bool> (!Ty.isVector() && "Expected scalar only?"
) ? void (0) : __assert_fail ("!Ty.isVector() && \"Expected scalar only?\""
, "llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 1635, __extension__ __PRETTY_FUNCTION__))
;
1636 assert(Width <= 64 && "Expected width to be at most 64?")(static_cast <bool> (Width <= 64 && "Expected width to be at most 64?"
) ? void (0) : __assert_fail ("Width <= 64 && \"Expected width to be at most 64?\""
, "llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 1636, __extension__ __PRETTY_FUNCTION__))
;
1637 static const unsigned OpcTable[2][2] = {{AArch64::CBZW, AArch64::CBZX},
1638 {AArch64::CBNZW, AArch64::CBNZX}};
1639 unsigned Opc = OpcTable[IsNegative][Width == 64];
1640 auto BranchMI = MIB.buildInstr(Opc, {}, {CompareReg}).addMBB(DestMBB);
1641 constrainSelectedInstRegOperands(*BranchMI, TII, TRI, RBI);
1642 return &*BranchMI;
1643}
1644
1645bool AArch64InstructionSelector::selectCompareBranchFedByFCmp(
1646 MachineInstr &I, MachineInstr &FCmp, MachineIRBuilder &MIB) const {
1647 assert(FCmp.getOpcode() == TargetOpcode::G_FCMP)(static_cast <bool> (FCmp.getOpcode() == TargetOpcode::
G_FCMP) ? void (0) : __assert_fail ("FCmp.getOpcode() == TargetOpcode::G_FCMP"
, "llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 1647, __extension__ __PRETTY_FUNCTION__))
;
1648 assert(I.getOpcode() == TargetOpcode::G_BRCOND)(static_cast <bool> (I.getOpcode() == TargetOpcode::G_BRCOND
) ? void (0) : __assert_fail ("I.getOpcode() == TargetOpcode::G_BRCOND"
, "llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 1648, __extension__ __PRETTY_FUNCTION__))
;
1649 // Unfortunately, the mapping of LLVM FP CC's onto AArch64 CC's isn't
1650 // totally clean. Some of them require two branches to implement.
1651 auto Pred = (CmpInst::Predicate)FCmp.getOperand(1).getPredicate();
1652 emitFPCompare(FCmp.getOperand(2).getReg(), FCmp.getOperand(3).getReg(), MIB,
1653 Pred);
1654 AArch64CC::CondCode CC1, CC2;
1655 changeFCMPPredToAArch64CC(static_cast<CmpInst::Predicate>(Pred), CC1, CC2);
1656 MachineBasicBlock *DestMBB = I.getOperand(1).getMBB();
1657 MIB.buildInstr(AArch64::Bcc, {}, {}).addImm(CC1).addMBB(DestMBB);
1658 if (CC2 != AArch64CC::AL)
1659 MIB.buildInstr(AArch64::Bcc, {}, {}).addImm(CC2).addMBB(DestMBB);
1660 I.eraseFromParent();
1661 return true;
1662}
1663
1664bool AArch64InstructionSelector::tryOptCompareBranchFedByICmp(
1665 MachineInstr &I, MachineInstr &ICmp, MachineIRBuilder &MIB) const {
1666 assert(ICmp.getOpcode() == TargetOpcode::G_ICMP)(static_cast <bool> (ICmp.getOpcode() == TargetOpcode::
G_ICMP) ? void (0) : __assert_fail ("ICmp.getOpcode() == TargetOpcode::G_ICMP"
, "llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 1666, __extension__ __PRETTY_FUNCTION__))
;
1667 assert(I.getOpcode() == TargetOpcode::G_BRCOND)(static_cast <bool> (I.getOpcode() == TargetOpcode::G_BRCOND
) ? void (0) : __assert_fail ("I.getOpcode() == TargetOpcode::G_BRCOND"
, "llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 1667, __extension__ __PRETTY_FUNCTION__))
;
1668 // Attempt to optimize the G_BRCOND + G_ICMP into a TB(N)Z/CB(N)Z.
1669 //
1670 // Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z
1671 // instructions will not be produced, as they are conditional branch
1672 // instructions that do not set flags.
1673 if (!ProduceNonFlagSettingCondBr)
1674 return false;
1675
1676 MachineRegisterInfo &MRI = *MIB.getMRI();
1677 MachineBasicBlock *DestMBB = I.getOperand(1).getMBB();
1678 auto Pred =
1679 static_cast<CmpInst::Predicate>(ICmp.getOperand(1).getPredicate());
1680 Register LHS = ICmp.getOperand(2).getReg();
1681 Register RHS = ICmp.getOperand(3).getReg();
1682
1683 // We're allowed to emit a TB(N)Z/CB(N)Z. Try to do that.
1684 auto VRegAndVal = getIConstantVRegValWithLookThrough(RHS, MRI);
1685 MachineInstr *AndInst = getOpcodeDef(TargetOpcode::G_AND, LHS, MRI);
1686
1687 // When we can emit a TB(N)Z, prefer that.
1688 //
1689 // Handle non-commutative condition codes first.
1690 // Note that we don't want to do this when we have a G_AND because it can
1691 // become a tst. The tst will make the test bit in the TB(N)Z redundant.
1692 if (VRegAndVal && !AndInst) {
1693 int64_t C = VRegAndVal->Value.getSExtValue();
1694
1695 // When we have a greater-than comparison, we can just test if the msb is
1696 // zero.
1697 if (C == -1 && Pred == CmpInst::ICMP_SGT) {
1698 uint64_t Bit = MRI.getType(LHS).getSizeInBits() - 1;
1699 emitTestBit(LHS, Bit, /*IsNegative = */ false, DestMBB, MIB);
1700 I.eraseFromParent();
1701 return true;
1702 }
1703
1704 // When we have a less than comparison, we can just test if the msb is not
1705 // zero.
1706 if (C == 0 && Pred == CmpInst::ICMP_SLT) {
1707 uint64_t Bit = MRI.getType(LHS).getSizeInBits() - 1;
1708 emitTestBit(LHS, Bit, /*IsNegative = */ true, DestMBB, MIB);
1709 I.eraseFromParent();
1710 return true;
1711 }
1712 }
1713
1714 // Attempt to handle commutative condition codes. Right now, that's only
1715 // eq/ne.
1716 if (ICmpInst::isEquality(Pred)) {
1717 if (!VRegAndVal) {
1718 std::swap(RHS, LHS);
1719 VRegAndVal = getIConstantVRegValWithLookThrough(RHS, MRI);
1720 AndInst = getOpcodeDef(TargetOpcode::G_AND, LHS, MRI);
1721 }
1722
1723 if (VRegAndVal && VRegAndVal->Value == 0) {
1724 // If there's a G_AND feeding into this branch, try to fold it away by
1725 // emitting a TB(N)Z instead.
1726 //
1727 // Note: If we have LT, then it *is* possible to fold, but it wouldn't be
1728 // beneficial. When we have an AND and LT, we need a TST/ANDS, so folding
1729 // would be redundant.
1730 if (AndInst &&
1731 tryOptAndIntoCompareBranch(
1732 *AndInst, /*Invert = */ Pred == CmpInst::ICMP_NE, DestMBB, MIB)) {
1733 I.eraseFromParent();
1734 return true;
1735 }
1736
1737 // Otherwise, try to emit a CB(N)Z instead.
1738 auto LHSTy = MRI.getType(LHS);
1739 if (!LHSTy.isVector() && LHSTy.getSizeInBits() <= 64) {
1740 emitCBZ(LHS, /*IsNegative = */ Pred == CmpInst::ICMP_NE, DestMBB, MIB);
1741 I.eraseFromParent();
1742 return true;
1743 }
1744 }
1745 }
1746
1747 return false;
1748}
1749
1750bool AArch64InstructionSelector::selectCompareBranchFedByICmp(
1751 MachineInstr &I, MachineInstr &ICmp, MachineIRBuilder &MIB) const {
1752 assert(ICmp.getOpcode() == TargetOpcode::G_ICMP)(static_cast <bool> (ICmp.getOpcode() == TargetOpcode::
G_ICMP) ? void (0) : __assert_fail ("ICmp.getOpcode() == TargetOpcode::G_ICMP"
, "llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 1752, __extension__ __PRETTY_FUNCTION__))
;
1753 assert(I.getOpcode() == TargetOpcode::G_BRCOND)(static_cast <bool> (I.getOpcode() == TargetOpcode::G_BRCOND
) ? void (0) : __assert_fail ("I.getOpcode() == TargetOpcode::G_BRCOND"
, "llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 1753, __extension__ __PRETTY_FUNCTION__))
;
1754 if (tryOptCompareBranchFedByICmp(I, ICmp, MIB))
1755 return true;
1756
1757 // Couldn't optimize. Emit a compare + a Bcc.
1758 MachineBasicBlock *DestMBB = I.getOperand(1).getMBB();
1759 auto PredOp = ICmp.getOperand(1);
1760 emitIntegerCompare(ICmp.getOperand(2), ICmp.getOperand(3), PredOp, MIB);
1761 const AArch64CC::CondCode CC = changeICMPPredToAArch64CC(
1762 static_cast<CmpInst::Predicate>(PredOp.getPredicate()));
1763 MIB.buildInstr(AArch64::Bcc, {}, {}).addImm(CC).addMBB(DestMBB);
1764 I.eraseFromParent();
1765 return true;
1766}
1767
1768bool AArch64InstructionSelector::selectCompareBranch(
1769 MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) {
1770 Register CondReg = I.getOperand(0).getReg();
1771 MachineInstr *CCMI = MRI.getVRegDef(CondReg);
1772 if (CCMI->getOpcode() == TargetOpcode::G_TRUNC) {
1773 CondReg = CCMI->getOperand(1).getReg();
1774 CCMI = MRI.getVRegDef(CondReg);
1775 }
1776
1777 // Try to select the G_BRCOND using whatever is feeding the condition if
1778 // possible.
1779 unsigned CCMIOpc = CCMI->getOpcode();
1780 if (CCMIOpc == TargetOpcode::G_FCMP)
1781 return selectCompareBranchFedByFCmp(I, *CCMI, MIB);
1782 if (CCMIOpc == TargetOpcode::G_ICMP)
1783 return selectCompareBranchFedByICmp(I, *CCMI, MIB);
1784
1785 // Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z
1786 // instructions will not be produced, as they are conditional branch
1787 // instructions that do not set flags.
1788 if (ProduceNonFlagSettingCondBr) {
1789 emitTestBit(CondReg, /*Bit = */ 0, /*IsNegative = */ true,
1790 I.getOperand(1).getMBB(), MIB);
1791 I.eraseFromParent();
1792 return true;
1793 }
1794
1795 // Can't emit TB(N)Z/CB(N)Z. Emit a tst + bcc instead.
1796 auto TstMI =
1797 MIB.buildInstr(AArch64::ANDSWri, {LLT::scalar(32)}, {CondReg}).addImm(1);
1798 constrainSelectedInstRegOperands(*TstMI, TII, TRI, RBI);
1799 auto Bcc = MIB.buildInstr(AArch64::Bcc)
1800 .addImm(AArch64CC::EQ)
1801 .addMBB(I.getOperand(1).getMBB());
1802 I.eraseFromParent();
1803 return constrainSelectedInstRegOperands(*Bcc, TII, TRI, RBI);
1804}
1805
1806/// Returns the element immediate value of a vector shift operand if found.
1807/// This needs to detect a splat-like operation, e.g. a G_BUILD_VECTOR.
1808static Optional<int64_t> getVectorShiftImm(Register Reg,
1809 MachineRegisterInfo &MRI) {
1810 assert(MRI.getType(Reg).isVector() && "Expected a *vector* shift operand")(static_cast <bool> (MRI.getType(Reg).isVector() &&
"Expected a *vector* shift operand") ? void (0) : __assert_fail
("MRI.getType(Reg).isVector() && \"Expected a *vector* shift operand\""
, "llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 1810, __extension__ __PRETTY_FUNCTION__))
;
1811 MachineInstr *OpMI = MRI.getVRegDef(Reg);
1812 assert(OpMI && "Expected to find a vreg def for vector shift operand")(static_cast <bool> (OpMI && "Expected to find a vreg def for vector shift operand"
) ? void (0) : __assert_fail ("OpMI && \"Expected to find a vreg def for vector shift operand\""
, "llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 1812, __extension__ __PRETTY_FUNCTION__))
;
1813 return getAArch64VectorSplatScalar(*OpMI, MRI);
1814}
1815
1816/// Matches and returns the shift immediate value for a SHL instruction given
1817/// a shift operand.
1818static Optional<int64_t> getVectorSHLImm(LLT SrcTy, Register Reg, MachineRegisterInfo &MRI) {
1819 Optional<int64_t> ShiftImm = getVectorShiftImm(Reg, MRI);
1820 if (!ShiftImm)
1821 return None;
1822 // Check the immediate is in range for a SHL.
1823 int64_t Imm = *ShiftImm;
1824 if (Imm < 0)
1825 return None;
1826 switch (SrcTy.getElementType().getSizeInBits()) {
1827 default:
1828 LLVM_DEBUG(dbgs() << "Unhandled element type for vector shift")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "Unhandled element type for vector shift"
; } } while (false)
;
1829 return None;
1830 case 8:
1831 if (Imm > 7)
1832 return None;
1833 break;
1834 case 16:
1835 if (Imm > 15)
1836 return None;
1837 break;
1838 case 32:
1839 if (Imm > 31)
1840 return None;
1841 break;
1842 case 64:
1843 if (Imm > 63)
1844 return None;
1845 break;
1846 }
1847 return Imm;
1848}
1849
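getVectorSHLImm only accepts a splat constant that is a legal per-element shift amount, i.e. 0 <= imm <= element bits - 1, which is what the per-size switch above enforces. A minimal model of that range check:

// Minimal model of the per-element range check above: a splat shift amount
// can use the immediate SHL form only when 0 <= imm < element size in bits.
#include <cassert>
#include <cstdint>

static bool isLegalVectorShlImm(int64_t Imm, unsigned EltBits) {
  return Imm >= 0 && Imm < static_cast<int64_t>(EltBits);
}

int main() {
  assert(isLegalVectorShlImm(7, 8));    // 8-bit lanes: shifts 0..7
  assert(!isLegalVectorShlImm(8, 8));   // too wide for an 8-bit lane
  assert(isLegalVectorShlImm(31, 32));  // 32-bit lanes: shifts 0..31
  assert(!isLegalVectorShlImm(-1, 32)); // negative amounts are rejected
  return 0;
}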
1850bool AArch64InstructionSelector::selectVectorSHL(MachineInstr &I,
1851 MachineRegisterInfo &MRI) {
1852 assert(I.getOpcode() == TargetOpcode::G_SHL)(static_cast <bool> (I.getOpcode() == TargetOpcode::G_SHL
) ? void (0) : __assert_fail ("I.getOpcode() == TargetOpcode::G_SHL"
, "llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 1852, __extension__ __PRETTY_FUNCTION__))
;
1853 Register DstReg = I.getOperand(0).getReg();
1854 const LLT Ty = MRI.getType(DstReg);
1855 Register Src1Reg = I.getOperand(1).getReg();
1856 Register Src2Reg = I.getOperand(2).getReg();
1857
1858 if (!Ty.isVector())
1859 return false;
1860
1861 // Check if we have a vector of constants on RHS that we can select as the
1862 // immediate form.
1863 Optional<int64_t> ImmVal = getVectorSHLImm(Ty, Src2Reg, MRI);
1864
1865 unsigned Opc = 0;
1866 if (Ty == LLT::fixed_vector(2, 64)) {
1867 Opc = ImmVal ? AArch64::SHLv2i64_shift : AArch64::USHLv2i64;
1868 } else if (Ty == LLT::fixed_vector(4, 32)) {
1869 Opc = ImmVal ? AArch64::SHLv4i32_shift : AArch64::USHLv4i32;
1870 } else if (Ty == LLT::fixed_vector(2, 32)) {
1871 Opc = ImmVal ? AArch64::SHLv2i32_shift : AArch64::USHLv2i32;
1872 } else if (Ty == LLT::fixed_vector(4, 16)) {
1873 Opc = ImmVal ? AArch64::SHLv4i16_shift : AArch64::USHLv4i16;
1874 } else if (Ty == LLT::fixed_vector(8, 16)) {
1875 Opc = ImmVal ? AArch64::SHLv8i16_shift : AArch64::USHLv8i16;
1876 } else if (Ty == LLT::fixed_vector(16, 8)) {
1877 Opc = ImmVal ? AArch64::SHLv16i8_shift : AArch64::USHLv16i8;
1878 } else if (Ty == LLT::fixed_vector(8, 8)) {
1879 Opc = ImmVal ? AArch64::SHLv8i8_shift : AArch64::USHLv8i8;
1880 } else {
1881 LLVM_DEBUG(dbgs() << "Unhandled G_SHL type")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "Unhandled G_SHL type"; }
} while (false)
;
1882 return false;
1883 }
1884
1885 auto Shl = MIB.buildInstr(Opc, {DstReg}, {Src1Reg});
1886 if (ImmVal)
1887 Shl.addImm(*ImmVal);
1888 else
1889 Shl.addUse(Src2Reg);
1890 constrainSelectedInstRegOperands(*Shl, TII, TRI, RBI);
1891 I.eraseFromParent();
1892 return true;
1893}
1894
1895bool AArch64InstructionSelector::selectVectorAshrLshr(
1896 MachineInstr &I, MachineRegisterInfo &MRI) {
1897 assert(I.getOpcode() == TargetOpcode::G_ASHR ||(static_cast <bool> (I.getOpcode() == TargetOpcode::G_ASHR
|| I.getOpcode() == TargetOpcode::G_LSHR) ? void (0) : __assert_fail
("I.getOpcode() == TargetOpcode::G_ASHR || I.getOpcode() == TargetOpcode::G_LSHR"
, "llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 1898, __extension__ __PRETTY_FUNCTION__))
1898 I.getOpcode() == TargetOpcode::G_LSHR)(static_cast <bool> (I.getOpcode() == TargetOpcode::G_ASHR
|| I.getOpcode() == TargetOpcode::G_LSHR) ? void (0) : __assert_fail
("I.getOpcode() == TargetOpcode::G_ASHR || I.getOpcode() == TargetOpcode::G_LSHR"
, "llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 1898, __extension__ __PRETTY_FUNCTION__))
;
1899 Register DstReg = I.getOperand(0).getReg();
1900 const LLT Ty = MRI.getType(DstReg);
1901 Register Src1Reg = I.getOperand(1).getReg();
1902 Register Src2Reg = I.getOperand(2).getReg();
1903
1904 if (!Ty.isVector())
1905 return false;
1906
1907 bool IsASHR = I.getOpcode() == TargetOpcode::G_ASHR;
1908
1909 // We expect the immediate case to be lowered in the PostLegalCombiner to
1910 // AArch64ISD::VASHR or AArch64ISD::VLSHR equivalents.
1911
1912  // There is no vector shift-right-by-register instruction, but the shift-left-
1913  // by-register instruction takes a signed shift amount, where negative values
1914  // specify a right shift.
1915
1916 unsigned Opc = 0;
1917 unsigned NegOpc = 0;
1918 const TargetRegisterClass *RC =
1919 getRegClassForTypeOnBank(Ty, RBI.getRegBank(AArch64::FPRRegBankID), RBI);
1920 if (Ty == LLT::fixed_vector(2, 64)) {
1921 Opc = IsASHR ? AArch64::SSHLv2i64 : AArch64::USHLv2i64;
1922 NegOpc = AArch64::NEGv2i64;
1923 } else if (Ty == LLT::fixed_vector(4, 32)) {
1924 Opc = IsASHR ? AArch64::SSHLv4i32 : AArch64::USHLv4i32;
1925 NegOpc = AArch64::NEGv4i32;
1926 } else if (Ty == LLT::fixed_vector(2, 32)) {
1927 Opc = IsASHR ? AArch64::SSHLv2i32 : AArch64::USHLv2i32;
1928 NegOpc = AArch64::NEGv2i32;
1929 } else if (Ty == LLT::fixed_vector(4, 16)) {
1930 Opc = IsASHR ? AArch64::SSHLv4i16 : AArch64::USHLv4i16;
1931 NegOpc = AArch64::NEGv4i16;
1932 } else if (Ty == LLT::fixed_vector(8, 16)) {
1933 Opc = IsASHR ? AArch64::SSHLv8i16 : AArch64::USHLv8i16;
1934 NegOpc = AArch64::NEGv8i16;
1935 } else if (Ty == LLT::fixed_vector(16, 8)) {
1936 Opc = IsASHR ? AArch64::SSHLv16i8 : AArch64::USHLv16i8;
1937 NegOpc = AArch64::NEGv16i8;
1938 } else if (Ty == LLT::fixed_vector(8, 8)) {
1939 Opc = IsASHR ? AArch64::SSHLv8i8 : AArch64::USHLv8i8;
1940 NegOpc = AArch64::NEGv8i8;
1941 } else {
1942 LLVM_DEBUG(dbgs() << "Unhandled G_ASHR type")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "Unhandled G_ASHR type"; }
} while (false)
;
1943 return false;
1944 }
1945
1946 auto Neg = MIB.buildInstr(NegOpc, {RC}, {Src2Reg});
1947 constrainSelectedInstRegOperands(*Neg, TII, TRI, RBI);
1948 auto SShl = MIB.buildInstr(Opc, {DstReg}, {Src1Reg, Neg});
1949 constrainSelectedInstRegOperands(*SShl, TII, TRI, RBI);
1950 I.eraseFromParent();
1951 return true;
1952}
1953
1954bool AArch64InstructionSelector::selectVaStartAAPCS(
1955 MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const {
1956 return false;
1957}
1958
1959bool AArch64InstructionSelector::selectVaStartDarwin(
1960 MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const {
1961 AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
1962 Register ListReg = I.getOperand(0).getReg();
1963
1964 Register ArgsAddrReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
1965
1966 auto MIB =
1967 BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::ADDXri))
1968 .addDef(ArgsAddrReg)
1969 .addFrameIndex(FuncInfo->getVarArgsStackIndex())
1970 .addImm(0)
1971 .addImm(0);
1972
1973 constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
1974
1975 MIB = BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::STRXui))
1976 .addUse(ArgsAddrReg)
1977 .addUse(ListReg)
1978 .addImm(0)
1979 .addMemOperand(*I.memoperands_begin());
1980
1981 constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
1982 I.eraseFromParent();
1983 return true;
1984}
1985
1986void AArch64InstructionSelector::materializeLargeCMVal(
1987 MachineInstr &I, const Value *V, unsigned OpFlags) {
1988 MachineBasicBlock &MBB = *I.getParent();
1989 MachineFunction &MF = *MBB.getParent();
1990 MachineRegisterInfo &MRI = MF.getRegInfo();
1991
1992 auto MovZ = MIB.buildInstr(AArch64::MOVZXi, {&AArch64::GPR64RegClass}, {});
1993 MovZ->addOperand(MF, I.getOperand(1));
1994 MovZ->getOperand(1).setTargetFlags(OpFlags | AArch64II::MO_G0 |
1995 AArch64II::MO_NC);
1996 MovZ->addOperand(MF, MachineOperand::CreateImm(0));
1997 constrainSelectedInstRegOperands(*MovZ, TII, TRI, RBI);
1998
1999 auto BuildMovK = [&](Register SrcReg, unsigned char Flags, unsigned Offset,
2000 Register ForceDstReg) {
2001 Register DstReg = ForceDstReg
2002 ? ForceDstReg
2003 : MRI.createVirtualRegister(&AArch64::GPR64RegClass);
2004 auto MovI = MIB.buildInstr(AArch64::MOVKXi).addDef(DstReg).addUse(SrcReg);
2005 if (auto *GV = dyn_cast<GlobalValue>(V)) {
2006 MovI->addOperand(MF, MachineOperand::CreateGA(
2007 GV, MovZ->getOperand(1).getOffset(), Flags));
2008 } else {
2009 MovI->addOperand(
2010 MF, MachineOperand::CreateBA(cast<BlockAddress>(V),
2011 MovZ->getOperand(1).getOffset(), Flags));
2012 }
2013 MovI->addOperand(MF, MachineOperand::CreateImm(Offset));
2014 constrainSelectedInstRegOperands(*MovI, TII, TRI, RBI);
2015 return DstReg;
2016 };
2017 Register DstReg = BuildMovK(MovZ.getReg(0),
2018 AArch64II::MO_G1 | AArch64II::MO_NC, 16, 0);
2019 DstReg = BuildMovK(DstReg, AArch64II::MO_G2 | AArch64II::MO_NC, 32, 0);
2020 BuildMovK(DstReg, AArch64II::MO_G3, 48, I.getOperand(0).getReg());
2021}
2022
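materializeLargeCMVal builds a full 64-bit address as a MOVZ of one 16-bit granule followed by three MOVKs that keep the bits already produced and insert the next granule at bit offsets 16, 32, and 48; the MO_G0..MO_G3 flags select which granule of the symbol each instruction receives. A standalone sketch of the same movz/movk arithmetic on a plain constant (the helpers model the instruction effects, not MachineInstr construction):

// Standalone model of the MOVZ + MOVK x3 sequence built above: each step
// inserts one 16-bit granule of the value at bit offsets 0, 16, 32, 48.
#include <cassert>
#include <cstdint>

static uint64_t movz(uint16_t Imm, unsigned Shift) {
  return static_cast<uint64_t>(Imm) << Shift;            // zero the rest
}
static uint64_t movk(uint64_t Reg, uint16_t Imm, unsigned Shift) {
  Reg &= ~(0xFFFFULL << Shift);                           // clear the granule
  return Reg | (static_cast<uint64_t>(Imm) << Shift);     // keep, then insert
}

int main() {
  const uint64_t Addr = 0x1122334455667788ULL;
  uint64_t R = movz(Addr & 0xFFFF, 0);                    // G0
  R = movk(R, (Addr >> 16) & 0xFFFF, 16);                 // G1
  R = movk(R, (Addr >> 32) & 0xFFFF, 32);                 // G2
  R = movk(R, (Addr >> 48) & 0xFFFF, 48);                 // G3
  assert(R == Addr);
  return 0;
}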
2023bool AArch64InstructionSelector::preISelLower(MachineInstr &I) {
2024 MachineBasicBlock &MBB = *I.getParent();
2025 MachineFunction &MF = *MBB.getParent();
2026 MachineRegisterInfo &MRI = MF.getRegInfo();
2027
2028 switch (I.getOpcode()) {
2029 case TargetOpcode::G_STORE: {
2030 bool Changed = contractCrossBankCopyIntoStore(I, MRI);
2031 MachineOperand &SrcOp = I.getOperand(0);
2032 if (MRI.getType(SrcOp.getReg()).isPointer()) {
2033 // Allow matching with imported patterns for stores of pointers. Unlike
2034 // G_LOAD/G_PTR_ADD, we may not have selected all users. So, emit a copy
2035 // and constrain.
2036 auto Copy = MIB.buildCopy(LLT::scalar(64), SrcOp);
2037 Register NewSrc = Copy.getReg(0);
2038 SrcOp.setReg(NewSrc);
2039 RBI.constrainGenericRegister(NewSrc, AArch64::GPR64RegClass, MRI);
2040 Changed = true;
2041 }
2042 return Changed;
2043 }
2044 case TargetOpcode::G_PTR_ADD:
2045 return convertPtrAddToAdd(I, MRI);
2046 case TargetOpcode::G_LOAD: {
2047 // For scalar loads of pointers, we try to convert the dest type from p0
2048 // to s64 so that our imported patterns can match. Like with the G_PTR_ADD
2049 // conversion, this should be ok because all users should have been
2050 // selected already, so the type doesn't matter for them.
2051 Register DstReg = I.getOperand(0).getReg();
2052 const LLT DstTy = MRI.getType(DstReg);
2053 if (!DstTy.isPointer())
2054 return false;
2055 MRI.setType(DstReg, LLT::scalar(64));
2056 return true;
2057 }
2058 case AArch64::G_DUP: {
2059 // Convert the type from p0 to s64 to help selection.
2060 LLT DstTy = MRI.getType(I.getOperand(0).getReg());
2061 if (!DstTy.getElementType().isPointer())
2062 return false;
2063 auto NewSrc = MIB.buildCopy(LLT::scalar(64), I.getOperand(1).getReg());
2064 MRI.setType(I.getOperand(0).getReg(),
2065 DstTy.changeElementType(LLT::scalar(64)));
2066 MRI.setRegClass(NewSrc.getReg(0), &AArch64::GPR64RegClass);
2067 I.getOperand(1).setReg(NewSrc.getReg(0));
2068 return true;
2069 }
2070 case TargetOpcode::G_UITOFP:
2071 case TargetOpcode::G_SITOFP: {
2072 // If both source and destination regbanks are FPR, then convert the opcode
2073    // to G_SITOF/G_UITOF so that the importer can select it to an fpr variant.
2074 // Otherwise, it ends up matching an fpr/gpr variant and adding a cross-bank
2075 // copy.
2076 Register SrcReg = I.getOperand(1).getReg();
2077 LLT SrcTy = MRI.getType(SrcReg);
2078 LLT DstTy = MRI.getType(I.getOperand(0).getReg());
2079 if (SrcTy.isVector() || SrcTy.getSizeInBits() != DstTy.getSizeInBits())
2080 return false;
2081
2082 if (RBI.getRegBank(SrcReg, MRI, TRI)->getID() == AArch64::FPRRegBankID) {
2083 if (I.getOpcode() == TargetOpcode::G_SITOFP)
2084 I.setDesc(TII.get(AArch64::G_SITOF));
2085 else
2086 I.setDesc(TII.get(AArch64::G_UITOF));
2087 return true;
2088 }
2089 return false;
2090 }
2091 default:
2092 return false;
2093 }
2094}
2095
2096/// This lowering tries to look for G_PTR_ADD instructions and then converts
2097/// them to a standard G_ADD with a COPY on the source.
2098///
2099/// The motivation behind this is to expose the add semantics to the imported
2100/// tablegen patterns. We shouldn't need to check for uses being loads/stores,
2101/// because the selector works bottom up, uses before defs. By the time we
2102/// end up trying to select a G_PTR_ADD, we should have already attempted to
2103/// fold this into addressing modes and were therefore unsuccessful.
2104bool AArch64InstructionSelector::convertPtrAddToAdd(
2105 MachineInstr &I, MachineRegisterInfo &MRI) {
2106 assert(I.getOpcode() == TargetOpcode::G_PTR_ADD && "Expected G_PTR_ADD")(static_cast <bool> (I.getOpcode() == TargetOpcode::G_PTR_ADD
&& "Expected G_PTR_ADD") ? void (0) : __assert_fail (
"I.getOpcode() == TargetOpcode::G_PTR_ADD && \"Expected G_PTR_ADD\""
, "llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 2106, __extension__ __PRETTY_FUNCTION__))
;
2107 Register DstReg = I.getOperand(0).getReg();
2108 Register AddOp1Reg = I.getOperand(1).getReg();
2109 const LLT PtrTy = MRI.getType(DstReg);
2110 if (PtrTy.getAddressSpace() != 0)
2111 return false;
2112
2113 const LLT CastPtrTy =
2114 PtrTy.isVector() ? LLT::fixed_vector(2, 64) : LLT::scalar(64);
2115 auto PtrToInt = MIB.buildPtrToInt(CastPtrTy, AddOp1Reg);
2116 // Set regbanks on the registers.
2117 if (PtrTy.isVector())
2118 MRI.setRegBank(PtrToInt.getReg(0), RBI.getRegBank(AArch64::FPRRegBankID));
2119 else
2120 MRI.setRegBank(PtrToInt.getReg(0), RBI.getRegBank(AArch64::GPRRegBankID));
2121
2122 // Now turn the %dst(p0) = G_PTR_ADD %base, off into:
2123 // %dst(intty) = G_ADD %intbase, off
2124 I.setDesc(TII.get(TargetOpcode::G_ADD));
2125 MRI.setType(DstReg, CastPtrTy);
2126 I.getOperand(1).setReg(PtrToInt.getReg(0));
2127 if (!select(*PtrToInt)) {
2128 LLVM_DEBUG(dbgs() << "Failed to select G_PTRTOINT in convertPtrAddToAdd")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "Failed to select G_PTRTOINT in convertPtrAddToAdd"
; } } while (false)
;
2129 return false;
2130 }
2131
2132 // Also take the opportunity here to try to do some optimization.
2133 // Try to convert this into a G_SUB if the offset is a 0-x negate idiom.
2134 Register NegatedReg;
2135 if (!mi_match(I.getOperand(2).getReg(), MRI, m_Neg(m_Reg(NegatedReg))))
2136 return true;
2137 I.getOperand(2).setReg(NegatedReg);
2138 I.setDesc(TII.get(TargetOpcode::G_SUB));
2139 return true;
2140}
2141
2142bool AArch64InstructionSelector::earlySelectSHL(MachineInstr &I,
2143 MachineRegisterInfo &MRI) {
2144 // We try to match the immediate variant of LSL, which is actually an alias
2145 // for a special case of UBFM. Otherwise, we fall back to the imported
2146 // selector which will match the register variant.
2147 assert(I.getOpcode() == TargetOpcode::G_SHL && "unexpected op")(static_cast <bool> (I.getOpcode() == TargetOpcode::G_SHL
&& "unexpected op") ? void (0) : __assert_fail ("I.getOpcode() == TargetOpcode::G_SHL && \"unexpected op\""
, "llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 2147, __extension__ __PRETTY_FUNCTION__))
;
2148 const auto &MO = I.getOperand(2);
2149 auto VRegAndVal = getIConstantVRegVal(MO.getReg(), MRI);
2150 if (!VRegAndVal)
2151 return false;
2152
2153 const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
2154 if (DstTy.isVector())
2155 return false;
2156 bool Is64Bit = DstTy.getSizeInBits() == 64;
2157 auto Imm1Fn = Is64Bit ? selectShiftA_64(MO) : selectShiftA_32(MO);
2158 auto Imm2Fn = Is64Bit ? selectShiftB_64(MO) : selectShiftB_32(MO);
2159
2160 if (!Imm1Fn || !Imm2Fn)
2161 return false;
2162
2163 auto NewI =
2164 MIB.buildInstr(Is64Bit ? AArch64::UBFMXri : AArch64::UBFMWri,
2165 {I.getOperand(0).getReg()}, {I.getOperand(1).getReg()});
2166
2167 for (auto &RenderFn : *Imm1Fn)
2168 RenderFn(NewI);
2169 for (auto &RenderFn : *Imm2Fn)
2170 RenderFn(NewI);
2171
2172 I.eraseFromParent();
2173 return constrainSelectedInstRegOperands(*NewI, TII, TRI, RBI);
2174}
2175
2176bool AArch64InstructionSelector::contractCrossBankCopyIntoStore(
2177 MachineInstr &I, MachineRegisterInfo &MRI) {
2178 assert(I.getOpcode() == TargetOpcode::G_STORE && "Expected G_STORE")(static_cast <bool> (I.getOpcode() == TargetOpcode::G_STORE
&& "Expected G_STORE") ? void (0) : __assert_fail ("I.getOpcode() == TargetOpcode::G_STORE && \"Expected G_STORE\""
, "llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 2178, __extension__ __PRETTY_FUNCTION__))
;
2179 // If we're storing a scalar, it doesn't matter what register bank that
2180 // scalar is on. All that matters is the size.
2181 //
2182 // So, if we see something like this (with a 32-bit scalar as an example):
2183 //
2184 // %x:gpr(s32) = ... something ...
2185 // %y:fpr(s32) = COPY %x:gpr(s32)
2186 // G_STORE %y:fpr(s32)
2187 //
2188 // We can fix this up into something like this:
2189 //
2190 // G_STORE %x:gpr(s32)
2191 //
2192 // And then continue the selection process normally.
2193 Register DefDstReg = getSrcRegIgnoringCopies(I.getOperand(0).getReg(), MRI);
2194 if (!DefDstReg.isValid())
2195 return false;
2196 LLT DefDstTy = MRI.getType(DefDstReg);
2197 Register StoreSrcReg = I.getOperand(0).getReg();
2198 LLT StoreSrcTy = MRI.getType(StoreSrcReg);
2199
2200 // If we get something strange like a physical register, then we shouldn't
2201 // go any further.
2202 if (!DefDstTy.isValid())
2203 return false;
2204
2205 // Are the source and dst types the same size?
2206 if (DefDstTy.getSizeInBits() != StoreSrcTy.getSizeInBits())
2207 return false;
2208
2209 if (RBI.getRegBank(StoreSrcReg, MRI, TRI) ==
2210 RBI.getRegBank(DefDstReg, MRI, TRI))
2211 return false;
2212
2213 // We have a cross-bank copy, which is entering a store. Let's fold it.
2214 I.getOperand(0).setReg(DefDstReg);
2215 return true;
2216}
2217
2218bool AArch64InstructionSelector::earlySelect(MachineInstr &I) {
2219 assert(I.getParent() && "Instruction should be in a basic block!")(static_cast <bool> (I.getParent() && "Instruction should be in a basic block!"
) ? void (0) : __assert_fail ("I.getParent() && \"Instruction should be in a basic block!\""
, "llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 2219, __extension__ __PRETTY_FUNCTION__))
;
2220 assert(I.getParent()->getParent() && "Instruction should be in a function!")(static_cast <bool> (I.getParent()->getParent() &&
"Instruction should be in a function!") ? void (0) : __assert_fail
("I.getParent()->getParent() && \"Instruction should be in a function!\""
, "llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 2220, __extension__ __PRETTY_FUNCTION__))
;
2221
2222 MachineBasicBlock &MBB = *I.getParent();
2223 MachineFunction &MF = *MBB.getParent();
2224 MachineRegisterInfo &MRI = MF.getRegInfo();
2225
2226 switch (I.getOpcode()) {
2227 case AArch64::G_DUP: {
2228 // Before selecting a DUP instruction, check if it is better selected as a
2229 // MOV or load from a constant pool.
2230 Register Src = I.getOperand(1).getReg();
2231 auto ValAndVReg = getIConstantVRegValWithLookThrough(Src, MRI);
2232 if (!ValAndVReg)
2233 return false;
2234 LLVMContext &Ctx = MF.getFunction().getContext();
2235 Register Dst = I.getOperand(0).getReg();
2236 auto *CV = ConstantDataVector::getSplat(
2237 MRI.getType(Dst).getNumElements(),
2238 ConstantInt::get(Type::getIntNTy(Ctx, MRI.getType(Src).getSizeInBits()),
2239 ValAndVReg->Value));
2240 if (!emitConstantVector(Dst, CV, MIB, MRI))
2241 return false;
2242 I.eraseFromParent();
2243 return true;
2244 }
2245 case TargetOpcode::G_SEXT:
2246 // Check for i64 sext(i32 vector_extract) prior to tablegen to select SMOV
2247 // over a normal extend.
2248 if (selectUSMovFromExtend(I, MRI))
2249 return true;
2250 return false;
2251 case TargetOpcode::G_BR:
2252 return false;
2253 case TargetOpcode::G_SHL:
2254 return earlySelectSHL(I, MRI);
2255 case TargetOpcode::G_CONSTANT: {
2256 bool IsZero = false;
2257 if (I.getOperand(1).isCImm())
2258 IsZero = I.getOperand(1).getCImm()->getZExtValue() == 0;
2259 else if (I.getOperand(1).isImm())
2260 IsZero = I.getOperand(1).getImm() == 0;
2261
2262 if (!IsZero)
2263 return false;
2264
2265 Register DefReg = I.getOperand(0).getReg();
2266 LLT Ty = MRI.getType(DefReg);
2267 if (Ty.getSizeInBits() == 64) {
2268 I.getOperand(1).ChangeToRegister(AArch64::XZR, false);
2269 RBI.constrainGenericRegister(DefReg, AArch64::GPR64RegClass, MRI);
2270 } else if (Ty.getSizeInBits() == 32) {
2271 I.getOperand(1).ChangeToRegister(AArch64::WZR, false);
2272 RBI.constrainGenericRegister(DefReg, AArch64::GPR32RegClass, MRI);
2273 } else
2274 return false;
2275
2276 I.setDesc(TII.get(TargetOpcode::COPY));
2277 return true;
2278 }
2279
2280 case TargetOpcode::G_ADD: {
2281 // Check if this is being fed by a G_ICMP on either side.
2282 //
2283 // (cmp pred, x, y) + z
2284 //
2285 // In the above case, when the cmp is true, we increment z by 1. So, we can
2286 // fold the add into the cset for the cmp by using cinc.
2287 //
2288 // FIXME: This would probably be a lot nicer in PostLegalizerLowering.
2289 Register AddDst = I.getOperand(0).getReg();
2290 Register AddLHS = I.getOperand(1).getReg();
2291 Register AddRHS = I.getOperand(2).getReg();
2292 // Only handle scalars.
2293 LLT Ty = MRI.getType(AddLHS);
2294 if (Ty.isVector())
2295 return false;
2296 // Since G_ICMP is modeled as ADDS/SUBS/ANDS, we can handle 32 bits or 64
2297 // bits.
2298 unsigned Size = Ty.getSizeInBits();
2299 if (Size != 32 && Size != 64)
2300 return false;
2301 auto MatchCmp = [&](Register Reg) -> MachineInstr * {
2302 if (!MRI.hasOneNonDBGUse(Reg))
2303 return nullptr;
2304 // If the LHS of the add is 32 bits, then we want to fold a 32-bit
2305 // compare.
2306 if (Size == 32)
2307 return getOpcodeDef(TargetOpcode::G_ICMP, Reg, MRI);
2308 // We model scalar compares using 32-bit destinations right now.
2309 // If it's a 64-bit compare, it'll have 64-bit sources.
2310 Register ZExt;
2311 if (!mi_match(Reg, MRI,
2312 m_OneNonDBGUse(m_GZExt(m_OneNonDBGUse(m_Reg(ZExt))))))
2313 return nullptr;
2314 auto *Cmp = getOpcodeDef(TargetOpcode::G_ICMP, ZExt, MRI);
2315 if (!Cmp ||
2316 MRI.getType(Cmp->getOperand(2).getReg()).getSizeInBits() != 64)
2317 return nullptr;
2318 return Cmp;
2319 };
2320 // Try to match
2321 // z + (cmp pred, x, y)
2322 MachineInstr *Cmp = MatchCmp(AddRHS);
2323 if (!Cmp) {
2324 // (cmp pred, x, y) + z
2325 std::swap(AddLHS, AddRHS);
2326 Cmp = MatchCmp(AddRHS);
2327 if (!Cmp)
2328 return false;
2329 }
2330 auto &PredOp = Cmp->getOperand(1);
2331 auto Pred = static_cast<CmpInst::Predicate>(PredOp.getPredicate());
2332 const AArch64CC::CondCode InvCC =
2333 changeICMPPredToAArch64CC(CmpInst::getInversePredicate(Pred));
2334 MIB.setInstrAndDebugLoc(I);
2335 emitIntegerCompare(/*LHS=*/Cmp->getOperand(2),
2336 /*RHS=*/Cmp->getOperand(3), PredOp, MIB);
2337 emitCSINC(/*Dst=*/AddDst, /*Src =*/AddLHS, /*Src2=*/AddLHS, InvCC, MIB);
2338 I.eraseFromParent();
2339 return true;
2340 }
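// Illustrative sketch, not part of the upstream file: for roughly
//   %c:gpr(s32) = G_ICMP intpred(eq), %x, %y
//   %d:gpr(s32) = G_ADD %z, %c
// the case above emits the integer compare and then
//   %d = CSINCWr %z, %z, <inverse of eq>
// so %d is %z + 1 when the predicate holds and %z otherwise, i.e. the CINC
// form mentioned in the comment.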
2341 case TargetOpcode::G_OR: {
2342 // Look for operations that take the lower `Width=Size-ShiftImm` bits of
2343 // `ShiftSrc` and insert them into the upper `Width` bits of `MaskSrc` via
2344 // shifting and masking that we can replace with a BFI (encoded as a BFM).
2345 Register Dst = I.getOperand(0).getReg();
2346 LLT Ty = MRI.getType(Dst);
2347
2348 if (!Ty.isScalar())
2349 return false;
2350
2351 unsigned Size = Ty.getSizeInBits();
2352 if (Size != 32 && Size != 64)
2353 return false;
2354
2355 Register ShiftSrc;
2356 int64_t ShiftImm;
2357 Register MaskSrc;
2358 int64_t MaskImm;
2359 if (!mi_match(
2360 Dst, MRI,
2361 m_GOr(m_OneNonDBGUse(m_GShl(m_Reg(ShiftSrc), m_ICst(ShiftImm))),
2362 m_OneNonDBGUse(m_GAnd(m_Reg(MaskSrc), m_ICst(MaskImm))))))
2363 return false;
2364
2365 if (ShiftImm > Size || ((1ULL << ShiftImm) - 1ULL) != uint64_t(MaskImm))
2366 return false;
2367
2368 int64_t Immr = Size - ShiftImm;
2369 int64_t Imms = Size - ShiftImm - 1;
2370 unsigned Opc = Size == 32 ? AArch64::BFMWri : AArch64::BFMXri;
2371 emitInstr(Opc, {Dst}, {MaskSrc, ShiftSrc, Immr, Imms}, MIB);
2372 I.eraseFromParent();
2373 return true;
2374 }
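// Illustrative sketch, not part of the upstream file: with Size = 32,
// ShiftImm = 8 and MaskImm = 0xff, the match corresponds to
//   or (shl %shiftsrc, 8), (and %masksrc, 0xff)
// and the emitted BFMWri uses Immr = 24, Imms = 23, i.e. the
//   BFI Wd, Wshiftsrc, #8, #24
// alias: the low 24 bits of %shiftsrc land in bits [31:8] while the low
// 8 bits of %masksrc are preserved.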
2375 default:
2376 return false;
2377 }
2378}
2379
2380bool AArch64InstructionSelector::select(MachineInstr &I) {
2381 assert(I.getParent() && "Instruction should be in a basic block!");
2382 assert(I.getParent()->getParent() && "Instruction should be in a function!");
2383
2384 MachineBasicBlock &MBB = *I.getParent();
2385 MachineFunction &MF = *MBB.getParent();
2386 MachineRegisterInfo &MRI = MF.getRegInfo();
2387
2388 const AArch64Subtarget *Subtarget =
2389 &static_cast<const AArch64Subtarget &>(MF.getSubtarget());
2390 if (Subtarget->requiresStrictAlign()) {
2391 // We don't support this feature yet.
2392 LLVM_DEBUG(dbgs() << "AArch64 GISel does not support strict-align yet\n");
2393 return false;
2394 }
2395
2396 MIB.setInstrAndDebugLoc(I);
2397
2398 unsigned Opcode = I.getOpcode();
2399 // G_PHI requires same handling as PHI
2400 if (!I.isPreISelOpcode() || Opcode == TargetOpcode::G_PHI) {
2401 // Certain non-generic instructions also need some special handling.
2402
2403 if (Opcode == TargetOpcode::LOAD_STACK_GUARD)
2404 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2405
2406 if (Opcode == TargetOpcode::PHI || Opcode == TargetOpcode::G_PHI) {
2407 const Register DefReg = I.getOperand(0).getReg();
2408 const LLT DefTy = MRI.getType(DefReg);
2409
2410 const RegClassOrRegBank &RegClassOrBank =
2411 MRI.getRegClassOrRegBank(DefReg);
2412
2413 const TargetRegisterClass *DefRC
2414 = RegClassOrBank.dyn_cast<const TargetRegisterClass *>();
2415 if (!DefRC) {
2416 if (!DefTy.isValid()) {
2417 LLVM_DEBUG(dbgs() << "PHI operand has no type, not a gvreg?\n");
2418 return false;
2419 }
2420 const RegisterBank &RB = *RegClassOrBank.get<const RegisterBank *>();
2421 DefRC = getRegClassForTypeOnBank(DefTy, RB, RBI);
2422 if (!DefRC) {
2423 LLVM_DEBUG(dbgs() << "PHI operand has unexpected size/bank\n");
2424 return false;
2425 }
2426 }
2427
2428 I.setDesc(TII.get(TargetOpcode::PHI));
2429
2430 return RBI.constrainGenericRegister(DefReg, *DefRC, MRI);
2431 }
2432
2433 if (I.isCopy())
2434 return selectCopy(I, TII, MRI, TRI, RBI);
2435
2436 return true;
2437 }
2438
2439
2440 if (I.getNumOperands() != I.getNumExplicitOperands()) {
2441 LLVM_DEBUG(
2442 dbgs() << "Generic instruction has unexpected implicit operands\n");
2443 return false;
2444 }
2445
2446 // Try to do some lowering before we start instruction selecting. These
2447 // lowerings are purely transformations on the input G_MIR and so selection
2448 // must continue after any modification of the instruction.
2449 if (preISelLower(I)) {
2450 Opcode = I.getOpcode(); // The opcode may have been modified, refresh it.
2451 }
2452
2453 // There may be patterns where the importer can't deal with them optimally,
2454 // but does select it to a suboptimal sequence so our custom C++ selection
2455 // code later never has a chance to work on it. Therefore, we have an early
2456 // selection attempt here to give priority to certain selection routines
2457 // over the imported ones.
2458 if (earlySelect(I))
2459 return true;
2460
2461 if (selectImpl(I, *CoverageInfo))
2462 return true;
2463
2464 LLT Ty =
2465 I.getOperand(0).isReg() ? MRI.getType(I.getOperand(0).getReg()) : LLT{};
2466
2467 switch (Opcode) {
2468 case TargetOpcode::G_SBFX:
2469 case TargetOpcode::G_UBFX: {
2470 static const unsigned OpcTable[2][2] = {
2471 {AArch64::UBFMWri, AArch64::UBFMXri},
2472 {AArch64::SBFMWri, AArch64::SBFMXri}};
2473 bool IsSigned = Opcode == TargetOpcode::G_SBFX;
2474 unsigned Size = Ty.getSizeInBits();
2475 unsigned Opc = OpcTable[IsSigned][Size == 64];
2476 auto Cst1 =
2477 getIConstantVRegValWithLookThrough(I.getOperand(2).getReg(), MRI);
2478 assert(Cst1 && "Should have gotten a constant for src 1?");
2479 auto Cst2 =
2480 getIConstantVRegValWithLookThrough(I.getOperand(3).getReg(), MRI);
2481 assert(Cst2 && "Should have gotten a constant for src 2?");
2482 auto LSB = Cst1->Value.getZExtValue();
2483 auto Width = Cst2->Value.getZExtValue();
2484 auto BitfieldInst =
2485 MIB.buildInstr(Opc, {I.getOperand(0)}, {I.getOperand(1)})
2486 .addImm(LSB)
2487 .addImm(LSB + Width - 1);
2488 I.eraseFromParent();
2489 return constrainSelectedInstRegOperands(*BitfieldInst, TII, TRI, RBI);
2490 }
2491 case TargetOpcode::G_BRCOND:
2492 return selectCompareBranch(I, MF, MRI);
2493
2494 case TargetOpcode::G_BRINDIRECT: {
2495 I.setDesc(TII.get(AArch64::BR));
2496 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2497 }
2498
2499 case TargetOpcode::G_BRJT:
2500 return selectBrJT(I, MRI);
2501
2502 case AArch64::G_ADD_LOW: {
2503 // This op may have been separated from its ADRP companion by the localizer
2504 // or some other code motion pass. Given that many CPUs will try to
2505 // macro fuse these operations anyway, select this into a MOVaddr pseudo
2506 // which will later be expanded into an ADRP+ADD pair after scheduling.
2507 MachineInstr *BaseMI = MRI.getVRegDef(I.getOperand(1).getReg());
2508 if (BaseMI->getOpcode() != AArch64::ADRP) {
2509 I.setDesc(TII.get(AArch64::ADDXri));
2510 I.addOperand(MachineOperand::CreateImm(0));
2511 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2512 }
2513 assert(TM.getCodeModel() == CodeModel::Small &&
2514 "Expected small code model");
2515 auto Op1 = BaseMI->getOperand(1);
2516 auto Op2 = I.getOperand(2);
2517 auto MovAddr = MIB.buildInstr(AArch64::MOVaddr, {I.getOperand(0)}, {})
2518 .addGlobalAddress(Op1.getGlobal(), Op1.getOffset(),
2519 Op1.getTargetFlags())
2520 .addGlobalAddress(Op2.getGlobal(), Op2.getOffset(),
2521 Op2.getTargetFlags());
2522 I.eraseFromParent();
2523 return constrainSelectedInstRegOperands(*MovAddr, TII, TRI, RBI);
2524 }
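// Illustrative sketch, not part of the upstream file: when the base really
// is an ADRP of the same global, e.g. roughly
//   %page:gpr64 = ADRP target-flags(aarch64-page) @g
//   %addr:gpr(p0) = G_ADD_LOW %page, target-flags(aarch64-pageoff) @g
// the pair is folded into a single MOVaddr pseudo carrying both the page and
// the page-offset references; it is expanded back into ADRP + ADD after
// scheduling so the two instructions stay adjacent for macro-fusion.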
2525
2526 case TargetOpcode::G_BSWAP: {
2527 // Handle vector types for G_BSWAP directly.
2528 Register DstReg = I.getOperand(0).getReg();
2529 LLT DstTy = MRI.getType(DstReg);
2530
2531 // We should only get vector types here; everything else is handled by the
2532 // importer right now.
2533 if (!DstTy.isVector() || DstTy.getSizeInBits() > 128) {
2534 LLVM_DEBUG(dbgs() << "Dst type for G_BSWAP currently unsupported.\n");
2535 return false;
2536 }
2537
2538 // Only handle 4 and 2 element vectors for now.
2539 // TODO: 16-bit elements.
2540 unsigned NumElts = DstTy.getNumElements();
2541 if (NumElts != 4 && NumElts != 2) {
2542 LLVM_DEBUG(dbgs() << "Unsupported number of elements for G_BSWAP.\n");
2543 return false;
2544 }
2545
2546 // Choose the correct opcode for the supported types. Right now, that's
2547 // v2s32, v4s32, and v2s64.
2548 unsigned Opc = 0;
2549 unsigned EltSize = DstTy.getElementType().getSizeInBits();
2550 if (EltSize == 32)
2551 Opc = (DstTy.getNumElements() == 2) ? AArch64::REV32v8i8
2552 : AArch64::REV32v16i8;
2553 else if (EltSize == 64)
2554 Opc = AArch64::REV64v16i8;
2555
2556 // We should always get something by the time we get here...
2557 assert(Opc != 0 && "Didn't get an opcode for G_BSWAP?");
2558
2559 I.setDesc(TII.get(Opc));
2560 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2561 }
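// Illustrative sketch, not part of the upstream file: a v4s32 byte swap
//   %d:fpr(<4 x s32>) = G_BSWAP %v
// is selected to REV32v16i8, which reverses the bytes inside each 32-bit
// element of the 128-bit register; v2s32 uses the 64-bit REV32v8i8 and
// v2s64 uses REV64v16i8, matching the opcode choices above.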
2562
2563 case TargetOpcode::G_FCONSTANT:
2564 case TargetOpcode::G_CONSTANT: {
2565 const bool isFP = Opcode == TargetOpcode::G_FCONSTANT;
2566
2567 const LLT s8 = LLT::scalar(8);
2568 const LLT s16 = LLT::scalar(16);
2569 const LLT s32 = LLT::scalar(32);
2570 const LLT s64 = LLT::scalar(64);
2571 const LLT s128 = LLT::scalar(128);
2572 const LLT p0 = LLT::pointer(0, 64);
2573
2574 const Register DefReg = I.getOperand(0).getReg();
2575 const LLT DefTy = MRI.getType(DefReg);
2576 const unsigned DefSize = DefTy.getSizeInBits();
2577 const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI);
2578
2579 // FIXME: Redundant check, but even less readable when factored out.
2580 if (isFP) {
2581 if (Ty != s16 && Ty != s32 && Ty != s64 && Ty != s128) {
2582 LLVM_DEBUG(dbgs() << "Unable to materialize FP " << Ty
2583 << " constant, expected: " << s16 << " or " << s32
2584 << " or " << s64 << " or " << s128 << '\n');
2585 return false;
2586 }
2587
2588 if (RB.getID() != AArch64::FPRRegBankID) {
2589 LLVM_DEBUG(dbgs() << "Unable to materialize FP " << Ty
2590 << " constant on bank: " << RB
2591 << ", expected: FPR\n");
2592 return false;
2593 }
2594
2595 // The case when we have 0.0 is covered by tablegen. Reject it here so we
2596 // can be sure tablegen works correctly and isn't rescued by this code.
2597 // 0.0 is not covered by tablegen for FP128. So we will handle this
2598 // scenario in the code here.
2599 if (DefSize != 128 && I.getOperand(1).getFPImm()->isExactlyValue(0.0))
2600 return false;
2601 } else {
2602 // s32 and s64 are covered by tablegen.
2603 if (Ty != p0 && Ty != s8 && Ty != s16) {
2604 LLVM_DEBUG(dbgs() << "Unable to materialize integer " << Ty
2605 << " constant, expected: " << s32 << ", " << s64
2606 << ", or " << p0 << '\n');
2607 return false;
2608 }
2609
2610 if (RB.getID() != AArch64::GPRRegBankID) {
2611 LLVM_DEBUG(dbgs() << "Unable to materialize integer " << Ty
2612 << " constant on bank: " << RB
2613 << ", expected: GPR\n");
2614 return false;
2615 }
2616 }
2617
2618 if (isFP) {
2619 const TargetRegisterClass &FPRRC = *getMinClassForRegBank(RB, DefSize);
2620 // For 16, 64, and 128b values, emit a constant pool load.
2621 switch (DefSize) {
2622 default:
2623 llvm_unreachable("Unexpected destination size for G_FCONSTANT?");
2624 case 32:
2625 // For s32, use a cp load if we have optsize/minsize.
2626 if (!shouldOptForSize(&MF))
2627 break;
2628 LLVM_FALLTHROUGH;
2629 case 16:
2630 case 64:
2631 case 128: {
2632 auto *FPImm = I.getOperand(1).getFPImm();
2633 auto *LoadMI = emitLoadFromConstantPool(FPImm, MIB);
2634 if (!LoadMI) {
2635 LLVM_DEBUG(dbgs() << "Failed to load double constant pool entry\n");
2636 return false;
2637 }
2638 MIB.buildCopy({DefReg}, {LoadMI->getOperand(0).getReg()});
2639 I.eraseFromParent();
2640 return RBI.constrainGenericRegister(DefReg, FPRRC, MRI);
2641 }
2642 }
2643
2644 // Either emit a FMOV, or emit a copy to emit a normal mov.
2645 assert(DefSize == 32 &&
2646 "Expected constant pool loads for all sizes other than 32!");
2647 const Register DefGPRReg =
2648 MRI.createVirtualRegister(&AArch64::GPR32RegClass);
2649 MachineOperand &RegOp = I.getOperand(0);
2650 RegOp.setReg(DefGPRReg);
2651 MIB.setInsertPt(MIB.getMBB(), std::next(I.getIterator()));
2652 MIB.buildCopy({DefReg}, {DefGPRReg});
2653
2654 if (!RBI.constrainGenericRegister(DefReg, FPRRC, MRI)) {
2655 LLVM_DEBUG(dbgs() << "Failed to constrain G_FCONSTANT def operand\n");
2656 return false;
2657 }
2658
2659 MachineOperand &ImmOp = I.getOperand(1);
2660 // FIXME: Is going through int64_t always correct?
2661 ImmOp.ChangeToImmediate(
2662 ImmOp.getFPImm()->getValueAPF().bitcastToAPInt().getZExtValue());
2663 } else if (I.getOperand(1).isCImm()) {
2664 uint64_t Val = I.getOperand(1).getCImm()->getZExtValue();
2665 I.getOperand(1).ChangeToImmediate(Val);
2666 } else if (I.getOperand(1).isImm()) {
2667 uint64_t Val = I.getOperand(1).getImm();
2668 I.getOperand(1).ChangeToImmediate(Val);
2669 }
2670
2671 const unsigned MovOpc =
2672 DefSize == 64 ? AArch64::MOVi64imm : AArch64::MOVi32imm;
2673 I.setDesc(TII.get(MovOpc));
2674 constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2675 return true;
2676 }
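// Illustrative sketch, not part of the upstream file: an s64 FP constant
// such as G_FCONSTANT double 1.0 goes through emitLoadFromConstantPool,
// while an s32 FP constant without optsize/minsize is materialized through
// a GPR: the operand is rewritten to its raw bit pattern (0x3f800000 for
// 1.0f), selected as MOVi32imm into a GPR32, and then copied into the FPR
// destination.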
2677 case TargetOpcode::G_EXTRACT: {
2678 Register DstReg = I.getOperand(0).getReg();
2679 Register SrcReg = I.getOperand(1).getReg();
2680 LLT SrcTy = MRI.getType(SrcReg);
2681 LLT DstTy = MRI.getType(DstReg);
2682 (void)DstTy;
2683 unsigned SrcSize = SrcTy.getSizeInBits();
2684
2685 if (SrcTy.getSizeInBits() > 64) {
2686 // This should be an extract of an s128, which is like a vector extract.
2687 if (SrcTy.getSizeInBits() != 128)
2688 return false;
2689 // Only support extracting 64 bits from an s128 at the moment.
2690 if (DstTy.getSizeInBits() != 64)
2691 return false;
2692
2693 unsigned Offset = I.getOperand(2).getImm();
2694 if (Offset % 64 != 0)
2695 return false;
2696
2697 // Check we have the right regbank always.
2698 const RegisterBank &SrcRB = *RBI.getRegBank(SrcReg, MRI, TRI);
2699 const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
2700 assert(SrcRB.getID() == DstRB.getID() && "Wrong extract regbank!");
2701
2702 if (SrcRB.getID() == AArch64::GPRRegBankID) {
2703 MIB.buildInstr(TargetOpcode::COPY, {DstReg}, {})
2704 .addUse(SrcReg, 0, Offset == 0 ? AArch64::sube64 : AArch64::subo64);
2705 I.eraseFromParent();
2706 return true;
2707 }
2708
2709 // Emit the same code as a vector extract.
2710 // Offset must be a multiple of 64.
2711 unsigned LaneIdx = Offset / 64;
2712 MachineInstr *Extract = emitExtractVectorElt(
2713 DstReg, DstRB, LLT::scalar(64), SrcReg, LaneIdx, MIB);
2714 if (!Extract)
2715 return false;
2716 I.eraseFromParent();
2717 return true;
2718 }
2719
2720 I.setDesc(TII.get(SrcSize == 64 ? AArch64::UBFMXri : AArch64::UBFMWri));
2721 MachineInstrBuilder(MF, I).addImm(I.getOperand(2).getImm() +
2722 Ty.getSizeInBits() - 1);
2723
2724 if (SrcSize < 64) {
2725 assert(SrcSize == 32 && DstTy.getSizeInBits() == 16 &&
2726 "unexpected G_EXTRACT types");
2727 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2728 }
2729
2730 DstReg = MRI.createGenericVirtualRegister(LLT::scalar(64));
2731 MIB.setInsertPt(MIB.getMBB(), std::next(I.getIterator()));
2732 MIB.buildInstr(TargetOpcode::COPY, {I.getOperand(0).getReg()}, {})
2733 .addReg(DstReg, 0, AArch64::sub_32);
2734 RBI.constrainGenericRegister(I.getOperand(0).getReg(),
2735 AArch64::GPR32RegClass, MRI);
2736 I.getOperand(0).setReg(DstReg);
2737
2738 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2739 }
2740
2741 case TargetOpcode::G_INSERT: {
2742 LLT SrcTy = MRI.getType(I.getOperand(2).getReg());
2743 LLT DstTy = MRI.getType(I.getOperand(0).getReg());
2744 unsigned DstSize = DstTy.getSizeInBits();
2745 // Larger inserts are vectors, same-size ones should be something else by
2746 // now (split up or turned into COPYs).
2747 if (Ty.getSizeInBits() > 64 || SrcTy.getSizeInBits() > 32)
2748 return false;
2749
2750 I.setDesc(TII.get(DstSize == 64 ? AArch64::BFMXri : AArch64::BFMWri));
2751 unsigned LSB = I.getOperand(3).getImm();
2752 unsigned Width = MRI.getType(I.getOperand(2).getReg()).getSizeInBits();
2753 I.getOperand(3).setImm((DstSize - LSB) % DstSize);
2754 MachineInstrBuilder(MF, I).addImm(Width - 1);
2755
2756 if (DstSize < 64) {
2757 assert(DstSize == 32 && SrcTy.getSizeInBits() == 16 &&
2758 "unexpected G_INSERT types");
2759 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2760 }
2761
2762 Register SrcReg = MRI.createGenericVirtualRegister(LLT::scalar(64));
2763 BuildMI(MBB, I.getIterator(), I.getDebugLoc(),
2764 TII.get(AArch64::SUBREG_TO_REG))
2765 .addDef(SrcReg)
2766 .addImm(0)
2767 .addUse(I.getOperand(2).getReg())
2768 .addImm(AArch64::sub_32);
2769 RBI.constrainGenericRegister(I.getOperand(2).getReg(),
2770 AArch64::GPR32RegClass, MRI);
2771 I.getOperand(2).setReg(SrcReg);
2772
2773 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2774 }
2775 case TargetOpcode::G_FRAME_INDEX: {
2776 // allocas and G_FRAME_INDEX are only supported in addrspace(0).
2777 if (Ty != LLT::pointer(0, 64)) {
2778 LLVM_DEBUG(dbgs() << "G_FRAME_INDEX pointer has type: " << Ty
2779 << ", expected: " << LLT::pointer(0, 64) << '\n');
2780 return false;
2781 }
2782 I.setDesc(TII.get(AArch64::ADDXri));
2783
2784 // MOs for a #0 shifted immediate.
2785 I.addOperand(MachineOperand::CreateImm(0));
2786 I.addOperand(MachineOperand::CreateImm(0));
2787
2788 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2789 }
2790
2791 case TargetOpcode::G_GLOBAL_VALUE: {
2792 auto GV = I.getOperand(1).getGlobal();
2793 if (GV->isThreadLocal())
2794 return selectTLSGlobalValue(I, MRI);
2795
2796 unsigned OpFlags = STI.ClassifyGlobalReference(GV, TM);
2797 if (OpFlags & AArch64II::MO_GOT) {
2798 I.setDesc(TII.get(AArch64::LOADgot));
2799 I.getOperand(1).setTargetFlags(OpFlags);
2800 } else if (TM.getCodeModel() == CodeModel::Large) {
2801 // Materialize the global using movz/movk instructions.
2802 materializeLargeCMVal(I, GV, OpFlags);
2803 I.eraseFromParent();
2804 return true;
2805 } else if (TM.getCodeModel() == CodeModel::Tiny) {
2806 I.setDesc(TII.get(AArch64::ADR));
2807 I.getOperand(1).setTargetFlags(OpFlags);
2808 } else {
2809 I.setDesc(TII.get(AArch64::MOVaddr));
2810 I.getOperand(1).setTargetFlags(OpFlags | AArch64II::MO_PAGE);
2811 MachineInstrBuilder MIB(MF, I);
2812 MIB.addGlobalAddress(GV, I.getOperand(1).getOffset(),
2813 OpFlags | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
2814 }
2815 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2816 }
2817
2818 case TargetOpcode::G_ZEXTLOAD:
2819 case TargetOpcode::G_LOAD:
2820 case TargetOpcode::G_STORE: {
2821 GLoadStore &LdSt = cast<GLoadStore>(I);
2822 bool IsZExtLoad = I.getOpcode() == TargetOpcode::G_ZEXTLOAD;
2823 LLT PtrTy = MRI.getType(LdSt.getPointerReg());
2824
2825 if (PtrTy != LLT::pointer(0, 64)) {
2826 LLVM_DEBUG(dbgs() << "Load/Store pointer has type: " << PtrTy
2827 << ", expected: " << LLT::pointer(0, 64) << '\n');
2828 return false;
2829 }
2830
2831 uint64_t MemSizeInBytes = LdSt.getMemSize();
2832 unsigned MemSizeInBits = LdSt.getMemSizeInBits();
2833 AtomicOrdering Order = LdSt.getMMO().getSuccessOrdering();
2834
2835 // Need special instructions for atomics that affect ordering.
2836 if (Order != AtomicOrdering::NotAtomic &&
2837 Order != AtomicOrdering::Unordered &&
2838 Order != AtomicOrdering::Monotonic) {
2839 assert(!isa<GZExtLoad>(LdSt));
2840 if (MemSizeInBytes > 64)
2841 return false;
2842
2843 if (isa<GLoad>(LdSt)) {
2844 static unsigned Opcodes[] = {AArch64::LDARB, AArch64::LDARH,
2845 AArch64::LDARW, AArch64::LDARX};
2846 I.setDesc(TII.get(Opcodes[Log2_32(MemSizeInBytes)]));
2847 } else {
2848 static unsigned Opcodes[] = {AArch64::STLRB, AArch64::STLRH,
2849 AArch64::STLRW, AArch64::STLRX};
2850 Register ValReg = LdSt.getReg(0);
2851 if (MRI.getType(ValReg).getSizeInBits() == 64 && MemSizeInBits != 64) {
2852 // Emit a subreg copy of 32 bits.
2853 Register NewVal = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
2854 MIB.buildInstr(TargetOpcode::COPY, {NewVal}, {})
2855 .addReg(I.getOperand(0).getReg(), 0, AArch64::sub_32);
2856 I.getOperand(0).setReg(NewVal);
2857 }
2858 I.setDesc(TII.get(Opcodes[Log2_32(MemSizeInBytes)]));
2859 }
2860 constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2861 return true;
2862 }
2863
2864#ifndef NDEBUG
2865 const Register PtrReg = LdSt.getPointerReg();
2866 const RegisterBank &PtrRB = *RBI.getRegBank(PtrReg, MRI, TRI);
2867 // Check that the pointer register is valid.
2868 assert(PtrRB.getID() == AArch64::GPRRegBankID &&
2869 "Load/Store pointer operand isn't a GPR");
2870 assert(MRI.getType(PtrReg).isPointer() &&
2871 "Load/Store pointer operand isn't a pointer");
2872#endif
2873
2874 const Register ValReg = LdSt.getReg(0);
2875 const LLT ValTy = MRI.getType(ValReg);
2876 const RegisterBank &RB = *RBI.getRegBank(ValReg, MRI, TRI);
2877
2878 // The code below doesn't support truncating stores, so we need to split it
2879 // again.
2880 if (isa<GStore>(LdSt) && ValTy.getSizeInBits() > MemSizeInBits) {
2881 unsigned SubReg;
2882 LLT MemTy = LdSt.getMMO().getMemoryType();
2883 auto *RC = getRegClassForTypeOnBank(MemTy, RB, RBI);
2884 if (!getSubRegForClass(RC, TRI, SubReg))
2885 return false;
2886
2887 // Generate a subreg copy.
2888 auto Copy = MIB.buildInstr(TargetOpcode::COPY, {MemTy}, {})
2889 .addReg(ValReg, 0, SubReg)
2890 .getReg(0);
2891 RBI.constrainGenericRegister(Copy, *RC, MRI);
2892 LdSt.getOperand(0).setReg(Copy);
2893 } else if (isa<GLoad>(LdSt) && ValTy.getSizeInBits() > MemSizeInBits) {
2894 // If this is an any-extending load from the FPR bank, split it into a regular
2895 // load + extend.
2896 if (RB.getID() == AArch64::FPRRegBankID) {
2897 unsigned SubReg;
2898 LLT MemTy = LdSt.getMMO().getMemoryType();
2899 auto *RC = getRegClassForTypeOnBank(MemTy, RB, RBI);
2900 if (!getSubRegForClass(RC, TRI, SubReg))
2901 return false;
2902 Register OldDst = LdSt.getReg(0);
2903 Register NewDst =
2904 MRI.createGenericVirtualRegister(LdSt.getMMO().getMemoryType());
2905 LdSt.getOperand(0).setReg(NewDst);
2906 MRI.setRegBank(NewDst, RB);
2907 // Generate a SUBREG_TO_REG to extend it.
2908 MIB.setInsertPt(MIB.getMBB(), std::next(LdSt.getIterator()));
2909 MIB.buildInstr(AArch64::SUBREG_TO_REG, {OldDst}, {})
2910 .addImm(0)
2911 .addUse(NewDst)
2912 .addImm(SubReg);
2913 auto SubRegRC = getRegClassForTypeOnBank(MRI.getType(OldDst), RB, RBI);
2914 RBI.constrainGenericRegister(OldDst, *SubRegRC, MRI);
2915 MIB.setInstr(LdSt);
2916 }
2917 }
2918
2919 // Helper lambda for partially selecting I. Either returns the original
2920 // instruction with an updated opcode, or a new instruction.
2921 auto SelectLoadStoreAddressingMode = [&]() -> MachineInstr * {
2922 bool IsStore = isa<GStore>(I);
2923 const unsigned NewOpc =
2924 selectLoadStoreUIOp(I.getOpcode(), RB.getID(), MemSizeInBits);
2925 if (NewOpc == I.getOpcode())
2926 return nullptr;
2927 // Check if we can fold anything into the addressing mode.
2928 auto AddrModeFns =
2929 selectAddrModeIndexed(I.getOperand(1), MemSizeInBytes);
2930 if (!AddrModeFns) {
2931 // Can't fold anything. Use the original instruction.
2932 I.setDesc(TII.get(NewOpc));
2933 I.addOperand(MachineOperand::CreateImm(0));
2934 return &I;
2935 }
2936
2937 // Folded something. Create a new instruction and return it.
2938 auto NewInst = MIB.buildInstr(NewOpc, {}, {}, I.getFlags());
2939 Register CurValReg = I.getOperand(0).getReg();
2940 IsStore ? NewInst.addUse(CurValReg) : NewInst.addDef(CurValReg);
2941 NewInst.cloneMemRefs(I);
2942 for (auto &Fn : *AddrModeFns)
2943 Fn(NewInst);
2944 I.eraseFromParent();
2945 return &*NewInst;
2946 };
2947
2948 MachineInstr *LoadStore = SelectLoadStoreAddressingMode();
2949 if (!LoadStore)
2950 return false;
2951
2952 // If we're storing a 0, use WZR/XZR.
2953 if (Opcode == TargetOpcode::G_STORE) {
2954 auto CVal = getIConstantVRegValWithLookThrough(
2955 LoadStore->getOperand(0).getReg(), MRI);
2956 if (CVal && CVal->Value == 0) {
2957 switch (LoadStore->getOpcode()) {
2958 case AArch64::STRWui:
2959 case AArch64::STRHHui:
2960 case AArch64::STRBBui:
2961 LoadStore->getOperand(0).setReg(AArch64::WZR);
2962 break;
2963 case AArch64::STRXui:
2964 LoadStore->getOperand(0).setReg(AArch64::XZR);
2965 break;
2966 }
2967 }
2968 }
2969
2970 if (IsZExtLoad) {
2971 // The zextload from a smaller type to i32 should be handled by the
2972 // importer.
2973 if (MRI.getType(LoadStore->getOperand(0).getReg()).getSizeInBits() != 64)
2974 return false;
2975 // If we have a ZEXTLOAD then change the load's type to be a narrower reg
2976 // and zero_extend with SUBREG_TO_REG.
2977 Register LdReg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
2978 Register DstReg = LoadStore->getOperand(0).getReg();
2979 LoadStore->getOperand(0).setReg(LdReg);
2980
2981 MIB.setInsertPt(MIB.getMBB(), std::next(LoadStore->getIterator()));
2982 MIB.buildInstr(AArch64::SUBREG_TO_REG, {DstReg}, {})
2983 .addImm(0)
2984 .addUse(LdReg)
2985 .addImm(AArch64::sub_32);
2986 constrainSelectedInstRegOperands(*LoadStore, TII, TRI, RBI);
2987 return RBI.constrainGenericRegister(DstReg, AArch64::GPR64allRegClass,
2988 MRI);
2989 }
2990 return constrainSelectedInstRegOperands(*LoadStore, TII, TRI, RBI);
2991 }
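// Illustrative sketch, not part of the upstream file, of two of the special
// cases above in rough MIR terms:
//   G_STORE (G_CONSTANT i32 0), %p        ->  STRWui $wzr, %p, 0
//   %x:gpr(s64) = G_ZEXTLOAD %p (load 4)  ->  LDRWui into a GPR32 vreg,
//                                             then SUBREG_TO_REG to s64
// i.e. zero stores reuse $wzr/$xzr, and a 32-to-64-bit zextload relies on
// the implicit zeroing of the upper half of the X register.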
2992
2993 case TargetOpcode::G_SMULH:
2994 case TargetOpcode::G_UMULH: {
2995 // Reject the various things we don't support yet.
2996 if (unsupportedBinOp(I, RBI, MRI, TRI))
2997 return false;
2998
2999 const Register DefReg = I.getOperand(0).getReg();
3000 const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI);
3001
3002 if (RB.getID() != AArch64::GPRRegBankID) {
3003 LLVM_DEBUG(dbgs() << "G_[SU]MULH on bank: " << RB << ", expected: GPR\n");
3004 return false;
3005 }
3006
3007 if (Ty != LLT::scalar(64)) {
3008 LLVM_DEBUG(dbgs() << "G_[SU]MULH has type: " << Ty
3009 << ", expected: " << LLT::scalar(64) << '\n');
3010 return false;
3011 }
3012
3013 unsigned NewOpc = I.getOpcode() == TargetOpcode::G_SMULH ? AArch64::SMULHrr
3014 : AArch64::UMULHrr;
3015 I.setDesc(TII.get(NewOpc));
3016
3017 // Now that we selected an opcode, we need to constrain the register
3018 // operands to use appropriate classes.
3019 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3020 }
3021 case TargetOpcode::G_LSHR:
3022 case TargetOpcode::G_ASHR:
3023 if (MRI.getType(I.getOperand(0).getReg()).isVector())
3024 return selectVectorAshrLshr(I, MRI);
3025 LLVM_FALLTHROUGH;
3026 case TargetOpcode::G_SHL:
3027 if (Opcode == TargetOpcode::G_SHL &&
3028 MRI.getType(I.getOperand(0).getReg()).isVector())
3029 return selectVectorSHL(I, MRI);
3030
3031 // These shifts were legalized to have 64 bit shift amounts because we
3032 // want to take advantage of the selection patterns that assume the
3033 // immediates are s64s, however, selectBinaryOp will assume both operands
3034 // will have the same bit size.
3035 {
3036 Register SrcReg = I.getOperand(1).getReg();
3037 Register ShiftReg = I.getOperand(2).getReg();
3038 const LLT ShiftTy = MRI.getType(ShiftReg);
3039 const LLT SrcTy = MRI.getType(SrcReg);
3040 if (!SrcTy.isVector() && SrcTy.getSizeInBits() == 32 &&
3041 ShiftTy.getSizeInBits() == 64) {
3042 assert(!ShiftTy.isVector() && "unexpected vector shift ty");
3043 assert(MRI.getVRegDef(ShiftReg) &&
3044 "could not find a vreg definition for shift amount");
3045 // Insert a subregister copy to implement a 64->32 trunc
3046 auto Trunc = MIB.buildInstr(TargetOpcode::COPY, {SrcTy}, {})
3047 .addReg(ShiftReg, 0, AArch64::sub_32);
3048 MRI.setRegBank(Trunc.getReg(0), RBI.getRegBank(AArch64::GPRRegBankID));
3049 I.getOperand(2).setReg(Trunc.getReg(0));
3050 }
3051 }
3052 LLVM_FALLTHROUGH;
3053 case TargetOpcode::G_OR: {
3054 // Reject the various things we don't support yet.
3055 if (unsupportedBinOp(I, RBI, MRI, TRI))
3056 return false;
3057
3058 const unsigned OpSize = Ty.getSizeInBits();
3059
3060 const Register DefReg = I.getOperand(0).getReg();
3061 const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI);
3062
3063 const unsigned NewOpc = selectBinaryOp(I.getOpcode(), RB.getID(), OpSize);
3064 if (NewOpc == I.getOpcode())
3065 return false;
3066
3067 I.setDesc(TII.get(NewOpc));
3068 // FIXME: Should the type be always reset in setDesc?
3069
3070 // Now that we selected an opcode, we need to constrain the register
3071 // operands to use appropriate classes.
3072 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3073 }
3074
3075 case TargetOpcode::G_PTR_ADD: {
3076 emitADD(I.getOperand(0).getReg(), I.getOperand(1), I.getOperand(2), MIB);
3077 I.eraseFromParent();
3078 return true;
3079 }
3080 case TargetOpcode::G_SADDO:
3081 case TargetOpcode::G_UADDO:
3082 case TargetOpcode::G_SSUBO:
3083 case TargetOpcode::G_USUBO: {
3084 // Emit the operation and get the correct condition code.
3085 auto OpAndCC = emitOverflowOp(Opcode, I.getOperand(0).getReg(),
3086 I.getOperand(2), I.getOperand(3), MIB);
3087
3088 // Now, put the overflow result in the register given by the first operand
3089 // to the overflow op. CSINC increments the result when the predicate is
3090 // false, so to get the increment when it's true, we need to use the
3091 // inverse. In this case, we want to increment when carry is set.
3092 Register ZReg = AArch64::WZR;
3093 emitCSINC(/*Dst=*/I.getOperand(1).getReg(), /*Src1=*/ZReg, /*Src2=*/ZReg,
3094 getInvertedCondCode(OpAndCC.second), MIB);
3095 I.eraseFromParent();
3096 return true;
3097 }
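// Illustrative sketch, not part of the upstream file: for roughly
//   %res, %ov = G_UADDO %a, %b
// emitOverflowOp is assumed to produce an NZCV-setting add (e.g. ADDSWrr),
// and the overflow/carry result is then materialized as
//   %ov = CSINCWr $wzr, $wzr, <inverted carry condition>
// which yields 1 exactly when the condition holds, for the reason given in
// the comment above about CSINC incrementing on a false predicate.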
3098
3099 case TargetOpcode::G_PTRMASK: {
3100 Register MaskReg = I.getOperand(2).getReg();
3101 Optional<int64_t> MaskVal = getIConstantVRegSExtVal(MaskReg, MRI);
3102 // TODO: Implement arbitrary cases
3103 if (!MaskVal || !isShiftedMask_64(*MaskVal))
3104 return false;
3105
3106 uint64_t Mask = *MaskVal;
3107 I.setDesc(TII.get(AArch64::ANDXri));
3108 I.getOperand(2).ChangeToImmediate(
3109 AArch64_AM::encodeLogicalImmediate(Mask, 64));
3110
3111 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3112 }
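// Illustrative sketch, not part of the upstream file: aligning a pointer
// down to 16 bytes,
//   %q:gpr(p0) = G_PTRMASK %p, <G_CONSTANT i64 -16>
// uses the mask 0xfffffffffffffff0, a contiguous (shifted) run of ones, so
// the instruction is rewritten into ANDXri with the logical-immediate
// encoding of that mask; masks that are not a contiguous run are rejected
// here, per the TODO above.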
3113 case TargetOpcode::G_PTRTOINT:
3114 case TargetOpcode::G_TRUNC: {
3115 const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
3116 const LLT SrcTy = MRI.getType(I.getOperand(1).getReg());
3117
3118 const Register DstReg = I.getOperand(0).getReg();
3119 const Register SrcReg = I.getOperand(1).getReg();
3120
3121 const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
3122 const RegisterBank &SrcRB = *RBI.getRegBank(SrcReg, MRI, TRI);
3123
3124 if (DstRB.getID() != SrcRB.getID()) {
3125 LLVM_DEBUG(
3126 dbgs() << "G_TRUNC/G_PTRTOINT input/output on different banks\n");
3127 return false;
3128 }
3129
3130 if (DstRB.getID() == AArch64::GPRRegBankID) {
3131 const TargetRegisterClass *DstRC =
3132 getRegClassForTypeOnBank(DstTy, DstRB, RBI);
3133 if (!DstRC)
3134 return false;
3135
3136 const TargetRegisterClass *SrcRC =
3137 getRegClassForTypeOnBank(SrcTy, SrcRB, RBI);
3138 if (!SrcRC)
3139 return false;
3140
3141 if (!RBI.constrainGenericRegister(SrcReg, *SrcRC, MRI) ||
3142 !RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) {
3143 LLVM_DEBUG(dbgs() << "Failed to constrain G_TRUNC/G_PTRTOINT\n");
3144 return false;
3145 }
3146
3147 if (DstRC == SrcRC) {
3148 // Nothing to be done
3149 } else if (Opcode == TargetOpcode::G_TRUNC && DstTy == LLT::scalar(32) &&
3150 SrcTy == LLT::scalar(64)) {
3151 llvm_unreachable("TableGen can import this case");
3152 return false;
3153 } else if (DstRC == &AArch64::GPR32RegClass &&
3154 SrcRC == &AArch64::GPR64RegClass) {
3155 I.getOperand(1).setSubReg(AArch64::sub_32);
3156 } else {
3157 LLVM_DEBUG(
3158 dbgs() << "Unhandled mismatched classes in G_TRUNC/G_PTRTOINT\n");
3159 return false;
3160 }
3161
3162 I.setDesc(TII.get(TargetOpcode::COPY));
3163 return true;
3164 } else if (DstRB.getID() == AArch64::FPRRegBankID) {
3165 if (DstTy == LLT::fixed_vector(4, 16) &&
3166 SrcTy == LLT::fixed_vector(4, 32)) {
3167 I.setDesc(TII.get(AArch64::XTNv4i16));
3168 constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3169 return true;
3170 }
3171
3172 if (!SrcTy.isVector() && SrcTy.getSizeInBits() == 128) {
3173 MachineInstr *Extract = emitExtractVectorElt(
3174 DstReg, DstRB, LLT::scalar(DstTy.getSizeInBits()), SrcReg, 0, MIB);
3175 if (!Extract)
3176 return false;
3177 I.eraseFromParent();
3178 return true;
3179 }
3180
3181 // We might have a vector G_PTRTOINT, in which case just emit a COPY.
3182 if (Opcode == TargetOpcode::G_PTRTOINT) {
3183 assert(DstTy.isVector() && "Expected an FPR ptrtoint to be a vector");
3184 I.setDesc(TII.get(TargetOpcode::COPY));
3185 return selectCopy(I, TII, MRI, TRI, RBI);
3186 }
3187 }
3188
3189 return false;
3190 }
3191
3192 case TargetOpcode::G_ANYEXT: {
3193 if (selectUSMovFromExtend(I, MRI))
3194 return true;
3195
3196 const Register DstReg = I.getOperand(0).getReg();
3197 const Register SrcReg = I.getOperand(1).getReg();
3198
3199 const RegisterBank &RBDst = *RBI.getRegBank(DstReg, MRI, TRI);
3200 if (RBDst.getID() != AArch64::GPRRegBankID) {
3201 LLVM_DEBUG(dbgs() << "G_ANYEXT on bank: " << RBDst
3202 << ", expected: GPR\n");
3203 return false;
3204 }
3205
3206 const RegisterBank &RBSrc = *RBI.getRegBank(SrcReg, MRI, TRI);
3207 if (RBSrc.getID() != AArch64::GPRRegBankID) {
3208 LLVM_DEBUG(dbgs() << "G_ANYEXT on bank: " << RBSrc
3209 << ", expected: GPR\n");
3210 return false;
3211 }
3212
3213 const unsigned DstSize = MRI.getType(DstReg).getSizeInBits();
3214
3215 if (DstSize == 0) {
3216 LLVM_DEBUG(dbgs() << "G_ANYEXT operand has no size, not a gvreg?\n");
3217 return false;
3218 }
3219
3220 if (DstSize != 64 && DstSize > 32) {
3221 LLVM_DEBUG(dbgs() << "G_ANYEXT to size: " << DstSize
3222 << ", expected: 32 or 64\n");
3223 return false;
3224 }
3225 // At this point G_ANYEXT is just like a plain COPY, but we need
3226 // to explicitly form the 64-bit value if any.
3227 if (DstSize > 32) {
3228 Register ExtSrc = MRI.createVirtualRegister(&AArch64::GPR64allRegClass);
3229 BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::SUBREG_TO_REG))
3230 .addDef(ExtSrc)
3231 .addImm(0)
3232 .addUse(SrcReg)
3233 .addImm(AArch64::sub_32);
3234 I.getOperand(1).setReg(ExtSrc);
3235 }
3236 return selectCopy(I, TII, MRI, TRI, RBI);
3237 }
3238
3239 case TargetOpcode::G_ZEXT:
3240 case TargetOpcode::G_SEXT_INREG:
3241 case TargetOpcode::G_SEXT: {
3242 if (selectUSMovFromExtend(I, MRI))
3243 return true;
3244
3245 unsigned Opcode = I.getOpcode();
3246 const bool IsSigned = Opcode != TargetOpcode::G_ZEXT;
3247 const Register DefReg = I.getOperand(0).getReg();
3248 Register SrcReg = I.getOperand(1).getReg();
3249 const LLT DstTy = MRI.getType(DefReg);
3250 const LLT SrcTy = MRI.getType(SrcReg);
3251 unsigned DstSize = DstTy.getSizeInBits();
3252 unsigned SrcSize = SrcTy.getSizeInBits();
3253
3254 // SEXT_INREG has the same src reg size as dst, the size of the value to be
3255 // extended is encoded in the imm.
3256 if (Opcode == TargetOpcode::G_SEXT_INREG)
3257 SrcSize = I.getOperand(2).getImm();
3258
3259 if (DstTy.isVector())
3260 return false; // Should be handled by imported patterns.
3261
3262 assert((*RBI.getRegBank(DefReg, MRI, TRI)).getID() ==
3263 AArch64::GPRRegBankID &&
3264 "Unexpected ext regbank");
3265
3266 MachineInstr *ExtI;
3267
3268 // First check if we're extending the result of a load which has a dest type
3269 // smaller than 32 bits, then this zext is redundant. GPR32 is the smallest
3270 // GPR register on AArch64 and all loads which are smaller automatically
3271 // zero-extend the upper bits. E.g.
3272 // %v(s8) = G_LOAD %p, :: (load 1)
3273 // %v2(s32) = G_ZEXT %v(s8)
3274 if (!IsSigned) {
3275 auto *LoadMI = getOpcodeDef(TargetOpcode::G_LOAD, SrcReg, MRI);
3276 bool IsGPR =
3277 RBI.getRegBank(SrcReg, MRI, TRI)->getID() == AArch64::GPRRegBankID;
3278 if (LoadMI && IsGPR) {
3279 const MachineMemOperand *MemOp = *LoadMI->memoperands_begin();
3280 unsigned BytesLoaded = MemOp->getSize();
3281 if (BytesLoaded < 4 && SrcTy.getSizeInBytes() == BytesLoaded)
3282 return selectCopy(I, TII, MRI, TRI, RBI);
3283 }
3284
3285 // For the 32-bit -> 64-bit case, we can emit a mov (ORRWrs)
3286 // + SUBREG_TO_REG.
3287 //
3288 // If we are zero extending from 32 bits to 64 bits, it's possible that
3289 // the instruction implicitly does the zero extend for us. In that case,
3290 // we only need the SUBREG_TO_REG.
3291 if (IsGPR && SrcSize == 32 && DstSize == 64) {
3292 // Unlike with the G_LOAD case, we don't want to look through copies
3293 // here. (See isDef32.)
3294 MachineInstr *Def = MRI.getVRegDef(SrcReg);
3295 Register SubregToRegSrc = SrcReg;
3296
3297 // Does the instruction implicitly zero extend?
3298 if (!Def || !isDef32(*Def)) {
3299 // No. Zero out using an OR.
3300 Register OrDst = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
3301 const Register ZReg = AArch64::WZR;
3302 MIB.buildInstr(AArch64::ORRWrs, {OrDst}, {ZReg, SrcReg}).addImm(0);
3303 SubregToRegSrc = OrDst;
3304 }
3305
3306 MIB.buildInstr(AArch64::SUBREG_TO_REG, {DefReg}, {})
3307 .addImm(0)
3308 .addUse(SubregToRegSrc)
3309 .addImm(AArch64::sub_32);
3310
3311 if (!RBI.constrainGenericRegister(DefReg, AArch64::GPR64RegClass,
3312 MRI)) {
3313 LLVM_DEBUG(dbgs() << "Failed to constrain G_ZEXT destination\n");
3315 }
3316
3317 if (!RBI.constrainGenericRegister(SrcReg, AArch64::GPR32RegClass,
3318 MRI)) {
3319 LLVM_DEBUG(dbgs() << "Failed to constrain G_ZEXT source\n");
3320 return false;
3321 }
3322
3323 I.eraseFromParent();
3324 return true;
3325 }
3326 }
3327
3328 if (DstSize == 64) {
3329 if (Opcode != TargetOpcode::G_SEXT_INREG) {
3330 // FIXME: Can we avoid manually doing this?
3331 if (!RBI.constrainGenericRegister(SrcReg, AArch64::GPR32RegClass,
3332 MRI)) {
3333 LLVM_DEBUG(dbgs() << "Failed to constrain " << TII.getName(Opcode)
3334 << " operand\n");
3335 return false;
3336 }
3337 SrcReg = MIB.buildInstr(AArch64::SUBREG_TO_REG,
3338 {&AArch64::GPR64RegClass}, {})
3339 .addImm(0)
3340 .addUse(SrcReg)
3341 .addImm(AArch64::sub_32)
3342 .getReg(0);
3343 }
3344
3345 ExtI = MIB.buildInstr(IsSigned ? AArch64::SBFMXri : AArch64::UBFMXri,
3346 {DefReg}, {SrcReg})
3347 .addImm(0)
3348 .addImm(SrcSize - 1);
3349 } else if (DstSize <= 32) {
3350 ExtI = MIB.buildInstr(IsSigned ? AArch64::SBFMWri : AArch64::UBFMWri,
3351 {DefReg}, {SrcReg})
3352 .addImm(0)
3353 .addImm(SrcSize - 1);
3354 } else {
3355 return false;
3356 }
3357
3358 constrainSelectedInstRegOperands(*ExtI, TII, TRI, RBI);
3359 I.eraseFromParent();
3360 return true;
3361 }
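// Illustrative sketch, not part of the upstream file: a 32-to-64-bit zero
// extend on GPRs,
//   %d:gpr(s64) = G_ZEXT %s:gpr(s32)
// becomes ORRWrs %tmp, $wzr, %s, 0 followed by
//   SUBREG_TO_REG %d, 0, %tmp, sub_32
// unless %s is already defined by an instruction that zeroes the upper bits
// (isDef32), in which case only the SUBREG_TO_REG is emitted.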
3362
3363 case TargetOpcode::G_SITOFP:
3364 case TargetOpcode::G_UITOFP:
3365 case TargetOpcode::G_FPTOSI:
3366 case TargetOpcode::G_FPTOUI: {
3367 const LLT DstTy = MRI.getType(I.getOperand(0).getReg()),
3368 SrcTy = MRI.getType(I.getOperand(1).getReg());
3369 const unsigned NewOpc = selectFPConvOpc(Opcode, DstTy, SrcTy);
3370 if (NewOpc == Opcode)
3371 return false;
3372
3373 I.setDesc(TII.get(NewOpc));
3374 constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3375 I.setFlags(MachineInstr::NoFPExcept);
3376
3377 return true;
3378 }
3379
3380 case TargetOpcode::G_FREEZE:
3381 return selectCopy(I, TII, MRI, TRI, RBI);
3382
3383 case TargetOpcode::G_INTTOPTR:
3384 // The importer is currently unable to import pointer types since they
3385 // didn't exist in SelectionDAG.
3386 return selectCopy(I, TII, MRI, TRI, RBI);
3387
3388 case TargetOpcode::G_BITCAST:
3389 // Imported SelectionDAG rules can handle every bitcast except those that
3390 // bitcast from a type to the same type. Ideally, these shouldn't occur
3391 // but we might not run an optimizer that deletes them. The other exception
3392 // is bitcasts involving pointer types, as SelectionDAG has no knowledge
3393 // of them.
3394 return selectCopy(I, TII, MRI, TRI, RBI);
3395
3396 case TargetOpcode::G_SELECT: {
3397 auto &Sel = cast<GSelect>(I);
3398 if (MRI.getType(Sel.getCondReg()) != LLT::scalar(1)) {
3399 LLVM_DEBUG(dbgs() << "G_SELECT cond has type: " << Ty
3400 << ", expected: " << LLT::scalar(1) << '\n');
3401 return false;
3402 }
3403
3404 const Register CondReg = Sel.getCondReg();
3405 const Register TReg = Sel.getTrueReg();
3406 const Register FReg = Sel.getFalseReg();
3407
3408 if (tryOptSelect(Sel))
3409 return true;
3410
3411 // Make sure to use an unused vreg instead of wzr, so that the peephole
3412 // optimizations will be able to optimize these.
3413 Register DeadVReg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
3414 auto TstMI = MIB.buildInstr(AArch64::ANDSWri, {DeadVReg}, {CondReg})
3415 .addImm(AArch64_AM::encodeLogicalImmediate(1, 32));
3416 constrainSelectedInstRegOperands(*TstMI, TII, TRI, RBI);
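// The ANDS tests bit 0 of the i1 condition (logical immediate #1) purely to
// set NZCV; its destination is a dead vreg. emitSelect then materializes the
// result with a CSEL-style instruction predicated on NE ("condition bit set").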
3417 if (!emitSelect(Sel.getReg(0), TReg, FReg, AArch64CC::NE, MIB))
3418 return false;
3419 Sel.eraseFromParent();
3420 return true;
3421 }
3422 case TargetOpcode::G_ICMP: {
3423 if (Ty.isVector())
3424 return selectVectorICmp(I, MRI);
3425
3426 if (Ty != LLT::scalar(32)) {
3427 LLVM_DEBUG(dbgs() << "G_ICMP result has type: " << Ty
3428 << ", expected: " << LLT::scalar(32) << '\n');
3429 return false;
3430 }
3431
3432 auto Pred = static_cast<CmpInst::Predicate>(I.getOperand(1).getPredicate());
3433 const AArch64CC::CondCode InvCC =
3434 changeICMPPredToAArch64CC(CmpInst::getInversePredicate(Pred));
3435 emitIntegerCompare(I.getOperand(2), I.getOperand(3), I.getOperand(1), MIB);
3436 emitCSINC(/*Dst=*/I.getOperand(0).getReg(), /*Src1=*/AArch64::WZR,
3437 /*Src2=*/AArch64::WZR, InvCC, MIB);
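// CSINC dst, wzr, wzr, InvCC is the CSET idiom: it yields 1 exactly when the
// original predicate holds and 0 otherwise, so the boolean compare result
// lands directly in the destination register.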
3438 I.eraseFromParent();
3439 return true;
3440 }
3441
3442 case TargetOpcode::G_FCMP: {
3443 CmpInst::Predicate Pred =
3444 static_cast<CmpInst::Predicate>(I.getOperand(1).getPredicate());
3445 if (!emitFPCompare(I.getOperand(2).getReg(), I.getOperand(3).getReg(), MIB,
3446 Pred) ||
3447 !emitCSetForFCmp(I.getOperand(0).getReg(), Pred, MIB))
3448 return false;
3449 I.eraseFromParent();
3450 return true;
3451 }
3452 case TargetOpcode::G_VASTART:
3453 return STI.isTargetDarwin() ? selectVaStartDarwin(I, MF, MRI)
3454 : selectVaStartAAPCS(I, MF, MRI);
3455 case TargetOpcode::G_INTRINSIC:
3456 return selectIntrinsic(I, MRI);
3457 case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS:
3458 return selectIntrinsicWithSideEffects(I, MRI);
3459 case TargetOpcode::G_IMPLICIT_DEF: {
3460 I.setDesc(TII.get(TargetOpcode::IMPLICIT_DEF));
3461 const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
3462 const Register DstReg = I.getOperand(0).getReg();
3463 const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
3464 const TargetRegisterClass *DstRC =
3465 getRegClassForTypeOnBank(DstTy, DstRB, RBI);
3466 RBI.constrainGenericRegister(DstReg, *DstRC, MRI);
3467 return true;
3468 }
3469 case TargetOpcode::G_BLOCK_ADDR: {
3470 if (TM.getCodeModel() == CodeModel::Large) {
3471 materializeLargeCMVal(I, I.getOperand(1).getBlockAddress(), 0);
3472 I.eraseFromParent();
3473 return true;
3474 } else {
3475 I.setDesc(TII.get(AArch64::MOVaddrBA));
3476 auto MovMI = BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::MOVaddrBA),
3477 I.getOperand(0).getReg())
3478 .addBlockAddress(I.getOperand(1).getBlockAddress(),
3479 /* Offset */ 0, AArch64II::MO_PAGE)
3480 .addBlockAddress(
3481 I.getOperand(1).getBlockAddress(), /* Offset */ 0,
3482 AArch64II::MO_NC | AArch64II::MO_PAGEOFF);
3483 I.eraseFromParent();
3484 return constrainSelectedInstRegOperands(*MovMI, TII, TRI, RBI);
3485 }
3486 }
3487 case AArch64::G_DUP: {
3488 // When the scalar of G_DUP is an s8/s16 gpr, they can't be selected by
3489 // imported patterns. Do it manually here. Avoiding generating s16 gpr is
3490 // difficult because at RBS we may end up pessimizing the fpr case if we
3491 // decided to add an anyextend to fix this. Manual selection is the most
3492 // robust solution for now.
3493 if (RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI)->getID() !=
3494 AArch64::GPRRegBankID)
3495 return false; // We expect the fpr regbank case to be imported.
3496 LLT VecTy = MRI.getType(I.getOperand(0).getReg());
3497 if (VecTy == LLT::fixed_vector(8, 8))
3498 I.setDesc(TII.get(AArch64::DUPv8i8gpr));
3499 else if (VecTy == LLT::fixed_vector(16, 8))
3500 I.setDesc(TII.get(AArch64::DUPv16i8gpr));
3501 else if (VecTy == LLT::fixed_vector(4, 16))
3502 I.setDesc(TII.get(AArch64::DUPv4i16gpr));
3503 else if (VecTy == LLT::fixed_vector(8, 16))
3504 I.setDesc(TII.get(AArch64::DUPv8i16gpr));
3505 else
3506 return false;
3507 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3508 }
3509 case TargetOpcode::G_INTRINSIC_TRUNC:
3510 return selectIntrinsicTrunc(I, MRI);
3511 case TargetOpcode::G_INTRINSIC_ROUND:
3512 return selectIntrinsicRound(I, MRI);
3513 case TargetOpcode::G_BUILD_VECTOR:
3514 return selectBuildVector(I, MRI);
3515 case TargetOpcode::G_MERGE_VALUES:
3516 return selectMergeValues(I, MRI);
3517 case TargetOpcode::G_UNMERGE_VALUES:
3518 return selectUnmergeValues(I, MRI);
3519 case TargetOpcode::G_SHUFFLE_VECTOR:
3520 return selectShuffleVector(I, MRI);
3521 case TargetOpcode::G_EXTRACT_VECTOR_ELT:
3522 return selectExtractElt(I, MRI);
3523 case TargetOpcode::G_INSERT_VECTOR_ELT:
3524 return selectInsertElt(I, MRI);
3525 case TargetOpcode::G_CONCAT_VECTORS:
3526 return selectConcatVectors(I, MRI);
3527 case TargetOpcode::G_JUMP_TABLE:
3528 return selectJumpTable(I, MRI);
3529 case TargetOpcode::G_VECREDUCE_FADD:
3530 case TargetOpcode::G_VECREDUCE_ADD:
3531 return selectReduction(I, MRI);
3532 case TargetOpcode::G_MEMCPY:
3533 case TargetOpcode::G_MEMCPY_INLINE:
3534 case TargetOpcode::G_MEMMOVE:
3535 case TargetOpcode::G_MEMSET:
3536 assert(STI.hasMOPS() && "Shouldn't get here without +mops feature");
3537 return selectMOPS(I, MRI);
3538 }
3539
3540 return false;
3541}
3542
3543bool AArch64InstructionSelector::selectReduction(MachineInstr &I,
3544 MachineRegisterInfo &MRI) {
3545 Register VecReg = I.getOperand(1).getReg();
3546 LLT VecTy = MRI.getType(VecReg);
3547 if (I.getOpcode() == TargetOpcode::G_VECREDUCE_ADD) {
3548 // For <2 x i32> ADDPv2i32 generates an FPR64 value, so we need to emit
3549 // a subregister copy afterwards.
3550 if (VecTy == LLT::fixed_vector(2, 32)) {
3551 Register DstReg = I.getOperand(0).getReg();
3552 auto AddP = MIB.buildInstr(AArch64::ADDPv2i32, {&AArch64::FPR64RegClass},
3553 {VecReg, VecReg});
3554 auto Copy = MIB.buildInstr(TargetOpcode::COPY, {DstReg}, {})
3555 .addReg(AddP.getReg(0), 0, AArch64::ssub)
3556 .getReg(0);
3557 RBI.constrainGenericRegister(Copy, AArch64::FPR32RegClass, MRI);
3558 I.eraseFromParent();
3559 return constrainSelectedInstRegOperands(*AddP, TII, TRI, RBI);
3560 }
3561
3562 unsigned Opc = 0;
3563 if (VecTy == LLT::fixed_vector(16, 8))
3564 Opc = AArch64::ADDVv16i8v;
3565 else if (VecTy == LLT::fixed_vector(8, 16))
3566 Opc = AArch64::ADDVv8i16v;
3567 else if (VecTy == LLT::fixed_vector(4, 32))
3568 Opc = AArch64::ADDVv4i32v;
3569 else if (VecTy == LLT::fixed_vector(2, 64))
3570 Opc = AArch64::ADDPv2i64p;
3571 else {
3572 LLVM_DEBUG(dbgs() << "Unhandled type for add reduction");
3573 return false;
3574 }
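// ADDV is a full across-lanes integer add that leaves the sum in the low
// element; there is no ADDV form for <2 x s64>, so the pairwise ADDP of the
// two lanes produces the same result there.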
3575 I.setDesc(TII.get(Opc));
3576 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3577 }
3578
3579 if (I.getOpcode() == TargetOpcode::G_VECREDUCE_FADD) {
3580 unsigned Opc = 0;
3581 if (VecTy == LLT::fixed_vector(2, 32))
3582 Opc = AArch64::FADDPv2i32p;
3583 else if (VecTy == LLT::fixed_vector(2, 64))
3584 Opc = AArch64::FADDPv2i64p;
3585 else {
3586 LLVM_DEBUG(dbgs() << "Unhandled type for fadd reduction");
3587 return false;
3588 }
3589 I.setDesc(TII.get(Opc));
3590 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3591 }
3592 return false;
3593}
3594
3595bool AArch64InstructionSelector::selectMOPS(MachineInstr &GI,
3596 MachineRegisterInfo &MRI) {
3597 unsigned Mopcode;
3598 switch (GI.getOpcode()) {
3599 case TargetOpcode::G_MEMCPY:
3600 case TargetOpcode::G_MEMCPY_INLINE:
3601 Mopcode = AArch64::MOPSMemoryCopyPseudo;
3602 break;
3603 case TargetOpcode::G_MEMMOVE:
3604 Mopcode = AArch64::MOPSMemoryMovePseudo;
3605 break;
3606 case TargetOpcode::G_MEMSET:
3607 // For tagged memset see llvm.aarch64.mops.memset.tag
3608 Mopcode = AArch64::MOPSMemorySetPseudo;
3609 break;
3610 }
3611
3612 auto &DstPtr = GI.getOperand(0);
3613 auto &SrcOrVal = GI.getOperand(1);
3614 auto &Size = GI.getOperand(2);
3615
3616 // Create copies of the registers that can be clobbered.
3617 const Register DstPtrCopy = MRI.cloneVirtualRegister(DstPtr.getReg());
3618 const Register SrcValCopy = MRI.cloneVirtualRegister(SrcOrVal.getReg());
3619 const Register SizeCopy = MRI.cloneVirtualRegister(Size.getReg());
3620
3621 const bool IsSet = Mopcode == AArch64::MOPSMemorySetPseudo;
3622 const auto &SrcValRegClass =
3623 IsSet ? AArch64::GPR64RegClass : AArch64::GPR64commonRegClass;
3624
3625 // Constrain to specific registers
3626 RBI.constrainGenericRegister(DstPtrCopy, AArch64::GPR64commonRegClass, MRI);
3627 RBI.constrainGenericRegister(SrcValCopy, SrcValRegClass, MRI);
3628 RBI.constrainGenericRegister(SizeCopy, AArch64::GPR64RegClass, MRI);
3629
3630 MIB.buildCopy(DstPtrCopy, DstPtr);
3631 MIB.buildCopy(SrcValCopy, SrcOrVal);
3632 MIB.buildCopy(SizeCopy, Size);
3633
3634 // New instruction uses the copied registers because it must update them.
3635 // The defs are not used since they don't exist in G_MEM*. They are still
3636 // tied.
3637 // Note: order of operands is different from G_MEMSET, G_MEMCPY, G_MEMMOVE
3638 Register DefDstPtr = MRI.createVirtualRegister(&AArch64::GPR64commonRegClass);
3639 Register DefSize = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
3640 if (IsSet) {
3641 MIB.buildInstr(Mopcode, {DefDstPtr, DefSize},
3642 {DstPtrCopy, SizeCopy, SrcValCopy});
3643 } else {
3644 Register DefSrcPtr = MRI.createVirtualRegister(&SrcValRegClass);
3645 MIB.buildInstr(Mopcode, {DefDstPtr, DefSrcPtr, DefSize},
3646 {DstPtrCopy, SrcValCopy, SizeCopy});
3647 }
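// The MOPS pseudos are expected to expand later into the prologue/main/
// epilogue instruction triples (e.g. SETP/SETM/SETE for memset), which update
// the pointer and size registers in place; that is why clobberable copies of
// the operands were created above.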
3648
3649 GI.eraseFromParent();
3650 return true;
3651}
3652
3653bool AArch64InstructionSelector::selectBrJT(MachineInstr &I,
3654 MachineRegisterInfo &MRI) {
3655 assert(I.getOpcode() == TargetOpcode::G_BRJT && "Expected G_BRJT");
3656 Register JTAddr = I.getOperand(0).getReg();
3657 unsigned JTI = I.getOperand(1).getIndex();
3658 Register Index = I.getOperand(2).getReg();
3659
3660 Register TargetReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
3661 Register ScratchReg = MRI.createVirtualRegister(&AArch64::GPR64spRegClass);
3662
3663 MF->getInfo<AArch64FunctionInfo>()->setJumpTableEntryInfo(JTI, 4, nullptr);
3664 auto JumpTableInst = MIB.buildInstr(AArch64::JumpTableDest32,
3665 {TargetReg, ScratchReg}, {JTAddr, Index})
3666 .addJumpTableIndex(JTI);
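// JumpTableDest32 roughly computes the branch target from the jump table base
// plus the 4-byte entry selected by Index (the entry size registered just
// above), leaving the address in TargetReg and using ScratchReg as a
// temporary; the BR below then branches through it.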
3667 // Build the indirect branch.
3668 MIB.buildInstr(AArch64::BR, {}, {TargetReg});
3669 I.eraseFromParent();
3670 return constrainSelectedInstRegOperands(*JumpTableInst, TII, TRI, RBI);
3671}
3672
3673bool AArch64InstructionSelector::selectJumpTable(MachineInstr &I,
3674 MachineRegisterInfo &MRI) {
3675 assert(I.getOpcode() == TargetOpcode::G_JUMP_TABLE && "Expected jump table");
3676 assert(I.getOperand(1).isJTI() && "Jump table op should have a JTI!");
3677
3678 Register DstReg = I.getOperand(0).getReg();
3679 unsigned JTI = I.getOperand(1).getIndex();
3680 // We generate a MOVaddrJT which will get expanded to an ADRP + ADD later.
3681 auto MovMI =
3682 MIB.buildInstr(AArch64::MOVaddrJT, {DstReg}, {})
3683 .addJumpTableIndex(JTI, AArch64II::MO_PAGE)
3684 .addJumpTableIndex(JTI, AArch64II::MO_NC | AArch64II::MO_PAGEOFF);
3685 I.eraseFromParent();
3686 return constrainSelectedInstRegOperands(*MovMI, TII, TRI, RBI);
3687}
3688
3689bool AArch64InstructionSelector::selectTLSGlobalValue(
3690 MachineInstr &I, MachineRegisterInfo &MRI) {
3691 if (!STI.isTargetMachO())
3692 return false;
3693 MachineFunction &MF = *I.getParent()->getParent();
3694 MF.getFrameInfo().setAdjustsStack(true);
3695
3696 const auto &GlobalOp = I.getOperand(1);
3697 assert(GlobalOp.getOffset() == 0 &&
3698 "Shouldn't have an offset on TLS globals!");
3699 const GlobalValue &GV = *GlobalOp.getGlobal();
3700
3701 auto LoadGOT =
3702 MIB.buildInstr(AArch64::LOADgot, {&AArch64::GPR64commonRegClass}, {})
3703 .addGlobalAddress(&GV, 0, AArch64II::MO_TLS);
3704
3705 auto Load = MIB.buildInstr(AArch64::LDRXui, {&AArch64::GPR64commonRegClass},
3706 {LoadGOT.getReg(0)})
3707 .addImm(0);
3708
3709 MIB.buildCopy(Register(AArch64::X0), LoadGOT.getReg(0));
3710 // TLS calls preserve all registers except those that absolutely must be
3711 // trashed: X0 (it takes an argument), LR (it's a call) and NZCV (let's not be
3712 // silly).
3713 MIB.buildInstr(getBLRCallOpcode(MF), {}, {Load})
3714 .addUse(AArch64::X0, RegState::Implicit)
3715 .addDef(AArch64::X0, RegState::Implicit)
3716 .addRegMask(TRI.getTLSCallPreservedMask());
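// This is the Darwin TLV access sequence: LOADgot fetches the variable's
// thread-local descriptor, whose first word is a helper function; the helper
// is called with X0 pointing at the descriptor and returns the variable's
// address in X0.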
3717
3718 MIB.buildCopy(I.getOperand(0).getReg(), Register(AArch64::X0));
3719 RBI.constrainGenericRegister(I.getOperand(0).getReg(), AArch64::GPR64RegClass,
3720 MRI);
3721 I.eraseFromParent();
3722 return true;
3723}
3724
3725bool AArch64InstructionSelector::selectIntrinsicTrunc(
3726 MachineInstr &I, MachineRegisterInfo &MRI) const {
3727 const LLT SrcTy = MRI.getType(I.getOperand(0).getReg());
3728
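// G_INTRINSIC_TRUNC maps onto FRINTZ (round toward zero), in scalar or vector
// form depending on the type.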
3729 // Select the correct opcode.
3730 unsigned Opc = 0;
3731 if (!SrcTy.isVector()) {
3732 switch (SrcTy.getSizeInBits()) {
3733 default:
3734 case 16:
3735 Opc = AArch64::FRINTZHr;
3736 break;
3737 case 32:
3738 Opc = AArch64::FRINTZSr;
3739 break;
3740 case 64:
3741 Opc = AArch64::FRINTZDr;
3742 break;
3743 }
3744 } else {
3745 unsigned NumElts = SrcTy.getNumElements();
3746 switch (SrcTy.getElementType().getSizeInBits()) {
3747 default:
3748 break;
3749 case 16:
3750 if (NumElts == 4)
3751 Opc = AArch64::FRINTZv4f16;
3752 else if (NumElts == 8)
3753 Opc = AArch64::FRINTZv8f16;
3754 break;
3755 case 32:
3756 if (NumElts == 2)
3757 Opc = AArch64::FRINTZv2f32;
3758 else if (NumElts == 4)
3759 Opc = AArch64::FRINTZv4f32;
3760 break;
3761 case 64:
3762 if (NumElts == 2)
3763 Opc = AArch64::FRINTZv2f64;
3764 break;
3765 }
3766 }
3767
3768 if (!Opc) {
3769 // Didn't get an opcode above, bail.
3770 LLVM_DEBUG(dbgs() << "Unsupported type for G_INTRINSIC_TRUNC!\n");
3771 return false;
3772 }
3773
3774 // Legalization would have set us up perfectly for this; we just need to
3775 // set the opcode and move on.
3776 I.setDesc(TII.get(Opc));
3777 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3778}
3779
3780bool AArch64InstructionSelector::selectIntrinsicRound(
3781 MachineInstr &I, MachineRegisterInfo &MRI) const {
3782 const LLT SrcTy = MRI.getType(I.getOperand(0).getReg());
3783
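// G_INTRINSIC_ROUND maps onto FRINTA (round to nearest, ties away from zero),
// matching llvm.round.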
3784 // Select the correct opcode.
3785 unsigned Opc = 0;
3786 if (!SrcTy.isVector()) {
3787 switch (SrcTy.getSizeInBits()) {
3788 default:
3789 case 16:
3790 Opc = AArch64::FRINTAHr;
3791 break;
3792 case 32:
3793 Opc = AArch64::FRINTASr;
3794 break;
3795 case 64:
3796 Opc = AArch64::FRINTADr;
3797 break;
3798 }
3799 } else {
3800 unsigned NumElts = SrcTy.getNumElements();
3801 switch (SrcTy.getElementType().getSizeInBits()) {
3802 default:
3803 break;
3804 case 16:
3805 if (NumElts == 4)
3806 Opc = AArch64::FRINTAv4f16;
3807 else if (NumElts == 8)
3808 Opc = AArch64::FRINTAv8f16;
3809 break;
3810 case 32:
3811 if (NumElts == 2)
3812 Opc = AArch64::FRINTAv2f32;
3813 else if (NumElts == 4)
3814 Opc = AArch64::FRINTAv4f32;
3815 break;
3816 case 64:
3817 if (NumElts == 2)
3818 Opc = AArch64::FRINTAv2f64;
3819 break;
3820 }
3821 }
3822
3823 if (!Opc) {
3824 // Didn't get an opcode above, bail.
3825 LLVM_DEBUG(dbgs() << "Unsupported type for G_INTRINSIC_ROUND!\n");
3826 return false;
3827 }
3828
3829 // Legalization would have set us up perfectly for this; we just need to
3830 // set the opcode and move on.
3831 I.setDesc(TII.get(Opc));
3832 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3833}
3834
3835bool AArch64InstructionSelector::selectVectorICmp(
3836 MachineInstr &I, MachineRegisterInfo &MRI) {
3837 Register DstReg = I.getOperand(0).getReg();
3838 LLT DstTy = MRI.getType(DstReg);
3839 Register SrcReg = I.getOperand(2).getReg();
3840 Register Src2Reg = I.getOperand(3).getReg();
3841 LLT SrcTy = MRI.getType(SrcReg);
3842
3843 unsigned SrcEltSize = SrcTy.getElementType().getSizeInBits();
3844 unsigned NumElts = DstTy.getNumElements();
3845
3846 // First index is element size, 0 == 8b, 1 == 16b, 2 == 32b, 3 == 64b
3847 // Second index is num elts, 0 == v2, 1 == v4, 2 == v8, 3 == v16
3848 // Third index is cc opcode:
3849 // 0 == eq
3850 // 1 == ugt
3851 // 2 == uge
3852 // 3 == ult
3853 // 4 == ule
3854 // 5 == sgt
3855 // 6 == sge
3856 // 7 == slt
3857 // 8 == sle
3858 // ne is done by negating 'eq' result.
3859
3860 // This table below assumes that for some comparisons the operands will be
3861 // commuted.
3862 // ult op == commute + ugt op
3863 // ule op == commute + uge op
3864 // slt op == commute + sgt op
3865 // sle op == commute + sge op
3866 unsigned PredIdx = 0;
3867 bool SwapOperands = false;
3868 CmpInst::Predicate Pred = (CmpInst::Predicate)I.getOperand(1).getPredicate();
3869 switch (Pred) {
3870 case CmpInst::ICMP_NE:
3871 case CmpInst::ICMP_EQ:
3872 PredIdx = 0;
3873 break;
3874 case CmpInst::ICMP_UGT:
3875 PredIdx = 1;
3876 break;
3877 case CmpInst::ICMP_UGE:
3878 PredIdx = 2;
3879 break;
3880 case CmpInst::ICMP_ULT:
3881 PredIdx = 3;
3882 SwapOperands = true;
3883 break;
3884 case CmpInst::ICMP_ULE:
3885 PredIdx = 4;
3886 SwapOperands = true;
3887 break;
3888 case CmpInst::ICMP_SGT:
3889 PredIdx = 5;
3890 break;
3891 case CmpInst::ICMP_SGE:
3892 PredIdx = 6;
3893 break;
3894 case CmpInst::ICMP_SLT:
3895 PredIdx = 7;
3896 SwapOperands = true;
3897 break;
3898 case CmpInst::ICMP_SLE:
3899 PredIdx = 8;
3900 SwapOperands = true;
3901 break;
3902 default:
3903 llvm_unreachable("Unhandled icmp predicate");
3904 return false;
3905 }
3906
3907 // This table obviously should be tablegen'd when we have our GISel native
3908 // tablegen selector.
3909
3910 static const unsigned OpcTable[4][4][9] = {
3911 {
3912 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3913 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3914 0 /* invalid */},
3915 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3916 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3917 0 /* invalid */},
3918 {AArch64::CMEQv8i8, AArch64::CMHIv8i8, AArch64::CMHSv8i8,
3919 AArch64::CMHIv8i8, AArch64::CMHSv8i8, AArch64::CMGTv8i8,
3920 AArch64::CMGEv8i8, AArch64::CMGTv8i8, AArch64::CMGEv8i8},
3921 {AArch64::CMEQv16i8, AArch64::CMHIv16i8, AArch64::CMHSv16i8,
3922 AArch64::CMHIv16i8, AArch64::CMHSv16i8, AArch64::CMGTv16i8,
3923 AArch64::CMGEv16i8, AArch64::CMGTv16i8, AArch64::CMGEv16i8}
3924 },
3925 {
3926 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3927 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3928 0 /* invalid */},
3929 {AArch64::CMEQv4i16, AArch64::CMHIv4i16, AArch64::CMHSv4i16,
3930 AArch64::CMHIv4i16, AArch64::CMHSv4i16, AArch64::CMGTv4i16,
3931 AArch64::CMGEv4i16, AArch64::CMGTv4i16, AArch64::CMGEv4i16},
3932 {AArch64::CMEQv8i16, AArch64::CMHIv8i16, AArch64::CMHSv8i16,
3933 AArch64::CMHIv8i16, AArch64::CMHSv8i16, AArch64::CMGTv8i16,
3934 AArch64::CMGEv8i16, AArch64::CMGTv8i16, AArch64::CMGEv8i16},
3935 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3936 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3937 0 /* invalid */}
3938 },
3939 {
3940 {AArch64::CMEQv2i32, AArch64::CMHIv2i32, AArch64::CMHSv2i32,
3941 AArch64::CMHIv2i32, AArch64::CMHSv2i32, AArch64::CMGTv2i32,
3942 AArch64::CMGEv2i32, AArch64::CMGTv2i32, AArch64::CMGEv2i32},
3943 {AArch64::CMEQv4i32, AArch64::CMHIv4i32, AArch64::CMHSv4i32,
3944 AArch64::CMHIv4i32, AArch64::CMHSv4i32, AArch64::CMGTv4i32,
3945 AArch64::CMGEv4i32, AArch64::CMGTv4i32, AArch64::CMGEv4i32},
3946 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3947 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3948 0 /* invalid */},
3949 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3950 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3951 0 /* invalid */}
3952 },
3953 {
3954 {AArch64::CMEQv2i64, AArch64::CMHIv2i64, AArch64::CMHSv2i64,
3955 AArch64::CMHIv2i64, AArch64::CMHSv2i64, AArch64::CMGTv2i64,
3956 AArch64::CMGEv2i64, AArch64::CMGTv2i64, AArch64::CMGEv2i64},
3957 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3958 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3959 0 /* invalid */},
3960 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3961 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3962 0 /* invalid */},
3963 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3964 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3965 0 /* invalid */}
3966 },
3967 };
3968 unsigned EltIdx = Log2_32(SrcEltSize / 8);
3969 unsigned NumEltsIdx = Log2_32(NumElts / 2);
3970 unsigned Opc = OpcTable[EltIdx][NumEltsIdx][PredIdx];
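// Example: for <4 x s32> with an sgt predicate, EltIdx = Log2_32(32 / 8) = 2,
// NumEltsIdx = Log2_32(4 / 2) = 1 and PredIdx = 5, selecting
// AArch64::CMGTv4i32 from the table.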
3971 if (!Opc) {
3972 LLVM_DEBUG(dbgs() << "Could not map G_ICMP to cmp opcode");
3973 return false;
3974 }
3975
3976 const RegisterBank &VecRB = *RBI.getRegBank(SrcReg, MRI, TRI);
3977 const TargetRegisterClass *SrcRC =
3978 getRegClassForTypeOnBank(SrcTy, VecRB, RBI, true);
3979 if (!SrcRC) {
3980 LLVM_DEBUG(dbgs() << "Could not determine source register class.\n");
3981 return false;
3982 }
3983
3984 unsigned NotOpc = Pred == ICmpInst::ICMP_NE ? AArch64::NOTv8i8 : 0;
3985 if (SrcTy.getSizeInBits() == 128)
3986 NotOpc = NotOpc ? AArch64::NOTv16i8 : 0;
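// ICMP_NE reuses the EQ entry (PredIdx 0) and inverts the all-ones/all-zeros
// lanes afterwards with a vector NOT (NOTv8i8 for 64-bit vectors, NOTv16i8
// for 128-bit ones).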
3987
3988 if (SwapOperands)
3989 std::swap(SrcReg, Src2Reg);
3990
3991 auto Cmp = MIB.buildInstr(Opc, {SrcRC}, {SrcReg, Src2Reg});
3992 constrainSelectedInstRegOperands(*Cmp, TII, TRI, RBI);
3993
3994 // Invert if we had a 'ne' cc.
3995 if (NotOpc) {
3996 Cmp = MIB.buildInstr(NotOpc, {DstReg}, {Cmp});
3997 constrainSelectedInstRegOperands(*Cmp, TII, TRI, RBI);
3998 } else {
3999 MIB.buildCopy(DstReg, Cmp.getReg(0));
4000 }
4001 RBI.constrainGenericRegister(DstReg, *SrcRC, MRI);
4002 I.eraseFromParent();
4003 return true;
4004}
4005
4006MachineInstr *AArch64InstructionSelector::emitScalarToVector(
4007 unsigned EltSize, const TargetRegisterClass *DstRC, Register Scalar,
4008 MachineIRBuilder &MIRBuilder) const {
4009 auto Undef = MIRBuilder.buildInstr(TargetOpcode::IMPLICIT_DEF, {DstRC}, {});
4010
4011 auto BuildFn = [&](unsigned SubregIndex) {
4012 auto Ins =
4013 MIRBuilder
4014 .buildInstr(TargetOpcode::INSERT_SUBREG, {DstRC}, {Undef, Scalar})
4015 .addImm(SubregIndex);
4016 constrainSelectedInstRegOperands(*Undef, TII, TRI, RBI);
4017 constrainSelectedInstRegOperands(*Ins, TII, TRI, RBI);
4018 return &*Ins;
4019 };
4020
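// IMPLICIT_DEF + INSERT_SUBREG places the scalar into the low subregister of
// an otherwise-undefined vector register, which is the form the lane-indexed
// operations downstream expect.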
4021 switch (EltSize) {
4022 case 16:
4023 return BuildFn(AArch64::hsub);
4024 case 32:
4025 return BuildFn(AArch64::ssub);
4026 case 64:
4027 return BuildFn(AArch64::dsub);
4028 default:
4029 return nullptr;
4030 }
4031}
4032
4033bool AArch64InstructionSelector::selectMergeValues(
4034 MachineInstr &I, MachineRegisterInfo &MRI) {
4035 assert(I.getOpcode() == TargetOpcode::G_MERGE_VALUES && "unexpected opcode");
4036 const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
4037 const LLT SrcTy = MRI.getType(I.getOperand(1).getReg());
4038 assert(!DstTy.isVector() && !SrcTy.isVector() && "invalid merge operation");
4039 const RegisterBank &RB = *RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI);
4040
4041 if (I.getNumOperands() != 3)
4042 return false;
4043
4044 // Merging 2 s64s into an s128.
4045 if (DstTy == LLT::scalar(128)) {
4046 if (SrcTy.getSizeInBits() != 64)
4047 return false;
4048 Register DstReg = I.getOperand(0).getReg();
4049 Register Src1Reg = I.getOperand(1).getReg();
4050 Register Src2Reg = I.getOperand(2).getReg();
4051 auto Tmp = MIB.buildInstr(TargetOpcode::IMPLICIT_DEF, {DstTy}, {});
4052 MachineInstr *InsMI =
4053 emitLaneInsert(None, Tmp.getReg(0), Src1Reg, /* LaneIdx */ 0, RB, MIB);
4054 if (!InsMI)
4055 return false;
4056 MachineInstr *Ins2MI = emitLaneInsert(DstReg, InsMI->getOperand(0).getReg(),
4057 Src2Reg, /* LaneIdx */ 1, RB, MIB);
4058 if (!Ins2MI)
4059 return false;
4060 constrainSelectedInstRegOperands(*InsMI, TII, TRI, RBI);
4061 constrainSelectedInstRegOperands(*Ins2MI, TII, TRI, RBI);
4062 I.eraseFromParent();
4063 return true;
4064 }
4065
4066 if (RB.getID() != AArch64::GPRRegBankID)
4067 return false;
4068
4069 if (DstTy.getSizeInBits() != 64 || SrcTy.getSizeInBits() != 32)
4070 return false;
4071
4072 auto *DstRC = &AArch64::GPR64RegClass;
4073 Register SubToRegDef = MRI.createVirtualRegister(DstRC);
4074 MachineInstr &SubRegMI = *BuildMI(*I.getParent(), I, I.getDebugLoc(),
4075 TII.get(TargetOpcode::SUBREG_TO_REG))
4076 .addDef(SubToRegDef)
4077 .addImm(0)
4078 .addUse(I.getOperand(1).getReg())
4079 .addImm(AArch64::sub_32);
4080 Register SubToRegDef2 = MRI.createVirtualRegister(DstRC);
4081 // Need to anyext the second scalar before we can use bfm
4082 MachineInstr &SubRegMI2 = *BuildMI(*I.getParent(), I, I.getDebugLoc(),
4083 TII.get(TargetOpcode::SUBREG_TO_REG))
4084 .addDef(SubToRegDef2)
4085 .addImm(0)
4086 .addUse(I.getOperand(2).getReg())
4087 .addImm(AArch64::sub_32);
4088 MachineInstr &BFM =
4089 *BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::BFMXri))
4090 .addDef(I.getOperand(0).getReg())
4091 .addUse(SubToRegDef)
4092 .addUse(SubToRegDef2)
4093 .addImm(32)
4094 .addImm(31);
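// The two SUBREG_TO_REGs anyext each 32-bit source into a 64-bit register;
// BFMXri with immr = 32, imms = 31 then inserts the low 32 bits of the second
// operand into bits [63:32] of the first, producing the merged s64.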
4095 constrainSelectedInstRegOperands(SubRegMI, TII, TRI, RBI);
4096 constrainSelectedInstRegOperands(SubRegMI2, TII, TRI, RBI);
4097 constrainSelectedInstRegOperands(BFM, TII, TRI, RBI);
4098 I.eraseFromParent();
4099 return true;
4100}
4101
4102static bool getLaneCopyOpcode(unsigned &CopyOpc, unsigned &ExtractSubReg,
4103 const unsigned EltSize) {
4104 // Choose a lane copy opcode and subregister based off of the size of the
4105 // vector's elements.
4106 switch (EltSize) {
4107 case 8:
4108 CopyOpc = AArch64::DUPi8;
4109 ExtractSubReg = AArch64::bsub;
4110 break;
4111 case 16:
4112 CopyOpc = AArch64::DUPi16;
4113 ExtractSubReg = AArch64::hsub;
4114 break;
4115 case 32:
4116 CopyOpc = AArch64::DUPi32;
4117 ExtractSubReg = AArch64::ssub;
4118 break;
4119 case 64:
4120 CopyOpc = AArch64::DUPi64;
4121 ExtractSubReg = AArch64::dsub;
4122 break;
4123 default:
4124 // Unknown size, bail out.
4125 LLVM_DEBUG(dbgs() << "Elt size '" << EltSize << "' unsupported.\n");
4126 return false;
4127 }
4128 return true;
4129}
4130
4131MachineInstr *AArch64InstructionSelector::emitExtractVectorElt(
4132 Optional<Register> DstReg, const RegisterBank &DstRB, LLT ScalarTy,
4133 Register VecReg, unsigned LaneIdx, MachineIRBuilder &MIRBuilder) const {
4134 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
4135 unsigned CopyOpc = 0;
4136 unsigned ExtractSubReg = 0;
4137 if (!getLaneCopyOpcode(CopyOpc, ExtractSubReg, ScalarTy.getSizeInBits())) {
4138 LLVM_DEBUG(
4139 dbgs() << "Couldn't determine lane copy opcode for instruction.\n");
4140 return nullptr;
4141 }
4142
4143 const TargetRegisterClass *DstRC =
4144 getRegClassForTypeOnBank(ScalarTy, DstRB, RBI, true);
4145 if (!DstRC) {
4146 LLVM_DEBUG(dbgs() << "Could not determine destination register class.\n");
4147 return nullptr;
4148 }
4149
4150 const RegisterBank &VecRB = *RBI.getRegBank(VecReg, MRI, TRI);
4151 const LLT &VecTy = MRI.getType(VecReg);
4152 const TargetRegisterClass *VecRC =
4153 getRegClassForTypeOnBank(VecTy, VecRB, RBI, true);
4154 if (!VecRC) {
4155 LLVM_DEBUG(dbgs() << "Could not determine source register class.\n");
4156 return nullptr;
4157 }
4158
4159 // The register that we're going to copy into.
4160 Register InsertReg = VecReg;
4161 if (!DstReg)
4162 DstReg = MRI.createVirtualRegister(DstRC);
4163 // If the lane index is 0, we just use a subregister COPY.
4164 if (LaneIdx == 0) {
4165 auto Copy = MIRBuilder.buildInstr(TargetOpcode::COPY, {*DstReg}, {})
4166 .addReg(VecReg, 0, ExtractSubReg);
4167 RBI.constrainGenericRegister(*DstReg, *DstRC, MRI);
4168 return &*Copy;
4169 }
4170
4171 // Lane copies require 128-bit wide registers. If we're dealing with an
4172 // unpacked vector, then we need to move up to that width. Insert an implicit
4173 // def and a subregister insert to get us there.
4174 if (VecTy.getSizeInBits() != 128) {
4175 MachineInstr *ScalarToVector = emitScalarToVector(
4176 VecTy.getSizeInBits(), &AArch64::FPR128RegClass, VecReg, MIRBuilder);
4177 if (!ScalarToVector)
4178 return nullptr;
4179 InsertReg = ScalarToVector->getOperand(0).getReg();
4180 }
4181
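// The DUPi8/16/32/64 lane copies are the scalar "DUP (element)" forms: they
// copy lane LaneIdx of a 128-bit vector register into an FPR of the element's
// width.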
4182 MachineInstr *LaneCopyMI =
4183 MIRBuilder.buildInstr(CopyOpc, {*DstReg}, {InsertReg}).addImm(LaneIdx);
4184 constrainSelectedInstRegOperands(*LaneCopyMI, TII, TRI, RBI);
4185
4186 // Make sure that we actually constrain the initial copy.
4187 RBI.constrainGenericRegister(*DstReg, *DstRC, MRI);
4188 return LaneCopyMI;
4189}
4190
4191bool AArch64InstructionSelector::selectExtractElt(
4192 MachineInstr &I, MachineRegisterInfo &MRI) {
4193 assert(I.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT &&
4194 "unexpected opcode!");
4195 Register DstReg = I.getOperand(0).getReg();
4196 const LLT NarrowTy = MRI.getType(DstReg);
4197 const Register SrcReg = I.getOperand(1).getReg();
4198 const LLT WideTy = MRI.getType(SrcReg);
4199 (void)WideTy;
4200 assert(WideTy.getSizeInBits() >= NarrowTy.getSizeInBits() &&
4201 "source register size too small!");
4202 assert(!NarrowTy.isVector() && "cannot extract vector into vector!");
4203
4204 // Need the lane index to determine the correct copy opcode.
4205 MachineOperand &LaneIdxOp = I.getOperand(2);
4206 assert(LaneIdxOp.isReg() && "Lane index operand was not a register?");
4207
4208 if (RBI.getRegBank(DstReg, MRI, TRI)->getID() != AArch64::FPRRegBankID) {
4209 LLVM_DEBUG(dbgs() << "Cannot extract into GPR.\n");
4210 return false;
4211 }
4212
4213 // Find the index to extract from.
4214 auto VRegAndVal = getIConstantVRegValWithLookThrough(LaneIdxOp.getReg(), MRI);
4215 if (!VRegAndVal)
4216 return false;
4217 unsigned LaneIdx = VRegAndVal->Value.getSExtValue();
4218
4219
4220 const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
4221 MachineInstr *Extract = emitExtractVectorElt(DstReg, DstRB, NarrowTy, SrcReg,
4222 LaneIdx, MIB);
4223 if (!Extract)
4224 return false;
4225
4226 I.eraseFromParent();
4227 return true;
4228}
4229
4230bool AArch64InstructionSelector::selectSplitVectorUnmerge(
4231 MachineInstr &I, MachineRegisterInfo &MRI) {
4232 unsigned NumElts = I.getNumOperands() - 1;
4233 Register SrcReg = I.getOperand(NumElts).getReg();
4234 const LLT NarrowTy = MRI.getType(I.getOperand(0).getReg());
4235 const LLT SrcTy = MRI.getType(SrcReg);
4236
4237 assert(NarrowTy.isVector() && "Expected an unmerge into vectors");
4238 if (SrcTy.getSizeInBits() > 128) {
4239 LLVM_DEBUG(dbgs() << "Unexpected vector type for vec split unmerge");
4240 return false;
4241 }
4242
4243 // We implement a split vector operation by treating the sub-vectors as
4244 // scalars and extracting them.
4245 const RegisterBank &DstRB =
4246 *RBI.getRegBank(I.getOperand(0).getReg(), MRI, TRI);
4247 for (unsigned OpIdx = 0; OpIdx < NumElts; ++OpIdx) {
4248 Register Dst = I.getOperand(OpIdx).getReg();
4249 MachineInstr *Extract =
4250 emitExtractVectorElt(Dst, DstRB, NarrowTy, SrcReg, OpIdx, MIB);
4251 if (!Extract)
4252 return false;
4253 }
4254 I.eraseFromParent();
4255 return true;
4256}
4257
4258bool AArch64InstructionSelector::selectUnmergeValues(MachineInstr &I,
4259 MachineRegisterInfo &MRI) {
4260 assert(I.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
4261 "unexpected opcode");
4262
4263 // TODO: Handle unmerging into GPRs and from scalars to scalars.
4264 if (RBI.getRegBank(I.getOperand(0).getReg(), MRI, TRI)->getID() !=
4265 AArch64::FPRRegBankID ||
4266 RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI)->getID() !=
4267 AArch64::FPRRegBankID) {
4268 LLVM_DEBUG(dbgs() << "Unmerging vector-to-gpr and scalar-to-scalar "
4269 "currently unsupported.\n");
4270 return false;
4271 }
4272
4273 // The last operand is the vector source register, and every other operand is
4274 // a register to unpack into.
4275 unsigned NumElts = I.getNumOperands() - 1;
4276 Register SrcReg = I.getOperand(NumElts).getReg();
4277 const LLT NarrowTy = MRI.getType(I.getOperand(0).getReg());
4278 const LLT WideTy = MRI.getType(SrcReg);
4279 (void)WideTy;
4280 assert((WideTy.isVector() || WideTy.getSizeInBits() == 128) &&
4281 "can only unmerge from vector or s128 types!");
4282 assert(WideTy.getSizeInBits() > NarrowTy.getSizeInBits() &&
4283 "source register size too small!");
4284
4285 if (!NarrowTy.isScalar())
4286 return selectSplitVectorUnmerge(I, MRI);
4287
4288 // Choose a lane copy opcode and subregister based off of the size of the
4289 // vector's elements.
4290 unsigned CopyOpc = 0;
4291 unsigned ExtractSubReg = 0;
4292 if (!getLaneCopyOpcode(CopyOpc, ExtractSubReg, NarrowTy.getSizeInBits()))
4293 return false;
4294
4295 // Set up for the lane copies.
4296 MachineBasicBlock &MBB = *I.getParent();
4297
4298 // Stores the registers we'll be copying from.
4299 SmallVector<Register, 4> InsertRegs;
4300
4301 // We'll use the first register twice, so we only need NumElts-1 registers.
4302 unsigned NumInsertRegs = NumElts - 1;
4303
4304 // If our elements fit into exactly 128 bits, then we can copy from the source
4305 // directly. Otherwise, we need to do a bit of setup with some subregister
4306 // inserts.
4307 if (NarrowTy.getSizeInBits() * NumElts == 128) {
4308 InsertRegs = SmallVector<Register, 4>(NumInsertRegs, SrcReg);
4309 } else {
4310 // No. We have to perform subregister inserts. For each insert, create an
4311 // implicit def and a subregister insert, and save the register we create.
4312 const TargetRegisterClass *RC =
4313 getMinClassForRegBank(*RBI.getRegBank(SrcReg, MRI, TRI),
4314 WideTy.getScalarSizeInBits() * NumElts);
4315 unsigned SubReg = 0;
4316 bool Found = getSubRegForClass(RC, TRI, SubReg);
4317 (void)Found;
4318 assert(Found && "expected to find last operand's subeg idx");
4319 for (unsigned Idx = 0; Idx < NumInsertRegs; ++Idx) {
4320 Register ImpDefReg = MRI.createVirtualRegister(&AArch64::FPR128RegClass);
4321 MachineInstr &ImpDefMI =
4322 *BuildMI(MBB, I, I.getDebugLoc(), TII.get(TargetOpcode::IMPLICIT_DEF),
4323 ImpDefReg);
4324
4325 // Now, create the subregister insert from SrcReg.
4326 Register InsertReg = MRI.createVirtualRegister(&AArch64::FPR128RegClass);
4327 MachineInstr &InsMI =
4328 *BuildMI(MBB, I, I.getDebugLoc(),
4329 TII.get(TargetOpcode::INSERT_SUBREG), InsertReg)
4330 .addUse(ImpDefReg)
4331 .addUse(SrcReg)
4332 .addImm(SubReg);
4333
4334 constrainSelectedInstRegOperands(ImpDefMI, TII, TRI, RBI);
4335 constrainSelectedInstRegOperands(InsMI, TII, TRI, RBI);
4336
4337 // Save the register so that we can copy from it after.
4338 InsertRegs.push_back(InsertReg);
4339 }
4340 }
4341
4342 // Now that we've created any necessary subregister inserts, we can
4343 // create the copies.
4344 //
4345 // Perform the first copy separately as a subregister copy.
4346 Register CopyTo = I.getOperand(0).getReg();
4347 auto FirstCopy = MIB.buildInstr(TargetOpcode::COPY, {CopyTo}, {})
4348 .addReg(InsertRegs[0], 0, ExtractSubReg);
4349 constrainSelectedInstRegOperands(*FirstCopy, TII, TRI, RBI);
4350
4351 // Now, perform the remaining copies as vector lane copies.
4352 unsigned LaneIdx = 1;
4353 for (Register InsReg : InsertRegs) {
4354 Register CopyTo = I.getOperand(LaneIdx).getReg();
4355 MachineInstr &CopyInst =
4356 *BuildMI(MBB, I, I.getDebugLoc(), TII.get(CopyOpc), CopyTo)
4357 .addUse(InsReg)
4358 .addImm(LaneIdx);
4359 constrainSelectedInstRegOperands(CopyInst, TII, TRI, RBI);
4360 ++LaneIdx;
4361 }
4362
4363 // Separately constrain the first copy's destination. Because of the
4364 // limitation in constrainOperandRegClass, we can't guarantee that this will
4365 // actually be constrained. So, do it ourselves using the second operand.
4366 const TargetRegisterClass *RC =
4367 MRI.getRegClassOrNull(I.getOperand(1).getReg());
4368 if (!RC) {
4369 LLVM_DEBUG(dbgs() << "Couldn't constrain copy destination.\n");
4370 return false;
4371 }
4372
4373 RBI.constrainGenericRegister(CopyTo, *RC, MRI);
4374 I.eraseFromParent();
4375 return true;
4376}
4377
4378bool AArch64InstructionSelector::selectConcatVectors(
4379 MachineInstr &I, MachineRegisterInfo &MRI) {
4380 assert(I.getOpcode() == TargetOpcode::G_CONCAT_VECTORS &&
4381 "Unexpected opcode");
4382 Register Dst = I.getOperand(0).getReg();
4383 Register Op1 = I.getOperand(1).getReg();
4384 Register Op2 = I.getOperand(2).getReg();
4385 MachineInstr *ConcatMI = emitVectorConcat(Dst, Op1, Op2, MIB);
4386 if (!ConcatMI)
4387 return false;
4388 I.eraseFromParent();
4389 return true;
4390}
4391
4392unsigned
4393AArch64InstructionSelector::emitConstantPoolEntry(const Constant *CPVal,
4394 MachineFunction &MF) const {
4395 Type *CPTy = CPVal->getType();
4396 Align Alignment = MF.getDataLayout().getPrefTypeAlign(CPTy);
4397
4398 MachineConstantPool *MCP = MF.getConstantPool();
4399 return MCP->getConstantPoolIndex(CPVal, Alignment);
4400}
4401
4402MachineInstr *AArch64InstructionSelector::emitLoadFromConstantPool(
4403 const Constant *CPVal, MachineIRBuilder &MIRBuilder) const {
4404 auto &MF = MIRBuilder.getMF();
4405 unsigned CPIdx = emitConstantPoolEntry(CPVal, MF);
4406
4407 auto Adrp =
4408 MIRBuilder.buildInstr(AArch64::ADRP, {&AArch64::GPR64RegClass}, {})
4409 .addConstantPoolIndex(CPIdx, 0, AArch64II::MO_PAGE);
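// Standard small-code-model materialization: ADRP forms the 4 KiB page
// address of the constant-pool entry and the sized LDR*ui below applies the
// low 12 bits via MO_PAGEOFF | MO_NC.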
4410
4411 MachineInstr *LoadMI = nullptr;
4412 MachinePointerInfo PtrInfo = MachinePointerInfo::getConstantPool(MF);
4413 unsigned Size = MIRBuilder.getDataLayout().getTypeStoreSize(CPVal->getType());
4414 switch (Size) {
4415 case 16:
4416 LoadMI =
4417 &*MIRBuilder
4418 .buildInstr(AArch64::LDRQui, {&AArch64::FPR128RegClass}, {Adrp})
4419 .addConstantPoolIndex(CPIdx, 0,
4420 AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
4421 break;
4422 case 8:
4423 LoadMI =
4424 &*MIRBuilder
4425 .buildInstr(AArch64::LDRDui, {&AArch64::FPR64RegClass}, {Adrp})
4426 .addConstantPoolIndex(CPIdx, 0,
4427 AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
4428 break;
4429 case 4:
4430 LoadMI =
4431 &*MIRBuilder
4432 .buildInstr(AArch64::LDRSui, {&AArch64::FPR32RegClass}, {Adrp})
4433 .addConstantPoolIndex(CPIdx, 0,
4434 AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
4435 break;
4436 case 2:
4437 LoadMI =
4438 &*MIRBuilder
4439 .buildInstr(AArch64::LDRHui, {&AArch64::FPR16RegClass}, {Adrp})
4440 .addConstantPoolIndex(CPIdx, 0,
4441 AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
4442 break;
4443 default:
4444 LLVM_DEBUG(dbgs() << "Could not load from constant pool of type "
4445 << *CPVal->getType());
4446 return nullptr;
4447 }
4448 LoadMI->addMemOperand(MF, MF.getMachineMemOperand(PtrInfo,
4449 MachineMemOperand::MOLoad,
4450 Size, Align(Size)));
4451 constrainSelectedInstRegOperands(*Adrp, TII, TRI, RBI);
4452 constrainSelectedInstRegOperands(*LoadMI, TII, TRI, RBI);
4453 return LoadMI;
4454}
4455
4456/// Return an <Opcode, SubregIndex> pair to do a vector elt insert of a given
4457/// size and RB.
4458static std::pair<unsigned, unsigned>
4459getInsertVecEltOpInfo(const RegisterBank &RB, unsigned EltSize) {
4460 unsigned Opc, SubregIdx;
4461 if (RB.getID() == AArch64::GPRRegBankID) {
4462 if (EltSize == 16) {
4463 Opc = AArch64::INSvi16gpr;
4464 SubregIdx = AArch64::ssub;
4465 } else if (EltSize == 32) {
4466 Opc = AArch64::INSvi32gpr;
4467 SubregIdx = AArch64::ssub;
4468 } else if (EltSize == 64) {
4469 Opc = AArch64::INSvi64gpr;
4470 SubregIdx = AArch64::dsub;
4471 } else {
4472 llvm_unreachable("invalid elt size!");
4473 }
4474 } else {
4475 if (EltSize == 8) {
4476 Opc = AArch64::INSvi8lane;
4477 SubregIdx = AArch64::bsub;
4478 } else if (EltSize == 16) {
4479 Opc = AArch64::INSvi16lane;
4480 SubregIdx = AArch64::hsub;
4481 } else if (EltSize == 32) {
4482 Opc = AArch64::INSvi32lane;
4483 SubregIdx = AArch64::ssub;
4484 } else if (EltSize == 64) {
4485 Opc = AArch64::INSvi64lane;
4486 SubregIdx = AArch64::dsub;
4487 } else {
4488 llvm_unreachable("invalid elt size!");
4489 }
4490 }
4491 return std::make_pair(Opc, SubregIdx);
4492}
4493
4494MachineInstr *AArch64InstructionSelector::emitInstr(
4495 unsigned Opcode, std::initializer_list<llvm::DstOp> DstOps,
4496 std::initializer_list<llvm::SrcOp> SrcOps, MachineIRBuilder &MIRBuilder,
4497 const ComplexRendererFns &RenderFns) const {
4498 assert(Opcode && "Expected an opcode?");
4499 assert(!isPreISelGenericOpcode(Opcode) &&
4500 "Function should only be used to produce selected instructions!");
4501 auto MI = MIRBuilder.buildInstr(Opcode, DstOps, SrcOps);
4502 if (RenderFns)
4503 for (auto &Fn : *RenderFns)
4504 Fn(MI);
4505 constrainSelectedInstRegOperands(*MI, TII, TRI, RBI);
4506 return &*MI;
4507}
4508
4509MachineInstr *AArch64InstructionSelector::emitAddSub(
4510 const std::array<std::array<unsigned, 2>, 5> &AddrModeAndSizeToOpcode,
4511 Register Dst, MachineOperand &LHS, MachineOperand &RHS,
4512 MachineIRBuilder &MIRBuilder) const {
4513 MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
4514 assert(LHS.isReg() && RHS.isReg() && "Expected register operands?");
4515 auto Ty = MRI.getType(LHS.getReg());
4516 assert(!Ty.isVector() && "Expected a scalar or pointer?");
4517 unsigned Size = Ty.getSizeInBits();
4518 assert((Size == 32 || Size == 64) && "Expected a 32-bit or 64-bit type only");
4519 bool Is32Bit = Size == 32;
4520
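// Table layout (see emitADD/emitADDS/emitSUBS below): row 0 = immediate form,
// 1 = shifted register, 2 = register-register, 3 = the opposite opcode's
// immediate form used for negated immediates, 4 = extended register; the
// column selects the 64-bit or 32-bit variant.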
4521 // INSTRri form with positive arithmetic immediate.
4522 if (auto Fns = selectArithImmed(RHS))
4523 return emitInstr(AddrModeAndSizeToOpcode[0][Is32Bit], {Dst}, {LHS},
4524 MIRBuilder, Fns);
4525
4526 // INSTRri form with negative arithmetic immediate.
4527 if (auto Fns = selectNegArithImmed(RHS))
4528 return emitInstr(AddrModeAndSizeToOpcode[3][Is32Bit], {Dst}, {LHS},
4529 MIRBuilder, Fns);
4530
4531 // INSTRrx form.
4532 if (auto Fns = selectArithExtendedRegister(RHS))
4533 return emitInstr(AddrModeAndSizeToOpcode[4][Is32Bit], {Dst}, {LHS},
4534 MIRBuilder, Fns);
4535
4536 // INSTRrs form.
4537 if (auto Fns = selectShiftedRegister(RHS))
4538 return emitInstr(AddrModeAndSizeToOpcode[1][Is32Bit], {Dst}, {LHS},
4539 MIRBuilder, Fns);
4540 return emitInstr(AddrModeAndSizeToOpcode[2][Is32Bit], {Dst}, {LHS, RHS},
4541 MIRBuilder);
4542}
4543
4544MachineInstr *
4545AArch64InstructionSelector::emitADD(Register DefReg, MachineOperand &LHS,
4546 MachineOperand &RHS,
4547 MachineIRBuilder &MIRBuilder) const {
4548 const std::array<std::array<unsigned, 2>, 5> OpcTable{
4549 {{AArch64::ADDXri, AArch64::ADDWri},
4550 {AArch64::ADDXrs, AArch64::ADDWrs},
4551 {AArch64::ADDXrr, AArch64::ADDWrr},
4552 {AArch64::SUBXri, AArch64::SUBWri},
4553 {AArch64::ADDXrx, AArch64::ADDWrx}}};
4554 return emitAddSub(OpcTable, DefReg, LHS, RHS, MIRBuilder);
4555}
4556
4557MachineInstr *
4558AArch64InstructionSelector::emitADDS(Register Dst, MachineOperand &LHS,
4559 MachineOperand &RHS,
4560 MachineIRBuilder &MIRBuilder) const {
4561 const std::array<std::array<unsigned, 2>, 5> OpcTable{
4562 {{AArch64::ADDSXri, AArch64::ADDSWri},
4563 {AArch64::ADDSXrs, AArch64::ADDSWrs},
4564 {AArch64::ADDSXrr, AArch64::ADDSWrr},
4565 {AArch64::SUBSXri, AArch64::SUBSWri},
4566 {AArch64::ADDSXrx, AArch64::ADDSWrx}}};
4567 return emitAddSub(OpcTable, Dst, LHS, RHS, MIRBuilder);
4568}
4569
4570MachineInstr *
4571AArch64InstructionSelector::emitSUBS(Register Dst, MachineOperand &LHS,
4572 MachineOperand &RHS,
4573 MachineIRBuilder &MIRBuilder) const {
4574 const std::array<std::array<unsigned, 2>, 5> OpcTable{
4575 {{AArch64::SUBSXri, AArch64::SUBSWri},
4576 {AArch64::SUBSXrs, AArch64::SUBSWrs},
4577 {AArch64::SUBSXrr, AArch64::SUBSWrr},
4578 {AArch64::ADDSXri, AArch64::ADDSWri},
4579 {AArch64::SUBSXrx, AArch64::SUBSWrx}}};
4580 return emitAddSub(OpcTable, Dst, LHS, RHS, MIRBuilder);
4581}
4582
4583MachineInstr *
4584AArch64InstructionSelector::emitCMN(MachineOperand &LHS, MachineOperand &RHS,
4585 MachineIRBuilder &MIRBuilder) const {
4586 MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
4587 bool Is32Bit = (MRI.getType(LHS.getReg()).getSizeInBits() == 32);
4588 auto RC = Is32Bit ? &AArch64::GPR32RegClass : &AArch64::GPR64RegClass;
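// CMN LHS, RHS is just ADDS into a scratch register whose value is never
// read; only the NZCV flags it produces matter.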
4589 return emitADDS(MRI.createVirtualRegister(RC), LHS, RHS, MIRBuilder);
4590}
4591
4592MachineInstr *
4593AArch64InstructionSelector::emitTST(MachineOperand &LHS, MachineOperand &RHS,
4594 MachineIRBuilder &MIRBuilder) const {
4595 assert(LHS.isReg() && RHS.isReg() && "Expected register operands?");
4596 MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
4597 LLT Ty = MRI.getType(LHS.getReg());
4598 unsigned RegSize = Ty.getSizeInBits();
4599 bool Is32Bit = (RegSize == 32);
4600 const unsigned OpcTable[3][2] = {{AArch64::ANDSXri, AArch64::ANDSWri},
4601 {AArch64::ANDSXrs, AArch64::ANDSWrs},
4602 {AArch64::ANDSXrr, AArch64::ANDSWrr}};
4603 // ANDS needs a logical immediate for its immediate form. Check if we can
4604 // fold one in.
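  // A logical immediate is a (possibly rotated and replicated) run of set
  // bits, e.g. 0xff or 0x00ff00ff00ff00ff; values that don't fit that shape
  // (including 0) fall through to the shifted-register or register forms.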
4605 if (auto ValAndVReg = getIConstantVRegValWithLookThrough(RHS.getReg(), MRI)) {
4606 int64_t Imm = ValAndVReg->Value.getSExtValue();
4607
4608 if (AArch64_AM::isLogicalImmediate(Imm, RegSize)) {
4609 auto TstMI = MIRBuilder.buildInstr(OpcTable[0][Is32Bit], {Ty}, {LHS});
4610 TstMI.addImm(AArch64_AM::encodeLogicalImmediate(Imm, RegSize));
4611 constrainSelectedInstRegOperands(*TstMI, TII, TRI, RBI);
4612 return &*TstMI;
4613 }
4614 }
4615
4616 if (auto Fns = selectLogicalShiftedRegister(RHS))
4617 return emitInstr(OpcTable[1][Is32Bit], {Ty}, {LHS}, MIRBuilder, Fns);
4618 return emitInstr(OpcTable[2][Is32Bit], {Ty}, {LHS, RHS}, MIRBuilder);
4619}
4620
4621MachineInstr *AArch64InstructionSelector::emitIntegerCompare(
4622 MachineOperand &LHS, MachineOperand &RHS, MachineOperand &Predicate,
4623 MachineIRBuilder &MIRBuilder) const {
4624 assert(LHS.isReg() && RHS.isReg() && "Expected LHS and RHS to be registers!");
4625 assert(Predicate.isPredicate() && "Expected predicate?");
4626 MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
4627 LLT CmpTy = MRI.getType(LHS.getReg());
4628 assert(!CmpTy.isVector() && "Expected scalar or pointer");
4629 unsigned Size = CmpTy.getSizeInBits();
4630 (void)Size;
4631 assert((Size == 32 || Size == 64) && "Expected a 32-bit or 64-bit LHS/RHS?");
4632 // Fold the compare into a cmn or tst if possible.
4633 if (auto FoldCmp = tryFoldIntegerCompare(LHS, RHS, Predicate, MIRBuilder))
4634 return FoldCmp;
4635 auto Dst = MRI.cloneVirtualRegister(LHS.getReg());
4636 return emitSUBS(Dst, LHS, RHS, MIRBuilder);
4637}
4638
4639MachineInstr *AArch64InstructionSelector::emitCSetForFCmp(
4640 Register Dst, CmpInst::Predicate Pred, MachineIRBuilder &MIRBuilder) const {
4641 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
4642#ifndef NDEBUG
4643 LLT Ty = MRI.getType(Dst);
4644 assert(!Ty.isVector() && Ty.getSizeInBits() == 32 &&
4645 "Expected a 32-bit scalar register?");
4646#endif
4647 const Register ZReg = AArch64::WZR;
4648 AArch64CC::CondCode CC1, CC2;
4649 changeFCMPPredToAArch64CC(Pred, CC1, CC2);
4650 auto InvCC1 = AArch64CC::getInvertedCondCode(CC1);
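  // CSINC Wd, WZR, WZR, <inverted cc> yields 1 when <cc> holds and 0
  // otherwise, so a predicate that maps to a single condition code needs just
  // one instruction. Predicates that need two codes (e.g. unordered-equal)
  // get two CSINCs ORed together below.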
4651 if (CC2 == AArch64CC::AL)
4652 return emitCSINC(/*Dst=*/Dst, /*Src1=*/ZReg, /*Src2=*/ZReg, InvCC1,
4653 MIRBuilder);
4654 const TargetRegisterClass *RC = &AArch64::GPR32RegClass;
4655 Register Def1Reg = MRI.createVirtualRegister(RC);
4656 Register Def2Reg = MRI.createVirtualRegister(RC);
4657 auto InvCC2 = AArch64CC::getInvertedCondCode(CC2);
4658 emitCSINC(/*Dst=*/Def1Reg, /*Src1=*/ZReg, /*Src2=*/ZReg, InvCC1, MIRBuilder);
4659 emitCSINC(/*Dst=*/Def2Reg, /*Src1=*/ZReg, /*Src2=*/ZReg, InvCC2, MIRBuilder);
4660 auto OrMI = MIRBuilder.buildInstr(AArch64::ORRWrr, {Dst}, {Def1Reg, Def2Reg});
4661 constrainSelectedInstRegOperands(*OrMI, TII, TRI, RBI);
4662 return &*OrMI;
4663}
4664
4665MachineInstr *
4666AArch64InstructionSelector::emitFPCompare(Register LHS, Register RHS,
4667 MachineIRBuilder &MIRBuilder,
4668 Optional<CmpInst::Predicate> Pred) const {
4669 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
4670 LLT Ty = MRI.getType(LHS);
4671 if (Ty.isVector())
4672 return nullptr;
4673 unsigned OpSize = Ty.getSizeInBits();
4674 if (OpSize != 32 && OpSize != 64)
4675 return nullptr;
4676
4677 // If this is a compare against +0.0, then we don't have
4678 // to explicitly materialize a constant.
4679 const ConstantFP *FPImm = getConstantFPVRegVal(RHS, MRI);
4680 bool ShouldUseImm = FPImm && (FPImm->isZero() && !FPImm->isNegative());
4681
4682 auto IsEqualityPred = [](CmpInst::Predicate P) {
4683 return P == CmpInst::FCMP_OEQ || P == CmpInst::FCMP_ONE ||
4684 P == CmpInst::FCMP_UEQ || P == CmpInst::FCMP_UNE;
4685 };
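  // Equality predicates compare symmetrically, so for them it is safe to swap
  // the operands below and still use the immediate (compare-against-+0.0)
  // form when the constant happens to be on the LHS.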
4686 if (!ShouldUseImm && Pred && IsEqualityPred(*Pred)) {
4687 // Try commutating the operands.
4688 const ConstantFP *LHSImm = getConstantFPVRegVal(LHS, MRI);
4689 if (LHSImm && (LHSImm->isZero() && !LHSImm->isNegative())) {
4690 ShouldUseImm = true;
4691 std::swap(LHS, RHS);
4692 }
4693 }
4694 unsigned CmpOpcTbl[2][2] = {{AArch64::FCMPSrr, AArch64::FCMPDrr},
4695 {AArch64::FCMPSri, AArch64::FCMPDri}};
4696 unsigned CmpOpc = CmpOpcTbl[ShouldUseImm][OpSize == 64];
4697
4698 // Partially build the compare. Decide if we need to add a use for the
4699 // third operand based off whether or not we're comparing against 0.0.
4700 auto CmpMI = MIRBuilder.buildInstr(CmpOpc).addUse(LHS);
4701 CmpMI.setMIFlags(MachineInstr::NoFPExcept);
4702 if (!ShouldUseImm)
4703 CmpMI.addUse(RHS);
4704 constrainSelectedInstRegOperands(*CmpMI, TII, TRI, RBI);
4705 return &*CmpMI;
4706}
4707
4708MachineInstr *AArch64InstructionSelector::emitVectorConcat(
4709 Optional<Register> Dst, Register Op1, Register Op2,
4710 MachineIRBuilder &MIRBuilder) const {
4711 // We implement a vector concat by:
4712 // 1. Use scalar_to_vector to insert the lower vector into the larger dest
4713 // 2. Insert the upper vector into the destination's upper element
4714 // TODO: some of this code is common with G_BUILD_VECTOR handling.
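  // For example, concatenating two <2 x s32> operands widens each to a
  // 128-bit register and inserts the second operand into the upper 64-bit
  // lane of the first, producing the <4 x s32> result.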
4715 MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
4716
4717 const LLT Op1Ty = MRI.getType(Op1);
4718 const LLT Op2Ty = MRI.getType(Op2);
4719
4720 if (Op1Ty != Op2Ty) {
4721 LLVM_DEBUG(dbgs() << "Could not do vector concat of differing vector tys");
4722 return nullptr;
4723 }
4724 assert(Op1Ty.isVector() && "Expected a vector for vector concat");
4725
4726 if (Op1Ty.getSizeInBits() >= 128) {
4727 LLVM_DEBUG(dbgs() << "Vector concat not supported for full size vectors");
4728 return nullptr;
4729 }
4730
4731 // At the moment we just support 64 bit vector concats.
4732 if (Op1Ty.getSizeInBits() != 64) {
4733 LLVM_DEBUG(dbgs() << "Vector concat supported for 64b vectors");
4734 return nullptr;
4735 }
4736
4737 const LLT ScalarTy = LLT::scalar(Op1Ty.getSizeInBits());
4738 const RegisterBank &FPRBank = *RBI.getRegBank(Op1, MRI, TRI);
4739 const TargetRegisterClass *DstRC =
4740 getMinClassForRegBank(FPRBank, Op1Ty.getSizeInBits() * 2);
4741
4742 MachineInstr *WidenedOp1 =
4743 emitScalarToVector(ScalarTy.getSizeInBits(), DstRC, Op1, MIRBuilder);
4744 MachineInstr *WidenedOp2 =
4745 emitScalarToVector(ScalarTy.getSizeInBits(), DstRC, Op2, MIRBuilder);
4746 if (!WidenedOp1 || !WidenedOp2) {
4747 LLVM_DEBUG(dbgs() << "Could not emit a vector from scalar value");
4748 return nullptr;
4749 }
4750
4751 // Now do the insert of the upper element.
4752 unsigned InsertOpc, InsSubRegIdx;
4753 std::tie(InsertOpc, InsSubRegIdx) =
4754 getInsertVecEltOpInfo(FPRBank, ScalarTy.getSizeInBits());
4755
4756 if (!Dst)
4757 Dst = MRI.createVirtualRegister(DstRC);
4758 auto InsElt =
4759 MIRBuilder
4760 .buildInstr(InsertOpc, {*Dst}, {WidenedOp1->getOperand(0).getReg()})
4761 .addImm(1) /* Lane index */
4762 .addUse(WidenedOp2->getOperand(0).getReg())
4763 .addImm(0);
4764 constrainSelectedInstRegOperands(*InsElt, TII, TRI, RBI);
4765 return &*InsElt;
4766}
4767
4768MachineInstr *
4769AArch64InstructionSelector::emitCSINC(Register Dst, Register Src1,
4770 Register Src2, AArch64CC::CondCode Pred,
4771 MachineIRBuilder &MIRBuilder) const {
4772 auto &MRI = *MIRBuilder.getMRI();
4773 const RegClassOrRegBank &RegClassOrBank = MRI.getRegClassOrRegBank(Dst);
4774 // If we used a register class, then this won't necessarily have an LLT.
4775 // Compute the size based off whether or not we have a class or bank.
4776 unsigned Size;
4777 if (const auto *RC = RegClassOrBank.dyn_cast<const TargetRegisterClass *>())
4778 Size = TRI.getRegSizeInBits(*RC);
4779 else
4780 Size = MRI.getType(Dst).getSizeInBits();
4781 // Some opcodes use s1.
4782 assert(Size <= 64 && "Expected 64 bits or less only!");
4783 static const unsigned OpcTable[2] = {AArch64::CSINCWr, AArch64::CSINCXr};
4784 unsigned Opc = OpcTable[Size == 64];
4785 auto CSINC = MIRBuilder.buildInstr(Opc, {Dst}, {Src1, Src2}).addImm(Pred);
4786 constrainSelectedInstRegOperands(*CSINC, TII, TRI, RBI);
4787 return &*CSINC;
4788}
4789
4790std::pair<MachineInstr *, AArch64CC::CondCode>
4791AArch64InstructionSelector::emitOverflowOp(unsigned Opcode, Register Dst,
4792 MachineOperand &LHS,
4793 MachineOperand &RHS,
4794 MachineIRBuilder &MIRBuilder) const {
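  // The overflow condition falls out of the flags set by ADDS/SUBS: signed
  // overflow sets V (VS), an unsigned add that wraps sets the carry (HS), and
  // an unsigned subtract that borrows clears the carry (LO).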
4795 switch (Opcode) {
4796 default:
4797 llvm_unreachable("Unexpected opcode!");
4798 case TargetOpcode::G_SADDO:
4799 return std::make_pair(emitADDS(Dst, LHS, RHS, MIRBuilder), AArch64CC::VS);
4800 case TargetOpcode::G_UADDO:
4801 return std::make_pair(emitADDS(Dst, LHS, RHS, MIRBuilder), AArch64CC::HS);
4802 case TargetOpcode::G_SSUBO:
4803 return std::make_pair(emitSUBS(Dst, LHS, RHS, MIRBuilder), AArch64CC::VS);
4804 case TargetOpcode::G_USUBO:
4805 return std::make_pair(emitSUBS(Dst, LHS, RHS, MIRBuilder), AArch64CC::LO);
4806 }
4807}
4808
4809/// Returns true if @p Val is a tree of AND/OR/CMP operations that can be
4810/// expressed as a conjunction.
4811/// \param CanNegate Set to true if we can negate the whole sub-tree just by
4812/// changing the conditions on the CMP tests.
4813/// (this means we can call emitConjunctionRec() with
4814/// Negate==true on this sub-tree)
4815/// \param MustBeFirst Set to true if this subtree needs to be negated and we
4816/// cannot do the negation naturally. We are required to
4817/// emit the subtree first in this case.
4818/// \param WillNegate Is true if we are called when the result of this
4819/// subexpression must be negated. This happens when the
4820/// outer expression is an OR. We can use this fact to know
4821/// that we have a double negation (or (or ...) ...) that
4822/// can be implemented for free.
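/// For example, a tree such as (G_OR (G_ICMP ...) (G_AND (G_ICMP ...)
/// (G_ICMP ...))) qualifies: it can later be emitted as one ordinary compare
/// followed by a chain of conditional compares.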
4823static bool canEmitConjunction(Register Val, bool &CanNegate, bool &MustBeFirst,
4824 bool WillNegate, MachineRegisterInfo &MRI,
4825 unsigned Depth = 0) {
4826 if (!MRI.hasOneNonDBGUse(Val))
4827 return false;
4828 MachineInstr *ValDef = MRI.getVRegDef(Val);
4829 unsigned Opcode = ValDef->getOpcode();
4830 if (Opcode == TargetOpcode::G_TRUNC) {
4831 // Look through a trunc.
4832 Val = ValDef->getOperand(1).getReg();
4833 ValDef = MRI.getVRegDef(Val);
4834 Opcode = ValDef->getOpcode();
4835 }
4836 if (isa<GAnyCmp>(ValDef)) {
4837 CanNegate = true;
4838 MustBeFirst = false;
4839 return true;
4840 }
4841 // Protect against exponential runtime and stack overflow.
4842 if (Depth > 6)
4843 return false;
4844 if (Opcode == TargetOpcode::G_AND || Opcode == TargetOpcode::G_OR) {
4845 bool IsOR = Opcode == TargetOpcode::G_OR;
4846 Register O0 = ValDef->getOperand(1).getReg();
4847 Register O1 = ValDef->getOperand(2).getReg();
4848 bool CanNegateL;
4849 bool MustBeFirstL;
4850 if (!canEmitConjunction(O0, CanNegateL, MustBeFirstL, IsOR, MRI, Depth + 1))
4851 return false;
4852 bool CanNegateR;
4853 bool MustBeFirstR;
4854 if (!canEmitConjunction(O1, CanNegateR, MustBeFirstR, IsOR, MRI, Depth + 1))
4855 return false;
4856
4857 if (MustBeFirstL && MustBeFirstR)
4858 return false;
4859
4860 if (IsOR) {
4861 // For an OR expression we need to be able to naturally negate at least
4862 // one side or we cannot do the transformation at all.
4863 if (!CanNegateL && !CanNegateR)
4864 return false;
4865 // If the result of the OR will be negated and we can naturally negate
4866 // the leaves, then this sub-tree as a whole negates naturally.
4867 CanNegate = WillNegate && CanNegateL && CanNegateR;
4868 // If we cannot naturally negate the whole sub-tree, then this must be
4869 // emitted first.
4870 MustBeFirst = !CanNegate;
4871 } else {
4872 assert(Opcode == TargetOpcode::G_AND && "Must be G_AND");
4873 // We cannot naturally negate an AND operation.
4874 CanNegate = false;
4875 MustBeFirst = MustBeFirstL || MustBeFirstR;
4876 }
4877 return true;
4878 }
4879 return false;
4880}
4881
4882MachineInstr *AArch64InstructionSelector::emitConditionalComparison(
4883 Register LHS, Register RHS, CmpInst::Predicate CC,
4884 AArch64CC::CondCode Predicate, AArch64CC::CondCode OutCC,
4885 MachineIRBuilder &MIB) const {
4886 // TODO: emit CMN as an optimization.
4887 auto &MRI = *MIB.getMRI();
4888 LLT OpTy = MRI.getType(LHS);
4889 assert(OpTy.getSizeInBits() == 32 || OpTy.getSizeInBits() == 64);
4890 unsigned CCmpOpc;
4891 if (CmpInst::isIntPredicate(CC)) {
4892 CCmpOpc = OpTy.getSizeInBits() == 32 ? AArch64::CCMPWr : AArch64::CCMPXr;
4893 } else {
4894 switch (OpTy.getSizeInBits()) {
4895 case 16:
4896 CCmpOpc = AArch64::FCCMPHrr;
4897 break;
4898 case 32:
4899 CCmpOpc = AArch64::FCCMPSrr;
4900 break;
4901 case 64:
4902 CCmpOpc = AArch64::FCCMPDrr;
4903 break;
4904 default:
4905 return nullptr;
4906 }
4907 }
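  // A conditional compare only performs the comparison when Predicate holds;
  // otherwise it sets the flags to the immediate NZCV below. Picking the NZCV
  // that satisfies the inverse of OutCC makes OutCC read as false whenever
  // the earlier condition in the chain failed.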
4908 AArch64CC::CondCode InvOutCC = AArch64CC::getInvertedCondCode(OutCC);
4909 unsigned NZCV = AArch64CC::getNZCVToSatisfyCondCode(InvOutCC);
4910 auto CCmp =
4911 MIB.buildInstr(CCmpOpc, {}, {LHS, RHS}).addImm(NZCV).addImm(Predicate);
4912 constrainSelectedInstRegOperands(*CCmp, TII, TRI, RBI);
4913 return &*CCmp;
4914}
4915
4916MachineInstr *AArch64InstructionSelector::emitConjunctionRec(
4917 Register Val, AArch64CC::CondCode &OutCC, bool Negate, Register CCOp,
4918 AArch64CC::CondCode Predicate, MachineIRBuilder &MIB) const {
4919 // We're at a tree leaf, produce a conditional comparison operation.
4920 auto &MRI = *MIB.getMRI();
4921 MachineInstr *ValDef = MRI.getVRegDef(Val);
4922 unsigned Opcode = ValDef->getOpcode();
4923 if (Opcode == TargetOpcode::G_TRUNC) {
4924 // Look through a trunc.
4925 Val = ValDef->getOperand(1).getReg();
4926 ValDef = MRI.getVRegDef(Val);
4927 Opcode = ValDef->getOpcode();
4928 }
4929 if (auto *Cmp = dyn_cast<GAnyCmp>(ValDef)) {
4930 Register LHS = Cmp->getLHSReg();
4931 Register RHS = Cmp->getRHSReg();
4932 CmpInst::Predicate CC = Cmp->getCond();
4933 if (Negate)
4934 CC = CmpInst::getInversePredicate(CC);
4935 if (isa<GICmp>(Cmp)) {
4936 OutCC = changeICMPPredToAArch64CC(CC);
4937 } else {
4938 // Handle special FP cases.
4939 AArch64CC::CondCode ExtraCC;
4940 changeFPCCToANDAArch64CC(CC, OutCC, ExtraCC);
4941 // Some floating point conditions can't be tested with a single condition
4942 // code. Construct an additional comparison in this case.
4943 if (ExtraCC != AArch64CC::AL) {
4944 MachineInstr *ExtraCmp;
4945 if (!CCOp)
4946 ExtraCmp = emitFPCompare(LHS, RHS, MIB, CC);
4947 else
4948 ExtraCmp =
4949 emitConditionalComparison(LHS, RHS, CC, Predicate, ExtraCC, MIB);
4950 CCOp = ExtraCmp->getOperand(0).getReg();
4951 Predicate = ExtraCC;
4952 }
4953 }
4954
4955 // Produce a normal comparison if we are first in the chain
4956 if (!CCOp) {
4957 auto Dst = MRI.cloneVirtualRegister(LHS);
4958 if (isa<GICmp>(Cmp))
4959 return emitSUBS(Dst, Cmp->getOperand(2), Cmp->getOperand(3), MIB);
4960 return emitFPCompare(Cmp->getOperand(2).getReg(),
4961 Cmp->getOperand(3).getReg(), MIB);
4962 }
4963 // Otherwise produce a ccmp.
4964 return emitConditionalComparison(LHS, RHS, CC, Predicate, OutCC, MIB);
4965 }
4966 assert(MRI.hasOneNonDBGUse(Val) && "Valid conjunction/disjunction tree");
4967
4968 bool IsOR = Opcode == TargetOpcode::G_OR;
4969
4970 Register LHS = ValDef->getOperand(1).getReg();
4971 bool CanNegateL;
4972 bool MustBeFirstL;
4973 bool ValidL = canEmitConjunction(LHS, CanNegateL, MustBeFirstL, IsOR, MRI);
4974 assert(ValidL && "Valid conjunction/disjunction tree");
4975 (void)ValidL;
4976
4977 Register RHS = ValDef->getOperand(2).getReg();
4978 bool CanNegateR;
4979 bool MustBeFirstR;
4980 bool ValidR = canEmitConjunction(RHS, CanNegateR, MustBeFirstR, IsOR, MRI);
4981 assert(ValidR && "Valid conjunction/disjunction tree");
4982 (void)ValidR;
4983
4984 // Swap sub-tree that must come first to the right side.
4985 if (MustBeFirstL) {
4986 assert(!MustBeFirstR && "Valid conjunction/disjunction tree");
4987 std::swap(LHS, RHS);
4988 std::swap(CanNegateL, CanNegateR);
4989 std::swap(MustBeFirstL, MustBeFirstR);
4990 }
4991
4992 bool NegateR;
4993 bool NegateAfterR;
4994 bool NegateL;
4995 bool NegateAfterAll;
4996 if (Opcode == TargetOpcode::G_OR) {
4997 // Swap the sub-tree that we can negate naturally to the left.
4998 if (!CanNegateL) {
4999 assert(CanNegateR && "at least one side must be negatable");
5000 assert(!MustBeFirstR && "invalid conjunction/disjunction tree");
5001 assert(!Negate);
5002 std::swap(LHS, RHS);
5003 NegateR = false;
5004 NegateAfterR = true;
5005 } else {
5006 // Negate the left sub-tree if possible, otherwise negate the result.
5007 NegateR = CanNegateR;
5008 NegateAfterR = !CanNegateR;
5009 }
5010 NegateL = true;
5011 NegateAfterAll = !Negate;
5012 } else {
5013 assert(Opcode == TargetOpcode::G_AND &&
5014 "Valid conjunction/disjunction tree");
5015 assert(!Negate && "Valid conjunction/disjunction tree");
5016
5017 NegateL = false;
5018 NegateR = false;
5019 NegateAfterR = false;
5020 NegateAfterAll = false;
5021 }
5022
5023 // Emit sub-trees.
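  // The right sub-tree is emitted first; the flags from CmpR then feed the
  // conditional compares produced for the left sub-tree, so the OutCC computed
  // for the left side covers the whole conjunction.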
5024 AArch64CC::CondCode RHSCC;
5025 MachineInstr *CmpR =
5026 emitConjunctionRec(RHS, RHSCC, NegateR, CCOp, Predicate, MIB);
5027 if (NegateAfterR)
5028 RHSCC = AArch64CC::getInvertedCondCode(RHSCC);
5029 MachineInstr *CmpL = emitConjunctionRec(
5030 LHS, OutCC, NegateL, CmpR->getOperand(0).getReg(), RHSCC, MIB);
5031 if (NegateAfterAll)
5032 OutCC = AArch64CC::getInvertedCondCode(OutCC);
5033 return CmpL;
5034}
5035
5036MachineInstr *AArch64InstructionSelector::emitConjunction(
5037 Register Val, AArch64CC::CondCode &OutCC, MachineIRBuilder &MIB) const {
5038 bool DummyCanNegate;
5039 bool DummyMustBeFirst;
5040 if (!canEmitConjunction(Val, DummyCanNegate, DummyMustBeFirst, false,
5041 *MIB.getMRI()))
5042 return nullptr;
5043 return emitConjunctionRec(Val, OutCC, false, Register(), AArch64CC::AL, MIB);
5044}
5045
5046bool AArch64InstructionSelector::tryOptSelectConjunction(GSelect &SelI,
5047 MachineInstr &CondMI) {
5048 AArch64CC::CondCode AArch64CC;
5049 MachineInstr *ConjMI = emitConjunction(SelI.getCondReg(), AArch64CC, MIB);
5050 if (!ConjMI)
5051 return false;
5052
5053 emitSelect(SelI.getReg(0), SelI.getTrueReg(), SelI.getFalseReg(), AArch64CC, MIB);
5054 SelI.eraseFromParent();
5055 return true;
5056}
5057
5058bool AArch64InstructionSelector::tryOptSelect(GSelect &I) {
5059 MachineRegisterInfo &MRI = *MIB.getMRI();
5060 // We want to recognize this pattern:
5061 //
5062 // $z = G_FCMP pred, $x, $y
5063 // ...
5064 // $w = G_SELECT $z, $a, $b
5065 //
5066 // Where the value of $z is *only* ever used by the G_SELECT (possibly with
5067 // some copies/truncs in between.)
5068 //
5069 // If we see this, then we can emit something like this:
5070 //
5071 // fcmp $x, $y
5072 // fcsel $w, $a, $b, pred
5073 //
5074 // Rather than emitting both of the rather long sequences in the standard
5075 // G_FCMP/G_SELECT select methods.
5076
5077 // First, check if the condition is defined by a compare.
5078 MachineInstr *CondDef = MRI.getVRegDef(I.getOperand(1).getReg());
5079 while (CondDef) {
5080 // We can only fold if all of the defs have one use.
5081 Register CondDefReg = CondDef->getOperand(0).getReg();
5082 if (!MRI.hasOneNonDBGUse(CondDefReg)) {
5083 // Unless it's another select.
5084 for (const MachineInstr &UI : MRI.use_nodbg_instructions(CondDefReg)) {
5085 if (CondDef == &UI)
5086 continue;
5087 if (UI.getOpcode() != TargetOpcode::G_SELECT)
5088 return false;
5089 }
5090 }
5091
5092 // We can skip over G_TRUNC since the condition is 1-bit.
5093 // Truncating/extending can have no impact on the value.
5094 unsigned Opc = CondDef->getOpcode();
5095 if (Opc != TargetOpcode::COPY && Opc != TargetOpcode::G_TRUNC)
5096 break;
5097
5098 // Can't see past copies from physregs.
5099 if (Opc == TargetOpcode::COPY &&
5100 Register::isPhysicalRegister(CondDef->getOperand(1).getReg()))
5101 return false;
5102
5103 CondDef = MRI.getVRegDef(CondDef->getOperand(1).getReg());
5104 }
5105
5106 // Is the condition defined by a compare?
5107 if (!CondDef)
5108 return false;
5109
5110 unsigned CondOpc = CondDef->getOpcode();
5111 if (CondOpc != TargetOpcode::G_ICMP && CondOpc != TargetOpcode::G_FCMP) {
5112 if (tryOptSelectConjunction(I, *CondDef))
5113 return true;
5114 return false;
5115 }
5116
5117 AArch64CC::CondCode CondCode;
5118 if (CondOpc == TargetOpcode::G_ICMP) {
5119 auto Pred =
5120 static_cast<CmpInst::Predicate>(CondDef->getOperand(1).getPredicate());
5121 CondCode = changeICMPPredToAArch64CC(Pred);
5122 emitIntegerCompare(CondDef->getOperand(2), CondDef->getOperand(3),
5123 CondDef->getOperand(1), MIB);
5124 } else {
5125 // Get the condition code for the select.
5126 auto Pred =
5127 static_cast<CmpInst::Predicate>(CondDef->getOperand(1).getPredicate());
5128 AArch64CC::CondCode CondCode2;
5129 changeFCMPPredToAArch64CC(Pred, CondCode, CondCode2);
5130
5131 // changeFCMPPredToAArch64CC sets CondCode2 to AL when we require two
5132 // instructions to emit the comparison.
5133 // TODO: Handle FCMP_UEQ and FCMP_ONE. After that, this check will be
5134 // unnecessary.
5135 if (CondCode2 != AArch64CC::AL)
5136 return false;
5137
5138 if (!emitFPCompare(CondDef->getOperand(2).getReg(),
5139 CondDef->getOperand(3).getReg(), MIB)) {
5140 LLVM_DEBUG(dbgs() << "Couldn't emit compare for select!\n");
5141 return false;
5142 }
5143 }
5144
5145 // Emit the select.
5146 emitSelect(I.getOperand(0).getReg(), I.getOperand(2).getReg(),
5147 I.getOperand(3).getReg(), CondCode, MIB);
5148 I.eraseFromParent();
5149 return true;
5150}
5151
5152MachineInstr *AArch64InstructionSelector::tryFoldIntegerCompare(
5153 MachineOperand &LHS, MachineOperand &RHS, MachineOperand &Predicate,
5154 MachineIRBuilder &MIRBuilder) const {
5155 assert(LHS.isReg() && RHS.isReg() && Predicate.isPredicate() &&
5156 "Unexpected MachineOperand");
5157 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
5158 // We want to find this sort of thing:
5159 // x = G_SUB 0, y
5160 // G_ICMP z, x
5161 //
5162 // In this case, we can fold the G_SUB into the G_ICMP using a CMN instead.
5163 // e.g:
5164 //
5165 // cmn z, y
5166
5167 // Check if the RHS or LHS of the G_ICMP is defined by a SUB
5168 MachineInstr *LHSDef = getDefIgnoringCopies(LHS.getReg(), MRI);
5169 MachineInstr *RHSDef = getDefIgnoringCopies(RHS.getReg(), MRI);
5170 auto P = static_cast<CmpInst::Predicate>(Predicate.getPredicate());
5171 // Given this:
5172 //
5173 // x = G_SUB 0, y
5174 // G_ICMP x, z
5175 //
5176 // Produce this:
5177 //
5178 // cmn y, z
5179 if (isCMN(LHSDef, P, MRI))
5180 return emitCMN(LHSDef->getOperand(2), RHS, MIRBuilder);
5181
5182 // Same idea here, but with the RHS of the compare instead:
5183 //
5184 // Given this:
5185 //
5186 // x = G_SUB 0, y
5187 // G_ICMP z, x
5188 //
5189 // Produce this:
5190 //
5191 // cmn z, y
5192 if (isCMN(RHSDef, P, MRI))
5193 return emitCMN(LHS, RHSDef->getOperand(2), MIRBuilder);
5194
5195 // Given this:
5196 //
5197 // z = G_AND x, y
5198 // G_ICMP z, 0
5199 //
5200 // Produce this if the compare is signed:
5201 //
5202 // tst x, y
5203 if (!CmpInst::isUnsigned(P) && LHSDef &&
5204 LHSDef->getOpcode() == TargetOpcode::G_AND) {
5205 // Make sure that the RHS is 0.
5206 auto ValAndVReg = getIConstantVRegValWithLookThrough(RHS.getReg(), MRI);
5207 if (!ValAndVReg || ValAndVReg->Value != 0)
5208 return nullptr;
5209
5210 return emitTST(LHSDef->getOperand(1),
5211 LHSDef->getOperand(2), MIRBuilder);
5212 }
5213
5214 return nullptr;
5215}
5216
5217bool AArch64InstructionSelector::selectShuffleVector(
5218 MachineInstr &I, MachineRegisterInfo &MRI) {
5219 const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
5220 Register Src1Reg = I.getOperand(1).getReg();
5221 const LLT Src1Ty = MRI.getType(Src1Reg);
5222 Register Src2Reg = I.getOperand(2).getReg();
5223 const LLT Src2Ty = MRI.getType(Src2Reg);
5224 ArrayRef<int> Mask = I.getOperand(3).getShuffleMask();
5225
5226 MachineBasicBlock &MBB = *I.getParent();
5227 MachineFunction &MF = *MBB.getParent();
5228 LLVMContext &Ctx = MF.getFunction().getContext();
5229
5230 // G_SHUFFLE_VECTOR is weird in that the source operands can be scalars, if
5231 // it's originated from a <1 x T> type. Those should have been lowered into
5232 // G_BUILD_VECTOR earlier.
5233 if (!Src1Ty.isVector() || !Src2Ty.isVector()) {
5234 LLVM_DEBUG(dbgs() << "Could not select a \"scalar\" G_SHUFFLE_VECTOR\n");
5235 return false;
5236 }
5237
5238 unsigned BytesPerElt = DstTy.getElementType().getSizeInBits() / 8;
5239
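  // Build the byte-level index vector for TBL: each mask element expands into
  // BytesPerElt consecutive byte offsets into the concatenated sources, e.g.
  // mask element 3 of a <4 x s16> shuffle selects source bytes 6 and 7.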
5240 SmallVector<Constant *, 64> CstIdxs;
5241 for (int Val : Mask) {
5242 // For now, any undef indexes we'll just assume to be 0. This should be
5243 // optimized in future, e.g. to select DUP etc.
5244 Val = Val < 0 ? 0 : Val;
5245 for (unsigned Byte = 0; Byte < BytesPerElt; ++Byte) {
5246 unsigned Offset = Byte + Val * BytesPerElt;
5247 CstIdxs.emplace_back(ConstantInt::get(Type::getInt8Ty(Ctx), Offset));
5248 }
5249 }
5250
5251 // Use a constant pool to load the index vector for TBL.
5252 Constant *CPVal = ConstantVector::get(CstIdxs);
5253 MachineInstr *IndexLoad = emitLoadFromConstantPool(CPVal, MIB);
5254 if (!IndexLoad) {
5255 LLVM_DEBUG(dbgs() << "Could not load from a constant pool");
5256 return false;
5257 }
5258
5259 if (DstTy.getSizeInBits() != 128) {
5260 assert(DstTy.getSizeInBits() == 64 && "Unexpected shuffle result ty");
5261 // This case can be done with TBL1.
5262 MachineInstr *Concat = emitVectorConcat(None, Src1Reg, Src2Reg, MIB);
5263 if (!Concat) {
5264 LLVM_DEBUG(dbgs() << "Could not do vector concat for tbl1");
5265 return false;
5266 }
5267
5268 // The constant pool load will be 64 bits, so need to convert to FPR128 reg.
5269 IndexLoad = emitScalarToVector(64, &AArch64::FPR128RegClass,
5270 IndexLoad->getOperand(0).getReg(), MIB);
5271
5272 auto TBL1 = MIB.buildInstr(
5273 AArch64::TBLv16i8One, {&AArch64::FPR128RegClass},
5274 {Concat->getOperand(0).getReg(), IndexLoad->getOperand(0).getReg()});
5275 constrainSelectedInstRegOperands(*TBL1, TII, TRI, RBI);
5276
5277 auto Copy =
5278 MIB.buildInstr(TargetOpcode::COPY, {I.getOperand(0).getReg()}, {})
5279 .addReg(TBL1.getReg(0), 0, AArch64::dsub);
5280 RBI.constrainGenericRegister(Copy.getReg(0), AArch64::FPR64RegClass, MRI);
5281 I.eraseFromParent();
5282 return true;
5283 }
5284
5285 // For TBL2 we need to emit a REG_SEQUENCE to tie together two consecutive
5286 // Q registers for regalloc.
5287 SmallVector<Register, 2> Regs = {Src1Reg, Src2Reg};
5288 auto RegSeq = createQTuple(Regs, MIB);
5289 auto TBL2 = MIB.buildInstr(AArch64::TBLv16i8Two, {I.getOperand(0)},
5290 {RegSeq, IndexLoad->getOperand(0)});
5291 constrainSelectedInstRegOperands(*TBL2, TII, TRI, RBI);
5292 I.eraseFromParent();
5293 return true;
5294}
5295
5296MachineInstr *AArch64InstructionSelector::emitLaneInsert(
5297 Optional<Register> DstReg, Register SrcReg, Register EltReg,
5298 unsigned LaneIdx, const RegisterBank &RB,
5299 MachineIRBuilder &MIRBuilder) const {
5300 MachineInstr *InsElt = nullptr;
5301 const TargetRegisterClass *DstRC = &AArch64::FPR128RegClass;
5302 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
5303
5304 // Create a register to define with the insert if one wasn't passed in.
5305 if (!DstReg)
5306 DstReg = MRI.createVirtualRegister(DstRC);
5307
5308 unsigned EltSize = MRI.getType(EltReg).getSizeInBits();
5309 unsigned Opc = getInsertVecEltOpInfo(RB, EltSize).first;
5310
5311 if (RB.getID() == AArch64::FPRRegBankID) {
5312 auto InsSub = emitScalarToVector(EltSize, DstRC, EltReg, MIRBuilder);
5313 InsElt = MIRBuilder.buildInstr(Opc, {*DstReg}, {SrcReg})
5314 .addImm(LaneIdx)
5315 .addUse(InsSub->getOperand(0).getReg())
5316 .addImm(0);
5317 } else {
5318 InsElt = MIRBuilder.buildInstr(Opc, {*DstReg}, {SrcReg})
5319 .addImm(LaneIdx)
5320 .addUse(EltReg);
5321 }
5322
5323 constrainSelectedInstRegOperands(*InsElt, TII, TRI, RBI);
5324 return InsElt;
5325}
5326
5327bool AArch64InstructionSelector::selectUSMovFromExtend(
5328 MachineInstr &MI, MachineRegisterInfo &MRI) {
5329 if (MI.getOpcode() != TargetOpcode::G_SEXT &&
5330 MI.getOpcode() != TargetOpcode::G_ZEXT &&
5331 MI.getOpcode() != TargetOpcode::G_ANYEXT)
5332 return false;
5333 bool IsSigned = MI.getOpcode() == TargetOpcode::G_SEXT;
5334 const Register DefReg = MI.getOperand(0).getReg();
5335 const LLT DstTy = MRI.getType(DefReg);
5336 unsigned DstSize = DstTy.getSizeInBits();
5337
5338 if (DstSize != 32 && DstSize != 64)
5339 return false;
5340
5341 MachineInstr *Extract = getOpcodeDef(TargetOpcode::G_EXTRACT_VECTOR_ELT,
5342 MI.getOperand(1).getReg(), MRI);
5343 int64_t Lane;
5344 if (!Extract || !mi_match(Extract->getOperand(2).getReg(), MRI, m_ICst(Lane)))
5345 return false;
5346 Register Src0 = Extract->getOperand(1).getReg();
5347
5348 const LLT &VecTy = MRI.getType(Src0);
5349
5350 if (VecTy.getSizeInBits() != 128) {
5351 const MachineInstr *ScalarToVector = emitScalarToVector(
5352 VecTy.getSizeInBits(), &AArch64::FPR128RegClass, Src0, MIB);
5353 assert(ScalarToVector && "Didn't expect emitScalarToVector to fail!");
5354 Src0 = ScalarToVector->getOperand(0).getReg();
5355 }
5356
5357 unsigned Opcode;
5358 if (DstSize == 64 && VecTy.getScalarSizeInBits() == 32)
5359 Opcode = IsSigned ? AArch64::SMOVvi32to64 : AArch64::UMOVvi32;
5360 else if (DstSize == 64 && VecTy.getScalarSizeInBits() == 16)
5361 Opcode = IsSigned ? AArch64::SMOVvi16to64 : AArch64::UMOVvi16;
5362 else if (DstSize == 64 && VecTy.getScalarSizeInBits() == 8)
5363 Opcode = IsSigned ? AArch64::SMOVvi8to64 : AArch64::UMOVvi8;
5364 else if (DstSize == 32 && VecTy.getScalarSizeInBits() == 16)
5365 Opcode = IsSigned ? AArch64::SMOVvi16to32 : AArch64::UMOVvi16;
5366 else if (DstSize == 32 && VecTy.getScalarSizeInBits() == 8)
5367 Opcode = IsSigned ? AArch64::SMOVvi8to32 : AArch64::UMOVvi8;
5368 else
5369 llvm_unreachable("Unexpected type combo for S/UMov!");
5370
5371 // We may need to generate one of these, depending on the type and sign of the
5372 // input:
5373 // DstReg = SMOV Src0, Lane;
5374 // NewReg = UMOV Src0, Lane; DstReg = SUBREG_TO_REG NewReg, sub_32;
5375 MachineInstr *ExtI = nullptr;
5376 if (DstSize == 64 && !IsSigned) {
5377 Register NewReg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
5378 MIB.buildInstr(Opcode, {NewReg}, {Src0}).addImm(Lane);
5379 ExtI = MIB.buildInstr(AArch64::SUBREG_TO_REG, {DefReg}, {})
5380 .addImm(0)
5381 .addUse(NewReg)
5382 .addImm(AArch64::sub_32);
5383 RBI.constrainGenericRegister(DefReg, AArch64::GPR64RegClass, MRI);
5384 } else
5385 ExtI = MIB.buildInstr(Opcode, {DefReg}, {Src0}).addImm(Lane);
5386
5387 constrainSelectedInstRegOperands(*ExtI, TII, TRI, RBI);
5388 MI.eraseFromParent();
5389 return true;
5390}
5391
5392bool AArch64InstructionSelector::selectInsertElt(MachineInstr &I,
5393 MachineRegisterInfo &MRI) {
5394 assert(I.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT);
5395
5396 // Get information on the destination.
5397 Register DstReg = I.getOperand(0).getReg();
5398 const LLT DstTy = MRI.getType(DstReg);
5399 unsigned VecSize = DstTy.getSizeInBits();
5400
5401 // Get information on the element we want to insert into the destination.
5402 Register EltReg = I.getOperand(2).getReg();
5403 const LLT EltTy = MRI.getType(EltReg);
5404 unsigned EltSize = EltTy.getSizeInBits();
5405 if (EltSize < 16 || EltSize > 64)
5406 return false; // Don't support all element types yet.
5407
5408 // Find the definition of the index. Bail out if it's not defined by a
5409 // G_CONSTANT.
5410 Register IdxReg = I.getOperand(3).getReg();
5411 auto VRegAndVal = getIConstantVRegValWithLookThrough(IdxReg, MRI);
5412 if (!VRegAndVal)
5413 return false;
5414 unsigned LaneIdx = VRegAndVal->Value.getSExtValue();
5415
5416 // Perform the lane insert.
5417 Register SrcReg = I.getOperand(1).getReg();
5418 const RegisterBank &EltRB = *RBI.getRegBank(EltReg, MRI, TRI);
5419
5420 if (VecSize < 128) {
5421 // If the vector we're inserting into is smaller than 128 bits, widen it
5422 // to 128 to do the insert.
5423 MachineInstr *ScalarToVec =
5424 emitScalarToVector(VecSize, &AArch64::FPR128RegClass, SrcReg, MIB);
5425 if (!ScalarToVec)
5426 return false;
5427 SrcReg = ScalarToVec->getOperand(0).getReg();
5428 }
5429
5430 // Create an insert into a new FPR128 register.
5431 // Note that if our vector is already 128 bits, we end up emitting an extra
5432 // register.
5433 MachineInstr *InsMI =
5434 emitLaneInsert(None, SrcReg, EltReg, LaneIdx, EltRB, MIB);
5435
5436 if (VecSize < 128) {
5437 // If we had to widen to perform the insert, then we have to demote back to
5438 // the original size to get the result we want.
5439 Register DemoteVec = InsMI->getOperand(0).getReg();
5440 const TargetRegisterClass *RC =
5441 getMinClassForRegBank(*RBI.getRegBank(DemoteVec, MRI, TRI), VecSize);
5442 if (RC != &AArch64::FPR32RegClass && RC != &AArch64::FPR64RegClass) {
5443 LLVM_DEBUG(dbgs() << "Unsupported register class!\n");
5444 return false;
5445 }
5446 unsigned SubReg = 0;
5447 if (!getSubRegForClass(RC, TRI, SubReg))
5448 return false;
5449 if (SubReg != AArch64::ssub && SubReg != AArch64::dsub) {
5450 LLVM_DEBUG(dbgs() << "Unsupported destination size! (" << VecSize
5451 << "\n");
5452 return false;
5453 }
5454 MIB.buildInstr(TargetOpcode::COPY, {DstReg}, {})
5455 .addReg(DemoteVec, 0, SubReg);
5456 RBI.constrainGenericRegister(DstReg, *RC, MRI);
5457 } else {
5458 // No widening needed.
5459 InsMI->getOperand(0).setReg(DstReg);
5460 constrainSelectedInstRegOperands(*InsMI, TII, TRI, RBI);
5461 }
5462
5463 I.eraseFromParent();
5464 return true;
5465}
5466
5467MachineInstr *
5468AArch64InstructionSelector::emitConstantVector(Register Dst, Constant *CV,
5469 MachineIRBuilder &MIRBuilder,
5470 MachineRegisterInfo &MRI) {
5471 LLT DstTy = MRI.getType(Dst);
5472 unsigned DstSize = DstTy.getSizeInBits();
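  // All-zero 64/128-bit vectors are cheap to materialize with MOVI; everything
  // else falls through to a constant-pool load below.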
5473 if (CV->isNullValue()) {
5474 if (DstSize == 128) {
5475 auto Mov =
5476 MIRBuilder.buildInstr(AArch64::MOVIv2d_ns, {Dst}, {}).addImm(0);
5477 constrainSelectedInstRegOperands(*Mov, TII, TRI, RBI);
5478 return &*Mov;
5479 }
5480
5481 if (DstSize == 64) {
5482 auto Mov =
5483 MIRBuilder
5484 .buildInstr(AArch64::MOVIv2d_ns, {&AArch64::FPR128RegClass}, {})
5485 .addImm(0);
5486 auto Copy = MIRBuilder.buildInstr(TargetOpcode::COPY, {Dst}, {})
5487 .addReg(Mov.getReg(0), 0, AArch64::dsub);
5488 RBI.constrainGenericRegister(Dst, AArch64::FPR64RegClass, MRI);
5489 return &*Copy;
5490 }
5491 }
5492
5493 auto *CPLoad = emitLoadFromConstantPool(CV, MIRBuilder);
5494 if (!CPLoad) {
5495 LLVM_DEBUG(dbgs() << "Could not generate cp load for constant vector!");
5496 return nullptr;
5497 }
5498
5499 auto Copy = MIRBuilder.buildCopy(Dst, CPLoad->getOperand(0));
5500 RBI.constrainGenericRegister(
5501 Dst, *MRI.getRegClass(CPLoad->getOperand(0).getReg()), MRI);
5502 return &*Copy;
5503}
5504
5505bool AArch64InstructionSelector::tryOptConstantBuildVec(
5506 MachineInstr &I, LLT DstTy, MachineRegisterInfo &MRI) {
5507 assert(I.getOpcode() == TargetOpcode::G_BUILD_VECTOR);
5508 unsigned DstSize = DstTy.getSizeInBits();
5509 assert(DstSize <= 128 && "Unexpected build_vec type!");
5510 if (DstSize < 32)
5511 return false;
5512 // Check if we're building a constant vector, in which case we want to
5513 // generate a constant pool load instead of a vector insert sequence.
5514 SmallVector<Constant *, 16> Csts;
5515 for (unsigned Idx = 1; Idx < I.getNumOperands(); ++Idx) {
5516 // Try to find G_CONSTANT or G_FCONSTANT
5517 auto *OpMI =
5518 getOpcodeDef(TargetOpcode::G_CONSTANT, I.getOperand(Idx).getReg(), MRI);
5519 if (OpMI)
5520 Csts.emplace_back(
5521 const_cast<ConstantInt *>(OpMI->getOperand(1).getCImm()));
5522 else if ((OpMI = getOpcodeDef(TargetOpcode::G_FCONSTANT,
5523 I.getOperand(Idx).getReg(), MRI)))
5524 Csts.emplace_back(
5525 const_cast<ConstantFP *>(OpMI->getOperand(1).getFPImm()));
5526 else
5527 return false;
5528 }
5529 Constant *CV = ConstantVector::get(Csts);
5530 if (!emitConstantVector(I.getOperand(0).getReg(), CV, MIB, MRI))
5531 return false;
5532 I.eraseFromParent();
5533 return true;
5534}
5535
5536bool AArch64InstructionSelector::tryOptBuildVecToSubregToReg(
5537 MachineInstr &I, Ma