Bug Summary

File: /build/llvm-toolchain-snapshot-15~++20220419111428+a65f2730d291/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
Warning: line 6573, column 67
The result of the left shift is undefined due to shifting by '4294967295', which is greater or equal to the width of type 'int'
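The shift count '4294967295' is UINT_MAX, i.e. -1 converted to an unsigned type, so this diagnostic usually means a "not found" sentinel value reached a shift on the flagged path. Line 6573 itself is outside this excerpt; the snippet below is a minimal, assumed sketch of the pattern and of the kind of guard that silences the warning — the names and helper are hypothetical, not the actual LLVM code.

    #include <cstdint>

    // Hypothetical helper: returns the index of the lowest set bit, or -1 if none.
    static int lowestSetBit(uint64_t Mask) {
      for (int I = 0; I < 64; ++I)
        if (Mask & (uint64_t(1) << I))
          return I;
      return -1; // sentinel: no bit set
    }

    static uint32_t singleBitMask(uint64_t Mask) {
      int Bit = lowestSetBit(Mask); // may be the -1 sentinel
      // Without this guard, '1u << Bit' with Bit == -1 shifts by 4294967295
      // (the count is converted to unsigned), which is >= the 32-bit width
      // of 'int' and therefore undefined behaviour.
      if (Bit < 0 || Bit >= 32)
        return 0;
      return 1u << Bit;
    }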

Annotated Source Code


clang -cc1 -cc1 -triple x86_64-pc-linux-gnu -analyze -disable-free -clear-ast-before-backend -disable-llvm-verifier -discard-value-names -main-file-name AArch64InstructionSelector.cpp -analyzer-store=region -analyzer-opt-analyze-nested-blocks -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=cplusplus -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -analyzer-config-compatibility-mode=true -mrelocation-model pic -pic-level 2 -mframe-pointer=none -fmath-errno -ffp-contract=on -fno-rounding-math -mconstructor-aliases -funwind-tables=2 -target-cpu x86-64 -tune-cpu generic -debugger-tuning=gdb -ffunction-sections -fdata-sections -fcoverage-compilation-dir=/build/llvm-toolchain-snapshot-15~++20220419111428+a65f2730d291/build-llvm/tools/clang/stage2-bins -resource-dir /usr/lib/llvm-15/lib/clang/15.0.0 -D _DEBUG -D _GNU_SOURCE -D __STDC_CONSTANT_MACROS -D __STDC_FORMAT_MACROS -D __STDC_LIMIT_MACROS -I lib/Target/AArch64 -I /build/llvm-toolchain-snapshot-15~++20220419111428+a65f2730d291/llvm/lib/Target/AArch64 -I include -I /build/llvm-toolchain-snapshot-15~++20220419111428+a65f2730d291/llvm/include -D _FORTIFY_SOURCE=2 -D NDEBUG -U NDEBUG -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/c++/10 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/x86_64-linux-gnu/c++/10 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/c++/10/backward -internal-isystem /usr/lib/llvm-15/lib/clang/15.0.0/include -internal-isystem /usr/local/include -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../x86_64-linux-gnu/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -fmacro-prefix-map=/build/llvm-toolchain-snapshot-15~++20220419111428+a65f2730d291/build-llvm/tools/clang/stage2-bins=build-llvm/tools/clang/stage2-bins -fmacro-prefix-map=/build/llvm-toolchain-snapshot-15~++20220419111428+a65f2730d291/= -fcoverage-prefix-map=/build/llvm-toolchain-snapshot-15~++20220419111428+a65f2730d291/build-llvm/tools/clang/stage2-bins=build-llvm/tools/clang/stage2-bins -fcoverage-prefix-map=/build/llvm-toolchain-snapshot-15~++20220419111428+a65f2730d291/= -O3 -Wno-unused-command-line-argument -Wno-unused-parameter -Wwrite-strings -Wno-missing-field-initializers -Wno-long-long -Wno-maybe-uninitialized -Wno-class-memaccess -Wno-redundant-move -Wno-pessimizing-move -Wno-noexcept-type -Wno-comment -std=c++14 -fdeprecated-macro -fdebug-compilation-dir=/build/llvm-toolchain-snapshot-15~++20220419111428+a65f2730d291/build-llvm/tools/clang/stage2-bins -fdebug-prefix-map=/build/llvm-toolchain-snapshot-15~++20220419111428+a65f2730d291/build-llvm/tools/clang/stage2-bins=build-llvm/tools/clang/stage2-bins -fdebug-prefix-map=/build/llvm-toolchain-snapshot-15~++20220419111428+a65f2730d291/= -ferror-limit 19 -fvisibility hidden -fvisibility-inlines-hidden -stack-protector 2 -fgnuc-version=4.2.1 -fcolor-diagnostics -vectorize-loops -vectorize-slp -analyzer-output=html -analyzer-config stable-report-filename=true -faddrsig 
-D__GCC_HAVE_DWARF2_CFI_ASM=1 -o /tmp/scan-build-2022-04-19-125528-33783-1 -x c++ /build/llvm-toolchain-snapshot-15~++20220419111428+a65f2730d291/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp

/build/llvm-toolchain-snapshot-15~++20220419111428+a65f2730d291/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp

1//===- AArch64InstructionSelector.cpp ----------------------------*- C++ -*-==//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8/// \file
9/// This file implements the targeting of the InstructionSelector class for
10/// AArch64.
11/// \todo This should be generated by TableGen.
12//===----------------------------------------------------------------------===//
13
14#include "AArch64GlobalISelUtils.h"
15#include "AArch64InstrInfo.h"
16#include "AArch64MachineFunctionInfo.h"
17#include "AArch64RegisterBankInfo.h"
18#include "AArch64RegisterInfo.h"
19#include "AArch64Subtarget.h"
20#include "AArch64TargetMachine.h"
21#include "MCTargetDesc/AArch64AddressingModes.h"
22#include "MCTargetDesc/AArch64MCTargetDesc.h"
23#include "llvm/ADT/Optional.h"
24#include "llvm/BinaryFormat/Dwarf.h"
25#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
26#include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
27#include "llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h"
28#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
29#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
30#include "llvm/CodeGen/GlobalISel/Utils.h"
31#include "llvm/CodeGen/MachineBasicBlock.h"
32#include "llvm/CodeGen/MachineConstantPool.h"
33#include "llvm/CodeGen/MachineFrameInfo.h"
34#include "llvm/CodeGen/MachineFunction.h"
35#include "llvm/CodeGen/MachineInstr.h"
36#include "llvm/CodeGen/MachineInstrBuilder.h"
37#include "llvm/CodeGen/MachineMemOperand.h"
38#include "llvm/CodeGen/MachineOperand.h"
39#include "llvm/CodeGen/MachineRegisterInfo.h"
40#include "llvm/CodeGen/TargetOpcodes.h"
41#include "llvm/IR/Constants.h"
42#include "llvm/IR/DerivedTypes.h"
43#include "llvm/IR/Instructions.h"
44#include "llvm/IR/IntrinsicsAArch64.h"
45#include "llvm/IR/PatternMatch.h"
46#include "llvm/IR/Type.h"
47#include "llvm/Pass.h"
48#include "llvm/Support/Debug.h"
49#include "llvm/Support/raw_ostream.h"
50
51#define DEBUG_TYPE "aarch64-isel"
52
53using namespace llvm;
54using namespace MIPatternMatch;
55using namespace AArch64GISelUtils;
56
57namespace llvm {
58class BlockFrequencyInfo;
59class ProfileSummaryInfo;
60}
61
62namespace {
63
64#define GET_GLOBALISEL_PREDICATE_BITSET
65#include "AArch64GenGlobalISel.inc"
66#undef GET_GLOBALISEL_PREDICATE_BITSET
67
68
69class AArch64InstructionSelector : public InstructionSelector {
70public:
71 AArch64InstructionSelector(const AArch64TargetMachine &TM,
72 const AArch64Subtarget &STI,
73 const AArch64RegisterBankInfo &RBI);
74
75 bool select(MachineInstr &I) override;
76 static const char *getName() { return DEBUG_TYPE; }
77
78 void setupMF(MachineFunction &MF, GISelKnownBits *KB,
79 CodeGenCoverage &CoverageInfo, ProfileSummaryInfo *PSI,
80 BlockFrequencyInfo *BFI) override {
81 InstructionSelector::setupMF(MF, KB, CoverageInfo, PSI, BFI);
82 MIB.setMF(MF);
83
84 // hasFnAttribute() is expensive to call on every BRCOND selection, so
85 // cache it here for each run of the selector.
86 ProduceNonFlagSettingCondBr =
87 !MF.getFunction().hasFnAttribute(Attribute::SpeculativeLoadHardening);
88 MFReturnAddr = Register();
89
90 processPHIs(MF);
91 }
92
93private:
94 /// tblgen-erated 'select' implementation, used as the initial selector for
95 /// the patterns that don't require complex C++.
96 bool selectImpl(MachineInstr &I, CodeGenCoverage &CoverageInfo) const;
97
98 // A lowering phase that runs before any selection attempts.
99 // Returns true if the instruction was modified.
100 bool preISelLower(MachineInstr &I);
101
102 // An early selection function that runs before the selectImpl() call.
103 bool earlySelect(MachineInstr &I);
104
105 // Do some preprocessing of G_PHIs before we begin selection.
106 void processPHIs(MachineFunction &MF);
107
108 bool earlySelectSHL(MachineInstr &I, MachineRegisterInfo &MRI);
109
110 /// Eliminate same-sized cross-bank copies into stores before selectImpl().
111 bool contractCrossBankCopyIntoStore(MachineInstr &I,
112 MachineRegisterInfo &MRI);
113
114 bool convertPtrAddToAdd(MachineInstr &I, MachineRegisterInfo &MRI);
115
116 bool selectVaStartAAPCS(MachineInstr &I, MachineFunction &MF,
117 MachineRegisterInfo &MRI) const;
118 bool selectVaStartDarwin(MachineInstr &I, MachineFunction &MF,
119 MachineRegisterInfo &MRI) const;
120
121 ///@{
122 /// Helper functions for selectCompareBranch.
123 bool selectCompareBranchFedByFCmp(MachineInstr &I, MachineInstr &FCmp,
124 MachineIRBuilder &MIB) const;
125 bool selectCompareBranchFedByICmp(MachineInstr &I, MachineInstr &ICmp,
126 MachineIRBuilder &MIB) const;
127 bool tryOptCompareBranchFedByICmp(MachineInstr &I, MachineInstr &ICmp,
128 MachineIRBuilder &MIB) const;
129 bool tryOptAndIntoCompareBranch(MachineInstr &AndInst, bool Invert,
130 MachineBasicBlock *DstMBB,
131 MachineIRBuilder &MIB) const;
132 ///@}
133
134 bool selectCompareBranch(MachineInstr &I, MachineFunction &MF,
135 MachineRegisterInfo &MRI);
136
137 bool selectVectorAshrLshr(MachineInstr &I, MachineRegisterInfo &MRI);
138 bool selectVectorSHL(MachineInstr &I, MachineRegisterInfo &MRI);
139
140 // Helper to generate an equivalent of scalar_to_vector into a new register,
141 // returned via 'Dst'.
142 MachineInstr *emitScalarToVector(unsigned EltSize,
143 const TargetRegisterClass *DstRC,
144 Register Scalar,
145 MachineIRBuilder &MIRBuilder) const;
146
147 /// Emit a lane insert into \p DstReg, or a new vector register if None is
148 /// provided.
149 ///
150 /// The lane inserted into is defined by \p LaneIdx. The vector source
151 /// register is given by \p SrcReg. The register containing the element is
152 /// given by \p EltReg.
153 MachineInstr *emitLaneInsert(Optional<Register> DstReg, Register SrcReg,
154 Register EltReg, unsigned LaneIdx,
155 const RegisterBank &RB,
156 MachineIRBuilder &MIRBuilder) const;
157
158 /// Emit a sequence of instructions representing a constant \p CV for a
159 /// vector register \p Dst. (E.g. a MOV, or a load from a constant pool.)
160 ///
161 /// \returns the last instruction in the sequence on success, and nullptr
162 /// otherwise.
163 MachineInstr *emitConstantVector(Register Dst, Constant *CV,
164 MachineIRBuilder &MIRBuilder,
165 MachineRegisterInfo &MRI);
166
167 bool selectInsertElt(MachineInstr &I, MachineRegisterInfo &MRI);
168 bool tryOptConstantBuildVec(MachineInstr &MI, LLT DstTy,
169 MachineRegisterInfo &MRI);
170 /// \returns true if a G_BUILD_VECTOR instruction \p MI can be selected as a
171 /// SUBREG_TO_REG.
172 bool tryOptBuildVecToSubregToReg(MachineInstr &MI, MachineRegisterInfo &MRI);
173 bool selectBuildVector(MachineInstr &I, MachineRegisterInfo &MRI);
174 bool selectMergeValues(MachineInstr &I, MachineRegisterInfo &MRI);
175 bool selectUnmergeValues(MachineInstr &I, MachineRegisterInfo &MRI);
176
177 bool selectShuffleVector(MachineInstr &I, MachineRegisterInfo &MRI);
178 bool selectExtractElt(MachineInstr &I, MachineRegisterInfo &MRI);
179 bool selectConcatVectors(MachineInstr &I, MachineRegisterInfo &MRI);
180 bool selectSplitVectorUnmerge(MachineInstr &I, MachineRegisterInfo &MRI);
181
182 /// Helper function to select vector load intrinsics like
183 /// @llvm.aarch64.neon.ld2.*, @llvm.aarch64.neon.ld4.*, etc.
184 /// \p Opc is the opcode that the selected instruction should use.
185 /// \p NumVecs is the number of vector destinations for the instruction.
186 /// \p I is the original G_INTRINSIC_W_SIDE_EFFECTS instruction.
187 bool selectVectorLoadIntrinsic(unsigned Opc, unsigned NumVecs,
188 MachineInstr &I);
189 bool selectIntrinsicWithSideEffects(MachineInstr &I,
190 MachineRegisterInfo &MRI);
191 bool selectIntrinsic(MachineInstr &I, MachineRegisterInfo &MRI);
192 bool selectVectorICmp(MachineInstr &I, MachineRegisterInfo &MRI);
193 bool selectIntrinsicTrunc(MachineInstr &I, MachineRegisterInfo &MRI) const;
194 bool selectIntrinsicRound(MachineInstr &I, MachineRegisterInfo &MRI) const;
195 bool selectJumpTable(MachineInstr &I, MachineRegisterInfo &MRI);
196 bool selectBrJT(MachineInstr &I, MachineRegisterInfo &MRI);
197 bool selectTLSGlobalValue(MachineInstr &I, MachineRegisterInfo &MRI);
198 bool selectReduction(MachineInstr &I, MachineRegisterInfo &MRI);
199 bool selectMOPS(MachineInstr &I, MachineRegisterInfo &MRI);
200 bool selectUSMovFromExtend(MachineInstr &I, MachineRegisterInfo &MRI);
201
202 unsigned emitConstantPoolEntry(const Constant *CPVal,
203 MachineFunction &MF) const;
204 MachineInstr *emitLoadFromConstantPool(const Constant *CPVal,
205 MachineIRBuilder &MIRBuilder) const;
206
207 // Emit a vector concat operation.
208 MachineInstr *emitVectorConcat(Optional<Register> Dst, Register Op1,
209 Register Op2,
210 MachineIRBuilder &MIRBuilder) const;
211
212 // Emit an integer compare between LHS and RHS, which checks for Predicate.
213 MachineInstr *emitIntegerCompare(MachineOperand &LHS, MachineOperand &RHS,
214 MachineOperand &Predicate,
215 MachineIRBuilder &MIRBuilder) const;
216
217 /// Emit a floating point comparison between \p LHS and \p RHS.
218 /// \p Pred if given is the intended predicate to use.
219 MachineInstr *emitFPCompare(Register LHS, Register RHS,
220 MachineIRBuilder &MIRBuilder,
221 Optional<CmpInst::Predicate> = None) const;
222
223 MachineInstr *emitInstr(unsigned Opcode,
224 std::initializer_list<llvm::DstOp> DstOps,
225 std::initializer_list<llvm::SrcOp> SrcOps,
226 MachineIRBuilder &MIRBuilder,
227 const ComplexRendererFns &RenderFns = None) const;
228 /// Helper function to emit an add or sub instruction.
229 ///
230 /// \p AddrModeAndSizeToOpcode must contain each of the opcode variants above
231 /// in a specific order.
232 ///
233 /// Below is an example of the expected input to \p AddrModeAndSizeToOpcode.
234 ///
235 /// \code
236 /// const std::array<std::array<unsigned, 2>, 4> Table {
237 /// {{AArch64::ADDXri, AArch64::ADDWri},
238 /// {AArch64::ADDXrs, AArch64::ADDWrs},
239 /// {AArch64::ADDXrr, AArch64::ADDWrr},
240 /// {AArch64::SUBXri, AArch64::SUBWri},
241 /// {AArch64::ADDXrx, AArch64::ADDWrx}}};
242 /// \endcode
243 ///
244 /// Each row in the table corresponds to a different addressing mode. Each
245 /// column corresponds to a different register size.
246 ///
247 /// \attention Rows must be structured as follows:
248 /// - Row 0: The ri opcode variants
249 /// - Row 1: The rs opcode variants
250 /// - Row 2: The rr opcode variants
251 /// - Row 3: The ri opcode variants for negative immediates
252 /// - Row 4: The rx opcode variants
253 ///
254 /// \attention Columns must be structured as follows:
255 /// - Column 0: The 64-bit opcode variants
256 /// - Column 1: The 32-bit opcode variants
257 ///
258 /// \p Dst is the destination register of the binop to emit.
259 /// \p LHS is the left-hand operand of the binop to emit.
260 /// \p RHS is the right-hand operand of the binop to emit.
261 MachineInstr *emitAddSub(
262 const std::array<std::array<unsigned, 2>, 5> &AddrModeAndSizeToOpcode,
263 Register Dst, MachineOperand &LHS, MachineOperand &RHS,
264 MachineIRBuilder &MIRBuilder) const;
265 MachineInstr *emitADD(Register DefReg, MachineOperand &LHS,
266 MachineOperand &RHS,
267 MachineIRBuilder &MIRBuilder) const;
268 MachineInstr *emitADDS(Register Dst, MachineOperand &LHS, MachineOperand &RHS,
269 MachineIRBuilder &MIRBuilder) const;
270 MachineInstr *emitSUBS(Register Dst, MachineOperand &LHS, MachineOperand &RHS,
271 MachineIRBuilder &MIRBuilder) const;
272 MachineInstr *emitCMN(MachineOperand &LHS, MachineOperand &RHS,
273 MachineIRBuilder &MIRBuilder) const;
274 MachineInstr *emitTST(MachineOperand &LHS, MachineOperand &RHS,
275 MachineIRBuilder &MIRBuilder) const;
276 MachineInstr *emitSelect(Register Dst, Register LHS, Register RHS,
277 AArch64CC::CondCode CC,
278 MachineIRBuilder &MIRBuilder) const;
279 MachineInstr *emitExtractVectorElt(Optional<Register> DstReg,
280 const RegisterBank &DstRB, LLT ScalarTy,
281 Register VecReg, unsigned LaneIdx,
282 MachineIRBuilder &MIRBuilder) const;
283 MachineInstr *emitCSINC(Register Dst, Register Src1, Register Src2,
284 AArch64CC::CondCode Pred,
285 MachineIRBuilder &MIRBuilder) const;
286 /// Emit a CSet for a FP compare.
287 ///
288 /// \p Dst is expected to be a 32-bit scalar register.
289 MachineInstr *emitCSetForFCmp(Register Dst, CmpInst::Predicate Pred,
290 MachineIRBuilder &MIRBuilder) const;
291
292 /// Emit the overflow op for \p Opcode.
293 ///
294 /// \p Opcode is expected to be an overflow op's opcode, e.g. G_UADDO,
295 /// G_USUBO, etc.
296 std::pair<MachineInstr *, AArch64CC::CondCode>
297 emitOverflowOp(unsigned Opcode, Register Dst, MachineOperand &LHS,
298 MachineOperand &RHS, MachineIRBuilder &MIRBuilder) const;
299
300 /// Emit expression as a conjunction (a series of CCMP/CFCMP ops).
301 /// In some cases this is even possible with OR operations in the expression.
302 MachineInstr *emitConjunction(Register Val, AArch64CC::CondCode &OutCC,
303 MachineIRBuilder &MIB) const;
304 MachineInstr *emitConditionalComparison(Register LHS, Register RHS,
305 CmpInst::Predicate CC,
306 AArch64CC::CondCode Predicate,
307 AArch64CC::CondCode OutCC,
308 MachineIRBuilder &MIB) const;
309 MachineInstr *emitConjunctionRec(Register Val, AArch64CC::CondCode &OutCC,
310 bool Negate, Register CCOp,
311 AArch64CC::CondCode Predicate,
312 MachineIRBuilder &MIB) const;
313
314 /// Emit a TB(N)Z instruction which tests \p Bit in \p TestReg.
315 /// \p IsNegative is true if the test should be "not zero".
316 /// This will also optimize the test bit instruction when possible.
317 MachineInstr *emitTestBit(Register TestReg, uint64_t Bit, bool IsNegative,
318 MachineBasicBlock *DstMBB,
319 MachineIRBuilder &MIB) const;
320
321 /// Emit a CB(N)Z instruction which branches to \p DestMBB.
322 MachineInstr *emitCBZ(Register CompareReg, bool IsNegative,
323 MachineBasicBlock *DestMBB,
324 MachineIRBuilder &MIB) const;
325
326 // Equivalent to the i32shift_a and friends from AArch64InstrInfo.td.
327 // We use these manually instead of using the importer since it doesn't
328 // support SDNodeXForm.
329 ComplexRendererFns selectShiftA_32(const MachineOperand &Root) const;
330 ComplexRendererFns selectShiftB_32(const MachineOperand &Root) const;
331 ComplexRendererFns selectShiftA_64(const MachineOperand &Root) const;
332 ComplexRendererFns selectShiftB_64(const MachineOperand &Root) const;
333
334 ComplexRendererFns select12BitValueWithLeftShift(uint64_t Immed) const;
335 ComplexRendererFns selectArithImmed(MachineOperand &Root) const;
336 ComplexRendererFns selectNegArithImmed(MachineOperand &Root) const;
337
338 ComplexRendererFns selectAddrModeUnscaled(MachineOperand &Root,
339 unsigned Size) const;
340
341 ComplexRendererFns selectAddrModeUnscaled8(MachineOperand &Root) const {
342 return selectAddrModeUnscaled(Root, 1);
343 }
344 ComplexRendererFns selectAddrModeUnscaled16(MachineOperand &Root) const {
345 return selectAddrModeUnscaled(Root, 2);
346 }
347 ComplexRendererFns selectAddrModeUnscaled32(MachineOperand &Root) const {
348 return selectAddrModeUnscaled(Root, 4);
349 }
350 ComplexRendererFns selectAddrModeUnscaled64(MachineOperand &Root) const {
351 return selectAddrModeUnscaled(Root, 8);
352 }
353 ComplexRendererFns selectAddrModeUnscaled128(MachineOperand &Root) const {
354 return selectAddrModeUnscaled(Root, 16);
355 }
356
357 /// Helper to try to fold in a GISEL_ADD_LOW into an immediate, to be used
358 /// from complex pattern matchers like selectAddrModeIndexed().
359 ComplexRendererFns tryFoldAddLowIntoImm(MachineInstr &RootDef, unsigned Size,
360 MachineRegisterInfo &MRI) const;
361
362 ComplexRendererFns selectAddrModeIndexed(MachineOperand &Root,
363 unsigned Size) const;
364 template <int Width>
365 ComplexRendererFns selectAddrModeIndexed(MachineOperand &Root) const {
366 return selectAddrModeIndexed(Root, Width / 8);
367 }
368
369 bool isWorthFoldingIntoExtendedReg(MachineInstr &MI,
370 const MachineRegisterInfo &MRI) const;
371 ComplexRendererFns
372 selectAddrModeShiftedExtendXReg(MachineOperand &Root,
373 unsigned SizeInBytes) const;
374
375 /// Returns a \p ComplexRendererFns which contains a base, offset, and whether
376 /// or not a shift + extend should be folded into an addressing mode. Returns
377 /// None when this is not profitable or possible.
378 ComplexRendererFns
379 selectExtendedSHL(MachineOperand &Root, MachineOperand &Base,
380 MachineOperand &Offset, unsigned SizeInBytes,
381 bool WantsExt) const;
382 ComplexRendererFns selectAddrModeRegisterOffset(MachineOperand &Root) const;
383 ComplexRendererFns selectAddrModeXRO(MachineOperand &Root,
384 unsigned SizeInBytes) const;
385 template <int Width>
386 ComplexRendererFns selectAddrModeXRO(MachineOperand &Root) const {
387 return selectAddrModeXRO(Root, Width / 8);
388 }
389
390 ComplexRendererFns selectAddrModeWRO(MachineOperand &Root,
391 unsigned SizeInBytes) const;
392 template <int Width>
393 ComplexRendererFns selectAddrModeWRO(MachineOperand &Root) const {
394 return selectAddrModeWRO(Root, Width / 8);
395 }
396
397 ComplexRendererFns selectShiftedRegister(MachineOperand &Root,
398 bool AllowROR = false) const;
399
400 ComplexRendererFns selectArithShiftedRegister(MachineOperand &Root) const {
401 return selectShiftedRegister(Root);
402 }
403
404 ComplexRendererFns selectLogicalShiftedRegister(MachineOperand &Root) const {
405 return selectShiftedRegister(Root, true);
406 }
407
408 /// Given an extend instruction, determine the correct shift-extend type for
409 /// that instruction.
410 ///
411 /// If the instruction is going to be used in a load or store, pass
412 /// \p IsLoadStore = true.
413 AArch64_AM::ShiftExtendType
414 getExtendTypeForInst(MachineInstr &MI, MachineRegisterInfo &MRI,
415 bool IsLoadStore = false) const;
416
417 /// Move \p Reg to \p RC if \p Reg is not already on \p RC.
418 ///
419 /// \returns Either \p Reg if no change was necessary, or the new register
420 /// created by moving \p Reg.
421 ///
422 /// Note: This uses emitCopy right now.
423 Register moveScalarRegClass(Register Reg, const TargetRegisterClass &RC,
424 MachineIRBuilder &MIB) const;
425
426 ComplexRendererFns selectArithExtendedRegister(MachineOperand &Root) const;
427
428 void renderTruncImm(MachineInstrBuilder &MIB, const MachineInstr &MI,
429 int OpIdx = -1) const;
430 void renderLogicalImm32(MachineInstrBuilder &MIB, const MachineInstr &I,
431 int OpIdx = -1) const;
432 void renderLogicalImm64(MachineInstrBuilder &MIB, const MachineInstr &I,
433 int OpIdx = -1) const;
434 void renderFPImm16(MachineInstrBuilder &MIB, const MachineInstr &MI,
435 int OpIdx = -1) const;
436 void renderFPImm32(MachineInstrBuilder &MIB, const MachineInstr &MI,
437 int OpIdx = -1) const;
438 void renderFPImm64(MachineInstrBuilder &MIB, const MachineInstr &MI,
439 int OpIdx = -1) const;
440 void renderFPImm32SIMDModImmType4(MachineInstrBuilder &MIB,
441 const MachineInstr &MI,
442 int OpIdx = -1) const;
443
444 // Materialize a GlobalValue or BlockAddress using a movz+movk sequence.
445 void materializeLargeCMVal(MachineInstr &I, const Value *V, unsigned OpFlags);
446
447 // Optimization methods.
448 bool tryOptSelect(GSelect &Sel);
449 bool tryOptSelectConjunction(GSelect &Sel, MachineInstr &CondMI);
450 MachineInstr *tryFoldIntegerCompare(MachineOperand &LHS, MachineOperand &RHS,
451 MachineOperand &Predicate,
452 MachineIRBuilder &MIRBuilder) const;
453
454 /// Return true if \p MI is a load or store of \p NumBytes bytes.
455 bool isLoadStoreOfNumBytes(const MachineInstr &MI, unsigned NumBytes) const;
456
457 /// Returns true if \p MI is guaranteed to have the high-half of a 64-bit
458 /// register zeroed out. In other words, the result of MI has been explicitly
459 /// zero extended.
460 bool isDef32(const MachineInstr &MI) const;
461
462 const AArch64TargetMachine &TM;
463 const AArch64Subtarget &STI;
464 const AArch64InstrInfo &TII;
465 const AArch64RegisterInfo &TRI;
466 const AArch64RegisterBankInfo &RBI;
467
468 bool ProduceNonFlagSettingCondBr = false;
469
470 // Some cached values used during selection.
471 // We use LR as a live-in register, and we keep track of it here as it can be
472 // clobbered by calls.
473 Register MFReturnAddr;
474
475 MachineIRBuilder MIB;
476
477#define GET_GLOBALISEL_PREDICATES_DECL
478#include "AArch64GenGlobalISel.inc"
479#undef GET_GLOBALISEL_PREDICATES_DECL
480
481// We declare the temporaries used by selectImpl() in the class to minimize the
482// cost of constructing placeholder values.
483#define GET_GLOBALISEL_TEMPORARIES_DECL
484#include "AArch64GenGlobalISel.inc"
485#undef GET_GLOBALISEL_TEMPORARIES_DECL
486};
487
488} // end anonymous namespace
489
490#define GET_GLOBALISEL_IMPL
491#include "AArch64GenGlobalISel.inc"
492#undef GET_GLOBALISEL_IMPL
493
494AArch64InstructionSelector::AArch64InstructionSelector(
495 const AArch64TargetMachine &TM, const AArch64Subtarget &STI,
496 const AArch64RegisterBankInfo &RBI)
497 : TM(TM), STI(STI), TII(*STI.getInstrInfo()), TRI(*STI.getRegisterInfo()),
498 RBI(RBI),
499#define GET_GLOBALISEL_PREDICATES_INIT
500#include "AArch64GenGlobalISel.inc"
501#undef GET_GLOBALISEL_PREDICATES_INIT
502#define GET_GLOBALISEL_TEMPORARIES_INIT
503#include "AArch64GenGlobalISel.inc"
504#undef GET_GLOBALISEL_TEMPORARIES_INIT
505{
506}
507
508// FIXME: This should be target-independent, inferred from the types declared
509// for each class in the bank.
510static const TargetRegisterClass *
511getRegClassForTypeOnBank(LLT Ty, const RegisterBank &RB,
512 const RegisterBankInfo &RBI,
513 bool GetAllRegSet = false) {
514 if (RB.getID() == AArch64::GPRRegBankID) {
515 if (Ty.getSizeInBits() <= 32)
516 return GetAllRegSet ? &AArch64::GPR32allRegClass
517 : &AArch64::GPR32RegClass;
518 if (Ty.getSizeInBits() == 64)
519 return GetAllRegSet ? &AArch64::GPR64allRegClass
520 : &AArch64::GPR64RegClass;
521 if (Ty.getSizeInBits() == 128)
522 return &AArch64::XSeqPairsClassRegClass;
523 return nullptr;
524 }
525
526 if (RB.getID() == AArch64::FPRRegBankID) {
527 switch (Ty.getSizeInBits()) {
528 case 8:
529 return &AArch64::FPR8RegClass;
530 case 16:
531 return &AArch64::FPR16RegClass;
532 case 32:
533 return &AArch64::FPR32RegClass;
534 case 64:
535 return &AArch64::FPR64RegClass;
536 case 128:
537 return &AArch64::FPR128RegClass;
538 }
539 return nullptr;
540 }
541
542 return nullptr;
543}
544
545/// Given a register bank, and size in bits, return the smallest register class
546/// that can represent that combination.
547static const TargetRegisterClass *
548getMinClassForRegBank(const RegisterBank &RB, unsigned SizeInBits,
549 bool GetAllRegSet = false) {
550 unsigned RegBankID = RB.getID();
551
552 if (RegBankID == AArch64::GPRRegBankID) {
553 if (SizeInBits <= 32)
554 return GetAllRegSet ? &AArch64::GPR32allRegClass
555 : &AArch64::GPR32RegClass;
556 if (SizeInBits == 64)
557 return GetAllRegSet ? &AArch64::GPR64allRegClass
558 : &AArch64::GPR64RegClass;
559 if (SizeInBits == 128)
560 return &AArch64::XSeqPairsClassRegClass;
561 }
562
563 if (RegBankID == AArch64::FPRRegBankID) {
564 switch (SizeInBits) {
565 default:
566 return nullptr;
567 case 8:
568 return &AArch64::FPR8RegClass;
569 case 16:
570 return &AArch64::FPR16RegClass;
571 case 32:
572 return &AArch64::FPR32RegClass;
573 case 64:
574 return &AArch64::FPR64RegClass;
575 case 128:
576 return &AArch64::FPR128RegClass;
577 }
578 }
579
580 return nullptr;
581}
582
583/// Returns the correct subregister to use for a given register class.
584static bool getSubRegForClass(const TargetRegisterClass *RC,
585 const TargetRegisterInfo &TRI, unsigned &SubReg) {
586 switch (TRI.getRegSizeInBits(*RC)) {
587 case 8:
588 SubReg = AArch64::bsub;
589 break;
590 case 16:
591 SubReg = AArch64::hsub;
592 break;
593 case 32:
594 if (RC != &AArch64::FPR32RegClass)
595 SubReg = AArch64::sub_32;
596 else
597 SubReg = AArch64::ssub;
598 break;
599 case 64:
600 SubReg = AArch64::dsub;
601 break;
602 default:
603 LLVM_DEBUG(
604 dbgs() << "Couldn't find appropriate subregister for register class.");
605 return false;
606 }
607
608 return true;
609}
610
611/// Returns the minimum size the given register bank can hold.
612static unsigned getMinSizeForRegBank(const RegisterBank &RB) {
613 switch (RB.getID()) {
614 case AArch64::GPRRegBankID:
615 return 32;
616 case AArch64::FPRRegBankID:
617 return 8;
618 default:
619 llvm_unreachable("Tried to get minimum size for unknown register bank.");
620 }
621}
622
623/// Create a REG_SEQUENCE instruction using the registers in \p Regs.
624/// Helper function for functions like createDTuple and createQTuple.
625///
626/// \p RegClassIDs - The list of register class IDs available for some tuple of
627/// a scalar class. E.g. QQRegClassID, QQQRegClassID, QQQQRegClassID. This is
628/// expected to contain between 2 and 4 tuple classes.
629///
630/// \p SubRegs - The list of subregister classes associated with each register
631/// class ID in \p RegClassIDs. E.g., QQRegClassID should use the qsub0
632/// subregister class. The index of each subregister class is expected to
633/// correspond with the index of each register class.
634///
635/// \returns Either the destination register of REG_SEQUENCE instruction that
636/// was created, or the 0th element of \p Regs if \p Regs contains a single
637/// element.
638static Register createTuple(ArrayRef<Register> Regs,
639 const unsigned RegClassIDs[],
640 const unsigned SubRegs[], MachineIRBuilder &MIB) {
641 unsigned NumRegs = Regs.size();
642 if (NumRegs == 1)
643 return Regs[0];
644 assert(NumRegs >= 2 && NumRegs <= 4 &&
645 "Only support between two and 4 registers in a tuple!");
646 const TargetRegisterInfo *TRI = MIB.getMF().getSubtarget().getRegisterInfo();
647 auto *DesiredClass = TRI->getRegClass(RegClassIDs[NumRegs - 2]);
648 auto RegSequence =
649 MIB.buildInstr(TargetOpcode::REG_SEQUENCE, {DesiredClass}, {});
650 for (unsigned I = 0, E = Regs.size(); I < E; ++I) {
651 RegSequence.addUse(Regs[I]);
652 RegSequence.addImm(SubRegs[I]);
653 }
654 return RegSequence.getReg(0);
655}
656
657/// Create a tuple of D-registers using the registers in \p Regs.
658static Register createDTuple(ArrayRef<Register> Regs, MachineIRBuilder &MIB) {
659 static const unsigned RegClassIDs[] = {
660 AArch64::DDRegClassID, AArch64::DDDRegClassID, AArch64::DDDDRegClassID};
661 static const unsigned SubRegs[] = {AArch64::dsub0, AArch64::dsub1,
662 AArch64::dsub2, AArch64::dsub3};
663 return createTuple(Regs, RegClassIDs, SubRegs, MIB);
664}
665
666/// Create a tuple of Q-registers using the registers in \p Regs.
667static Register createQTuple(ArrayRef<Register> Regs, MachineIRBuilder &MIB) {
668 static const unsigned RegClassIDs[] = {
669 AArch64::QQRegClassID, AArch64::QQQRegClassID, AArch64::QQQQRegClassID};
670 static const unsigned SubRegs[] = {AArch64::qsub0, AArch64::qsub1,
671 AArch64::qsub2, AArch64::qsub3};
672 return createTuple(Regs, RegClassIDs, SubRegs, MIB);
673}
674
675static Optional<uint64_t> getImmedFromMO(const MachineOperand &Root) {
676 auto &MI = *Root.getParent();
677 auto &MBB = *MI.getParent();
678 auto &MF = *MBB.getParent();
679 auto &MRI = MF.getRegInfo();
680 uint64_t Immed;
681 if (Root.isImm())
682 Immed = Root.getImm();
683 else if (Root.isCImm())
684 Immed = Root.getCImm()->getZExtValue();
685 else if (Root.isReg()) {
686 auto ValAndVReg =
687 getIConstantVRegValWithLookThrough(Root.getReg(), MRI, true);
688 if (!ValAndVReg)
689 return None;
690 Immed = ValAndVReg->Value.getSExtValue();
691 } else
692 return None;
693 return Immed;
694}
695
696/// Check whether \p I is a currently unsupported binary operation:
697/// - it has an unsized type
698/// - an operand is not a vreg
699/// - all operands are not in the same bank
700/// These are checks that should someday live in the verifier, but right now,
701/// these are mostly limitations of the aarch64 selector.
702static bool unsupportedBinOp(const MachineInstr &I,
703 const AArch64RegisterBankInfo &RBI,
704 const MachineRegisterInfo &MRI,
705 const AArch64RegisterInfo &TRI) {
706 LLT Ty = MRI.getType(I.getOperand(0).getReg());
707 if (!Ty.isValid()) {
708 LLVM_DEBUG(dbgs() << "Generic binop register should be typed\n");
709 return true;
710 }
711
712 const RegisterBank *PrevOpBank = nullptr;
713 for (auto &MO : I.operands()) {
714 // FIXME: Support non-register operands.
715 if (!MO.isReg()) {
716 LLVM_DEBUG(dbgs() << "Generic inst non-reg operands are unsupported\n");
717 return true;
718 }
719
720 // FIXME: Can generic operations have physical registers operands? If
721 // so, this will need to be taught about that, and we'll need to get the
722 // bank out of the minimal class for the register.
723 // Either way, this needs to be documented (and possibly verified).
724 if (!Register::isVirtualRegister(MO.getReg())) {
725 LLVM_DEBUG(dbgs() << "Generic inst has physical register operand\n");
726 return true;
727 }
728
729 const RegisterBank *OpBank = RBI.getRegBank(MO.getReg(), MRI, TRI);
730 if (!OpBank) {
731 LLVM_DEBUG(dbgs() << "Generic register has no bank or class\n");
732 return true;
733 }
734
735 if (PrevOpBank && OpBank != PrevOpBank) {
736 LLVM_DEBUG(dbgs() << "Generic inst operands have different banks\n");
737 return true;
738 }
739 PrevOpBank = OpBank;
740 }
741 return false;
742}
743
744/// Select the AArch64 opcode for the basic binary operation \p GenericOpc
745/// (such as G_OR or G_SDIV), appropriate for the register bank \p RegBankID
746/// and of size \p OpSize.
747/// \returns \p GenericOpc if the combination is unsupported.
748static unsigned selectBinaryOp(unsigned GenericOpc, unsigned RegBankID,
749 unsigned OpSize) {
750 switch (RegBankID) {
751 case AArch64::GPRRegBankID:
752 if (OpSize == 32) {
753 switch (GenericOpc) {
754 case TargetOpcode::G_SHL:
755 return AArch64::LSLVWr;
756 case TargetOpcode::G_LSHR:
757 return AArch64::LSRVWr;
758 case TargetOpcode::G_ASHR:
759 return AArch64::ASRVWr;
760 default:
761 return GenericOpc;
762 }
763 } else if (OpSize == 64) {
764 switch (GenericOpc) {
765 case TargetOpcode::G_PTR_ADD:
766 return AArch64::ADDXrr;
767 case TargetOpcode::G_SHL:
768 return AArch64::LSLVXr;
769 case TargetOpcode::G_LSHR:
770 return AArch64::LSRVXr;
771 case TargetOpcode::G_ASHR:
772 return AArch64::ASRVXr;
773 default:
774 return GenericOpc;
775 }
776 }
777 break;
778 case AArch64::FPRRegBankID:
779 switch (OpSize) {
780 case 32:
781 switch (GenericOpc) {
782 case TargetOpcode::G_FADD:
783 return AArch64::FADDSrr;
784 case TargetOpcode::G_FSUB:
785 return AArch64::FSUBSrr;
786 case TargetOpcode::G_FMUL:
787 return AArch64::FMULSrr;
788 case TargetOpcode::G_FDIV:
789 return AArch64::FDIVSrr;
790 default:
791 return GenericOpc;
792 }
793 case 64:
794 switch (GenericOpc) {
795 case TargetOpcode::G_FADD:
796 return AArch64::FADDDrr;
797 case TargetOpcode::G_FSUB:
798 return AArch64::FSUBDrr;
799 case TargetOpcode::G_FMUL:
800 return AArch64::FMULDrr;
801 case TargetOpcode::G_FDIV:
802 return AArch64::FDIVDrr;
803 case TargetOpcode::G_OR:
804 return AArch64::ORRv8i8;
805 default:
806 return GenericOpc;
807 }
808 }
809 break;
810 }
811 return GenericOpc;
812}
813
814/// Select the AArch64 opcode for the G_LOAD or G_STORE operation \p GenericOpc,
815/// appropriate for the (value) register bank \p RegBankID and of memory access
816/// size \p OpSize. This returns the variant with the base+unsigned-immediate
817/// addressing mode (e.g., LDRXui).
818/// \returns \p GenericOpc if the combination is unsupported.
819static unsigned selectLoadStoreUIOp(unsigned GenericOpc, unsigned RegBankID,
820 unsigned OpSize) {
821 const bool isStore = GenericOpc == TargetOpcode::G_STORE;
822 switch (RegBankID) {
823 case AArch64::GPRRegBankID:
824 switch (OpSize) {
825 case 8:
826 return isStore ? AArch64::STRBBui : AArch64::LDRBBui;
827 case 16:
828 return isStore ? AArch64::STRHHui : AArch64::LDRHHui;
829 case 32:
830 return isStore ? AArch64::STRWui : AArch64::LDRWui;
831 case 64:
832 return isStore ? AArch64::STRXui : AArch64::LDRXui;
833 }
834 break;
835 case AArch64::FPRRegBankID:
836 switch (OpSize) {
837 case 8:
838 return isStore ? AArch64::STRBui : AArch64::LDRBui;
839 case 16:
840 return isStore ? AArch64::STRHui : AArch64::LDRHui;
841 case 32:
842 return isStore ? AArch64::STRSui : AArch64::LDRSui;
843 case 64:
844 return isStore ? AArch64::STRDui : AArch64::LDRDui;
845 case 128:
846 return isStore ? AArch64::STRQui : AArch64::LDRQui;
847 }
848 break;
849 }
850 return GenericOpc;
851}
852
853#ifndef NDEBUG
854/// Helper function that verifies that we have a valid copy at the end of
855/// selectCopy. Verifies that the source and dest have the expected sizes and
856/// then returns true.
857static bool isValidCopy(const MachineInstr &I, const RegisterBank &DstBank,
858 const MachineRegisterInfo &MRI,
859 const TargetRegisterInfo &TRI,
860 const RegisterBankInfo &RBI) {
861 const Register DstReg = I.getOperand(0).getReg();
862 const Register SrcReg = I.getOperand(1).getReg();
863 const unsigned DstSize = RBI.getSizeInBits(DstReg, MRI, TRI);
864 const unsigned SrcSize = RBI.getSizeInBits(SrcReg, MRI, TRI);
865
866 // Make sure the size of the source and dest line up.
867 assert(
868 (DstSize == SrcSize ||
869 // Copies are a mean to setup initial types, the number of
870 // bits may not exactly match.
871 (Register::isPhysicalRegister(SrcReg) && DstSize <= SrcSize) ||
872 // Copies are a mean to copy bits around, as long as we are
873 // on the same register class, that's fine. Otherwise, that
874 // means we need some SUBREG_TO_REG or AND & co.
875 (((DstSize + 31) / 32 == (SrcSize + 31) / 32) && DstSize > SrcSize)) &&
876 "Copy with different width?!");
877
878 // Check the size of the destination.
879 assert((DstSize <= 64 || DstBank.getID() == AArch64::FPRRegBankID) &&
880 "GPRs cannot get more than 64-bit width values");
881
882 return true;
883}
884#endif
885
886/// Helper function for selectCopy. Inserts a subregister copy from \p SrcReg
887/// to \p *To.
888///
889/// E.g "To = COPY SrcReg:SubReg"
890static bool copySubReg(MachineInstr &I, MachineRegisterInfo &MRI,
891 const RegisterBankInfo &RBI, Register SrcReg,
892 const TargetRegisterClass *To, unsigned SubReg) {
893 assert(SrcReg.isValid() && "Expected a valid source register?");
894 assert(To && "Destination register class cannot be null");
895 assert(SubReg && "Expected a valid subregister");
896
897 MachineIRBuilder MIB(I);
898 auto SubRegCopy =
899 MIB.buildInstr(TargetOpcode::COPY, {To}, {}).addReg(SrcReg, 0, SubReg);
900 MachineOperand &RegOp = I.getOperand(1);
901 RegOp.setReg(SubRegCopy.getReg(0));
902
903 // It's possible that the destination register won't be constrained. Make
904 // sure that happens.
905 if (!Register::isPhysicalRegister(I.getOperand(0).getReg()))
906 RBI.constrainGenericRegister(I.getOperand(0).getReg(), *To, MRI);
907
908 return true;
909}
910
911/// Helper function to get the source and destination register classes for a
912/// copy. Returns a std::pair containing the source register class for the
913/// copy, and the destination register class for the copy. If a register class
914/// cannot be determined, then it will be nullptr.
915static std::pair<const TargetRegisterClass *, const TargetRegisterClass *>
916getRegClassesForCopy(MachineInstr &I, const TargetInstrInfo &TII,
917 MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI,
918 const RegisterBankInfo &RBI) {
919 Register DstReg = I.getOperand(0).getReg();
920 Register SrcReg = I.getOperand(1).getReg();
921 const RegisterBank &DstRegBank = *RBI.getRegBank(DstReg, MRI, TRI);
922 const RegisterBank &SrcRegBank = *RBI.getRegBank(SrcReg, MRI, TRI);
923 unsigned DstSize = RBI.getSizeInBits(DstReg, MRI, TRI);
924 unsigned SrcSize = RBI.getSizeInBits(SrcReg, MRI, TRI);
925
926 // Special casing for cross-bank copies of s1s. We can technically represent
927 // a 1-bit value with any size of register. The minimum size for a GPR is 32
928 // bits. So, we need to put the FPR on 32 bits as well.
929 //
930 // FIXME: I'm not sure if this case holds true outside of copies. If it does,
931 // then we can pull it into the helpers that get the appropriate class for a
932 // register bank. Or make a new helper that carries along some constraint
933 // information.
934 if (SrcRegBank != DstRegBank && (DstSize == 1 && SrcSize == 1))
935 SrcSize = DstSize = 32;
936
937 return {getMinClassForRegBank(SrcRegBank, SrcSize, true),
938 getMinClassForRegBank(DstRegBank, DstSize, true)};
939}
940
941static bool selectCopy(MachineInstr &I, const TargetInstrInfo &TII,
942 MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI,
943 const RegisterBankInfo &RBI) {
944 Register DstReg = I.getOperand(0).getReg();
945 Register SrcReg = I.getOperand(1).getReg();
946 const RegisterBank &DstRegBank = *RBI.getRegBank(DstReg, MRI, TRI);
947 const RegisterBank &SrcRegBank = *RBI.getRegBank(SrcReg, MRI, TRI);
948
949 // Find the correct register classes for the source and destination registers.
950 const TargetRegisterClass *SrcRC;
951 const TargetRegisterClass *DstRC;
952 std::tie(SrcRC, DstRC) = getRegClassesForCopy(I, TII, MRI, TRI, RBI);
953
954 if (!DstRC) {
955 LLVM_DEBUG(dbgs() << "Unexpected dest size "
956 << RBI.getSizeInBits(DstReg, MRI, TRI) << '\n');
957 return false;
958 }
959
960 // A couple helpers below, for making sure that the copy we produce is valid.
961
962 // Set to true if we insert a SUBREG_TO_REG. If we do this, then we don't want
963 // to verify that the src and dst are the same size, since that's handled by
964 // the SUBREG_TO_REG.
965 bool KnownValid = false;
966
967 // Returns true, or asserts if something we don't expect happens. Instead of
968 // returning true, we return isValidCopy() to ensure that we verify the
969 // result.
970 auto CheckCopy = [&]() {
971 // If we have a bitcast or something, we can't have physical registers.
972 assert((I.isCopy() ||
973 (!Register::isPhysicalRegister(I.getOperand(0).getReg()) &&
974 !Register::isPhysicalRegister(I.getOperand(1).getReg()))) &&
975 "No phys reg on generic operator!");
976 bool ValidCopy = true;
977#ifndef NDEBUG
978 ValidCopy = KnownValid || isValidCopy(I, DstRegBank, MRI, TRI, RBI);
979 assert(ValidCopy && "Invalid copy.");
980#endif
981 (void)KnownValid;
982 return ValidCopy;
983 };
984
985 // Is this a copy? If so, then we may need to insert a subregister copy.
986 if (I.isCopy()) {
987 // Yes. Check if there's anything to fix up.
988 if (!SrcRC) {
989 LLVM_DEBUG(dbgs() << "Couldn't determine source register class\n");
990 return false;
991 }
992
993 unsigned SrcSize = TRI.getRegSizeInBits(*SrcRC);
994 unsigned DstSize = TRI.getRegSizeInBits(*DstRC);
995 unsigned SubReg;
996
997 // If the source bank doesn't support a subregister copy small enough,
998 // then we first need to copy to the destination bank.
999 if (getMinSizeForRegBank(SrcRegBank) > DstSize) {
1000 const TargetRegisterClass *DstTempRC =
1001 getMinClassForRegBank(DstRegBank, SrcSize, /* GetAllRegSet */ true);
1002 getSubRegForClass(DstRC, TRI, SubReg);
1003
1004 MachineIRBuilder MIB(I);
1005 auto Copy = MIB.buildCopy({DstTempRC}, {SrcReg});
1006 copySubReg(I, MRI, RBI, Copy.getReg(0), DstRC, SubReg);
1007 } else if (SrcSize > DstSize) {
1008 // If the source register is bigger than the destination we need to
1009 // perform a subregister copy.
1010 const TargetRegisterClass *SubRegRC =
1011 getMinClassForRegBank(SrcRegBank, DstSize, /* GetAllRegSet */ true);
1012 getSubRegForClass(SubRegRC, TRI, SubReg);
1013 copySubReg(I, MRI, RBI, SrcReg, DstRC, SubReg);
1014 } else if (DstSize > SrcSize) {
1015 // If the destination register is bigger than the source we need to do
1016 // a promotion using SUBREG_TO_REG.
1017 const TargetRegisterClass *PromotionRC =
1018 getMinClassForRegBank(SrcRegBank, DstSize, /* GetAllRegSet */ true);
1019 getSubRegForClass(SrcRC, TRI, SubReg);
1020
1021 Register PromoteReg = MRI.createVirtualRegister(PromotionRC);
1022 BuildMI(*I.getParent(), I, I.getDebugLoc(),
1023 TII.get(AArch64::SUBREG_TO_REG), PromoteReg)
1024 .addImm(0)
1025 .addUse(SrcReg)
1026 .addImm(SubReg);
1027 MachineOperand &RegOp = I.getOperand(1);
1028 RegOp.setReg(PromoteReg);
1029
1030 // Promise that the copy is implicitly validated by the SUBREG_TO_REG.
1031 KnownValid = true;
1032 }
1033
1034 // If the destination is a physical register, then there's nothing to
1035 // change, so we're done.
1036 if (Register::isPhysicalRegister(DstReg))
1037 return CheckCopy();
1038 }
1039
1040 // No need to constrain SrcReg. It will get constrained when we hit another
1041 // of its use or its defs. Copies do not have constraints.
1042 if (!RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) {
1043 LLVM_DEBUG(dbgs() << "Failed to constrain " << TII.getName(I.getOpcode())
1044 << " operand\n");
1045 return false;
1046 }
1047
1048 // If this is a GPR ZEXT that we want to just reduce down into a copy.
1049 // The sizes will be mismatched with the source < 32b but that's ok.
1050 if (I.getOpcode() == TargetOpcode::G_ZEXT) {
1051 I.setDesc(TII.get(AArch64::COPY));
1052 assert(SrcRegBank.getID() == AArch64::GPRRegBankID);
1053 return selectCopy(I, TII, MRI, TRI, RBI);
1054 }
1055
1056 I.setDesc(TII.get(AArch64::COPY));
1057 return CheckCopy();
1058}
1059
1060static unsigned selectFPConvOpc(unsigned GenericOpc, LLT DstTy, LLT SrcTy) {
1061 if (!DstTy.isScalar() || !SrcTy.isScalar())
1062 return GenericOpc;
1063
1064 const unsigned DstSize = DstTy.getSizeInBits();
1065 const unsigned SrcSize = SrcTy.getSizeInBits();
1066
1067 switch (DstSize) {
1068 case 32:
1069 switch (SrcSize) {
1070 case 32:
1071 switch (GenericOpc) {
1072 case TargetOpcode::G_SITOFP:
1073 return AArch64::SCVTFUWSri;
1074 case TargetOpcode::G_UITOFP:
1075 return AArch64::UCVTFUWSri;
1076 case TargetOpcode::G_FPTOSI:
1077 return AArch64::FCVTZSUWSr;
1078 case TargetOpcode::G_FPTOUI:
1079 return AArch64::FCVTZUUWSr;
1080 default:
1081 return GenericOpc;
1082 }
1083 case 64:
1084 switch (GenericOpc) {
1085 case TargetOpcode::G_SITOFP:
1086 return AArch64::SCVTFUXSri;
1087 case TargetOpcode::G_UITOFP:
1088 return AArch64::UCVTFUXSri;
1089 case TargetOpcode::G_FPTOSI:
1090 return AArch64::FCVTZSUWDr;
1091 case TargetOpcode::G_FPTOUI:
1092 return AArch64::FCVTZUUWDr;
1093 default:
1094 return GenericOpc;
1095 }
1096 default:
1097 return GenericOpc;
1098 }
1099 case 64:
1100 switch (SrcSize) {
1101 case 32:
1102 switch (GenericOpc) {
1103 case TargetOpcode::G_SITOFP:
1104 return AArch64::SCVTFUWDri;
1105 case TargetOpcode::G_UITOFP:
1106 return AArch64::UCVTFUWDri;
1107 case TargetOpcode::G_FPTOSI:
1108 return AArch64::FCVTZSUXSr;
1109 case TargetOpcode::G_FPTOUI:
1110 return AArch64::FCVTZUUXSr;
1111 default:
1112 return GenericOpc;
1113 }
1114 case 64:
1115 switch (GenericOpc) {
1116 case TargetOpcode::G_SITOFP:
1117 return AArch64::SCVTFUXDri;
1118 case TargetOpcode::G_UITOFP:
1119 return AArch64::UCVTFUXDri;
1120 case TargetOpcode::G_FPTOSI:
1121 return AArch64::FCVTZSUXDr;
1122 case TargetOpcode::G_FPTOUI:
1123 return AArch64::FCVTZUUXDr;
1124 default:
1125 return GenericOpc;
1126 }
1127 default:
1128 return GenericOpc;
1129 }
1130 default:
1131 return GenericOpc;
1132 };
1133 return GenericOpc;
1134}
1135
1136MachineInstr *
1137AArch64InstructionSelector::emitSelect(Register Dst, Register True,
1138 Register False, AArch64CC::CondCode CC,
1139 MachineIRBuilder &MIB) const {
1140 MachineRegisterInfo &MRI = *MIB.getMRI();
1141 assert(RBI.getRegBank(False, MRI, TRI)->getID() ==
1142            RBI.getRegBank(True, MRI, TRI)->getID() &&
1143        "Expected both select operands to have the same regbank?");
1144 LLT Ty = MRI.getType(True);
1145 if (Ty.isVector())
1146 return nullptr;
1147 const unsigned Size = Ty.getSizeInBits();
1148 assert((Size == 32 || Size == 64) &&
1149        "Expected 32 bit or 64 bit select only?");
1150 const bool Is32Bit = Size == 32;
1151 if (RBI.getRegBank(True, MRI, TRI)->getID() != AArch64::GPRRegBankID) {
1152 unsigned Opc = Is32Bit ? AArch64::FCSELSrrr : AArch64::FCSELDrrr;
1153 auto FCSel = MIB.buildInstr(Opc, {Dst}, {True, False}).addImm(CC);
1154 constrainSelectedInstRegOperands(*FCSel, TII, TRI, RBI);
1155 return &*FCSel;
1156 }
1157
1158 // By default, we'll try and emit a CSEL.
1159 unsigned Opc = Is32Bit ? AArch64::CSELWr : AArch64::CSELXr;
1160 bool Optimized = false;
1161 auto TryFoldBinOpIntoSelect = [&Opc, Is32Bit, &CC, &MRI,
1162 &Optimized](Register &Reg, Register &OtherReg,
1163 bool Invert) {
1164 if (Optimized)
1165 return false;
1166
1167 // Attempt to fold:
1168 //
1169 // %sub = G_SUB 0, %x
1170 // %select = G_SELECT cc, %reg, %sub
1171 //
1172 // Into:
1173 // %select = CSNEG %reg, %x, cc
1174 Register MatchReg;
1175 if (mi_match(Reg, MRI, m_Neg(m_Reg(MatchReg)))) {
1176 Opc = Is32Bit ? AArch64::CSNEGWr : AArch64::CSNEGXr;
1177 Reg = MatchReg;
1178 if (Invert) {
1179 CC = AArch64CC::getInvertedCondCode(CC);
1180 std::swap(Reg, OtherReg);
1181 }
1182 return true;
1183 }
1184
1185 // Attempt to fold:
1186 //
1187 // %xor = G_XOR %x, -1
1188 // %select = G_SELECT cc, %reg, %xor
1189 //
1190 // Into:
1191 // %select = CSINV %reg, %x, cc
1192 if (mi_match(Reg, MRI, m_Not(m_Reg(MatchReg)))) {
1193 Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;
1194 Reg = MatchReg;
1195 if (Invert) {
1196 CC = AArch64CC::getInvertedCondCode(CC);
1197 std::swap(Reg, OtherReg);
1198 }
1199 return true;
1200 }
1201
1202 // Attempt to fold:
1203 //
1204 // %add = G_ADD %x, 1
1205 // %select = G_SELECT cc, %reg, %add
1206 //
1207 // Into:
1208 // %select = CSINC %reg, %x, cc
1209 if (mi_match(Reg, MRI,
1210 m_any_of(m_GAdd(m_Reg(MatchReg), m_SpecificICst(1)),
1211 m_GPtrAdd(m_Reg(MatchReg), m_SpecificICst(1))))) {
1212 Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;
1213 Reg = MatchReg;
1214 if (Invert) {
1215 CC = AArch64CC::getInvertedCondCode(CC);
1216 std::swap(Reg, OtherReg);
1217 }
1218 return true;
1219 }
1220
1221 return false;
1222 };
1223
1224 // Helper lambda which tries to use CSINC/CSINV for the instruction when its
1225 // true/false values are constants.
1226 // FIXME: All of these patterns already exist in tablegen. We should be
1227 // able to import these.
1228 auto TryOptSelectCst = [&Opc, &True, &False, &CC, Is32Bit, &MRI,
1229 &Optimized]() {
1230 if (Optimized)
1231 return false;
1232 auto TrueCst = getIConstantVRegValWithLookThrough(True, MRI);
1233 auto FalseCst = getIConstantVRegValWithLookThrough(False, MRI);
1234 if (!TrueCst && !FalseCst)
1235 return false;
1236
1237 Register ZReg = Is32Bit ? AArch64::WZR : AArch64::XZR;
1238 if (TrueCst && FalseCst) {
1239 int64_t T = TrueCst->Value.getSExtValue();
1240 int64_t F = FalseCst->Value.getSExtValue();
1241
1242 if (T == 0 && F == 1) {
1243 // G_SELECT cc, 0, 1 -> CSINC zreg, zreg, cc
1244 Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;
1245 True = ZReg;
1246 False = ZReg;
1247 return true;
1248 }
1249
1250 if (T == 0 && F == -1) {
1251 // G_SELECT cc, 0, -1 -> CSINV zreg, zreg, cc
1252 Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;
1253 True = ZReg;
1254 False = ZReg;
1255 return true;
1256 }
1257 }
1258
1259 if (TrueCst) {
1260 int64_t T = TrueCst->Value.getSExtValue();
1261 if (T == 1) {
1262 // G_SELECT cc, 1, f -> CSINC f, zreg, inv_cc
1263 Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;
1264 True = False;
1265 False = ZReg;
1266 CC = AArch64CC::getInvertedCondCode(CC);
1267 return true;
1268 }
1269
1270 if (T == -1) {
1271 // G_SELECT cc, -1, f -> CSINV f, zreg, inv_cc
1272 Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;
1273 True = False;
1274 False = ZReg;
1275 CC = AArch64CC::getInvertedCondCode(CC);
1276 return true;
1277 }
1278 }
1279
1280 if (FalseCst) {
1281 int64_t F = FalseCst->Value.getSExtValue();
1282 if (F == 1) {
1283 // G_SELECT cc, t, 1 -> CSINC t, zreg, cc
1284 Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;
1285 False = ZReg;
1286 return true;
1287 }
1288
1289 if (F == -1) {
1290 // G_SELECT cc, t, -1 -> CSINV t, zreg, cc
1291 Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;
1292 False = ZReg;
1293 return true;
1294 }
1295 }
1296 return false;
1297 };
1298
1299 Optimized |= TryFoldBinOpIntoSelect(False, True, /*Invert = */ false);
1300 Optimized |= TryFoldBinOpIntoSelect(True, False, /*Invert = */ true);
1301 Optimized |= TryOptSelectCst();
1302 auto SelectInst = MIB.buildInstr(Opc, {Dst}, {True, False}).addImm(CC);
1303 constrainSelectedInstRegOperands(*SelectInst, TII, TRI, RBI);
1304 return &*SelectInst;
1305}
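For reference, the conditional-select identities that TryFoldBinOpIntoSelect and TryOptSelectCst lean on can be written as plain C++ over an already-evaluated condition. This is a standalone sketch with invented function names, not LLVM code:

#include <cstdint>

// CSEL:  Dst = Cond ? True : False
// CSINC: Dst = Cond ? True : False + 1
// CSINV: Dst = Cond ? True : ~False
// CSNEG: Dst = Cond ? True : -False
int64_t cselLike(bool Cond, int64_t T, int64_t F)  { return Cond ? T : F; }
int64_t csincLike(bool Cond, int64_t T, int64_t F) { return Cond ? T : F + 1; }
int64_t csinvLike(bool Cond, int64_t T, int64_t F) { return Cond ? T : ~F; }
int64_t csnegLike(bool Cond, int64_t T, int64_t F) { return Cond ? T : -F; }

For example, G_SELECT cc, 0, 1 behaves like csincLike(cc, 0, 0), i.e. CSINC with both sources tied to the zero register, and G_SELECT cc, 1, f behaves like csincLike(!cc, f, 0), which is why the code above replaces the operands and inverts the condition code.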
1306
1307static AArch64CC::CondCode changeICMPPredToAArch64CC(CmpInst::Predicate P) {
1308 switch (P) {
1309 default:
1310 llvm_unreachable("Unknown condition code!");
1311 case CmpInst::ICMP_NE:
1312 return AArch64CC::NE;
1313 case CmpInst::ICMP_EQ:
1314 return AArch64CC::EQ;
1315 case CmpInst::ICMP_SGT:
1316 return AArch64CC::GT;
1317 case CmpInst::ICMP_SGE:
1318 return AArch64CC::GE;
1319 case CmpInst::ICMP_SLT:
1320 return AArch64CC::LT;
1321 case CmpInst::ICMP_SLE:
1322 return AArch64CC::LE;
1323 case CmpInst::ICMP_UGT:
1324 return AArch64CC::HI;
1325 case CmpInst::ICMP_UGE:
1326 return AArch64CC::HS;
1327 case CmpInst::ICMP_ULT:
1328 return AArch64CC::LO;
1329 case CmpInst::ICMP_ULE:
1330 return AArch64CC::LS;
1331 }
1332}
1333
1334/// changeFPCCToORAArch64CC - Convert an IR fp condition code to an AArch64 CC.
1335static void changeFPCCToORAArch64CC(CmpInst::Predicate CC,
1336 AArch64CC::CondCode &CondCode,
1337 AArch64CC::CondCode &CondCode2) {
1338 CondCode2 = AArch64CC::AL;
1339 switch (CC) {
1340 default:
1341 llvm_unreachable("Unknown FP condition!");
1342 case CmpInst::FCMP_OEQ:
1343 CondCode = AArch64CC::EQ;
1344 break;
1345 case CmpInst::FCMP_OGT:
1346 CondCode = AArch64CC::GT;
1347 break;
1348 case CmpInst::FCMP_OGE:
1349 CondCode = AArch64CC::GE;
1350 break;
1351 case CmpInst::FCMP_OLT:
1352 CondCode = AArch64CC::MI;
1353 break;
1354 case CmpInst::FCMP_OLE:
1355 CondCode = AArch64CC::LS;
1356 break;
1357 case CmpInst::FCMP_ONE:
1358 CondCode = AArch64CC::MI;
1359 CondCode2 = AArch64CC::GT;
1360 break;
1361 case CmpInst::FCMP_ORD:
1362 CondCode = AArch64CC::VC;
1363 break;
1364 case CmpInst::FCMP_UNO:
1365 CondCode = AArch64CC::VS;
1366 break;
1367 case CmpInst::FCMP_UEQ:
1368 CondCode = AArch64CC::EQ;
1369 CondCode2 = AArch64CC::VS;
1370 break;
1371 case CmpInst::FCMP_UGT:
1372 CondCode = AArch64CC::HI;
1373 break;
1374 case CmpInst::FCMP_UGE:
1375 CondCode = AArch64CC::PL;
1376 break;
1377 case CmpInst::FCMP_ULT:
1378 CondCode = AArch64CC::LT;
1379 break;
1380 case CmpInst::FCMP_ULE:
1381 CondCode = AArch64CC::LE;
1382 break;
1383 case CmpInst::FCMP_UNE:
1384 CondCode = AArch64CC::NE;
1385 break;
1386 }
1387}
1388
1389/// Convert an IR fp condition code to an AArch64 CC.
1390/// This differs from changeFPCCToAArch64CC in that it returns cond codes that
1391/// should be AND'ed instead of OR'ed.
1392static void changeFPCCToANDAArch64CC(CmpInst::Predicate CC,
1393 AArch64CC::CondCode &CondCode,
1394 AArch64CC::CondCode &CondCode2) {
1395 CondCode2 = AArch64CC::AL;
1396 switch (CC) {
1397 default:
1398 changeFPCCToORAArch64CC(CC, CondCode, CondCode2);
1399 assert(CondCode2 == AArch64CC::AL);
1400 break;
1401 case CmpInst::FCMP_ONE:
1402 // (a one b)
1403 // == ((a olt b) || (a ogt b))
1404 // == ((a ord b) && (a une b))
1405 CondCode = AArch64CC::VC;
1406 CondCode2 = AArch64CC::NE;
1407 break;
1408 case CmpInst::FCMP_UEQ:
1409 // (a ueq b)
1410 // == ((a uno b) || (a oeq b))
1411 // == ((a ule b) && (a uge b))
1412 CondCode = AArch64CC::PL;
1413 CondCode2 = AArch64CC::LE;
1414 break;
1415 }
1416}
1417
1418/// Return a register which can be used as a bit to test in a TB(N)Z.
1419static Register getTestBitReg(Register Reg, uint64_t &Bit, bool &Invert,
1420 MachineRegisterInfo &MRI) {
1421 assert(Reg.isValid() && "Expected valid register!");
1422 bool HasZext = false;
1423 while (MachineInstr *MI = getDefIgnoringCopies(Reg, MRI)) {
1424 unsigned Opc = MI->getOpcode();
1425
1426 if (!MI->getOperand(0).isReg() ||
1427 !MRI.hasOneNonDBGUse(MI->getOperand(0).getReg()))
1428 break;
1429
1430 // (tbz (any_ext x), b) -> (tbz x, b) if we don't use the extended bits.
1431 //
1432 // (tbz (trunc x), b) -> (tbz x, b) is always safe, because the bit number
1433 // on the truncated x is the same as the bit number on x.
1434 if (Opc == TargetOpcode::G_ANYEXT || Opc == TargetOpcode::G_ZEXT ||
1435 Opc == TargetOpcode::G_TRUNC) {
1436 if (Opc == TargetOpcode::G_ZEXT)
1437 HasZext = true;
1438
1439 Register NextReg = MI->getOperand(1).getReg();
1440 // Did we find something worth folding?
1441 if (!NextReg.isValid() || !MRI.hasOneNonDBGUse(NextReg))
1442 break;
1443
1444 // NextReg is worth folding. Keep looking.
1445 Reg = NextReg;
1446 continue;
1447 }
1448
1449 // Attempt to find a suitable operation with a constant on one side.
1450 Optional<uint64_t> C;
1451 Register TestReg;
1452 switch (Opc) {
1453 default:
1454 break;
1455 case TargetOpcode::G_AND:
1456 case TargetOpcode::G_XOR: {
1457 TestReg = MI->getOperand(1).getReg();
1458 Register ConstantReg = MI->getOperand(2).getReg();
1459 auto VRegAndVal = getIConstantVRegValWithLookThrough(ConstantReg, MRI);
1460 if (!VRegAndVal) {
1461 // AND commutes, check the other side for a constant.
1462 // FIXME: Can we canonicalize the constant so that it's always on the
1463 // same side at some point earlier?
1464 std::swap(ConstantReg, TestReg);
1465 VRegAndVal = getIConstantVRegValWithLookThrough(ConstantReg, MRI);
1466 }
1467 if (VRegAndVal) {
1468 if (HasZext)
1469 C = VRegAndVal->Value.getZExtValue();
1470 else
1471 C = VRegAndVal->Value.getSExtValue();
1472 }
1473 break;
1474 }
1475 case TargetOpcode::G_ASHR:
1476 case TargetOpcode::G_LSHR:
1477 case TargetOpcode::G_SHL: {
1478 TestReg = MI->getOperand(1).getReg();
1479 auto VRegAndVal =
1480 getIConstantVRegValWithLookThrough(MI->getOperand(2).getReg(), MRI);
1481 if (VRegAndVal)
1482 C = VRegAndVal->Value.getSExtValue();
1483 break;
1484 }
1485 }
1486
1487 // Didn't find a constant or viable register. Bail out of the loop.
1488 if (!C || !TestReg.isValid())
1489 break;
1490
1491 // We found a suitable instruction with a constant. Check to see if we can
1492 // walk through the instruction.
1493 Register NextReg;
1494 unsigned TestRegSize = MRI.getType(TestReg).getSizeInBits();
1495 switch (Opc) {
1496 default:
1497 break;
1498 case TargetOpcode::G_AND:
1499 // (tbz (and x, m), b) -> (tbz x, b) when the b-th bit of m is set.
1500 if ((*C >> Bit) & 1)
1501 NextReg = TestReg;
1502 break;
1503 case TargetOpcode::G_SHL:
1504 // (tbz (shl x, c), b) -> (tbz x, b-c) when b-c is positive and fits in
1505 // the type of the register.
1506 if (*C <= Bit && (Bit - *C) < TestRegSize) {
1507 NextReg = TestReg;
1508 Bit = Bit - *C;
1509 }
1510 break;
1511 case TargetOpcode::G_ASHR:
1512 // (tbz (ashr x, c), b) -> (tbz x, b+c) or (tbz x, msb) if b+c is > # bits
1513 // in x
1514 NextReg = TestReg;
1515 Bit = Bit + *C;
1516 if (Bit >= TestRegSize)
1517 Bit = TestRegSize - 1;
1518 break;
1519 case TargetOpcode::G_LSHR:
1520 // (tbz (lshr x, c), b) -> (tbz x, b+c) when b + c is < # bits in x
1521 if ((Bit + *C) < TestRegSize) {
1522 NextReg = TestReg;
1523 Bit = Bit + *C;
1524 }
1525 break;
1526 case TargetOpcode::G_XOR:
1527 // We can walk through a G_XOR by inverting whether we use tbz/tbnz when
1528 // appropriate.
1529 //
1530 // e.g. If x' = xor x, c, and the b-th bit is set in c then
1531 //
1532 // tbz x', b -> tbnz x, b
1533 //
1534 // Because x' only has the b-th bit set if x does not.
1535 if ((*C >> Bit) & 1)
1536 Invert = !Invert;
1537 NextReg = TestReg;
1538 break;
1539 }
1540
1541 // Check if we found anything worth folding.
1542 if (!NextReg.isValid())
1543 return Reg;
1544 Reg = NextReg;
1545 }
1546
1547 return Reg;
1548}
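The bit renumbering that the loop above performs when it walks a test bit through a constant shift can be summarized by a small standalone helper. This is only an illustrative sketch with invented names, not part of the selector:

#include <cstdint>
#include <optional>

// Given "test bit Bit of (x shifted by C)" on a Width-bit value, return the
// bit of x that carries the same information, or nullopt if the fold is
// invalid for this shift kind.
std::optional<uint64_t> walkShiftForTestBit(unsigned ShiftKind, // 0=shl, 1=lshr, 2=ashr
                                            uint64_t C, uint64_t Bit,
                                            uint64_t Width) {
  switch (ShiftKind) {
  case 0: // shl: bit Bit of (x << C) is bit (Bit - C) of x, if that bit exists.
    if (C <= Bit && Bit - C < Width)
      return Bit - C;
    return std::nullopt;
  case 1: // lshr: bit Bit of (x >> C) is bit (Bit + C) of x, if still in range.
    if (Bit + C < Width)
      return Bit + C;
    return std::nullopt;
  case 2: // ashr: like lshr, except out-of-range bits replicate the sign bit.
    return Bit + C < Width ? Bit + C : Width - 1;
  default:
    return std::nullopt;
  }
}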
1549
1550MachineInstr *AArch64InstructionSelector::emitTestBit(
1551 Register TestReg, uint64_t Bit, bool IsNegative, MachineBasicBlock *DstMBB,
1552 MachineIRBuilder &MIB) const {
1553 assert(TestReg.isValid());
1554 assert(ProduceNonFlagSettingCondBr &&
1555        "Cannot emit TB(N)Z with speculation tracking!");
1556 MachineRegisterInfo &MRI = *MIB.getMRI();
1557
1558 // Attempt to optimize the test bit by walking over instructions.
1559 TestReg = getTestBitReg(TestReg, Bit, IsNegative, MRI);
1560 LLT Ty = MRI.getType(TestReg);
1561 unsigned Size = Ty.getSizeInBits();
1562 assert(!Ty.isVector() && "Expected a scalar!");
1563 assert(Bit < 64 && "Bit is too large!");
1564
1565 // When the test register is a 64-bit register, we have to narrow to make
1566 // TBNZW work.
1567 bool UseWReg = Bit < 32;
1568 unsigned NecessarySize = UseWReg ? 32 : 64;
1569 if (Size != NecessarySize)
1570 TestReg = moveScalarRegClass(
1571 TestReg, UseWReg ? AArch64::GPR32RegClass : AArch64::GPR64RegClass,
1572 MIB);
1573
1574 static const unsigned OpcTable[2][2] = {{AArch64::TBZX, AArch64::TBNZX},
1575 {AArch64::TBZW, AArch64::TBNZW}};
1576 unsigned Opc = OpcTable[UseWReg][IsNegative];
1577 auto TestBitMI =
1578 MIB.buildInstr(Opc).addReg(TestReg).addImm(Bit).addMBB(DstMBB);
1579 constrainSelectedInstRegOperands(*TestBitMI, TII, TRI, RBI);
1580 return &*TestBitMI;
1581}
1582
1583bool AArch64InstructionSelector::tryOptAndIntoCompareBranch(
1584 MachineInstr &AndInst, bool Invert, MachineBasicBlock *DstMBB,
1585 MachineIRBuilder &MIB) const {
1586 assert(AndInst.getOpcode() == TargetOpcode::G_AND && "Expected G_AND only?");
1587 // Given something like this:
1588 //
1589 // %x = ...Something...
1590 // %one = G_CONSTANT i64 1
1591 // %zero = G_CONSTANT i64 0
1592 // %and = G_AND %x, %one
1593 // %cmp = G_ICMP intpred(ne), %and, %zero
1594 // %cmp_trunc = G_TRUNC %cmp
1595 // G_BRCOND %cmp_trunc, %bb.3
1596 //
1597 // We want to try and fold the AND into the G_BRCOND and produce either a
1598 // TBNZ (when we have intpred(ne)) or a TBZ (when we have intpred(eq)).
1599 //
1600 // In this case, we'd get
1601 //
1602 // TBNZ %x %bb.3
1603 //
1604
1605 // Check if the AND has a constant on its RHS which we can use as a mask.
1606 // If it's a power of 2, then it's the same as checking a specific bit.
1607 // (e.g, ANDing with 8 == ANDing with 000...100 == testing if bit 3 is set)
1608 auto MaybeBit = getIConstantVRegValWithLookThrough(
1609 AndInst.getOperand(2).getReg(), *MIB.getMRI());
1610 if (!MaybeBit)
1611 return false;
1612
1613 int32_t Bit = MaybeBit->Value.exactLogBase2();
1614 if (Bit < 0)
1615 return false;
1616
1617 Register TestReg = AndInst.getOperand(1).getReg();
1618
1619 // Emit a TB(N)Z.
1620 emitTestBit(TestReg, Bit, Invert, DstMBB, MIB);
1621 return true;
1622}
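The exactLogBase2 step above encodes the fact that an AND with a power-of-two mask followed by a compare against zero tests exactly one bit. A minimal standalone sketch using a compiler builtin instead of APInt:

#include <cstdint>

// Returns the bit index a TB(N)Z would test for this AND mask, or -1 if the
// mask does not have exactly one bit set.
int maskToTestBit(uint64_t Mask) {
  if (Mask == 0 || (Mask & (Mask - 1)) != 0)
    return -1;
  return __builtin_ctzll(Mask); // e.g. Mask == 8 -> bit 3
}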
1623
1624MachineInstr *AArch64InstructionSelector::emitCBZ(Register CompareReg,
1625 bool IsNegative,
1626 MachineBasicBlock *DestMBB,
1627 MachineIRBuilder &MIB) const {
1628 assert(ProduceNonFlagSettingCondBr && "CBZ does not set flags!");
1629 MachineRegisterInfo &MRI = *MIB.getMRI();
1630 assert(RBI.getRegBank(CompareReg, MRI, TRI)->getID() ==
1631            AArch64::GPRRegBankID &&
1632        "Expected GPRs only?");
1633 auto Ty = MRI.getType(CompareReg);
1634 unsigned Width = Ty.getSizeInBits();
1635 assert(!Ty.isVector() && "Expected scalar only?");
1636 assert(Width <= 64 && "Expected width to be at most 64?");
1637 static const unsigned OpcTable[2][2] = {{AArch64::CBZW, AArch64::CBZX},
1638 {AArch64::CBNZW, AArch64::CBNZX}};
1639 unsigned Opc = OpcTable[IsNegative][Width == 64];
1640 auto BranchMI = MIB.buildInstr(Opc, {}, {CompareReg}).addMBB(DestMBB);
1641 constrainSelectedInstRegOperands(*BranchMI, TII, TRI, RBI);
1642 return &*BranchMI;
1643}
1644
1645bool AArch64InstructionSelector::selectCompareBranchFedByFCmp(
1646 MachineInstr &I, MachineInstr &FCmp, MachineIRBuilder &MIB) const {
1647 assert(FCmp.getOpcode() == TargetOpcode::G_FCMP);
1648 assert(I.getOpcode() == TargetOpcode::G_BRCOND);
1649 // Unfortunately, the mapping of LLVM FP CC's onto AArch64 CC's isn't
1650 // totally clean. Some of them require two branches to implement.
1651 auto Pred = (CmpInst::Predicate)FCmp.getOperand(1).getPredicate();
1652 emitFPCompare(FCmp.getOperand(2).getReg(), FCmp.getOperand(3).getReg(), MIB,
1653 Pred);
1654 AArch64CC::CondCode CC1, CC2;
1655 changeFCMPPredToAArch64CC(static_cast<CmpInst::Predicate>(Pred), CC1, CC2);
1656 MachineBasicBlock *DestMBB = I.getOperand(1).getMBB();
1657 MIB.buildInstr(AArch64::Bcc, {}, {}).addImm(CC1).addMBB(DestMBB);
1658 if (CC2 != AArch64CC::AL)
1659 MIB.buildInstr(AArch64::Bcc, {}, {}).addImm(CC2).addMBB(DestMBB);
1660 I.eraseFromParent();
1661 return true;
1662}
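A second Bcc is needed because a few IR FP predicates do not correspond to a single AArch64 condition after one FCMP. A standalone sketch of the FCMP_ONE case (ordered not-equal), matching the MI/GT pair that changeFPCCToORAArch64CC above assigns to it; this is illustrative C++, not LLVM code:

// Ordered not-equal is "less than OR greater than"; each half is one Bcc.
bool orderedNotEqual(double A, double B) {
  bool Lt = A < B; // condition MI after the fcmp (ordered less-than)
  bool Gt = A > B; // condition GT after the fcmp (ordered greater-than)
  return Lt || Gt; // false whenever either operand is NaN
}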
1663
1664bool AArch64InstructionSelector::tryOptCompareBranchFedByICmp(
1665 MachineInstr &I, MachineInstr &ICmp, MachineIRBuilder &MIB) const {
1666 assert(ICmp.getOpcode() == TargetOpcode::G_ICMP);
1667 assert(I.getOpcode() == TargetOpcode::G_BRCOND);
1668 // Attempt to optimize the G_BRCOND + G_ICMP into a TB(N)Z/CB(N)Z.
1669 //
1670 // Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z
1671 // instructions will not be produced, as they are conditional branch
1672 // instructions that do not set flags.
1673 if (!ProduceNonFlagSettingCondBr)
1674 return false;
1675
1676 MachineRegisterInfo &MRI = *MIB.getMRI();
1677 MachineBasicBlock *DestMBB = I.getOperand(1).getMBB();
1678 auto Pred =
1679 static_cast<CmpInst::Predicate>(ICmp.getOperand(1).getPredicate());
1680 Register LHS = ICmp.getOperand(2).getReg();
1681 Register RHS = ICmp.getOperand(3).getReg();
1682
1683 // We're allowed to emit a TB(N)Z/CB(N)Z. Try to do that.
1684 auto VRegAndVal = getIConstantVRegValWithLookThrough(RHS, MRI);
1685 MachineInstr *AndInst = getOpcodeDef(TargetOpcode::G_AND, LHS, MRI);
1686
1687 // When we can emit a TB(N)Z, prefer that.
1688 //
1689 // Handle non-commutative condition codes first.
1690 // Note that we don't want to do this when we have a G_AND because it can
1691 // become a tst. The tst will make the test bit in the TB(N)Z redundant.
1692 if (VRegAndVal && !AndInst) {
1693 int64_t C = VRegAndVal->Value.getSExtValue();
1694
1695 // When we have a greater-than comparison, we can just test if the msb is
1696 // zero.
1697 if (C == -1 && Pred == CmpInst::ICMP_SGT) {
1698 uint64_t Bit = MRI.getType(LHS).getSizeInBits() - 1;
1699 emitTestBit(LHS, Bit, /*IsNegative = */ false, DestMBB, MIB);
1700 I.eraseFromParent();
1701 return true;
1702 }
1703
1704 // When we have a less than comparison, we can just test if the msb is not
1705 // zero.
1706 if (C == 0 && Pred == CmpInst::ICMP_SLT) {
1707 uint64_t Bit = MRI.getType(LHS).getSizeInBits() - 1;
1708 emitTestBit(LHS, Bit, /*IsNegative = */ true, DestMBB, MIB);
1709 I.eraseFromParent();
1710 return true;
1711 }
1712 }
1713
1714 // Attempt to handle commutative condition codes. Right now, that's only
1715 // eq/ne.
1716 if (ICmpInst::isEquality(Pred)) {
1717 if (!VRegAndVal) {
1718 std::swap(RHS, LHS);
1719 VRegAndVal = getIConstantVRegValWithLookThrough(RHS, MRI);
1720 AndInst = getOpcodeDef(TargetOpcode::G_AND, LHS, MRI);
1721 }
1722
1723 if (VRegAndVal && VRegAndVal->Value == 0) {
1724 // If there's a G_AND feeding into this branch, try to fold it away by
1725 // emitting a TB(N)Z instead.
1726 //
1727 // Note: If we have LT, then it *is* possible to fold, but it wouldn't be
1728 // beneficial. When we have an AND and LT, we need a TST/ANDS, so folding
1729 // would be redundant.
1730 if (AndInst &&
1731 tryOptAndIntoCompareBranch(
1732 *AndInst, /*Invert = */ Pred == CmpInst::ICMP_NE, DestMBB, MIB)) {
1733 I.eraseFromParent();
1734 return true;
1735 }
1736
1737 // Otherwise, try to emit a CB(N)Z instead.
1738 auto LHSTy = MRI.getType(LHS);
1739 if (!LHSTy.isVector() && LHSTy.getSizeInBits() <= 64) {
1740 emitCBZ(LHS, /*IsNegative = */ Pred == CmpInst::ICMP_NE, DestMBB, MIB);
1741 I.eraseFromParent();
1742 return true;
1743 }
1744 }
1745 }
1746
1747 return false;
1748}
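The two special cases above work because, for signed integers, "x > -1" and "x < 0" depend only on the most significant bit. A minimal 64-bit sketch with invented helper names:

#include <cstdint>

bool isSgtMinusOne(int64_t X) { return ((uint64_t)X >> 63) == 0; } // -> TBZ  x, #63
bool isSltZero(int64_t X)     { return ((uint64_t)X >> 63) != 0; } // -> TBNZ x, #63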
1749
1750bool AArch64InstructionSelector::selectCompareBranchFedByICmp(
1751 MachineInstr &I, MachineInstr &ICmp, MachineIRBuilder &MIB) const {
1752 assert(ICmp.getOpcode() == TargetOpcode::G_ICMP);
1753 assert(I.getOpcode() == TargetOpcode::G_BRCOND);
1754 if (tryOptCompareBranchFedByICmp(I, ICmp, MIB))
1755 return true;
1756
1757 // Couldn't optimize. Emit a compare + a Bcc.
1758 MachineBasicBlock *DestMBB = I.getOperand(1).getMBB();
1759 auto PredOp = ICmp.getOperand(1);
1760 emitIntegerCompare(ICmp.getOperand(2), ICmp.getOperand(3), PredOp, MIB);
1761 const AArch64CC::CondCode CC = changeICMPPredToAArch64CC(
1762 static_cast<CmpInst::Predicate>(PredOp.getPredicate()));
1763 MIB.buildInstr(AArch64::Bcc, {}, {}).addImm(CC).addMBB(DestMBB);
1764 I.eraseFromParent();
1765 return true;
1766}
1767
1768bool AArch64InstructionSelector::selectCompareBranch(
1769 MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) {
1770 Register CondReg = I.getOperand(0).getReg();
1771 MachineInstr *CCMI = MRI.getVRegDef(CondReg);
1772 if (CCMI->getOpcode() == TargetOpcode::G_TRUNC) {
1773 CondReg = CCMI->getOperand(1).getReg();
1774 CCMI = MRI.getVRegDef(CondReg);
1775 }
1776
1777 // Try to select the G_BRCOND using whatever is feeding the condition if
1778 // possible.
1779 unsigned CCMIOpc = CCMI->getOpcode();
1780 if (CCMIOpc == TargetOpcode::G_FCMP)
1781 return selectCompareBranchFedByFCmp(I, *CCMI, MIB);
1782 if (CCMIOpc == TargetOpcode::G_ICMP)
1783 return selectCompareBranchFedByICmp(I, *CCMI, MIB);
1784
1785 // Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z
1786 // instructions will not be produced, as they are conditional branch
1787 // instructions that do not set flags.
1788 if (ProduceNonFlagSettingCondBr) {
1789 emitTestBit(CondReg, /*Bit = */ 0, /*IsNegative = */ true,
1790 I.getOperand(1).getMBB(), MIB);
1791 I.eraseFromParent();
1792 return true;
1793 }
1794
1795 // Can't emit TB(N)Z/CB(N)Z. Emit a tst + bcc instead.
1796 auto TstMI =
1797 MIB.buildInstr(AArch64::ANDSWri, {LLT::scalar(32)}, {CondReg}).addImm(1);
1798 constrainSelectedInstRegOperands(*TstMI, TII, TRI, RBI);
1799 auto Bcc = MIB.buildInstr(AArch64::Bcc)
1800 .addImm(AArch64CC::EQ)
1801 .addMBB(I.getOperand(1).getMBB());
1802 I.eraseFromParent();
1803 return constrainSelectedInstRegOperands(*Bcc, TII, TRI, RBI);
1804}
1805
1806/// Returns the element immediate value of a vector shift operand if found.
1807/// This needs to detect a splat-like operation, e.g. a G_BUILD_VECTOR.
1808static Optional<int64_t> getVectorShiftImm(Register Reg,
1809 MachineRegisterInfo &MRI) {
1810 assert(MRI.getType(Reg).isVector() && "Expected a *vector* shift operand");
1811 MachineInstr *OpMI = MRI.getVRegDef(Reg);
1812 assert(OpMI && "Expected to find a vreg def for vector shift operand");
1813 return getAArch64VectorSplatScalar(*OpMI, MRI);
1814}
1815
1816/// Matches and returns the shift immediate value for a SHL instruction given
1817/// a shift operand.
1818static Optional<int64_t> getVectorSHLImm(LLT SrcTy, Register Reg, MachineRegisterInfo &MRI) {
1819 Optional<int64_t> ShiftImm = getVectorShiftImm(Reg, MRI);
1820 if (!ShiftImm)
1821 return None;
1822 // Check the immediate is in range for a SHL.
1823 int64_t Imm = *ShiftImm;
1824 if (Imm < 0)
1825 return None;
1826 switch (SrcTy.getElementType().getSizeInBits()) {
1827 default:
1828 LLVM_DEBUG(dbgs() << "Unhandled element type for vector shift");
1829 return None;
1830 case 8:
1831 if (Imm > 7)
1832 return None;
1833 break;
1834 case 16:
1835 if (Imm > 15)
1836 return None;
1837 break;
1838 case 32:
1839 if (Imm > 31)
1840 return None;
1841 break;
1842 case 64:
1843 if (Imm > 63)
1844 return None;
1845 break;
1846 }
1847 return Imm;
1848}
1849
1850bool AArch64InstructionSelector::selectVectorSHL(MachineInstr &I,
1851 MachineRegisterInfo &MRI) {
1852 assert(I.getOpcode() == TargetOpcode::G_SHL);
1853 Register DstReg = I.getOperand(0).getReg();
1854 const LLT Ty = MRI.getType(DstReg);
1855 Register Src1Reg = I.getOperand(1).getReg();
1856 Register Src2Reg = I.getOperand(2).getReg();
1857
1858 if (!Ty.isVector())
1859 return false;
1860
1861 // Check if we have a vector of constants on RHS that we can select as the
1862 // immediate form.
1863 Optional<int64_t> ImmVal = getVectorSHLImm(Ty, Src2Reg, MRI);
1864
1865 unsigned Opc = 0;
1866 if (Ty == LLT::fixed_vector(2, 64)) {
1867 Opc = ImmVal ? AArch64::SHLv2i64_shift : AArch64::USHLv2i64;
1868 } else if (Ty == LLT::fixed_vector(4, 32)) {
1869 Opc = ImmVal ? AArch64::SHLv4i32_shift : AArch64::USHLv4i32;
1870 } else if (Ty == LLT::fixed_vector(2, 32)) {
1871 Opc = ImmVal ? AArch64::SHLv2i32_shift : AArch64::USHLv2i32;
1872 } else if (Ty == LLT::fixed_vector(4, 16)) {
1873 Opc = ImmVal ? AArch64::SHLv4i16_shift : AArch64::USHLv4i16;
1874 } else if (Ty == LLT::fixed_vector(8, 16)) {
1875 Opc = ImmVal ? AArch64::SHLv8i16_shift : AArch64::USHLv8i16;
1876 } else if (Ty == LLT::fixed_vector(16, 8)) {
1877 Opc = ImmVal ? AArch64::SHLv16i8_shift : AArch64::USHLv16i8;
1878 } else if (Ty == LLT::fixed_vector(8, 8)) {
1879 Opc = ImmVal ? AArch64::SHLv8i8_shift : AArch64::USHLv8i8;
1880 } else {
1881 LLVM_DEBUG(dbgs() << "Unhandled G_SHL type");
1882 return false;
1883 }
1884
1885 auto Shl = MIB.buildInstr(Opc, {DstReg}, {Src1Reg});
1886 if (ImmVal)
1887 Shl.addImm(*ImmVal);
1888 else
1889 Shl.addUse(Src2Reg);
1890 constrainSelectedInstRegOperands(*Shl, TII, TRI, RBI);
1891 I.eraseFromParent();
1892 return true;
1893}
1894
1895bool AArch64InstructionSelector::selectVectorAshrLshr(
1896 MachineInstr &I, MachineRegisterInfo &MRI) {
1897 assert(I.getOpcode() == TargetOpcode::G_ASHR ||
1898        I.getOpcode() == TargetOpcode::G_LSHR);
1899 Register DstReg = I.getOperand(0).getReg();
1900 const LLT Ty = MRI.getType(DstReg);
1901 Register Src1Reg = I.getOperand(1).getReg();
1902 Register Src2Reg = I.getOperand(2).getReg();
1903
1904 if (!Ty.isVector())
1905 return false;
1906
1907 bool IsASHR = I.getOpcode() == TargetOpcode::G_ASHR;
1908
1909 // We expect the immediate case to be lowered in the PostLegalCombiner to
1910 // AArch64ISD::VASHR or AArch64ISD::VLSHR equivalents.
1911
1912 // There is not a shift right register instruction, but the shift left
1913 // register instruction takes a signed value, where negative numbers specify a
1914 // right shift.
1915
1916 unsigned Opc = 0;
1917 unsigned NegOpc = 0;
1918 const TargetRegisterClass *RC =
1919 getRegClassForTypeOnBank(Ty, RBI.getRegBank(AArch64::FPRRegBankID), RBI);
1920 if (Ty == LLT::fixed_vector(2, 64)) {
1921 Opc = IsASHR ? AArch64::SSHLv2i64 : AArch64::USHLv2i64;
1922 NegOpc = AArch64::NEGv2i64;
1923 } else if (Ty == LLT::fixed_vector(4, 32)) {
1924 Opc = IsASHR ? AArch64::SSHLv4i32 : AArch64::USHLv4i32;
1925 NegOpc = AArch64::NEGv4i32;
1926 } else if (Ty == LLT::fixed_vector(2, 32)) {
1927 Opc = IsASHR ? AArch64::SSHLv2i32 : AArch64::USHLv2i32;
1928 NegOpc = AArch64::NEGv2i32;
1929 } else if (Ty == LLT::fixed_vector(4, 16)) {
1930 Opc = IsASHR ? AArch64::SSHLv4i16 : AArch64::USHLv4i16;
1931 NegOpc = AArch64::NEGv4i16;
1932 } else if (Ty == LLT::fixed_vector(8, 16)) {
1933 Opc = IsASHR ? AArch64::SSHLv8i16 : AArch64::USHLv8i16;
1934 NegOpc = AArch64::NEGv8i16;
1935 } else if (Ty == LLT::fixed_vector(16, 8)) {
1936 Opc = IsASHR ? AArch64::SSHLv16i8 : AArch64::USHLv16i8;
1937 NegOpc = AArch64::NEGv16i8;
1938 } else if (Ty == LLT::fixed_vector(8, 8)) {
1939 Opc = IsASHR ? AArch64::SSHLv8i8 : AArch64::USHLv8i8;
1940 NegOpc = AArch64::NEGv8i8;
1941 } else {
1942 LLVM_DEBUG(dbgs() << "Unhandled G_ASHR type");
1943 return false;
1944 }
1945
1946 auto Neg = MIB.buildInstr(NegOpc, {RC}, {Src2Reg});
1947 constrainSelectedInstRegOperands(*Neg, TII, TRI, RBI);
1948 auto SShl = MIB.buildInstr(Opc, {DstReg}, {Src1Reg, Neg});
1949 constrainSelectedInstRegOperands(*SShl, TII, TRI, RBI);
1950 I.eraseFromParent();
1951 return true;
1952}
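A scalar illustration of the comment above: SSHL/USHL take the shift amount as a signed per-lane value, so a right shift is emitted as a NEG of the amount feeding the left-shift instruction. Standalone sketch, not LLVM code, assuming the amount's magnitude is below the lane width:

#include <cstdint>

// Behaves like one unsigned USHL lane: a positive amount shifts left, a
// negative amount shifts right.
uint64_t ushlLane(uint64_t Val, int8_t Amt) {
  return Amt >= 0 ? Val << Amt : Val >> -Amt;
}
// A G_LSHR by N is therefore selected as NEG(N) followed by USHL, i.e. each
// lane computes ushlLane(Val, -N).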
1953
1954bool AArch64InstructionSelector::selectVaStartAAPCS(
1955 MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const {
1956 return false;
1957}
1958
1959bool AArch64InstructionSelector::selectVaStartDarwin(
1960 MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const {
1961 AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
1962 Register ListReg = I.getOperand(0).getReg();
1963
1964 Register ArgsAddrReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
1965
1966 auto MIB =
1967 BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::ADDXri))
1968 .addDef(ArgsAddrReg)
1969 .addFrameIndex(FuncInfo->getVarArgsStackIndex())
1970 .addImm(0)
1971 .addImm(0);
1972
1973 constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
1974
1975 MIB = BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::STRXui))
1976 .addUse(ArgsAddrReg)
1977 .addUse(ListReg)
1978 .addImm(0)
1979 .addMemOperand(*I.memoperands_begin());
1980
1981 constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
1982 I.eraseFromParent();
1983 return true;
1984}
1985
1986void AArch64InstructionSelector::materializeLargeCMVal(
1987 MachineInstr &I, const Value *V, unsigned OpFlags) {
1988 MachineBasicBlock &MBB = *I.getParent();
1989 MachineFunction &MF = *MBB.getParent();
1990 MachineRegisterInfo &MRI = MF.getRegInfo();
1991
1992 auto MovZ = MIB.buildInstr(AArch64::MOVZXi, {&AArch64::GPR64RegClass}, {});
1993 MovZ->addOperand(MF, I.getOperand(1));
1994 MovZ->getOperand(1).setTargetFlags(OpFlags | AArch64II::MO_G0 |
1995 AArch64II::MO_NC);
1996 MovZ->addOperand(MF, MachineOperand::CreateImm(0));
1997 constrainSelectedInstRegOperands(*MovZ, TII, TRI, RBI);
1998
1999 auto BuildMovK = [&](Register SrcReg, unsigned char Flags, unsigned Offset,
2000 Register ForceDstReg) {
2001 Register DstReg = ForceDstReg
2002 ? ForceDstReg
2003 : MRI.createVirtualRegister(&AArch64::GPR64RegClass);
2004 auto MovI = MIB.buildInstr(AArch64::MOVKXi).addDef(DstReg).addUse(SrcReg);
2005 if (auto *GV = dyn_cast<GlobalValue>(V)) {
2006 MovI->addOperand(MF, MachineOperand::CreateGA(
2007 GV, MovZ->getOperand(1).getOffset(), Flags));
2008 } else {
2009 MovI->addOperand(
2010 MF, MachineOperand::CreateBA(cast<BlockAddress>(V),
2011 MovZ->getOperand(1).getOffset(), Flags));
2012 }
2013 MovI->addOperand(MF, MachineOperand::CreateImm(Offset));
2014 constrainSelectedInstRegOperands(*MovI, TII, TRI, RBI);
2015 return DstReg;
2016 };
2017 Register DstReg = BuildMovK(MovZ.getReg(0),
2018 AArch64II::MO_G1 | AArch64II::MO_NC, 16, 0);
2019 DstReg = BuildMovK(DstReg, AArch64II::MO_G2 | AArch64II::MO_NC, 32, 0);
2020 BuildMovK(DstReg, AArch64II::MO_G3, 48, I.getOperand(0).getReg());
2021}
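The MOVZ/MOVK chain built above materializes a 64-bit value (here a relocated address) 16 bits at a time. A standalone sketch of the arithmetic, with an invented helper name:

#include <cstdint>

uint64_t movzMovkChain(uint64_t Value) {
  uint64_t Reg = Value & 0xFFFFULL;       // MOVZ: set bits [15:0], clear the rest
  Reg |= Value & 0xFFFF0000ULL;           // MOVK ..., lsl #16: patch bits [31:16]
  Reg |= Value & 0xFFFF00000000ULL;       // MOVK ..., lsl #32: patch bits [47:32]
  Reg |= Value & 0xFFFF000000000000ULL;   // MOVK ..., lsl #48: patch bits [63:48]
  return Reg;                             // == Value
}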
2022
2023bool AArch64InstructionSelector::preISelLower(MachineInstr &I) {
2024 MachineBasicBlock &MBB = *I.getParent();
2025 MachineFunction &MF = *MBB.getParent();
2026 MachineRegisterInfo &MRI = MF.getRegInfo();
2027
2028 switch (I.getOpcode()) {
2029 case TargetOpcode::G_STORE: {
2030 bool Changed = contractCrossBankCopyIntoStore(I, MRI);
2031 MachineOperand &SrcOp = I.getOperand(0);
2032 if (MRI.getType(SrcOp.getReg()).isPointer()) {
2033 // Allow matching with imported patterns for stores of pointers. Unlike
2034 // G_LOAD/G_PTR_ADD, we may not have selected all users. So, emit a copy
2035 // and constrain.
2036 auto Copy = MIB.buildCopy(LLT::scalar(64), SrcOp);
2037 Register NewSrc = Copy.getReg(0);
2038 SrcOp.setReg(NewSrc);
2039 RBI.constrainGenericRegister(NewSrc, AArch64::GPR64RegClass, MRI);
2040 Changed = true;
2041 }
2042 return Changed;
2043 }
2044 case TargetOpcode::G_PTR_ADD:
2045 return convertPtrAddToAdd(I, MRI);
2046 case TargetOpcode::G_LOAD: {
2047 // For scalar loads of pointers, we try to convert the dest type from p0
2048 // to s64 so that our imported patterns can match. Like with the G_PTR_ADD
2049 // conversion, this should be ok because all users should have been
2050 // selected already, so the type doesn't matter for them.
2051 Register DstReg = I.getOperand(0).getReg();
2052 const LLT DstTy = MRI.getType(DstReg);
2053 if (!DstTy.isPointer())
2054 return false;
2055 MRI.setType(DstReg, LLT::scalar(64));
2056 return true;
2057 }
2058 case AArch64::G_DUP: {
2059 // Convert the type from p0 to s64 to help selection.
2060 LLT DstTy = MRI.getType(I.getOperand(0).getReg());
2061 if (!DstTy.getElementType().isPointer())
2062 return false;
2063 auto NewSrc = MIB.buildCopy(LLT::scalar(64), I.getOperand(1).getReg());
2064 MRI.setType(I.getOperand(0).getReg(),
2065 DstTy.changeElementType(LLT::scalar(64)));
2066 MRI.setRegClass(NewSrc.getReg(0), &AArch64::GPR64RegClass);
2067 I.getOperand(1).setReg(NewSrc.getReg(0));
2068 return true;
2069 }
2070 case TargetOpcode::G_UITOFP:
2071 case TargetOpcode::G_SITOFP: {
2072 // If both source and destination regbanks are FPR, then convert the opcode
2073 // to G_SITOF so that the importer can select it to an fpr variant.
2074 // Otherwise, it ends up matching an fpr/gpr variant and adding a cross-bank
2075 // copy.
2076 Register SrcReg = I.getOperand(1).getReg();
2077 LLT SrcTy = MRI.getType(SrcReg);
2078 LLT DstTy = MRI.getType(I.getOperand(0).getReg());
2079 if (SrcTy.isVector() || SrcTy.getSizeInBits() != DstTy.getSizeInBits())
2080 return false;
2081
2082 if (RBI.getRegBank(SrcReg, MRI, TRI)->getID() == AArch64::FPRRegBankID) {
2083 if (I.getOpcode() == TargetOpcode::G_SITOFP)
2084 I.setDesc(TII.get(AArch64::G_SITOF));
2085 else
2086 I.setDesc(TII.get(AArch64::G_UITOF));
2087 return true;
2088 }
2089 return false;
2090 }
2091 default:
2092 return false;
2093 }
2094}
2095
2096/// This lowering tries to look for G_PTR_ADD instructions and then converts
2097/// them to a standard G_ADD with a COPY on the source.
2098///
2099/// The motivation behind this is to expose the add semantics to the imported
2100/// tablegen patterns. We shouldn't need to check for uses being loads/stores,
2101/// because the selector works bottom up, uses before defs. By the time we
2102/// end up trying to select a G_PTR_ADD, we should have already attempted to
2103/// fold this into addressing modes and were therefore unsuccessful.
2104bool AArch64InstructionSelector::convertPtrAddToAdd(
2105 MachineInstr &I, MachineRegisterInfo &MRI) {
2106 assert(I.getOpcode() == TargetOpcode::G_PTR_ADD && "Expected G_PTR_ADD");
2107 Register DstReg = I.getOperand(0).getReg();
2108 Register AddOp1Reg = I.getOperand(1).getReg();
2109 const LLT PtrTy = MRI.getType(DstReg);
2110 if (PtrTy.getAddressSpace() != 0)
2111 return false;
2112
2113 const LLT CastPtrTy =
2114 PtrTy.isVector() ? LLT::fixed_vector(2, 64) : LLT::scalar(64);
2115 auto PtrToInt = MIB.buildPtrToInt(CastPtrTy, AddOp1Reg);
2116 // Set regbanks on the registers.
2117 if (PtrTy.isVector())
2118 MRI.setRegBank(PtrToInt.getReg(0), RBI.getRegBank(AArch64::FPRRegBankID));
2119 else
2120 MRI.setRegBank(PtrToInt.getReg(0), RBI.getRegBank(AArch64::GPRRegBankID));
2121
2122 // Now turn the %dst(p0) = G_PTR_ADD %base, off into:
2123 // %dst(intty) = G_ADD %intbase, off
2124 I.setDesc(TII.get(TargetOpcode::G_ADD));
2125 MRI.setType(DstReg, CastPtrTy);
2126 I.getOperand(1).setReg(PtrToInt.getReg(0));
2127 if (!select(*PtrToInt)) {
2128 LLVM_DEBUG(dbgs() << "Failed to select G_PTRTOINT in convertPtrAddToAdd");
2129 return false;
2130 }
2131
2132 // Also take the opportunity here to try to do some optimization.
2133 // Try to convert this into a G_SUB if the offset is a 0-x negate idiom.
2134 Register NegatedReg;
2135 if (!mi_match(I.getOperand(2).getReg(), MRI, m_Neg(m_Reg(NegatedReg))))
2136 return true;
2137 I.getOperand(2).setReg(NegatedReg);
2138 I.setDesc(TII.get(TargetOpcode::G_SUB));
2139 return true;
2140}
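The final peephole above, which rewrites a G_PTR_ADD of a negated offset into a G_SUB, is the modular-arithmetic identity below. A standalone sketch:

#include <cstdint>

uint64_t ptrAddOfNeg(uint64_t Base, uint64_t X) {
  uint64_t Off = 0 - X; // the "0 - x" negate idiom feeding the G_PTR_ADD
  return Base + Off;    // == Base - X (mod 2^64), hence the G_SUB
}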
2141
2142bool AArch64InstructionSelector::earlySelectSHL(MachineInstr &I,
2143 MachineRegisterInfo &MRI) {
2144 // We try to match the immediate variant of LSL, which is actually an alias
2145 // for a special case of UBFM. Otherwise, we fall back to the imported
2146 // selector which will match the register variant.
2147 assert(I.getOpcode() == TargetOpcode::G_SHL && "unexpected op");
2148 const auto &MO = I.getOperand(2);
2149 auto VRegAndVal = getIConstantVRegVal(MO.getReg(), MRI);
2150 if (!VRegAndVal)
2151 return false;
2152
2153 const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
2154 if (DstTy.isVector())
2155 return false;
2156 bool Is64Bit = DstTy.getSizeInBits() == 64;
2157 auto Imm1Fn = Is64Bit ? selectShiftA_64(MO) : selectShiftA_32(MO);
2158 auto Imm2Fn = Is64Bit ? selectShiftB_64(MO) : selectShiftB_32(MO);
2159
2160 if (!Imm1Fn || !Imm2Fn)
2161 return false;
2162
2163 auto NewI =
2164 MIB.buildInstr(Is64Bit ? AArch64::UBFMXri : AArch64::UBFMWri,
2165 {I.getOperand(0).getReg()}, {I.getOperand(1).getReg()});
2166
2167 for (auto &RenderFn : *Imm1Fn)
2168 RenderFn(NewI);
2169 for (auto &RenderFn : *Imm2Fn)
2170 RenderFn(NewI);
2171
2172 I.eraseFromParent();
2173 return constrainSelectedInstRegOperands(*NewI, TII, TRI, RBI);
2174}
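The immediate LSL matched here is an alias of UBFM: for a Width-bit register, lsl #S encodes as ubfm #((Width - S) % Width), #(Width - 1 - S). A standalone sketch of that immediate computation, with invented names and assuming 0 < S < Width:

#include <cstdint>
#include <utility>

// Returns {immr, imms} for the UBFM form of "lsl #Shift" on a Width-bit reg.
std::pair<uint64_t, uint64_t> lslAsUbfm(uint64_t Shift, uint64_t Width) {
  return {(Width - Shift) % Width, Width - 1 - Shift};
}
// e.g. lslAsUbfm(3, 64) == {61, 60}: lsl x0, x1, #3 is ubfm x0, x1, #61, #60.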
2175
2176bool AArch64InstructionSelector::contractCrossBankCopyIntoStore(
2177 MachineInstr &I, MachineRegisterInfo &MRI) {
2178 assert(I.getOpcode() == TargetOpcode::G_STORE && "Expected G_STORE");
2179 // If we're storing a scalar, it doesn't matter what register bank that
2180 // scalar is on. All that matters is the size.
2181 //
2182 // So, if we see something like this (with a 32-bit scalar as an example):
2183 //
2184 // %x:gpr(s32) = ... something ...
2185 // %y:fpr(s32) = COPY %x:gpr(s32)
2186 // G_STORE %y:fpr(s32)
2187 //
2188 // We can fix this up into something like this:
2189 //
2190 // G_STORE %x:gpr(s32)
2191 //
2192 // And then continue the selection process normally.
2193 Register DefDstReg = getSrcRegIgnoringCopies(I.getOperand(0).getReg(), MRI);
2194 if (!DefDstReg.isValid())
2195 return false;
2196 LLT DefDstTy = MRI.getType(DefDstReg);
2197 Register StoreSrcReg = I.getOperand(0).getReg();
2198 LLT StoreSrcTy = MRI.getType(StoreSrcReg);
2199
2200 // If we get something strange like a physical register, then we shouldn't
2201 // go any further.
2202 if (!DefDstTy.isValid())
2203 return false;
2204
2205 // Are the source and dst types the same size?
2206 if (DefDstTy.getSizeInBits() != StoreSrcTy.getSizeInBits())
2207 return false;
2208
2209 if (RBI.getRegBank(StoreSrcReg, MRI, TRI) ==
2210 RBI.getRegBank(DefDstReg, MRI, TRI))
2211 return false;
2212
2213 // We have a cross-bank copy, which is entering a store. Let's fold it.
2214 I.getOperand(0).setReg(DefDstReg);
2215 return true;
2216}
2217
2218bool AArch64InstructionSelector::earlySelect(MachineInstr &I) {
2219 assert(I.getParent() && "Instruction should be in a basic block!");
2220 assert(I.getParent()->getParent() && "Instruction should be in a function!");
2221
2222 MachineBasicBlock &MBB = *I.getParent();
2223 MachineFunction &MF = *MBB.getParent();
2224 MachineRegisterInfo &MRI = MF.getRegInfo();
2225
2226 switch (I.getOpcode()) {
2227 case AArch64::G_DUP: {
2228 // Before selecting a DUP instruction, check if it is better selected as a
2229 // MOV or load from a constant pool.
2230 Register Src = I.getOperand(1).getReg();
2231 auto ValAndVReg = getIConstantVRegValWithLookThrough(Src, MRI);
2232 if (!ValAndVReg)
2233 return false;
2234 LLVMContext &Ctx = MF.getFunction().getContext();
2235 Register Dst = I.getOperand(0).getReg();
2236 auto *CV = ConstantDataVector::getSplat(
2237 MRI.getType(Dst).getNumElements(),
2238 ConstantInt::get(Type::getIntNTy(Ctx, MRI.getType(Src).getSizeInBits()),
2239 ValAndVReg->Value));
2240 if (!emitConstantVector(Dst, CV, MIB, MRI))
2241 return false;
2242 I.eraseFromParent();
2243 return true;
2244 }
2245 case TargetOpcode::G_SEXT:
2246 // Check for i64 sext(i32 vector_extract) prior to tablegen to select SMOV
2247 // over a normal extend.
2248 if (selectUSMovFromExtend(I, MRI))
2249 return true;
2250 return false;
2251 case TargetOpcode::G_BR:
2252 return false;
2253 case TargetOpcode::G_SHL:
2254 return earlySelectSHL(I, MRI);
2255 case TargetOpcode::G_CONSTANT: {
2256 bool IsZero = false;
2257 if (I.getOperand(1).isCImm())
2258 IsZero = I.getOperand(1).getCImm()->getZExtValue() == 0;
2259 else if (I.getOperand(1).isImm())
2260 IsZero = I.getOperand(1).getImm() == 0;
2261
2262 if (!IsZero)
2263 return false;
2264
2265 Register DefReg = I.getOperand(0).getReg();
2266 LLT Ty = MRI.getType(DefReg);
2267 if (Ty.getSizeInBits() == 64) {
2268 I.getOperand(1).ChangeToRegister(AArch64::XZR, false);
2269 RBI.constrainGenericRegister(DefReg, AArch64::GPR64RegClass, MRI);
2270 } else if (Ty.getSizeInBits() == 32) {
2271 I.getOperand(1).ChangeToRegister(AArch64::WZR, false);
2272 RBI.constrainGenericRegister(DefReg, AArch64::GPR32RegClass, MRI);
2273 } else
2274 return false;
2275
2276 I.setDesc(TII.get(TargetOpcode::COPY));
2277 return true;
2278 }
2279
2280 case TargetOpcode::G_ADD: {
2281 // Check if this is being fed by a G_ICMP on either side.
2282 //
2283 // (cmp pred, x, y) + z
2284 //
2285 // In the above case, when the cmp is true, we increment z by 1. So, we can
2286 // fold the add into the cset for the cmp by using cinc.
2287 //
2288 // FIXME: This would probably be a lot nicer in PostLegalizerLowering.
2289 Register AddDst = I.getOperand(0).getReg();
2290 Register AddLHS = I.getOperand(1).getReg();
2291 Register AddRHS = I.getOperand(2).getReg();
2292 // Only handle scalars.
2293 LLT Ty = MRI.getType(AddLHS);
2294 if (Ty.isVector())
2295 return false;
2296 // Since G_ICMP is modeled as ADDS/SUBS/ANDS, we can handle 32 bits or 64
2297 // bits.
2298 unsigned Size = Ty.getSizeInBits();
2299 if (Size != 32 && Size != 64)
2300 return false;
2301 auto MatchCmp = [&](Register Reg) -> MachineInstr * {
2302 if (!MRI.hasOneNonDBGUse(Reg))
2303 return nullptr;
2304 // If the LHS of the add is 32 bits, then we want to fold a 32-bit
2305 // compare.
2306 if (Size == 32)
2307 return getOpcodeDef(TargetOpcode::G_ICMP, Reg, MRI);
2308 // We model scalar compares using 32-bit destinations right now.
2309 // If it's a 64-bit compare, it'll have 64-bit sources.
2310 Register ZExt;
2311 if (!mi_match(Reg, MRI,
2312 m_OneNonDBGUse(m_GZExt(m_OneNonDBGUse(m_Reg(ZExt))))))
2313 return nullptr;
2314 auto *Cmp = getOpcodeDef(TargetOpcode::G_ICMP, ZExt, MRI);
2315 if (!Cmp ||
2316 MRI.getType(Cmp->getOperand(2).getReg()).getSizeInBits() != 64)
2317 return nullptr;
2318 return Cmp;
2319 };
2320 // Try to match
2321 // z + (cmp pred, x, y)
2322 MachineInstr *Cmp = MatchCmp(AddRHS);
2323 if (!Cmp) {
2324 // (cmp pred, x, y) + z
2325 std::swap(AddLHS, AddRHS);
2326 Cmp = MatchCmp(AddRHS);
2327 if (!Cmp)
2328 return false;
2329 }
2330 auto &PredOp = Cmp->getOperand(1);
2331 auto Pred = static_cast<CmpInst::Predicate>(PredOp.getPredicate());
2332 const AArch64CC::CondCode InvCC =
2333 changeICMPPredToAArch64CC(CmpInst::getInversePredicate(Pred));
2334 MIB.setInstrAndDebugLoc(I);
2335 emitIntegerCompare(/*LHS=*/Cmp->getOperand(2),
2336 /*RHS=*/Cmp->getOperand(3), PredOp, MIB);
2337 emitCSINC(/*Dst=*/AddDst, /*Src =*/AddLHS, /*Src2=*/AddLHS, InvCC, MIB);
2338 I.eraseFromParent();
2339 return true;
2340 }
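// Illustrative sketch (not in the original source): the G_ADD fold above on
// hypothetical virtual registers, for the 32-bit case.
//
//   %c:gpr(s32)   = G_ICMP intpred(eq), %x(s32), %y(s32)
//   %sum:gpr(s32) = G_ADD %z(s32), %c
//
// is selected roughly as
//
//   SUBSWrr %x, %y               ; compare, sets NZCV
//   %sum = CSINCWr %z, %z, ne    ; i.e. "cinc %sum, %z, eq"
//
// CSINC increments its second source when the condition is *false*, which is
// why the inverse condition code (InvCC) is passed to emitCSINC.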
2341 case TargetOpcode::G_OR: {
2342 // Look for operations that take the lower `Width=Size-ShiftImm` bits of
2343 // `ShiftSrc` and insert them into the upper `Width` bits of `MaskSrc` via
2344 // shifting and masking that we can replace with a BFI (encoded as a BFM).
2345 Register Dst = I.getOperand(0).getReg();
2346 LLT Ty = MRI.getType(Dst);
2347
2348 if (!Ty.isScalar())
2349 return false;
2350
2351 unsigned Size = Ty.getSizeInBits();
2352 if (Size != 32 && Size != 64)
2353 return false;
2354
2355 Register ShiftSrc;
2356 int64_t ShiftImm;
2357 Register MaskSrc;
2358 int64_t MaskImm;
2359 if (!mi_match(
2360 Dst, MRI,
2361 m_GOr(m_OneNonDBGUse(m_GShl(m_Reg(ShiftSrc), m_ICst(ShiftImm))),
2362 m_OneNonDBGUse(m_GAnd(m_Reg(MaskSrc), m_ICst(MaskImm))))))
2363 return false;
2364
2365 if (ShiftImm > Size || ((1ULL << ShiftImm) - 1ULL) != uint64_t(MaskImm))
2366 return false;
2367
2368 int64_t Immr = Size - ShiftImm;
2369 int64_t Imms = Size - ShiftImm - 1;
2370 unsigned Opc = Size == 32 ? AArch64::BFMWri : AArch64::BFMXri;
2371 emitInstr(Opc, {Dst}, {MaskSrc, ShiftSrc, Immr, Imms}, MIB);
2372 I.eraseFromParent();
2373 return true;
2374 }
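// Illustrative sketch (not in the original source): a worked instance of the
// BFI fold above for Size = 32 and a hypothetical ShiftImm of 8. The match
// requires MaskImm == (1 << 8) - 1 == 0xff, and the code computes
// Immr = 32 - 8 = 24 and Imms = 32 - 8 - 1 = 23, so it emits
//   BFMWri %dst, %masksrc, %shiftsrc, 24, 23
// which is the alias "bfi wDst, wShiftSrc, #8, #24": the low 24 bits of
// ShiftSrc are inserted at bit 8 of the tied MaskSrc value, reproducing
// (ShiftSrc << 8) | (MaskSrc & 0xff).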
2375 default:
2376 return false;
2377 }
2378}
2379
2380bool AArch64InstructionSelector::select(MachineInstr &I) {
2381 assert(I.getParent() && "Instruction should be in a basic block!");
2382 assert(I.getParent()->getParent() && "Instruction should be in a function!");
2383
2384 MachineBasicBlock &MBB = *I.getParent();
2385 MachineFunction &MF = *MBB.getParent();
2386 MachineRegisterInfo &MRI = MF.getRegInfo();
2387
2388 const AArch64Subtarget *Subtarget =
2389 &static_cast<const AArch64Subtarget &>(MF.getSubtarget());
2390 if (Subtarget->requiresStrictAlign()) {
2391 // We don't support this feature yet.
2392 LLVM_DEBUG(dbgs() << "AArch64 GISel does not support strict-align yet\n");
2393 return false;
2394 }
2395
2396 MIB.setInstrAndDebugLoc(I);
2397
2398 unsigned Opcode = I.getOpcode();
2399 // G_PHI requires same handling as PHI
2400 if (!I.isPreISelOpcode() || Opcode == TargetOpcode::G_PHI) {
2401 // Certain non-generic instructions also need some special handling.
2402
2403 if (Opcode == TargetOpcode::LOAD_STACK_GUARD)
2404 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2405
2406 if (Opcode == TargetOpcode::PHI || Opcode == TargetOpcode::G_PHI) {
2407 const Register DefReg = I.getOperand(0).getReg();
2408 const LLT DefTy = MRI.getType(DefReg);
2409
2410 const RegClassOrRegBank &RegClassOrBank =
2411 MRI.getRegClassOrRegBank(DefReg);
2412
2413 const TargetRegisterClass *DefRC
2414 = RegClassOrBank.dyn_cast<const TargetRegisterClass *>();
2415 if (!DefRC) {
2416 if (!DefTy.isValid()) {
2417 LLVM_DEBUG(dbgs() << "PHI operand has no type, not a gvreg?\n");
2418 return false;
2419 }
2420 const RegisterBank &RB = *RegClassOrBank.get<const RegisterBank *>();
2421 DefRC = getRegClassForTypeOnBank(DefTy, RB, RBI);
2422 if (!DefRC) {
2423 LLVM_DEBUG(dbgs() << "PHI operand has unexpected size/bank\n");
2424 return false;
2425 }
2426 }
2427
2428 I.setDesc(TII.get(TargetOpcode::PHI));
2429
2430 return RBI.constrainGenericRegister(DefReg, *DefRC, MRI);
2431 }
2432
2433 if (I.isCopy())
2434 return selectCopy(I, TII, MRI, TRI, RBI);
2435
2436 return true;
2437 }
2438
2439
2440 if (I.getNumOperands() != I.getNumExplicitOperands()) {
2441 LLVM_DEBUG(
2442 dbgs() << "Generic instruction has unexpected implicit operands\n");
2443 return false;
2444 }
2445
2446 // Try to do some lowering before we start instruction selecting. These
2447 // lowerings are purely transformations on the input G_MIR and so selection
2448 // must continue after any modification of the instruction.
2449 if (preISelLower(I)) {
2450 Opcode = I.getOpcode(); // The opcode may have been modified, refresh it.
2451 }
2452
2453 // There may be patterns where the importer can't deal with them optimally,
2454 // but does select it to a suboptimal sequence so our custom C++ selection
2455 // code later never has a chance to work on it. Therefore, we have an early
2456 // selection attempt here to give priority to certain selection routines
2457 // over the imported ones.
2458 if (earlySelect(I))
2459 return true;
2460
2461 if (selectImpl(I, *CoverageInfo))
2462 return true;
2463
2464 LLT Ty =
2465 I.getOperand(0).isReg() ? MRI.getType(I.getOperand(0).getReg()) : LLT{};
2466
2467 switch (Opcode) {
2468 case TargetOpcode::G_SBFX:
2469 case TargetOpcode::G_UBFX: {
2470 static const unsigned OpcTable[2][2] = {
2471 {AArch64::UBFMWri, AArch64::UBFMXri},
2472 {AArch64::SBFMWri, AArch64::SBFMXri}};
2473 bool IsSigned = Opcode == TargetOpcode::G_SBFX;
2474 unsigned Size = Ty.getSizeInBits();
2475 unsigned Opc = OpcTable[IsSigned][Size == 64];
2476 auto Cst1 =
2477 getIConstantVRegValWithLookThrough(I.getOperand(2).getReg(), MRI);
2478 assert(Cst1 && "Should have gotten a constant for src 1?");
2479 auto Cst2 =
2480 getIConstantVRegValWithLookThrough(I.getOperand(3).getReg(), MRI);
2481 assert(Cst2 && "Should have gotten a constant for src 2?");
2482 auto LSB = Cst1->Value.getZExtValue();
2483 auto Width = Cst2->Value.getZExtValue();
2484 auto BitfieldInst =
2485 MIB.buildInstr(Opc, {I.getOperand(0)}, {I.getOperand(1)})
2486 .addImm(LSB)
2487 .addImm(LSB + Width - 1);
2488 I.eraseFromParent();
2489 return constrainSelectedInstRegOperands(*BitfieldInst, TII, TRI, RBI);
2490 }
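// Illustrative sketch (not in the original source): for a hypothetical
//   %d:gpr(s32) = G_UBFX %s(s32), 4, 8        ; LSB = 4, Width = 8
// the code above emits
//   %d = UBFMWri %s, 4, 4 + 8 - 1             ; i.e. UBFM %d, %s, #4, #11
// which is the alias "ubfx wD, wS, #4, #8", extracting bits [11:4] of %s into
// the low bits of %d. G_SBFX uses the SBFMWri/SBFMXri row of OpcTable instead.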
2491 case TargetOpcode::G_BRCOND:
2492 return selectCompareBranch(I, MF, MRI);
2493
2494 case TargetOpcode::G_BRINDIRECT: {
2495 I.setDesc(TII.get(AArch64::BR));
2496 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2497 }
2498
2499 case TargetOpcode::G_BRJT:
2500 return selectBrJT(I, MRI);
2501
2502 case AArch64::G_ADD_LOW: {
2503 // This op may have been separated from its ADRP companion by the localizer
2504 // or some other code motion pass. Given that many CPUs will try to
2505 // macro fuse these operations anyway, select this into a MOVaddr pseudo
2506 // which will later be expanded into an ADRP+ADD pair after scheduling.
2507 MachineInstr *BaseMI = MRI.getVRegDef(I.getOperand(1).getReg());
2508 if (BaseMI->getOpcode() != AArch64::ADRP) {
2509 I.setDesc(TII.get(AArch64::ADDXri));
2510 I.addOperand(MachineOperand::CreateImm(0));
2511 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2512 }
2513 assert(TM.getCodeModel() == CodeModel::Small &&
2514 "Expected small code model");
2515 auto Op1 = BaseMI->getOperand(1);
2516 auto Op2 = I.getOperand(2);
2517 auto MovAddr = MIB.buildInstr(AArch64::MOVaddr, {I.getOperand(0)}, {})
2518 .addGlobalAddress(Op1.getGlobal(), Op1.getOffset(),
2519 Op1.getTargetFlags())
2520 .addGlobalAddress(Op2.getGlobal(), Op2.getOffset(),
2521 Op2.getTargetFlags());
2522 I.eraseFromParent();
2523 return constrainSelectedInstRegOperands(*MovAddr, TII, TRI, RBI);
2524 }
2525
2526 case TargetOpcode::G_BSWAP: {
2527 // Handle vector types for G_BSWAP directly.
2528 Register DstReg = I.getOperand(0).getReg();
2529 LLT DstTy = MRI.getType(DstReg);
2530
2531 // We should only get vector types here; everything else is handled by the
2532 // importer right now.
2533 if (!DstTy.isVector() || DstTy.getSizeInBits() > 128) {
2534 LLVM_DEBUG(dbgs() << "Dst type for G_BSWAP currently unsupported.\n");
2535 return false;
2536 }
2537
2538 // Only handle 4 and 2 element vectors for now.
2539 // TODO: 16-bit elements.
2540 unsigned NumElts = DstTy.getNumElements();
2541 if (NumElts != 4 && NumElts != 2) {
2542 LLVM_DEBUG(dbgs() << "Unsupported number of elements for G_BSWAP.\n");
2543 return false;
2544 }
2545
2546 // Choose the correct opcode for the supported types. Right now, that's
2547 // v2s32, v4s32, and v2s64.
2548 unsigned Opc = 0;
2549 unsigned EltSize = DstTy.getElementType().getSizeInBits();
2550 if (EltSize == 32)
2551 Opc = (DstTy.getNumElements() == 2) ? AArch64::REV32v8i8
2552 : AArch64::REV32v16i8;
2553 else if (EltSize == 64)
2554 Opc = AArch64::REV64v16i8;
2555
2556 // We should always get something by the time we get here...
2557 assert(Opc != 0 && "Didn't get an opcode for G_BSWAP?");
2558
2559 I.setDesc(TII.get(Opc));
2560 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2561 }
2562
2563 case TargetOpcode::G_FCONSTANT:
2564 case TargetOpcode::G_CONSTANT: {
2565 const bool isFP = Opcode == TargetOpcode::G_FCONSTANT;
2566
2567 const LLT s8 = LLT::scalar(8);
2568 const LLT s16 = LLT::scalar(16);
2569 const LLT s32 = LLT::scalar(32);
2570 const LLT s64 = LLT::scalar(64);
2571 const LLT s128 = LLT::scalar(128);
2572 const LLT p0 = LLT::pointer(0, 64);
2573
2574 const Register DefReg = I.getOperand(0).getReg();
2575 const LLT DefTy = MRI.getType(DefReg);
2576 const unsigned DefSize = DefTy.getSizeInBits();
2577 const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI);
2578
2579 // FIXME: Redundant check, but even less readable when factored out.
2580 if (isFP) {
2581 if (Ty != s16 && Ty != s32 && Ty != s64 && Ty != s128) {
2582 LLVM_DEBUG(dbgs() << "Unable to materialize FP " << Ty
2583 << " constant, expected: " << s16 << " or " << s32
2584 << " or " << s64 << " or " << s128 << '\n');
2585 return false;
2586 }
2587
2588 if (RB.getID() != AArch64::FPRRegBankID) {
2589 LLVM_DEBUG(dbgs() << "Unable to materialize FP " << Ty
2590 << " constant on bank: " << RB
2591 << ", expected: FPR\n");
2592 return false;
2593 }
2594
2595 // The case when we have 0.0 is covered by tablegen. Reject it here so we
2596 // can be sure tablegen works correctly and isn't rescued by this code.
2597 // 0.0 is not covered by tablegen for FP128. So we will handle this
2598 // scenario in the code here.
2599 if (DefSize != 128 && I.getOperand(1).getFPImm()->isExactlyValue(0.0))
2600 return false;
2601 } else {
2602 // s32 and s64 are covered by tablegen.
2603 if (Ty != p0 && Ty != s8 && Ty != s16) {
2604 LLVM_DEBUG(dbgs() << "Unable to materialize integer " << Ty
2605 << " constant, expected: " << s32 << ", " << s64
2606 << ", or " << p0 << '\n');
2607 return false;
2608 }
2609
2610 if (RB.getID() != AArch64::GPRRegBankID) {
2611 LLVM_DEBUG(dbgs() << "Unable to materialize integer " << Ty
2612 << " constant on bank: " << RB
2613 << ", expected: GPR\n");
2614 return false;
2615 }
2616 }
2617
2618 if (isFP) {
2619 const TargetRegisterClass &FPRRC = *getMinClassForRegBank(RB, DefSize);
2620 // For 16, 64, and 128b values, emit a constant pool load.
2621 switch (DefSize) {
2622 default:
2623 llvm_unreachable("Unexpected destination size for G_FCONSTANT?");
2624 case 32:
2625 // For s32, use a cp load if we have optsize/minsize.
2626 if (!shouldOptForSize(&MF))
2627 break;
2628 LLVM_FALLTHROUGH;
2629 case 16:
2630 case 64:
2631 case 128: {
2632 auto *FPImm = I.getOperand(1).getFPImm();
2633 auto *LoadMI = emitLoadFromConstantPool(FPImm, MIB);
2634 if (!LoadMI) {
2635 LLVM_DEBUG(dbgs() << "Failed to load double constant pool entry\n");
2636 return false;
2637 }
2638 MIB.buildCopy({DefReg}, {LoadMI->getOperand(0).getReg()});
2639 I.eraseFromParent();
2640 return RBI.constrainGenericRegister(DefReg, FPRRC, MRI);
2641 }
2642 }
2643
2644 // Either emit a FMOV, or emit a copy to emit a normal mov.
2645 assert(DefSize == 32 &&
2646 "Expected constant pool loads for all sizes other than 32!");
2647 const Register DefGPRReg =
2648 MRI.createVirtualRegister(&AArch64::GPR32RegClass);
2649 MachineOperand &RegOp = I.getOperand(0);
2650 RegOp.setReg(DefGPRReg);
2651 MIB.setInsertPt(MIB.getMBB(), std::next(I.getIterator()));
2652 MIB.buildCopy({DefReg}, {DefGPRReg});
2653
2654 if (!RBI.constrainGenericRegister(DefReg, FPRRC, MRI)) {
2655 LLVM_DEBUG(dbgs() << "Failed to constrain G_FCONSTANT def operand\n");
2656 return false;
2657 }
2658
2659 MachineOperand &ImmOp = I.getOperand(1);
2660 // FIXME: Is going through int64_t always correct?
2661 ImmOp.ChangeToImmediate(
2662 ImmOp.getFPImm()->getValueAPF().bitcastToAPInt().getZExtValue());
2663 } else if (I.getOperand(1).isCImm()) {
2664 uint64_t Val = I.getOperand(1).getCImm()->getZExtValue();
2665 I.getOperand(1).ChangeToImmediate(Val);
2666 } else if (I.getOperand(1).isImm()) {
2667 uint64_t Val = I.getOperand(1).getImm();
2668 I.getOperand(1).ChangeToImmediate(Val);
2669 }
2670
2671 const unsigned MovOpc =
2672 DefSize == 64 ? AArch64::MOVi64imm : AArch64::MOVi32imm;
2673 I.setDesc(TII.get(MovOpc));
2674 constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2675 return true;
2676 }
2677 case TargetOpcode::G_EXTRACT: {
2678 Register DstReg = I.getOperand(0).getReg();
2679 Register SrcReg = I.getOperand(1).getReg();
2680 LLT SrcTy = MRI.getType(SrcReg);
2681 LLT DstTy = MRI.getType(DstReg);
2682 (void)DstTy;
2683 unsigned SrcSize = SrcTy.getSizeInBits();
2684
2685 if (SrcTy.getSizeInBits() > 64) {
2686 // This should be an extract of an s128, which is like a vector extract.
2687 if (SrcTy.getSizeInBits() != 128)
2688 return false;
2689 // Only support extracting 64 bits from an s128 at the moment.
2690 if (DstTy.getSizeInBits() != 64)
2691 return false;
2692
2693 unsigned Offset = I.getOperand(2).getImm();
2694 if (Offset % 64 != 0)
2695 return false;
2696
2697 // Check we have the right regbank always.
2698 const RegisterBank &SrcRB = *RBI.getRegBank(SrcReg, MRI, TRI);
2699 const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
2700 assert(SrcRB.getID() == DstRB.getID() && "Wrong extract regbank!");
2701
2702 if (SrcRB.getID() == AArch64::GPRRegBankID) {
2703 MIB.buildInstr(TargetOpcode::COPY, {DstReg}, {})
2704 .addUse(SrcReg, 0, Offset == 0 ? AArch64::sube64 : AArch64::subo64);
2705 I.eraseFromParent();
2706 return true;
2707 }
2708
2709 // Emit the same code as a vector extract.
2710 // Offset must be a multiple of 64.
2711 unsigned LaneIdx = Offset / 64;
2712 MachineInstr *Extract = emitExtractVectorElt(
2713 DstReg, DstRB, LLT::scalar(64), SrcReg, LaneIdx, MIB);
2714 if (!Extract)
2715 return false;
2716 I.eraseFromParent();
2717 return true;
2718 }
2719
2720 I.setDesc(TII.get(SrcSize == 64 ? AArch64::UBFMXri : AArch64::UBFMWri));
2721 MachineInstrBuilder(MF, I).addImm(I.getOperand(2).getImm() +
2722 Ty.getSizeInBits() - 1);
2723
2724 if (SrcSize < 64) {
2725 assert(SrcSize == 32 && DstTy.getSizeInBits() == 16 &&
2726 "unexpected G_EXTRACT types");
2727 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2728 }
2729
2730 DstReg = MRI.createGenericVirtualRegister(LLT::scalar(64));
2731 MIB.setInsertPt(MIB.getMBB(), std::next(I.getIterator()));
2732 MIB.buildInstr(TargetOpcode::COPY, {I.getOperand(0).getReg()}, {})
2733 .addReg(DstReg, 0, AArch64::sub_32);
2734 RBI.constrainGenericRegister(I.getOperand(0).getReg(),
2735 AArch64::GPR32RegClass, MRI);
2736 I.getOperand(0).setReg(DstReg);
2737
2738 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2739 }
2740
2741 case TargetOpcode::G_INSERT: {
2742 LLT SrcTy = MRI.getType(I.getOperand(2).getReg());
2743 LLT DstTy = MRI.getType(I.getOperand(0).getReg());
2744 unsigned DstSize = DstTy.getSizeInBits();
2745 // Larger inserts are vectors, same-size ones should be something else by
2746 // now (split up or turned into COPYs).
2747 if (Ty.getSizeInBits() > 64 || SrcTy.getSizeInBits() > 32)
2748 return false;
2749
2750 I.setDesc(TII.get(DstSize == 64 ? AArch64::BFMXri : AArch64::BFMWri));
2751 unsigned LSB = I.getOperand(3).getImm();
2752 unsigned Width = MRI.getType(I.getOperand(2).getReg()).getSizeInBits();
2753 I.getOperand(3).setImm((DstSize - LSB) % DstSize);
2754 MachineInstrBuilder(MF, I).addImm(Width - 1);
2755
2756 if (DstSize < 64) {
2757 assert(DstSize == 32 && SrcTy.getSizeInBits() == 16 &&
2758 "unexpected G_INSERT types");
2759 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2760 }
2761
2762 Register SrcReg = MRI.createGenericVirtualRegister(LLT::scalar(64));
2763 BuildMI(MBB, I.getIterator(), I.getDebugLoc(),
2764 TII.get(AArch64::SUBREG_TO_REG))
2765 .addDef(SrcReg)
2766 .addImm(0)
2767 .addUse(I.getOperand(2).getReg())
2768 .addImm(AArch64::sub_32);
2769 RBI.constrainGenericRegister(I.getOperand(2).getReg(),
2770 AArch64::GPR32RegClass, MRI);
2771 I.getOperand(2).setReg(SrcReg);
2772
2773 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2774 }
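// Illustrative sketch (not in the original source): for a hypothetical 32-bit
//   %d:gpr(s32) = G_INSERT %old(s32), %fld(s16), 8     ; LSB = 8, Width = 16
// the rewrite above produces
//   %d = BFMWri %old, %fld, (32 - 8) % 32, 16 - 1      ; immr = 24, imms = 15
// i.e. the alias "bfi wD, wFld, #8, #16", inserting the 16-bit field at bit 8
// of the existing value.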
2775 case TargetOpcode::G_FRAME_INDEX: {
2776 // allocas and G_FRAME_INDEX are only supported in addrspace(0).
2777 if (Ty != LLT::pointer(0, 64)) {
2778 LLVM_DEBUG(dbgs() << "G_FRAME_INDEX pointer has type: " << Ty
2779 << ", expected: " << LLT::pointer(0, 64) << '\n');
2780 return false;
2781 }
2782 I.setDesc(TII.get(AArch64::ADDXri));
2783
2784 // MOs for a #0 shifted immediate.
2785 I.addOperand(MachineOperand::CreateImm(0));
2786 I.addOperand(MachineOperand::CreateImm(0));
2787
2788 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2789 }
2790
2791 case TargetOpcode::G_GLOBAL_VALUE: {
2792 auto GV = I.getOperand(1).getGlobal();
2793 if (GV->isThreadLocal())
2794 return selectTLSGlobalValue(I, MRI);
2795
2796 unsigned OpFlags = STI.ClassifyGlobalReference(GV, TM);
2797 if (OpFlags & AArch64II::MO_GOT) {
2798 I.setDesc(TII.get(AArch64::LOADgot));
2799 I.getOperand(1).setTargetFlags(OpFlags);
2800 } else if (TM.getCodeModel() == CodeModel::Large) {
2801 // Materialize the global using movz/movk instructions.
2802 materializeLargeCMVal(I, GV, OpFlags);
2803 I.eraseFromParent();
2804 return true;
2805 } else if (TM.getCodeModel() == CodeModel::Tiny) {
2806 I.setDesc(TII.get(AArch64::ADR));
2807 I.getOperand(1).setTargetFlags(OpFlags);
2808 } else {
2809 I.setDesc(TII.get(AArch64::MOVaddr));
2810 I.getOperand(1).setTargetFlags(OpFlags | AArch64II::MO_PAGE);
2811 MachineInstrBuilder MIB(MF, I);
2812 MIB.addGlobalAddress(GV, I.getOperand(1).getOffset(),
2813 OpFlags | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
2814 }
2815 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2816 }
2817
2818 case TargetOpcode::G_ZEXTLOAD:
2819 case TargetOpcode::G_LOAD:
2820 case TargetOpcode::G_STORE: {
2821 GLoadStore &LdSt = cast<GLoadStore>(I);
2822 bool IsZExtLoad = I.getOpcode() == TargetOpcode::G_ZEXTLOAD;
2823 LLT PtrTy = MRI.getType(LdSt.getPointerReg());
2824
2825 if (PtrTy != LLT::pointer(0, 64)) {
2826 LLVM_DEBUG(dbgs() << "Load/Store pointer has type: " << PtrTy
2827 << ", expected: " << LLT::pointer(0, 64) << '\n');
2828 return false;
2829 }
2830
2831 uint64_t MemSizeInBytes = LdSt.getMemSize();
2832 unsigned MemSizeInBits = LdSt.getMemSizeInBits();
2833 AtomicOrdering Order = LdSt.getMMO().getSuccessOrdering();
2834
2835 // Need special instructions for atomics that affect ordering.
2836 if (Order != AtomicOrdering::NotAtomic &&
2837 Order != AtomicOrdering::Unordered &&
2838 Order != AtomicOrdering::Monotonic) {
2839 assert(!isa<GZExtLoad>(LdSt));
2840 if (MemSizeInBytes > 64)
2841 return false;
2842
2843 if (isa<GLoad>(LdSt)) {
2844 static unsigned Opcodes[] = {AArch64::LDARB, AArch64::LDARH,
2845 AArch64::LDARW, AArch64::LDARX};
2846 I.setDesc(TII.get(Opcodes[Log2_32(MemSizeInBytes)]));
2847 } else {
2848 static unsigned Opcodes[] = {AArch64::STLRB, AArch64::STLRH,
2849 AArch64::STLRW, AArch64::STLRX};
2850 Register ValReg = LdSt.getReg(0);
2851 if (MRI.getType(ValReg).getSizeInBits() == 64 && MemSizeInBits != 64) {
2852 // Emit a subreg copy of 32 bits.
2853 Register NewVal = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
2854 MIB.buildInstr(TargetOpcode::COPY, {NewVal}, {})
2855 .addReg(I.getOperand(0).getReg(), 0, AArch64::sub_32);
2856 I.getOperand(0).setReg(NewVal);
2857 }
2858 I.setDesc(TII.get(Opcodes[Log2_32(MemSizeInBytes)]));
2859 }
2860 constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2861 return true;
2862 }
2863
2864#ifndef NDEBUG
2865 const Register PtrReg = LdSt.getPointerReg();
2866 const RegisterBank &PtrRB = *RBI.getRegBank(PtrReg, MRI, TRI);
2867 // Check that the pointer register is valid.
2868 assert(PtrRB.getID() == AArch64::GPRRegBankID &&
2869 "Load/Store pointer operand isn't a GPR");
2870 assert(MRI.getType(PtrReg).isPointer() &&
2871 "Load/Store pointer operand isn't a pointer");
2872#endif
2873
2874 const Register ValReg = LdSt.getReg(0);
2875 const LLT ValTy = MRI.getType(ValReg);
2876 const RegisterBank &RB = *RBI.getRegBank(ValReg, MRI, TRI);
2877
2878 // The code below doesn't support truncating stores, so we need to split it
2879 // again.
2880 if (isa<GStore>(LdSt) && ValTy.getSizeInBits() > MemSizeInBits) {
2881 unsigned SubReg;
2882 LLT MemTy = LdSt.getMMO().getMemoryType();
2883 auto *RC = getRegClassForTypeOnBank(MemTy, RB, RBI);
2884 if (!getSubRegForClass(RC, TRI, SubReg))
2885 return false;
2886
2887 // Generate a subreg copy.
2888 auto Copy = MIB.buildInstr(TargetOpcode::COPY, {MemTy}, {})
2889 .addReg(ValReg, 0, SubReg)
2890 .getReg(0);
2891 RBI.constrainGenericRegister(Copy, *RC, MRI);
2892 LdSt.getOperand(0).setReg(Copy);
2893 } else if (isa<GLoad>(LdSt) && ValTy.getSizeInBits() > MemSizeInBits) {
2894 // If this is an any-extending load from the FPR bank, split it into a regular
2895 // load + extend.
2896 if (RB.getID() == AArch64::FPRRegBankID) {
2897 unsigned SubReg;
2898 LLT MemTy = LdSt.getMMO().getMemoryType();
2899 auto *RC = getRegClassForTypeOnBank(MemTy, RB, RBI);
2900 if (!getSubRegForClass(RC, TRI, SubReg))
2901 return false;
2902 Register OldDst = LdSt.getReg(0);
2903 Register NewDst =
2904 MRI.createGenericVirtualRegister(LdSt.getMMO().getMemoryType());
2905 LdSt.getOperand(0).setReg(NewDst);
2906 MRI.setRegBank(NewDst, RB);
2907 // Generate a SUBREG_TO_REG to extend it.
2908 MIB.setInsertPt(MIB.getMBB(), std::next(LdSt.getIterator()));
2909 MIB.buildInstr(AArch64::SUBREG_TO_REG, {OldDst}, {})
2910 .addImm(0)
2911 .addUse(NewDst)
2912 .addImm(SubReg);
2913 auto SubRegRC = getRegClassForTypeOnBank(MRI.getType(OldDst), RB, RBI);
2914 RBI.constrainGenericRegister(OldDst, *SubRegRC, MRI);
2915 MIB.setInstr(LdSt);
2916 }
2917 }
2918
2919 // Helper lambda for partially selecting I. Either returns the original
2920 // instruction with an updated opcode, or a new instruction.
2921 auto SelectLoadStoreAddressingMode = [&]() -> MachineInstr * {
2922 bool IsStore = isa<GStore>(I);
1. Assuming 'I' is not a 'GStore'
2923 const unsigned NewOpc =
2924 selectLoadStoreUIOp(I.getOpcode(), RB.getID(), MemSizeInBits);
2925 if (NewOpc == I.getOpcode())
2. Taking false branch
2926 return nullptr;
2927 // Check if we can fold anything into the addressing mode.
2928 auto AddrModeFns =
2929 selectAddrModeIndexed(I.getOperand(1), MemSizeInBytes);
3. Calling 'AArch64InstructionSelector::selectAddrModeIndexed'
2930 if (!AddrModeFns) {
2931 // Can't fold anything. Use the original instruction.
2932 I.setDesc(TII.get(NewOpc));
2933 I.addOperand(MachineOperand::CreateImm(0));
2934 return &I;
2935 }
2936
2937 // Folded something. Create a new instruction and return it.
2938 auto NewInst = MIB.buildInstr(NewOpc, {}, {}, I.getFlags());
2939 Register CurValReg = I.getOperand(0).getReg();
2940 IsStore ? NewInst.addUse(CurValReg) : NewInst.addDef(CurValReg);
2941 NewInst.cloneMemRefs(I);
2942 for (auto &Fn : *AddrModeFns)
2943 Fn(NewInst);
2944 I.eraseFromParent();
2945 return &*NewInst;
2946 };
2947
2948 MachineInstr *LoadStore = SelectLoadStoreAddressingMode();
2949 if (!LoadStore)
2950 return false;
2951
2952 // If we're storing a 0, use WZR/XZR.
2953 if (Opcode == TargetOpcode::G_STORE) {
2954 auto CVal = getIConstantVRegValWithLookThrough(
2955 LoadStore->getOperand(0).getReg(), MRI);
2956 if (CVal && CVal->Value == 0) {
2957 switch (LoadStore->getOpcode()) {
2958 case AArch64::STRWui:
2959 case AArch64::STRHHui:
2960 case AArch64::STRBBui:
2961 LoadStore->getOperand(0).setReg(AArch64::WZR);
2962 break;
2963 case AArch64::STRXui:
2964 LoadStore->getOperand(0).setReg(AArch64::XZR);
2965 break;
2966 }
2967 }
2968 }
2969
2970 if (IsZExtLoad) {
2971 // The zextload from a smaller type to i32 should be handled by the
2972 // importer.
2973 if (MRI.getType(LoadStore->getOperand(0).getReg()).getSizeInBits() != 64)
2974 return false;
2975 // If we have a ZEXTLOAD then change the load's type to be a narrower reg
2976 // and zero_extend with SUBREG_TO_REG.
2977 Register LdReg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
2978 Register DstReg = LoadStore->getOperand(0).getReg();
2979 LoadStore->getOperand(0).setReg(LdReg);
2980
2981 MIB.setInsertPt(MIB.getMBB(), std::next(LoadStore->getIterator()));
2982 MIB.buildInstr(AArch64::SUBREG_TO_REG, {DstReg}, {})
2983 .addImm(0)
2984 .addUse(LdReg)
2985 .addImm(AArch64::sub_32);
2986 constrainSelectedInstRegOperands(*LoadStore, TII, TRI, RBI);
2987 return RBI.constrainGenericRegister(DstReg, AArch64::GPR64allRegClass,
2988 MRI);
2989 }
2990 return constrainSelectedInstRegOperands(*LoadStore, TII, TRI, RBI);
2991 }
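// Illustrative sketch (not in the original source): two of the special cases
// handled above, on hypothetical registers and with no address fold applied.
//
//   G_STORE %zero(s32), %p(p0)       ; %zero = G_CONSTANT i32 0
//     --> STRWui $wzr, %p, 0         ; a stored zero reuses WZR directly
//
//   %v(s64) = G_ZEXTLOAD %p(p0) :: (load (s32))
//     --> %t:gpr32 = LDRWui %p, 0
//         %v = SUBREG_TO_REG 0, %t, %subreg.sub_32   ; implicit zero-extend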
2992
2993 case TargetOpcode::G_SMULH:
2994 case TargetOpcode::G_UMULH: {
2995 // Reject the various things we don't support yet.
2996 if (unsupportedBinOp(I, RBI, MRI, TRI))
2997 return false;
2998
2999 const Register DefReg = I.getOperand(0).getReg();
3000 const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI);
3001
3002 if (RB.getID() != AArch64::GPRRegBankID) {
3003 LLVM_DEBUG(dbgs() << "G_[SU]MULH on bank: " << RB << ", expected: GPR\n");
3004 return false;
3005 }
3006
3007 if (Ty != LLT::scalar(64)) {
3008 LLVM_DEBUG(dbgs() << "G_[SU]MULH has type: " << Ty
3009 << ", expected: " << LLT::scalar(64) << '\n');
3010 return false;
3011 }
3012
3013 unsigned NewOpc = I.getOpcode() == TargetOpcode::G_SMULH ? AArch64::SMULHrr
3014 : AArch64::UMULHrr;
3015 I.setDesc(TII.get(NewOpc));
3016
3017 // Now that we selected an opcode, we need to constrain the register
3018 // operands to use appropriate classes.
3019 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3020 }
3021 case TargetOpcode::G_LSHR:
3022 case TargetOpcode::G_ASHR:
3023 if (MRI.getType(I.getOperand(0).getReg()).isVector())
3024 return selectVectorAshrLshr(I, MRI);
3025 LLVM_FALLTHROUGH;
3026 case TargetOpcode::G_SHL:
3027 if (Opcode == TargetOpcode::G_SHL &&
3028 MRI.getType(I.getOperand(0).getReg()).isVector())
3029 return selectVectorSHL(I, MRI);
3030
3031 // These shifts were legalized to have 64 bit shift amounts because we
3032 // want to take advantage of the selection patterns that assume the
3033 // immediates are s64s, however, selectBinaryOp will assume both operands
3034 // will have the same bit size.
3035 {
3036 Register SrcReg = I.getOperand(1).getReg();
3037 Register ShiftReg = I.getOperand(2).getReg();
3038 const LLT ShiftTy = MRI.getType(ShiftReg);
3039 const LLT SrcTy = MRI.getType(SrcReg);
3040 if (!SrcTy.isVector() && SrcTy.getSizeInBits() == 32 &&
3041 ShiftTy.getSizeInBits() == 64) {
3042 assert(!ShiftTy.isVector() && "unexpected vector shift ty");
3043 assert(MRI.getVRegDef(ShiftReg) &&
3044 "could not find a vreg definition for shift amount");
3045 // Insert a subregister copy to implement a 64->32 trunc
3046 auto Trunc = MIB.buildInstr(TargetOpcode::COPY, {SrcTy}, {})
3047 .addReg(ShiftReg, 0, AArch64::sub_32);
3048 MRI.setRegBank(Trunc.getReg(0), RBI.getRegBank(AArch64::GPRRegBankID));
3049 I.getOperand(2).setReg(Trunc.getReg(0));
3050 }
3051 }
3052 LLVM_FALLTHROUGH;
3053 case TargetOpcode::G_OR: {
3054 // Reject the various things we don't support yet.
3055 if (unsupportedBinOp(I, RBI, MRI, TRI))
3056 return false;
3057
3058 const unsigned OpSize = Ty.getSizeInBits();
3059
3060 const Register DefReg = I.getOperand(0).getReg();
3061 const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI);
3062
3063 const unsigned NewOpc = selectBinaryOp(I.getOpcode(), RB.getID(), OpSize);
3064 if (NewOpc == I.getOpcode())
3065 return false;
3066
3067 I.setDesc(TII.get(NewOpc));
3068 // FIXME: Should the type be always reset in setDesc?
3069
3070 // Now that we selected an opcode, we need to constrain the register
3071 // operands to use appropriate classes.
3072 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3073 }
3074
3075 case TargetOpcode::G_PTR_ADD: {
3076 emitADD(I.getOperand(0).getReg(), I.getOperand(1), I.getOperand(2), MIB);
3077 I.eraseFromParent();
3078 return true;
3079 }
3080 case TargetOpcode::G_SADDO:
3081 case TargetOpcode::G_UADDO:
3082 case TargetOpcode::G_SSUBO:
3083 case TargetOpcode::G_USUBO: {
3084 // Emit the operation and get the correct condition code.
3085 auto OpAndCC = emitOverflowOp(Opcode, I.getOperand(0).getReg(),
3086 I.getOperand(2), I.getOperand(3), MIB);
3087
3088 // Now, put the overflow result in the register given by the first operand
3089 // to the overflow op. CSINC increments the result when the predicate is
3090 // false, so to get the increment when it's true, we need to use the
3091 // inverse. In this case, we want to increment when carry is set.
3092 Register ZReg = AArch64::WZR;
3093 emitCSINC(/*Dst=*/I.getOperand(1).getReg(), /*Src1=*/ZReg, /*Src2=*/ZReg,
3094 getInvertedCondCode(OpAndCC.second), MIB);
3095 I.eraseFromParent();
3096 return true;
3097 }
3098
3099 case TargetOpcode::G_PTRMASK: {
3100 Register MaskReg = I.getOperand(2).getReg();
3101 Optional<int64_t> MaskVal = getIConstantVRegSExtVal(MaskReg, MRI);
3102 // TODO: Implement arbitrary cases
3103 if (!MaskVal || !isShiftedMask_64(*MaskVal))
3104 return false;
3105
3106 uint64_t Mask = *MaskVal;
3107 I.setDesc(TII.get(AArch64::ANDXri));
3108 I.getOperand(2).ChangeToImmediate(
3109 AArch64_AM::encodeLogicalImmediate(Mask, 64));
3110
3111 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3112 }
3113 case TargetOpcode::G_PTRTOINT:
3114 case TargetOpcode::G_TRUNC: {
3115 const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
3116 const LLT SrcTy = MRI.getType(I.getOperand(1).getReg());
3117
3118 const Register DstReg = I.getOperand(0).getReg();
3119 const Register SrcReg = I.getOperand(1).getReg();
3120
3121 const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
3122 const RegisterBank &SrcRB = *RBI.getRegBank(SrcReg, MRI, TRI);
3123
3124 if (DstRB.getID() != SrcRB.getID()) {
3125 LLVM_DEBUG(
3126 dbgs() << "G_TRUNC/G_PTRTOINT input/output on different banks\n");
3127 return false;
3128 }
3129
3130 if (DstRB.getID() == AArch64::GPRRegBankID) {
3131 const TargetRegisterClass *DstRC =
3132 getRegClassForTypeOnBank(DstTy, DstRB, RBI);
3133 if (!DstRC)
3134 return false;
3135
3136 const TargetRegisterClass *SrcRC =
3137 getRegClassForTypeOnBank(SrcTy, SrcRB, RBI);
3138 if (!SrcRC)
3139 return false;
3140
3141 if (!RBI.constrainGenericRegister(SrcReg, *SrcRC, MRI) ||
3142 !RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) {
3143 LLVM_DEBUG(dbgs() << "Failed to constrain G_TRUNC/G_PTRTOINT\n");
3144 return false;
3145 }
3146
3147 if (DstRC == SrcRC) {
3148 // Nothing to be done
3149 } else if (Opcode == TargetOpcode::G_TRUNC && DstTy == LLT::scalar(32) &&
3150 SrcTy == LLT::scalar(64)) {
3151 llvm_unreachable("TableGen can import this case");
3152 return false;
3153 } else if (DstRC == &AArch64::GPR32RegClass &&
3154 SrcRC == &AArch64::GPR64RegClass) {
3155 I.getOperand(1).setSubReg(AArch64::sub_32);
3156 } else {
3157 LLVM_DEBUG(
3158 dbgs() << "Unhandled mismatched classes in G_TRUNC/G_PTRTOINT\n");
3159 return false;
3160 }
3161
3162 I.setDesc(TII.get(TargetOpcode::COPY));
3163 return true;
3164 } else if (DstRB.getID() == AArch64::FPRRegBankID) {
3165 if (DstTy == LLT::fixed_vector(4, 16) &&
3166 SrcTy == LLT::fixed_vector(4, 32)) {
3167 I.setDesc(TII.get(AArch64::XTNv4i16));
3168 constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3169 return true;
3170 }
3171
3172 if (!SrcTy.isVector() && SrcTy.getSizeInBits() == 128) {
3173 MachineInstr *Extract = emitExtractVectorElt(
3174 DstReg, DstRB, LLT::scalar(DstTy.getSizeInBits()), SrcReg, 0, MIB);
3175 if (!Extract)
3176 return false;
3177 I.eraseFromParent();
3178 return true;
3179 }
3180
3181 // We might have a vector G_PTRTOINT, in which case just emit a COPY.
3182 if (Opcode == TargetOpcode::G_PTRTOINT) {
3183 assert(DstTy.isVector() && "Expected an FPR ptrtoint to be a vector");
3184 I.setDesc(TII.get(TargetOpcode::COPY));
3185 return selectCopy(I, TII, MRI, TRI, RBI);
3186 }
3187 }
3188
3189 return false;
3190 }
3191
3192 case TargetOpcode::G_ANYEXT: {
3193 if (selectUSMovFromExtend(I, MRI))
3194 return true;
3195
3196 const Register DstReg = I.getOperand(0).getReg();
3197 const Register SrcReg = I.getOperand(1).getReg();
3198
3199 const RegisterBank &RBDst = *RBI.getRegBank(DstReg, MRI, TRI);
3200 if (RBDst.getID() != AArch64::GPRRegBankID) {
3201 LLVM_DEBUG(dbgs() << "G_ANYEXT on bank: " << RBDst
3202 << ", expected: GPR\n");
3203 return false;
3204 }
3205
3206 const RegisterBank &RBSrc = *RBI.getRegBank(SrcReg, MRI, TRI);
3207 if (RBSrc.getID() != AArch64::GPRRegBankID) {
3208 LLVM_DEBUG(dbgs() << "G_ANYEXT on bank: " << RBSrc
3209 << ", expected: GPR\n");
3210 return false;
3211 }
3212
3213 const unsigned DstSize = MRI.getType(DstReg).getSizeInBits();
3214
3215 if (DstSize == 0) {
3216 LLVM_DEBUG(dbgs() << "G_ANYEXT operand has no size, not a gvreg?\n");
3217 return false;
3218 }
3219
3220 if (DstSize != 64 && DstSize > 32) {
3221 LLVM_DEBUG(dbgs() << "G_ANYEXT to size: " << DstSize
3222 << ", expected: 32 or 64\n");
3223 return false;
3224 }
3225 // At this point G_ANYEXT is just like a plain COPY, but we need
3226 // to explicitly form the 64-bit value if any.
3227 if (DstSize > 32) {
3228 Register ExtSrc = MRI.createVirtualRegister(&AArch64::GPR64allRegClass);
3229 BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::SUBREG_TO_REG))
3230 .addDef(ExtSrc)
3231 .addImm(0)
3232 .addUse(SrcReg)
3233 .addImm(AArch64::sub_32);
3234 I.getOperand(1).setReg(ExtSrc);
3235 }
3236 return selectCopy(I, TII, MRI, TRI, RBI);
3237 }
3238
3239 case TargetOpcode::G_ZEXT:
3240 case TargetOpcode::G_SEXT_INREG:
3241 case TargetOpcode::G_SEXT: {
3242 if (selectUSMovFromExtend(I, MRI))
3243 return true;
3244
3245 unsigned Opcode = I.getOpcode();
3246 const bool IsSigned = Opcode != TargetOpcode::G_ZEXT;
3247 const Register DefReg = I.getOperand(0).getReg();
3248 Register SrcReg = I.getOperand(1).getReg();
3249 const LLT DstTy = MRI.getType(DefReg);
3250 const LLT SrcTy = MRI.getType(SrcReg);
3251 unsigned DstSize = DstTy.getSizeInBits();
3252 unsigned SrcSize = SrcTy.getSizeInBits();
3253
3254 // SEXT_INREG has the same src reg size as dst, the size of the value to be
3255 // extended is encoded in the imm.
3256 if (Opcode == TargetOpcode::G_SEXT_INREG)
3257 SrcSize = I.getOperand(2).getImm();
3258
3259 if (DstTy.isVector())
3260 return false; // Should be handled by imported patterns.
3261
3262 assert((*RBI.getRegBank(DefReg, MRI, TRI)).getID() ==
3263 AArch64::GPRRegBankID &&
3264 "Unexpected ext regbank");
3265
3266 MachineInstr *ExtI;
3267
3268 // First check if we're extending the result of a load which has a dest type
3269 // smaller than 32 bits, then this zext is redundant. GPR32 is the smallest
3270 // GPR register on AArch64 and all loads which are smaller automatically
3271 // zero-extend the upper bits. E.g.
3272 // %v(s8) = G_LOAD %p, :: (load 1)
3273 // %v2(s32) = G_ZEXT %v(s8)
3274 if (!IsSigned) {
3275 auto *LoadMI = getOpcodeDef(TargetOpcode::G_LOAD, SrcReg, MRI);
3276 bool IsGPR =
3277 RBI.getRegBank(SrcReg, MRI, TRI)->getID() == AArch64::GPRRegBankID;
3278 if (LoadMI && IsGPR) {
3279 const MachineMemOperand *MemOp = *LoadMI->memoperands_begin();
3280 unsigned BytesLoaded = MemOp->getSize();
3281 if (BytesLoaded < 4 && SrcTy.getSizeInBytes() == BytesLoaded)
3282 return selectCopy(I, TII, MRI, TRI, RBI);
3283 }
3284
3285 // For the 32-bit -> 64-bit case, we can emit a mov (ORRWrs)
3286 // + SUBREG_TO_REG.
3287 //
3288 // If we are zero extending from 32 bits to 64 bits, it's possible that
3289 // the instruction implicitly does the zero extend for us. In that case,
3290 // we only need the SUBREG_TO_REG.
3291 if (IsGPR && SrcSize == 32 && DstSize == 64) {
3292 // Unlike with the G_LOAD case, we don't want to look through copies
3293 // here. (See isDef32.)
3294 MachineInstr *Def = MRI.getVRegDef(SrcReg);
3295 Register SubregToRegSrc = SrcReg;
3296
3297 // Does the instruction implicitly zero extend?
3298 if (!Def || !isDef32(*Def)) {
3299 // No. Zero out using an OR.
3300 Register OrDst = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
3301 const Register ZReg = AArch64::WZR;
3302 MIB.buildInstr(AArch64::ORRWrs, {OrDst}, {ZReg, SrcReg}).addImm(0);
3303 SubregToRegSrc = OrDst;
3304 }
3305
3306 MIB.buildInstr(AArch64::SUBREG_TO_REG, {DefReg}, {})
3307 .addImm(0)
3308 .addUse(SubregToRegSrc)
3309 .addImm(AArch64::sub_32);
3310
3311 if (!RBI.constrainGenericRegister(DefReg, AArch64::GPR64RegClass,
3312 MRI)) {
3313 LLVM_DEBUG(dbgs() << "Failed to constrain G_ZEXT destination\n");
3314 return false;
3315 }
3316
3317 if (!RBI.constrainGenericRegister(SrcReg, AArch64::GPR32RegClass,
3318 MRI)) {
3319 LLVM_DEBUG(dbgs() << "Failed to constrain G_ZEXT source\n");
3320 return false;
3321 }
3322
3323 I.eraseFromParent();
3324 return true;
3325 }
3326 }
3327
3328 if (DstSize == 64) {
3329 if (Opcode != TargetOpcode::G_SEXT_INREG) {
3330 // FIXME: Can we avoid manually doing this?
3331 if (!RBI.constrainGenericRegister(SrcReg, AArch64::GPR32RegClass,
3332 MRI)) {
3333 LLVM_DEBUG(dbgs() << "Failed to constrain " << TII.getName(Opcode)
3334 << " operand\n");
3335 return false;
3336 }
3337 SrcReg = MIB.buildInstr(AArch64::SUBREG_TO_REG,
3338 {&AArch64::GPR64RegClass}, {})
3339 .addImm(0)
3340 .addUse(SrcReg)
3341 .addImm(AArch64::sub_32)
3342 .getReg(0);
3343 }
3344
3345 ExtI = MIB.buildInstr(IsSigned ? AArch64::SBFMXri : AArch64::UBFMXri,
3346 {DefReg}, {SrcReg})
3347 .addImm(0)
3348 .addImm(SrcSize - 1);
3349 } else if (DstSize <= 32) {
3350 ExtI = MIB.buildInstr(IsSigned ? AArch64::SBFMWri : AArch64::UBFMWri,
3351 {DefReg}, {SrcReg})
3352 .addImm(0)
3353 .addImm(SrcSize - 1);
3354 } else {
3355 return false;
3356 }
3357
3358 constrainSelectedInstRegOperands(*ExtI, TII, TRI, RBI);
3359 I.eraseFromParent();
3360 return true;
3361 }
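// Illustrative sketch (not in the original source): the common extension
// shapes this fallback handles, on hypothetical registers.
//
//   %v(s64) = G_ZEXT %w(s32)
//     --> %v = SUBREG_TO_REG 0, %w, %subreg.sub_32       ; if %w is "def32"
//     --> %t = ORRWrs $wzr, %w, 0                        ; otherwise zero the
//         %v = SUBREG_TO_REG 0, %t, %subreg.sub_32       ; upper bits first
//
//   %v(s32) = G_SEXT %b(s8)   -->   %v = SBFMWri %b, 0, 7   ; i.e. sxtb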
3362
3363 case TargetOpcode::G_SITOFP:
3364 case TargetOpcode::G_UITOFP:
3365 case TargetOpcode::G_FPTOSI:
3366 case TargetOpcode::G_FPTOUI: {
3367 const LLT DstTy = MRI.getType(I.getOperand(0).getReg()),
3368 SrcTy = MRI.getType(I.getOperand(1).getReg());
3369 const unsigned NewOpc = selectFPConvOpc(Opcode, DstTy, SrcTy);
3370 if (NewOpc == Opcode)
3371 return false;
3372
3373 I.setDesc(TII.get(NewOpc));
3374 constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3375 I.setFlags(MachineInstr::NoFPExcept);
3376
3377 return true;
3378 }
3379
3380 case TargetOpcode::G_FREEZE:
3381 return selectCopy(I, TII, MRI, TRI, RBI);
3382
3383 case TargetOpcode::G_INTTOPTR:
3384 // The importer is currently unable to import pointer types since they
3385 // didn't exist in SelectionDAG.
3386 return selectCopy(I, TII, MRI, TRI, RBI);
3387
3388 case TargetOpcode::G_BITCAST:
3389 // Imported SelectionDAG rules can handle every bitcast except those that
3390 // bitcast from a type to the same type. Ideally, these shouldn't occur
3391 // but we might not run an optimizer that deletes them. The other exception
3392 // is bitcasts involving pointer types, as SelectionDAG has no knowledge
3393 // of them.
3394 return selectCopy(I, TII, MRI, TRI, RBI);
3395
3396 case TargetOpcode::G_SELECT: {
3397 auto &Sel = cast<GSelect>(I);
3398 if (MRI.getType(Sel.getCondReg()) != LLT::scalar(1)) {
3399 LLVM_DEBUG(dbgs() << "G_SELECT cond has type: " << Ty
3400 << ", expected: " << LLT::scalar(1) << '\n');
3401 return false;
3402 }
3403
3404 const Register CondReg = Sel.getCondReg();
3405 const Register TReg = Sel.getTrueReg();
3406 const Register FReg = Sel.getFalseReg();
3407
3408 if (tryOptSelect(Sel))
3409 return true;
3410
3411 // Make sure to use an unused vreg instead of wzr, so that the peephole
3412 // optimizations will be able to optimize these.
3413 Register DeadVReg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
3414 auto TstMI = MIB.buildInstr(AArch64::ANDSWri, {DeadVReg}, {CondReg})
3415 .addImm(AArch64_AM::encodeLogicalImmediate(1, 32));
3416 constrainSelectedInstRegOperands(*TstMI, TII, TRI, RBI);
3417 if (!emitSelect(Sel.getReg(0), TReg, FReg, AArch64CC::NE, MIB))
3418 return false;
3419 Sel.eraseFromParent();
3420 return true;
3421 }
3422 case TargetOpcode::G_ICMP: {
3423 if (Ty.isVector())
3424 return selectVectorICmp(I, MRI);
3425
3426 if (Ty != LLT::scalar(32)) {
3427 LLVM_DEBUG(dbgs() << "G_ICMP result has type: " << Ty
3428 << ", expected: " << LLT::scalar(32) << '\n');
3429 return false;
3430 }
3431
3432 auto Pred = static_cast<CmpInst::Predicate>(I.getOperand(1).getPredicate());
3433 const AArch64CC::CondCode InvCC =
3434 changeICMPPredToAArch64CC(CmpInst::getInversePredicate(Pred));
3435 emitIntegerCompare(I.getOperand(2), I.getOperand(3), I.getOperand(1), MIB);
3436 emitCSINC(/*Dst=*/I.getOperand(0).getReg(), /*Src1=*/AArch64::WZR,
3437 /*Src2=*/AArch64::WZR, InvCC, MIB);
3438 I.eraseFromParent();
3439 return true;
3440 }
3441
3442 case TargetOpcode::G_FCMP: {
3443 CmpInst::Predicate Pred =
3444 static_cast<CmpInst::Predicate>(I.getOperand(1).getPredicate());
3445 if (!emitFPCompare(I.getOperand(2).getReg(), I.getOperand(3).getReg(), MIB,
3446 Pred) ||
3447 !emitCSetForFCmp(I.getOperand(0).getReg(), Pred, MIB))
3448 return false;
3449 I.eraseFromParent();
3450 return true;
3451 }
3452 case TargetOpcode::G_VASTART:
3453 return STI.isTargetDarwin() ? selectVaStartDarwin(I, MF, MRI)
3454 : selectVaStartAAPCS(I, MF, MRI);
3455 case TargetOpcode::G_INTRINSIC:
3456 return selectIntrinsic(I, MRI);
3457 case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS:
3458 return selectIntrinsicWithSideEffects(I, MRI);
3459 case TargetOpcode::G_IMPLICIT_DEF: {
3460 I.setDesc(TII.get(TargetOpcode::IMPLICIT_DEF));
3461 const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
3462 const Register DstReg = I.getOperand(0).getReg();
3463 const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
3464 const TargetRegisterClass *DstRC =
3465 getRegClassForTypeOnBank(DstTy, DstRB, RBI);
3466 RBI.constrainGenericRegister(DstReg, *DstRC, MRI);
3467 return true;
3468 }
3469 case TargetOpcode::G_BLOCK_ADDR: {
3470 if (TM.getCodeModel() == CodeModel::Large) {
3471 materializeLargeCMVal(I, I.getOperand(1).getBlockAddress(), 0);
3472 I.eraseFromParent();
3473 return true;
3474 } else {
3475 I.setDesc(TII.get(AArch64::MOVaddrBA));
3476 auto MovMI = BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::MOVaddrBA),
3477 I.getOperand(0).getReg())
3478 .addBlockAddress(I.getOperand(1).getBlockAddress(),
3479 /* Offset */ 0, AArch64II::MO_PAGE)
3480 .addBlockAddress(
3481 I.getOperand(1).getBlockAddress(), /* Offset */ 0,
3482 AArch64II::MO_NC | AArch64II::MO_PAGEOFF);
3483 I.eraseFromParent();
3484 return constrainSelectedInstRegOperands(*MovMI, TII, TRI, RBI);
3485 }
3486 }
3487 case AArch64::G_DUP: {
3488 // When the scalar of G_DUP is an s8/s16 gpr, they can't be selected by
3489 // imported patterns. Do it manually here. Avoiding generating s16 gpr is
3490 // difficult because at RBS we may end up pessimizing the fpr case if we
3491 // decided to add an anyextend to fix this. Manual selection is the most
3492 // robust solution for now.
3493 if (RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI)->getID() !=
3494 AArch64::GPRRegBankID)
3495 return false; // We expect the fpr regbank case to be imported.
3496 LLT VecTy = MRI.getType(I.getOperand(0).getReg());
3497 if (VecTy == LLT::fixed_vector(8, 8))
3498 I.setDesc(TII.get(AArch64::DUPv8i8gpr));
3499 else if (VecTy == LLT::fixed_vector(16, 8))
3500 I.setDesc(TII.get(AArch64::DUPv16i8gpr));
3501 else if (VecTy == LLT::fixed_vector(4, 16))
3502 I.setDesc(TII.get(AArch64::DUPv4i16gpr));
3503 else if (VecTy == LLT::fixed_vector(8, 16))
3504 I.setDesc(TII.get(AArch64::DUPv8i16gpr));
3505 else
3506 return false;
3507 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3508 }
3509 case TargetOpcode::G_INTRINSIC_TRUNC:
3510 return selectIntrinsicTrunc(I, MRI);
3511 case TargetOpcode::G_INTRINSIC_ROUND:
3512 return selectIntrinsicRound(I, MRI);
3513 case TargetOpcode::G_BUILD_VECTOR:
3514 return selectBuildVector(I, MRI);
3515 case TargetOpcode::G_MERGE_VALUES:
3516 return selectMergeValues(I, MRI);
3517 case TargetOpcode::G_UNMERGE_VALUES:
3518 return selectUnmergeValues(I, MRI);
3519 case TargetOpcode::G_SHUFFLE_VECTOR:
3520 return selectShuffleVector(I, MRI);
3521 case TargetOpcode::G_EXTRACT_VECTOR_ELT:
3522 return selectExtractElt(I, MRI);
3523 case TargetOpcode::G_INSERT_VECTOR_ELT:
3524 return selectInsertElt(I, MRI);
3525 case TargetOpcode::G_CONCAT_VECTORS:
3526 return selectConcatVectors(I, MRI);
3527 case TargetOpcode::G_JUMP_TABLE:
3528 return selectJumpTable(I, MRI);
3529 case TargetOpcode::G_VECREDUCE_FADD:
3530 case TargetOpcode::G_VECREDUCE_ADD:
3531 return selectReduction(I, MRI);
3532 case TargetOpcode::G_MEMCPY:
3533 case TargetOpcode::G_MEMCPY_INLINE:
3534 case TargetOpcode::G_MEMMOVE:
3535 case TargetOpcode::G_MEMSET:
3536 assert(STI.hasMOPS() && "Shouldn't get here without +mops feature");
3537 return selectMOPS(I, MRI);
3538 }
3539
3540 return false;
3541}
3542
3543bool AArch64InstructionSelector::selectReduction(MachineInstr &I,
3544 MachineRegisterInfo &MRI) {
3545 Register VecReg = I.getOperand(1).getReg();
3546 LLT VecTy = MRI.getType(VecReg);
3547 if (I.getOpcode() == TargetOpcode::G_VECREDUCE_ADD) {
3548 // For <2 x i32> ADDPv2i32 generates an FPR64 value, so we need to emit
3549 // a subregister copy afterwards.
3550 if (VecTy == LLT::fixed_vector(2, 32)) {
3551 Register DstReg = I.getOperand(0).getReg();
3552 auto AddP = MIB.buildInstr(AArch64::ADDPv2i32, {&AArch64::FPR64RegClass},
3553 {VecReg, VecReg});
3554 auto Copy = MIB.buildInstr(TargetOpcode::COPY, {DstReg}, {})
3555 .addReg(AddP.getReg(0), 0, AArch64::ssub)
3556 .getReg(0);
3557 RBI.constrainGenericRegister(Copy, AArch64::FPR32RegClass, MRI);
3558 I.eraseFromParent();
3559 return constrainSelectedInstRegOperands(*AddP, TII, TRI, RBI);
3560 }
3561
3562 unsigned Opc = 0;
3563 if (VecTy == LLT::fixed_vector(16, 8))
3564 Opc = AArch64::ADDVv16i8v;
3565 else if (VecTy == LLT::fixed_vector(8, 16))
3566 Opc = AArch64::ADDVv8i16v;
3567 else if (VecTy == LLT::fixed_vector(4, 32))
3568 Opc = AArch64::ADDVv4i32v;
3569 else if (VecTy == LLT::fixed_vector(2, 64))
3570 Opc = AArch64::ADDPv2i64p;
3571 else {
3572 LLVM_DEBUG(dbgs() << "Unhandled type for add reduction");
3573 return false;
3574 }
3575 I.setDesc(TII.get(Opc));
3576 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3577 }
3578
3579 if (I.getOpcode() == TargetOpcode::G_VECREDUCE_FADD) {
3580 unsigned Opc = 0;
3581 if (VecTy == LLT::fixed_vector(2, 32))
3582 Opc = AArch64::FADDPv2i32p;
3583 else if (VecTy == LLT::fixed_vector(2, 64))
3584 Opc = AArch64::FADDPv2i64p;
3585 else {
3586 LLVM_DEBUG(dbgs() << "Unhandled type for fadd reduction");
3587 return false;
3588 }
3589 I.setDesc(TII.get(Opc));
3590 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3591 }
3592 return false;
3593}
3594
3595bool AArch64InstructionSelector::selectMOPS(MachineInstr &GI,
3596 MachineRegisterInfo &MRI) {
3597 unsigned Mopcode;
3598 switch (GI.getOpcode()) {
3599 case TargetOpcode::G_MEMCPY:
3600 case TargetOpcode::G_MEMCPY_INLINE:
3601 Mopcode = AArch64::MOPSMemoryCopyPseudo;
3602 break;
3603 case TargetOpcode::G_MEMMOVE:
3604 Mopcode = AArch64::MOPSMemoryMovePseudo;
3605 break;
3606 case TargetOpcode::G_MEMSET:
3607 // For tagged memset see llvm.aarch64.mops.memset.tag
3608 Mopcode = AArch64::MOPSMemorySetPseudo;
3609 break;
3610 }
3611
3612 auto &DstPtr = GI.getOperand(0);
3613 auto &SrcOrVal = GI.getOperand(1);
3614 auto &Size = GI.getOperand(2);
3615
3616 // Create copies of the registers that can be clobbered.
3617 const Register DstPtrCopy = MRI.cloneVirtualRegister(DstPtr.getReg());
3618 const Register SrcValCopy = MRI.cloneVirtualRegister(SrcOrVal.getReg());
3619 const Register SizeCopy = MRI.cloneVirtualRegister(Size.getReg());
3620
3621 const bool IsSet = Mopcode == AArch64::MOPSMemorySetPseudo;
3622 const auto &SrcValRegClass =
3623 IsSet ? AArch64::GPR64RegClass : AArch64::GPR64commonRegClass;
3624
3625 // Constrain to specific registers
3626 RBI.constrainGenericRegister(DstPtrCopy, AArch64::GPR64commonRegClass, MRI);
3627 RBI.constrainGenericRegister(SrcValCopy, SrcValRegClass, MRI);
3628 RBI.constrainGenericRegister(SizeCopy, AArch64::GPR64RegClass, MRI);
3629
3630 MIB.buildCopy(DstPtrCopy, DstPtr);
3631 MIB.buildCopy(SrcValCopy, SrcOrVal);
3632 MIB.buildCopy(SizeCopy, Size);
3633
3634 // New instruction uses the copied registers because it must update them.
3635 // The defs are not used since they don't exist in G_MEM*. They are still
3636 // tied.
3637 // Note: order of operands is different from G_MEMSET, G_MEMCPY, G_MEMMOVE
3638 Register DefDstPtr = MRI.createVirtualRegister(&AArch64::GPR64commonRegClass);
3639 Register DefSize = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
3640 if (IsSet) {
3641 MIB.buildInstr(Mopcode, {DefDstPtr, DefSize},
3642 {DstPtrCopy, SizeCopy, SrcValCopy});
3643 } else {
3644 Register DefSrcPtr = MRI.createVirtualRegister(&SrcValRegClass);
3645 MIB.buildInstr(Mopcode, {DefDstPtr, DefSrcPtr, DefSize},
3646 {DstPtrCopy, SrcValCopy, SizeCopy});
3647 }
3648
3649 GI.eraseFromParent();
3650 return true;
3651}
3652
3653bool AArch64InstructionSelector::selectBrJT(MachineInstr &I,
3654 MachineRegisterInfo &MRI) {
3655 assert(I.getOpcode() == TargetOpcode::G_BRJT && "Expected G_BRJT");
3656 Register JTAddr = I.getOperand(0).getReg();
3657 unsigned JTI = I.getOperand(1).getIndex();
3658 Register Index = I.getOperand(2).getReg();
3659
3660 Register TargetReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
3661 Register ScratchReg = MRI.createVirtualRegister(&AArch64::GPR64spRegClass);
3662
3663 MF->getInfo<AArch64FunctionInfo>()->setJumpTableEntryInfo(JTI, 4, nullptr);
3664 auto JumpTableInst = MIB.buildInstr(AArch64::JumpTableDest32,
3665 {TargetReg, ScratchReg}, {JTAddr, Index})
3666 .addJumpTableIndex(JTI);
3667 // Build the indirect branch.
3668 MIB.buildInstr(AArch64::BR, {}, {TargetReg});
3669 I.eraseFromParent();
3670 return constrainSelectedInstRegOperands(*JumpTableInst, TII, TRI, RBI);
3671}
3672
3673bool AArch64InstructionSelector::selectJumpTable(MachineInstr &I,
3674 MachineRegisterInfo &MRI) {
3675 assert(I.getOpcode() == TargetOpcode::G_JUMP_TABLE && "Expected jump table");
3676 assert(I.getOperand(1).isJTI() && "Jump table op should have a JTI!");
3677
3678 Register DstReg = I.getOperand(0).getReg();
3679 unsigned JTI = I.getOperand(1).getIndex();
3680 // We generate a MOVaddrJT which will get expanded to an ADRP + ADD later.
3681 auto MovMI =
3682 MIB.buildInstr(AArch64::MOVaddrJT, {DstReg}, {})
3683 .addJumpTableIndex(JTI, AArch64II::MO_PAGE)
3684 .addJumpTableIndex(JTI, AArch64II::MO_NC | AArch64II::MO_PAGEOFF);
3685 I.eraseFromParent();
3686 return constrainSelectedInstRegOperands(*MovMI, TII, TRI, RBI);
3687}
3688
3689bool AArch64InstructionSelector::selectTLSGlobalValue(
3690 MachineInstr &I, MachineRegisterInfo &MRI) {
3691 if (!STI.isTargetMachO())
3692 return false;
3693 MachineFunction &MF = *I.getParent()->getParent();
3694 MF.getFrameInfo().setAdjustsStack(true);
3695
3696 const auto &GlobalOp = I.getOperand(1);
3697 assert(GlobalOp.getOffset() == 0 &&
3698 "Shouldn't have an offset on TLS globals!");
3699 const GlobalValue &GV = *GlobalOp.getGlobal();
3700
3701 auto LoadGOT =
3702 MIB.buildInstr(AArch64::LOADgot, {&AArch64::GPR64commonRegClass}, {})
3703 .addGlobalAddress(&GV, 0, AArch64II::MO_TLS);
3704
3705 auto Load = MIB.buildInstr(AArch64::LDRXui, {&AArch64::GPR64commonRegClass},
3706 {LoadGOT.getReg(0)})
3707 .addImm(0);
3708
3709 MIB.buildCopy(Register(AArch64::X0), LoadGOT.getReg(0));
3710 // TLS calls preserve all registers except those that absolutely must be
3711 // trashed: X0 (it takes an argument), LR (it's a call) and NZCV (let's not be
3712 // silly).
3713 MIB.buildInstr(getBLRCallOpcode(MF), {}, {Load})
3714 .addUse(AArch64::X0, RegState::Implicit)
3715 .addDef(AArch64::X0, RegState::Implicit)
3716 .addRegMask(TRI.getTLSCallPreservedMask());
3717
3718 MIB.buildCopy(I.getOperand(0).getReg(), Register(AArch64::X0));
3719 RBI.constrainGenericRegister(I.getOperand(0).getReg(), AArch64::GPR64RegClass,
3720 MRI);
3721 I.eraseFromParent();
3722 return true;
3723}
3724
3725bool AArch64InstructionSelector::selectIntrinsicTrunc(
3726 MachineInstr &I, MachineRegisterInfo &MRI) const {
3727 const LLT SrcTy = MRI.getType(I.getOperand(0).getReg());
3728
3729 // Select the correct opcode.
3730 unsigned Opc = 0;
3731 if (!SrcTy.isVector()) {
3732 switch (SrcTy.getSizeInBits()) {
3733 default:
3734 case 16:
3735 Opc = AArch64::FRINTZHr;
3736 break;
3737 case 32:
3738 Opc = AArch64::FRINTZSr;
3739 break;
3740 case 64:
3741 Opc = AArch64::FRINTZDr;
3742 break;
3743 }
3744 } else {
3745 unsigned NumElts = SrcTy.getNumElements();
3746 switch (SrcTy.getElementType().getSizeInBits()) {
3747 default:
3748 break;
3749 case 16:
3750 if (NumElts == 4)
3751 Opc = AArch64::FRINTZv4f16;
3752 else if (NumElts == 8)
3753 Opc = AArch64::FRINTZv8f16;
3754 break;
3755 case 32:
3756 if (NumElts == 2)
3757 Opc = AArch64::FRINTZv2f32;
3758 else if (NumElts == 4)
3759 Opc = AArch64::FRINTZv4f32;
3760 break;
3761 case 64:
3762 if (NumElts == 2)
3763 Opc = AArch64::FRINTZv2f64;
3764 break;
3765 }
3766 }
3767
3768 if (!Opc) {
3769 // Didn't get an opcode above, bail.
3770 LLVM_DEBUG(dbgs() << "Unsupported type for G_INTRINSIC_TRUNC!\n");
3771 return false;
3772 }
3773
3774 // Legalization would have set us up perfectly for this; we just need to
3775 // set the opcode and move on.
3776 I.setDesc(TII.get(Opc));
3777 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3778}
3779
3780bool AArch64InstructionSelector::selectIntrinsicRound(
3781 MachineInstr &I, MachineRegisterInfo &MRI) const {
3782 const LLT SrcTy = MRI.getType(I.getOperand(0).getReg());
3783
3784 // Select the correct opcode.
3785 unsigned Opc = 0;
3786 if (!SrcTy.isVector()) {
3787 switch (SrcTy.getSizeInBits()) {
3788 default:
3789 case 16:
3790 Opc = AArch64::FRINTAHr;
3791 break;
3792 case 32:
3793 Opc = AArch64::FRINTASr;
3794 break;
3795 case 64:
3796 Opc = AArch64::FRINTADr;
3797 break;
3798 }
3799 } else {
3800 unsigned NumElts = SrcTy.getNumElements();
3801 switch (SrcTy.getElementType().getSizeInBits()) {
3802 default:
3803 break;
3804 case 16:
3805 if (NumElts == 4)
3806 Opc = AArch64::FRINTAv4f16;
3807 else if (NumElts == 8)
3808 Opc = AArch64::FRINTAv8f16;
3809 break;
3810 case 32:
3811 if (NumElts == 2)
3812 Opc = AArch64::FRINTAv2f32;
3813 else if (NumElts == 4)
3814 Opc = AArch64::FRINTAv4f32;
3815 break;
3816 case 64:
3817 if (NumElts == 2)
3818 Opc = AArch64::FRINTAv2f64;
3819 break;
3820 }
3821 }
3822
3823 if (!Opc) {
3824 // Didn't get an opcode above, bail.
3825 LLVM_DEBUG(dbgs() << "Unsupported type for G_INTRINSIC_ROUND!\n");
3826 return false;
3827 }
3828
3829 // Legalization would have set us up perfectly for this; we just need to
3830 // set the opcode and move on.
3831 I.setDesc(TII.get(Opc));
3832 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3833}
3834
3835bool AArch64InstructionSelector::selectVectorICmp(
3836 MachineInstr &I, MachineRegisterInfo &MRI) {
3837 Register DstReg = I.getOperand(0).getReg();
3838 LLT DstTy = MRI.getType(DstReg);
3839 Register SrcReg = I.getOperand(2).getReg();
3840 Register Src2Reg = I.getOperand(3).getReg();
3841 LLT SrcTy = MRI.getType(SrcReg);
3842
3843 unsigned SrcEltSize = SrcTy.getElementType().getSizeInBits();
3844 unsigned NumElts = DstTy.getNumElements();
3845
3846 // First index is element size, 0 == 8b, 1 == 16b, 2 == 32b, 3 == 64b
3847 // Second index is num elts, 0 == v2, 1 == v4, 2 == v8, 3 == v16
3848 // Third index is cc opcode:
3849 // 0 == eq
3850 // 1 == ugt
3851 // 2 == uge
3852 // 3 == ult
3853 // 4 == ule
3854 // 5 == sgt
3855 // 6 == sge
3856 // 7 == slt
3857 // 8 == sle
3858 // ne is done by negating 'eq' result.
3859
3860 // This table below assumes that for some comparisons the operands will be
3861 // commuted.
3862 // ult op == commute + ugt op
3863 // ule op == commute + uge op
3864 // slt op == commute + sgt op
3865 // sle op == commute + sge op
3866 unsigned PredIdx = 0;
3867 bool SwapOperands = false;
3868 CmpInst::Predicate Pred = (CmpInst::Predicate)I.getOperand(1).getPredicate();
3869 switch (Pred) {
3870 case CmpInst::ICMP_NE:
3871 case CmpInst::ICMP_EQ:
3872 PredIdx = 0;
3873 break;
3874 case CmpInst::ICMP_UGT:
3875 PredIdx = 1;
3876 break;
3877 case CmpInst::ICMP_UGE:
3878 PredIdx = 2;
3879 break;
3880 case CmpInst::ICMP_ULT:
3881 PredIdx = 3;
3882 SwapOperands = true;
3883 break;
3884 case CmpInst::ICMP_ULE:
3885 PredIdx = 4;
3886 SwapOperands = true;
3887 break;
3888 case CmpInst::ICMP_SGT:
3889 PredIdx = 5;
3890 break;
3891 case CmpInst::ICMP_SGE:
3892 PredIdx = 6;
3893 break;
3894 case CmpInst::ICMP_SLT:
3895 PredIdx = 7;
3896 SwapOperands = true;
3897 break;
3898 case CmpInst::ICMP_SLE:
3899 PredIdx = 8;
3900 SwapOperands = true;
3901 break;
3902 default:
3903 llvm_unreachable("Unhandled icmp predicate");
3904 return false;
3905 }
3906
3907 // This table obviously should be tablegen'd when we have our GISel native
3908 // tablegen selector.
3909
3910 static const unsigned OpcTable[4][4][9] = {
3911 {
3912 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3913 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3914 0 /* invalid */},
3915 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3916 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3917 0 /* invalid */},
3918 {AArch64::CMEQv8i8, AArch64::CMHIv8i8, AArch64::CMHSv8i8,
3919 AArch64::CMHIv8i8, AArch64::CMHSv8i8, AArch64::CMGTv8i8,
3920 AArch64::CMGEv8i8, AArch64::CMGTv8i8, AArch64::CMGEv8i8},
3921 {AArch64::CMEQv16i8, AArch64::CMHIv16i8, AArch64::CMHSv16i8,
3922 AArch64::CMHIv16i8, AArch64::CMHSv16i8, AArch64::CMGTv16i8,
3923 AArch64::CMGEv16i8, AArch64::CMGTv16i8, AArch64::CMGEv16i8}
3924 },
3925 {
3926 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3927 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3928 0 /* invalid */},
3929 {AArch64::CMEQv4i16, AArch64::CMHIv4i16, AArch64::CMHSv4i16,
3930 AArch64::CMHIv4i16, AArch64::CMHSv4i16, AArch64::CMGTv4i16,
3931 AArch64::CMGEv4i16, AArch64::CMGTv4i16, AArch64::CMGEv4i16},
3932 {AArch64::CMEQv8i16, AArch64::CMHIv8i16, AArch64::CMHSv8i16,
3933 AArch64::CMHIv8i16, AArch64::CMHSv8i16, AArch64::CMGTv8i16,
3934 AArch64::CMGEv8i16, AArch64::CMGTv8i16, AArch64::CMGEv8i16},
3935 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3936 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3937 0 /* invalid */}
3938 },
3939 {
3940 {AArch64::CMEQv2i32, AArch64::CMHIv2i32, AArch64::CMHSv2i32,
3941 AArch64::CMHIv2i32, AArch64::CMHSv2i32, AArch64::CMGTv2i32,
3942 AArch64::CMGEv2i32, AArch64::CMGTv2i32, AArch64::CMGEv2i32},
3943 {AArch64::CMEQv4i32, AArch64::CMHIv4i32, AArch64::CMHSv4i32,
3944 AArch64::CMHIv4i32, AArch64::CMHSv4i32, AArch64::CMGTv4i32,
3945 AArch64::CMGEv4i32, AArch64::CMGTv4i32, AArch64::CMGEv4i32},
3946 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3947 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3948 0 /* invalid */},
3949 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3950 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3951 0 /* invalid */}
3952 },
3953 {
3954 {AArch64::CMEQv2i64, AArch64::CMHIv2i64, AArch64::CMHSv2i64,
3955 AArch64::CMHIv2i64, AArch64::CMHSv2i64, AArch64::CMGTv2i64,
3956 AArch64::CMGEv2i64, AArch64::CMGTv2i64, AArch64::CMGEv2i64},
3957 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3958 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3959 0 /* invalid */},
3960 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3961 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3962 0 /* invalid */},
3963 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3964 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3965 0 /* invalid */}
3966 },
3967 };
3968 unsigned EltIdx = Log2_32(SrcEltSize / 8);
3969 unsigned NumEltsIdx = Log2_32(NumElts / 2);
3970 unsigned Opc = OpcTable[EltIdx][NumEltsIdx][PredIdx];
3971 if (!Opc) {
3972 LLVM_DEBUG(dbgs() << "Could not map G_ICMP to cmp opcode");
3973 return false;
3974 }
3975
3976 const RegisterBank &VecRB = *RBI.getRegBank(SrcReg, MRI, TRI);
3977 const TargetRegisterClass *SrcRC =
3978 getRegClassForTypeOnBank(SrcTy, VecRB, RBI, true);
3979 if (!SrcRC) {
3980 LLVM_DEBUG(dbgs() << "Could not determine source register class.\n");
3981 return false;
3982 }
3983
3984 unsigned NotOpc = Pred == ICmpInst::ICMP_NE ? AArch64::NOTv8i8 : 0;
3985 if (SrcTy.getSizeInBits() == 128)
3986 NotOpc = NotOpc ? AArch64::NOTv16i8 : 0;
3987
3988 if (SwapOperands)
3989 std::swap(SrcReg, Src2Reg);
3990
3991 auto Cmp = MIB.buildInstr(Opc, {SrcRC}, {SrcReg, Src2Reg});
3992 constrainSelectedInstRegOperands(*Cmp, TII, TRI, RBI);
3993
3994 // Invert if we had a 'ne' cc.
3995 if (NotOpc) {
3996 Cmp = MIB.buildInstr(NotOpc, {DstReg}, {Cmp});
3997 constrainSelectedInstRegOperands(*Cmp, TII, TRI, RBI);
3998 } else {
3999 MIB.buildCopy(DstReg, Cmp.getReg(0));
4000 }
4001 RBI.constrainGenericRegister(DstReg, *SrcRC, MRI);
4002 I.eraseFromParent();
4003 return true;
4004}
4005
4006MachineInstr *AArch64InstructionSelector::emitScalarToVector(
4007 unsigned EltSize, const TargetRegisterClass *DstRC, Register Scalar,
4008 MachineIRBuilder &MIRBuilder) const {
4009 auto Undef = MIRBuilder.buildInstr(TargetOpcode::IMPLICIT_DEF, {DstRC}, {});
4010
4011 auto BuildFn = [&](unsigned SubregIndex) {
4012 auto Ins =
4013 MIRBuilder
4014 .buildInstr(TargetOpcode::INSERT_SUBREG, {DstRC}, {Undef, Scalar})
4015 .addImm(SubregIndex);
4016 constrainSelectedInstRegOperands(*Undef, TII, TRI, RBI);
4017 constrainSelectedInstRegOperands(*Ins, TII, TRI, RBI);
4018 return &*Ins;
4019 };
4020
4021 switch (EltSize) {
4022 case 16:
4023 return BuildFn(AArch64::hsub);
4024 case 32:
4025 return BuildFn(AArch64::ssub);
4026 case 64:
4027 return BuildFn(AArch64::dsub);
4028 default:
4029 return nullptr;
4030 }
4031}
4032
4033bool AArch64InstructionSelector::selectMergeValues(
4034 MachineInstr &I, MachineRegisterInfo &MRI) {
4035 assert(I.getOpcode() == TargetOpcode::G_MERGE_VALUES && "unexpected opcode");
4036 const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
4037 const LLT SrcTy = MRI.getType(I.getOperand(1).getReg());
4038 assert(!DstTy.isVector() && !SrcTy.isVector() && "invalid merge operation");
4039 const RegisterBank &RB = *RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI);
4040
4041 if (I.getNumOperands() != 3)
4042 return false;
4043
4044 // Merging 2 s64s into an s128.
4045 if (DstTy == LLT::scalar(128)) {
4046 if (SrcTy.getSizeInBits() != 64)
4047 return false;
4048 Register DstReg = I.getOperand(0).getReg();
4049 Register Src1Reg = I.getOperand(1).getReg();
4050 Register Src2Reg = I.getOperand(2).getReg();
4051 auto Tmp = MIB.buildInstr(TargetOpcode::IMPLICIT_DEF, {DstTy}, {});
4052 MachineInstr *InsMI =
4053 emitLaneInsert(None, Tmp.getReg(0), Src1Reg, /* LaneIdx */ 0, RB, MIB);
4054 if (!InsMI)
4055 return false;
4056 MachineInstr *Ins2MI = emitLaneInsert(DstReg, InsMI->getOperand(0).getReg(),
4057 Src2Reg, /* LaneIdx */ 1, RB, MIB);
4058 if (!Ins2MI)
4059 return false;
4060 constrainSelectedInstRegOperands(*InsMI, TII, TRI, RBI);
4061 constrainSelectedInstRegOperands(*Ins2MI, TII, TRI, RBI);
4062 I.eraseFromParent();
4063 return true;
4064 }
4065
4066 if (RB.getID() != AArch64::GPRRegBankID)
4067 return false;
4068
4069 if (DstTy.getSizeInBits() != 64 || SrcTy.getSizeInBits() != 32)
4070 return false;
4071
4072 auto *DstRC = &AArch64::GPR64RegClass;
4073 Register SubToRegDef = MRI.createVirtualRegister(DstRC);
4074 MachineInstr &SubRegMI = *BuildMI(*I.getParent(), I, I.getDebugLoc(),
4075 TII.get(TargetOpcode::SUBREG_TO_REG))
4076 .addDef(SubToRegDef)
4077 .addImm(0)
4078 .addUse(I.getOperand(1).getReg())
4079 .addImm(AArch64::sub_32);
4080 Register SubToRegDef2 = MRI.createVirtualRegister(DstRC);
4081 // Need to anyext the second scalar before we can use bfm
4082 MachineInstr &SubRegMI2 = *BuildMI(*I.getParent(), I, I.getDebugLoc(),
4083 TII.get(TargetOpcode::SUBREG_TO_REG))
4084 .addDef(SubToRegDef2)
4085 .addImm(0)
4086 .addUse(I.getOperand(2).getReg())
4087 .addImm(AArch64::sub_32);
4088 MachineInstr &BFM =
4089 *BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::BFMXri))
4090 .addDef(I.getOperand(0).getReg())
4091 .addUse(SubToRegDef)
4092 .addUse(SubToRegDef2)
4093 .addImm(32)
4094 .addImm(31);
4095 constrainSelectedInstRegOperands(SubRegMI, TII, TRI, RBI);
4096 constrainSelectedInstRegOperands(SubRegMI2, TII, TRI, RBI);
4097 constrainSelectedInstRegOperands(BFM, TII, TRI, RBI);
4098 I.eraseFromParent();
4099 return true;
4100}
4101
4102static bool getLaneCopyOpcode(unsigned &CopyOpc, unsigned &ExtractSubReg,
4103 const unsigned EltSize) {
4104 // Choose a lane copy opcode and subregister based off of the size of the
4105 // vector's elements.
4106 switch (EltSize) {
4107 case 8:
4108 CopyOpc = AArch64::DUPi8;
4109 ExtractSubReg = AArch64::bsub;
4110 break;
4111 case 16:
4112 CopyOpc = AArch64::DUPi16;
4113 ExtractSubReg = AArch64::hsub;
4114 break;
4115 case 32:
4116 CopyOpc = AArch64::DUPi32;
4117 ExtractSubReg = AArch64::ssub;
4118 break;
4119 case 64:
4120 CopyOpc = AArch64::DUPi64;
4121 ExtractSubReg = AArch64::dsub;
4122 break;
4123 default:
4124 // Unknown size, bail out.
4125 LLVM_DEBUG(dbgs() << "Elt size '" << EltSize << "' unsupported.\n");
4126 return false;
4127 }
4128 return true;
4129}
4130
4131MachineInstr *AArch64InstructionSelector::emitExtractVectorElt(
4132 Optional<Register> DstReg, const RegisterBank &DstRB, LLT ScalarTy,
4133 Register VecReg, unsigned LaneIdx, MachineIRBuilder &MIRBuilder) const {
4134 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
4135 unsigned CopyOpc = 0;
4136 unsigned ExtractSubReg = 0;
4137 if (!getLaneCopyOpcode(CopyOpc, ExtractSubReg, ScalarTy.getSizeInBits())) {
4138 LLVM_DEBUG(
4139 dbgs() << "Couldn't determine lane copy opcode for instruction.\n");
4140 return nullptr;
4141 }
4142
4143 const TargetRegisterClass *DstRC =
4144 getRegClassForTypeOnBank(ScalarTy, DstRB, RBI, true);
4145 if (!DstRC) {
4146 LLVM_DEBUG(dbgs() << "Could not determine destination register class.\n");
4147 return nullptr;
4148 }
4149
4150 const RegisterBank &VecRB = *RBI.getRegBank(VecReg, MRI, TRI);
4151 const LLT &VecTy = MRI.getType(VecReg);
4152 const TargetRegisterClass *VecRC =
4153 getRegClassForTypeOnBank(VecTy, VecRB, RBI, true);
4154 if (!VecRC) {
4155 LLVM_DEBUG(dbgs() << "Could not determine source register class.\n");
4156 return nullptr;
4157 }
4158
4159 // The register that we're going to copy into.
4160 Register InsertReg = VecReg;
4161 if (!DstReg)
4162 DstReg = MRI.createVirtualRegister(DstRC);
4163 // If the lane index is 0, we just use a subregister COPY.
4164 if (LaneIdx == 0) {
4165 auto Copy = MIRBuilder.buildInstr(TargetOpcode::COPY, {*DstReg}, {})
4166 .addReg(VecReg, 0, ExtractSubReg);
4167 RBI.constrainGenericRegister(*DstReg, *DstRC, MRI);
4168 return &*Copy;
4169 }
4170
4171 // Lane copies require 128-bit wide registers. If we're dealing with an
4172 // unpacked vector, then we need to move up to that width. Insert an implicit
4173 // def and a subregister insert to get us there.
4174 if (VecTy.getSizeInBits() != 128) {
4175 MachineInstr *ScalarToVector = emitScalarToVector(
4176 VecTy.getSizeInBits(), &AArch64::FPR128RegClass, VecReg, MIRBuilder);
4177 if (!ScalarToVector)
4178 return nullptr;
4179 InsertReg = ScalarToVector->getOperand(0).getReg();
4180 }
4181
4182 MachineInstr *LaneCopyMI =
4183 MIRBuilder.buildInstr(CopyOpc, {*DstReg}, {InsertReg}).addImm(LaneIdx);
4184 constrainSelectedInstRegOperands(*LaneCopyMI, TII, TRI, RBI);
4185
4186 // Make sure that we actually constrain the initial copy.
4187 RBI.constrainGenericRegister(*DstReg, *DstRC, MRI);
4188 return LaneCopyMI;
4189}
4190
4191bool AArch64InstructionSelector::selectExtractElt(
4192 MachineInstr &I, MachineRegisterInfo &MRI) {
4193 assert(I.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT &&
4194 "unexpected opcode!");
4195 Register DstReg = I.getOperand(0).getReg();
4196 const LLT NarrowTy = MRI.getType(DstReg);
4197 const Register SrcReg = I.getOperand(1).getReg();
4198 const LLT WideTy = MRI.getType(SrcReg);
4199 (void)WideTy;
4200 assert(WideTy.getSizeInBits() >= NarrowTy.getSizeInBits() &&
4201 "source register size too small!");
4202 assert(!NarrowTy.isVector() && "cannot extract vector into vector!");
4203
4204 // Need the lane index to determine the correct copy opcode.
4205 MachineOperand &LaneIdxOp = I.getOperand(2);
4206 assert(LaneIdxOp.isReg() && "Lane index operand was not a register?");
4207
4208 if (RBI.getRegBank(DstReg, MRI, TRI)->getID() != AArch64::FPRRegBankID) {
4209 LLVM_DEBUG(dbgs() << "Cannot extract into GPR.\n");
4210 return false;
4211 }
4212
4213 // Find the index to extract from.
4214 auto VRegAndVal = getIConstantVRegValWithLookThrough(LaneIdxOp.getReg(), MRI);
4215 if (!VRegAndVal)
4216 return false;
4217 unsigned LaneIdx = VRegAndVal->Value.getSExtValue();
4218
4219
4220 const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
4221 MachineInstr *Extract = emitExtractVectorElt(DstReg, DstRB, NarrowTy, SrcReg,
4222 LaneIdx, MIB);
4223 if (!Extract)
4224 return false;
4225
4226 I.eraseFromParent();
4227 return true;
4228}
4229
4230bool AArch64InstructionSelector::selectSplitVectorUnmerge(
4231 MachineInstr &I, MachineRegisterInfo &MRI) {
4232 unsigned NumElts = I.getNumOperands() - 1;
4233 Register SrcReg = I.getOperand(NumElts).getReg();
4234 const LLT NarrowTy = MRI.getType(I.getOperand(0).getReg());
4235 const LLT SrcTy = MRI.getType(SrcReg);
4236
4237 assert(NarrowTy.isVector() && "Expected an unmerge into vectors");
4238 if (SrcTy.getSizeInBits() > 128) {
4239 LLVM_DEBUG(dbgs() << "Unexpected vector type for vec split unmerge");
4240 return false;
4241 }
4242
4243 // We implement a split vector operation by treating the sub-vectors as
4244 // scalars and extracting them.
4245 const RegisterBank &DstRB =
4246 *RBI.getRegBank(I.getOperand(0).getReg(), MRI, TRI);
4247 for (unsigned OpIdx = 0; OpIdx < NumElts; ++OpIdx) {
4248 Register Dst = I.getOperand(OpIdx).getReg();
4249 MachineInstr *Extract =
4250 emitExtractVectorElt(Dst, DstRB, NarrowTy, SrcReg, OpIdx, MIB);
4251 if (!Extract)
4252 return false;
4253 }
4254 I.eraseFromParent();
4255 return true;
4256}
4257
4258bool AArch64InstructionSelector::selectUnmergeValues(MachineInstr &I,
4259 MachineRegisterInfo &MRI) {
4260 assert(I.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
4261 "unexpected opcode");
4262
4263 // TODO: Handle unmerging into GPRs and from scalars to scalars.
4264 if (RBI.getRegBank(I.getOperand(0).getReg(), MRI, TRI)->getID() !=
4265 AArch64::FPRRegBankID ||
4266 RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI)->getID() !=
4267 AArch64::FPRRegBankID) {
4268 LLVM_DEBUG(dbgs() << "Unmerging vector-to-gpr and scalar-to-scalar "
4269 "currently unsupported.\n");
4270 return false;
4271 }
4272
4273 // The last operand is the vector source register, and every other operand is
4274 // a register to unpack into.
4275 unsigned NumElts = I.getNumOperands() - 1;
4276 Register SrcReg = I.getOperand(NumElts).getReg();
4277 const LLT NarrowTy = MRI.getType(I.getOperand(0).getReg());
4278 const LLT WideTy = MRI.getType(SrcReg);
4279 (void)WideTy;
4280 assert((WideTy.isVector() || WideTy.getSizeInBits() == 128) &&
4281 "can only unmerge from vector or s128 types!");
4282 assert(WideTy.getSizeInBits() > NarrowTy.getSizeInBits() &&
4283 "source register size too small!");
4284
4285 if (!NarrowTy.isScalar())
4286 return selectSplitVectorUnmerge(I, MRI);
4287
4288 // Choose a lane copy opcode and subregister based off of the size of the
4289 // vector's elements.
4290 unsigned CopyOpc = 0;
4291 unsigned ExtractSubReg = 0;
4292 if (!getLaneCopyOpcode(CopyOpc, ExtractSubReg, NarrowTy.getSizeInBits()))
4293 return false;
4294
4295 // Set up for the lane copies.
4296 MachineBasicBlock &MBB = *I.getParent();
4297
4298 // Stores the registers we'll be copying from.
4299 SmallVector<Register, 4> InsertRegs;
4300
4301 // We'll use the first register twice, so we only need NumElts-1 registers.
4302 unsigned NumInsertRegs = NumElts - 1;
4303
4304 // If our elements fit into exactly 128 bits, then we can copy from the source
4305 // directly. Otherwise, we need to do a bit of setup with some subregister
4306 // inserts.
4307 if (NarrowTy.getSizeInBits() * NumElts == 128) {
4308 InsertRegs = SmallVector<Register, 4>(NumInsertRegs, SrcReg);
4309 } else {
4310 // No. We have to perform subregister inserts. For each insert, create an
4311 // implicit def and a subregister insert, and save the register we create.
4312 const TargetRegisterClass *RC =
4313 getMinClassForRegBank(*RBI.getRegBank(SrcReg, MRI, TRI),
4314 WideTy.getScalarSizeInBits() * NumElts);
4315 unsigned SubReg = 0;
4316 bool Found = getSubRegForClass(RC, TRI, SubReg);
4317 (void)Found;
4318 assert(Found && "expected to find last operand's subeg idx");
4319 for (unsigned Idx = 0; Idx < NumInsertRegs; ++Idx) {
4320 Register ImpDefReg = MRI.createVirtualRegister(&AArch64::FPR128RegClass);
4321 MachineInstr &ImpDefMI =
4322 *BuildMI(MBB, I, I.getDebugLoc(), TII.get(TargetOpcode::IMPLICIT_DEF),
4323 ImpDefReg);
4324
4325 // Now, create the subregister insert from SrcReg.
4326 Register InsertReg = MRI.createVirtualRegister(&AArch64::FPR128RegClass);
4327 MachineInstr &InsMI =
4328 *BuildMI(MBB, I, I.getDebugLoc(),
4329 TII.get(TargetOpcode::INSERT_SUBREG), InsertReg)
4330 .addUse(ImpDefReg)
4331 .addUse(SrcReg)
4332 .addImm(SubReg);
4333
4334 constrainSelectedInstRegOperands(ImpDefMI, TII, TRI, RBI);
4335 constrainSelectedInstRegOperands(InsMI, TII, TRI, RBI);
4336
4337 // Save the register so that we can copy from it after.
4338 InsertRegs.push_back(InsertReg);
4339 }
4340 }
4341
4342 // Now that we've created any necessary subregister inserts, we can
4343 // create the copies.
4344 //
4345 // Perform the first copy separately as a subregister copy.
4346 Register CopyTo = I.getOperand(0).getReg();
4347 auto FirstCopy = MIB.buildInstr(TargetOpcode::COPY, {CopyTo}, {})
4348 .addReg(InsertRegs[0], 0, ExtractSubReg);
4349 constrainSelectedInstRegOperands(*FirstCopy, TII, TRI, RBI);
4350
4351 // Now, perform the remaining copies as vector lane copies.
4352 unsigned LaneIdx = 1;
4353 for (Register InsReg : InsertRegs) {
4354 Register CopyTo = I.getOperand(LaneIdx).getReg();
4355 MachineInstr &CopyInst =
4356 *BuildMI(MBB, I, I.getDebugLoc(), TII.get(CopyOpc), CopyTo)
4357 .addUse(InsReg)
4358 .addImm(LaneIdx);
4359 constrainSelectedInstRegOperands(CopyInst, TII, TRI, RBI);
4360 ++LaneIdx;
4361 }
4362
4363 // Separately constrain the first copy's destination. Because of the
4364 // limitation in constrainOperandRegClass, we can't guarantee that this will
4365 // actually be constrained. So, do it ourselves using the second operand.
4366 const TargetRegisterClass *RC =
4367 MRI.getRegClassOrNull(I.getOperand(1).getReg());
4368 if (!RC) {
4369 LLVM_DEBUG(dbgs() << "Couldn't constrain copy destination.\n");
4370 return false;
4371 }
4372
4373 RBI.constrainGenericRegister(CopyTo, *RC, MRI);
4374 I.eraseFromParent();
4375 return true;
4376}
4377
4378bool AArch64InstructionSelector::selectConcatVectors(
4379 MachineInstr &I, MachineRegisterInfo &MRI) {
4380 assert(I.getOpcode() == TargetOpcode::G_CONCAT_VECTORS &&
4381 "Unexpected opcode");
4382 Register Dst = I.getOperand(0).getReg();
4383 Register Op1 = I.getOperand(1).getReg();
4384 Register Op2 = I.getOperand(2).getReg();
4385 MachineInstr *ConcatMI = emitVectorConcat(Dst, Op1, Op2, MIB);
4386 if (!ConcatMI)
4387 return false;
4388 I.eraseFromParent();
4389 return true;
4390}
4391
4392unsigned
4393AArch64InstructionSelector::emitConstantPoolEntry(const Constant *CPVal,
4394 MachineFunction &MF) const {
4395 Type *CPTy = CPVal->getType();
4396 Align Alignment = MF.getDataLayout().getPrefTypeAlign(CPTy);
4397
4398 MachineConstantPool *MCP = MF.getConstantPool();
4399 return MCP->getConstantPoolIndex(CPVal, Alignment);
4400}
4401
4402MachineInstr *AArch64InstructionSelector::emitLoadFromConstantPool(
4403 const Constant *CPVal, MachineIRBuilder &MIRBuilder) const {
4404 auto &MF = MIRBuilder.getMF();
4405 unsigned CPIdx = emitConstantPoolEntry(CPVal, MF);
4406
4407 auto Adrp =
4408 MIRBuilder.buildInstr(AArch64::ADRP, {&AArch64::GPR64RegClass}, {})
4409 .addConstantPoolIndex(CPIdx, 0, AArch64II::MO_PAGE);
4410
4411 MachineInstr *LoadMI = nullptr;
4412 MachinePointerInfo PtrInfo = MachinePointerInfo::getConstantPool(MF);
4413 unsigned Size = MIRBuilder.getDataLayout().getTypeStoreSize(CPVal->getType());
4414 switch (Size) {
4415 case 16:
4416 LoadMI =
4417 &*MIRBuilder
4418 .buildInstr(AArch64::LDRQui, {&AArch64::FPR128RegClass}, {Adrp})
4419 .addConstantPoolIndex(CPIdx, 0,
4420 AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
4421 break;
4422 case 8:
4423 LoadMI =
4424 &*MIRBuilder
4425 .buildInstr(AArch64::LDRDui, {&AArch64::FPR64RegClass}, {Adrp})
4426 .addConstantPoolIndex(CPIdx, 0,
4427 AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
4428 break;
4429 case 4:
4430 LoadMI =
4431 &*MIRBuilder
4432 .buildInstr(AArch64::LDRSui, {&AArch64::FPR32RegClass}, {Adrp})
4433 .addConstantPoolIndex(CPIdx, 0,
4434 AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
4435 break;
4436 case 2:
4437 LoadMI =
4438 &*MIRBuilder
4439 .buildInstr(AArch64::LDRHui, {&AArch64::FPR16RegClass}, {Adrp})
4440 .addConstantPoolIndex(CPIdx, 0,
4441 AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
4442 break;
4443 default:
4444 LLVM_DEBUG(dbgs() << "Could not load from constant pool of type "
4445 << *CPVal->getType());
4446 return nullptr;
4447 }
4448 LoadMI->addMemOperand(MF, MF.getMachineMemOperand(PtrInfo,
4449 MachineMemOperand::MOLoad,
4450 Size, Align(Size)));
4451 constrainSelectedInstRegOperands(*Adrp, TII, TRI, RBI);
4452 constrainSelectedInstRegOperands(*LoadMI, TII, TRI, RBI);
4453 return LoadMI;
4454}
4455
4456/// Return an <Opcode, SubregIndex> pair to do a vector elt insert of a given
4457/// size and RB.
4458static std::pair<unsigned, unsigned>
4459getInsertVecEltOpInfo(const RegisterBank &RB, unsigned EltSize) {
4460 unsigned Opc, SubregIdx;
4461 if (RB.getID() == AArch64::GPRRegBankID) {
4462 if (EltSize == 16) {
4463 Opc = AArch64::INSvi16gpr;
4464 SubregIdx = AArch64::ssub;
4465 } else if (EltSize == 32) {
4466 Opc = AArch64::INSvi32gpr;
4467 SubregIdx = AArch64::ssub;
4468 } else if (EltSize == 64) {
4469 Opc = AArch64::INSvi64gpr;
4470 SubregIdx = AArch64::dsub;
4471 } else {
4472 llvm_unreachable("invalid elt size!");
4473 }
4474 } else {
4475 if (EltSize == 8) {
4476 Opc = AArch64::INSvi8lane;
4477 SubregIdx = AArch64::bsub;
4478 } else if (EltSize == 16) {
4479 Opc = AArch64::INSvi16lane;
4480 SubregIdx = AArch64::hsub;
4481 } else if (EltSize == 32) {
4482 Opc = AArch64::INSvi32lane;
4483 SubregIdx = AArch64::ssub;
4484 } else if (EltSize == 64) {
4485 Opc = AArch64::INSvi64lane;
4486 SubregIdx = AArch64::dsub;
4487 } else {
4488 llvm_unreachable("invalid elt size!");
4489 }
4490 }
4491 return std::make_pair(Opc, SubregIdx);
4492}
4493
4494MachineInstr *AArch64InstructionSelector::emitInstr(
4495 unsigned Opcode, std::initializer_list<llvm::DstOp> DstOps,
4496 std::initializer_list<llvm::SrcOp> SrcOps, MachineIRBuilder &MIRBuilder,
4497 const ComplexRendererFns &RenderFns) const {
4498 assert(Opcode && "Expected an opcode?");
4499 assert(!isPreISelGenericOpcode(Opcode) &&
4500 "Function should only be used to produce selected instructions!");
4501 auto MI = MIRBuilder.buildInstr(Opcode, DstOps, SrcOps);
4502 if (RenderFns)
4503 for (auto &Fn : *RenderFns)
4504 Fn(MI);
4505 constrainSelectedInstRegOperands(*MI, TII, TRI, RBI);
4506 return &*MI;
4507}
4508
4509MachineInstr *AArch64InstructionSelector::emitAddSub(
4510 const std::array<std::array<unsigned, 2>, 5> &AddrModeAndSizeToOpcode,
4511 Register Dst, MachineOperand &LHS, MachineOperand &RHS,
4512 MachineIRBuilder &MIRBuilder) const {
4513 MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
4514 assert(LHS.isReg() && RHS.isReg() && "Expected register operands?");
4515 auto Ty = MRI.getType(LHS.getReg());
4516 assert(!Ty.isVector() && "Expected a scalar or pointer?");
4517 unsigned Size = Ty.getSizeInBits();
4518 assert((Size == 32 || Size == 64) && "Expected a 32-bit or 64-bit type only");
4519 bool Is32Bit = Size == 32;
4520
4521 // INSTRri form with positive arithmetic immediate.
4522 if (auto Fns = selectArithImmed(RHS))
4523 return emitInstr(AddrModeAndSizeToOpcode[0][Is32Bit], {Dst}, {LHS},
4524 MIRBuilder, Fns);
4525
4526 // INSTRri form with negative arithmetic immediate.
4527 if (auto Fns = selectNegArithImmed(RHS))
4528 return emitInstr(AddrModeAndSizeToOpcode[3][Is32Bit], {Dst}, {LHS},
4529 MIRBuilder, Fns);
4530
4531 // INSTRrx form.
4532 if (auto Fns = selectArithExtendedRegister(RHS))
4533 return emitInstr(AddrModeAndSizeToOpcode[4][Is32Bit], {Dst}, {LHS},
4534 MIRBuilder, Fns);
4535
4536 // INSTRrs form.
4537 if (auto Fns = selectShiftedRegister(RHS))
4538 return emitInstr(AddrModeAndSizeToOpcode[1][Is32Bit], {Dst}, {LHS},
4539 MIRBuilder, Fns);
4540 return emitInstr(AddrModeAndSizeToOpcode[2][Is32Bit], {Dst}, {LHS, RHS},
4541 MIRBuilder);
4542}
4543
4544MachineInstr *
4545AArch64InstructionSelector::emitADD(Register DefReg, MachineOperand &LHS,
4546 MachineOperand &RHS,
4547 MachineIRBuilder &MIRBuilder) const {
4548 const std::array<std::array<unsigned, 2>, 5> OpcTable{
4549 {{AArch64::ADDXri, AArch64::ADDWri},
4550 {AArch64::ADDXrs, AArch64::ADDWrs},
4551 {AArch64::ADDXrr, AArch64::ADDWrr},
4552 {AArch64::SUBXri, AArch64::SUBWri},
4553 {AArch64::ADDXrx, AArch64::ADDWrx}}};
4554 return emitAddSub(OpcTable, DefReg, LHS, RHS, MIRBuilder);
4555}
4556
4557MachineInstr *
4558AArch64InstructionSelector::emitADDS(Register Dst, MachineOperand &LHS,
4559 MachineOperand &RHS,
4560 MachineIRBuilder &MIRBuilder) const {
4561 const std::array<std::array<unsigned, 2>, 5> OpcTable{
4562 {{AArch64::ADDSXri, AArch64::ADDSWri},
4563 {AArch64::ADDSXrs, AArch64::ADDSWrs},
4564 {AArch64::ADDSXrr, AArch64::ADDSWrr},
4565 {AArch64::SUBSXri, AArch64::SUBSWri},
4566 {AArch64::ADDSXrx, AArch64::ADDSWrx}}};
4567 return emitAddSub(OpcTable, Dst, LHS, RHS, MIRBuilder);
4568}
4569
4570MachineInstr *
4571AArch64InstructionSelector::emitSUBS(Register Dst, MachineOperand &LHS,
4572 MachineOperand &RHS,
4573 MachineIRBuilder &MIRBuilder) const {
4574 const std::array<std::array<unsigned, 2>, 5> OpcTable{
4575 {{AArch64::SUBSXri, AArch64::SUBSWri},
4576 {AArch64::SUBSXrs, AArch64::SUBSWrs},
4577 {AArch64::SUBSXrr, AArch64::SUBSWrr},
4578 {AArch64::ADDSXri, AArch64::ADDSWri},
4579 {AArch64::SUBSXrx, AArch64::SUBSWrx}}};
4580 return emitAddSub(OpcTable, Dst, LHS, RHS, MIRBuilder);
4581}
4582
4583MachineInstr *
4584AArch64InstructionSelector::emitCMN(MachineOperand &LHS, MachineOperand &RHS,
4585 MachineIRBuilder &MIRBuilder) const {
4586 MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
4587 bool Is32Bit = (MRI.getType(LHS.getReg()).getSizeInBits() == 32);
4588 auto RC = Is32Bit ? &AArch64::GPR32RegClass : &AArch64::GPR64RegClass;
4589 return emitADDS(MRI.createVirtualRegister(RC), LHS, RHS, MIRBuilder);
4590}
4591
4592MachineInstr *
4593AArch64InstructionSelector::emitTST(MachineOperand &LHS, MachineOperand &RHS,
4594 MachineIRBuilder &MIRBuilder) const {
4595 assert(LHS.isReg() && RHS.isReg() && "Expected register operands?");
4596 MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
4597 LLT Ty = MRI.getType(LHS.getReg());
4598 unsigned RegSize = Ty.getSizeInBits();
4599 bool Is32Bit = (RegSize == 32);
4600 const unsigned OpcTable[3][2] = {{AArch64::ANDSXri, AArch64::ANDSWri},
4601 {AArch64::ANDSXrs, AArch64::ANDSWrs},
4602 {AArch64::ANDSXrr, AArch64::ANDSWrr}};
4603 // ANDS needs a logical immediate for its immediate form. Check if we can
4604 // fold one in.
4605 if (auto ValAndVReg = getIConstantVRegValWithLookThrough(RHS.getReg(), MRI)) {
4606 int64_t Imm = ValAndVReg->Value.getSExtValue();
4607
4608 if (AArch64_AM::isLogicalImmediate(Imm, RegSize)) {
4609 auto TstMI = MIRBuilder.buildInstr(OpcTable[0][Is32Bit], {Ty}, {LHS});
4610 TstMI.addImm(AArch64_AM::encodeLogicalImmediate(Imm, RegSize));
4611 constrainSelectedInstRegOperands(*TstMI, TII, TRI, RBI);
4612 return &*TstMI;
4613 }
4614 }
4615
4616 if (auto Fns = selectLogicalShiftedRegister(RHS))
4617 return emitInstr(OpcTable[1][Is32Bit], {Ty}, {LHS}, MIRBuilder, Fns);
4618 return emitInstr(OpcTable[2][Is32Bit], {Ty}, {LHS, RHS}, MIRBuilder);
4619}
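// Illustrative sketch (made-up registers): for "tst w0, #0xff" the immediate
// path above emits something like
//
//   %dead:gpr32 = ANDSWri %w0, <encoded 0xff>, implicit-def $nzcv
//
// i.e. an ANDS whose integer result is ignored and whose flags drive the
// later conditional instruction.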
4620
4621MachineInstr *AArch64InstructionSelector::emitIntegerCompare(
4622 MachineOperand &LHS, MachineOperand &RHS, MachineOperand &Predicate,
4623 MachineIRBuilder &MIRBuilder) const {
4624 assert(LHS.isReg() && RHS.isReg() && "Expected LHS and RHS to be registers!");
4625 assert(Predicate.isPredicate() && "Expected predicate?");
4626 MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
4627 LLT CmpTy = MRI.getType(LHS.getReg());
4628 assert(!CmpTy.isVector() && "Expected scalar or pointer");
4629 unsigned Size = CmpTy.getSizeInBits();
4630 (void)Size;
4631 assert((Size == 32 || Size == 64) && "Expected a 32-bit or 64-bit LHS/RHS?");
4632 // Fold the compare into a cmn or tst if possible.
4633 if (auto FoldCmp = tryFoldIntegerCompare(LHS, RHS, Predicate, MIRBuilder))
4634 return FoldCmp;
4635 auto Dst = MRI.cloneVirtualRegister(LHS.getReg());
4636 return emitSUBS(Dst, LHS, RHS, MIRBuilder);
4637}
4638
4639MachineInstr *AArch64InstructionSelector::emitCSetForFCmp(
4640 Register Dst, CmpInst::Predicate Pred, MachineIRBuilder &MIRBuilder) const {
4641 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
4642#ifndef NDEBUG
4643 LLT Ty = MRI.getType(Dst);
4644 assert(!Ty.isVector() && Ty.getSizeInBits() == 32 &&
4645 "Expected a 32-bit scalar register?");
4646#endif
4647 const Register ZReg = AArch64::WZR;
4648 AArch64CC::CondCode CC1, CC2;
4649 changeFCMPPredToAArch64CC(Pred, CC1, CC2);
4650 auto InvCC1 = AArch64CC::getInvertedCondCode(CC1);
4651 if (CC2 == AArch64CC::AL)
4652 return emitCSINC(/*Dst=*/Dst, /*Src1=*/ZReg, /*Src2=*/ZReg, InvCC1,
4653 MIRBuilder);
4654 const TargetRegisterClass *RC = &AArch64::GPR32RegClass;
4655 Register Def1Reg = MRI.createVirtualRegister(RC);
4656 Register Def2Reg = MRI.createVirtualRegister(RC);
4657 auto InvCC2 = AArch64CC::getInvertedCondCode(CC2);
4658 emitCSINC(/*Dst=*/Def1Reg, /*Src1=*/ZReg, /*Src2=*/ZReg, InvCC1, MIRBuilder);
4659 emitCSINC(/*Dst=*/Def2Reg, /*Src1=*/ZReg, /*Src2=*/ZReg, InvCC2, MIRBuilder);
4660 auto OrMI = MIRBuilder.buildInstr(AArch64::ORRWrr, {Dst}, {Def1Reg, Def2Reg});
4661 constrainSelectedInstRegOperands(*OrMI, TII, TRI, RBI);
4662 return &*OrMI;
4663}
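// Rough example: a predicate such as FCMP_ONE needs two AArch64 condition
// codes, so the path above materializes each one with a CSINC of WZR (the
// "cset" idiom) and ORs the two 0/1 results into Dst; single-condition
// predicates fold into one CSINC.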
4664
4665MachineInstr *
4666AArch64InstructionSelector::emitFPCompare(Register LHS, Register RHS,
4667 MachineIRBuilder &MIRBuilder,
4668 Optional<CmpInst::Predicate> Pred) const {
4669 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
4670 LLT Ty = MRI.getType(LHS);
4671 if (Ty.isVector())
4672 return nullptr;
4673 unsigned OpSize = Ty.getSizeInBits();
4674 if (OpSize != 32 && OpSize != 64)
4675 return nullptr;
4676
4677 // If this is a compare against +0.0, then we don't have
4678 // to explicitly materialize a constant.
4679 const ConstantFP *FPImm = getConstantFPVRegVal(RHS, MRI);
4680 bool ShouldUseImm = FPImm && (FPImm->isZero() && !FPImm->isNegative());
4681
4682 auto IsEqualityPred = [](CmpInst::Predicate P) {
4683 return P == CmpInst::FCMP_OEQ || P == CmpInst::FCMP_ONE ||
4684 P == CmpInst::FCMP_UEQ || P == CmpInst::FCMP_UNE;
4685 };
4686 if (!ShouldUseImm && Pred && IsEqualityPred(*Pred)) {
4687 // Try commutating the operands.
4688 const ConstantFP *LHSImm = getConstantFPVRegVal(LHS, MRI);
4689 if (LHSImm && (LHSImm->isZero() && !LHSImm->isNegative())) {
4690 ShouldUseImm = true;
4691 std::swap(LHS, RHS);
4692 }
4693 }
4694 unsigned CmpOpcTbl[2][2] = {{AArch64::FCMPSrr, AArch64::FCMPDrr},
4695 {AArch64::FCMPSri, AArch64::FCMPDri}};
4696 unsigned CmpOpc = CmpOpcTbl[ShouldUseImm][OpSize == 64];
4697
4698 // Partially build the compare. Decide if we need to add a use for the
4699 // third operand based off whether or not we're comparing against 0.0.
4700 auto CmpMI = MIRBuilder.buildInstr(CmpOpc).addUse(LHS);
4701 CmpMI.setMIFlags(MachineInstr::NoFPExcept);
4702 if (!ShouldUseImm)
4703 CmpMI.addUse(RHS);
4704 constrainSelectedInstRegOperands(*CmpMI, TII, TRI, RBI);
4705 return &*CmpMI;
4706}
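// Illustrative example: comparing against +0.0 selects the immediate form
// "fcmp s0, #0.0" (FCMPSri/FCMPDri) with no second register use, and for
// pure equality predicates a +0.0 on the LHS is commuted to the RHS first so
// the same form still applies.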
4707
4708MachineInstr *AArch64InstructionSelector::emitVectorConcat(
4709 Optional<Register> Dst, Register Op1, Register Op2,
4710 MachineIRBuilder &MIRBuilder) const {
4711 // We implement a vector concat by:
4712 // 1. Use scalar_to_vector to insert the lower vector into the larger dest
4713 // 2. Insert the upper vector into the destination's upper element
4714 // TODO: some of this code is common with G_BUILD_VECTOR handling.
4715 MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
4716
4717 const LLT Op1Ty = MRI.getType(Op1);
4718 const LLT Op2Ty = MRI.getType(Op2);
4719
4720 if (Op1Ty != Op2Ty) {
4721 LLVM_DEBUG(dbgs() << "Could not do vector concat of differing vector tys");
4722 return nullptr;
4723 }
4724 assert(Op1Ty.isVector() && "Expected a vector for vector concat");
4725
4726 if (Op1Ty.getSizeInBits() >= 128) {
4727 LLVM_DEBUG(dbgs() << "Vector concat not supported for full size vectors");
4728 return nullptr;
4729 }
4730
4731 // At the moment we just support 64 bit vector concats.
4732 if (Op1Ty.getSizeInBits() != 64) {
4733 LLVM_DEBUG(dbgs() << "Vector concat only supported for 64b vectors");
4734 return nullptr;
4735 }
4736
4737 const LLT ScalarTy = LLT::scalar(Op1Ty.getSizeInBits());
4738 const RegisterBank &FPRBank = *RBI.getRegBank(Op1, MRI, TRI);
4739 const TargetRegisterClass *DstRC =
4740 getMinClassForRegBank(FPRBank, Op1Ty.getSizeInBits() * 2);
4741
4742 MachineInstr *WidenedOp1 =
4743 emitScalarToVector(ScalarTy.getSizeInBits(), DstRC, Op1, MIRBuilder);
4744 MachineInstr *WidenedOp2 =
4745 emitScalarToVector(ScalarTy.getSizeInBits(), DstRC, Op2, MIRBuilder);
4746 if (!WidenedOp1 || !WidenedOp2) {
4747 LLVM_DEBUG(dbgs() << "Could not emit a vector from scalar value");
4748 return nullptr;
4749 }
4750
4751 // Now do the insert of the upper element.
4752 unsigned InsertOpc, InsSubRegIdx;
4753 std::tie(InsertOpc, InsSubRegIdx) =
4754 getInsertVecEltOpInfo(FPRBank, ScalarTy.getSizeInBits());
4755
4756 if (!Dst)
4757 Dst = MRI.createVirtualRegister(DstRC);
4758 auto InsElt =
4759 MIRBuilder
4760 .buildInstr(InsertOpc, {*Dst}, {WidenedOp1->getOperand(0).getReg()})
4761 .addImm(1) /* Lane index */
4762 .addUse(WidenedOp2->getOperand(0).getReg())
4763 .addImm(0);
4764 constrainSelectedInstRegOperands(*InsElt, TII, TRI, RBI);
4765 return &*InsElt;
4766}
4767
4768MachineInstr *
4769AArch64InstructionSelector::emitCSINC(Register Dst, Register Src1,
4770 Register Src2, AArch64CC::CondCode Pred,
4771 MachineIRBuilder &MIRBuilder) const {
4772 auto &MRI = *MIRBuilder.getMRI();
4773 const RegClassOrRegBank &RegClassOrBank = MRI.getRegClassOrRegBank(Dst);
4774 // If we used a register class, then this won't necessarily have an LLT.
4775 // Compute the size based off whether or not we have a class or bank.
4776 unsigned Size;
4777 if (const auto *RC = RegClassOrBank.dyn_cast<const TargetRegisterClass *>())
4778 Size = TRI.getRegSizeInBits(*RC);
4779 else
4780 Size = MRI.getType(Dst).getSizeInBits();
4781 // Some opcodes use s1.
4782 assert(Size <= 64 && "Expected 64 bits or less only!");
4783 static const unsigned OpcTable[2] = {AArch64::CSINCWr, AArch64::CSINCXr};
4784 unsigned Opc = OpcTable[Size == 64];
4785 auto CSINC = MIRBuilder.buildInstr(Opc, {Dst}, {Src1, Src2}).addImm(Pred);
4786 constrainSelectedInstRegOperands(*CSINC, TII, TRI, RBI);
4787 return &*CSINC;
4788}
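// Example (a standard AArch64 idiom, shown as a sketch): "cset w0, eq" is
// just "csinc w0, wzr, wzr, ne", so emitCSINC with both sources wired to the
// zero register and the inverted condition reproduces CSET.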
4789
4790std::pair<MachineInstr *, AArch64CC::CondCode>
4791AArch64InstructionSelector::emitOverflowOp(unsigned Opcode, Register Dst,
4792 MachineOperand &LHS,
4793 MachineOperand &RHS,
4794 MachineIRBuilder &MIRBuilder) const {
4795 switch (Opcode) {
4796 default:
4797 llvm_unreachable("Unexpected opcode!");
4798 case TargetOpcode::G_SADDO:
4799 return std::make_pair(emitADDS(Dst, LHS, RHS, MIRBuilder), AArch64CC::VS);
4800 case TargetOpcode::G_UADDO:
4801 return std::make_pair(emitADDS(Dst, LHS, RHS, MIRBuilder), AArch64CC::HS);
4802 case TargetOpcode::G_SSUBO:
4803 return std::make_pair(emitSUBS(Dst, LHS, RHS, MIRBuilder), AArch64CC::VS);
4804 case TargetOpcode::G_USUBO:
4805 return std::make_pair(emitSUBS(Dst, LHS, RHS, MIRBuilder), AArch64CC::LO);
4806 }
4807}
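// Rough mapping produced above: the overflow bit of G_UADDO comes from the
// carry flag of the ADDS (HS), while G_SADDO/G_SSUBO read the V flag (VS)
// and G_USUBO reads "carry clear" (LO); the caller turns the returned
// condition code into a 0/1 value, typically via a CSINC of the zero
// register.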
4808
4809/// Returns true if @p Val is a tree of AND/OR/CMP operations that can be
4810/// expressed as a conjunction.
4811/// \param CanNegate Set to true if we can negate the whole sub-tree just by
4812/// changing the conditions on the CMP tests.
4813/// (this means we can call emitConjunctionRec() with
4814/// Negate==true on this sub-tree)
4815/// \param MustBeFirst Set to true if this subtree needs to be negated and we
4816/// cannot do the negation naturally. We are required to
4817/// emit the subtree first in this case.
4818/// \param WillNegate Is true if we are called when the result of this
4819/// subexpression must be negated. This happens when the
4820/// outer expression is an OR. We can use this fact to know
4821/// that we have a double negation (or (or ...) ...) that
4822/// can be implemented for free.
4823static bool canEmitConjunction(Register Val, bool &CanNegate, bool &MustBeFirst,
4824 bool WillNegate, MachineRegisterInfo &MRI,
4825 unsigned Depth = 0) {
4826 if (!MRI.hasOneNonDBGUse(Val))
4827 return false;
4828 MachineInstr *ValDef = MRI.getVRegDef(Val);
4829 unsigned Opcode = ValDef->getOpcode();
4830 if (Opcode == TargetOpcode::G_TRUNC) {
4831 // Look through a trunc.
4832 Val = ValDef->getOperand(1).getReg();
4833 ValDef = MRI.getVRegDef(Val);
4834 Opcode = ValDef->getOpcode();
4835 }
4836 if (isa<GAnyCmp>(ValDef)) {
4837 CanNegate = true;
4838 MustBeFirst = false;
4839 return true;
4840 }
4841 // Protect against exponential runtime and stack overflow.
4842 if (Depth > 6)
4843 return false;
4844 if (Opcode == TargetOpcode::G_AND || Opcode == TargetOpcode::G_OR) {
4845 bool IsOR = Opcode == TargetOpcode::G_OR;
4846 Register O0 = ValDef->getOperand(1).getReg();
4847 Register O1 = ValDef->getOperand(2).getReg();
4848 bool CanNegateL;
4849 bool MustBeFirstL;
4850 if (!canEmitConjunction(O0, CanNegateL, MustBeFirstL, IsOR, MRI, Depth + 1))
4851 return false;
4852 bool CanNegateR;
4853 bool MustBeFirstR;
4854 if (!canEmitConjunction(O1, CanNegateR, MustBeFirstR, IsOR, MRI, Depth + 1))
4855 return false;
4856
4857 if (MustBeFirstL && MustBeFirstR)
4858 return false;
4859
4860 if (IsOR) {
4861 // For an OR expression we need to be able to naturally negate at least
4862 // one side or we cannot do the transformation at all.
4863 if (!CanNegateL && !CanNegateR)
4864 return false;
4865 // If the result of the OR will be negated and we can naturally negate
4866 // the leaves, then this sub-tree as a whole negates naturally.
4867 CanNegate = WillNegate && CanNegateL && CanNegateR;
4868 // If we cannot naturally negate the whole sub-tree, then this must be
4869 // emitted first.
4870 MustBeFirst = !CanNegate;
4871 } else {
4872 assert(Opcode == TargetOpcode::G_AND && "Must be G_AND");
4873 // We cannot naturally negate an AND operation.
4874 CanNegate = false;
4875 MustBeFirst = MustBeFirstL || MustBeFirstR;
4876 }
4877 return true;
4878 }
4879 return false;
4880}
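// Illustrative tree that satisfies this check (single uses assumed, names
// made up):
//
//   %a:_(s1) = G_ICMP intpred(slt), %x, %y
//   %b:_(s1) = G_FCMP floatpred(oeq), %f, %g
//   %c:_(s1) = G_AND %a, %b
//
// Both leaves are compares, so the AND can be emitted as a compare followed
// by a conditional compare. An OR node additionally needs at least one side
// whose condition can be inverted for free.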
4881
4882MachineInstr *AArch64InstructionSelector::emitConditionalComparison(
4883 Register LHS, Register RHS, CmpInst::Predicate CC,
4884 AArch64CC::CondCode Predicate, AArch64CC::CondCode OutCC,
4885 MachineIRBuilder &MIB) const {
4886 // TODO: emit CMN as an optimization.
4887 auto &MRI = *MIB.getMRI();
4888 LLT OpTy = MRI.getType(LHS);
4889 assert(OpTy.getSizeInBits() == 32 || OpTy.getSizeInBits() == 64);
4890 unsigned CCmpOpc;
4891 if (CmpInst::isIntPredicate(CC)) {
4892 CCmpOpc = OpTy.getSizeInBits() == 32 ? AArch64::CCMPWr : AArch64::CCMPXr;
4893 } else {
4894 switch (OpTy.getSizeInBits()) {
4895 case 16:
4896 CCmpOpc = AArch64::FCCMPHrr;
4897 break;
4898 case 32:
4899 CCmpOpc = AArch64::FCCMPSrr;
4900 break;
4901 case 64:
4902 CCmpOpc = AArch64::FCCMPDrr;
4903 break;
4904 default:
4905 return nullptr;
4906 }
4907 }
4908 AArch64CC::CondCode InvOutCC = AArch64CC::getInvertedCondCode(OutCC);
4909 unsigned NZCV = AArch64CC::getNZCVToSatisfyCondCode(InvOutCC);
4910 auto CCmp =
4911 MIB.buildInstr(CCmpOpc, {}, {LHS, RHS}).addImm(NZCV).addImm(Predicate);
4912 constrainSelectedInstRegOperands(*CCmp, TII, TRI, RBI);
4913 return &*CCmp;
4914}
4915
4916MachineInstr *AArch64InstructionSelector::emitConjunctionRec(
4917 Register Val, AArch64CC::CondCode &OutCC, bool Negate, Register CCOp,
4918 AArch64CC::CondCode Predicate, MachineIRBuilder &MIB) const {
4919 // We're at a tree leaf, produce a conditional comparison operation.
4920 auto &MRI = *MIB.getMRI();
4921 MachineInstr *ValDef = MRI.getVRegDef(Val);
4922 unsigned Opcode = ValDef->getOpcode();
4923 if (Opcode == TargetOpcode::G_TRUNC) {
4924 // Look through a trunc.
4925 Val = ValDef->getOperand(1).getReg();
4926 ValDef = MRI.getVRegDef(Val);
4927 Opcode = ValDef->getOpcode();
4928 }
4929 if (auto *Cmp = dyn_cast<GAnyCmp>(ValDef)) {
4930 Register LHS = Cmp->getLHSReg();
4931 Register RHS = Cmp->getRHSReg();
4932 CmpInst::Predicate CC = Cmp->getCond();
4933 if (Negate)
4934 CC = CmpInst::getInversePredicate(CC);
4935 if (isa<GICmp>(Cmp)) {
4936 OutCC = changeICMPPredToAArch64CC(CC);
4937 } else {
4938 // Handle special FP cases.
4939 AArch64CC::CondCode ExtraCC;
4940 changeFPCCToANDAArch64CC(CC, OutCC, ExtraCC);
4941 // Some floating point conditions can't be tested with a single condition
4942 // code. Construct an additional comparison in this case.
4943 if (ExtraCC != AArch64CC::AL) {
4944 MachineInstr *ExtraCmp;
4945 if (!CCOp)
4946 ExtraCmp = emitFPCompare(LHS, RHS, MIB, CC);
4947 else
4948 ExtraCmp =
4949 emitConditionalComparison(LHS, RHS, CC, Predicate, ExtraCC, MIB);
4950 CCOp = ExtraCmp->getOperand(0).getReg();
4951 Predicate = ExtraCC;
4952 }
4953 }
4954
4955 // Produce a normal comparison if we are first in the chain
4956 if (!CCOp) {
4957 auto Dst = MRI.cloneVirtualRegister(LHS);
4958 if (isa<GICmp>(Cmp))
4959 return emitSUBS(Dst, Cmp->getOperand(2), Cmp->getOperand(3), MIB);
4960 return emitFPCompare(Cmp->getOperand(2).getReg(),
4961 Cmp->getOperand(3).getReg(), MIB);
4962 }
4963 // Otherwise produce a ccmp.
4964 return emitConditionalComparison(LHS, RHS, CC, Predicate, OutCC, MIB);
4965 }
4966 assert(MRI.hasOneNonDBGUse(Val) && "Valid conjunction/disjunction tree");
4967
4968 bool IsOR = Opcode == TargetOpcode::G_OR;
4969
4970 Register LHS = ValDef->getOperand(1).getReg();
4971 bool CanNegateL;
4972 bool MustBeFirstL;
4973 bool ValidL = canEmitConjunction(LHS, CanNegateL, MustBeFirstL, IsOR, MRI);
4974 assert(ValidL && "Valid conjunction/disjunction tree");
4975 (void)ValidL;
4976
4977 Register RHS = ValDef->getOperand(2).getReg();
4978 bool CanNegateR;
4979 bool MustBeFirstR;
4980 bool ValidR = canEmitConjunction(RHS, CanNegateR, MustBeFirstR, IsOR, MRI);
4981 assert(ValidR && "Valid conjunction/disjunction tree");
4982 (void)ValidR;
4983
4984 // Swap sub-tree that must come first to the right side.
4985 if (MustBeFirstL) {
4986 assert(!MustBeFirstR && "Valid conjunction/disjunction tree");
4987 std::swap(LHS, RHS);
4988 std::swap(CanNegateL, CanNegateR);
4989 std::swap(MustBeFirstL, MustBeFirstR);
4990 }
4991
4992 bool NegateR;
4993 bool NegateAfterR;
4994 bool NegateL;
4995 bool NegateAfterAll;
4996 if (Opcode == TargetOpcode::G_OR) {
4997 // Swap the sub-tree that we can negate naturally to the left.
4998 if (!CanNegateL) {
4999 assert(CanNegateR && "at least one side must be negatable");
5000 assert(!MustBeFirstR && "invalid conjunction/disjunction tree");
5001 assert(!Negate);
5002 std::swap(LHS, RHS);
5003 NegateR = false;
5004 NegateAfterR = true;
5005 } else {
5006 // Negate the left sub-tree if possible, otherwise negate the result.
5007 NegateR = CanNegateR;
5008 NegateAfterR = !CanNegateR;
5009 }
5010 NegateL = true;
5011 NegateAfterAll = !Negate;
5012 } else {
5013 assert(Opcode == TargetOpcode::G_AND &&
5014 "Valid conjunction/disjunction tree");
5015 assert(!Negate && "Valid conjunction/disjunction tree");
5016
5017 NegateL = false;
5018 NegateR = false;
5019 NegateAfterR = false;
5020 NegateAfterAll = false;
5021 }
5022
5023 // Emit sub-trees.
5024 AArch64CC::CondCode RHSCC;
5025 MachineInstr *CmpR =
5026 emitConjunctionRec(RHS, RHSCC, NegateR, CCOp, Predicate, MIB);
5027 if (NegateAfterR)
5028 RHSCC = AArch64CC::getInvertedCondCode(RHSCC);
5029 MachineInstr *CmpL = emitConjunctionRec(
5030 LHS, OutCC, NegateL, CmpR->getOperand(0).getReg(), RHSCC, MIB);
5031 if (NegateAfterAll)
5032 OutCC = AArch64CC::getInvertedCondCode(OutCC);
5033 return CmpL;
5034}
5035
5036MachineInstr *AArch64InstructionSelector::emitConjunction(
5037 Register Val, AArch64CC::CondCode &OutCC, MachineIRBuilder &MIB) const {
5038 bool DummyCanNegate;
5039 bool DummyMustBeFirst;
5040 if (!canEmitConjunction(Val, DummyCanNegate, DummyMustBeFirst, false,
5041 *MIB.getMRI()))
5042 return nullptr;
5043 return emitConjunctionRec(Val, OutCC, false, Register(), AArch64CC::AL, MIB);
5044}
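// Sketch of the chain emitted by emitConjunctionRec for a tree like the one
// above (made-up registers, condition codes chosen for illustration): the
// first leaf becomes an ordinary SUBS/FCMP, every later leaf becomes a
// CCMP/FCCMP whose NZCV immediate encodes the "previous test failed" result,
// and OutCC reports the condition the final consumer (CSEL, B.cond, ...)
// should test. Roughly:
//
//   cmp   x0, x1           ; first leaf
//   ccmp  x2, x3, #0, lt   ; only meaningful if the first test held
//   csel  w0, w1, w2, eq   ; consumer tests OutCC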
5045
5046bool AArch64InstructionSelector::tryOptSelectConjunction(GSelect &SelI,
5047 MachineInstr &CondMI) {
5048 AArch64CC::CondCode AArch64CC;
5049 MachineInstr *ConjMI = emitConjunction(SelI.getCondReg(), AArch64CC, MIB);
5050 if (!ConjMI)
5051 return false;
5052
5053 emitSelect(SelI.getReg(0), SelI.getTrueReg(), SelI.getFalseReg(), AArch64CC, MIB);
5054 SelI.eraseFromParent();
5055 return true;
5056}
5057
5058bool AArch64InstructionSelector::tryOptSelect(GSelect &I) {
5059 MachineRegisterInfo &MRI = *MIB.getMRI();
5060 // We want to recognize this pattern:
5061 //
5062 // $z = G_FCMP pred, $x, $y
5063 // ...
5064 // $w = G_SELECT $z, $a, $b
5065 //
5066 // Where the value of $z is *only* ever used by the G_SELECT (possibly with
5067 // some copies/truncs in between.)
5068 //
5069 // If we see this, then we can emit something like this:
5070 //
5071 // fcmp $x, $y
5072 // fcsel $w, $a, $b, pred
5073 //
5074 // Rather than emitting both of the rather long sequences in the standard
5075 // G_FCMP/G_SELECT select methods.
5076
5077 // First, check if the condition is defined by a compare.
5078 MachineInstr *CondDef = MRI.getVRegDef(I.getOperand(1).getReg());
5079 while (CondDef) {
5080 // We can only fold if all of the defs have one use.
5081 Register CondDefReg = CondDef->getOperand(0).getReg();
5082 if (!MRI.hasOneNonDBGUse(CondDefReg)) {
5083 // Unless it's another select.
5084 for (const MachineInstr &UI : MRI.use_nodbg_instructions(CondDefReg)) {
5085 if (CondDef == &UI)
5086 continue;
5087 if (UI.getOpcode() != TargetOpcode::G_SELECT)
5088 return false;
5089 }
5090 }
5091
5092 // We can skip over G_TRUNC since the condition is 1-bit.
5093 // Truncating/extending can have no impact on the value.
5094 unsigned Opc = CondDef->getOpcode();
5095 if (Opc != TargetOpcode::COPY && Opc != TargetOpcode::G_TRUNC)
5096 break;
5097
5098 // Can't see past copies from physregs.
5099 if (Opc == TargetOpcode::COPY &&
5100 Register::isPhysicalRegister(CondDef->getOperand(1).getReg()))
5101 return false;
5102
5103 CondDef = MRI.getVRegDef(CondDef->getOperand(1).getReg());
5104 }
5105
5106 // Is the condition defined by a compare?
5107 if (!CondDef)
5108 return false;
5109
5110 unsigned CondOpc = CondDef->getOpcode();
5111 if (CondOpc != TargetOpcode::G_ICMP && CondOpc != TargetOpcode::G_FCMP) {
5112 if (tryOptSelectConjunction(I, *CondDef))
5113 return true;
5114 return false;
5115 }
5116
5117 AArch64CC::CondCode CondCode;
5118 if (CondOpc == TargetOpcode::G_ICMP) {
5119 auto Pred =
5120 static_cast<CmpInst::Predicate>(CondDef->getOperand(1).getPredicate());
5121 CondCode = changeICMPPredToAArch64CC(Pred);
5122 emitIntegerCompare(CondDef->getOperand(2), CondDef->getOperand(3),
5123 CondDef->getOperand(1), MIB);
5124 } else {
5125 // Get the condition code for the select.
5126 auto Pred =
5127 static_cast<CmpInst::Predicate>(CondDef->getOperand(1).getPredicate());
5128 AArch64CC::CondCode CondCode2;
5129 changeFCMPPredToAArch64CC(Pred, CondCode, CondCode2);
5130
5131 // changeFCMPPredToAArch64CC sets CondCode2 to AL when we require two
5132 // instructions to emit the comparison.
5133 // TODO: Handle FCMP_UEQ and FCMP_ONE. After that, this check will be
5134 // unnecessary.
5135 if (CondCode2 != AArch64CC::AL)
5136 return false;
5137
5138 if (!emitFPCompare(CondDef->getOperand(2).getReg(),
5139 CondDef->getOperand(3).getReg(), MIB)) {
5140 LLVM_DEBUG(dbgs() << "Couldn't emit compare for select!\n");
5141 return false;
5142 }
5143 }
5144
5145 // Emit the select.
5146 emitSelect(I.getOperand(0).getReg(), I.getOperand(2).getReg(),
5147 I.getOperand(3).getReg(), CondCode, MIB);
5148 I.eraseFromParent();
5149 return true;
5150}
5151
5152MachineInstr *AArch64InstructionSelector::tryFoldIntegerCompare(
5153 MachineOperand &LHS, MachineOperand &RHS, MachineOperand &Predicate,
5154 MachineIRBuilder &MIRBuilder) const {
5155 assert(LHS.isReg() && RHS.isReg() && Predicate.isPredicate() &&
5156 "Unexpected MachineOperand");
5157 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
5158 // We want to find this sort of thing:
5159 // x = G_SUB 0, y
5160 // G_ICMP z, x
5161 //
5162 // In this case, we can fold the G_SUB into the G_ICMP using a CMN instead.
5163 // e.g:
5164 //
5165 // cmn z, y
5166
5167 // Check if the RHS or LHS of the G_ICMP is defined by a SUB
5168 MachineInstr *LHSDef = getDefIgnoringCopies(LHS.getReg(), MRI);
5169 MachineInstr *RHSDef = getDefIgnoringCopies(RHS.getReg(), MRI);
5170 auto P = static_cast<CmpInst::Predicate>(Predicate.getPredicate());
5171 // Given this:
5172 //
5173 // x = G_SUB 0, y
5174 // G_ICMP x, z
5175 //
5176 // Produce this:
5177 //
5178 // cmn y, z
5179 if (isCMN(LHSDef, P, MRI))
5180 return emitCMN(LHSDef->getOperand(2), RHS, MIRBuilder);
5181
5182 // Same idea here, but with the RHS of the compare instead:
5183 //
5184 // Given this:
5185 //
5186 // x = G_SUB 0, y
5187 // G_ICMP z, x
5188 //
5189 // Produce this:
5190 //
5191 // cmn z, y
5192 if (isCMN(RHSDef, P, MRI))
5193 return emitCMN(LHS, RHSDef->getOperand(2), MIRBuilder);
5194
5195 // Given this:
5196 //
5197 // z = G_AND x, y
5198 // G_ICMP z, 0
5199 //
5200 // Produce this if the compare is signed:
5201 //
5202 // tst x, y
5203 if (!CmpInst::isUnsigned(P) && LHSDef &&
5204 LHSDef->getOpcode() == TargetOpcode::G_AND) {
5205 // Make sure that the RHS is 0.
5206 auto ValAndVReg = getIConstantVRegValWithLookThrough(RHS.getReg(), MRI);
5207 if (!ValAndVReg || ValAndVReg->Value != 0)
5208 return nullptr;
5209
5210 return emitTST(LHSDef->getOperand(1),
5211 LHSDef->getOperand(2), MIRBuilder);
5212 }
5213
5214 return nullptr;
5215}
5216
5217bool AArch64InstructionSelector::selectShuffleVector(
5218 MachineInstr &I, MachineRegisterInfo &MRI) {
5219 const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
5220 Register Src1Reg = I.getOperand(1).getReg();
5221 const LLT Src1Ty = MRI.getType(Src1Reg);
5222 Register Src2Reg = I.getOperand(2).getReg();
5223 const LLT Src2Ty = MRI.getType(Src2Reg);
5224 ArrayRef<int> Mask = I.getOperand(3).getShuffleMask();
5225
5226 MachineBasicBlock &MBB = *I.getParent();
5227 MachineFunction &MF = *MBB.getParent();
5228 LLVMContext &Ctx = MF.getFunction().getContext();
5229
5230 // G_SHUFFLE_VECTOR is weird in that the source operands can be scalars, if
5231 // it's originated from a <1 x T> type. Those should have been lowered into
5232 // G_BUILD_VECTOR earlier.
5233 if (!Src1Ty.isVector() || !Src2Ty.isVector()) {
5234 LLVM_DEBUG(dbgs() << "Could not select a \"scalar\" G_SHUFFLE_VECTOR\n");
5235 return false;
5236 }
5237
5238 unsigned BytesPerElt = DstTy.getElementType().getSizeInBits() / 8;
5239
5240 SmallVector<Constant *, 64> CstIdxs;
5241 for (int Val : Mask) {
5242 // For now, any undef indexes we'll just assume to be 0. This should be
5243 // optimized in future, e.g. to select DUP etc.
5244 Val = Val < 0 ? 0 : Val;
5245 for (unsigned Byte = 0; Byte < BytesPerElt; ++Byte) {
5246 unsigned Offset = Byte + Val * BytesPerElt;
5247 CstIdxs.emplace_back(ConstantInt::get(Type::getInt8Ty(Ctx), Offset));
5248 }
5249 }
5250
5251 // Use a constant pool to load the index vector for TBL.
5252 Constant *CPVal = ConstantVector::get(CstIdxs);
5253 MachineInstr *IndexLoad = emitLoadFromConstantPool(CPVal, MIB);
5254 if (!IndexLoad) {
5255 LLVM_DEBUG(dbgs() << "Could not load from a constant pool");
5256 return false;
5257 }
5258
5259 if (DstTy.getSizeInBits() != 128) {
5260 assert(DstTy.getSizeInBits() == 64 && "Unexpected shuffle result ty");
5261 // This case can be done with TBL1.
5262 MachineInstr *Concat = emitVectorConcat(None, Src1Reg, Src2Reg, MIB);
5263 if (!Concat) {
5264 LLVM_DEBUG(dbgs() << "Could not do vector concat for tbl1");
5265 return false;
5266 }
5267
5268 // The constant pool load will be 64 bits, so we need to convert it to an FPR128 reg.
5269 IndexLoad = emitScalarToVector(64, &AArch64::FPR128RegClass,
5270 IndexLoad->getOperand(0).getReg(), MIB);
5271
5272 auto TBL1 = MIB.buildInstr(
5273 AArch64::TBLv16i8One, {&AArch64::FPR128RegClass},
5274 {Concat->getOperand(0).getReg(), IndexLoad->getOperand(0).getReg()});
5275 constrainSelectedInstRegOperands(*TBL1, TII, TRI, RBI);
5276
5277 auto Copy =
5278 MIB.buildInstr(TargetOpcode::COPY, {I.getOperand(0).getReg()}, {})
5279 .addReg(TBL1.getReg(0), 0, AArch64::dsub);
5280 RBI.constrainGenericRegister(Copy.getReg(0), AArch64::FPR64RegClass, MRI);
5281 I.eraseFromParent();
5282 return true;
5283 }
5284
5285 // For TBL2 we need to emit a REG_SEQUENCE to tie together two consecutive
5286 // Q registers for regalloc.
5287 SmallVector<Register, 2> Regs = {Src1Reg, Src2Reg};
5288 auto RegSeq = createQTuple(Regs, MIB);
5289 auto TBL2 = MIB.buildInstr(AArch64::TBLv16i8Two, {I.getOperand(0)},
5290 {RegSeq, IndexLoad->getOperand(0)});
5291 constrainSelectedInstRegOperands(*TBL2, TII, TRI, RBI);
5292 I.eraseFromParent();
5293 return true;
5294}
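// Worked example of the index vector built above (sketch): a mask of [1, 0]
// on <2 x s32> has BytesPerElt = 4, so CstIdxs becomes the byte indices
// [4, 5, 6, 7, 0, 1, 2, 3]; the TBL then swaps the two 32-bit lanes of the
// concatenated source.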
5295
5296MachineInstr *AArch64InstructionSelector::emitLaneInsert(
5297 Optional<Register> DstReg, Register SrcReg, Register EltReg,
5298 unsigned LaneIdx, const RegisterBank &RB,
5299 MachineIRBuilder &MIRBuilder) const {
5300 MachineInstr *InsElt = nullptr;
5301 const TargetRegisterClass *DstRC = &AArch64::FPR128RegClass;
5302 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
5303
5304 // Create a register to define with the insert if one wasn't passed in.
5305 if (!DstReg)
5306 DstReg = MRI.createVirtualRegister(DstRC);
5307
5308 unsigned EltSize = MRI.getType(EltReg).getSizeInBits();
5309 unsigned Opc = getInsertVecEltOpInfo(RB, EltSize).first;
5310
5311 if (RB.getID() == AArch64::FPRRegBankID) {
5312 auto InsSub = emitScalarToVector(EltSize, DstRC, EltReg, MIRBuilder);
5313 InsElt = MIRBuilder.buildInstr(Opc, {*DstReg}, {SrcReg})
5314 .addImm(LaneIdx)
5315 .addUse(InsSub->getOperand(0).getReg())
5316 .addImm(0);
5317 } else {
5318 InsElt = MIRBuilder.buildInstr(Opc, {*DstReg}, {SrcReg})
5319 .addImm(LaneIdx)
5320 .addUse(EltReg);
5321 }
5322
5323 constrainSelectedInstRegOperands(*InsElt, TII, TRI, RBI);
5324 return InsElt;
5325}
5326
5327bool AArch64InstructionSelector::selectUSMovFromExtend(
5328 MachineInstr &MI, MachineRegisterInfo &MRI) {
5329 if (MI.getOpcode() != TargetOpcode::G_SEXT &&
5330 MI.getOpcode() != TargetOpcode::G_ZEXT &&
5331 MI.getOpcode() != TargetOpcode::G_ANYEXT)
5332 return false;
5333 bool IsSigned = MI.getOpcode() == TargetOpcode::G_SEXT;
5334 const Register DefReg = MI.getOperand(0).getReg();
5335 const LLT DstTy = MRI.getType(DefReg);
5336 unsigned DstSize = DstTy.getSizeInBits();
5337
5338 if (DstSize != 32 && DstSize != 64)
5339 return false;
5340
5341 MachineInstr *Extract = getOpcodeDef(TargetOpcode::G_EXTRACT_VECTOR_ELT,
5342 MI.getOperand(1).getReg(), MRI);
5343 int64_t Lane;
5344 if (!Extract || !mi_match(Extract->getOperand(2).getReg(), MRI, m_ICst(Lane)))
5345 return false;
5346 Register Src0 = Extract->getOperand(1).getReg();
5347
5348 const LLT &VecTy = MRI.getType(Src0);
5349
5350 if (VecTy.getSizeInBits() != 128) {
5351 const MachineInstr *ScalarToVector = emitScalarToVector(
5352 VecTy.getSizeInBits(), &AArch64::FPR128RegClass, Src0, MIB);
5353 assert(ScalarToVector && "Didn't expect emitScalarToVector to fail!");
5354 Src0 = ScalarToVector->getOperand(0).getReg();
5355 }
5356
5357 unsigned Opcode;
5358 if (DstSize == 64 && VecTy.getScalarSizeInBits() == 32)
5359 Opcode = IsSigned ? AArch64::SMOVvi32to64 : AArch64::UMOVvi32;
5360 else if (DstSize == 64 && VecTy.getScalarSizeInBits() == 16)
5361 Opcode = IsSigned ? AArch64::SMOVvi16to64 : AArch64::UMOVvi16;
5362 else if (DstSize == 64 && VecTy.getScalarSizeInBits() == 8)
5363 Opcode = IsSigned ? AArch64::SMOVvi8to64 : AArch64::UMOVvi8;
5364 else if (DstSize == 32 && VecTy.getScalarSizeInBits() == 16)
5365 Opcode = IsSigned ? AArch64::SMOVvi16to32 : AArch64::UMOVvi16;
5366 else if (DstSize == 32 && VecTy.getScalarSizeInBits() == 8)
5367 Opcode = IsSigned ? AArch64::SMOVvi8to32 : AArch64::UMOVvi8;
5368 else
5369 llvm_unreachable("Unexpected type combo for S/UMov!");
5370
5371 // We may need to generate one of these, depending on the type and sign of the
5372 // input:
5373 // DstReg = SMOV Src0, Lane;
5374 // NewReg = UMOV Src0, Lane; DstReg = SUBREG_TO_REG NewReg, sub_32;
5375 MachineInstr *ExtI = nullptr;
5376 if (DstSize == 64 && !IsSigned) {
5377 Register NewReg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
5378 MIB.buildInstr(Opcode, {NewReg}, {Src0}).addImm(Lane);
5379 ExtI = MIB.buildInstr(AArch64::SUBREG_TO_REG, {DefReg}, {})
5380 .addImm(0)
5381 .addUse(NewReg)
5382 .addImm(AArch64::sub_32);
5383 RBI.constrainGenericRegister(DefReg, AArch64::GPR64RegClass, MRI);
5384 } else
5385 ExtI = MIB.buildInstr(Opcode, {DefReg}, {Src0}).addImm(Lane);
5386
5387 constrainSelectedInstRegOperands(*ExtI, TII, TRI, RBI);
5388 MI.eraseFromParent();
5389 return true;
5390}
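// Illustrative before/after (made-up virtual registers):
//
//   %e:_(s16) = G_EXTRACT_VECTOR_ELT %v:fpr(<8 x s16>), 3
//   %d:_(s32) = G_SEXT %e
//
// selects to a single "SMOVvi16to32 %d, %v, 3", while the unsigned 64-bit
// cases go through a 32-bit UMOV plus SUBREG_TO_REG as described in the
// comment above.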
5391
5392bool AArch64InstructionSelector::selectInsertElt(MachineInstr &I,
5393 MachineRegisterInfo &MRI) {
5394 assert(I.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT);
5395
5396 // Get information on the destination.
5397 Register DstReg = I.getOperand(0).getReg();
5398 const LLT DstTy = MRI.getType(DstReg);
5399 unsigned VecSize = DstTy.getSizeInBits();
5400
5401 // Get information on the element we want to insert into the destination.
5402 Register EltReg = I.getOperand(2).getReg();
5403 const LLT EltTy = MRI.getType(EltReg);
5404 unsigned EltSize = EltTy.getSizeInBits();
5405 if (EltSize < 16 || EltSize > 64)
5406 return false; // Don't support all element types yet.
5407
5408 // Find the definition of the index. Bail out if it's not defined by a
5409 // G_CONSTANT.
5410 Register IdxReg = I.getOperand(3).getReg();
5411 auto VRegAndVal = getIConstantVRegValWithLookThrough(IdxReg, MRI);
5412 if (!VRegAndVal)
5413 return false;
5414 unsigned LaneIdx = VRegAndVal->Value.getSExtValue();
5415
5416 // Perform the lane insert.
5417 Register SrcReg = I.getOperand(1).getReg();
5418 const RegisterBank &EltRB = *RBI.getRegBank(EltReg, MRI, TRI);
5419
5420 if (VecSize < 128) {
5421 // If the vector we're inserting into is smaller than 128 bits, widen it
5422 // to 128 to do the insert.
5423 MachineInstr *ScalarToVec =
5424 emitScalarToVector(VecSize, &AArch64::FPR128RegClass, SrcReg, MIB);
5425 if (!ScalarToVec)
5426 return false;
5427 SrcReg = ScalarToVec->getOperand(0).getReg();
5428 }
5429
5430 // Create an insert into a new FPR128 register.
5431 // Note that if our vector is already 128 bits, we end up emitting an extra
5432 // register.
5433 MachineInstr *InsMI =
5434 emitLaneInsert(None, SrcReg, EltReg, LaneIdx, EltRB, MIB);
5435
5436 if (VecSize < 128) {
5437 // If we had to widen to perform the insert, then we have to demote back to
5438 // the original size to get the result we want.
5439 Register DemoteVec = InsMI->getOperand(0).getReg();
5440 const TargetRegisterClass *RC =
5441 getMinClassForRegBank(*RBI.getRegBank(DemoteVec, MRI, TRI), VecSize);
5442 if (RC != &AArch64::FPR32RegClass && RC != &AArch64::FPR64RegClass) {
5443 LLVM_DEBUG(dbgs() << "Unsupported register class!\n");
5444 return false;
5445 }
5446 unsigned SubReg = 0;
5447 if (!getSubRegForClass(RC, TRI, SubReg))
5448 return false;
5449 if (SubReg != AArch64::ssub && SubReg != AArch64::dsub) {
5450 LLVM_DEBUG(dbgs() << "Unsupported destination size! (" << VecSize
5451 << "\n");
5452 return false;
5453 }
5454 MIB.buildInstr(TargetOpcode::COPY, {DstReg}, {})
5455 .addReg(DemoteVec, 0, SubReg);
5456 RBI.constrainGenericRegister(DstReg, *RC, MRI);
5457 } else {
5458 // No widening needed.
5459 InsMI->getOperand(0).setReg(DstReg);
5460 constrainSelectedInstRegOperands(*InsMI, TII, TRI, RBI);
5461 }
5462
5463 I.eraseFromParent();
5464 return true;
5465}
5466
5467MachineInstr *
5468AArch64InstructionSelector::emitConstantVector(Register Dst, Constant *CV,
5469 MachineIRBuilder &MIRBuilder,
5470 MachineRegisterInfo &MRI) {
5471 LLT DstTy = MRI.getType(Dst);
5472 unsigned DstSize = DstTy.getSizeInBits();
5473 if (CV->isNullValue()) {
5474 if (DstSize == 128) {
5475 auto Mov =
5476 MIRBuilder.buildInstr(AArch64::MOVIv2d_ns, {Dst}, {}).addImm(0);
5477 constrainSelectedInstRegOperands(*Mov, TII, TRI, RBI);
5478 return &*Mov;
5479 }
5480
5481 if (DstSize == 64) {
5482 auto Mov =
5483 MIRBuilder
5484 .buildInstr(AArch64::MOVIv2d_ns, {&AArch64::FPR128RegClass}, {})
5485 .addImm(0);
5486 auto Copy = MIRBuilder.buildInstr(TargetOpcode::COPY, {Dst}, {})
5487 .addReg(Mov.getReg(0), 0, AArch64::dsub);
5488 RBI.constrainGenericRegister(Dst, AArch64::FPR64RegClass, MRI);
5489 return &*Copy;
5490 }
5491 }
5492
5493 auto *CPLoad = emitLoadFromConstantPool(CV, MIRBuilder);
5494 if (!CPLoad) {
5495 LLVM_DEBUG(dbgs() << "Could not generate cp load for constant vector!");
5496 return nullptr;
5497 }
5498
5499 auto Copy = MIRBuilder.buildCopy(Dst, CPLoad->getOperand(0));
5500 RBI.constrainGenericRegister(
5501 Dst, *MRI.getRegClass(CPLoad->getOperand(0).getReg()), MRI);
5502 return &*Copy;
5503}
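// Rough summary of the three paths above: an all-zeros 128-bit vector
// becomes "movi v0.2d, #0" (MOVIv2d_ns), a 64-bit all-zeros vector reuses
// the same MOVI and copies out the dsub half, and every other constant
// vector is lowered to a load from the constant pool.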
5504
5505bool AArch64InstructionSelector::tryOptConstantBuildVec(
5506 MachineInstr &I, LLT DstTy, MachineRegisterInfo &MRI) {
5507 assert(I.getOpcode() == TargetOpcode::G_BUILD_VECTOR);
5508 unsigned DstSize = DstTy.getSizeInBits();
5509 assert(DstSize <= 128 && "Unexpected build_vec type!");
5510 if (DstSize < 32)
5511 return false;
5512 // Check if we're building a constant vector, in which case we want to
5513 // generate a constant pool load instead of a vector insert sequence.
5514 SmallVector<Constant *, 16> Csts;
5515 for (unsigned Idx = 1; Idx < I.getNumOperands(); ++Idx) {
5516 // Try to find G_CONSTANT or G_FCONSTANT
5517 auto *OpMI =
5518 getOpcodeDef(TargetOpcode::G_CONSTANT, I.getOperand(Idx).getReg(), MRI);
5519 if (OpMI)
5520 Csts.emplace_back(
5521 const_cast<ConstantInt *>(OpMI->getOperand(1).getCImm()));
5522 else if ((OpMI = getOpcodeDef(TargetOpcode::G_FCONSTANT,
5523 I.getOperand(Idx).getReg(), MRI)))
5524 Csts.emplace_back(
5525 const_cast<ConstantFP *>(OpMI->getOperand(1).getFPImm()));
5526 else
5527 return false;
5528 }
5529 Constant *CV = ConstantVector::get(Csts);
5530 if (!emitConstantVector(I.getOperand(0).getReg(), CV, MIB, MRI))
5531 return false;
5532 I.eraseFromParent();
5533 return true;
5534}
5535
5536bool AArch64InstructionSelector::tryOptBuildVecToSubregToReg(
5537 MachineInstr &I, MachineRegisterInfo &MRI) {
5538 // Given:
5539 // %vec = G_BUILD_VECTOR %elt, %undef, %undef, ... %undef
5540 //
5541 // Select the G_BUILD_VECTOR as a SUBREG_TO_REG from %elt.
5542 Register Dst = I.getOperand(0).getReg();
5543 Register EltReg = I.getOperand(1).getReg();
5544 LLT EltTy = MRI.getType(EltReg);
5545 // If the index isn't on the same bank as its elements, then this can't be a
5546 // SUBREG_TO_REG.
5547 const RegisterBank &EltRB = *RBI.getRegBank(EltReg, MRI, TRI);
5548 const RegisterBank &DstRB = *RBI.getRegBank(Dst, MRI, TRI);
5549 if (EltRB != DstRB)
5550 return false;
5551 if (any_of(make_range(I.operands_begin() + 2, I.operands_end()),
5552 [&MRI](const MachineOperand &Op) {
5553 return !getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, Op.getReg(),
5554 MRI);
5555 }))
5556 return false;
5557 unsigned SubReg;
5558 const TargetRegisterClass *EltRC =
5559 getMinClassForRegBank(EltRB, EltTy.getSizeInBits());
5560 if (!EltRC)
5561 return false;
5562 const TargetRegisterClass *DstRC =
5563 getMinClassForRegBank(DstRB, MRI.getType(Dst).getSizeInBits());
5564 if (!DstRC)
5565 return false;
5566 if (!getSubRegForClass(EltRC, TRI, SubReg))
5567 return false;
5568 auto SubregToReg = MIB.buildInstr(AArch64::SUBREG_TO_REG, {Dst}, {})
5569 .addImm(0)
5570 .addUse(EltReg)
5571 .addImm(SubReg);
5572 I.eraseFromParent();
5573 constrainSelectedInstRegOperands(*SubregToReg, TII, TRI, RBI);
5574 return RBI.constrainGenericRegister(Dst, *DstRC, MRI);
5575}
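// Illustrative example of the pattern handled here (made-up registers):
//
//   %v:fpr(<2 x s32>) = G_BUILD_VECTOR %x:fpr(s32), %undef:fpr(s32)
//
// becomes "%v = SUBREG_TO_REG 0, %x, %subreg.ssub", since only the first
// element is defined and both sides live on the FPR bank.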
5576
5577bool AArch64InstructionSelector::selectBuildVector(MachineInstr &I,
5578 MachineRegisterInfo &MRI) {
5579 assert(I.getOpcode() == TargetOpcode::G_BUILD_VECTOR);
5580 // Until we port more of the optimized selections, for now just use a vector
5581 // insert sequence.
5582 const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
5583 const LLT EltTy = MRI.getType(I.getOperand(1).getReg());
5584 unsigned EltSize = EltTy.getSizeInBits();
5585
5586 if (tryOptConstantBuildVec(I, DstTy, MRI))
5587 return true;
5588 if (tryOptBuildVecToSubregToReg(I, MRI))
5589 return true;
5590
5591 if (EltSize < 16 || EltSize > 64)
5592 return false; // Don't support all element types yet.
5593 const RegisterBank &RB = *RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI);
5594
5595 const TargetRegisterClass *DstRC = &AArch64::FPR128RegClass;
5596 MachineInstr *ScalarToVec =
5597 emitScalarToVector(DstTy.getElementType().getSizeInBits(), DstRC,
5598 I.getOperand(1).getReg(), MIB);
5599 if (!ScalarToVec)
5600 return false;
5601
5602 Register DstVec = ScalarToVec->getOperand(0).getReg();
5603 unsigned DstSize = DstTy.getSizeInBits();
5604
5605 // Keep track of the last MI we inserted. Later on, we might be able to save
5606 // a copy using it.
5607 MachineInstr *PrevMI = nullptr;
5608 for (unsigned i = 2, e = DstSize / EltSize + 1; i < e; ++i) {
5609 // Note that if we don't do a subregister copy, we can end up making an
5610 // extra register.
5611 PrevMI = &*emitLaneInsert(None, DstVec, I.getOperand(i).getReg(), i - 1, RB,
5612 MIB);
5613 DstVec = PrevMI->getOperand(0).getReg();
5614 }
5615
5616 // If DstTy's size in bits is less than 128, then emit a subregister copy
5617 // from DstVec to the last register we've defined.
5618 if (DstSize < 128) {
5619 // Force this to be FPR using the destination vector.
5620 const TargetRegisterClass *RC =
5621 getMinClassForRegBank(*RBI.getRegBank(DstVec, MRI, TRI), DstSize);
5622 if (!RC)
5623 return false;
5624 if (RC != &AArch64::FPR32RegClass && RC != &AArch64::FPR64RegClass) {
5625 LLVM_DEBUG(dbgs() << "Unsupported register class!\n");
5626 return false;
5627 }
5628
5629 unsigned SubReg = 0;
5630 if (!getSubRegForClass(RC, TRI, SubReg))
5631 return false;
5632 if (SubReg != AArch64::ssub && SubReg != AArch64::dsub) {
5633 LLVM_DEBUG(dbgs() << "Unsupported destination size! (" << DstSize
5634 << "\n");
5635 return false;
5636 }
5637
5638 Register Reg = MRI.createVirtualRegister(RC);
5639 Register DstReg = I.getOperand(0).getReg();
5640
5641 MIB.buildInstr(TargetOpcode::COPY, {DstReg}, {}).addReg(DstVec, 0, SubReg);
5642 MachineOperand &RegOp = I.getOperand(1);
5643 RegOp.setReg(Reg);
5644 RBI.constrainGenericRegister(DstReg, *RC, MRI);
5645 } else {
5646 // We don't need a subregister copy. Save a copy by re-using the
5647 // destination register on the final insert.
5648 assert(PrevMI && "PrevMI was null?");
5649 PrevMI->getOperand(0).setReg(I.getOperand(0).getReg());
5650 constrainSelectedInstRegOperands(*PrevMI, TII, TRI, RBI);
5651 }
5652
5653 I.eraseFromParent();
5654 return true;
5655}
5656
5657bool AArch64InstructionSelector::selectVectorLoadIntrinsic(unsigned Opc,
5658 unsigned NumVecs,
5659 MachineInstr &I) {
5660 assert(I.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS);
5661 assert(Opc && "Expected an opcode?");
5662 assert(NumVecs > 1 && NumVecs < 5 && "Only support 2, 3, or 4 vectors");
5663 auto &MRI = *MIB.getMRI();
5664 LLT Ty = MRI.getType(I.getOperand(0).getReg());
5665 unsigned Size = Ty.getSizeInBits();
5666 assert((Size == 64 || Size == 128) &&
5667 "Destination must be 64 bits or 128 bits?");
5668 unsigned SubReg = Size == 64 ? AArch64::dsub0 : AArch64::qsub0;
5669 auto Ptr = I.getOperand(I.getNumOperands() - 1).getReg();
5670 assert(MRI.getType(Ptr).isPointer() && "Expected a pointer type?");
5671 auto Load = MIB.buildInstr(Opc, {Ty}, {Ptr});
5672 Load.cloneMemRefs(I);
5673 constrainSelectedInstRegOperands(*Load, TII, TRI, RBI);
5674 Register SelectedLoadDst = Load->getOperand(0).getReg();
5675 for (unsigned Idx = 0; Idx < NumVecs; ++Idx) {
5676 auto Vec = MIB.buildInstr(TargetOpcode::COPY, {I.getOperand(Idx)}, {})
5677 .addReg(SelectedLoadDst, 0, SubReg + Idx);
5678 // Emit the subreg copies and immediately select them.
5679 // FIXME: We should refactor our copy code into an emitCopy helper and
5680 // clean up uses of this pattern elsewhere in the selector.
5681 selectCopy(*Vec, TII, MRI, TRI, RBI);
5682 }
5683 return true;
5684}
5685
5686bool AArch64InstructionSelector::selectIntrinsicWithSideEffects(
5687 MachineInstr &I, MachineRegisterInfo &MRI) {
5688 // Find the intrinsic ID.
5689 unsigned IntrinID = I.getIntrinsicID();
5690
5691 const LLT S8 = LLT::scalar(8);
5692 const LLT S16 = LLT::scalar(16);
5693 const LLT S32 = LLT::scalar(32);
5694 const LLT S64 = LLT::scalar(64);
5695 const LLT P0 = LLT::pointer(0, 64);
5696 // Select the instruction.
5697 switch (IntrinID) {
5698 default:
5699 return false;
5700 case Intrinsic::aarch64_ldxp:
5701 case Intrinsic::aarch64_ldaxp: {
5702 auto NewI = MIB.buildInstr(
5703 IntrinID == Intrinsic::aarch64_ldxp ? AArch64::LDXPX : AArch64::LDAXPX,
5704 {I.getOperand(0).getReg(), I.getOperand(1).getReg()},
5705 {I.getOperand(3)});
5706 NewI.cloneMemRefs(I);
5707 constrainSelectedInstRegOperands(*NewI, TII, TRI, RBI);
5708 break;
5709 }
5710 case Intrinsic::trap:
5711 MIB.buildInstr(AArch64::BRK, {}, {}).addImm(1);
5712 break;
5713 case Intrinsic::debugtrap:
5714 MIB.buildInstr(AArch64::BRK, {}, {}).addImm(0xF000);
5715 break;
5716 case Intrinsic::ubsantrap:
5717 MIB.buildInstr(AArch64::BRK, {}, {})
5718 .addImm(I.getOperand(1).getImm() | ('U' << 8));
5719 break;
5720 case Intrinsic::aarch64_neon_ld2: {
5721 LLT Ty = MRI.getType(I.getOperand(0).getReg());
5722 unsigned Opc = 0;
5723 if (Ty == LLT::fixed_vector(8, S8))
5724 Opc = AArch64::LD2Twov8b;
5725 else if (Ty == LLT::fixed_vector(16, S8))
5726 Opc = AArch64::LD2Twov16b;
5727 else if (Ty == LLT::fixed_vector(4, S16))
5728 Opc = AArch64::LD2Twov4h;
5729 else if (Ty == LLT::fixed_vector(8, S16))
5730 Opc = AArch64::LD2Twov8h;
5731 else if (Ty == LLT::fixed_vector(2, S32))
5732 Opc = AArch64::LD2Twov2s;
5733 else if (Ty == LLT::fixed_vector(4, S32))
5734 Opc = AArch64::LD2Twov4s;
5735 else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
5736 Opc = AArch64::LD2Twov2d;
5737 else if (Ty == S64 || Ty == P0)
5738 Opc = AArch64::LD1Twov1d;
5739 else
5740 llvm_unreachable("Unexpected type for ld2!");
5741 selectVectorLoadIntrinsic(Opc, 2, I);
5742 break;
5743 }
5744 case Intrinsic::aarch64_neon_ld4: {
5745 LLT Ty = MRI.getType(I.getOperand(0).getReg());
5746 unsigned Opc = 0;
5747 if (Ty == LLT::fixed_vector(8, S8))
5748 Opc = AArch64::LD4Fourv8b;
5749 else if (Ty == LLT::fixed_vector(16, S8))
5750 Opc = AArch64::LD4Fourv16b;
5751 else if (Ty == LLT::fixed_vector(4, S16))
5752 Opc = AArch64::LD4Fourv4h;
5753 else if (Ty == LLT::fixed_vector(8, S16))
5754 Opc = AArch64::LD4Fourv8h;
5755 else if (Ty == LLT::fixed_vector(2, S32))
5756 Opc = AArch64::LD4Fourv2s;
5757 else if (Ty == LLT::fixed_vector(4, S32))
5758 Opc = AArch64::LD4Fourv4s;
5759 else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
5760 Opc = AArch64::LD4Fourv2d;
5761 else if (Ty == S64 || Ty == P0)
5762 Opc = AArch64::LD1Fourv1d;
5763 else
5764 llvm_unreachable("Unexpected type for ld4!");
5765 selectVectorLoadIntrinsic(Opc, 4, I);
5766 break;
5767 }
5768 case Intrinsic::aarch64_neon_st2: {
5769 Register Src1 = I.getOperand(1).getReg();
5770 Register Src2 = I.getOperand(2).getReg();
5771 Register Ptr = I.getOperand(3).getReg();
5772 LLT Ty = MRI.getType(Src1);
5773 unsigned Opc;
5774 if (Ty == LLT::fixed_vector(8, S8))
5775 Opc = AArch64::ST2Twov8b;
5776 else if (Ty == LLT::fixed_vector(16, S8))
5777 Opc = AArch64::ST2Twov16b;
5778 else if (Ty == LLT::fixed_vector(4, S16))
5779 Opc = AArch64::ST2Twov4h;
5780 else if (Ty == LLT::fixed_vector(8, S16))
5781 Opc = AArch64::ST2Twov8h;
5782 else if (Ty == LLT::fixed_vector(2, S32))
5783 Opc = AArch64::ST2Twov2s;
5784 else if (Ty == LLT::fixed_vector(4, S32))
5785 Opc = AArch64::ST2Twov4s;
5786 else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
5787 Opc = AArch64::ST2Twov2d;
5788 else if (Ty == S64 || Ty == P0)
5789 Opc = AArch64::ST1Twov1d;
5790 else
5791 llvm_unreachable("Unexpected type for st2!");
5792 SmallVector<Register, 2> Regs = {Src1, Src2};
5793 Register Tuple = Ty.getSizeInBits() == 128 ? createQTuple(Regs, MIB)
5794 : createDTuple(Regs, MIB);
5795 auto Store = MIB.buildInstr(Opc, {}, {Tuple, Ptr});
5796 Store.cloneMemRefs(I);
5797 constrainSelectedInstRegOperands(*Store, TII, TRI, RBI);
5798 break;
5799 }
5800 case Intrinsic::aarch64_mops_memset_tag: {
5801 // Transform
5802 // %dst:gpr(p0) = \
5803 // G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aarch64.mops.memset.tag),
5804 // \ %dst:gpr(p0), %val:gpr(s64), %n:gpr(s64)
5805 // where %dst is updated, into
5806 // %Rd:GPR64common, %Rn:GPR64) = \
5807 // MOPSMemorySetTaggingPseudo \
5808 // %Rd:GPR64common, %Rn:GPR64, %Rm:GPR64
5809 // where Rd and Rn are tied.
5810 // It is expected that %val has been extended to s64 in legalization.
5811 // Note that the order of the size/value operands are swapped.
5812
5813 Register DstDef = I.getOperand(0).getReg();
5814 // I.getOperand(1) is the intrinsic function
5815 Register DstUse = I.getOperand(2).getReg();
5816 Register ValUse = I.getOperand(3).getReg();
5817 Register SizeUse = I.getOperand(4).getReg();
5818
5819 // MOPSMemorySetTaggingPseudo has two defs; the intrinsic call has only one.
5820 // Therefore an additional virtual register is required for the updated size
5821 // operand. This value is not accessible via the semantics of the intrinsic.
5822 Register SizeDef = MRI.createGenericVirtualRegister(LLT::scalar(64));
5823
5824 auto Memset = MIB.buildInstr(AArch64::MOPSMemorySetTaggingPseudo,
5825 {DstDef, SizeDef}, {DstUse, SizeUse, ValUse});
5826 Memset.cloneMemRefs(I);
5827 constrainSelectedInstRegOperands(*Memset, TII, TRI, RBI);
5828 break;
5829 }
5830 }
5831
5832 I.eraseFromParent();
5833 return true;
5834}
5835
5836bool AArch64InstructionSelector::selectIntrinsic(MachineInstr &I,
5837 MachineRegisterInfo &MRI) {
5838 unsigned IntrinID = I.getIntrinsicID();
5839
5840 switch (IntrinID) {
5841 default:
5842 break;
5843 case Intrinsic::aarch64_crypto_sha1h: {
5844 Register DstReg = I.getOperand(0).getReg();
5845 Register SrcReg = I.getOperand(2).getReg();
5846
5847 // FIXME: Should this be an assert?
5848 if (MRI.getType(DstReg).getSizeInBits() != 32 ||
5849 MRI.getType(SrcReg).getSizeInBits() != 32)
5850 return false;
5851
5852 // The operation has to happen on FPRs. Set up some new FPR registers for
5853 // the source and destination if they are on GPRs.
5854 if (RBI.getRegBank(SrcReg, MRI, TRI)->getID() != AArch64::FPRRegBankID) {
5855 SrcReg = MRI.createVirtualRegister(&AArch64::FPR32RegClass);
5856 MIB.buildCopy({SrcReg}, {I.getOperand(2)});
5857
5858 // Make sure the copy ends up getting constrained properly.
5859 RBI.constrainGenericRegister(I.getOperand(2).getReg(),
5860 AArch64::GPR32RegClass, MRI);
5861 }
5862
5863 if (RBI.getRegBank(DstReg, MRI, TRI)->getID() != AArch64::FPRRegBankID)
5864 DstReg = MRI.createVirtualRegister(&AArch64::FPR32RegClass);
5865
5866 // Actually insert the instruction.
5867 auto SHA1Inst = MIB.buildInstr(AArch64::SHA1Hrr, {DstReg}, {SrcReg});
5868 constrainSelectedInstRegOperands(*SHA1Inst, TII, TRI, RBI);
5869
5870 // Did we create a new register for the destination?
5871 if (DstReg != I.getOperand(0).getReg()) {
5872 // Yep. Copy the result of the instruction back into the original
5873 // destination.
5874 MIB.buildCopy({I.getOperand(0)}, {DstReg});
5875 RBI.constrainGenericRegister(I.getOperand(0).getReg(),
5876 AArch64::GPR32RegClass, MRI);
5877 }
5878
5879 I.eraseFromParent();
5880 return true;
5881 }
5882 case Intrinsic::ptrauth_sign: {
5883 Register DstReg = I.getOperand(0).getReg();
5884 Register ValReg = I.getOperand(2).getReg();
5885 uint64_t Key = I.getOperand(3).getImm();
5886 Register DiscReg = I.getOperand(4).getReg();
5887 auto DiscVal = getIConstantVRegVal(DiscReg, MRI);
5888 bool IsDiscZero = DiscVal.hasValue() && DiscVal->isNullValue();
5889
5890 if (Key > 3)
5891 return false;
5892
5893 unsigned Opcodes[][4] = {
5894 {AArch64::PACIA, AArch64::PACIB, AArch64::PACDA, AArch64::PACDB},
5895 {AArch64::PACIZA, AArch64::PACIZB, AArch64::PACDZA, AArch64::PACDZB}};
5896 unsigned Opcode = Opcodes[IsDiscZero][Key];
5897
5898 auto PAC = MIB.buildInstr(Opcode, {DstReg}, {ValReg});
5899
5900 if (!IsDiscZero) {
5901 PAC.addUse(DiscReg);
5902 RBI.constrainGenericRegister(DiscReg, AArch64::GPR64spRegClass, MRI);
5903 }
5904
5905 RBI.constrainGenericRegister(DstReg, AArch64::GPR64RegClass, MRI);
5906 I.eraseFromParent();
5907 return true;
5908 }
5909 case Intrinsic::frameaddress:
5910 case Intrinsic::returnaddress: {
5911 MachineFunction &MF = *I.getParent()->getParent();
5912 MachineFrameInfo &MFI = MF.getFrameInfo();
5913
5914 unsigned Depth = I.getOperand(2).getImm();
5915 Register DstReg = I.getOperand(0).getReg();
5916 RBI.constrainGenericRegister(DstReg, AArch64::GPR64RegClass, MRI);
5917
5918 if (Depth == 0 && IntrinID == Intrinsic::returnaddress) {
5919 if (!MFReturnAddr) {
5920 // Insert the copy from LR/X30 into the entry block, before it can be
5921 // clobbered by anything.
5922 MFI.setReturnAddressIsTaken(true);
5923 MFReturnAddr = getFunctionLiveInPhysReg(
5924 MF, TII, AArch64::LR, AArch64::GPR64RegClass, I.getDebugLoc());
5925 }
5926
5927 if (STI.hasPAuth()) {
5928 MIB.buildInstr(AArch64::XPACI, {DstReg}, {MFReturnAddr});
5929 } else {
5930 MIB.buildCopy({Register(AArch64::LR)}, {MFReturnAddr});
5931 MIB.buildInstr(AArch64::XPACLRI);
5932 MIB.buildCopy({DstReg}, {Register(AArch64::LR)});
5933 }
5934
5935 I.eraseFromParent();
5936 return true;
5937 }
5938
5939 MFI.setFrameAddressIsTaken(true);
5940 Register FrameAddr(AArch64::FP);
5941 while (Depth--) {
5942 Register NextFrame = MRI.createVirtualRegister(&AArch64::GPR64spRegClass);
5943 auto Ldr =
5944 MIB.buildInstr(AArch64::LDRXui, {NextFrame}, {FrameAddr}).addImm(0);
5945 constrainSelectedInstRegOperands(*Ldr, TII, TRI, RBI);
5946 FrameAddr = NextFrame;
5947 }
5948
5949 if (IntrinID == Intrinsic::frameaddress)
5950 MIB.buildCopy({DstReg}, {FrameAddr});
5951 else {
5952 MFI.setReturnAddressIsTaken(true);
5953
5954 if (STI.hasPAuth()) {
5955 Register TmpReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
5956 MIB.buildInstr(AArch64::LDRXui, {TmpReg}, {FrameAddr}).addImm(1);
5957 MIB.buildInstr(AArch64::XPACI, {DstReg}, {TmpReg});
5958 } else {
5959 MIB.buildInstr(AArch64::LDRXui, {Register(AArch64::LR)}, {FrameAddr})
5960 .addImm(1);
5961 MIB.buildInstr(AArch64::XPACLRI);
5962 MIB.buildCopy({DstReg}, {Register(AArch64::LR)});
5963 }
5964 }
5965
5966 I.eraseFromParent();
5967 return true;
5968 }
5969 case Intrinsic::swift_async_context_addr:
5970 auto Sub = MIB.buildInstr(AArch64::SUBXri, {I.getOperand(0).getReg()},
5971 {Register(AArch64::FP)})
5972 .addImm(8)
5973 .addImm(0);
5974 constrainSelectedInstRegOperands(*Sub, TII, TRI, RBI);
5975
5976 MF->getFrameInfo().setFrameAddressIsTaken(true);
5977 MF->getInfo<AArch64FunctionInfo>()->setHasSwiftAsyncContext(true);
5978 I.eraseFromParent();
5979 return true;
5980 }
5981 return false;
5982}
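
The ptrauth_sign case above chooses its opcode from a 2-D table indexed by whether the discriminator is a constant zero and by the key number (0-3). The standalone sketch below mirrors just that lookup; pacOpcodeName and the mnemonic strings are illustrative stand-ins for the AArch64:: opcode enums, not LLVM code.

#include <cstdint>
#include <cstdio>

// Illustrative mirror of the Opcodes[IsDiscZero][Key] lookup above, using
// instruction mnemonics instead of AArch64:: opcode enums.
static const char *pacOpcodeName(uint64_t Key, bool IsDiscZero) {
  static const char *Names[2][4] = {
      {"pacia", "pacib", "pacda", "pacdb"},      // discriminator in a register
      {"paciza", "pacizb", "pacdza", "pacdzb"}}; // discriminator is zero
  if (Key > 3)
    return nullptr; // unsupported key, mirroring the early return above
  return Names[IsDiscZero][Key];
}

int main() {
  std::printf("key 0, disc in reg -> %s\n", pacOpcodeName(0, false));
  std::printf("key 2, disc zero   -> %s\n", pacOpcodeName(2, true));
  return 0;
}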
5983
5984InstructionSelector::ComplexRendererFns
5985AArch64InstructionSelector::selectShiftA_32(const MachineOperand &Root) const {
5986 auto MaybeImmed = getImmedFromMO(Root);
5987 if (MaybeImmed == None || *MaybeImmed > 31)
5988 return None;
5989 uint64_t Enc = (32 - *MaybeImmed) & 0x1f;
5990 return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Enc); }}};
5991}
5992
5993InstructionSelector::ComplexRendererFns
5994AArch64InstructionSelector::selectShiftB_32(const MachineOperand &Root) const {
5995 auto MaybeImmed = getImmedFromMO(Root);
5996 if (MaybeImmed == None || *MaybeImmed > 31)
5997 return None;
5998 uint64_t Enc = 31 - *MaybeImmed;
5999 return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Enc); }}};
6000}
6001
6002InstructionSelector::ComplexRendererFns
6003AArch64InstructionSelector::selectShiftA_64(const MachineOperand &Root) const {
6004 auto MaybeImmed = getImmedFromMO(Root);
6005 if (MaybeImmed == None || *MaybeImmed > 63)
6006 return None;
6007 uint64_t Enc = (64 - *MaybeImmed) & 0x3f;
6008 return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Enc); }}};
6009}
6010
6011InstructionSelector::ComplexRendererFns
6012AArch64InstructionSelector::selectShiftB_64(const MachineOperand &Root) const {
6013 auto MaybeImmed = getImmedFromMO(Root);
6014 if (MaybeImmed == None || *MaybeImmed > 63)
6015 return None;
6016 uint64_t Enc = 63 - *MaybeImmed;
6017 return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Enc); }}};
6018}
6019
6020/// Helper to select an immediate value that can be represented as a 12-bit
6021/// value shifted left by either 0 or 12. If it is possible to do so, return
6022/// the immediate and shift value. If not, return None.
6023///
6024/// Used by selectArithImmed and selectNegArithImmed.
6025InstructionSelector::ComplexRendererFns
6026AArch64InstructionSelector::select12BitValueWithLeftShift(
6027 uint64_t Immed) const {
6028 unsigned ShiftAmt;
6029 if (Immed >> 12 == 0) {
6030 ShiftAmt = 0;
6031 } else if ((Immed & 0xfff) == 0 && Immed >> 24 == 0) {
6032 ShiftAmt = 12;
6033 Immed = Immed >> 12;
6034 } else
6035 return None;
6036
6037 unsigned ShVal = AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftAmt);
6038 return {{
6039 [=](MachineInstrBuilder &MIB) { MIB.addImm(Immed); },
6040 [=](MachineInstrBuilder &MIB) { MIB.addImm(ShVal); },
6041 }};
6042}
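
To make the encoding rule above concrete, here is a standalone sketch (encodeAddSubImm is an invented name and plain integers stand in for the renderer machinery): an immediate is accepted if it fits in 12 bits, or if its low 12 bits are clear and it fits in 24 bits, in which case it is emitted as imm12 with LSL #12.

#include <cstdint>
#include <cstdio>

// Hypothetical standalone mirror of the check in select12BitValueWithLeftShift.
// Returns true and fills Imm12/ShiftAmt if Immed is encodable as imm12 << {0,12}.
static bool encodeAddSubImm(uint64_t Immed, uint64_t &Imm12, unsigned &ShiftAmt) {
  if (Immed >> 12 == 0) {
    Imm12 = Immed;
    ShiftAmt = 0;
    return true;
  }
  if ((Immed & 0xfff) == 0 && Immed >> 24 == 0) {
    Imm12 = Immed >> 12;
    ShiftAmt = 12;
    return true;
  }
  return false;
}

int main() {
  // 0xabc fits directly; 0xabc000 becomes 0xabc with LSL #12; 0xabc001 is rejected.
  for (uint64_t V : {0xabcULL, 0xabc000ULL, 0xabc001ULL}) {
    uint64_t Imm12;
    unsigned ShiftAmt;
    if (encodeAddSubImm(V, Imm12, ShiftAmt))
      std::printf("%#llx -> imm12=%#llx, lsl #%u\n", (unsigned long long)V,
                  (unsigned long long)Imm12, ShiftAmt);
    else
      std::printf("%#llx -> not encodable as an add/sub immediate\n",
                  (unsigned long long)V);
  }
  return 0;
}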
6043
6044/// SelectArithImmed - Select an immediate value that can be represented as
6045/// a 12-bit value shifted left by either 0 or 12. If so, return true with
6046/// Val set to the 12-bit value and Shift set to the shifter operand.
6047InstructionSelector::ComplexRendererFns
6048AArch64InstructionSelector::selectArithImmed(MachineOperand &Root) const {
6049 // This function is called from the addsub_shifted_imm ComplexPattern,
6050 // which lists [imm] as the list of opcodes it's interested in; however,
6051 // we still need to check whether the operand is actually an immediate
6052 // here because the ComplexPattern opcode list is only used in
6053 // root-level opcode matching.
6054 auto MaybeImmed = getImmedFromMO(Root);
6055 if (MaybeImmed == None)
6056 return None;
6057 return select12BitValueWithLeftShift(*MaybeImmed);
6058}
6059
6060/// SelectNegArithImmed - As above, but negates the value before trying to
6061/// select it.
6062InstructionSelector::ComplexRendererFns
6063AArch64InstructionSelector::selectNegArithImmed(MachineOperand &Root) const {
6064 // We need a register here, because we need to know if we have a 64 or 32
6065 // bit immediate.
6066 if (!Root.isReg())
6067 return None;
6068 auto MaybeImmed = getImmedFromMO(Root);
6069 if (MaybeImmed == None)
6070 return None;
6071 uint64_t Immed = *MaybeImmed;
6072
6073 // This negation is almost always valid, but "cmp wN, #0" and "cmn wN, #0"
6074 // have the opposite effect on the C flag, so this pattern mustn't match under
6075 // those circumstances.
6076 if (Immed == 0)
6077 return None;
6078
6079 // Check if we're dealing with a 32-bit type on the root or a 64-bit type on
6080 // the root.
6081 MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();
6082 if (MRI.getType(Root.getReg()).getSizeInBits() == 32)
6083 Immed = ~((uint32_t)Immed) + 1;
6084 else
6085 Immed = ~Immed + 1ULL;
6086
6087 if (Immed & 0xFFFFFFFFFF000000ULL)
6088 return None;
6089
6090 Immed &= 0xFFFFFFULL;
6091 return select12BitValueWithLeftShift(Immed);
6092}
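
A minimal standalone sketch of the arithmetic above (negatedImmFits is an invented helper, not LLVM API): the immediate is two's-complement negated at the width of the root register and kept only if the result still fits in the low 24 bits, which is what allows, for example, a compare against -5 to be selected through the negated-immediate form.

#include <cstdint>
#include <cstdio>

// Hypothetical mirror of selectNegArithImmed's arithmetic: negate the immediate
// at the operand width and keep it only if it fits in 24 bits (imm12, possibly
// shifted by 12), mirroring the checks in the selector above.
static bool negatedImmFits(uint64_t Immed, unsigned Bits, uint64_t &Negated) {
  if (Immed == 0) // cmp/cmn #0 differ on the C flag, so never match zero.
    return false;
  Negated = (Bits == 32) ? (uint64_t)(~(uint32_t)Immed + 1u) : ~Immed + 1ULL;
  if (Negated & 0xFFFFFFFFFF000000ULL)
    return false;
  Negated &= 0xFFFFFFULL;
  return true;
}

int main() {
  uint64_t Neg;
  // -5 as a 32-bit immediate is 0xfffffffb; its negation, 5, is encodable.
  if (negatedImmFits(0xfffffffbULL, 32, Neg))
    std::printf("negated immediate: %#llx\n", (unsigned long long)Neg);
  return 0;
}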
6093
6094/// Return true if it is worth folding MI into an extended register. That is,
6095/// if it's safe to pull it into the addressing mode of a load or store as a
6096/// shift.
6097bool AArch64InstructionSelector::isWorthFoldingIntoExtendedReg(
6098 MachineInstr &MI, const MachineRegisterInfo &MRI) const {
6099 // Always fold if there is one use, or if we're optimizing for size.
6100 Register DefReg = MI.getOperand(0).getReg();
6101 if (MRI.hasOneNonDBGUse(DefReg) ||
6102 MI.getParent()->getParent()->getFunction().hasOptSize())
6103 return true;
6104
6105 // It's better to avoid folding and recomputing shifts when we don't have a
6106 // fastpath.
6107 if (!STI.hasLSLFast())
6108 return false;
6109
6110 // We have a fastpath, so folding a shift in and potentially computing it
6111 // many times may be beneficial. Check if this is only used in memory ops.
6112 // If it is, then we should fold.
6113 return all_of(MRI.use_nodbg_instructions(DefReg),
6114 [](MachineInstr &Use) { return Use.mayLoadOrStore(); });
6115}
6116
6117static bool isSignExtendShiftType(AArch64_AM::ShiftExtendType Type) {
6118 switch (Type) {
6119 case AArch64_AM::SXTB:
6120 case AArch64_AM::SXTH:
6121 case AArch64_AM::SXTW:
6122 return true;
6123 default:
6124 return false;
6125 }
6126}
6127
6128InstructionSelector::ComplexRendererFns
6129AArch64InstructionSelector::selectExtendedSHL(
6130 MachineOperand &Root, MachineOperand &Base, MachineOperand &Offset,
6131 unsigned SizeInBytes, bool WantsExt) const {
6132 assert(Base.isReg() && "Expected base to be a register operand");
6133 assert(Offset.isReg() && "Expected offset to be a register operand");
6134
6135 MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();
6136 MachineInstr *OffsetInst = MRI.getVRegDef(Offset.getReg());
6137 if (!OffsetInst)
6138 return None;
6139
6140 unsigned OffsetOpc = OffsetInst->getOpcode();
6141 bool LookedThroughZExt = false;
6142 if (OffsetOpc != TargetOpcode::G_SHL && OffsetOpc != TargetOpcode::G_MUL) {
6143 // Try to look through a ZEXT.
6144 if (OffsetOpc != TargetOpcode::G_ZEXT || !WantsExt)
6145 return None;
6146
6147 OffsetInst = MRI.getVRegDef(OffsetInst->getOperand(1).getReg());
6148 OffsetOpc = OffsetInst->getOpcode();
6149 LookedThroughZExt = true;
6150
6151 if (OffsetOpc != TargetOpcode::G_SHL && OffsetOpc != TargetOpcode::G_MUL)
6152 return None;
6153 }
6154 // Make sure that the memory op is a valid size.
6155 int64_t LegalShiftVal = Log2_32(SizeInBytes);
6156 if (LegalShiftVal == 0)
6157 return None;
6158 if (!isWorthFoldingIntoExtendedReg(*OffsetInst, MRI))
6159 return None;
6160
6161 // Now, try to find the specific G_CONSTANT. Start by assuming that the
6162 // register we will offset is the LHS, and the register containing the
6163 // constant is the RHS.
6164 Register OffsetReg = OffsetInst->getOperand(1).getReg();
6165 Register ConstantReg = OffsetInst->getOperand(2).getReg();
6166 auto ValAndVReg = getIConstantVRegValWithLookThrough(ConstantReg, MRI);
6167 if (!ValAndVReg) {
6168 // We didn't get a constant on the RHS. If the opcode is a shift, then
6169 // we're done.
6170 if (OffsetOpc == TargetOpcode::G_SHL)
6171 return None;
6172
6173 // If we have a G_MUL, we can use either register. Try looking at the RHS.
6174 std::swap(OffsetReg, ConstantReg);
6175 ValAndVReg = getIConstantVRegValWithLookThrough(ConstantReg, MRI);
6176 if (!ValAndVReg)
6177 return None;
6178 }
6179
6180 // The value must fit into 3 bits, and must be positive. Make sure that is
6181 // true.
6182 int64_t ImmVal = ValAndVReg->Value.getSExtValue();
6183
6184 // Since we're going to pull this into a shift, the constant value must be
6185 // a power of 2. If we got a multiply, then we need to check this.
6186 if (OffsetOpc == TargetOpcode::G_MUL) {
6187 if (!isPowerOf2_32(ImmVal))
6188 return None;
6189
6190 // Got a power of 2. So, the amount we'll shift is the log base-2 of that.
6191 ImmVal = Log2_32(ImmVal);
6192 }
6193
6194 if ((ImmVal & 0x7) != ImmVal)
6195 return None;
6196
6197 // We are only allowed to shift by LegalShiftVal. This shift value is built
6198 // into the instruction, so we can't just use whatever we want.
6199 if (ImmVal != LegalShiftVal)
6200 return None;
6201
6202 unsigned SignExtend = 0;
6203 if (WantsExt) {
6204 // Check if the offset is defined by an extend, unless we looked through a
6205 // G_ZEXT earlier.
6206 if (!LookedThroughZExt) {
6207 MachineInstr *ExtInst = getDefIgnoringCopies(OffsetReg, MRI);
6208 auto Ext = getExtendTypeForInst(*ExtInst, MRI, true);
6209 if (Ext == AArch64_AM::InvalidShiftExtend)
6210 return None;
6211
6212 SignExtend = isSignExtendShiftType(Ext) ? 1 : 0;
6213 // We only support SXTW for signed extension here.
6214 if (SignExtend && Ext != AArch64_AM::SXTW)
6215 return None;
6216 OffsetReg = ExtInst->getOperand(1).getReg();
6217 }
6218
6219 // Need a 32-bit wide register here.
6220 MachineIRBuilder MIB(*MRI.getVRegDef(Root.getReg()));
6221 OffsetReg = moveScalarRegClass(OffsetReg, AArch64::GPR32RegClass, MIB);
6222 }
6223
6224 // We can use the LHS of the GEP as the base, and the LHS of the shift as an
6225 // offset. Signify that we are shifting by setting the shift flag to 1.
6226 return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(Base.getReg()); },
6227 [=](MachineInstrBuilder &MIB) { MIB.addUse(OffsetReg); },
6228 [=](MachineInstrBuilder &MIB) {
6229 // Need to add both immediates here to make sure that they are both
6230 // added to the instruction.
6231 MIB.addImm(SignExtend);
6232 MIB.addImm(1);
6233 }}};
6234}
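
To make the legality check above concrete, the self-contained sketch below (canFoldShift is an invented name; the real selector works on MachineInstrs rather than plain integers) accepts only a shift amount equal to log2 of the access size, or a multiply by exactly that power of two.

#include <cstdint>
#include <cstdio>

// Sketch of the shift-amount check in selectExtendedSHL: a shift (or an
// equivalent multiply by a power of two) can only be folded when the amount
// matches log2 of the access size, because that scale is baked into the
// [base, offset, lsl #N] addressing mode.
static bool canFoldShift(unsigned SizeInBytes, int64_t ShiftOrMulAmt, bool IsMul) {
  if (SizeInBytes == 0 || (SizeInBytes & (SizeInBytes - 1)))
    return false; // assumes a power-of-two access size
  unsigned LegalShift = __builtin_ctz(SizeInBytes); // log2 of the access size
  if (LegalShift == 0)
    return false; // byte accesses have no scaled form worth folding
  int64_t Amt = ShiftOrMulAmt;
  if (IsMul) {
    if (Amt <= 0 || (Amt & (Amt - 1)))
      return false;             // multiplies must be by a power of two
    Amt = __builtin_ctzll(Amt); // and become a shift by log2 of that power
  }
  return (Amt & 0x7) == Amt && (uint64_t)Amt == LegalShift;
}

int main() {
  // An 8-byte load can fold "lsl #3" or a multiply by 8; "lsl #2" cannot.
  std::printf("%d %d %d\n", canFoldShift(8, 3, false), canFoldShift(8, 8, true),
              canFoldShift(8, 2, false));
  return 0;
}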
6235
6236/// This is used for computing addresses like this:
6237///
6238/// ldr x1, [x2, x3, lsl #3]
6239///
6240/// Where x2 is the base register, and x3 is an offset register. The shift-left
6241/// is a constant value specific to this load instruction. That is, we'll never
6242/// see anything other than a 3 here (which corresponds to the size of the
6243/// element being loaded.)
6244InstructionSelector::ComplexRendererFns
6245AArch64InstructionSelector::selectAddrModeShiftedExtendXReg(
6246 MachineOperand &Root, unsigned SizeInBytes) const {
6247 if (!Root.isReg())
6248 return None;
6249 MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();
6250
6251 // We want to find something like this:
6252 //
6253 // val = G_CONSTANT LegalShiftVal
6254 // shift = G_SHL off_reg val
6255 // ptr = G_PTR_ADD base_reg shift
6256 // x = G_LOAD ptr
6257 //
6258 // And fold it into this addressing mode:
6259 //
6260 // ldr x, [base_reg, off_reg, lsl #LegalShiftVal]
6261
6262 // Check if we can find the G_PTR_ADD.
6263 MachineInstr *PtrAdd =
6264 getOpcodeDef(TargetOpcode::G_PTR_ADD, Root.getReg(), MRI);
6265 if (!PtrAdd || !isWorthFoldingIntoExtendedReg(*PtrAdd, MRI))
6266 return None;
6267
6268 // Now, try to match an opcode which will match our specific offset.
6269 // We want a G_SHL or a G_MUL.
6270 MachineInstr *OffsetInst =
6271 getDefIgnoringCopies(PtrAdd->getOperand(2).getReg(), MRI);
6272 return selectExtendedSHL(Root, PtrAdd->getOperand(1),
6273 OffsetInst->getOperand(0), SizeInBytes,
6274 /*WantsExt=*/false);
6275}
6276
6277/// This is used for computing addresses like this:
6278///
6279/// ldr x1, [x2, x3]
6280///
6281/// Where x2 is the base register, and x3 is an offset register.
6282///
6283/// If it is possible (or profitable) to fold a G_PTR_ADD into the address
6284/// calculation, this will do so. Otherwise, it will return None.
6285InstructionSelector::ComplexRendererFns
6286AArch64InstructionSelector::selectAddrModeRegisterOffset(
6287 MachineOperand &Root) const {
6288 MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();
6289
6290 // We need a GEP.
6291 MachineInstr *Gep = MRI.getVRegDef(Root.getReg());
6292 if (!Gep || Gep->getOpcode() != TargetOpcode::G_PTR_ADD)
6293 return None;
6294
6295 // If this is used more than once, let's not bother folding.
6296 // TODO: Check if they are memory ops. If they are, then we can still fold
6297 // without having to recompute anything.
6298 if (!MRI.hasOneNonDBGUse(Gep->getOperand(0).getReg()))
6299 return None;
6300
6301 // Base is the GEP's LHS, offset is its RHS.
6302 return {{[=](MachineInstrBuilder &MIB) {
6303 MIB.addUse(Gep->getOperand(1).getReg());
6304 },
6305 [=](MachineInstrBuilder &MIB) {
6306 MIB.addUse(Gep->getOperand(2).getReg());
6307 },
6308 [=](MachineInstrBuilder &MIB) {
6309 // Need to add both immediates here to make sure that they are both
6310 // added to the instruction.
6311 MIB.addImm(0);
6312 MIB.addImm(0);
6313 }}};
6314}
6315
6316/// This is intended to be equivalent to selectAddrModeXRO in
6317/// AArch64ISelDAGtoDAG. It's used for selecting X register offset loads.
6318InstructionSelector::ComplexRendererFns
6319AArch64InstructionSelector::selectAddrModeXRO(MachineOperand &Root,
6320 unsigned SizeInBytes) const {
6321 MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();
6322 if (!Root.isReg())
6323 return None;
6324 MachineInstr *PtrAdd =
6325 getOpcodeDef(TargetOpcode::G_PTR_ADD, Root.getReg(), MRI);
6326 if (!PtrAdd)
6327 return None;
6328
6329 // Check for an immediate which cannot be encoded in the [base + imm]
6330 // addressing mode, and can't be encoded in an add/sub. If this happens, we'll
6331 // end up with code like:
6332 //
6333 // mov x0, wide
6334 // add x1 base, x0
6335 // ldr x2, [x1, x0]
6336 //
6337 // In this situation, we can use the [base, xreg] addressing mode to save an
6338 // add/sub:
6339 //
6340 // mov x0, wide
6341 // ldr x2, [base, x0]
6342 auto ValAndVReg =
6343 getIConstantVRegValWithLookThrough(PtrAdd->getOperand(2).getReg(), MRI);
6344 if (ValAndVReg) {
6345 unsigned Scale = Log2_32(SizeInBytes);
6346 int64_t ImmOff = ValAndVReg->Value.getSExtValue();
6347
6348 // Skip immediates that can be selected in the load/store addressing
6349 // mode.
6350 if (ImmOff % SizeInBytes == 0 && ImmOff >= 0 &&
6351 ImmOff < (0x1000 << Scale))
6352 return None;
6353
6354 // Helper lambda to decide whether or not it is preferable to emit an add.
6355 auto isPreferredADD = [](int64_t ImmOff) {
6356 // Constants in [0x0, 0xfff] can be encoded in an add.
6357 if ((ImmOff & 0xfffffffffffff000LL) == 0x0LL)
6358 return true;
6359
6360 // Can it be encoded in an add lsl #12?
6361 if ((ImmOff & 0xffffffffff000fffLL) != 0x0LL)
6362 return false;
6363
6364 // It can be encoded in an add lsl #12, but we may not want to. If it is
6365 // possible to select this as a single movz, then prefer that. A single
6366 // movz is faster than an add with a shift.
6367 return (ImmOff & 0xffffffffff00ffffLL) != 0x0LL &&
6368 (ImmOff & 0xffffffffffff0fffLL) != 0x0LL;
6369 };
6370
6371 // If the immediate can be encoded in a single add/sub, then bail out.
6372 if (isPreferredADD(ImmOff) || isPreferredADD(-ImmOff))
6373 return None;
6374 }
6375
6376 // Try to fold shifts into the addressing mode.
6377 auto AddrModeFns = selectAddrModeShiftedExtendXReg(Root, SizeInBytes);
6378 if (AddrModeFns)
6379 return AddrModeFns;
6380
6381 // If that doesn't work, see if it's possible to fold in registers from
6382 // a GEP.
6383 return selectAddrModeRegisterOffset(Root);
6384}
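
The isPreferredADD heuristic above is small enough to lift out; the standalone program below does that (illustration only, not LLVM code) so the three regimes are easy to see: offsets that fit a plain add, offsets that fit an add with lsl #12 unless a single movz would cover them, and wide offsets where the [base, xreg] mode saves the extra add.

#include <cstdint>
#include <cstdio>

// Standalone copy of the isPreferredADD lambda above: true when the offset is
// better materialised with a single add/sub than with a register operand.
static bool isPreferredADD(int64_t ImmOff) {
  if ((ImmOff & 0xfffffffffffff000LL) == 0x0LL)
    return true; // fits a plain add #imm12
  if ((ImmOff & 0xffffffffff000fffLL) != 0x0LL)
    return false; // does not fit add #imm12, lsl #12 either
  // Encodable with lsl #12, but prefer a single movz when one suffices.
  return (ImmOff & 0xffffffffff00ffffLL) != 0x0LL &&
         (ImmOff & 0xffffffffffff0fffLL) != 0x0LL;
}

int main() {
  // 0xabc and 0xabc000 keep the add form; 0x12345678 is too wide, so the
  // [base, xreg] addressing mode is used instead.
  for (int64_t Off : {0xabcLL, 0xabc000LL, 0x12345678LL})
    std::printf("%#llx -> preferredADD=%d\n", (unsigned long long)Off,
                (int)isPreferredADD(Off));
  return 0;
}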
6385
6386/// This is used for computing addresses like this:
6387///
6388/// ldr x0, [xBase, wOffset, sxtw #LegalShiftVal]
6389///
6390/// Where we have a 64-bit base register, a 32-bit offset register, and an
6391/// extend (which may or may not be signed).
6392InstructionSelector::ComplexRendererFns
6393AArch64InstructionSelector::selectAddrModeWRO(MachineOperand &Root,
6394 unsigned SizeInBytes) const {
6395 MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();
6396
6397 MachineInstr *PtrAdd =
6398 getOpcodeDef(TargetOpcode::G_PTR_ADD, Root.getReg(), MRI);
6399 if (!PtrAdd || !isWorthFoldingIntoExtendedReg(*PtrAdd, MRI))
6400 return None;
6401
6402 MachineOperand &LHS = PtrAdd->getOperand(1);
6403 MachineOperand &RHS = PtrAdd->getOperand(2);
6404 MachineInstr *OffsetInst = getDefIgnoringCopies(RHS.getReg(), MRI);
6405
6406 // The first case is the same as selectAddrModeXRO, except we need an extend.
6407 // In this case, we try to find a shift and extend, and fold them into the
6408 // addressing mode.
6409 //
6410 // E.g.
6411 //
6412 // off_reg = G_Z/S/ANYEXT ext_reg
6413 // val = G_CONSTANT LegalShiftVal
6414 // shift = G_SHL off_reg val
6415 // ptr = G_PTR_ADD base_reg shift
6416 // x = G_LOAD ptr
6417 //
6418 // In this case we can get a load like this:
6419 //
6420 // ldr x0, [base_reg, ext_reg, sxtw #LegalShiftVal]
6421 auto ExtendedShl = selectExtendedSHL(Root, LHS, OffsetInst->getOperand(0),
6422 SizeInBytes, /*WantsExt=*/true);
6423 if (ExtendedShl)
6424 return ExtendedShl;
6425
6426 // There was no shift. We can try and fold a G_Z/S/ANYEXT in alone though.
6427 //
6428 // e.g.
6429 // ldr something, [base_reg, ext_reg, sxtw]
6430 if (!isWorthFoldingIntoExtendedReg(*OffsetInst, MRI))
6431 return None;
6432
6433 // Check if this is an extend. We'll get an extend type if it is.
6434 AArch64_AM::ShiftExtendType Ext =
6435 getExtendTypeForInst(*OffsetInst, MRI, /*IsLoadStore=*/true);
6436 if (Ext == AArch64_AM::InvalidShiftExtend)
6437 return None;
6438
6439 // Need a 32-bit wide register.
6440 MachineIRBuilder MIB(*PtrAdd);
6441 Register ExtReg = moveScalarRegClass(OffsetInst->getOperand(1).getReg(),
6442 AArch64::GPR32RegClass, MIB);
6443 unsigned SignExtend = Ext == AArch64_AM::SXTW;
6444
6445 // Base is LHS, offset is ExtReg.
6446 return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(LHS.getReg()); },
6447 [=](MachineInstrBuilder &MIB) { MIB.addUse(ExtReg); },
6448 [=](MachineInstrBuilder &MIB) {
6449 MIB.addImm(SignExtend);
6450 MIB.addImm(0);
6451 }}};
6452}
6453
6454/// Select a "register plus unscaled signed 9-bit immediate" address. This
6455/// should only match when there is an offset that is not valid for a scaled
6456/// immediate addressing mode. The "Size" argument is the size in bytes of the
6457/// memory reference, which is needed here to know what is valid for a scaled
6458/// immediate.
6459InstructionSelector::ComplexRendererFns
6460AArch64InstructionSelector::selectAddrModeUnscaled(MachineOperand &Root,
6461 unsigned Size) const {
6462 MachineRegisterInfo &MRI =
6463 Root.getParent()->getParent()->getParent()->getRegInfo();
6464
6465 if (!Root.isReg())
6466 return None;
6467
6468 if (!isBaseWithConstantOffset(Root, MRI))
6469 return None;
6470
6471 MachineInstr *RootDef = MRI.getVRegDef(Root.getReg());
6472 if (!RootDef)
6473 return None;
6474
6475 MachineOperand &OffImm = RootDef->getOperand(2);
6476 if (!OffImm.isReg())
6477 return None;
6478 MachineInstr *RHS = MRI.getVRegDef(OffImm.getReg());
6479 if (!RHS || RHS->getOpcode() != TargetOpcode::G_CONSTANT)
6480 return None;
6481 int64_t RHSC;
6482 MachineOperand &RHSOp1 = RHS->getOperand(1);
6483 if (!RHSOp1.isCImm() || RHSOp1.getCImm()->getBitWidth() > 64)
6484 return None;
6485 RHSC = RHSOp1.getCImm()->getSExtValue();
6486
6487 // If the offset is valid as a scaled immediate, don't match here.
6488 if ((RHSC & (Size - 1)) == 0 && RHSC >= 0 && RHSC < (0x1000 << Log2_32(Size)))
6489 return None;
6490 if (RHSC >= -256 && RHSC < 256) {
6491 MachineOperand &Base = RootDef->getOperand(1);
6492 return {{
6493 [=](MachineInstrBuilder &MIB) { MIB.add(Base); },
6494 [=](MachineInstrBuilder &MIB) { MIB.addImm(RHSC); },
6495 }};
6496 }
6497 return None;
6498}
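
A standalone sketch of the classification above (useUnscaledOffset is an invented helper and it assumes Size is a non-zero power of two, as the selector does): the unscaled form is matched only when the offset is not a non-negative multiple of the access size inside the scaled 12-bit range, yet still fits the signed 9-bit window.

#include <cstdint>
#include <cstdio>

// Sketch of the offset classification used above: an offset is matched by the
// unscaled (ldur/stur-style) form only when it is *not* valid for the scaled
// [base, #imm12 * Size] form and fits in a signed 9-bit range.
static bool useUnscaledOffset(int64_t Off, unsigned Size) {
  unsigned Scale = 31 - __builtin_clz(Size); // log2(Size); Size is a power of two
  bool ScaledOK = (Off % (int64_t)Size) == 0 && Off >= 0 &&
                  Off < (int64_t)(0x1000LL << Scale);
  return !ScaledOK && Off >= -256 && Off < 256;
}

int main() {
  // For an 8-byte access: +16 is scaled (ldr x0, [x1, #16]); -8 and +12 need
  // the unscaled form; +40000 is out of range for both.
  for (int64_t Off : {16LL, -8LL, 12LL, 40000LL})
    std::printf("offset %lld -> unscaled=%d\n", (long long)Off,
                (int)useUnscaledOffset(Off, 8));
  return 0;
}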
6499
6500InstructionSelector::ComplexRendererFns
6501AArch64InstructionSelector::tryFoldAddLowIntoImm(MachineInstr &RootDef,
6502 unsigned Size,
6503 MachineRegisterInfo &MRI) const {
6504 if (RootDef.getOpcode() != AArch64::G_ADD_LOW)
6505 return None;
6506 MachineInstr &Adrp = *MRI.getVRegDef(RootDef.getOperand(1).getReg());
6507 if (Adrp.getOpcode() != AArch64::ADRP)
6508 return None;
6509
6510 // TODO: add heuristics like isWorthFoldingADDlow() from SelectionDAG.
6511 auto Offset = Adrp.getOperand(1).getOffset();
6512 if (Offset % Size != 0)
6513 return None;
6514
6515 auto GV = Adrp.getOperand(1).getGlobal();
6516 if (GV->isThreadLocal())
6517 return None;
6518
6519 auto &MF = *RootDef.getParent()->getParent();
6520 if (GV->getPointerAlignment(MF.getDataLayout()) < Size)
6521 return None;
6522
6523 unsigned OpFlags = STI.ClassifyGlobalReference(GV, MF.getTarget());
6524 MachineIRBuilder MIRBuilder(RootDef);
6525 Register AdrpReg = Adrp.getOperand(0).getReg();
6526 return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(AdrpReg); },
6527 [=](MachineInstrBuilder &MIB) {
6528 MIB.addGlobalAddress(GV, Offset,
6529 OpFlags | AArch64II::MO_PAGEOFF |
6530 AArch64II::MO_NC);
6531 }}};
6532}
6533
6534/// Select a "register plus scaled unsigned 12-bit immediate" address. The
6535/// "Size" argument is the size in bytes of the memory reference, which
6536/// determines the scale.
6537InstructionSelector::ComplexRendererFns
6538AArch64InstructionSelector::selectAddrModeIndexed(MachineOperand &Root,
6539 unsigned Size) const {
6540 MachineFunction &MF = *Root.getParent()->getParent()->getParent();
6541 MachineRegisterInfo &MRI = MF.getRegInfo();
6542
6543 if (!Root.isReg())
4. Taking false branch
6544 return None;
6545
6546 MachineInstr *RootDef = MRI.getVRegDef(Root.getReg());
6547 if (!RootDef)
5. Assuming 'RootDef' is non-null
6. Taking false branch
6548 return None;
6549
6550 if (RootDef->getOpcode() == TargetOpcode::G_FRAME_INDEX) {
7. Assuming the condition is false
8. Taking false branch
6551 return {{
6552 [=](MachineInstrBuilder &MIB) { MIB.add(RootDef->getOperand(1)); },
6553 [=](MachineInstrBuilder &MIB) { MIB.addImm(0); },
6554 }};
6555 }
6556
6557 CodeModel::Model CM = MF.getTarget().getCodeModel();
6558 // Check if we can fold in the ADD of small code model ADRP + ADD address.
6559 if (CM == CodeModel::Small) {
9. Assuming 'CM' is not equal to Small
10. Taking false branch
6560 auto OpFns = tryFoldAddLowIntoImm(*RootDef, Size, MRI);
6561 if (OpFns)
6562 return OpFns;
6563 }
6564
6565 if (isBaseWithConstantOffset(Root, MRI)) {
11. Assuming the condition is true
12. Taking true branch
6566 MachineOperand &LHS = RootDef->getOperand(1);
6567 MachineOperand &RHS = RootDef->getOperand(2);
6568 MachineInstr *LHSDef = MRI.getVRegDef(LHS.getReg());
6569 MachineInstr *RHSDef = MRI.getVRegDef(RHS.getReg());
6570 if (LHSDef && RHSDef) {
13. Assuming 'LHSDef' is non-null
14. Assuming 'RHSDef' is non-null
15. Taking true branch
6571 int64_t RHSC = (int64_t)RHSDef->getOperand(1).getCImm()->getZExtValue();
6572 unsigned Scale = Log2_32(Size);
16. Calling 'Log2_32'
18. Returning from 'Log2_32'
19. 'Scale' initialized to 4294967295
6573 if ((RHSC & (Size - 1)) == 0 && RHSC >= 0 && RHSC < (0x1000 << Scale)) {
20. Assuming the condition is true
20.1. 'RHSC' is >= 0
21. The result of the left shift is undefined due to shifting by '4294967295', which is greater or equal to the width of type 'int'
6574 if (LHSDef->getOpcode() == TargetOpcode::G_FRAME_INDEX)
6575 return {{
6576 [=](MachineInstrBuilder &MIB) { MIB.add(LHSDef->getOperand(1)); },
6577 [=](MachineInstrBuilder &MIB) { MIB.addImm(RHSC >> Scale); },
6578 }};
6579
6580 return {{
6581 [=](MachineInstrBuilder &MIB) { MIB.add(LHS); },
6582 [=](MachineInstrBuilder &MIB) { MIB.addImm(RHSC >> Scale); },
6583 }};
6584 }
6585 }
6586 }
6587
6588 // Before falling back to our general case, check if the unscaled
6589 // instructions can handle this. If so, that's preferable.
6590 if (selectAddrModeUnscaled(Root, Size).hasValue())
6591 return None;
6592
6593 return {{
6594 [=](MachineInstrBuilder &MIB) { MIB.add(Root); },
6595 [=](MachineInstrBuilder &MIB) { MIB.addImm(0); },
6596 }};
6597}
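
The defect this report is tracking sits on the `0x1000 << Scale` expression in the function above: Log2_32 (defined in MathExtras.h, shown further down) computes 31 - countLeadingZeros(Value), so if Size can be 0 on this path, Scale becomes 4294967295, exactly as in the analyzer trace, and the left shift is undefined behaviour. The snippet below reproduces just that arithmetic with simplified local stand-ins (countLeadingZeros32 and log2_32 are not the LLVM implementations) and keeps the offending shift behind a guard.

#include <cstdint>
#include <cstdio>

// Standalone reproduction of the arithmetic the analyzer flags: log2_32(0)
// evaluates to 31 - 32 == 0xFFFFFFFF, and shifting an int by that amount is
// undefined behaviour.
static unsigned countLeadingZeros32(uint32_t Val) {
  return Val == 0 ? 32 : (unsigned)__builtin_clz(Val); // ZB_Width behaviour
}

static unsigned log2_32(uint32_t Val) { return 31 - countLeadingZeros32(Val); }

int main() {
  unsigned Scale = log2_32(0); // 4294967295, matching the trace above
  std::printf("Scale = %u\n", Scale);
  if (Scale < 31)
    std::printf("bound = %d\n", 0x1000 << Scale);
  else
    std::printf("0x1000 << Scale would shift by >= the width of int: undefined\n");
  return 0;
}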
6598
6599/// Given a shift instruction, return the correct shift type for that
6600/// instruction.
6601static AArch64_AM::ShiftExtendType getShiftTypeForInst(MachineInstr &MI) {
6602 switch (MI.getOpcode()) {
6603 default:
6604 return AArch64_AM::InvalidShiftExtend;
6605 case TargetOpcode::G_SHL:
6606 return AArch64_AM::LSL;
6607 case TargetOpcode::G_LSHR:
6608 return AArch64_AM::LSR;
6609 case TargetOpcode::G_ASHR:
6610 return AArch64_AM::ASR;
6611 case TargetOpcode::G_ROTR:
6612 return AArch64_AM::ROR;
6613 }
6614}
6615
6616/// Select a "shifted register" operand. If the value is not shifted, set the
6617/// shift operand to a default value of "lsl 0".
6618InstructionSelector::ComplexRendererFns
6619AArch64InstructionSelector::selectShiftedRegister(MachineOperand &Root,
6620 bool AllowROR) const {
6621 if (!Root.isReg())
6622 return None;
6623 MachineRegisterInfo &MRI =
6624 Root.getParent()->getParent()->getParent()->getRegInfo();
6625
6626 // Check if the operand is defined by an instruction which corresponds to
6627 // a ShiftExtendType. E.g. a G_SHL, G_LSHR, etc.
6628 MachineInstr *ShiftInst = MRI.getVRegDef(Root.getReg());
6629 if (!ShiftInst)
6630 return None;
6631 AArch64_AM::ShiftExtendType ShType = getShiftTypeForInst(*ShiftInst);
6632 if (ShType == AArch64_AM::InvalidShiftExtend)
6633 return None;
6634 if (ShType == AArch64_AM::ROR && !AllowROR)
6635 return None;
6636 if (!isWorthFoldingIntoExtendedReg(*ShiftInst, MRI))
6637 return None;
6638
6639 // Need an immediate on the RHS.
6640 MachineOperand &ShiftRHS = ShiftInst->getOperand(2);
6641 auto Immed = getImmedFromMO(ShiftRHS);
6642 if (!Immed)
6643 return None;
6644
6645 // We have something that we can fold. Fold in the shift's LHS and RHS into
6646 // the instruction.
6647 MachineOperand &ShiftLHS = ShiftInst->getOperand(1);
6648 Register ShiftReg = ShiftLHS.getReg();
6649
6650 unsigned NumBits = MRI.getType(ShiftReg).getSizeInBits();
6651 unsigned Val = *Immed & (NumBits - 1);
6652 unsigned ShiftVal = AArch64_AM::getShifterImm(ShType, Val);
6653
6654 return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(ShiftReg); },
6655 [=](MachineInstrBuilder &MIB) { MIB.addImm(ShiftVal); }}};
6656}
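
As a small illustration of the masking above (foldedShiftAmount is an invented name), the folded amount is simply the immediate reduced modulo the width of the shifted register, so an out-of-range G_SHL amount such as 33 on a 32-bit value is rendered as #1.

#include <cstdint>
#include <cstdio>

// Sketch of the amount masking in selectShiftedRegister: the encoded shift is
// the immediate masked to the width of the shifted register.
static unsigned foldedShiftAmount(uint64_t Imm, unsigned NumBits) {
  return (unsigned)(Imm & (NumBits - 1));
}

int main() {
  std::printf("33 on s32 -> #%u, 3 on s64 -> #%u\n",
              foldedShiftAmount(33, 32), foldedShiftAmount(3, 64));
  return 0;
}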
6657
6658AArch64_AM::ShiftExtendType AArch64InstructionSelector::getExtendTypeForInst(
6659 MachineInstr &MI, MachineRegisterInfo &MRI, bool IsLoadStore) const {
6660 unsigned Opc = MI.getOpcode();
6661
6662 // Handle explicit extend instructions first.
6663 if (Opc == TargetOpcode::G_SEXT || Opc == TargetOpcode::G_SEXT_INREG) {
6664 unsigned Size;
6665 if (Opc == TargetOpcode::G_SEXT)
6666 Size = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
6667 else
6668 Size = MI.getOperand(2).getImm();
6669 assert(Size != 64 && "Extend from 64 bits?");
6670 switch (Size) {
6671 case 8:
6672 return IsLoadStore ? AArch64_AM::InvalidShiftExtend : AArch64_AM::SXTB;
6673 case 16:
6674 return IsLoadStore ? AArch64_AM::InvalidShiftExtend : AArch64_AM::SXTH;
6675 case 32:
6676 return AArch64_AM::SXTW;
6677 default:
6678 return AArch64_AM::InvalidShiftExtend;
6679 }
6680 }
6681
6682 if (Opc == TargetOpcode::G_ZEXT || Opc == TargetOpcode::G_ANYEXT) {
6683 unsigned Size = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
6684 assert(Size != 64 && "Extend from 64 bits?");
6685 switch (Size) {
6686 case 8:
6687 return IsLoadStore ? AArch64_AM::InvalidShiftExtend : AArch64_AM::UXTB;
6688 case 16:
6689 return IsLoadStore ? AArch64_AM::InvalidShiftExtend : AArch64_AM::UXTH;
6690 case 32:
6691 return AArch64_AM::UXTW;
6692 default:
6693 return AArch64_AM::InvalidShiftExtend;
6694 }
6695 }
6696
6697 // Don't have an explicit extend. Try to handle a G_AND with a constant mask
6698 // on the RHS.
6699 if (Opc != TargetOpcode::G_AND)
6700 return AArch64_AM::InvalidShiftExtend;
6701
6702 Optional<uint64_t> MaybeAndMask = getImmedFromMO(MI.getOperand(2));
6703 if (!MaybeAndMask)
6704 return AArch64_AM::InvalidShiftExtend;
6705 uint64_t AndMask = *MaybeAndMask;
6706 switch (AndMask) {
6707 default:
6708 return AArch64_AM::InvalidShiftExtend;
6709 case 0xFF:
6710 return !IsLoadStore ? AArch64_AM::UXTB : AArch64_AM::InvalidShiftExtend;
6711 case 0xFFFF:
6712 return !IsLoadStore ? AArch64_AM::UXTH : AArch64_AM::InvalidShiftExtend;
6713 case 0xFFFFFFFF:
6714 return AArch64_AM::UXTW;
6715 }
6716}
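
The G_AND fallback above treats a handful of masks as implicit zero-extends. A tiny standalone table of that mapping (extendForMask is an invented helper, for illustration only; the load/store restriction on the narrower forms is omitted here):

#include <cstdint>
#include <cstdio>

// Sketch of the G_AND mask handling above: an AND with one of these masks is
// equivalent to an unsigned extend from 8, 16 or 32 bits, which is what lets
// the selector fold it as uxtb/uxth/uxtw.
static const char *extendForMask(uint64_t Mask) {
  switch (Mask) {
  case 0xFF:       return "uxtb";
  case 0xFFFF:     return "uxth";
  case 0xFFFFFFFF: return "uxtw";
  default:         return "not an extend";
  }
}

int main() {
  for (uint64_t M : {0xFFULL, 0xFFFFULL, 0xFFFFFFFFULL, 0xF0ULL})
    std::printf("and with %#llx -> %s\n", (unsigned long long)M, extendForMask(M));
  return 0;
}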
6717
6718Register AArch64InstructionSelector::moveScalarRegClass(
6719 Register Reg, const TargetRegisterClass &RC, MachineIRBuilder &MIB) const {
6720 MachineRegisterInfo &MRI = *MIB.getMRI();
6721 auto Ty = MRI.getType(Reg);
6722 assert(!Ty.isVector() && "Expected scalars only!");
6723 if (Ty.getSizeInBits() == TRI.getRegSizeInBits(RC))
6724 return Reg;
6725
6726 // Create a copy and immediately select it.
6727 // FIXME: We should have an emitCopy function?
6728 auto Copy = MIB.buildCopy({&RC}, {Reg});
6729 selectCopy(*Copy, TII, MRI, TRI, RBI);
6730 return Copy.getReg(0);
6731}
6732
6733/// Select an "extended register" operand. This operand folds in an extend
6734/// followed by an optional left shift.
6735InstructionSelector::ComplexRendererFns
6736AArch64InstructionSelector::selectArithExtendedRegister(
6737 MachineOperand &Root) const {
6738 if (!Root.isReg())
6739 return None;
6740 MachineRegisterInfo &MRI =
6741 Root.getParent()->getParent()->getParent()->getRegInfo();
6742
6743 uint64_t ShiftVal = 0;
6744 Register ExtReg;
6745 AArch64_AM::ShiftExtendType Ext;
6746 MachineInstr *RootDef = getDefIgnoringCopies(Root.getReg(), MRI);
6747 if (!RootDef)
6748 return None;
6749
6750 if (!isWorthFoldingIntoExtendedReg(*RootDef, MRI))
6751 return None;
6752
6753 // Check if we can fold a shift and an extend.
6754 if (RootDef->getOpcode() == TargetOpcode::G_SHL) {
6755 // Look for a constant on the RHS of the shift.
6756 MachineOperand &RHS = RootDef->getOperand(2);
6757 Optional<uint64_t> MaybeShiftVal = getImmedFromMO(RHS);
6758 if (!MaybeShiftVal)
6759 return None;
6760 ShiftVal = *MaybeShiftVal;
6761 if (ShiftVal > 4)
6762 return None;
6763 // Look for a valid extend instruction on the LHS of the shift.
6764 MachineOperand &LHS = RootDef->getOperand(1);
6765 MachineInstr *ExtDef = getDefIgnoringCopies(LHS.getReg(), MRI);
6766 if (!ExtDef)
6767 return None;
6768 Ext = getExtendTypeForInst(*ExtDef, MRI);
6769 if (Ext == AArch64_AM::InvalidShiftExtend)
6770 return None;
6771 ExtReg = ExtDef->getOperand(1).getReg();
6772 } else {
6773 // Didn't get a shift. Try just folding an extend.
6774 Ext = getExtendTypeForInst(*RootDef, MRI);
6775 if (Ext == AArch64_AM::InvalidShiftExtend)
6776 return None;
6777 ExtReg = RootDef->getOperand(1).getReg();
6778
6779 // If we have a 32 bit instruction which zeroes out the high half of a
6780 // register, we get an implicit zero extend for free. Check if we have one.
6781 // FIXME: We actually emit the extend right now even though we don't have
6782 // to.
6783 if (Ext == AArch64_AM::UXTW && MRI.getType(ExtReg).getSizeInBits() == 32) {
6784 MachineInstr *ExtInst = MRI.getVRegDef(ExtReg);
6785 if (ExtInst && isDef32(*ExtInst))
6786 return None;
6787 }
6788 }
6789
6790 // We require a GPR32 here. Narrow the ExtReg if needed using a subregister
6791 // copy.
6792 MachineIRBuilder MIB(*RootDef);
6793 ExtReg = moveScalarRegClass(ExtReg, AArch64::GPR32RegClass, MIB);
6794
6795 return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(ExtReg); },
6796 [=](MachineInstrBuilder &MIB) {
6797 MIB.addImm(getArithExtendImm(Ext, ShiftVal));
6798 }}};
6799}
6800
6801void AArch64InstructionSelector::renderTruncImm(MachineInstrBuilder &MIB,
6802 const MachineInstr &MI,
6803 int OpIdx) const {
6804 const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
6805 assert(MI.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 &&
6806 "Expected G_CONSTANT");
6807 Optional<int64_t> CstVal =
6808 getIConstantVRegSExtVal(MI.getOperand(0).getReg(), MRI);
6809 assert(CstVal && "Expected constant value");
6810 MIB.addImm(CstVal.getValue());
6811}
6812
6813void AArch64InstructionSelector::renderLogicalImm32(
6814 MachineInstrBuilder &MIB, const MachineInstr &I, int OpIdx) const {
6815 assert(I.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 &&
6816 "Expected G_CONSTANT");
6817 uint64_t CstVal = I.getOperand(1).getCImm()->getZExtValue();
6818 uint64_t Enc = AArch64_AM::encodeLogicalImmediate(CstVal, 32);
6819 MIB.addImm(Enc);
6820}
6821
6822void AArch64InstructionSelector::renderLogicalImm64(
6823 MachineInstrBuilder &MIB, const MachineInstr &I, int OpIdx) const {
6824 assert(I.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 &&
6825 "Expected G_CONSTANT");
6826 uint64_t CstVal = I.getOperand(1).getCImm()->getZExtValue();
6827 uint64_t Enc = AArch64_AM::encodeLogicalImmediate(CstVal, 64);
6828 MIB.addImm(Enc);
6829}
6830
6831void AArch64InstructionSelector::renderFPImm16(MachineInstrBuilder &MIB,
6832 const MachineInstr &MI,
6833 int OpIdx) const {
6834 assert(MI.getOpcode() == TargetOpcode::G_FCONSTANT && OpIdx == -1 &&
6835 "Expected G_FCONSTANT");
6836 MIB.addImm(
6837 AArch64_AM::getFP16Imm(MI.getOperand(1).getFPImm()->getValueAPF()));
6838}
6839
6840void AArch64InstructionSelector::renderFPImm32(MachineInstrBuilder &MIB,
6841 const MachineInstr &MI,
6842 int OpIdx) const {
6843 assert(MI.getOpcode() == TargetOpcode::G_FCONSTANT && OpIdx == -1 &&
6844 "Expected G_FCONSTANT");
6845 MIB.addImm(
6846 AArch64_AM::getFP32Imm(MI.getOperand(1).getFPImm()->getValueAPF()));
6847}
6848
6849void AArch64InstructionSelector::renderFPImm64(MachineInstrBuilder &MIB,
6850 const MachineInstr &MI,
6851 int OpIdx) const {
6852 assert(MI.getOpcode() == TargetOpcode::G_FCONSTANT && OpIdx == -1 &&
6853 "Expected G_FCONSTANT");
6854 MIB.addImm(
6855 AArch64_AM::getFP64Imm(MI.getOperand(1).getFPImm()->getValueAPF()));
6856}
6857
6858void AArch64InstructionSelector::renderFPImm32SIMDModImmType4(
6859 MachineInstrBuilder &MIB, const MachineInstr &MI, int OpIdx) const {
6860 assert(MI.getOpcode() == TargetOpcode::G_FCONSTANT && OpIdx == -1 &&
6861 "Expected G_FCONSTANT");
6862 MIB.addImm(AArch64_AM::encodeAdvSIMDModImmType4(MI.getOperand(1)
6863 .getFPImm()
6864 ->getValueAPF()
6865 .bitcastToAPInt()
6866 .getZExtValue()));
6867}
6868
6869bool AArch64InstructionSelector::isLoadStoreOfNumBytes(
6870 const MachineInstr &MI, unsigned NumBytes) const {
6871 if (!MI.mayLoadOrStore())
6872 return false;
6873 assert(MI.hasOneMemOperand() &&
6874 "Expected load/store to have only one mem op!");
6875 return (*MI.memoperands_begin())->getSize() == NumBytes;
6876}
6877
6878bool AArch64InstructionSelector::isDef32(const MachineInstr &MI) const {
6879 const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
6880 if (MRI.getType(MI.getOperand(0).getReg()).getSizeInBits() != 32)
6881 return false;
6882
6883 // Only return true if we know the operation will zero-out the high half of
6884 // the 64-bit register. Truncates can be subregister copies, which don't
6885 // zero out the high bits. Copies and other copy-like instructions can be
6886 // fed by truncates, or could be lowered as subregister copies.
6887 switch (MI.getOpcode()) {
6888 default:
6889 return true;
6890 case TargetOpcode::COPY:
6891 case TargetOpcode::G_BITCAST:
6892 case TargetOpcode::G_TRUNC:
6893 case TargetOpcode::G_PHI:
6894 return false;
6895 }
6896}
6897
6898
6899// Perform fixups on the given PHI instruction's operands to force them all
6900// to be the same as the destination regbank.
6901static void fixupPHIOpBanks(MachineInstr &MI, MachineRegisterInfo &MRI,
6902 const AArch64RegisterBankInfo &RBI) {
6903 assert(MI.getOpcode() == TargetOpcode::G_PHI && "Expected a G_PHI");
6904 Register DstReg = MI.getOperand(0).getReg();
6905 const RegisterBank *DstRB = MRI.getRegBankOrNull(DstReg);
6906 assert(DstRB && "Expected PHI dst to have regbank assigned");
6907 MachineIRBuilder MIB(MI);
6908
6909 // Go through each operand and ensure it has the same regbank.
6910 for (MachineOperand &MO : llvm::drop_begin(MI.operands())) {
6911 if (!MO.isReg())
6912 continue;
6913 Register OpReg = MO.getReg();
6914 const RegisterBank *RB = MRI.getRegBankOrNull(OpReg);
6915 if (RB != DstRB) {
6916 // Insert a cross-bank copy.
6917 auto *OpDef = MRI.getVRegDef(OpReg);
6918 const LLT &Ty = MRI.getType(OpReg);
6919 MachineBasicBlock &OpDefBB = *OpDef->getParent();
6920
6921 // Any instruction we insert must appear after all PHIs in the block
6922 // for the block to be valid MIR.
6923 MachineBasicBlock::iterator InsertPt = std::next(OpDef->getIterator());
6924 if (InsertPt != OpDefBB.end() && InsertPt->isPHI())
6925 InsertPt = OpDefBB.getFirstNonPHI();
6926 MIB.setInsertPt(*OpDef->getParent(), InsertPt);
6927 auto Copy = MIB.buildCopy(Ty, OpReg);
6928 MRI.setRegBank(Copy.getReg(0), *DstRB);
6929 MO.setReg(Copy.getReg(0));
6930 }
6931 }
6932}
6933
6934void AArch64InstructionSelector::processPHIs(MachineFunction &MF) {
6935 // We're looking for PHIs, build a list so we don't invalidate iterators.
6936 MachineRegisterInfo &MRI = MF.getRegInfo();
6937 SmallVector<MachineInstr *, 32> Phis;
6938 for (auto &BB : MF) {
6939 for (auto &MI : BB) {
6940 if (MI.getOpcode() == TargetOpcode::G_PHI)
6941 Phis.emplace_back(&MI);
6942 }
6943 }
6944
6945 for (auto *MI : Phis) {
6946 // We need to do some work here if the operand types are < 16 bit and they
6947 // are split across fpr/gpr banks. Since all types <32b on gpr
6948 // end up being assigned gpr32 regclasses, we can end up with PHIs here
6949 // which try to select between a gpr32 and an fpr16. Ideally RBS shouldn't
6950 // be selecting heterogeneous regbanks for operands if possible, but we
6951 // still need to be able to deal with it here.
6952 //
6953 // To fix this, if we have a gpr-bank operand < 32b in size and at least
6954 // one other operand is on the fpr bank, then we add cross-bank copies
6955 // to homogenize the operand banks. For simplicity the bank that we choose
6956 // to settle on is whatever bank the def operand has. For example:
6957 //
6958 // %endbb:
6959 // %dst:gpr(s16) = G_PHI %in1:gpr(s16), %bb1, %in2:fpr(s16), %bb2
6960 // =>
6961 // %bb2:
6962 // ...
6963 // %in2_copy:gpr(s16) = COPY %in2:fpr(s16)
6964 // ...
6965 // %endbb:
6966 // %dst:gpr(s16) = G_PHI %in1:gpr(s16), %bb1, %in2_copy:gpr(s16), %bb2
6967 bool HasGPROp = false, HasFPROp = false;
6968 for (const MachineOperand &MO : llvm::drop_begin(MI->operands())) {
6969 if (!MO.isReg())
6970 continue;
6971 const LLT &Ty = MRI.getType(MO.getReg());
6972 if (!Ty.isValid() || !Ty.isScalar())
6973 break;
6974 if (Ty.getSizeInBits() >= 32)
6975 break;
6976 const RegisterBank *RB = MRI.getRegBankOrNull(MO.getReg());
6977 // If for some reason we don't have a regbank yet, don't try anything.
6978 if (!RB)
6979 break;
6980
6981 if (RB->getID() == AArch64::GPRRegBankID)
6982 HasGPROp = true;
6983 else
6984 HasFPROp = true;
6985 }
6986 // We have heterogeneous regbanks, so fix them up.
6987 if (HasGPROp && HasFPROp)
6988 fixupPHIOpBanks(*MI, MRI, RBI);
6989 }
6990}
6991
6992namespace llvm {
6993InstructionSelector *
6994createAArch64InstructionSelector(const AArch64TargetMachine &TM,
6995 AArch64Subtarget &Subtarget,
6996 AArch64RegisterBankInfo &RBI) {
6997 return new AArch64InstructionSelector(TM, Subtarget, RBI);
6998}
6999}

/build/llvm-toolchain-snapshot-15~++20220419111428+a65f2730d291/llvm/include/llvm/Support/MathExtras.h

1//===-- llvm/Support/MathExtras.h - Useful math functions -------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file contains some functions that are useful for math stuff.
10//
11//===----------------------------------------------------------------------===//
12
13#ifndef LLVM_SUPPORT_MATHEXTRAS_H
14#define LLVM_SUPPORT_MATHEXTRAS_H
15
16#include "llvm/Support/Compiler.h"
17#include <cassert>
18#include <climits>
19#include <cmath>
20#include <cstdint>
21#include <cstring>
22#include <limits>
23#include <type_traits>
24
25#ifdef __ANDROID_NDK__
26#include <android/api-level.h>
27#endif
28
29#ifdef _MSC_VER
30// Declare these intrinsics manually rather than including intrin.h. It's very
31// expensive, and MathExtras.h is popular.
32// #include <intrin.h>
33extern "C" {
34unsigned char _BitScanForward(unsigned long *_Index, unsigned long _Mask);
35unsigned char _BitScanForward64(unsigned long *_Index, unsigned __int64 _Mask);
36unsigned char _BitScanReverse(unsigned long *_Index, unsigned long _Mask);
37unsigned char _BitScanReverse64(unsigned long *_Index, unsigned __int64 _Mask);
38}
39#endif
40
41namespace llvm {
42
43/// The behavior an operation has on an input of 0.
44enum ZeroBehavior {
45 /// The returned value is undefined.
46 ZB_Undefined,
47 /// The returned value is numeric_limits<T>::max()
48 ZB_Max,
49 /// The returned value is numeric_limits<T>::digits
50 ZB_Width
51};
52
53/// Mathematical constants.
54namespace numbers {
55// TODO: Track C++20 std::numbers.
56// TODO: Favor using the hexadecimal FP constants (requires C++17).
57constexpr double e = 2.7182818284590452354, // (0x1.5bf0a8b145749P+1) https://oeis.org/A001113
58 egamma = .57721566490153286061, // (0x1.2788cfc6fb619P-1) https://oeis.org/A001620
59 ln2 = .69314718055994530942, // (0x1.62e42fefa39efP-1) https://oeis.org/A002162
60 ln10 = 2.3025850929940456840, // (0x1.26bb1bbb55516P+1) https://oeis.org/A002392
61 log2e = 1.4426950408889634074, // (0x1.71547652b82feP+0)
62 log10e = .43429448190325182765, // (0x1.bcb7b1526e50eP-2)
63 pi = 3.1415926535897932385, // (0x1.921fb54442d18P+1) https://oeis.org/A000796
64 inv_pi = .31830988618379067154, // (0x1.45f306bc9c883P-2) https://oeis.org/A049541
65 sqrtpi = 1.7724538509055160273, // (0x1.c5bf891b4ef6bP+0) https://oeis.org/A002161
66 inv_sqrtpi = .56418958354775628695, // (0x1.20dd750429b6dP-1) https://oeis.org/A087197
67 sqrt2 = 1.4142135623730950488, // (0x1.6a09e667f3bcdP+0) https://oeis.org/A002193
68 inv_sqrt2 = .70710678118654752440, // (0x1.6a09e667f3bcdP-1)
69 sqrt3 = 1.7320508075688772935, // (0x1.bb67ae8584caaP+0) https://oeis.org/A002194
70 inv_sqrt3 = .57735026918962576451, // (0x1.279a74590331cP-1)
71 phi = 1.6180339887498948482; // (0x1.9e3779b97f4a8P+0) https://oeis.org/A001622
72constexpr float ef = 2.71828183F, // (0x1.5bf0a8P+1) https://oeis.org/A001113
73 egammaf = .577215665F, // (0x1.2788d0P-1) https://oeis.org/A001620
74 ln2f = .693147181F, // (0x1.62e430P-1) https://oeis.org/A002162
75 ln10f = 2.30258509F, // (0x1.26bb1cP+1) https://oeis.org/A002392
76 log2ef = 1.44269504F, // (0x1.715476P+0)
77 log10ef = .434294482F, // (0x1.bcb7b2P-2)
78 pif = 3.14159265F, // (0x1.921fb6P+1) https://oeis.org/A000796
79 inv_pif = .318309886F, // (0x1.45f306P-2) https://oeis.org/A049541
80 sqrtpif = 1.77245385F, // (0x1.c5bf8aP+0) https://oeis.org/A002161
81 inv_sqrtpif = .564189584F, // (0x1.20dd76P-1) https://oeis.org/A087197
82 sqrt2f = 1.41421356F, // (0x1.6a09e6P+0) https://oeis.org/A002193
83 inv_sqrt2f = .707106781F, // (0x1.6a09e6P-1)
84 sqrt3f = 1.73205081F, // (0x1.bb67aeP+0) https://oeis.org/A002194
85 inv_sqrt3f = .577350269F, // (0x1.279a74P-1)
86 phif = 1.61803399F; // (0x1.9e377aP+0) https://oeis.org/A001622
87} // namespace numbers
88
89namespace detail {
90template <typename T, std::size_t SizeOfT> struct TrailingZerosCounter {
91 static unsigned count(T Val, ZeroBehavior) {
92 if (!Val)
93 return std::numeric_limits<T>::digits;
94 if (Val & 0x1)
95 return 0;
96
97 // Bisection method.
98 unsigned ZeroBits = 0;
99 T Shift = std::numeric_limits<T>::digits >> 1;
100 T Mask = std::numeric_limits<T>::max() >> Shift;
101 while (Shift) {
102 if ((Val & Mask) == 0) {
103 Val >>= Shift;
104 ZeroBits |= Shift;
105 }
106 Shift >>= 1;
107 Mask >>= Shift;
108 }
109 return ZeroBits;
110 }
111};
112
113#if defined(__GNUC__) || defined(_MSC_VER)
114template <typename T> struct TrailingZerosCounter<T, 4> {
115 static unsigned count(T Val, ZeroBehavior ZB) {
116 if (ZB != ZB_Undefined && Val == 0)
117 return 32;
118
119#if __has_builtin(__builtin_ctz) || defined(__GNUC__)
120 return __builtin_ctz(Val);
121#elif defined(_MSC_VER)
122 unsigned long Index;
123 _BitScanForward(&Index, Val);
124 return Index;
125#endif
126 }
127};
128
129#if !defined(_MSC_VER) || defined(_M_X64)
130template <typename T> struct TrailingZerosCounter<T, 8> {
131 static unsigned count(T Val, ZeroBehavior ZB) {
132 if (ZB != ZB_Undefined && Val == 0)
133 return 64;
134
135#if __has_builtin(__builtin_ctzll) || defined(__GNUC__)
136 return __builtin_ctzll(Val);
137#elif defined(_MSC_VER)
138 unsigned long Index;
139 _BitScanForward64(&Index, Val);
140 return Index;
141#endif
142 }
143};
144#endif
145#endif
146} // namespace detail
147
148/// Count the number of 0's from the least significant bit to the most,
149/// stopping at the first 1.
150///
151/// Only unsigned integral types are allowed.
152///
153/// \param ZB the behavior on an input of 0. Only ZB_Width and ZB_Undefined are
154/// valid arguments.
155template <typename T>
156unsigned countTrailingZeros(T Val, ZeroBehavior ZB = ZB_Width) {
157 static_assert(std::numeric_limits<T>::is_integer &&
158 !std::numeric_limits<T>::is_signed,
159 "Only unsigned integral types are allowed.");
160 return llvm::detail::TrailingZerosCounter<T, sizeof(T)>::count(Val, ZB);
161}
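The bisection fallback above halves the search window on each step, OR-ing the shift amount into the count whenever the low half is all zeros; on GCC/Clang and MSVC the builtins are used instead. A small usage sketch (not part of the header; it only exercises the function as declared above):

#include "llvm/Support/MathExtras.h"
#include <cassert>

// Illustrative only: demoCountTrailingZeros is a hypothetical helper.
void demoCountTrailingZeros() {
  assert(llvm::countTrailingZeros(0x80u) == 7); // lowest set bit is bit 7
  assert(llvm::countTrailingZeros(0x1u) == 0);  // early-out for odd values
  assert(llvm::countTrailingZeros(0u) == 32);   // default ZB_Width: returns the type width
}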
162
163namespace detail {
164template <typename T, std::size_t SizeOfT> struct LeadingZerosCounter {
165 static unsigned count(T Val, ZeroBehavior) {
166 if (!Val)
167 return std::numeric_limits<T>::digits;
168
169 // Bisection method.
170 unsigned ZeroBits = 0;
171 for (T Shift = std::numeric_limits<T>::digits >> 1; Shift; Shift >>= 1) {
172 T Tmp = Val >> Shift;
173 if (Tmp)
174 Val = Tmp;
175 else
176 ZeroBits |= Shift;
177 }
178 return ZeroBits;
179 }
180};
181
182#if defined(__GNUC__) || defined(_MSC_VER)
183template <typename T> struct LeadingZerosCounter<T, 4> {
184 static unsigned count(T Val, ZeroBehavior ZB) {
185 if (ZB != ZB_Undefined && Val == 0)
186 return 32;
187
188#if __has_builtin(__builtin_clz) || defined(__GNUC__)
189 return __builtin_clz(Val);
190#elif defined(_MSC_VER)
191 unsigned long Index;
192 _BitScanReverse(&Index, Val);
193 return Index ^ 31;
194#endif
195 }
196};
197
198#if !defined(_MSC_VER) || defined(_M_X64)
199template <typename T> struct LeadingZerosCounter<T, 8> {
200 static unsigned count(T Val, ZeroBehavior ZB) {
201 if (ZB != ZB_Undefined && Val == 0)
202 return 64;
203
204#if __has_builtin(__builtin_clzll) || defined(__GNUC__)
205 return __builtin_clzll(Val);
206#elif defined(_MSC_VER)
207 unsigned long Index;
208 _BitScanReverse64(&Index, Val);
209 return Index ^ 63;
210#endif
211 }
212};
213#endif
214#endif
215} // namespace detail
216
217/// Count the number of 0's from the most significant bit to the least,
218/// stopping at the first 1.
219///
220/// Only unsigned integral types are allowed.
221///
222/// \param ZB the behavior on an input of 0. Only ZB_Width and ZB_Undefined are
223/// valid arguments.
224template <typename T>
225unsigned countLeadingZeros(T Val, ZeroBehavior ZB = ZB_Width) {
226 static_assert(std::numeric_limits<T>::is_integer &&
227 !std::numeric_limits<T>::is_signed,
228 "Only unsigned integral types are allowed.");
229 return llvm::detail::LeadingZerosCounter<T, sizeof(T)>::count(Val, ZB);
230}
231
232/// Get the index of the first set bit starting from the least
233/// significant bit.
234///
235/// Only unsigned integral types are allowed.
236///
237/// \param ZB the behavior on an input of 0. Only ZB_Max and ZB_Undefined are
238/// valid arguments.
239template <typename T> T findFirstSet(T Val, ZeroBehavior ZB = ZB_Max) {
240 if (ZB == ZB_Max && Val == 0)
241 return std::numeric_limits<T>::max();
242
243 return countTrailingZeros(Val, ZB_Undefined);
244}
245
246/// Create a bitmask with the N right-most bits set to 1, and all other
247/// bits set to 0. Only unsigned types are allowed.
248template <typename T> T maskTrailingOnes(unsigned N) {
249 static_assert(std::is_unsigned<T>::value, "Invalid type!");
250 const unsigned Bits = CHAR_BIT * sizeof(T);
251 assert(N <= Bits && "Invalid bit index");
252 return N == 0 ? 0 : (T(-1) >> (Bits - N));
253}
254
255/// Create a bitmask with the N left-most bits set to 1, and all other
256/// bits set to 0. Only unsigned types are allowed.
257template <typename T> T maskLeadingOnes(unsigned N) {
258 return ~maskTrailingOnes<T>(CHAR_BIT * sizeof(T) - N);
259}
260
261/// Create a bitmask with the N right-most bits set to 0, and all other
262/// bits set to 1. Only unsigned types are allowed.
263template <typename T> T maskTrailingZeros(unsigned N) {
264 return maskLeadingOnes<T>(CHAR_BIT * sizeof(T) - N);
265}
266
267/// Create a bitmask with the N left-most bits set to 0, and all other
268/// bits set to 1. Only unsigned types are allowed.
269template <typename T> T maskLeadingZeros(unsigned N) {
270 return maskTrailingOnes<T>(CHAR_BIT * sizeof(T) - N);
271}
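The N == 0 special case in maskTrailingOnes is what keeps the helper from shifting by the full type width, the same class of undefined shift this report is about. A small usage sketch, assuming the header above is on the include path:

#include "llvm/Support/MathExtras.h"
#include <cassert>
#include <cstdint>

// Illustrative only: demoMasks is a hypothetical helper.
void demoMasks() {
  assert(llvm::maskTrailingOnes<uint32_t>(8) == 0x000000FFu);
  assert(llvm::maskLeadingOnes<uint32_t>(8) == 0xFF000000u);
  assert(llvm::maskTrailingZeros<uint32_t>(8) == 0xFFFFFF00u);
  assert(llvm::maskTrailingOnes<uint32_t>(0) == 0u); // guarded: no shift by 32
}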
272
273/// Get the index of the last set bit starting from the least
274/// significant bit.
275///
276/// Only unsigned integral types are allowed.
277///
278/// \param ZB the behavior on an input of 0. Only ZB_Max and ZB_Undefined are
279/// valid arguments.
280template <typename T> T findLastSet(T Val, ZeroBehavior ZB = ZB_Max) {
281 if (ZB == ZB_Max && Val == 0)
282 return std::numeric_limits<T>::max();
283
284 // Use ^ instead of - because both gcc and llvm can remove the associated ^
285 // in the __builtin_clz intrinsic on x86.
286 return countLeadingZeros(Val, ZB_Undefined) ^
287 (std::numeric_limits<T>::digits - 1);
288}
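XOR-ing the leading-zero count with digits - 1 turns it into the index of the highest set bit; the comment above notes that compilers fold this XOR into the __builtin_clz lowering on x86. Illustrative usage, assuming the header above:

#include "llvm/Support/MathExtras.h"
#include <cassert>
#include <limits>

// Illustrative only: demoFindLastSet is a hypothetical helper.
void demoFindLastSet() {
  assert(llvm::findLastSet(0x10u) == 4u); // countLeadingZeros(0x10u) == 27, and 27 ^ 31 == 4
  assert(llvm::findLastSet(1u) == 0u);
  assert(llvm::findLastSet(0u) == std::numeric_limits<unsigned>::max()); // ZB_Max default
}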
289
290/// Macro compressed bit reversal table for 256 bits.
291///
292/// http://graphics.stanford.edu/~seander/bithacks.html#BitReverseTable
293static const unsigned char BitReverseTable256[256] = {
294#define R2(n) n, n + 2 * 64, n + 1 * 64, n + 3 * 64
295#define R4(n) R2(n), R2(n + 2 * 16), R2(n + 1 * 16), R2(n + 3 * 16)
296#define R6(n) R4(n), R4(n + 2 * 4), R4(n + 1 * 4), R4(n + 3 * 4)
297 R6(0), R6(2), R6(1), R6(3)
298#undef R2
299#undef R4
300#undef R6
301};
302
303/// Reverse the bits in \p Val.
304template <typename T>
305T reverseBits(T Val) {
306 unsigned char in[sizeof(Val)];
307 unsigned char out[sizeof(Val)];
308 std::memcpy(in, &Val, sizeof(Val));
309 for (unsigned i = 0; i < sizeof(Val); ++i)
310 out[(sizeof(Val) - i) - 1] = BitReverseTable256[in[i]];
311 std::memcpy(&Val, out, sizeof(Val));
312 return Val;
313}
314
315#if __has_builtin(__builtin_bitreverse8)
316template<>
317inline uint8_t reverseBits<uint8_t>(uint8_t Val) {
318 return __builtin_bitreverse8(Val);
319}
320#endif
321
322#if __has_builtin(__builtin_bitreverse16)
323template<>
324inline uint16_t reverseBits<uint16_t>(uint16_t Val) {
325 return __builtin_bitreverse16(Val);
326}
327#endif
328
329#if __has_builtin(__builtin_bitreverse32)
330template<>
331inline uint32_t reverseBits<uint32_t>(uint32_t Val) {
332 return __builtin_bitreverse32(Val);
333}
334#endif
335
336#if __has_builtin(__builtin_bitreverse64)
337template<>
338inline uint64_t reverseBits<uint64_t>(uint64_t Val) {
339 return __builtin_bitreverse64(Val);
340}
341#endif
342
343// NOTE: The following support functions use the _32/_64 extensions instead of
344// type overloading so that signed and unsigned integers can be used without
345// ambiguity.
346
347/// Return the high 32 bits of a 64 bit value.
348constexpr inline uint32_t Hi_32(uint64_t Value) {
349 return static_cast<uint32_t>(Value >> 32);
350}
351
352/// Return the low 32 bits of a 64 bit value.
353constexpr inline uint32_t Lo_32(uint64_t Value) {
354 return static_cast<uint32_t>(Value);
355}
356
357/// Make a 64-bit integer from a high / low pair of 32-bit integers.
358constexpr inline uint64_t Make_64(uint32_t High, uint32_t Low) {
359 return ((uint64_t)High << 32) | (uint64_t)Low;
360}
361
362/// Checks if an integer fits into the given bit width.
363template <unsigned N> constexpr inline bool isInt(int64_t x) {
364 return N >= 64 || (-(INT64_C(1)<<(N-1)) <= x && x < (INT64_C(1)<<(N-1)));
365}
366// Template specializations to get better code for common cases.
367template <> constexpr inline bool isInt<8>(int64_t x) {
368 return static_cast<int8_t>(x) == x;
369}
370template <> constexpr inline bool isInt<16>(int64_t x) {
371 return static_cast<int16_t>(x) == x;
372}
373template <> constexpr inline bool isInt<32>(int64_t x) {
374 return static_cast<int32_t>(x) == x;
375}
376
377/// Checks if a signed integer is an N bit number shifted left by S.
378template <unsigned N, unsigned S>
379constexpr inline bool isShiftedInt(int64_t x) {
380 static_assert(
381 N > 0, "isShiftedInt<0> doesn't make sense (refers to a 0-bit number.)");
382 static_assert(N + S <= 64, "isShiftedInt<N, S> with N + S > 64 is too wide.");
383 return isInt<N + S>(x) && (x % (UINT64_C(1) << S) == 0);
384}
385
386/// Checks if an unsigned integer fits into the given bit width.
387///
388/// This is written as two functions rather than as simply
389///
390/// return N >= 64 || X < (UINT64_C(1) << N);
391///
392/// to keep MSVC from (incorrectly) warning on isUInt<64> that we're shifting
393/// left too many places.
394template <unsigned N>
395constexpr inline std::enable_if_t<(N < 64), bool> isUInt(uint64_t X) {
396 static_assert(N > 0, "isUInt<0> doesn't make sense");
397 return X < (UINT64_C(1) << (N));
398}
399template <unsigned N>
400constexpr inline std::enable_if_t<N >= 64, bool> isUInt(uint64_t) {
401 return true;
402}
403
404// Template specializations to get better code for common cases.
405template <> constexpr inline bool isUInt<8>(uint64_t x) {
406 return static_cast<uint8_t>(x) == x;
407}
408template <> constexpr inline bool isUInt<16>(uint64_t x) {
409 return static_cast<uint16_t>(x) == x;
410}
411template <> constexpr inline bool isUInt<32>(uint64_t x) {
412 return static_cast<uint32_t>(x) == x;
413}
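Because the N < 64 overload only ever shifts by an N that is strictly below 64, and the N >= 64 overload short-circuits to true, no instantiation performs a shift by the full width. A compile-time sketch, assuming the header above:

#include "llvm/Support/MathExtras.h"

// Illustrative only.
static_assert(llvm::isUInt<16>(65535), "largest 16-bit value fits");
static_assert(!llvm::isUInt<16>(65536), "one past the top does not");
static_assert(llvm::isUInt<64>(~0ULL), "the N >= 64 overload is trivially true");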
414
415/// Checks if an unsigned integer is an N bit number shifted left by S.
416template <unsigned N, unsigned S>
417constexpr inline bool isShiftedUInt(uint64_t x) {
418 static_assert(
419 N > 0, "isShiftedUInt<0> doesn't make sense (refers to a 0-bit number)");
420 static_assert(N + S <= 64,
421 "isShiftedUInt<N, S> with N + S > 64 is too wide.");
422 // Per the two static_asserts above, S must be strictly less than 64. So
423 // 1 << S is not undefined behavior.
424 return isUInt<N + S>(x) && (x % (UINT64_C(1) << S) == 0);
425}
426
427/// Gets the maximum value for a N-bit unsigned integer.
428inline uint64_t maxUIntN(uint64_t N) {
429 assert(N > 0 && N <= 64 && "integer width out of range");
430
431 // uint64_t(1) << 64 is undefined behavior, so we can't do
432 // (uint64_t(1) << N) - 1
433 // without checking first that N != 64. But this works and doesn't have a
434 // branch.
435 return UINT64_MAX >> (64 - N);
436}
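Shifting UINT64_MAX right by 64 - N is branch-free and always defined, since 64 - N stays in [0, 63] for any N the assertion allows. For example, assuming the header above:

#include "llvm/Support/MathExtras.h"
#include <cassert>
#include <cstdint>

// Illustrative only: demoMaxUIntN is a hypothetical helper.
void demoMaxUIntN() {
  assert(llvm::maxUIntN(8) == 0xFFu);
  assert(llvm::maxUIntN(64) == UINT64_MAX); // shift by zero, no special case needed
}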
437
438/// Gets the minimum value for a N-bit signed integer.
439inline int64_t minIntN(int64_t N) {
440 assert(N > 0 && N <= 64 && "integer width out of range");
441
442 return UINT64_C(1) + ~(UINT64_C(1) << (N - 1));
443}
444
445/// Gets the maximum value for a N-bit signed integer.
446inline int64_t maxIntN(int64_t N) {
447 assert(N > 0 && N <= 64 && "integer width out of range");
448
449 // This relies on two's complement wraparound when N == 64, so we convert to
450 // int64_t only at the very end to avoid UB.
451 return (UINT64_C(1) << (N - 1)) - 1;
452}
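Both helpers build the bound in uint64_t arithmetic (1 + ~(1 << (N - 1)) for the minimum, (1 << (N - 1)) - 1 for the maximum), so no signed shift or signed overflow is involved for N < 64. For example, assuming the header above:

#include "llvm/Support/MathExtras.h"
#include <cassert>

// Illustrative only: demoIntNRange is a hypothetical helper.
void demoIntNRange() {
  assert(llvm::minIntN(8) == -128);
  assert(llvm::maxIntN(8) == 127);
  assert(llvm::isIntN(8, 127) && !llvm::isIntN(8, 128));
}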
453
454/// Checks if an unsigned integer fits into the given (dynamic) bit width.
455inline bool isUIntN(unsigned N, uint64_t x) {
456 return N >= 64 || x <= maxUIntN(N);
457}
458
459/// Checks if a signed integer fits into the given (dynamic) bit width.
460inline bool isIntN(unsigned N, int64_t x) {
461 return N >= 64 || (minIntN(N) <= x && x <= maxIntN(N));
462}
463
464/// Return true if the argument is a non-empty sequence of ones starting at the
465/// least significant bit with the remainder zero (32 bit version).
466/// Ex. isMask_32(0x0000FFFFU) == true.
467constexpr inline bool isMask_32(uint32_t Value) {
468 return Value && ((Value + 1) & Value) == 0;
469}
470
471/// Return true if the argument is a non-empty sequence of ones starting at the
472/// least significant bit with the remainder zero (64 bit version).
473constexpr inline bool isMask_64(uint64_t Value) {
474 return Value && ((Value + 1) & Value) == 0;
475}
476
477/// Return true if the argument contains a non-empty sequence of ones with the
478/// remainder zero (32 bit version.) Ex. isShiftedMask_32(0x0000FF00U) == true.
479constexpr inline bool isShiftedMask_32(uint32_t Value) {
480 return Value && isMask_32((Value - 1) | Value);
481}
482
483/// Return true if the argument contains a non-empty sequence of ones with the
484/// remainder zero (64 bit version.)
485constexpr inline bool isShiftedMask_64(uint64_t Value) {
486 return Value && isMask_64((Value - 1) | Value);
487}
488
489/// Return true if the argument is a power of two > 0.
490/// Ex. isPowerOf2_32(0x00100000U) == true (32 bit edition.)
491constexpr inline bool isPowerOf2_32(uint32_t Value) {
492 return Value && !(Value & (Value - 1));
493}
494
495/// Return true if the argument is a power of two > 0 (64 bit edition.)
496constexpr inline bool isPowerOf2_64(uint64_t Value) {
497 return Value && !(Value & (Value - 1));
498}
499
500/// Count the number of ones from the most significant bit to the first
501/// zero bit.
502///
503/// Ex. countLeadingOnes(0xFF0FFF00) == 8.
504/// Only unsigned integral types are allowed.
505///
506/// \param ZB the behavior on an input of all ones. Only ZB_Width and
507/// ZB_Undefined are valid arguments.
508template <typename T>
509unsigned countLeadingOnes(T Value, ZeroBehavior ZB = ZB_Width) {
510 static_assert(std::numeric_limits<T>::is_integer &&
511 !std::numeric_limits<T>::is_signed,
512 "Only unsigned integral types are allowed.");
513 return countLeadingZeros<T>(~Value, ZB);
514}
515
516/// Count the number of ones from the least significant bit to the first
517/// zero bit.
518///
519/// Ex. countTrailingOnes(0x00FF00FF) == 8.
520/// Only unsigned integral types are allowed.
521///
522/// \param ZB the behavior on an input of all ones. Only ZB_Width and
523/// ZB_Undefined are valid arguments.
524template <typename T>
525unsigned countTrailingOnes(T Value, ZeroBehavior ZB = ZB_Width) {
526 static_assert(std::numeric_limits<T>::is_integer &&
527 !std::numeric_limits<T>::is_signed,
528 "Only unsigned integral types are allowed.");
529 return countTrailingZeros<T>(~Value, ZB);
530}
531
532namespace detail {
533template <typename T, std::size_t SizeOfT> struct PopulationCounter {
534 static unsigned count(T Value) {
535 // Generic version, forward to 32 bits.
536 static_assert(SizeOfT <= 4, "Not implemented!");
537#if defined(__GNUC__)
538 return __builtin_popcount(Value);
539#else
540 uint32_t v = Value;
541 v = v - ((v >> 1) & 0x55555555);
542 v = (v & 0x33333333) + ((v >> 2) & 0x33333333);
543 return ((v + (v >> 4) & 0xF0F0F0F) * 0x1010101) >> 24;
544#endif
545 }
546};
547
548template <typename T> struct PopulationCounter<T, 8> {
549 static unsigned count(T Value) {
550#if defined(__GNUC__)
551 return __builtin_popcountll(Value);
552#else
553 uint64_t v = Value;
554 v = v - ((v >> 1) & 0x5555555555555555ULL);
555 v = (v & 0x3333333333333333ULL) + ((v >> 2) & 0x3333333333333333ULL);
556 v = (v + (v >> 4)) & 0x0F0F0F0F0F0F0F0FULL;
557 return unsigned((uint64_t)(v * 0x0101010101010101ULL) >> 56);
558#endif
559 }
560};
561} // namespace detail
562
563/// Count the number of set bits in a value.
564/// Ex. countPopulation(0xF000F000) = 8
565/// Returns 0 if the word is zero.
566template <typename T>
567inline unsigned countPopulation(T Value) {
568 static_assert(std::numeric_limits<T>::is_integer &&
569 !std::numeric_limits<T>::is_signed,
570 "Only unsigned integral types are allowed.");
571 return detail::PopulationCounter<T, sizeof(T)>::count(Value);
572}
573
574/// Return true if the argument contains a non-empty sequence of ones with the
575/// remainder zero (32 bit version.) Ex. isShiftedMask_32(0x0000FF00U) == true.
576/// If true, \p MaskIdx will specify the index of the lowest set bit and \p
577/// MaskLen is updated to specify the length of the mask, else neither are
578/// updated.
579inline bool isShiftedMask_32(uint32_t Value, unsigned &MaskIdx,
580 unsigned &MaskLen) {
581 if (!isShiftedMask_32(Value))
582 return false;
583 MaskIdx = countTrailingZeros(Value);
584 MaskLen = countPopulation(Value);
585 return true;
586}
587
588/// Return true if the argument contains a non-empty sequence of ones with the
589/// remainder zero (64 bit version.) If true, \p MaskIdx will specify the index
590/// of the lowest set bit and \p MaskLen is updated to specify the length of the
591/// mask, else neither are updated.
592inline bool isShiftedMask_64(uint64_t Value, unsigned &MaskIdx,
593 unsigned &MaskLen) {
594 if (!isShiftedMask_64(Value))
595 return false;
596 MaskIdx = countTrailingZeros(Value);
597 MaskLen = countPopulation(Value);
598 return true;
599}
600
601/// Compile time Log2.
602/// Valid only for positive powers of two.
603template <size_t kValue> constexpr inline size_t CTLog2() {
604 static_assert(kValue > 0 && llvm::isPowerOf2_64(kValue),
605 "Value is not a valid power of 2");
606 return 1 + CTLog2<kValue / 2>();
607}
608
609template <> constexpr inline size_t CTLog2<1>() { return 0; }
610
611/// Return the log base 2 of the specified value.
612inline double Log2(double Value) {
613#if defined(__ANDROID_API__) && __ANDROID_API__ < 18
614 return __builtin_log(Value) / __builtin_log(2.0);
615#else
616 return log2(Value);
617#endif
618}
619
620/// Return the floor log base 2 of the specified value, -1 if the value is zero.
621/// (32 bit edition.)
622/// Ex. Log2_32(32) == 5, Log2_32(1) == 0, Log2_32(0) == -1, Log2_32(6) == 2
623inline unsigned Log2_32(uint32_t Value) {
624 return 31 - countLeadingZeros(Value);
17. Returning the value 4294967295
625}
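This is the value the analyzer is tracking: with the default ZB_Width behaviour, countLeadingZeros(0) returns 32, so Log2_32(0) computes 31 - 32 in unsigned arithmetic and wraps to 4294967295, the value in the path note above. If that result is later used as a shift amount, the shift is undefined. A small sketch of the behaviour, assuming the header above:

#include "llvm/Support/MathExtras.h"
#include <cassert>

// Illustrative only: demoLog2_32 is a hypothetical helper.
void demoLog2_32() {
  assert(llvm::Log2_32(32) == 5u);
  assert(llvm::Log2_32(1) == 0u);
  assert(llvm::Log2_32(0) == 4294967295u); // 31 - 32 wraps to the reported value
}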
626
627/// Return the floor log base 2 of the specified value, -1 if the value is zero.
628/// (64 bit edition.)
629inline unsigned Log2_64(uint64_t Value) {
630 return 63 - countLeadingZeros(Value);
631}
632
633/// Return the ceil log base 2 of the specified value, 32 if the value is zero.
634/// (32 bit edition).
635/// Ex. Log2_32_Ceil(32) == 5, Log2_32_Ceil(1) == 0, Log2_32_Ceil(6) == 3
636inline unsigned Log2_32_Ceil(uint32_t Value) {
637 return 32 - countLeadingZeros(Value - 1);
638}
639
640/// Return the ceil log base 2 of the specified value, 64 if the value is zero.
641/// (64 bit edition.)
642inline unsigned Log2_64_Ceil(uint64_t Value) {
643 return 64 - countLeadingZeros(Value - 1);
644}
645
646/// Return the greatest common divisor of the values using Euclid's algorithm.
647template <typename T>
648inline T greatestCommonDivisor(T A, T B) {
649 while (B) {
650 T Tmp = B;
651 B = A % B;
652 A = Tmp;
653 }
654 return A;
655}
656
657inline uint64_t GreatestCommonDivisor64(uint64_t A, uint64_t B) {
658 return greatestCommonDivisor<uint64_t>(A, B);
659}
660
661/// This function takes a 64-bit integer and returns the bit equivalent double.
662inline double BitsToDouble(uint64_t Bits) {
663 double D;
664 static_assert(sizeof(uint64_t) == sizeof(double), "Unexpected type sizes");
665 memcpy(&D, &Bits, sizeof(Bits));
666 return D;
667}
668
669/// This function takes a 32-bit integer and returns the bit equivalent float.
670inline float BitsToFloat(uint32_t Bits) {
671 float F;
672 static_assert(sizeof(uint32_t) == sizeof(float), "Unexpected type sizes");
673 memcpy(&F, &Bits, sizeof(Bits));
674 return F;
675}
676
677/// This function takes a double and returns the bit equivalent 64-bit integer.
678/// Note that copying doubles around changes the bits of NaNs on some hosts,
679/// notably x86, so this routine cannot be used if these bits are needed.
680inline uint64_t DoubleToBits(double Double) {
681 uint64_t Bits;
682 static_assert(sizeof(uint64_t) == sizeof(double), "Unexpected type sizes");
683 memcpy(&Bits, &Double, sizeof(Double));
684 return Bits;
685}
686
687/// This function takes a float and returns the bit equivalent 32-bit integer.
688/// Note that copying floats around changes the bits of NaNs on some hosts,
689/// notably x86, so this routine cannot be used if these bits are needed.
690inline uint32_t FloatToBits(float Float) {
691 uint32_t Bits;
692 static_assert(sizeof(uint32_t) == sizeof(float), "Unexpected type sizes");
693 memcpy(&Bits, &Float, sizeof(Float));
694 return Bits;
695}
696
697/// A and B are either alignments or offsets. Return the minimum alignment that
698/// may be assumed after adding the two together.
699constexpr inline uint64_t MinAlign(uint64_t A, uint64_t B) {
700 // The largest power of 2 that divides both A and B.
701 //
702 // Replace "-Value" by "1+~Value" in the following commented code to avoid
703 // MSVC warning C4146
704 // return (A | B) & -(A | B);
705 return (A | B) & (1 + ~(A | B));
706}
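The 1 + ~(A | B) form is the two's complement identity for -(A | B); AND-ing it back isolates the lowest set bit, i.e. the largest power of two dividing both inputs. For example, assuming the header above:

#include "llvm/Support/MathExtras.h"

// Illustrative only.
static_assert(llvm::MinAlign(8, 12) == 4, "lowest common set bit of 8 and 12");
static_assert(llvm::MinAlign(16, 0) == 16, "a zero offset keeps the alignment");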
707
708/// Returns the next power of two (in 64-bits) that is strictly greater than A.
709/// Returns zero on overflow.
710constexpr inline uint64_t NextPowerOf2(uint64_t A) {
711 A |= (A >> 1);
712 A |= (A >> 2);
713 A |= (A >> 4);
714 A |= (A >> 8);
715 A |= (A >> 16);
716 A |= (A >> 32);
717 return A + 1;
718}
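The cascade of shifted ORs smears the highest set bit into every lower position, so A becomes 2^k - 1 and A + 1 is the next power of two strictly greater than the input. For example, assuming the header above:

#include "llvm/Support/MathExtras.h"

// Illustrative only.
static_assert(llvm::NextPowerOf2(5) == 8, "5 smears to 7, then +1 gives 8");
static_assert(llvm::NextPowerOf2(8) == 16, "strictly greater, even for an exact power of two");
static_assert(llvm::NextPowerOf2(0) == 1, "zero smears to zero, then +1 gives 1");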
719
720/// Returns the power of two which is less than or equal to the given value.
721/// Essentially, it is a floor operation across the domain of powers of two.
722inline uint64_t PowerOf2Floor(uint64_t A) {
723 if (!A) return 0;
724 return 1ull << (63 - countLeadingZeros(A, ZB_Undefined));
725}
726
727/// Returns the power of two which is greater than or equal to the given value.
728/// Essentially, it is a ceil operation across the domain of powers of two.
729inline uint64_t PowerOf2Ceil(uint64_t A) {
730 if (!A)
731 return 0;
732 return NextPowerOf2(A - 1);
733}
734
735/// Returns the next integer (mod 2**64) that is greater than or equal to
736/// \p Value and is a multiple of \p Align. \p Align must be non-zero.
737///
738/// If non-zero \p Skew is specified, the return value will be a minimal
739/// integer that is greater than or equal to \p Value and equal to
740/// \p Align * N + \p Skew for some integer N. If \p Skew is larger than
741/// \p Align, its value is adjusted to '\p Skew mod \p Align'.
742///
743/// Examples:
744/// \code
745/// alignTo(5, 8) = 8
746/// alignTo(17, 8) = 24
747/// alignTo(~0LL, 8) = 0
748/// alignTo(321, 255) = 510
749///
750/// alignTo(5, 8, 7) = 7
751/// alignTo(17, 8, 1) = 17
752/// alignTo(~0LL, 8, 3) = 3
753/// alignTo(321, 255, 42) = 552
754/// \endcode
755inline uint64_t alignTo(uint64_t Value, uint64_t Align, uint64_t Skew = 0) {
756 assert(Align != 0u && "Align can't be 0.");
757 Skew %= Align;
758 return (Value + Align - 1 - Skew) / Align * Align + Skew;
759}
760
761/// Returns the next integer (mod 2**64) that is greater than or equal to
762/// \p Value and is a multiple of \c Align. \c Align must be non-zero.
763template <uint64_t Align> constexpr inline uint64_t alignTo(uint64_t Value) {
764 static_assert(Align != 0u, "Align must be non-zero");
765 return (Value + Align - 1) / Align * Align;
766}
767
768/// Returns the integer ceil(Numerator / Denominator).
769inline uint64_t divideCeil(uint64_t Numerator, uint64_t Denominator) {
770 return alignTo(Numerator, Denominator) / Denominator;
771}
772
773/// Returns the integer nearest(Numerator / Denominator).
774inline uint64_t divideNearest(uint64_t Numerator, uint64_t Denominator) {
775 return (Numerator + (Denominator / 2)) / Denominator;
776}
777
778/// Returns the largest uint64_t less than or equal to \p Value and is
779/// \p Skew mod \p Align. \p Align must be non-zero
780inline uint64_t alignDown(uint64_t Value, uint64_t Align, uint64_t Skew = 0) {
781 assert(Align != 0u && "Align can't be 0.");
782 Skew %= Align;
783 return (Value - Skew) / Align * Align + Skew;
784}
785
786/// Sign-extend the number in the bottom B bits of X to a 32-bit integer.
787/// Requires 0 < B <= 32.
788template <unsigned B> constexpr inline int32_t SignExtend32(uint32_t X) {
789 static_assert(B > 0, "Bit width can't be 0.");
790 static_assert(B <= 32, "Bit width out of range.");
791 return int32_t(X << (32 - B)) >> (32 - B);
792}
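Shifting the field to the top of the 32-bit value and arithmetically shifting it back replicates bit B - 1 into the high bits; the static_asserts keep B in (0, 32], so neither shift is by the full width. A compile-time sketch, assuming the header above (the right shift of a negative value is implementation-defined, but GCC and Clang shift arithmetically):

#include "llvm/Support/MathExtras.h"

// Illustrative only.
static_assert(llvm::SignExtend32<8>(0xFFu) == -1, "0xFF is -1 as a signed 8-bit field");
static_assert(llvm::SignExtend32<8>(0x7Fu) == 127, "high bit clear: value unchanged");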
793
794/// Sign-extend the number in the bottom B bits of X to a 32-bit integer.
795/// Requires 0 < B <= 32.
796inline int32_t SignExtend32(uint32_t X, unsigned B) {
797 assert(B > 0 && "Bit width can't be 0.");
798 assert(B <= 32 && "Bit width out of range.");
799 return int32_t(X << (32 - B)) >> (32 - B);
800}
801
802/// Sign-extend the number in the bottom B bits of X to a 64-bit integer.
803/// Requires 0 < B <= 64.
804template <unsigned B> constexpr inline int64_t SignExtend64(uint64_t x) {
805 static_assert(B > 0, "Bit width can't be 0.");
806 static_assert(B <= 64, "Bit width out of range.");
807 return int64_t(x << (64 - B)) >> (64 - B);
808}
809
810/// Sign-extend the number in the bottom B bits of X to a 64-bit integer.
811/// Requires 0 < B <= 64.
812inline int64_t SignExtend64(uint64_t X, unsigned B) {
813 assert(B > 0 && "Bit width can't be 0.");
814 assert(B <= 64 && "Bit width out of range.");
815 return int64_t(X << (64 - B)) >> (64 - B);
816}
817
818/// Subtract two unsigned integers, X and Y, of type T and return the absolute
819/// value of the result.
820template <typename T>
821std::enable_if_t<std::is_unsigned<T>::value, T> AbsoluteDifference(T X, T Y) {
822 return X > Y ? (X - Y) : (Y - X);
823}
824
825/// Add two unsigned integers, X and Y, of type T. Clamp the result to the
826/// maximum representable value of T on overflow. ResultOverflowed indicates if
827/// the result is larger than the maximum representable value of type T.
828template <typename T>
829std::enable_if_t<std::is_unsigned<T>::value, T>
830SaturatingAdd(T X, T Y, bool *ResultOverflowed = nullptr) {
831 bool Dummy;
832 bool &Overflowed = ResultOverflowed ? *ResultOverflowed : Dummy;
833 // Hacker's Delight, p. 29
834 T Z = X + Y;
835 Overflowed = (Z < X || Z < Y);
836 if (Overflowed)
837 return std::numeric_limits<T>::max();
838 else
839 return Z;
840}
841
842/// Multiply two unsigned integers, X and Y, of type T. Clamp the result to the
843/// maximum representable value of T on overflow. ResultOverflowed indicates if
844/// the result is larger than the maximum representable value of type T.
845template <typename T>
846std::enable_if_t<std::is_unsigned<T>::value, T>
847SaturatingMultiply(T X, T Y, bool *ResultOverflowed = nullptr) {
848 bool Dummy;
849 bool &Overflowed = ResultOverflowed ? *ResultOverflowed : Dummy;
850
851 // Hacker's Delight, p. 30 has a different algorithm, but we don't use that
852 // because it fails for uint16_t (where multiplication can have undefined
853 // behavior due to promotion to int), and requires a division in addition
854 // to the multiplication.
855
856 Overflowed = false;
857
858 // Log2(Z) would be either Log2Z or Log2Z + 1.
859 // Special case: if X or Y is 0, Log2_64 gives -1, and Log2Z
860 // will necessarily be less than Log2Max as desired.
861 int Log2Z = Log2_64(X) + Log2_64(Y);
862 const T Max = std::numeric_limits<T>::max();
863 int Log2Max = Log2_64(Max);
864 if (Log2Z < Log2Max) {
865 return X * Y;
866 }
867 if (Log2Z > Log2Max) {
868 Overflowed = true;
869 return Max;
870 }
871
872 // We're going to use the top bit, and maybe overflow one
873 // bit past it. Multiply all but the bottom bit then add
874 // that on at the end.
875 T Z = (X >> 1) * Y;
876 if (Z & ~(Max >> 1)) {
877 Overflowed = true;
878 return Max;
879 }
880 Z <<= 1;
881 if (X & 1)
882 return SaturatingAdd(Z, Y, ResultOverflowed);
883
884 return Z;
885}
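The Log2_64 comparison classifies the product cheaply: if the exponents already sum past the type's width the result saturates, and only the boundary case falls through to the top-bit computation and the final SaturatingAdd. A usage sketch, assuming the header above:

#include "llvm/Support/MathExtras.h"
#include <cassert>
#include <cstdint>

// Illustrative only: demoSaturatingMultiply is a hypothetical helper.
void demoSaturatingMultiply() {
  bool Overflowed = false;
  uint16_t A = 300, B = 300;
  assert(llvm::SaturatingMultiply(A, B, &Overflowed) == 65535 && Overflowed);
  A = 200; // 200 * 300 = 60000 still fits in uint16_t
  assert(llvm::SaturatingMultiply(A, B, &Overflowed) == 60000 && !Overflowed);
}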
886
887/// Multiply two unsigned integers, X and Y, and add the unsigned integer, A to
888/// the product. Clamp the result to the maximum representable value of T on
889/// overflow. ResultOverflowed indicates if the result is larger than the
890/// maximum representable value of type T.
891template <typename T>
892std::enable_if_t<std::is_unsigned<T>::value, T>
893SaturatingMultiplyAdd(T X, T Y, T A, bool *ResultOverflowed = nullptr) {
894 bool Dummy;
895 bool &Overflowed = ResultOverflowed ? *ResultOverflowed : Dummy;
896
897 T Product = SaturatingMultiply(X, Y, &Overflowed);
898 if (Overflowed)
899 return Product;
900
901 return SaturatingAdd(A, Product, &Overflowed);
902}
903
904/// Use this rather than HUGE_VALF; the latter causes warnings on MSVC.
905extern const float huge_valf;
906
907
908/// Add two signed integers, computing the two's complement truncated result,
909/// returning true if overflow occurred.
910template <typename T>
911std::enable_if_t<std::is_signed<T>::value, T> AddOverflow(T X, T Y, T &Result) {
912#if __has_builtin(__builtin_add_overflow)
913 return __builtin_add_overflow(X, Y, &Result);
914#else
915 // Perform the unsigned addition.
916 using U = std::make_unsigned_t<T>;
917 const U UX = static_cast<U>(X);
918 const U UY = static_cast<U>(Y);
919 const U UResult = UX + UY;
920
921 // Convert to signed.
922 Result = static_cast<T>(UResult);
923
924 // Adding two positive numbers should result in a positive number.
925 if (X > 0 && Y > 0)
926 return Result <= 0;
927 // Adding two negatives should result in a negative number.
928 if (X < 0 && Y < 0)
929 return Result >= 0;
930 return false;
931#endif
932}
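The portable fallback relies on the sign rules of wrapped addition: overflow is only possible when both operands share a sign and the truncated sum does not. Note that the declared return type is T rather than bool, so callers treat the result as a truth value. Illustrative usage, assuming the header above:

#include "llvm/Support/MathExtras.h"
#include <cassert>
#include <cstdint>
#include <limits>

// Illustrative only: demoAddOverflow is a hypothetical helper.
void demoAddOverflow() {
  int32_t Result;
  const int32_t Max = std::numeric_limits<int32_t>::max();
  assert(llvm::AddOverflow(Max, int32_t(1), Result));            // wraps; overflow reported
  assert(!llvm::AddOverflow(int32_t(2), int32_t(3), Result) && Result == 5);
}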
933
934/// Subtract two signed integers, computing the two's complement truncated
935/// result, returning true if an overflow occurred.
936template <typename T>
937std::enable_if_t<std::is_signed<T>::value, T> SubOverflow(T X, T Y, T &Result) {
938#if __has_builtin(__builtin_sub_overflow)
939 return __builtin_sub_overflow(X, Y, &Result);
940#else
941 // Perform the unsigned subtraction.
942 using U = std::make_unsigned_t<T>;
943 const U UX = static_cast<U>(X);
944 const U UY = static_cast<U>(Y);
945 const U UResult = UX - UY;
946
947 // Convert to signed.
948 Result = static_cast<T>(UResult);
949
950 // Subtracting a positive number from a negative results in a negative number.
951 if (X <= 0 && Y > 0)
952 return Result >= 0;
953 // Subtracting a negative number from a positive results in a positive number.
954 if (X >= 0 && Y < 0)
955 return Result <= 0;
956 return false;
957#endif
958}
959
960/// Multiply two signed integers, computing the two's complement truncated
961/// result, returning true if an overflow occurred.
962template <typename T>
963std::enable_if_t<std::is_signed<T>::value, T> MulOverflow(T X, T Y, T &Result) {
964 // Perform the unsigned multiplication on absolute values.
965 using U = std::make_unsigned_t<T>;
966 const U UX = X < 0 ? (0 - static_cast<U>(X)) : static_cast<U>(X);
967 const U UY = Y < 0 ? (0 - static_cast<U>(Y)) : static_cast<U>(Y);
968 const U UResult = UX * UY;
969
970 // Convert to signed.
971 const bool IsNegative = (X < 0) ^ (Y < 0);
972 Result = IsNegative ? (0 - UResult) : UResult;
973
974 // If any of the args was 0, result is 0 and no overflow occurs.
975 if (UX == 0 || UY == 0)
976 return false;
977
978 // UX and UY are in [1, 2^n], where n is the number of digits.
979 // Check how the max allowed absolute value (2^n for negative, 2^(n-1) for
980 // positive) divided by an argument compares to the other.
981 if (IsNegative)
982 return UX > (static_cast<U>(std::numeric_limits<T>::max()) + U(1)) / UY;
983 else
984 return UX > (static_cast<U>(std::numeric_limits<T>::max())) / UY;
985}
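After forming the truncated product from absolute values, the overflow test divides the largest allowed magnitude by one operand and compares against the other, allowing one extra value of magnitude (max() + 1) when the result is negative. Illustrative usage, assuming the header above:

#include "llvm/Support/MathExtras.h"
#include <cassert>
#include <cstdint>

// Illustrative only: demoMulOverflow is a hypothetical helper.
void demoMulOverflow() {
  int32_t Result;
  assert(!llvm::MulOverflow(int32_t(1000), int32_t(1000), Result) && Result == 1000000);
  assert(llvm::MulOverflow(int32_t(1 << 20), int32_t(1 << 12), Result)); // 2^32 overflows int32_t
}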
986
987} // End llvm namespace
988
989#endif